From e2878a1bca6baa10f787849f1ef12cee2c3924f4 Mon Sep 17 00:00:00 2001 From: ggurdin Date: Thu, 23 May 2024 13:45:17 -0400 Subject: [PATCH] language detection updates --- lib/pangea/constants/model_keys.dart | 1 + .../language_detection_controller.dart | 34 +++++++++++++++++-- .../models/language_detection_model.dart | 12 ++++--- lib/pangea/repo/igc_repo.dart | 6 ++-- 4 files changed, 44 insertions(+), 9 deletions(-) diff --git a/lib/pangea/constants/model_keys.dart b/lib/pangea/constants/model_keys.dart index ce427f3d3..ef84a7064 100644 --- a/lib/pangea/constants/model_keys.dart +++ b/lib/pangea/constants/model_keys.dart @@ -54,6 +54,7 @@ class ModelKey { static const String offset = "offset"; static const String length = "length"; static const String langCode = 'lang_code'; + static const String confidence = 'confidence'; // some old analytics rooms have langCode instead of lang_code in the room creation content static const String oldLangCode = 'langCode'; static const String wordLang = "word_lang"; diff --git a/lib/pangea/controllers/language_detection_controller.dart b/lib/pangea/controllers/language_detection_controller.dart index 0ff18b556..4d3326dde 100644 --- a/lib/pangea/controllers/language_detection_controller.dart +++ b/lib/pangea/controllers/language_detection_controller.dart @@ -3,6 +3,7 @@ import 'dart:convert'; import 'package:fluffychat/pangea/config/environment.dart'; import 'package:fluffychat/pangea/controllers/pangea_controller.dart'; +import 'package:fluffychat/pangea/models/language_detection_model.dart'; import 'package:fluffychat/pangea/network/urls.dart'; import 'package:http/http.dart' as http; @@ -48,7 +49,7 @@ class LanguageDetectionRequest { } class LanguageDetectionResponse { - List> detections; + List detections; String fullText; LanguageDetectionResponse({ @@ -58,7 +59,11 @@ class LanguageDetectionResponse { factory LanguageDetectionResponse.fromJson(Map json) { return LanguageDetectionResponse( - detections: List>.from(json['detections']), + detections: List.from( + json['detections'].map( + (e) => LanguageDetection.fromJson(e), + ), + ), fullText: json['full_text'], ); } @@ -69,6 +74,18 @@ class LanguageDetectionResponse { 'full_text': fullText, }; } + + LanguageDetection? get _bestDetection { + detections.sort((a, b) => b.confidence.compareTo(a.confidence)); + return detections.isNotEmpty ? detections.first : null; + } + + final double _confidenceThreshold = 0.95; + + LanguageDetection? get thresholdedDetection => + (_bestDetection?.confidence ?? 0) >= _confidenceThreshold + ? _bestDetection! + : null; } class _LanguageDetectionCacheItem { @@ -103,6 +120,19 @@ class LanguageDetectionController { _cacheClearTimer?.cancel(); } + Future detectLanguage( + String fullText, + String? userL2, + String? userL1, + ) async { + final LanguageDetectionRequest params = LanguageDetectionRequest( + fullText: fullText, + userL1: userL1, + userL2: userL2, + ); + return get(params); + } + Future get( LanguageDetectionRequest params, ) async { diff --git a/lib/pangea/models/language_detection_model.dart b/lib/pangea/models/language_detection_model.dart index 6fa3d7299..7ed44868c 100644 --- a/lib/pangea/models/language_detection_model.dart +++ b/lib/pangea/models/language_detection_model.dart @@ -1,19 +1,23 @@ +import 'package:fluffychat/pangea/constants/model_keys.dart'; + class LanguageDetection { String langCode; + double confidence; LanguageDetection({ required this.langCode, + required this.confidence, }); factory LanguageDetection.fromJson(Map json) { return LanguageDetection( - langCode: json[_langCodeKey], + langCode: json[ModelKey.langCode], + confidence: json[ModelKey.confidence], ); } - static const _langCodeKey = "lang_code"; - Map toJson() => { - _langCodeKey: langCode, + ModelKey.langCode: langCode, + ModelKey.confidence: confidence, }; } diff --git a/lib/pangea/repo/igc_repo.dart b/lib/pangea/repo/igc_repo.dart index 068a009e8..9517515d0 100644 --- a/lib/pangea/repo/igc_repo.dart +++ b/lib/pangea/repo/igc_repo.dart @@ -1,13 +1,13 @@ import 'dart:convert'; -import 'package:http/http.dart'; - import 'package:fluffychat/pangea/config/environment.dart'; import 'package:fluffychat/pangea/models/language_detection_model.dart'; import 'package:fluffychat/pangea/models/lemma.dart'; import 'package:fluffychat/pangea/models/pangea_match_model.dart'; import 'package:fluffychat/pangea/models/pangea_token_model.dart'; import 'package:fluffychat/pangea/repo/span_data_repo.dart'; +import 'package:http/http.dart'; + import '../constants/model_keys.dart'; import '../models/igc_text_data_model.dart'; import '../network/requests.dart'; @@ -39,7 +39,7 @@ class IgcRepo { await Future.delayed(const Duration(seconds: 2)); final IGCTextData igcTextData = IGCTextData( - detections: [LanguageDetection(langCode: "en")], + detections: [LanguageDetection(langCode: "en", confidence: 0.99)], tokens: [ PangeaToken( text: PangeaTokenText(content: "This", offset: 0, length: 4),