|
|
|
@ -35,6 +35,7 @@ class MessageSpeechToTextCardState extends State<MessageSpeechToTextCard> {
|
|
|
|
/// Starts out `true`.
/// NOTE(review): presumably cleared once the speech-to-text request finishes
/// — confirm against `getSpeechToText`.
bool _fetchingTranscription = true;

/// Last error recorded for this card. When non-null, `build` shows a
/// `CardErrorWidget` with it instead of the transcript.
Object? error;

/// Token most recently tapped in the transcript; tapping the same token again
/// resets this to `null`.
STTToken? selectedToken;

/// Cached transcript span, assigned in `initState` after the speech-to-text
/// request completes and rendered by the `RichText` in `build`.
TextSpan? transcriptText;
|
|
|
|
|
|
|
|
|
|
|
|
/// Active L1 code as reported by the Pangea language controller.
/// May be `null`.
String? get l1Code {
  final languageController = MatrixState.pangeaController.languageController;
  return languageController.activeL1Code();
}
|
|
|
|
@ -70,82 +71,91 @@ class MessageSpeechToTextCardState extends State<MessageSpeechToTextCard> {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Builds the rich-text span for the transcript held in
/// `speechToTextResponse`.
///
/// When the transcript has no tokens, the full text is returned as a single
/// styled span. Otherwise the tokens are walked in order: each token's text
/// is searched for in the not-yet-consumed tail of the transcript text, any
/// whitespace before it is emitted as a plain span, and the token itself is
/// emitted as a coloured, tappable span that toggles [selectedToken].
/// Whatever text remains after the last token is appended as a plain span.
///
/// Any exception thrown while building is logged through `ErrorHandler`,
/// stored in [error], and an empty span is returned.
TextSpan _buildTranscriptText(BuildContext context) {
  try {
    final Transcript transcript = speechToTextResponse!.transcript;
    final List<InlineSpan> spans = [];
    String remainingFullText = transcript.text;

    if (transcript.sttTokens.isEmpty) {
      return TextSpan(
        text: remainingFullText,
        style: BotStyle.text(
          context,
          existingStyle: TextStyle(
            color: Theme.of(context).colorScheme.onSurface,
          ),
          setColor: false,
        ),
      );
    }

    for (final token in transcript.sttTokens) {
      final offset = remainingFullText.indexOf(token.token.text.content);
      // Token text does not occur in the remaining transcript: skip it.
      if (offset == -1) continue;

      final length = token.length;

      if (remainingFullText.substring(0, offset).trim().isNotEmpty) {
        // Non-whitespace text sits between the cursor and this token.
        // NOTE(review): the text before `offset` is dropped here without
        // being added to `spans`, and the token itself is not rendered as a
        // tappable span — confirm this is intended.
        remainingFullText = remainingFullText.substring(offset);
        continue;
      }

      if (offset > 0) {
        // Add any plain text before the token
        spans.add(
          TextSpan(text: remainingFullText.substring(0, offset)),
        );
      }

      spans.add(
        TextSpan(
          text: remainingFullText.substring(offset, offset + length),
          style: BotStyle.text(
            context,
            existingStyle: TextStyle(color: token.color(context)),
            setColor: false,
          ),
          // Gesture recognizer that toggles selectedToken on tap.
          // NOTE(review): these recognizers are never disposed; TextSpan does
          // not manage the recognizer's lifetime, so the owning State should.
          recognizer: TapGestureRecognizer()
            ..onTap = () {
              debugPrint('Token tapped');
              debugPrint(token.toJson().toString());
              if (mounted) {
                setState(() {
                  if (selectedToken == token) {
                    selectedToken = null;
                  } else {
                    selectedToken = token;
                  }
                });
              }
            },
        ),
      );

      // Advance the cursor past the token that was just emitted.
      remainingFullText = remainingFullText.substring(offset + length);
    }

    if (remainingFullText.isNotEmpty) {
      // Add any remaining text after the last token
      spans.add(TextSpan(text: remainingFullText));
    }

    return TextSpan(children: spans);
  } catch (err, s) {
    ErrorHandler.logError(e: err, s: s);
    // Same guard as the tap handler: never call setState on a disposed State.
    if (mounted) {
      setState(() => error = err);
    }
    return const TextSpan(text: '');
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Starts the speech-to-text request and, once it completes, builds and
/// caches the transcript span in [transcriptText].
@override
void initState() {
  super.initState();
  getSpeechToText().then((_) {
    // The widget may have been disposed while the request was in flight.
    // With no response there is nothing to render; `build` treats a null
    // response as an error, and building here would overwrite any earlier
    // error with a null-check failure.
    if (!mounted || speechToTextResponse == null) return;

    // Build before calling setState: the builder may itself call setState
    // when it records an error, and that should not run inside another
    // setState callback.
    final TextSpan span = _buildTranscriptText(context);
    setState(() => transcriptText = span);
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
String? get wordsPerMinuteString =>
|
|
|
|
String? get wordsPerMinuteString =>
|
|
|
|
@ -158,7 +168,7 @@ class MessageSpeechToTextCardState extends State<MessageSpeechToTextCard> {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Done fetching but no results means some kind of error
|
|
|
|
// Done fetching but no results means some kind of error
|
|
|
|
if (speechToTextResponse == null) {
|
|
|
|
if (speechToTextResponse == null || error != null) {
|
|
|
|
return CardErrorWidget(
|
|
|
|
return CardErrorWidget(
|
|
|
|
error: error ?? "Failed to fetch speech to text",
|
|
|
|
error: error ?? "Failed to fetch speech to text",
|
|
|
|
maxWidth: AppConfig.toolbarMinWidth,
|
|
|
|
maxWidth: AppConfig.toolbarMinWidth,
|
|
|
|
@ -173,7 +183,7 @@ class MessageSpeechToTextCardState extends State<MessageSpeechToTextCard> {
|
|
|
|
children: [
|
|
|
|
children: [
|
|
|
|
const SizedBox(height: 8),
|
|
|
|
const SizedBox(height: 8),
|
|
|
|
RichText(
|
|
|
|
RichText(
|
|
|
|
text: _buildTranscriptText(context),
|
|
|
|
text: transcriptText!,
|
|
|
|
),
|
|
|
|
),
|
|
|
|
if (widget.messageEvent.senderId == Matrix.of(context).client.userID)
|
|
|
|
if (widget.messageEvent.senderId == Matrix.of(context).client.userID)
|
|
|
|
Column(
|
|
|
|
Column(
|
|
|
|
|