diff --git a/server/router/frontend/dist/index.html b/server/router/frontend/dist/index.html
index a612ed1f7..3b0a7b561 100644
--- a/server/router/frontend/dist/index.html
+++ b/server/router/frontend/dist/index.html
@@ -1,11 +1,25 @@
-
+
-
+
+
+
+
+
+
+
Memos
+
+
+
+
+
+
+
-
- No embeddable frontend found.
+
+
+
diff --git a/web/src/components/MemoEditor/ActionButton/RecordAudioButton.tsx b/web/src/components/MemoEditor/ActionButton/RecordAudioButton.tsx
index bd5bf2bb4..ce3dbfaf3 100644
--- a/web/src/components/MemoEditor/ActionButton/RecordAudioButton.tsx
+++ b/web/src/components/MemoEditor/ActionButton/RecordAudioButton.tsx
@@ -1,17 +1,169 @@
import { Button } from "@usememos/mui";
import { MicIcon, StopCircleIcon } from "lucide-react";
-import { useCallback, useContext, useState } from "react";
+import { useCallback, useContext, useState, useRef } from "react";
import toast from "react-hot-toast";
import { resourceStore } from "@/store/v2";
import { Resource } from "@/types/proto/api/v1/resource_service";
import { useTranslate } from "@/utils/i18n";
import { MemoEditorContext } from "../types";
+// Type declarations for the Web Speech API (SpeechRecognition is not in the standard lib.dom typings)
+interface ISpeechRecognition extends EventTarget {
+ continuous: boolean;
+ interimResults: boolean;
+ lang: string;
+ start(): void;
+ stop(): void;
+ abort(): void;
+ onstart: ((this: ISpeechRecognition, ev: Event) => any) | null;
+ onresult: ((this: ISpeechRecognition, ev: SpeechRecognitionEvent) => any) | null;
+ onerror: ((this: ISpeechRecognition, ev: SpeechRecognitionErrorEvent) => any) | null;
+ onend: ((this: ISpeechRecognition, ev: Event) => any) | null;
+}
+
+interface SpeechRecognitionEvent extends Event {
+ resultIndex: number;
+ results: SpeechRecognitionResultList;
+}
+
+interface SpeechRecognitionErrorEvent extends Event {
+ error: string;
+}
+
+declare global {
+ interface Window {
+ SpeechRecognition: new () => ISpeechRecognition;
+ webkitSpeechRecognition: new () => ISpeechRecognition;
+ }
+}
+
const RecordAudioButton = () => {
const t = useTranslate();
const context = useContext(MemoEditorContext);
const [isRecording, setIsRecording] = useState(false);
const [mediaRecorder, setMediaRecorder] = useState(null);
+ const [isTranscribing, setIsTranscribing] = useState(false);
+ const speechRecognitionRef = useRef<ISpeechRecognition | null>(null);
+
+ // Refs tracking the in-progress transcription text and the insertion offset
+ const interimTranscriptRef = useRef('');
+ const finalTranscriptRef = useRef('');
+ const insertPositionRef = useRef(0);
+
+ // Check whether the browser supports speech recognition
+ const isSpeechRecognitionSupported = () => {
+ return 'webkitSpeechRecognition' in window || 'SpeechRecognition' in window;
+ };
+
+ // Initialize the speech recognition instance
+ const initSpeechRecognition = useCallback(() => {
+ if (!isSpeechRecognitionSupported()) {
+ return null;
+ }
+
+ const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
+ const recognition = new SpeechRecognition();
+
+ recognition.continuous = true;
+ recognition.interimResults = true;
+ recognition.lang = 'zh-CN'; // defaults to Chinese; adjust as needed
+
+ recognition.onstart = () => {
+ setIsTranscribing(true);
+ console.log('语音识别已开始');
+
+ // Record the insertion start position
+ if (context.editorRef?.current) {
+ const editor = context.editorRef.current;
+ const currentContent = editor.getContent();
+ insertPositionRef.current = currentContent.length;
+
+ // Reset transcription state
+ interimTranscriptRef.current = '';
+ finalTranscriptRef.current = '';
+ }
+ };
+
+ recognition.onresult = (event: SpeechRecognitionEvent) => {
+ let interimTranscript = '';
+ let finalTranscript = '';
+
+ // Process all recognition results
+ for (let i = event.resultIndex; i < event.results.length; i++) {
+ const transcript = event.results[i][0].transcript;
+ if (event.results[i].isFinal) {
+ finalTranscript += transcript;
+ } else {
+ interimTranscript += transcript;
+ }
+ }
+
+ if (context.editorRef?.current) {
+ const editor = context.editorRef.current;
+ const currentContent = editor.getContent();
+
+ // Length of the previously inserted transcript text to remove
+ const oldTextLength = finalTranscriptRef.current.length + interimTranscriptRef.current.length;
+
+ // Remove the old transcript text, if any
+ if (oldTextLength > 0) {
+ const newContent = currentContent.slice(0, insertPositionRef.current) +
+ currentContent.slice(insertPositionRef.current + oldTextLength);
+ editor.setContent(newContent);
+ }
+
+ // Update transcription state
+ if (finalTranscript) {
+ finalTranscriptRef.current += finalTranscript;
+ }
+ interimTranscriptRef.current = interimTranscript;
+
+ // Insert the updated transcript text
+ const newTranscriptText = finalTranscriptRef.current + interimTranscript;
+ if (newTranscriptText) {
+ const contentBeforeInsert = editor.getContent();
+ let textToInsert = newTranscriptText;
+
+ // Add a separating space at the insertion point when needed
+ if (insertPositionRef.current > 0 &&
+ contentBeforeInsert[insertPositionRef.current - 1] &&
+ !contentBeforeInsert[insertPositionRef.current - 1].match(/[\s\n]/)) {
+ textToInsert = ' ' + textToInsert;
+ }
+
+ // Splice the text into the editor content
+ const newContent = contentBeforeInsert.slice(0, insertPositionRef.current) +
+ textToInsert +
+ contentBeforeInsert.slice(insertPositionRef.current);
+ editor.setContent(newContent);
+
+ // Move the cursor to the end of the inserted text
+ const cursorPosition = insertPositionRef.current + textToInsert.length;
+ editor.setCursorPosition(cursorPosition);
+ }
+ }
+ };
+
+ recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
+ console.error('语音识别错误:', event.error);
+ if (event.error === 'not-allowed') {
+ toast.error(t("message.microphone-not-available"));
+ } else {
+ toast.error(`语音识别错误: ${event.error}`);
+ }
+ };
+
+ recognition.onend = () => {
+ setIsTranscribing(false);
+ console.log('语音识别已结束');
+
+ // Reset transcription state
+ interimTranscriptRef.current = '';
+ finalTranscriptRef.current = '';
+ };
+
+ return recognition;
+ }, [t, context]);
// 检测浏览器支持的音频格式
const getSupportedMimeType = () => {
@@ -73,6 +225,9 @@ const RecordAudioButton = () => {
}),
});
context.setResourceList([...context.resourceList, resource]);
+
+ // Notify the user that recording finished
+ toast.success(`录音和转写已完成`);
} catch (error: any) {
console.error(error);
toast.error(error.details);
@@ -85,11 +240,23 @@ const RecordAudioButton = () => {
recorder.start(1000);
setMediaRecorder(recorder);
setIsRecording(true);
+
+ // Start speech recognition alongside the audio recording
+ if (isSpeechRecognitionSupported()) {
+ const recognition = initSpeechRecognition();
+ if (recognition) {
+ speechRecognitionRef.current = recognition;
+ recognition.start();
+ }
+ } else {
+ toast.error("您的浏览器不支持语音识别功能");
+ }
+
} catch (error) {
console.error(error);
toast.error(t("message.microphone-not-available"));
}
- }, [context, resourceStore, t]);
+ }, [context, resourceStore, t, initSpeechRecognition]);
const stopRecording = useCallback(() => {
if (mediaRecorder) {
@@ -97,10 +264,23 @@ const RecordAudioButton = () => {
setMediaRecorder(null);
setIsRecording(false);
}
+
+ // Stop speech recognition
+ if (speechRecognitionRef.current) {
+ speechRecognitionRef.current.stop();
+ speechRecognitionRef.current = null;
+ }
+
+ setIsTranscribing(false);
}, [mediaRecorder]);
return (
-