From 0020498c101bbb93ef8fb8246652202fa4a879b1 Mon Sep 17 00:00:00 2001 From: Peng Ding Date: Sun, 9 Apr 2023 20:57:50 +0800 Subject: [PATCH] feat: update Chinese translations in zh.json and zh-Hant.json using locale_updater.py (#1506) update zh.json and zh-Hant.json using locale_updater.py --- web/src/locales/locale_updater.py | 202 ++++++++++++++++++++++++++++++ web/src/locales/zh-Hant.json | 72 +++++++++-- web/src/locales/zh.json | 64 +++++++++- 3 files changed, 324 insertions(+), 14 deletions(-) create mode 100644 web/src/locales/locale_updater.py diff --git a/web/src/locales/locale_updater.py b/web/src/locales/locale_updater.py new file mode 100644 index 00000000..8ce91aa2 --- /dev/null +++ b/web/src/locales/locale_updater.py @@ -0,0 +1,202 @@ +# Author: Oaklight +# GitHub profile: https://github.com/Oaklight +# Date: April 9, 2023 +# Description: This script is used to patch missing translations in a locale file. + +# The script uses 'en.json' as the reference file to find missing keys in other locale files. +# It iterates through each field and their entries in 'en.json' and checks if the same field/entry exists in other files. +# If a field/entry is missing, the script prompts the source string, reference Google translation, and asks for confirmation or correction. +# The resulting file is saved as './*.proposed.json', and you should review it before merging and uploading. 
# usage: locale_updater.py [-h] ref_locale tgt_locale

# TODO: add other NMT system for different preference and accuracy

import argparse
import json
import os
import sys

try:
    import requests
except ImportError:
    # The JSON helpers below are pure Python; only google_translate() needs
    # the HTTP client, so a missing package is reported there instead of here.
    requests = None


# ANSI escape sequences used to colour the terminal output.
_RED = "\033[91m"
_GREEN = "\033[92m"
_BLUE = "\033[94m"
_RESET = "\033[0m"


def flatten_json(nested_json, parent_key="", sep=":"):
    """Flatten a nested dict into a one-level dict.

    Keys of nested dicts are joined with *sep*, e.g. ``{"a": {"b": 1}}``
    becomes ``{"a:b": 1}``. Non-dict values (including lists) are kept as
    leaves. *parent_key* is the prefix accumulated by the recursion.
    """
    flattened_dict = {}
    for key, value in nested_json.items():
        new_key = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            flattened_dict.update(flatten_json(value, new_key, sep))
        else:
            flattened_dict[new_key] = value
    return flattened_dict


def unflatten_json(flattened_dict, sep=":"):
    """Rebuild the nested dict produced from keys joined with *sep*.

    Inverse of :func:`flatten_json` for dicts whose keys do not themselves
    contain *sep*.
    """
    nested_json = {}
    for key, value in flattened_dict.items():
        *parents, leaf = key.split(sep)
        current = nested_json
        for part in parents:
            current = current.setdefault(part, {})
        current[leaf] = value
    return nested_json


def sort_nested_json(nested_json):
    """Return a copy of *nested_json* with every dict's keys sorted.

    Lists keep their element order; their elements are processed
    recursively. Any other value is returned unchanged.
    """
    if isinstance(nested_json, dict):
        return {key: sort_nested_json(nested_json[key]) for key in sorted(nested_json)}
    if isinstance(nested_json, list):
        return [sort_nested_json(item) for item in nested_json]
    return nested_json


def google_translate(source_text, source_language="en", target_language="zh-CN"):
    """Translate *source_text* through the translate.googleapis.com endpoint.

    Args:
        source_text: text to translate; CRLF line breaks are sent as spaces.
        source_language: language code of *source_text*.
        target_language: language code to translate into.

    Returns:
        The translated text, with all returned segments joined together.

    Raises:
        RuntimeError: if the ``requests`` package is not installed.
        requests.RequestException: on network failure or a non-2xx response.
    """
    if requests is None:
        raise RuntimeError("the 'requests' package is required for google_translate()")

    new_line = "\r\n"
    query = source_text.replace(new_line, " ")
    if not query.strip():
        # Nothing to translate; avoid a pointless round trip.
        return ""

    url = "https://translate.googleapis.com/translate_a/single"
    params = {
        "client": "gtx",
        "sl": source_language,
        "tl": target_language,
        "dt": "t",
    }
    # Passing a dict lets requests form-encode the text, so characters such as
    # '&', '+' and '%' in the source string survive the POST intact.
    response = requests.post(url, params=params, data={"q": query}, timeout=30)
    response.raise_for_status()
    json_value = response.json()

    # The first element of the response is a list of segments whose first item
    # is the translated text; longer inputs come back as several segments, so
    # all of them are needed for a complete translation.
    segments = [item[0] for item in (json_value[0] or []) if item and item[0]]
    return "".join(segment.replace(new_line, "") for segment in segments)


def get_code_name(json_filename):
    """Map a locale file name or path to a translation language code.

    The language is taken from the file's base name up to the first ".",
    optionally followed by "_<country>" (e.g. ``zh.json``, ``./en.json``,
    ``pt_BR.json``). Languages missing from the built-in table fall back to
    the code found in the file name.
    """
    # Work on the base name only: splitting a path such as "./en.json" on "."
    # would otherwise yield an empty language code.
    stem = os.path.basename(json_filename).split(".")[0]
    file_parts = stem.split("_")
    lang_code = file_parts[0]
    country_code = file_parts[1] if len(file_parts) > 1 else ""

    # Map language code to code name
    lang_map = {
        "de": "de",
        "en": "en",
        "es": "es",
        "fr": "fr",
        "it": "it",
        "ko": "ko",
        "nl": "nl",
        "pl": "pl",
        "pt": "pt-BR",
        "ru": "ru",
        "sl": "sl",
        "sv": "sv",
        "tr": "tr",
        "uk": "uk",
        "vi": "vi",
        "zh-Hant": "zh-TW",
        "zh": "zh-CN",
    }
    code_name = lang_map.get(lang_code, lang_code)

    if country_code:
        # An explicit country replaces any default region from the table, so
        # "pt_BR" gives "pt-BR" rather than "pt-BR-BR".
        code_name = code_name.split("-")[0] + "-" + country_code.upper()

    return code_name


def _load_json(path):
    """Read a UTF-8 encoded JSON file and return the parsed content."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _reference_translation(text, source_language, target_language):
    """Return the machine translation of *text*, or "" if it cannot be fetched."""
    try:
        return google_translate(text, source_language, target_language)
    except (OSError, RuntimeError, ValueError, LookupError, TypeError) as err:
        # requests' exceptions derive from IOError/OSError; the remaining types
        # cover a missing dependency and an unexpected response layout.
        print(f"{_RED}Could not fetch a reference translation: {err}{_RESET}")
        return ""


def main(argv=None):
    """Interactively fill the keys missing from a target locale file.

    Reads the reference and target locale files named on the command line,
    lists the keys present only in the reference, asks for a translation of
    each one (an empty answer accepts the machine translation) and writes the
    result to ``<target name>.proposed.json`` in the current directory.

    Returns:
        The process exit status (0).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("ref_locale", help="reference locale file")
    parser.add_argument("tgt_locale", help="target locale file")
    args = parser.parse_args(argv)
    ref_locale = args.ref_locale
    tgt_locale = args.tgt_locale

    ref_codename = get_code_name(ref_locale)
    tgt_codename = get_code_name(tgt_locale)

    ref_flat = flatten_json(_load_json(ref_locale))
    tgt_flat = flatten_json(_load_json(tgt_locale))

    # Sorted so that the listing and the prompts appear in a stable order.
    missing_keys = sorted(set(ref_flat) - set(tgt_flat))
    if not missing_keys:
        print("\033[92m All keys are present in the target locale \033[0m")
        return 0
    print(f"\033[91m Total missing keys: \033[0m {len(missing_keys)}")

    for key in missing_keys:
        print(
            f"{_RED}Missing key: {key}{_RESET}"
            " | "
            f"{_GREEN}English: {ref_flat[key]}{_RESET}"
        )
    print("=============================================")
    print(f"\033[91m Total missing keys: \033[0m {len(missing_keys)}")

    try:
        for i, key in enumerate(missing_keys, start=1):
            print(
                f"============================================= {i}/{len(missing_keys)}"
            )
            source = ref_flat[key]
            print(f"{_RED}Missing key: {_RESET}{key}")
            print(f"{_GREEN}{ref_codename}: {_RESET}{source}")
            if not isinstance(source, str):
                # Numbers, booleans, lists... cannot be sent to the translator;
                # carry the reference value over unchanged.
                tgt_flat[key] = source
                continue

            proposal_google = _reference_translation(source, ref_codename, tgt_codename)
            print(
                f"{_BLUE}Reference {tgt_codename} translation: {_RESET}{proposal_google}"
            )
            # An empty answer accepts the reference translation; without a
            # reference the user has to type something.
            proposal = input(f"{_GREEN}Enter translation: {_RESET}")
            while not proposal and not proposal_google:
                proposal = input(f"{_GREEN}Enter translation: {_RESET}")
            tgt_flat[key] = proposal or proposal_google
    except (KeyboardInterrupt, EOFError):
        # Keep what was entered so far; re-running the script on the proposed
        # file continues with the keys that are still missing.
        print("\nInterrupted, saving the translations collected so far.")

    # Save next to the current directory as "<target name>.proposed.json".
    tgt_locale_name = os.path.basename(tgt_locale).split(".")[0]
    with open(f"{tgt_locale_name}.proposed.json", "w", encoding="utf-8") as f:
        json.dump(unflatten_json(tgt_flat), f, indent=2, ensure_ascii=False)
    return 0


if __name__ == "__main__":
    sys.exit(main())
"no-archived-memos": "沒有存檔的備忘錄。", + "search-placeholder": "搜索備忘錄" }, "memo-list": { "fetching-data": "讀取資料中...", @@ -144,7 +176,10 @@ "text-placeholder": "以 ^ 開頭使用正則表達式" }, "tag-list": { - "tip-text": "輸入`#tag `來新增標籤" + "tip-text": "輸入`#tag `來新增標籤", + "create-tag": "創建標籤", + "tag-name": "標籤名", + "all-tags": "所有標籤" }, "search": { "quickly-filter": "快速過濾" @@ -170,7 +205,9 @@ "mobile-editor-style": "手機版編輯器樣式", "default-memo-sort-option": "Memo 顯示時間", "created_ts": "建立時間", - "updated_ts": "更新時間" + "updated_ts": "更新時間", + "daily-review-time-offset": "每日回顧時間偏移", + "default-resource-visibility": "默認資源可見性" }, "storage-section": { "storage-services-list": "存儲服務列表", @@ -252,7 +289,8 @@ "succeed-update-additional-script": "更新附加腳本成功", "update-succeed": "更新成功", "page-not-found": "404 - 未找到網頁 😥", - "succeed-vacuum-database": "清理資料庫成功" + "succeed-vacuum-database": "清理資料庫成功", + "resource-ready": "所有資源都準備好了" }, "days": { "monday": "星期一", @@ -269,5 +307,23 @@ "sat": "六", "sunday": "星期天", "sun": "日" + }, + "router": { + "back-to-home": "回到首頁" + }, + "ask-ai": { + "not-enabled": "您尚未設置 OpenAI API 密鑰。", + "title": "問AI", + "placeholder": "隨便問", + "go-to-settings": "前往設置" + }, + "embed-memo": { + "only-public-supported": "* 僅公開備忘錄支持。", + "title": "嵌入備忘錄", + "copy": "複製", + "text": "將以下代碼複製並粘貼到您的博客或網站中。" + }, + "daily-review": { + "title": "每日回顧" } -} +} \ No newline at end of file diff --git a/web/src/locales/zh.json b/web/src/locales/zh.json index aba4d6ce..22ddee56 100644 --- a/web/src/locales/zh.json +++ b/web/src/locales/zh.json @@ -52,12 +52,20 @@ "vacuum": "清理", "select": "选择", "database": "数据库", - "avatar": "头像" + "avatar": "头像", + "rename": "改名", + "upload": "上传", + "visibility": "能见度", + "preview": "预览", + "name": "姓名", + "clear": "清除" }, "auth": { "signup-as-host": "注册为 Host", "host-tip": "你正在注册为 Host 用户账号。", - "not-host-tip": "如果你没有账号,请联系站点 Host" + "not-host-tip": "如果你没有账号,请联系站点 Host", + "new-password": "新密码", + "repeat-new-password": "重复新密码" }, "resource": { "description": 
"查看在 Memo 中的静态资源。例如:图片", @@ -78,7 +86,24 @@ "no-files-selected": "没有文件被选中❗", "upload-successfully": "上传成功", "file-drag-drop-prompt": "将您的文件拖放到此处以上传文件", - "select": "选择" + "select": "选择", + "create-dialog": { + "external-link": { + "type-placeholder": "文件类型", + "link": "链接", + "file-name": "文件名", + "type": "类型", + "file-name-placeholder": "文件名", + "option": "外部链接" + }, + "local-file": { + "choose": "选择一个文件...", + "option": "本地文件" + }, + "upload-method": "上传方式", + "title": "创建资源" + }, + "search-bar-placeholder": "搜索资源" }, "archived": { "archived-memos": "已归档的 Memo", @@ -104,7 +129,14 @@ "protected": "登录用户可见", "public": "所有人可见", "disabled": "公共memos已禁用" - } + }, + "embed": "嵌入", + "search-placeholder": "搜索备忘录", + "archived-at": "存档于", + "no-archived-memos": "没有存档的备忘录。", + "fetching-data": "正在获取数据...", + "archived-memos": "归档备忘录", + "fetch-more": "点击此处获取更多" }, "memo-list": { "fetching-data": "请求数据中...", @@ -173,7 +205,9 @@ "mobile-editor-style": "移动端编辑器样式", "default-memo-sort-option": "Memo 显示时间", "created_ts": "创建时间", - "updated_ts": "更新时间" + "updated_ts": "更新时间", + "daily-review-time-offset": "每日回顾时间偏移", + "default-resource-visibility": "默认资源可见性" }, "storage-section": { "storage-services-list": "存储服务列表", @@ -272,5 +306,23 @@ "sat": "六", "sunday": "星期天", "sun": "日" + }, + "embed-memo": { + "title": "嵌入备忘录", + "copy": "复制", + "only-public-supported": "* 仅支持公开备忘录。", + "text": "将以下代码复制并粘贴到您的博客或网站中。" + }, + "ask-ai": { + "placeholder": "随便问", + "title": "问AI", + "not-enabled": "您尚未设置 OpenAI API 密钥。", + "go-to-settings": "前往设置" + }, + "daily-review": { + "title": "每日回顾" + }, + "router": { + "back-to-home": "回到首页" } -} +} \ No newline at end of file