feat: update Chinese translations in zh.json and zh-Hant.json using locale_updater.py (#1506)

update zh.json and zh-Hant.json using locale_updater.py
2 years ago · 0020498c10
parent 66ed43cbcb
commit 0020498c10
3 changed files with 324 additions and 14 deletions
--- a/web/src/locales/locale_updater.py
+++ b/web/src/locales/locale_updater.py
@ -0,0 +1,202 @@
 # Author: Oaklight
 # GitHub profile: https://github.com/Oaklight
 # Date: April 9, 2023
 # Description: This script is used to patch missing translations in a locale file.
 # The script uses 'en.json' as the reference file to find missing keys in other locale files.
 # It iterates through each field and their entries in 'en.json' and checks if the same field/entry exists in other files.
 # If a field/entry is missing, the script shows the source string and a reference Google translation, then asks the user to confirm or correct it.
 # The resulting file is saved as './*.proposed.json', and you should review it before merging and uploading.
 # usage: locale_updater.py [-h] ref_locale tgt_locale
 # TODO: add other NMT systems to suit different preferences and accuracy needs
 import json
 import requests
 def flatten_json(nested_json, parent_key="", sep=":"):
    """Collapse a nested dict into a one-level dict keyed by joined paths.

    Args:
        nested_json: Mapping whose values may themselves be dicts.
        parent_key: Path prefix prepended (followed by ``sep``) to every key;
            an empty prefix adds nothing.
        sep: String placed between the path components.

    Returns:
        A new dict mapping each joined path to its non-dict value, in
        traversal order. Dict values are recursed into rather than stored,
        so an empty nested dict contributes no entries. Non-dict values
        (including lists) are stored unchanged.
    """
    flat = {}
    # Build the prefix once instead of re-deciding it for every key.
    prefix = parent_key + sep if parent_key else ""
    for name, child in nested_json.items():
        path = prefix + name
        if not isinstance(child, dict):
            flat[path] = child
            continue
        flat.update(flatten_json(child, path, sep))
    return flat
 def unflatten_json(flattened_dict, sep=":"):
    """Rebuild a nested dict from a dict keyed by ``sep``-joined paths.

    Args:
        flattened_dict: Mapping of joined path strings to values.
        sep: Separator used to split each key into path components.

    Returns:
        A new nested dict in which every path component but the last names
        an intermediate dict and the last component holds the value.
    """
    tree = {}
    for path, leaf_value in flattened_dict.items():
        *branches, leaf = path.split(sep)
        node = tree
        # Walk down the tree, creating intermediate dicts on first use.
        for branch in branches:
            if branch not in node:
                node[branch] = {}
            node = node[branch]
        node[leaf] = leaf_value
    return tree
 def sort_nested_json(nested_json):
    """Return a copy of a JSON-like value with all dict keys in sorted order.

    Dicts are rebuilt with their keys sorted, recursively. Lists keep their
    element order, but each element is processed recursively. Any other
    value is returned as-is.
    """
    if isinstance(nested_json, list):
        return [sort_nested_json(element) for element in nested_json]
    if not isinstance(nested_json, dict):
        return nested_json
    return {
        name: sort_nested_json(nested_json[name]) for name in sorted(nested_json)
    }
 def google_translate(
    source_text, source_language="en", target_language="zh-CN", timeout=10
 ):
    """Translate ``source_text`` with the Google Translate "gtx" web endpoint.

    Args:
        source_text: Text to translate. CRLF line breaks are replaced by
            spaces before sending.
        source_language: Language code of ``source_text``.
        target_language: Language code to translate into.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The translated text, with all returned segments joined together, or
        an empty string when the response holds no segments.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
    """
    new_line = "\r\n"
    query = source_text.replace(new_line, " ")
    url = "https://translate.googleapis.com/translate_a/single"
    params = {
        "client": "gtx",
        "sl": source_language,
        "tl": target_language,
        "dt": "t",
    }
    # Passing a dict lets requests form-encode the body, so characters such
    # as "&", "+", "%" or "=" in the text no longer corrupt the request.
    response = requests.post(url, params=params, data={"q": query}, timeout=timeout)
    response.raise_for_status()
    json_value = response.json()
    # The first element of the response is read as a list of segments whose
    # first item is the translated text; join them all so multi-sentence
    # strings are not cut off after the first segment.
    segments = [item[0] for item in (json_value[0] or []) if item[0]]
    return "".join(segments).replace(new_line, "")
 def get_code_name(json_filename):
    """Derive a translation language code from a locale file name or path.

    The file name (without directories or extension) is expected to look
    like ``<lang>`` or ``<lang>_<country>``, e.g. ``zh.json``,
    ``zh-Hant.json`` or ``en_us.json``.

    Args:
        json_filename: Locale file name or path, such as ``"./zh.json"``.

    Returns:
        The code mapped from the language part (``"zh"`` -> ``"zh-CN"``).
        When a country part is present, it is upper-cased and used as the
        region (``"pt_br"`` -> ``"pt-BR"``). A language that is not in the
        mapping yields an empty language part.
    """
    # Keep only the last path component; otherwise "./zh.json" would be cut
    # at its first dot and yield an empty language code.
    base_name = json_filename.replace("\\", "/").rsplit("/", 1)[-1]
    # Remove extension and split language and country codes
    file_parts = base_name.split(".")[0].split("_")
    lang_code = file_parts[0]
    country_code = file_parts[1] if len(file_parts) > 1 else ""
    # Map language code to code name
    lang_map = {
        "de": "de",
        "en": "en",
        "es": "es",
        "fr": "fr",
        "it": "it",
        "ko": "ko",
        "nl": "nl",
        "pl": "pl",
        "pt": "pt-BR",
        "ru": "ru",
        "sl": "sl",
        "sv": "sv",
        "tr": "tr",
        "uk": "uk",
        "vi": "vi",
        "zh-Hant": "zh-TW",
        "zh": "zh-CN",
    }
    code_name = lang_map.get(lang_code, "")
    if country_code:
        # An explicit country replaces any default region in the mapping,
        # so "pt_BR" gives "pt-BR" rather than "pt-BR-BR".
        code_name = code_name.split("-")[0] + "-" + country_code.upper()
    return code_name
 if __name__ == "__main__":
    # Interactive command-line entry point.
    # Compares a reference locale file with a target locale file, prompts for
    # a translation of every key the target lacks (offering a Google
    # translation as the default answer), and writes the merged result to
    # "<target name>.proposed.json" in the current working directory.
    import argparse
    import sys
    parser = argparse.ArgumentParser()
    parser.add_argument("ref_locale", help="reference locale file")
    parser.add_argument("tgt_locale", help="target locale file")
    args = parser.parse_args()
    ref_locale = args.ref_locale
    tgt_locale = args.tgt_locale
    ref_codename = get_code_name(ref_locale)
    tgt_codename = get_code_name(tgt_locale)
    # Locale files hold non-ASCII text, so read them as UTF-8 explicitly
    # instead of relying on the platform's default encoding.
    with open(ref_locale, "r", encoding="utf-8") as f:
        ref = json.load(f)
    with open(tgt_locale, "r", encoding="utf-8") as f:
        tgt = json.load(f)
    # Work on flattened copies so nested keys can be compared directly.
    ref_flat = flatten_json(ref)
    tgt_flat = flatten_json(tgt)
    # Collect missing keys in reference-file order (not as a set) so the
    # prompts and the key order of the written file are deterministic.
    missing_keys = [key for key in ref_flat if key not in tgt_flat]
    if not missing_keys:
        print("\033[92m All keys are present in the target locale \033[0m")
        sys.exit()
    total_missing = len(missing_keys)
    print(f"\033[91m Total missing keys: \033[0m {total_missing}")
    # Overview: missing key in red, reference string in green.
    for key in missing_keys:
        print(
            f"\033[91mMissing key: {key}\033[0m"
            f" | \033[92mEnglish: {ref_flat[key]}\033[0m"
        )
    print("=============================================")
    print(f"\033[91m Total missing keys: \033[0m {total_missing}")
    # For each missing key, show the reference string and a Google
    # translation, then ask for a translation; an empty answer accepts the
    # Google translation.
    for position, key in enumerate(missing_keys, start=1):
        print(f"============================================= {position}/{total_missing}")
        print(f"\033[91mMissing key: \033[0m{key}")
        print(f"\033[92m{ref_codename}: \033[0m{ref_flat[key]}")
        proposal_google = google_translate(ref_flat[key], ref_codename, tgt_codename)
        print(f"\033[94mReference {tgt_codename} translation: \033[0m{proposal_google}")
        proposal = input("\033[92m" + "Enter translation: " + "\033[0m")
        tgt_flat[key] = proposal or proposal_google
    # Restore the nested layout and save next to the current directory as
    # "<target file name>.proposed.json" for review.
    tgt_unflat = unflatten_json(tgt_flat)
    tgt_locale_name = tgt_locale.split("/")[-1].split(".")[0]
    # ensure_ascii=False emits raw non-ASCII characters, so the output file
    # must be opened as UTF-8 as well.
    with open(f"{tgt_locale_name}.proposed.json", "w", encoding="utf-8") as f:
        json.dump(tgt_unflat, f, indent=2, ensure_ascii=False)
--- a/web/src/locales/zh-Hant.json
+++ b/web/src/locales/zh-Hant.json
@ -52,12 +52,20 @@
    "vacuum": "清理",
    "select": "選擇",
    "database": "資料庫",
-    "avatar": "頭像"
+    "avatar": "頭像",
    "upload": "上傳",
    "rename": "改名",
    "name": "名稱",
    "visibility": "可見性",
    "clear": "清除",
    "preview": "預覽"
  },
  "auth": {
    "signup-as-host": "註冊為管理員",
    "host-tip": "你正在註冊為管理員帳號。",
-    "not-host-tip": "如果你沒有帳號，請聯絡網站管理員。"
+    "not-host-tip": "如果你沒有帳號，請聯絡網站管理員。",
    "new-password": "新密碼",
    "repeat-new-password": "重複新密碼"
  },
  "resource": {
    "description": "查看在 Memo 中的靜態資源。例如：圖片",
@ -78,7 +86,24 @@
    "no-files-selected": "沒有文件被選中❗",
    "upload-successfully": "上傳成功",
    "file-drag-drop-prompt": "將您的文件拖放到此處以上傳文件",
-    "select": "選擇"
+    "select": "選擇",
    "create-dialog": {
      "upload-method": "上傳方式",
      "local-file": {
        "choose": "選擇一個文件...",
        "option": "本地文件"
      },
      "external-link": {
        "file-name-placeholder": "文件名",
        "option": "外部鏈接",
        "type-placeholder": "文件類型",
        "link": "鏈接",
        "type": "類型",
        "file-name": "文件名"
      },
      "title": "創建資源"
    },
    "search-bar-placeholder": "搜索資源"
  },
  "archived": {
    "archived-memos": "已封存的 Memo",
@ -104,7 +129,14 @@
      "protected": "登入使用者可見",
      "public": "所有人可見",
      "disabled": "公共memos已禁用"
-    }
+    },
    "fetching-data": "正在獲取數據...",
    "archived-memos": "歸檔備忘錄",
    "archived-at": "存檔於",
    "fetch-more": "點擊此處獲取更多",
    "embed": "嵌入",
    "no-archived-memos": "沒有存檔的備忘錄。",
    "search-placeholder": "搜索備忘錄"
  },
  "memo-list": {
    "fetching-data": "讀取資料中...",
@ -144,7 +176,10 @@
    "text-placeholder": "以 ^ 開頭使用正則表達式"
  },
  "tag-list": {
-    "tip-text": "輸入`#tag `來新增標籤"
+    "tip-text": "輸入`#tag `來新增標籤",
    "create-tag": "創建標籤",
    "tag-name": "標籤名",
    "all-tags": "所有標籤"
  },
  "search": {
    "quickly-filter": "快速過濾"
@ -170,7 +205,9 @@
      "mobile-editor-style": "手機版編輯器樣式",
      "default-memo-sort-option": "Memo 顯示時間",
      "created_ts": "建立時間",
-      "updated_ts": "更新時間"
+      "updated_ts": "更新時間",
      "daily-review-time-offset": "每日回顧時間偏移",
      "default-resource-visibility": "默認資源可見性"
    },
    "storage-section": {
      "storage-services-list": "存儲服務列表",
@ -252,7 +289,8 @@
    "succeed-update-additional-script": "更新附加腳本成功",
    "update-succeed": "更新成功",
    "page-not-found": "404 - 未找到網頁 😥",
-    "succeed-vacuum-database": "清理資料庫成功"
+    "succeed-vacuum-database": "清理資料庫成功",
    "resource-ready": "所有資源都準備好了"
  },
  "days": {
    "monday": "星期一",
@ -269,5 +307,23 @@
    "sat": "六",
    "sunday": "星期天",
    "sun": "日"
  },
  "router": {
    "back-to-home": "回到首頁"
  },
  "ask-ai": {
    "not-enabled": "您尚未設置 OpenAI API 密鑰。",
    "title": "問AI",
    "placeholder": "隨便問",
    "go-to-settings": "前往設置"
  },
  "embed-memo": {
    "only-public-supported": "* 僅公開備忘錄支持。",
    "title": "嵌入備忘錄",
    "copy": "複製",
    "text": "將以下代碼複製並粘貼到您的博客或網站中。"
  },
  "daily-review": {
    "title": "每日回顧"
  }
 }
--- a/web/src/locales/zh.json
+++ b/web/src/locales/zh.json
@ -52,12 +52,20 @@
    "vacuum": "清理",
    "select": "选择",
    "database": "数据库",
-    "avatar": "头像"
+    "avatar": "头像",
    "rename": "改名",
    "upload": "上传",
    "visibility": "可见性",
    "preview": "预览",
    "name": "名称",
    "clear": "清除"
  },
  "auth": {
    "signup-as-host": "注册为 Host",
    "host-tip": "你正在注册为 Host 用户账号。",
-    "not-host-tip": "如果你没有账号，请联系站点 Host"
+    "not-host-tip": "如果你没有账号，请联系站点 Host",
    "new-password": "新密码",
    "repeat-new-password": "重复新密码"
  },
  "resource": {
    "description": "查看在 Memo 中的静态资源。例如：图片",
@ -78,7 +86,24 @@
    "no-files-selected": "没有文件被选中❗",
    "upload-successfully": "上传成功",
    "file-drag-drop-prompt": "将您的文件拖放到此处以上传文件",
-    "select": "选择"
+    "select": "选择",
    "create-dialog": {
      "external-link": {
        "type-placeholder": "文件类型",
        "link": "链接",
        "file-name": "文件名",
        "type": "类型",
        "file-name-placeholder": "文件名",
        "option": "外部链接"
      },
      "local-file": {
        "choose": "选择一个文件...",
        "option": "本地文件"
      },
      "upload-method": "上传方式",
      "title": "创建资源"
    },
    "search-bar-placeholder": "搜索资源"
  },
  "archived": {
    "archived-memos": "已归档的 Memo",
@ -104,7 +129,14 @@
      "protected": "登录用户可见",
      "public": "所有人可见",
      "disabled": "公共memos已禁用"
-    }
+    },
    "embed": "嵌入",
    "search-placeholder": "搜索备忘录",
    "archived-at": "存档于",
    "no-archived-memos": "没有存档的备忘录。",
    "fetching-data": "正在获取数据...",
    "archived-memos": "归档备忘录",
    "fetch-more": "点击此处获取更多"
  },
  "memo-list": {
    "fetching-data": "请求数据中...",
@ -173,7 +205,9 @@
      "mobile-editor-style": "移动端编辑器样式",
      "default-memo-sort-option": "Memo 显示时间",
      "created_ts": "创建时间",
-      "updated_ts": "更新时间"
+      "updated_ts": "更新时间",
      "daily-review-time-offset": "每日回顾时间偏移",
      "default-resource-visibility": "默认资源可见性"
    },
    "storage-section": {
      "storage-services-list": "存储服务列表",
@ -272,5 +306,23 @@
    "sat": "六",
    "sunday": "星期天",
    "sun": "日"
  },
  "embed-memo": {
    "title": "嵌入备忘录",
    "copy": "复制",
    "only-public-supported": "* 仅支持公开备忘录。",
    "text": "将以下代码复制并粘贴到您的博客或网站中。"
  },
  "ask-ai": {
    "placeholder": "随便问",
    "title": "问AI",
    "not-enabled": "您尚未设置 OpenAI API 密钥。",
    "go-to-settings": "前往设置"
  },
  "daily-review": {
    "title": "每日回顾"
  },
  "router": {
    "back-to-home": "回到首页"
  }
 }