feat: update Chinese translations in zh.json and zh-Hant.json using locale_updater.py (#1506)

update zh.json and zh-Hant.json using locale_updater.py
pull/1507/head
Peng Ding 2 years ago committed by GitHub
parent 66ed43cbcb
commit 0020498c10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,202 @@
# Author: Oaklight
# GitHub profile: https://github.com/Oaklight
# Date: April 9, 2023
# Description: This script patches missing translations in a locale file.
# It uses a reference locale file (typically 'en.json') to find keys that are missing from the target locale file.
# It iterates over every field/entry in the reference file and checks whether the same field/entry exists in the target file.
# For each missing field/entry, the script shows the source string and a reference Google translation, then asks you to confirm or correct it.
# The resulting file is saved as './*.proposed.json'; review it before merging and uploading.
# usage: locale_updater.py [-h] ref_locale tgt_locale
# TODO: add other NMT systems for different preferences and accuracy
import json
import requests
def flatten_json(nested_json, parent_key="", sep=":"):
    """Collapse a nested dict into a single-level dict with joined keys.

    Every non-dict value is stored under the chain of keys that leads to it,
    joined with ``sep`` and prefixed with ``parent_key`` when that is
    non-empty. Nested dicts are expanded recursively, so an empty nested dict
    contributes no entries.
    """
    flat = {}
    for name, child in nested_json.items():
        path = sep.join((parent_key, name)) if parent_key else name
        if not isinstance(child, dict):
            # Leaf value: keep it untouched under its full path.
            flat[path] = child
            continue
        flat.update(flatten_json(child, path, sep))
    return flat
def unflatten_json(flattened_dict, sep=":"):
    """Rebuild a nested dict from a flat dict whose keys are ``sep``-joined paths.

    Each key is split on ``sep``; all but the last component name the nested
    dicts to descend into (created on demand), and the last component is the
    key under which the value is stored.
    """
    tree = {}
    for path, leaf_value in flattened_dict.items():
        *branches, leaf = path.split(sep)
        node = tree
        for branch in branches:
            if branch not in node:
                node[branch] = {}
            node = node[branch]
        node[leaf] = leaf_value
    return tree
def sort_nested_json(nested_json):
    """Return a copy of a JSON-like value with every dict's keys in sorted order.

    Dicts are rebuilt with their keys sorted, lists are rebuilt with the same
    element order, and both are processed recursively. Any other value is
    returned as-is.
    """
    if isinstance(nested_json, dict):
        return {
            name: sort_nested_json(nested_json[name])
            for name in sorted(nested_json.keys())
        }
    if isinstance(nested_json, list):
        return [sort_nested_json(element) for element in nested_json]
    return nested_json
def google_translate(
    source_text, source_language="en", target_language="zh-CN"
):
    """Translate ``source_text`` using the Google Translate web endpoint.

    Args:
        source_text: Text to translate. ``"\\r\\n"`` sequences are replaced by
            spaces before the text is sent.
        source_language: Language code sent as the ``sl`` query parameter.
        target_language: Language code sent as the ``tl`` query parameter.

    Returns:
        The translated text: every segment of the response joined together,
        with ``"\\r\\n"`` sequences removed.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    new_line = "\r\n"
    url = "https://translate.googleapis.com/translate_a/single"
    query = {
        "client": "gtx",
        "sl": source_language,
        "tl": target_language,
        "dt": "t",
    }
    # Hand the text to requests as a mapping so it is form-encoded properly;
    # a hand-built "q=..." body breaks on '&', '=', '+' and '%' in the text.
    form = {"q": source_text.replace(new_line, " ")}
    response = requests.post(url, params=query, data=form, timeout=30)
    response.raise_for_status()
    json_value = response.json()
    # The first element of the response is a list of translated segments;
    # join them all so multi-sentence strings are not cut after the first
    # segment. Empty or missing entries are skipped defensively.
    segments = [item[0] for item in (json_value[0] or []) if item and item[0]]
    return "".join(segment.replace(new_line, "") for segment in segments)
def get_code_name(json_filename):
    """Map a locale file name (or path) to a translation language code.

    The file name is expected to look like ``<lang>.json`` or
    ``<lang>_<country>.json``. Directory components are ignored, so
    ``./en.json`` and ``en.json`` give the same result.

    Args:
        json_filename: File name or path of the locale file.

    Returns:
        The code from the language table for ``<lang>``, or an empty string
        when the language is not in the table. When a country part is present
        it replaces any default region of the mapped code (``pt_BR`` gives
        ``pt-BR``, not ``pt-BR-BR``).
    """
    import os

    # Work on the bare file name: splitting a path such as "./en.json" on "."
    # would otherwise give an empty language code.
    stem = os.path.basename(json_filename).split(".")[0]
    file_parts = stem.split("_")
    lang_code = file_parts[0]
    country_code = file_parts[1] if len(file_parts) > 1 else ""
    # Map language code to code name
    lang_map = {
        "de": "de",
        "en": "en",
        "es": "es",
        "fr": "fr",
        "it": "it",
        "ko": "ko",
        "nl": "nl",
        "pl": "pl",
        "pt": "pt-BR",
        "ru": "ru",
        "sl": "sl",
        "sv": "sv",
        "tr": "tr",
        "uk": "uk",
        "vi": "vi",
        "zh-Hant": "zh-TW",
        "zh": "zh-CN",
    }
    code_name = lang_map.get(lang_code, "")
    if country_code:
        # An explicit country overrides the table's default region instead of
        # being appended after it.
        code_name = code_name.split("-")[0] + "-" + country_code.upper()
    return code_name
if __name__ == "__main__":
    # Interactive entry point: compare a reference locale file with a target
    # locale file, ask the user for a translation of every key missing from
    # the target, and write the completed target to "<name>.proposed.json" in
    # the current directory.
    import argparse
    import os
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("ref_locale", help="reference locale file")
    parser.add_argument("tgt_locale", help="target locale file")
    args = parser.parse_args()

    ref_locale = args.ref_locale
    tgt_locale = args.tgt_locale

    # Pass only the file name, so directory components of the path never take
    # part in the language-code lookup.
    ref_codename = get_code_name(os.path.basename(ref_locale))
    tgt_codename = get_code_name(os.path.basename(tgt_locale))

    # Locale files hold non-ASCII text; read them as UTF-8 explicitly instead
    # of relying on the platform's default encoding.
    with open(ref_locale, "r", encoding="utf-8") as f:
        ref = json.load(f)
    with open(tgt_locale, "r", encoding="utf-8") as f:
        tgt = json.load(f)

    # Flatten both files so nested entries can be compared by their key path.
    ref_flat = flatten_json(ref)
    tgt_flat = flatten_json(tgt)

    # Sorted so the prompts and the order of the added keys are the same on
    # every run (plain set iteration order is not).
    missing_keys = sorted(set(ref_flat) - set(tgt_flat))

    if not missing_keys:
        print("\033[92m All keys are present in the target locale \033[0m")
        sys.exit()

    print(f"\033[91m Total missing keys: \033[0m {len(missing_keys)}")

    # Overview: missing key in red, reference text in green.
    for key in missing_keys:
        print(
            "\033[91m"
            + f"Missing key: {key}"
            + "\033[0m"
            + " | "
            + "\033[92m"
            + f"English: {ref_flat[key]}"
            + "\033[0m"
        )

    print("=============================================")
    print(f"\033[91m Total missing keys: \033[0m {len(missing_keys)}")

    # For each missing key: show the reference text and a Google translation,
    # then let the user type a translation or press Enter to accept Google's.
    total = len(missing_keys)
    for position, key in enumerate(missing_keys, start=1):
        print(f"============================================= {position}/{total}")

        print("\033[91m" + "Missing key: " + "\033[0m" + key)
        print("\033[92m" + f"{ref_codename}: " + "\033[0m" + ref_flat[key])

        proposal_google = google_translate(ref_flat[key], ref_codename, tgt_codename)
        print(
            "\033[94m"
            + f"Reference {tgt_codename} translation: "
            + "\033[0m"
            + proposal_google
        )

        proposal = input("\033[92m" + "Enter translation: " + "\033[0m")
        # Empty input means "use the reference translation".
        tgt_flat[key] = proposal or proposal_google

    # Restore the nested layout and save next to the current directory as
    # "<target name>.proposed.json", leaving the original target untouched.
    tgt_unflat = unflatten_json(tgt_flat)
    tgt_locale_name = os.path.basename(tgt_locale).split(".")[0]
    with open(f"{tgt_locale_name}.proposed.json", "w", encoding="utf-8") as f:
        json.dump(tgt_unflat, f, indent=2, ensure_ascii=False)

@ -52,12 +52,20 @@
"vacuum": "清理",
"select": "選擇",
"database": "資料庫",
"avatar": "頭像"
"avatar": "頭像",
"upload": "上傳",
"rename": "改名",
"name": "姓名",
"visibility": "能見度",
"clear": "清除",
"preview": "預覽"
},
"auth": {
"signup-as-host": "註冊為管理員",
"host-tip": "你正在註冊為管理員帳號。",
"not-host-tip": "如果你沒有帳號,請聯絡網站管理員。"
"not-host-tip": "如果你沒有帳號,請聯絡網站管理員。",
"new-password": "新密碼",
"repeat-new-password": "重複新密碼"
},
"resource": {
"description": "查看在 Memo 中的靜態資源。例如:圖片",
@ -78,7 +86,24 @@
"no-files-selected": "沒有文件被選中❗",
"upload-successfully": "上傳成功",
"file-drag-drop-prompt": "將您的文件拖放到此處以上傳文件",
"select": "選擇"
"select": "選擇",
"create-dialog": {
"upload-method": "上傳方式",
"local-file": {
"choose": "選擇一個文件...",
"option": "本地文件"
},
"external-link": {
"file-name-placeholder": "文件名",
"option": "外部鏈接",
"type-placeholder": "文件類型",
"link": "鏈接",
"type": "類型",
"file-name": "文件名"
},
"title": "創建資源"
},
"search-bar-placeholder": "搜索資源"
},
"archived": {
"archived-memos": "已封存的 Memo",
@ -104,7 +129,14 @@
"protected": "登入使用者可見",
"public": "所有人可見",
"disabled": "公共memos已禁用"
}
},
"fetching-data": "正在獲取數據...",
"archived-memos": "歸檔備忘錄",
"archived-at": "存檔於",
"fetch-more": "點擊此處獲取更多",
"embed": "嵌入",
"no-archived-memos": "沒有存檔的備忘錄。",
"search-placeholder": "搜索備忘錄"
},
"memo-list": {
"fetching-data": "讀取資料中...",
@ -144,7 +176,10 @@
"text-placeholder": "以 ^ 開頭使用正則表達式"
},
"tag-list": {
"tip-text": "輸入`#tag `來新增標籤"
"tip-text": "輸入`#tag `來新增標籤",
"create-tag": "創建標籤",
"tag-name": "標籤名",
"all-tags": "所有標籤"
},
"search": {
"quickly-filter": "快速過濾"
@ -170,7 +205,9 @@
"mobile-editor-style": "手機版編輯器樣式",
"default-memo-sort-option": "Memo 顯示時間",
"created_ts": "建立時間",
"updated_ts": "更新時間"
"updated_ts": "更新時間",
"daily-review-time-offset": "每日回顧時間偏移",
"default-resource-visibility": "默認資源可見性"
},
"storage-section": {
"storage-services-list": "存儲服務列表",
@ -252,7 +289,8 @@
"succeed-update-additional-script": "更新附加腳本成功",
"update-succeed": "更新成功",
"page-not-found": "404 - 未找到網頁 😥",
"succeed-vacuum-database": "清理資料庫成功"
"succeed-vacuum-database": "清理資料庫成功",
"resource-ready": "所有資源都準備好了"
},
"days": {
"monday": "星期一",
@ -269,5 +307,23 @@
"sat": "六",
"sunday": "星期天",
"sun": "日"
},
"router": {
"back-to-home": "回到首頁"
},
"ask-ai": {
"not-enabled": "您尚未設置 OpenAI API 密鑰。",
"title": "問AI",
"placeholder": "隨便問",
"go-to-settings": "前往設置"
},
"embed-memo": {
"only-public-supported": "* 僅公開備忘錄支持。",
"title": "嵌入備忘錄",
"copy": "複製",
"text": "將以下代碼複製並粘貼到您的博客或網站中。"
},
"daily-review": {
"title": "每日回顧"
}
}
}

@ -52,12 +52,20 @@
"vacuum": "清理",
"select": "选择",
"database": "数据库",
"avatar": "头像"
"avatar": "头像",
"rename": "改名",
"upload": "上传",
"visibility": "能见度",
"preview": "预览",
"name": "姓名",
"clear": "清除"
},
"auth": {
"signup-as-host": "注册为 Host",
"host-tip": "你正在注册为 Host 用户账号。",
"not-host-tip": "如果你没有账号,请联系站点 Host"
"not-host-tip": "如果你没有账号,请联系站点 Host",
"new-password": "新密码",
"repeat-new-password": "重复新密码"
},
"resource": {
"description": "查看在 Memo 中的静态资源。例如:图片",
@ -78,7 +86,24 @@
"no-files-selected": "没有文件被选中❗",
"upload-successfully": "上传成功",
"file-drag-drop-prompt": "将您的文件拖放到此处以上传文件",
"select": "选择"
"select": "选择",
"create-dialog": {
"external-link": {
"type-placeholder": "文件类型",
"link": "链接",
"file-name": "文件名",
"type": "类型",
"file-name-placeholder": "文件名",
"option": "外部链接"
},
"local-file": {
"choose": "选择一个文件...",
"option": "本地文件"
},
"upload-method": "上传方式",
"title": "创建资源"
},
"search-bar-placeholder": "搜索资源"
},
"archived": {
"archived-memos": "已归档的 Memo",
@ -104,7 +129,14 @@
"protected": "登录用户可见",
"public": "所有人可见",
"disabled": "公共memos已禁用"
}
},
"embed": "嵌入",
"search-placeholder": "搜索备忘录",
"archived-at": "存档于",
"no-archived-memos": "没有存档的备忘录。",
"fetching-data": "正在获取数据...",
"archived-memos": "归档备忘录",
"fetch-more": "点击此处获取更多"
},
"memo-list": {
"fetching-data": "请求数据中...",
@ -173,7 +205,9 @@
"mobile-editor-style": "移动端编辑器样式",
"default-memo-sort-option": "Memo 显示时间",
"created_ts": "创建时间",
"updated_ts": "更新时间"
"updated_ts": "更新时间",
"daily-review-time-offset": "每日回顾时间偏移",
"default-resource-visibility": "默认资源可见性"
},
"storage-section": {
"storage-services-list": "存储服务列表",
@ -272,5 +306,23 @@
"sat": "六",
"sunday": "星期天",
"sun": "日"
},
"embed-memo": {
"title": "嵌入备忘录",
"copy": "复制",
"only-public-supported": "* 仅支持公开备忘录。",
"text": "将以下代码复制并粘贴到您的博客或网站中。"
},
"ask-ai": {
"placeholder": "随便问",
"title": "问AI",
"not-enabled": "您尚未设置 OpenAI API 密钥。",
"go-to-settings": "前往设置"
},
"daily-review": {
"title": "每日回顾"
},
"router": {
"back-to-home": "回到首页"
}
}
}
Loading…
Cancel
Save