Copies for all (#1780)
* initial script * copies for all! * revert intl files * mistaken en to vi translation * improve translation * add vi translation, fix trnaalste script to respect existing metdata * revert translation files * fix translation to only add more without changing too much existing translations --------- Co-authored-by: ggurdin <46800240+ggurdin@users.noreply.github.com>pull/1688/head
							parent
							
								
									9a94045e5d
								
							
						
					
					
						commit
						76f6222e95
					
				
											
												
													File diff suppressed because it is too large
													Load Diff
												
											
										
									
								@ -0,0 +1,304 @@
 | 
			
		||||
"""
 | 
			
		||||
Prerequiresite:
 | 
			
		||||
- Ensure you have an up-to-date `needed-translations.txt` file should you wish to translate only the missing translation keys. To generate an updated `needed-translations.txt` file, run `flutter gen-l10n`
 | 
			
		||||
- Ensure you have python `openai` package installed. If not, run `pip install openai`.
 | 
			
		||||
- Ensure you have an OpenAI API key set in your environment variable `OPENAI_API_KEY`. If not, you can set it by running `export OPENAI_API_KEY=your-api-key` on MacOS/Linux.
 | 
			
		||||
 | 
			
		||||
Usage:
 | 
			
		||||
python scripts/translate.py
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def load_needed_translations() -> dict[str, list[str]]:
 | 
			
		||||
    import json
 | 
			
		||||
    from pathlib import Path
 | 
			
		||||
 | 
			
		||||
    path_to_needed_translations = (
 | 
			
		||||
        Path(__file__).parent.parent / "needed-translations.txt"
 | 
			
		||||
    )
 | 
			
		||||
    if not path_to_needed_translations.exists():
 | 
			
		||||
        raise FileNotFoundError(
 | 
			
		||||
            f"File not found: {path_to_needed_translations}. Please run `flutter gen-l10n` to generate the file."
 | 
			
		||||
        )
 | 
			
		||||
    with open(path_to_needed_translations) as f:
 | 
			
		||||
        needed_translations = json.loads(f.read())
 | 
			
		||||
 | 
			
		||||
    return needed_translations
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def load_translations(lang_code: str) -> dict[str, str]:
 | 
			
		||||
    import json
 | 
			
		||||
    from pathlib import Path
 | 
			
		||||
 | 
			
		||||
    path_to_translations = (
 | 
			
		||||
        Path(__file__).parent.parent / "assets" / "l10n" / f"intl_{lang_code}.arb"
 | 
			
		||||
    )
 | 
			
		||||
    if not path_to_translations.exists():
 | 
			
		||||
        raise FileNotFoundError(
 | 
			
		||||
            f"File not found: {path_to_translations}. Please run `flutter gen-l10n` to generate the file."
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    with open(path_to_translations) as f:
 | 
			
		||||
        translations = json.loads(f.read())
 | 
			
		||||
 | 
			
		||||
    return translations
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def save_translations(lang_code: str, translations: dict[str, str]) -> None:
 | 
			
		||||
    import json
 | 
			
		||||
    from pathlib import Path
 | 
			
		||||
    from datetime import datetime
 | 
			
		||||
    from collections import OrderedDict
 | 
			
		||||
 | 
			
		||||
    path_to_translations = (
 | 
			
		||||
        Path(__file__).parent.parent / "assets" / "l10n" / f"intl_{lang_code}.arb"
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    translations["@@locale"] = lang_code
 | 
			
		||||
    translations["@@last_modified"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
 | 
			
		||||
 | 
			
		||||
    # Load existing data to preserve order.
 | 
			
		||||
    if path_to_translations.exists():
 | 
			
		||||
        with open(path_to_translations, "r") as f:
 | 
			
		||||
            try:
 | 
			
		||||
                existing_data = json.load(f, object_pairs_hook=OrderedDict)
 | 
			
		||||
            except json.JSONDecodeError:
 | 
			
		||||
                existing_data = OrderedDict()
 | 
			
		||||
    else:
 | 
			
		||||
        existing_data = OrderedDict()
 | 
			
		||||
 | 
			
		||||
    # Build final_ordered ensuring the metadata immediately follows its translation.
 | 
			
		||||
    final_ordered = OrderedDict()
 | 
			
		||||
    special_keys = ["@@locale", "@@last_modified"]
 | 
			
		||||
    final_ordered["@@locale"] = translations["@@locale"]
 | 
			
		||||
    final_ordered["@@last_modified"] = translations["@@last_modified"]
 | 
			
		||||
 | 
			
		||||
    keys_added = set()
 | 
			
		||||
 | 
			
		||||
    # Use preserved order from existing file.
 | 
			
		||||
    for key in existing_data:
 | 
			
		||||
        if key in special_keys:
 | 
			
		||||
            continue
 | 
			
		||||
        if key in translations:
 | 
			
		||||
            final_ordered[key] = translations[key]
 | 
			
		||||
            keys_added.add(key)
 | 
			
		||||
            meta_key = f"@{key}"
 | 
			
		||||
            if meta_key in translations:
 | 
			
		||||
                final_ordered[meta_key] = translations[meta_key]
 | 
			
		||||
                keys_added.add(meta_key)
 | 
			
		||||
 | 
			
		||||
    # Append new translation keys (and their metadata immediately after) not in existing data.
 | 
			
		||||
    for key in translations:
 | 
			
		||||
        if key in special_keys or key.startswith("@") or key in keys_added:
 | 
			
		||||
            continue
 | 
			
		||||
        final_ordered[key] = translations[key]
 | 
			
		||||
        meta_key = f"@{key}"
 | 
			
		||||
        if meta_key in translations:
 | 
			
		||||
            final_ordered[meta_key] = translations[meta_key]
 | 
			
		||||
            keys_added.add(meta_key)
 | 
			
		||||
        keys_added.add(key)
 | 
			
		||||
 | 
			
		||||
    # Append any leftover metadata keys.
 | 
			
		||||
    for key in translations:
 | 
			
		||||
        if key.startswith("@") and key not in keys_added:
 | 
			
		||||
            final_ordered[key] = translations[key]
 | 
			
		||||
 | 
			
		||||
    with open(path_to_translations, "w") as f:
 | 
			
		||||
        f.write(json.dumps(final_ordered, indent=2, ensure_ascii=False))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def reconcile_metadata(lang_code: str, translation_keys: list[str]) -> None:
 | 
			
		||||
    """
 | 
			
		||||
    For each translation key, update its metadata (the key prefixed with '@') by merging
 | 
			
		||||
    any existing metadata with computed metadata. For basic translations, if no metadata exists,
 | 
			
		||||
    add it; otherwise, leave it as is.
 | 
			
		||||
    """
 | 
			
		||||
    translations = load_translations(lang_code)
 | 
			
		||||
 | 
			
		||||
    for key in translation_keys:
 | 
			
		||||
        translation = translations[key]
 | 
			
		||||
        meta_key = f"@{key}"
 | 
			
		||||
        existing_meta = translations.get(meta_key, {})
 | 
			
		||||
        assert isinstance(translation, str)
 | 
			
		||||
 | 
			
		||||
        # Case 1: Basic translations, no placeholders.
 | 
			
		||||
        if "{" not in translation:
 | 
			
		||||
            if not existing_meta:
 | 
			
		||||
                translations[meta_key] = {"type": "text", "placeholders": {}}
 | 
			
		||||
            # if metadata exists, leave it as is.
 | 
			
		||||
 | 
			
		||||
        # Case 2: Translations with placeholders (no pluralization).
 | 
			
		||||
        elif (
 | 
			
		||||
            "{" in translation
 | 
			
		||||
            and "plural," not in translation
 | 
			
		||||
            and "other{" not in translation
 | 
			
		||||
        ):
 | 
			
		||||
            # Compute placeholders.
 | 
			
		||||
            computed_placeholders = {}
 | 
			
		||||
            for placeholder in translation.split("{")[1:]:
 | 
			
		||||
                placeholder_name = placeholder.split("}")[0]
 | 
			
		||||
                computed_placeholders[placeholder_name] = {"type": "String"}
 | 
			
		||||
            if existing_meta:
 | 
			
		||||
                # Merge computed placeholders into existing metadata.
 | 
			
		||||
                existing_meta.setdefault("type", "text")
 | 
			
		||||
                existing_meta["placeholders"] = computed_placeholders
 | 
			
		||||
                translations[meta_key] = existing_meta
 | 
			
		||||
            else:
 | 
			
		||||
                translations[meta_key] = {
 | 
			
		||||
                    "type": "text",
 | 
			
		||||
                    "placeholders": computed_placeholders,
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
        # Case 3: Translations with pluralization.
 | 
			
		||||
        elif (
 | 
			
		||||
            "{" in translation and "plural," in translation and "other{" in translation
 | 
			
		||||
        ):
 | 
			
		||||
            # Extract placeholders appearing before the plural part.
 | 
			
		||||
            prefix = translation.split("plural,")[0].split("{")[1]
 | 
			
		||||
            placeholders_list = [
 | 
			
		||||
                p.strip() for p in prefix.split(",") if p.strip() != ""
 | 
			
		||||
            ]
 | 
			
		||||
            computed_placeholders = {ph: {} for ph in placeholders_list}
 | 
			
		||||
            if existing_meta:
 | 
			
		||||
                existing_meta.setdefault("type", "text")
 | 
			
		||||
                existing_meta["placeholders"] = computed_placeholders
 | 
			
		||||
                translations[meta_key] = existing_meta
 | 
			
		||||
            else:
 | 
			
		||||
                translations[meta_key] = {
 | 
			
		||||
                    "type": "text",
 | 
			
		||||
                    "placeholders": computed_placeholders,
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
    save_translations(lang_code, translations)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def translate(lang_code: str, lang_display_name: str) -> None:
 | 
			
		||||
    """
 | 
			
		||||
    Translate the needed translations from English to the target language.
 | 
			
		||||
    """
 | 
			
		||||
    import json
 | 
			
		||||
    import random
 | 
			
		||||
    from openai import OpenAI
 | 
			
		||||
 | 
			
		||||
    needed_translations = load_needed_translations()
 | 
			
		||||
    needed_translations = needed_translations.get(lang_code, [])
 | 
			
		||||
    english_translations_dict = load_translations("en")
 | 
			
		||||
    vietnamese_translations_dict = load_translations("vi")
 | 
			
		||||
 | 
			
		||||
    # there are 3 types of translation keys: basic, with placeholders, with pluralization. Read more: TRANSLATORS_GUIDE.md
 | 
			
		||||
 | 
			
		||||
    basic_translation_keys = [
 | 
			
		||||
        k
 | 
			
		||||
        for k in english_translations_dict.keys()
 | 
			
		||||
        if not k.startswith("@") and not english_translations_dict[k].startswith("{")
 | 
			
		||||
    ]
 | 
			
		||||
    example_basic_translation_keys = (
 | 
			
		||||
        random.sample(basic_translation_keys, 2)
 | 
			
		||||
        if len(basic_translation_keys) > 2
 | 
			
		||||
        else basic_translation_keys
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    placeholder_translation_keys = [
 | 
			
		||||
        k
 | 
			
		||||
        for k in english_translations_dict.keys()
 | 
			
		||||
        if not k.startswith("@")
 | 
			
		||||
        and "{" in english_translations_dict[k]
 | 
			
		||||
        and "plural," not in english_translations_dict[k]
 | 
			
		||||
        and "other{" not in english_translations_dict[k]
 | 
			
		||||
    ]
 | 
			
		||||
    example_placeholder_translation_keys = (
 | 
			
		||||
        random.sample(placeholder_translation_keys, 2)
 | 
			
		||||
        if len(placeholder_translation_keys) > 2
 | 
			
		||||
        else placeholder_translation_keys
 | 
			
		||||
    )
 | 
			
		||||
    plural_translation_keys = [
 | 
			
		||||
        k
 | 
			
		||||
        for k in english_translations_dict.keys()
 | 
			
		||||
        if not k.startswith("@")
 | 
			
		||||
        and "{" in english_translations_dict[k]
 | 
			
		||||
        and "plural," in english_translations_dict[k]
 | 
			
		||||
        and "other{" in english_translations_dict[k]
 | 
			
		||||
    ]
 | 
			
		||||
    example_plural_translation_keys = (
 | 
			
		||||
        random.sample(plural_translation_keys, 2)
 | 
			
		||||
        if len(plural_translation_keys) > 2
 | 
			
		||||
        else plural_translation_keys
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    # build example translations
 | 
			
		||||
    example_english_translations = {}
 | 
			
		||||
    for key in example_basic_translation_keys:
 | 
			
		||||
        example_english_translations[key] = english_translations_dict[key]
 | 
			
		||||
    for key in example_placeholder_translation_keys:
 | 
			
		||||
        example_english_translations[key] = english_translations_dict[key]
 | 
			
		||||
    for key in example_plural_translation_keys:
 | 
			
		||||
        example_english_translations[key] = english_translations_dict[key]
 | 
			
		||||
 | 
			
		||||
    example_vietnamese_translations = {}
 | 
			
		||||
    for key in example_basic_translation_keys:
 | 
			
		||||
        example_vietnamese_translations[key] = vietnamese_translations_dict[key]
 | 
			
		||||
    for key in example_placeholder_translation_keys:
 | 
			
		||||
        example_vietnamese_translations[key] = vietnamese_translations_dict[key]
 | 
			
		||||
    for key in example_plural_translation_keys:
 | 
			
		||||
        example_vietnamese_translations[key] = vietnamese_translations_dict[key]
 | 
			
		||||
 | 
			
		||||
    new_translations = {}
 | 
			
		||||
    progress = 0
 | 
			
		||||
    for i in range(0, len(needed_translations), 20):
 | 
			
		||||
        chunk = needed_translations[i : i + 20]
 | 
			
		||||
        translation_requests = {}
 | 
			
		||||
        for key in chunk:
 | 
			
		||||
            translation_requests[key] = english_translations_dict[key]
 | 
			
		||||
 | 
			
		||||
        prompt = f"""
 | 
			
		||||
        Please translate the following text from English to {lang_display_name}.
 | 
			
		||||
        Example:
 | 
			
		||||
        req: {json.dumps(example_english_translations, indent=2)}
 | 
			
		||||
        res: {json.dumps(example_vietnamese_translations, indent=2)}
 | 
			
		||||
        ========================
 | 
			
		||||
        req: {json.dumps(translation_requests, indent=2)}
 | 
			
		||||
        res:
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        client = OpenAI()
 | 
			
		||||
        chat_completion = client.chat.completions.create(
 | 
			
		||||
            messages=[
 | 
			
		||||
                {
 | 
			
		||||
                    "role": "system",
 | 
			
		||||
                    "content": "You are a translator that will only response to translation requests in json format without any additional information.",
 | 
			
		||||
                },
 | 
			
		||||
                {
 | 
			
		||||
                    "role": "user",
 | 
			
		||||
                    "content": prompt,
 | 
			
		||||
                },
 | 
			
		||||
            ],
 | 
			
		||||
            model="gpt-4o-mini",
 | 
			
		||||
            temperature=0.0,
 | 
			
		||||
        )
 | 
			
		||||
        response = chat_completion.choices[0].message.content
 | 
			
		||||
        _new_translations = json.loads(response)
 | 
			
		||||
        new_translations.update(_new_translations)
 | 
			
		||||
        print(f"Translated {progress + len(chunk)}/{len(needed_translations)}")
 | 
			
		||||
        progress += len(chunk)
 | 
			
		||||
 | 
			
		||||
    # save translations
 | 
			
		||||
    current_translations = load_translations(lang_code)
 | 
			
		||||
    current_translations.update(new_translations)
 | 
			
		||||
    save_translations(lang_code, current_translations)
 | 
			
		||||
 | 
			
		||||
    # reconcile metadata
 | 
			
		||||
    reconcile_metadata(lang_code, needed_translations)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
"""Example usage:
 | 
			
		||||
python scripts/translate.py
 | 
			
		||||
"""
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    lang_code = input("Enter the language code (e.g. vi, en): ").strip()
 | 
			
		||||
    lang_display_name = input(
 | 
			
		||||
        "Enter the language display name (e.g. Vietnamese, English): "
 | 
			
		||||
    )
 | 
			
		||||
    translate(
 | 
			
		||||
        lang_code=lang_code,
 | 
			
		||||
        lang_display_name=lang_display_name,
 | 
			
		||||
    )
 | 
			
		||||
					Loading…
					
					
				
		Reference in New Issue