Remove lang_update script

2026-02-04 15:06:25 +00:00 · 2023-06-09 14:48:36 -05:00 · 2023-06-09 14:48:36 -05:00 · ca4b316e2f
commit ca4b316e2f
parent 85ab04a29d
2 changed files with 0 additions and 291 deletions
--- a/lang_update/README.md
+++ b/lang_update/README.md
@ -1,39 +0,0 @@
 # Language Names Updater
 This script tries to pull language names from the following sources:
 - Unicode CLDR
 - Google Translate
 ## How to use
 ```bash
 git clone https://github.com/dialect-app/po
 cd po/lang_update
 python lang_update.py
 ```
 `lang_update.py` should be run with `po/lang_update` as the working directory and will not work as expected otherwise.
 `-g` or `--google` can be passed to force the usage of Google Translate as the source for language names.
 You can also pass a language code to only update one language:
 ```bash
 python lang_update.py "ca"
 ```
 ## How to contribute
 If you would like to work on language names, please contribute to [Unicode CLDR](https://cldr.unicode.org/).
 If you decide that the language names from Unicode CLDR are not good enough and feel like you could do a better job, you can open an issue at [dialect-app/po](https://github.com/dialect-app/po/issues) and continue updating your translation as per usual. You could also instead add your language code to the `EXCLUDE_LIST` in the `lang_update.py` script and send a PR.
 The `lang_update.py` script has a few things you could help with as well:
 - The `EXCLUDE_LIST` list could be expanded or shortened depending on the accuracy of the Unicode CLDR project's language names for a particular language. You could do this by checking `cldr-json`. For example: [French Unicode CLDR languages.json](https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-localenames-full/main/fr/languages.json). The link format is:
  ```
  https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-localenames-full/main/{language_code_here}/languages.json
  ```
 - If language names should be capitalized in your language, add the language code to `CAPS_LIST`.
 - If your language is named differently in the Unicode CLDR project, add a mapping in `CLDR_NAMES`.
--- a/lang_update/lang_update.py
+++ b/lang_update/lang_update.py
@ -1,252 +0,0 @@
 import argparse
 import json
 import os
 import re
 import requests
 import subprocess
 from bs4 import BeautifulSoup
 # English display names keyed by language code.
 # process_language() looks up the codes reported by CLDR or Google Translate
 # in this table and uses the English name as the msgid whose msgstr it rewrites.
 # Some languages appear under more than one code (e.g. "iw" and "he" both map
 # to "Hebrew").
 LANGUAGES = {
    "af": "Afrikaans",
    "sq": "Albanian",
    "am": "Amharic",
    "ar": "Arabic",
    "hy": "Armenian",
    "az": "Azerbaijani",
    "eu": "Basque",
    "be": "Belarusian",
    "bn": "Bengali",
    "bs": "Bosnian",
    "bg": "Bulgarian",
    "ca": "Catalan",
    "ceb": "Cebuano",
    "ny": "Chichewa",
    "zh": "Chinese",
    "zh-Hans": "Chinese (Simplified)",
    "zh-Hant": "Chinese (Traditional)",
    "co": "Corsican",
    "hr": "Croatian",
    "cs": "Czech",
    "da": "Danish",
    "nl": "Dutch",
    "en": "English",
    "eo": "Esperanto",
    "et": "Estonian",
    "tl": "Filipino",
    "fi": "Finnish",
    "fr": "French",
    "fy": "Frisian",
    "gl": "Galician",
    "ka": "Georgian",
    "de": "German",
    "el": "Greek",
    "gu": "Gujarati",
    "ht": "Haitian Creole",
    "ha": "Hausa",
    "haw": "Hawaiian",
    "iw": "Hebrew",
    "he": "Hebrew",
    "hi": "Hindi",
    "hmn": "Hmong",
    "hu": "Hungarian",
    "is": "Icelandic",
    "ig": "Igbo",
    "id": "Indonesian",
    "ga": "Irish",
    "it": "Italian",
    "ja": "Japanese",
    "jw": "Javanese",
    "kn": "Kannada",
    "kk": "Kazakh",
    "km": "Khmer",
    "rw": "Kinyarwanda",
    "ko": "Korean",
    "ku": "Kurdish (Kurmanji)",
    "ky": "Kyrgyz",
    "lo": "Lao",
    "la": "Latin",
    "lv": "Latvian",
    "lt": "Lithuanian",
    "lb": "Luxembourgish",
    "mk": "Macedonian",
    "mg": "Malagasy",
    "ms": "Malay",
    "ml": "Malayalam",
    "mt": "Maltese",
    "mi": "Maori",
    "mr": "Marathi",
    "mn": "Mongolian",
    "my": "Myanmar (Burmese)",
    "ne": "Nepali",
    "no": "Norwegian",
    "or": "Odia (Oriya)",
    "ps": "Pashto",
    "fa": "Persian",
    "pl": "Polish",
    "pt": "Portuguese",
    "pa": "Punjabi",
    "ro": "Romanian",
    "ru": "Russian",
    "sm": "Samoan",
    "gd": "Scots Gaelic",
    "sr": "Serbian",
    "st": "Sesotho",
    "sn": "Shona",
    "sd": "Sindhi",
    "si": "Sinhala",
    "sk": "Slovak",
    "sl": "Slovenian",
    "so": "Somali",
    "es": "Spanish",
    "su": "Sundanese",
    "sw": "Swahili",
    "sv": "Swedish",
    "tg": "Tajik",
    "ta": "Tamil",
    "tt": "Tatar",
    "te": "Telugu",
    "th": "Thai",
    "tr": "Turkish",
    "tk": "Turkmen",
    "uk": "Ukrainian",
    "ur": "Urdu",
    "ug": "Uyghur",
    "uz": "Uzbek",
    "vi": "Vietnamese",
    "cy": "Welsh",
    "xh": "Xhosa",
    "yi": "Yiddish",
    "yo": "Yoruba",
    "zu": "Zulu",
 }
 # Maps a .po language code to the CLDR locale directory whose languages.json
 # should be read for it. Codes that are not listed here are used as-is, with
 # "_" replaced by "-".
 CLDR_NAMES = {
    "kmr": "ku",  # They seem to be the same since Kurmanji (ku) is Northern Kurdish (kmr).
    "zh_CN": "zh-Hans",
    "zh_TW": "zh-Hant",
    "zh-CN": "zh-Hans",
    "zh-TW": "zh-Hant",
 }
 # Add any language to this list to exclude it from the automated process.
 # The exclusion only applies when languages are read from the LINGUAS file;
 # a code passed explicitly on the command line is processed regardless.
 EXCLUDE_LIST = [
    # No decent source
    "oc",  # Occitan
    # Was manually updated by the translator
    "eo",  # Esperanto
    "fr",  # French
    "fy",  # Frisian
    "ja",  # Japanese
    "lv",  # Latvian
    "uk",  # Ukrainian
    "zh_CN",  # Chinese
 ]
 # Even if a language is in EXCLUDE_LIST, it can still be updated by passing its
 # code to the program as a parameter.
 # If any language is in EXCLUDE_LIST and it shouldn't be, please create an issue:
 # https://www.github.com/dialect-app/po
 # You can also open an issue if any language should be added to it.
 # All languages that need "capitalization": names taken from CLDR for these
 # locales are passed through str.capitalize() before being written to the
 # .po file.
 CAPS_LIST = [
    "it",  # Italian
 ]
 # Command-line interface: an optional language code restricts the run to a
 # single .po file, and -g/--google forces Google Translate as the name source.
 parser = argparse.ArgumentParser()
 parser.add_argument(
    "language", nargs="?", help="the language code for language to update"
 )
 parser.add_argument(
    "-g", "--google", help="force use google for language names", action="store_true"
 )
 args = parser.parse_args()

 # The CLDR data is read from a local checkout in the working directory; fetch
 # it on first use.
 if not os.path.isdir("cldr-json"):
    print("Cloning Unicode CLDR repository...")
    clone_status = subprocess.call(
        ["git", "clone", "https://github.com/unicode-org/cldr-json"]
    )
    if clone_status != 0:
        # Keep going (languages without CLDR data fall back to Google
        # Translate), but do not let the failure pass silently.
        print(
            f"Warning: git clone exited with status {clone_status}; "
            "CLDR data may be unavailable."
        )
 def _load_cldr_json(locale):
    """Return the parsed CLDR ``languages.json`` for *locale*, or None if the file is missing."""
    path = f"cldr-json/cldr-json/cldr-localenames-full/main/{locale}/languages.json"
    try:
        # The context manager guarantees the handle is closed after parsing.
        with open(path, "r", encoding="utf-8") as cldr_file:
            return json.load(cldr_file)
    except FileNotFoundError:
        return None


 def _set_translation(po_text, english_name, localized_name):
    """Return *po_text* with the msgstr following ``msgid "<english_name>"`` replaced."""
    # Escape backslashes and double quotes so the name stays a valid PO string.
    escaped_name = str(localized_name).replace("\\", "\\\\").replace('"', '\\"')
    replacement = f'msgid "{english_name}"\nmsgstr "{escaped_name}"\n'
    pattern = rf'msgid "{re.escape(english_name)}"\nmsgstr ".*"\n'
    # A callable replacement is inserted verbatim, so backslashes in the name
    # are never interpreted as regex template escapes or group references.
    return re.sub(pattern, lambda _match: replacement, po_text)


 def process_language(lang, arged=False):
    """Rewrite the language-name translations in ``../{lang}.po``.

    Localized names come from the local ``cldr-json`` checkout when a matching
    ``languages.json`` exists and ``--google`` was not given; otherwise they
    are scraped from the Google Translate page for the base language code.

    Args:
        lang: Language code naming the .po file. Surrounding whitespace is
            stripped; an empty code is ignored.
        arged: Pass True when the code was given explicitly on the command
            line, which bypasses EXCLUDE_LIST.
    """
    lang = lang.strip()
    if not lang or (arged is not True and lang in EXCLUDE_LIST):
        return

    cldr_lang = CLDR_NAMES.get(lang, lang.replace("_", "-"))
    g_lang = lang.split("_")[0]

    print(f"Reading {lang}.po ...")
    with open(f"../{lang}.po", "r", encoding="utf-8") as lang_file:
        lang_file_contents = lang_file.read()

    print("Looking for required CLDR file...")
    cldr_json = _load_cldr_json(cldr_lang)
    if cldr_json is None:
        print(f"No CLDR file found for language: {cldr_lang}.")
        # Retry with the base language code (the part before any "_").
        cldr_lang = g_lang
        cldr_json = _load_cldr_json(cldr_lang)
        if cldr_json is None:
            print("Could not find possible substitutes.")
        else:
            print(f"Using file for {cldr_lang} instead.")

    if cldr_json is not None and not args.google:
        cldr_langs = cldr_json["main"][cldr_lang]["localeDisplayNames"]["languages"]
        needs_caps = cldr_lang in CAPS_LIST
        for lang_code, lang_name in cldr_langs.items():
            if lang_code not in LANGUAGES:
                continue
            if needs_caps:
                lang_name = lang_name.capitalize()
            lang_file_contents = _set_translation(
                lang_file_contents, LANGUAGES[lang_code], lang_name
            )
    else:
        print("Fetching localized names from Google Translate...")
        # A timeout keeps the script from hanging forever on a stalled connection.
        page = requests.get("https://translate.google.com/?hl=" + g_lang, timeout=30)
        soup = BeautifulSoup(page.text, "html5lib")
        print("Generating updated string with localized names...")
        for div in soup.find_all("div"):
            if div.attrs.get("class", None) != ["qSb8Pe"]:
                continue
            lang_code = div.attrs.get("data-language-code")
            name_node = div.find(attrs={"class": "Llmcnf"})
            # Skip codes this script does not know about and entries without
            # a usable name instead of crashing on them.
            if lang_code not in LANGUAGES:
                continue
            if name_node is None or name_node.string is None:
                continue
            lang_file_contents = _set_translation(
                lang_file_contents, LANGUAGES[lang_code], name_node.string
            )

    print(f"Saving {lang}.po ...")
    with open(f"../{lang}.po", "w", encoding="utf-8") as lang_file:
        lang_file.write(lang_file_contents)
    print()
 # Entry point: update the single language given on the command line, or every
 # language listed in ../LINGUAS when none was given.
 if args.language:
    process_language(args.language, True)
 else:
    with open("../LINGUAS", "r", encoding="utf-8") as linguas_file:
        for linguas_line in linguas_file:
            # Drop "#" comments and allow several whitespace-separated codes
            # on one line; blank lines yield no codes at all.
            for lang in linguas_line.split("#", 1)[0].split():
                process_language(lang)