Remove lang_update script

2026-02-04 06:56:26 +00:00 · 2023-06-09 14:48:36 -05:00 · 2023-06-09 14:48:36 -05:00 · ca4b316e2f
commit ca4b316e2f
parent 85ab04a29d
2 changed files with 0 additions and 291 deletions
--- a/lang_update/README.md
+++ b/lang_update/README.md
@@ -1,39 +0,0 @@
-# Language Names Updater
-
-This script tries to pull language names from the following sources:
-
-- Unicode CLDR
-- Google Translate
-
-## How to use
-
-```bash
-git clone https://github.com/dialect-app/po
-cd po/lang_update
-python lang_update.py
-```
-
-`lang_update.py` should be run with `po/lang_update` as the working directory and will not work as expected otherwise.
-
-`-g` or `--google` can be passed to force the usage of Google Translate as the source for language names.
-
-You can also pass a language code to only update one language:
-
-```bash
-python lang_update.py "ca"
-```
-
-## How to contribute
-
-If you would like to work on language names, please contribute to [Unicode CLDR](https://cldr.unicode.org/).
-
-If you decide that the language names from Unicode CLDR are not good enough and feel like you could do a better job, you can open an issue at [dialect-app/po](https://github.com/dialect-app/po/issues) and continue updating your translation as usual. Alternatively, you can add your language code to the `EXCLUDE_LIST` in the `lang_update.py` script and send a PR.
-
-The `lang_update.py` script has a few things you could help with as well:
-
-- The `EXCLUDE_LIST` list could be expanded or shortened depending on the accuracy of the Unicode CLDR project's language names for a particular language. You can judge that accuracy by checking the language's file in `cldr-json`. For example: [French Unicode CLDR languages.json](https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-localenames-full/main/fr/languages.json). The link format is:
-  ```
-  https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-localenames-full/main/{language_code_here}/languages.json
-  ```
-- If language names should be capitalized in your language, add the language code to `CAPS_LIST`.
-- If your language is named differently in the Unicode CLDR project, add a mapping in `CLDR_NAMES`.
--- a/lang_update/lang_update.py
+++ b/lang_update/lang_update.py
@@ -1,252 +0,0 @@
-import argparse
-import json
-import os
-import re
-import requests
-import subprocess
-from bs4 import BeautifulSoup
-
-
-LANGUAGES = {
-    "af": "Afrikaans",
-    "sq": "Albanian",
-    "am": "Amharic",
-    "ar": "Arabic",
-    "hy": "Armenian",
-    "az": "Azerbaijani",
-    "eu": "Basque",
-    "be": "Belarusian",
-    "bn": "Bengali",
-    "bs": "Bosnian",
-    "bg": "Bulgarian",
-    "ca": "Catalan",
-    "ceb": "Cebuano",
-    "ny": "Chichewa",
-    "zh": "Chinese",
-    "zh-Hans": "Chinese (Simplified)",
-    "zh-Hant": "Chinese (Traditional)",
-    "co": "Corsican",
-    "hr": "Croatian",
-    "cs": "Czech",
-    "da": "Danish",
-    "nl": "Dutch",
-    "en": "English",
-    "eo": "Esperanto",
-    "et": "Estonian",
-    "tl": "Filipino",
-    "fi": "Finnish",
-    "fr": "French",
-    "fy": "Frisian",
-    "gl": "Galician",
-    "ka": "Georgian",
-    "de": "German",
-    "el": "Greek",
-    "gu": "Gujarati",
-    "ht": "Haitian Creole",
-    "ha": "Hausa",
-    "haw": "Hawaiian",
-    "iw": "Hebrew",
-    "he": "Hebrew",
-    "hi": "Hindi",
-    "hmn": "Hmong",
-    "hu": "Hungarian",
-    "is": "Icelandic",
-    "ig": "Igbo",
-    "id": "Indonesian",
-    "ga": "Irish",
-    "it": "Italian",
-    "ja": "Japanese",
-    "jw": "Javanese",
-    "kn": "Kannada",
-    "kk": "Kazakh",
-    "km": "Khmer",
-    "rw": "Kinyarwanda",
-    "ko": "Korean",
-    "ku": "Kurdish (Kurmanji)",
-    "ky": "Kyrgyz",
-    "lo": "Lao",
-    "la": "Latin",
-    "lv": "Latvian",
-    "lt": "Lithuanian",
-    "lb": "Luxembourgish",
-    "mk": "Macedonian",
-    "mg": "Malagasy",
-    "ms": "Malay",
-    "ml": "Malayalam",
-    "mt": "Maltese",
-    "mi": "Maori",
-    "mr": "Marathi",
-    "mn": "Mongolian",
-    "my": "Myanmar (Burmese)",
-    "ne": "Nepali",
-    "no": "Norwegian",
-    "or": "Odia (Oriya)",
-    "ps": "Pashto",
-    "fa": "Persian",
-    "pl": "Polish",
-    "pt": "Portuguese",
-    "pa": "Punjabi",
-    "ro": "Romanian",
-    "ru": "Russian",
-    "sm": "Samoan",
-    "gd": "Scots Gaelic",
-    "sr": "Serbian",
-    "st": "Sesotho",
-    "sn": "Shona",
-    "sd": "Sindhi",
-    "si": "Sinhala",
-    "sk": "Slovak",
-    "sl": "Slovenian",
-    "so": "Somali",
-    "es": "Spanish",
-    "su": "Sundanese",
-    "sw": "Swahili",
-    "sv": "Swedish",
-    "tg": "Tajik",
-    "ta": "Tamil",
-    "tt": "Tatar",
-    "te": "Telugu",
-    "th": "Thai",
-    "tr": "Turkish",
-    "tk": "Turkmen",
-    "uk": "Ukrainian",
-    "ur": "Urdu",
-    "ug": "Uyghur",
-    "uz": "Uzbek",
-    "vi": "Vietnamese",
-    "cy": "Welsh",
-    "xh": "Xhosa",
-    "yi": "Yiddish",
-    "yo": "Yoruba",
-    "zu": "Zulu",
-}
-
-CLDR_NAMES = {
-    "kmr": "ku",  # They seem to be the same since Kurmanji (ku) is Northern Kurdish (kmr).
-    "zh_CN": "zh-Hans",
-    "zh_TW": "zh-Hant",
-    "zh-CN": "zh-Hans",
-    "zh-TW": "zh-Hant",
-}
-
-# Add any language to this list to exclude it from the automated process.
-EXCLUDE_LIST = [
-    # No decent source
-    "oc",  # Occitan
-    # Was manually updated by the translator
-    "eo",  # Esperanto
-    "fr",  # French
-    "fy",  # Frisian
-    "ja",  # Japanese
-    "lv",  # Latvian
-    "uk",  # Ukrainian
-    "zh_CN",  # Chinese
-]
-# Even if a language is in this list, you can pass it as a parameter to the program.
-
-# If any language is on this list and it shouldn't be, please create an issue:
-# https://www.github.com/dialect-app/po
-# You can also open an issue if any language should be added.
-
-# All languages that need "capitalization"
-CAPS_LIST = [
-    "it",  # Italian
-]
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "language", nargs="?", help="the language code for language to update"
-)
-parser.add_argument(
-    "-g", "--google", help="force use google for language names", action="store_true"
-)
-args = parser.parse_args()
-
-if not os.path.isdir("cldr-json"):
-    print("Cloning Unicode CLDR repository...")
-    subprocess.call(["git", "clone", "https://github.com/unicode-org/cldr-json"])
-
-
-def process_language(lang, arged=False):
-    lang = lang.strip()
-    if lang and (arged is True or lang not in EXCLUDE_LIST):
-        cldr_present = True  # Assume CLDR file is present.
-        cldr_lang = CLDR_NAMES[lang] if lang in CLDR_NAMES else lang.replace("_", "-")
-
-        g_lang = lang.split("_")[0]
-
-        print(f"Reading {lang}.po ...")
-
-        lang_file = open(f"../{lang}.po", "r")
-        lang_file_contents = lang_file.read()
-        lang_file.close()
-
-        try:
-            print("Looking for required CLDR file...")
-            cldr_file = open(
-                f"cldr-json/cldr-json/cldr-localenames-full/main/{cldr_lang}/languages.json",
-                "r",
-            )
-            cldr_json = json.load(cldr_file)
-        except FileNotFoundError:
-            print(f"No CLDR file found for language: {cldr_lang}.")
-            try:
-                cldr_lang = g_lang
-                cldr_file = open(
-                    f"cldr-json/cldr-json/cldr-localenames-full/main/{cldr_lang}/languages.json",
-                    "r",
-                )
-                cldr_json = json.load(cldr_file)
-                print(f"Using file for {cldr_lang} instead.")
-            except FileNotFoundError:
-                print("Could not find possible substitutes.")
-                cldr_present = False  # Correct earlier assumption.
-
-        if cldr_present and not args.google:
-            cldr_langs = cldr_json["main"][cldr_lang]["localeDisplayNames"]["languages"]
-            for lang_code, lang_name in cldr_langs.items():
-                if lang_code not in LANGUAGES:
-                    continue
-
-                if cldr_lang in CAPS_LIST:
-                    lang_name = lang_name.capitalize()
-
-                lang_file_contents = re.sub(
-                    rf'msgid "{re.escape(LANGUAGES[lang_code])}"\nmsgstr ".*"\n',
-                    rf'msgid "{LANGUAGES[lang_code]}"\nmsgstr "{lang_name}"\n',
-                    lang_file_contents,
-                )
-        else:
-            print("Fetching localized names from Google Translate...")
-
-            page = requests.get("https://translate.google.com/?hl=" + g_lang)
-            soup = BeautifulSoup(page.text, "html5lib")
-
-            print("Generating updated string with localized names...")
-
-            for div in soup.find_all("div"):
-                if div.attrs.get("class", None) == ["qSb8Pe"]:
-                    lang_code = div.attrs["data-language-code"]
-                    lang_name = div.find(attrs={"class": "Llmcnf"}).string
-
-                    lang_file_contents = re.sub(
-                        rf'msgid "{re.escape(LANGUAGES[lang_code])}"\nmsgstr ".*"\n',
-                        rf'msgid "{LANGUAGES[lang_code]}"\nmsgstr "{lang_name}"\n',
-                        lang_file_contents,
-                    )
-
-        print(f"Saving {lang}.po ...")
-
-        lang_file = open(f"../{lang}.po", "w")
-        lang_file.write(lang_file_contents)
-        lang_file.close()
-
-        print()
-
-
-if args.language:
-    process_language(args.language, True)
-else:
-    linguas_file = open("../LINGUAS", "r")
-    for lang in linguas_file:
-        process_language(lang)