mirror of
https://github.com/dialect-app/po.git
synced 2025-06-18 20:26:26 +00:00
Remove lang_update script
This commit is contained in:
parent
85ab04a29d
commit
ca4b316e2f
2 changed files with 0 additions and 291 deletions
|
@ -1,39 +0,0 @@
|
||||||
# Language Names Updater
|
|
||||||
|
|
||||||
This script tries to pull language names from the following sources:
|
|
||||||
|
|
||||||
- Unicode CLDR
|
|
||||||
- Google Translate
|
|
||||||
|
|
||||||
## How to use
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/dialect-app/po
|
|
||||||
cd po/lang_update
|
|
||||||
python lang_update.py
|
|
||||||
```
|
|
||||||
|
|
||||||
`lang_update.py` should be run with `po/lang_update` as the working directory and will not work as expected otherwise.
|
|
||||||
|
|
||||||
`-g` or `--google` can be passed to force the usage of Google Translate as the source for language names.
|
|
||||||
|
|
||||||
You can also pass a language code to only update one language:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python lang_update.py "ca"
|
|
||||||
```
|
|
||||||
|
|
||||||
## How to contribute
|
|
||||||
|
|
||||||
If you would like to work on language names, please contribute to [Unicode CLDR](https://cldr.unicode.org/).
|
|
||||||
|
|
||||||
If you decide that the language names from Unicode CLDR are not good enough and feel you could do a better job, you can open an issue at [dialect-app/po](https://github.com/dialect-app/po/issues) and continue updating your translation as usual. Alternatively, you can add your language code to the `EXCLUDE_LIST` in the `lang_update.py` script and send a PR.
|
|
||||||
|
|
||||||
The `lang_update.py` script has a few things you could help with as well:
|
|
||||||
|
|
||||||
- The `EXCLUDE_LIST` list could be expanded or shortened depending on the accuracy of the Unicode CLDR project's language names for a particular language. You can check this in `cldr-json`, for example: [French Unicode CLDR languages.json](https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-localenames-full/main/fr/languages.json). The link format is:
|
|
||||||
```
|
|
||||||
https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-localenames-full/main/{language_code_here}/languages.json
|
|
||||||
```
|
|
||||||
- If language names should be capitalized in your language, add the language code to `CAPS_LIST`.
|
|
||||||
- If your language is named differently in the Unicode CLDR project, add a mapping in `CLDR_NAMES`.
|
|
|
@ -1,252 +0,0 @@
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import requests
|
|
||||||
import subprocess
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
|
|
||||||
LANGUAGES = {
|
|
||||||
"af": "Afrikaans",
|
|
||||||
"sq": "Albanian",
|
|
||||||
"am": "Amharic",
|
|
||||||
"ar": "Arabic",
|
|
||||||
"hy": "Armenian",
|
|
||||||
"az": "Azerbaijani",
|
|
||||||
"eu": "Basque",
|
|
||||||
"be": "Belarusian",
|
|
||||||
"bn": "Bengali",
|
|
||||||
"bs": "Bosnian",
|
|
||||||
"bg": "Bulgarian",
|
|
||||||
"ca": "Catalan",
|
|
||||||
"ceb": "Cebuano",
|
|
||||||
"ny": "Chichewa",
|
|
||||||
"zh": "Chinese",
|
|
||||||
"zh-Hans": "Chinese (Simplified)",
|
|
||||||
"zh-Hant": "Chinese (Traditional)",
|
|
||||||
"co": "Corsican",
|
|
||||||
"hr": "Croatian",
|
|
||||||
"cs": "Czech",
|
|
||||||
"da": "Danish",
|
|
||||||
"nl": "Dutch",
|
|
||||||
"en": "English",
|
|
||||||
"eo": "Esperanto",
|
|
||||||
"et": "Estonian",
|
|
||||||
"tl": "Filipino",
|
|
||||||
"fi": "Finnish",
|
|
||||||
"fr": "French",
|
|
||||||
"fy": "Frisian",
|
|
||||||
"gl": "Galician",
|
|
||||||
"ka": "Georgian",
|
|
||||||
"de": "German",
|
|
||||||
"el": "Greek",
|
|
||||||
"gu": "Gujarati",
|
|
||||||
"ht": "Haitian Creole",
|
|
||||||
"ha": "Hausa",
|
|
||||||
"haw": "Hawaiian",
|
|
||||||
"iw": "Hebrew",
|
|
||||||
"he": "Hebrew",
|
|
||||||
"hi": "Hindi",
|
|
||||||
"hmn": "Hmong",
|
|
||||||
"hu": "Hungarian",
|
|
||||||
"is": "Icelandic",
|
|
||||||
"ig": "Igbo",
|
|
||||||
"id": "Indonesian",
|
|
||||||
"ga": "Irish",
|
|
||||||
"it": "Italian",
|
|
||||||
"ja": "Japanese",
|
|
||||||
"jw": "Javanese",
|
|
||||||
"kn": "Kannada",
|
|
||||||
"kk": "Kazakh",
|
|
||||||
"km": "Khmer",
|
|
||||||
"rw": "Kinyarwanda",
|
|
||||||
"ko": "Korean",
|
|
||||||
"ku": "Kurdish (Kurmanji)",
|
|
||||||
"ky": "Kyrgyz",
|
|
||||||
"lo": "Lao",
|
|
||||||
"la": "Latin",
|
|
||||||
"lv": "Latvian",
|
|
||||||
"lt": "Lithuanian",
|
|
||||||
"lb": "Luxembourgish",
|
|
||||||
"mk": "Macedonian",
|
|
||||||
"mg": "Malagasy",
|
|
||||||
"ms": "Malay",
|
|
||||||
"ml": "Malayalam",
|
|
||||||
"mt": "Maltese",
|
|
||||||
"mi": "Maori",
|
|
||||||
"mr": "Marathi",
|
|
||||||
"mn": "Mongolian",
|
|
||||||
"my": "Myanmar (Burmese)",
|
|
||||||
"ne": "Nepali",
|
|
||||||
"no": "Norwegian",
|
|
||||||
"or": "Odia (Oriya)",
|
|
||||||
"ps": "Pashto",
|
|
||||||
"fa": "Persian",
|
|
||||||
"pl": "Polish",
|
|
||||||
"pt": "Portuguese",
|
|
||||||
"pa": "Punjabi",
|
|
||||||
"ro": "Romanian",
|
|
||||||
"ru": "Russian",
|
|
||||||
"sm": "Samoan",
|
|
||||||
"gd": "Scots Gaelic",
|
|
||||||
"sr": "Serbian",
|
|
||||||
"st": "Sesotho",
|
|
||||||
"sn": "Shona",
|
|
||||||
"sd": "Sindhi",
|
|
||||||
"si": "Sinhala",
|
|
||||||
"sk": "Slovak",
|
|
||||||
"sl": "Slovenian",
|
|
||||||
"so": "Somali",
|
|
||||||
"es": "Spanish",
|
|
||||||
"su": "Sundanese",
|
|
||||||
"sw": "Swahili",
|
|
||||||
"sv": "Swedish",
|
|
||||||
"tg": "Tajik",
|
|
||||||
"ta": "Tamil",
|
|
||||||
"tt": "Tatar",
|
|
||||||
"te": "Telugu",
|
|
||||||
"th": "Thai",
|
|
||||||
"tr": "Turkish",
|
|
||||||
"tk": "Turkmen",
|
|
||||||
"uk": "Ukrainian",
|
|
||||||
"ur": "Urdu",
|
|
||||||
"ug": "Uyghur",
|
|
||||||
"uz": "Uzbek",
|
|
||||||
"vi": "Vietnamese",
|
|
||||||
"cy": "Welsh",
|
|
||||||
"xh": "Xhosa",
|
|
||||||
"yi": "Yiddish",
|
|
||||||
"yo": "Yoruba",
|
|
||||||
"zu": "Zulu",
|
|
||||||
}
|
|
||||||
|
|
||||||
CLDR_NAMES = {
|
|
||||||
"kmr": "ku", # They seem to be the same since Kurmanji (ku) is Northern Kurdish (kmr).
|
|
||||||
"zh_CN": "zh-Hans",
|
|
||||||
"zh_TW": "zh-Hant",
|
|
||||||
"zh-CN": "zh-Hans",
|
|
||||||
"zh-TW": "zh-Hant",
|
|
||||||
}
|
|
||||||
|
|
||||||
# Add any language to this list to exclude it from the automated process.
|
|
||||||
EXCLUDE_LIST = [
|
|
||||||
# No decent source
|
|
||||||
"oc", # Occitan
|
|
||||||
# Was manually updated by the translator
|
|
||||||
"eo", # Esperanto
|
|
||||||
"fr", # French
|
|
||||||
"fy", # Frisian
|
|
||||||
"ja", # Japanese
|
|
||||||
"lv", # Latvian
|
|
||||||
"uk", # Ukrainian
|
|
||||||
"zh_CN", # Chinese
|
|
||||||
]
|
|
||||||
# Even if a language is in this list, you can pass it as a parameter to the program.
|
|
||||||
|
|
||||||
# If any language is on this list and it shouldn't be, please create an issue:
|
|
||||||
# https://www.github.com/dialect-app/po
|
|
||||||
# You can also open an issue if any language should be added.
|
|
||||||
|
|
||||||
# All languages that need "capitalization"
|
|
||||||
CAPS_LIST = [
|
|
||||||
"it", # Italian
|
|
||||||
]
|
|
||||||
|
|
||||||
# Command-line interface: an optional positional language code, plus a
# switch that forces Google Translate as the source of language names.
parser = argparse.ArgumentParser()
parser.add_argument(
    "language", nargs="?", help="the language code for language to update"
)
parser.add_argument(
    "-g", "--google", help="force use google for language names", action="store_true"
)
args = parser.parse_args()

# The CLDR data is read from a "cldr-json" checkout in the current working
# directory; clone it when that directory does not exist yet.
if not os.path.isdir("cldr-json"):
    print("Cloning Unicode CLDR repository...")
    # Report a failed clone instead of discarding git's exit status. The
    # script keeps running so that a missing checkout is not fatal here.
    if subprocess.call(["git", "clone", "https://github.com/unicode-org/cldr-json"]) != 0:
        print("Could not clone the Unicode CLDR repository.")
|
|
||||||
|
|
||||||
|
|
||||||
def _load_cldr_names(cldr_lang):
    """Return the parsed CLDR ``languages.json`` document for *cldr_lang*.

    The file is looked up inside the ``cldr-json`` checkout in the current
    working directory.

    Raises:
        FileNotFoundError: if the checkout has no file for that locale.
    """
    path = f"cldr-json/cldr-json/cldr-localenames-full/main/{cldr_lang}/languages.json"
    # CLDR JSON is UTF-8; the context manager closes the handle on every path.
    with open(path, "r", encoding="utf-8") as cldr_file:
        return json.load(cldr_file)


def _set_translation(po_contents, msgid, msgstr):
    """Return *po_contents* with the ``msgstr`` that follows *msgid* replaced.

    Every single-line ``msgid "<msgid>"`` / ``msgstr "..."`` pair is updated.
    Text that does not contain such a pair is returned unchanged.
    """
    # Backslash and double quote are the escape and delimiter characters of a
    # PO string, so they must be escaped before being written into one.
    escaped = msgstr.replace("\\", "\\\\").replace('"', '\\"')
    pattern = rf'(msgid "{re.escape(msgid)}"\nmsgstr )".*"\n'
    # A callable replacement is inserted literally. A template string would
    # have its backslashes interpreted as group references or escapes.
    return re.sub(pattern, lambda m: f'{m.group(1)}"{escaped}"\n', po_contents)


def process_language(lang, arged=False):
    """Rewrite the localized language names stored in ``../<lang>.po``.

    Names come from the local Unicode CLDR checkout when a matching
    ``languages.json`` exists and ``args.google`` is not set. Otherwise they
    are scraped from the Google Translate page for the language. Only names
    whose code is a key of ``LANGUAGES`` are written.

    Args:
        lang: Language code naming the ``.po`` file; surrounding whitespace
            (such as a trailing newline) is stripped.
        arged: True when the language was requested explicitly, in which
            case ``EXCLUDE_LIST`` is ignored.

    Returns:
        None. Nothing is read or written when *lang* is empty, or when it is
        excluded and not explicitly requested.

    Raises:
        FileNotFoundError: if ``../<lang>.po`` does not exist.
    """
    lang = lang.strip()
    if not lang or (not arged and lang in EXCLUDE_LIST):
        return

    # CLDR locale directories use "-" where the .po file names use "_".
    cldr_lang = CLDR_NAMES.get(lang, lang.replace("_", "-"))
    # Bare language code without any region suffix.
    g_lang = lang.split("_")[0]

    print(f"Reading {lang}.po ...")
    with open(f"../{lang}.po", "r", encoding="utf-8") as lang_file:
        lang_file_contents = lang_file.read()

    cldr_json = None  # Stays None when no usable CLDR file is found.
    print("Looking for required CLDR file...")
    try:
        cldr_json = _load_cldr_names(cldr_lang)
    except FileNotFoundError:
        print(f"No CLDR file found for language: {cldr_lang}.")
        # Retry with the bare language code before giving up on CLDR.
        try:
            cldr_json = _load_cldr_names(g_lang)
        except FileNotFoundError:
            print("Could not find possible substitutes.")
        else:
            cldr_lang = g_lang
            print(f"Using file for {cldr_lang} instead.")

    if cldr_json is not None and not args.google:
        cldr_langs = cldr_json["main"][cldr_lang]["localeDisplayNames"]["languages"]
        capitalize = cldr_lang in CAPS_LIST
        for lang_code, lang_name in cldr_langs.items():
            if lang_code not in LANGUAGES:
                continue
            if capitalize:
                lang_name = lang_name.capitalize()
            lang_file_contents = _set_translation(
                lang_file_contents, LANGUAGES[lang_code], lang_name
            )
    else:
        print("Fetching localized names from Google Translate...")

        # Without a timeout the request can wait on the server indefinitely.
        page = requests.get("https://translate.google.com/?hl=" + g_lang, timeout=30)
        soup = BeautifulSoup(page.text, "html5lib")

        print("Generating updated string with localized names...")

        for div in soup.find_all("div"):
            if div.attrs.get("class", None) != ["qSb8Pe"]:
                continue
            lang_code = div.attrs.get("data-language-code")
            # Skip codes this script has no English msgid for, matching the
            # guard used for the CLDR data above.
            if lang_code not in LANGUAGES:
                continue
            name_tag = div.find(attrs={"class": "Llmcnf"})
            if name_tag is None or name_tag.string is None:
                continue
            lang_file_contents = _set_translation(
                lang_file_contents, LANGUAGES[lang_code], str(name_tag.string)
            )

    print(f"Saving {lang}.po ...")
    with open(f"../{lang}.po", "w", encoding="utf-8") as lang_file:
        lang_file.write(lang_file_contents)

    print()
|
|
||||||
|
|
||||||
|
|
||||||
# Script driver: update the one language named on the command line, or every
# language listed in ../LINGUAS when none was given.
if args.language:
    # The second argument marks the language as explicitly requested.
    process_language(args.language, True)
else:
    # The context manager closes LINGUAS even if processing a language fails.
    with open("../LINGUAS", "r", encoding="utf-8") as linguas_file:
        for line in linguas_file:
            # A gettext LINGUAS file is a whitespace-separated list in which
            # "#" starts a comment, so drop the comment and split the rest.
            for lang in line.split("#", 1)[0].split():
                process_language(lang)
|
|
Loading…
Add table
Reference in a new issue