Commit 39306492208b850d441ed8a6eaaae3898f8990a5
1 parent: 41856690
fix(translation): 补全 NLLB 本地翻译的语言码解析(FLORES 短码 + 完整 tokenizer 码)
问题描述
----------
使用 facebook/nllb-200-distilled-600M(CTranslate2 后端)时,若 API 传入 ISO 639-1
或 FLORES 短标签(如 ca、da、nl、sv、no、tr 等),会触发
「Unsupported NLLB source/target language」。模型与 tokenizer 实际支持这些语言;
根因是 resolve_nllb_language_code 仅依赖 translation/languages.py 里十余条
NLLB_LANGUAGE_CODES 映射,大量合法短码未注册,校验误报为不支持。
修改内容
----------
1. 新增 translation/nllb_flores_short_map.py
- NLLB_FLORES_SHORT_TO_CODE:与 HF 模型卡 language 列表对齐的短标签 ->
NLLB 强制 BOS/src_lang 形式(<ISO639-3>_<ISO15924>,如 cat_Latn)。
- NLLB_TOKENIZER_LANGUAGE_CODES:从 tokenizer.json 提取的 202 个语言 token
全集,供直接传入 deu_Latn 等形式时做规范化解析。
- 额外约定:ISO 639-1「no」映射 nob_Latn(书面挪威语 Bokmål);nb/nn 分别
对应 nob_Latn / nno_Latn;「ar」显式指向 arb_Arab(与 NLLB 一致)。
2. 调整 translation/languages.py
- build_nllb_language_catalog:合并顺序为 FLORES 全表 -> NLLB_LANGUAGE_CODES
(保留少量显式覆盖,如 zh->zho_Hans)-> 调用方 overrides。
- resolve_nllb_language_code:在目录与别名之后,增加基于
NLLB_TOKENIZER_LANGUAGE_CODES 的大小写不敏感匹配(如 eng_latn -> eng_Latn),
覆盖「已传完整 NLLB 码」的场景。
3. tests/test_translation_local_backends.py
- 新增 test_nllb_resolves_flores_short_tags_and_iso_no,覆盖用户关心的短码及
deu_Latn 直通解析。
方案说明
----------
NLLB 接口语义以 Hugging Face NllbTokenizer 为准:语言标识为 FLORES-200 风格
三字母语种码 + 下划线 + 四字母脚本子标签(ISO 15924)。业务侧常用 ISO 639-1
(de、sv)或模型卡短列表(ca、nl),需在服务内统一映射到 tokenizer 特殊 token。
本实现以模型卡 language 字段 + tokenizer 词表为单一事实来源生成静态表,
避免运行时依赖额外库;同时保留原有 NLLB_LANGUAGE_CODES 作为薄覆盖层以兼容
既有配置与测试。
Refs: https://huggingface.co/facebook/nllb-200-distilled-600M
Made-with: Cursor
Showing 3 changed files with 460 additions and 5 deletions
Show diff stats
tests/test_translation_local_backends.py
| ... | ... | @@ -5,6 +5,7 @@ import torch |
| 5 | 5 | |
| 6 | 6 | from translation.backends.local_seq2seq import MarianMTTranslationBackend, NLLBTranslationBackend |
| 7 | 7 | from translation.backends.local_ctranslate2 import NLLBCTranslate2TranslationBackend |
| 8 | +from translation.languages import build_nllb_language_catalog, resolve_nllb_language_code | |
| 8 | 9 | from translation.service import TranslationService |
| 9 | 10 | from translation.text_splitter import compute_safe_input_token_limit, split_text_for_translation |
| 10 | 11 | |
| ... | ... | @@ -200,6 +201,22 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): |
| 200 | 201 | assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] |
| 201 | 202 | |
| 202 | 203 | |
| 204 | +def test_nllb_resolves_flores_short_tags_and_iso_no(): | |
| 205 | + cat = build_nllb_language_catalog(None) | |
| 206 | + assert resolve_nllb_language_code("ca", cat) == "cat_Latn" | |
| 207 | + assert resolve_nllb_language_code("da", cat) == "dan_Latn" | |
| 208 | + assert resolve_nllb_language_code("eu", cat) == "eus_Latn" | |
| 209 | + assert resolve_nllb_language_code("gl", cat) == "glg_Latn" | |
| 210 | + assert resolve_nllb_language_code("hu", cat) == "hun_Latn" | |
| 211 | + assert resolve_nllb_language_code("id", cat) == "ind_Latn" | |
| 212 | + assert resolve_nllb_language_code("nl", cat) == "nld_Latn" | |
| 213 | + assert resolve_nllb_language_code("no", cat) == "nob_Latn" | |
| 214 | + assert resolve_nllb_language_code("ro", cat) == "ron_Latn" | |
| 215 | + assert resolve_nllb_language_code("SV", cat) == "swe_Latn" | |
| 216 | + assert resolve_nllb_language_code("tr", cat) == "tur_Latn" | |
| 217 | + assert resolve_nllb_language_code("deu_Latn", cat) == "deu_Latn" | |
| 218 | + | |
| 219 | + | |
| 203 | 220 | def test_translation_service_preloads_enabled_backends(monkeypatch): |
| 204 | 221 | created = [] |
| 205 | 222 | ... | ... |
translation/languages.py
| ... | ... | @@ -2,8 +2,14 @@ |
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +from functools import lru_cache | |
| 5 | 6 | from typing import Dict, Mapping, Optional, Tuple |
| 6 | 7 | |
| 8 | +from translation.nllb_flores_short_map import ( | |
| 9 | + NLLB_FLORES_SHORT_TO_CODE, | |
| 10 | + NLLB_TOKENIZER_LANGUAGE_CODES, | |
| 11 | +) | |
| 12 | + | |
| 7 | 13 | |
| 8 | 14 | LANGUAGE_LABELS: Dict[str, str] = { |
| 9 | 15 | "zh": "Chinese", |
| ... | ... | @@ -48,6 +54,8 @@ DEEPL_LANGUAGE_CODES: Dict[str, str] = { |
| 48 | 54 | } |
| 49 | 55 | |
| 50 | 56 | |
| 57 | +# Sparse overrides applied after ``NLLB_FLORES_SHORT_TO_CODE`` in ``build_nllb_language_catalog``, | |
| 58 | +# so an entry here replaces the same key there. Kept for backward compatibility and explicit defaults. | |
| 51 | 59 | NLLB_LANGUAGE_CODES: Dict[str, str] = { |
| 52 | 60 | "en": "eng_Latn", |
| 53 | 61 | "fi": "fin_Latn", |
| ... | ... | @@ -82,14 +90,24 @@ def normalize_language_key(language: Optional[str]) -> str: |
| 82 | 90 | return str(language or "").strip().lower().replace("-", "_") |
| 83 | 91 | |
| 84 | 92 | |
| 93 | +@lru_cache(maxsize=1) | |
| 94 | +def _nllb_tokenizer_code_by_normalized_key() -> Dict[str, str]: | |
| 95 | + """Map lowercased ``deu_latn``-style keys to canonical tokenizer strings (e.g. ``deu_Latn``).""" | |
| 96 | + return {normalize_language_key(code): code for code in NLLB_TOKENIZER_LANGUAGE_CODES} | |
| 97 | + | |
| 98 | + | |
| 85 | 99 | def build_nllb_language_catalog( |
| 86 | 100 | overrides: Optional[Mapping[str, str]] = None, |
| 87 | 101 | ) -> Dict[str, str]: |
| 88 | - catalog = { | |
| 89 | - normalize_language_key(key): str(value).strip() | |
| 90 | - for key, value in NLLB_LANGUAGE_CODES.items() | |
| 91 | - if str(key).strip() | |
| 92 | - } | |
| 102 | + catalog: Dict[str, str] = {} | |
| 103 | + for key, value in NLLB_FLORES_SHORT_TO_CODE.items(): | |
| 104 | + normalized_key = normalize_language_key(key) | |
| 105 | + if normalized_key: | |
| 106 | + catalog[normalized_key] = str(value).strip() | |
| 107 | + for key, value in NLLB_LANGUAGE_CODES.items(): | |
| 108 | + normalized_key = normalize_language_key(key) | |
| 109 | + if normalized_key: | |
| 110 | + catalog[normalized_key] = str(value).strip() | |
| 93 | 111 | for key, value in (overrides or {}).items(): |
| 94 | 112 | normalized_key = normalize_language_key(key) |
| 95 | 113 | if normalized_key: |
| ... | ... | @@ -116,6 +134,10 @@ def resolve_nllb_language_code( |
| 116 | 134 | if aliased is not None: |
| 117 | 135 | return aliased |
| 118 | 136 | |
| 137 | + tokenizer_hit = _nllb_tokenizer_code_by_normalized_key().get(normalized) | |
| 138 | + if tokenizer_hit is not None: | |
| 139 | + return tokenizer_hit | |
| 140 | + | |
| 119 | 141 | for code in catalog.values(): |
| 120 | 142 | if normalize_language_key(code) == normalized: |
| 121 | 143 | return code | ... | ... |
translation/nllb_flores_short_map.py
| ... | ... | @@ -0,0 +1,416 @@ |
| 1 | +"""FLORES short language tags and canonical NLLB tokenizer codes. | |
| 2 | + | |
| 3 | +``NLLB_FLORES_SHORT_TO_CODE`` maps model-card short tags (ISO 639-1 / FLORES ids) | |
| 4 | +to NLLB ``src_lang`` tokens: ``<iso639-3>_<Script>`` (ISO 15924 script). | |
| 5 | + | |
| 6 | +``NLLB_TOKENIZER_LANGUAGE_CODES`` lists every language token in the tokenizer. | |
| 7 | +""" | |
| 8 | +from __future__ import annotations | |
| 9 | + | |
| 10 | +from typing import Dict, FrozenSet | |
| 11 | + | |
| 12 | +NLLB_TOKENIZER_LANGUAGE_CODES: FrozenSet[str] = frozenset({ | |
| 13 | + "ace_Arab", | |
| 14 | + "ace_Latn", | |
| 15 | + "acm_Arab", | |
| 16 | + "acq_Arab", | |
| 17 | + "aeb_Arab", | |
| 18 | + "afr_Latn", | |
| 19 | + "ajp_Arab", | |
| 20 | + "aka_Latn", | |
| 21 | + "als_Latn", | |
| 22 | + "amh_Ethi", | |
| 23 | + "apc_Arab", | |
| 24 | + "arb_Arab", | |
| 25 | + "ars_Arab", | |
| 26 | + "ary_Arab", | |
| 27 | + "arz_Arab", | |
| 28 | + "asm_Beng", | |
| 29 | + "ast_Latn", | |
| 30 | + "awa_Deva", | |
| 31 | + "ayr_Latn", | |
| 32 | + "azb_Arab", | |
| 33 | + "azj_Latn", | |
| 34 | + "bak_Cyrl", | |
| 35 | + "bam_Latn", | |
| 36 | + "ban_Latn", | |
| 37 | + "bel_Cyrl", | |
| 38 | + "bem_Latn", | |
| 39 | + "ben_Beng", | |
| 40 | + "bho_Deva", | |
| 41 | + "bjn_Arab", | |
| 42 | + "bjn_Latn", | |
| 43 | + "bod_Tibt", | |
| 44 | + "bos_Latn", | |
| 45 | + "bug_Latn", | |
| 46 | + "bul_Cyrl", | |
| 47 | + "cat_Latn", | |
| 48 | + "ceb_Latn", | |
| 49 | + "ces_Latn", | |
| 50 | + "cjk_Latn", | |
| 51 | + "ckb_Arab", | |
| 52 | + "crh_Latn", | |
| 53 | + "cym_Latn", | |
| 54 | + "dan_Latn", | |
| 55 | + "deu_Latn", | |
| 56 | + "dik_Latn", | |
| 57 | + "dyu_Latn", | |
| 58 | + "dzo_Tibt", | |
| 59 | + "ell_Grek", | |
| 60 | + "eng_Latn", | |
| 61 | + "epo_Latn", | |
| 62 | + "est_Latn", | |
| 63 | + "eus_Latn", | |
| 64 | + "ewe_Latn", | |
| 65 | + "fao_Latn", | |
| 66 | + "fij_Latn", | |
| 67 | + "fin_Latn", | |
| 68 | + "fon_Latn", | |
| 69 | + "fra_Latn", | |
| 70 | + "fur_Latn", | |
| 71 | + "fuv_Latn", | |
| 72 | + "gaz_Latn", | |
| 73 | + "gla_Latn", | |
| 74 | + "gle_Latn", | |
| 75 | + "glg_Latn", | |
| 76 | + "grn_Latn", | |
| 77 | + "guj_Gujr", | |
| 78 | + "hat_Latn", | |
| 79 | + "hau_Latn", | |
| 80 | + "heb_Hebr", | |
| 81 | + "hin_Deva", | |
| 82 | + "hne_Deva", | |
| 83 | + "hrv_Latn", | |
| 84 | + "hun_Latn", | |
| 85 | + "hye_Armn", | |
| 86 | + "ibo_Latn", | |
| 87 | + "ilo_Latn", | |
| 88 | + "ind_Latn", | |
| 89 | + "isl_Latn", | |
| 90 | + "ita_Latn", | |
| 91 | + "jav_Latn", | |
| 92 | + "jpn_Jpan", | |
| 93 | + "kab_Latn", | |
| 94 | + "kac_Latn", | |
| 95 | + "kam_Latn", | |
| 96 | + "kan_Knda", | |
| 97 | + "kas_Arab", | |
| 98 | + "kas_Deva", | |
| 99 | + "kat_Geor", | |
| 100 | + "kaz_Cyrl", | |
| 101 | + "kbp_Latn", | |
| 102 | + "kea_Latn", | |
| 103 | + "khk_Cyrl", | |
| 104 | + "khm_Khmr", | |
| 105 | + "kik_Latn", | |
| 106 | + "kin_Latn", | |
| 107 | + "kir_Cyrl", | |
| 108 | + "kmb_Latn", | |
| 109 | + "kmr_Latn", | |
| 110 | + "knc_Arab", | |
| 111 | + "knc_Latn", | |
| 112 | + "kon_Latn", | |
| 113 | + "kor_Hang", | |
| 114 | + "lao_Laoo", | |
| 115 | + "lij_Latn", | |
| 116 | + "lim_Latn", | |
| 117 | + "lin_Latn", | |
| 118 | + "lit_Latn", | |
| 119 | + "lmo_Latn", | |
| 120 | + "ltg_Latn", | |
| 121 | + "ltz_Latn", | |
| 122 | + "lua_Latn", | |
| 123 | + "lug_Latn", | |
| 124 | + "luo_Latn", | |
| 125 | + "lus_Latn", | |
| 126 | + "lvs_Latn", | |
| 127 | + "mag_Deva", | |
| 128 | + "mai_Deva", | |
| 129 | + "mal_Mlym", | |
| 130 | + "mar_Deva", | |
| 131 | + "min_Latn", | |
| 132 | + "mkd_Cyrl", | |
| 133 | + "mlt_Latn", | |
| 134 | + "mni_Beng", | |
| 135 | + "mos_Latn", | |
| 136 | + "mri_Latn", | |
| 137 | + "mya_Mymr", | |
| 138 | + "nld_Latn", | |
| 139 | + "nno_Latn", | |
| 140 | + "nob_Latn", | |
| 141 | + "npi_Deva", | |
| 142 | + "nso_Latn", | |
| 143 | + "nus_Latn", | |
| 144 | + "nya_Latn", | |
| 145 | + "oci_Latn", | |
| 146 | + "ory_Orya", | |
| 147 | + "pag_Latn", | |
| 148 | + "pan_Guru", | |
| 149 | + "pap_Latn", | |
| 150 | + "pbt_Arab", | |
| 151 | + "pes_Arab", | |
| 152 | + "plt_Latn", | |
| 153 | + "pol_Latn", | |
| 154 | + "por_Latn", | |
| 155 | + "prs_Arab", | |
| 156 | + "quy_Latn", | |
| 157 | + "ron_Latn", | |
| 158 | + "run_Latn", | |
| 159 | + "rus_Cyrl", | |
| 160 | + "sag_Latn", | |
| 161 | + "san_Deva", | |
| 162 | + "sat_Beng", | |
| 163 | + "scn_Latn", | |
| 164 | + "shn_Mymr", | |
| 165 | + "sin_Sinh", | |
| 166 | + "slk_Latn", | |
| 167 | + "slv_Latn", | |
| 168 | + "smo_Latn", | |
| 169 | + "sna_Latn", | |
| 170 | + "snd_Arab", | |
| 171 | + "som_Latn", | |
| 172 | + "sot_Latn", | |
| 173 | + "spa_Latn", | |
| 174 | + "srd_Latn", | |
| 175 | + "srp_Cyrl", | |
| 176 | + "ssw_Latn", | |
| 177 | + "sun_Latn", | |
| 178 | + "swe_Latn", | |
| 179 | + "swh_Latn", | |
| 180 | + "szl_Latn", | |
| 181 | + "tam_Taml", | |
| 182 | + "taq_Latn", | |
| 183 | + "taq_Tfng", | |
| 184 | + "tat_Cyrl", | |
| 185 | + "tel_Telu", | |
| 186 | + "tgk_Cyrl", | |
| 187 | + "tgl_Latn", | |
| 188 | + "tha_Thai", | |
| 189 | + "tir_Ethi", | |
| 190 | + "tpi_Latn", | |
| 191 | + "tsn_Latn", | |
| 192 | + "tso_Latn", | |
| 193 | + "tuk_Latn", | |
| 194 | + "tum_Latn", | |
| 195 | + "tur_Latn", | |
| 196 | + "twi_Latn", | |
| 197 | + "tzm_Tfng", | |
| 198 | + "uig_Arab", | |
| 199 | + "ukr_Cyrl", | |
| 200 | + "umb_Latn", | |
| 201 | + "urd_Arab", | |
| 202 | + "uzn_Latn", | |
| 203 | + "vec_Latn", | |
| 204 | + "vie_Latn", | |
| 205 | + "war_Latn", | |
| 206 | + "wol_Latn", | |
| 207 | + "xho_Latn", | |
| 208 | + "ydd_Hebr", | |
| 209 | + "yor_Latn", | |
| 210 | + "yue_Hant", | |
| 211 | + "zho_Hans", | |
| 212 | + "zho_Hant", | |
| 213 | + "zsm_Latn", | |
| 214 | + "zul_Latn", | |
| 215 | +}) | |
| 216 | + | |
| 217 | +NLLB_FLORES_SHORT_TO_CODE: Dict[str, str] = { | |
| 218 | + "ace": "ace_Latn", | |
| 219 | + "acm": "acm_Arab", | |
| 220 | + "acq": "acq_Arab", | |
| 221 | + "aeb": "aeb_Arab", | |
| 222 | + "af": "afr_Latn", | |
| 223 | + "ajp": "ajp_Arab", | |
| 224 | + "ak": "aka_Latn", | |
| 225 | + "als": "als_Latn", | |
| 226 | + "am": "amh_Ethi", | |
| 227 | + "apc": "apc_Arab", | |
| 228 | + "ar": "arb_Arab", | |
| 229 | + "ars": "ars_Arab", | |
| 230 | + "ary": "ary_Arab", | |
| 231 | + "arz": "arz_Arab", | |
| 232 | + "as": "asm_Beng", | |
| 233 | + "ast": "ast_Latn", | |
| 234 | + "awa": "awa_Deva", | |
| 235 | + "ayr": "ayr_Latn", | |
| 236 | + "azb": "azb_Arab", | |
| 237 | + "azj": "azj_Latn", | |
| 238 | + "ba": "bak_Cyrl", | |
| 239 | + "ban": "ban_Latn", | |
| 240 | + "be": "bel_Cyrl", | |
| 241 | + "bem": "bem_Latn", | |
| 242 | + "bg": "bul_Cyrl", | |
| 243 | + "bho": "bho_Deva", | |
| 244 | + "bjn": "bjn_Latn", | |
| 245 | + "bm": "bam_Latn", | |
| 246 | + "bn": "ben_Beng", | |
| 247 | + "bo": "bod_Tibt", | |
| 248 | + "bs": "bos_Latn", | |
| 249 | + "bug": "bug_Latn", | |
| 250 | + "ca": "cat_Latn", | |
| 251 | + "ceb": "ceb_Latn", | |
| 252 | + "cjk": "cjk_Latn", | |
| 253 | + "ckb": "ckb_Arab", | |
| 254 | + "crh": "crh_Latn", | |
| 255 | + "cs": "ces_Latn", | |
| 256 | + "cy": "cym_Latn", | |
| 257 | + "da": "dan_Latn", | |
| 258 | + "de": "deu_Latn", | |
| 259 | + "dik": "dik_Latn", | |
| 260 | + "dyu": "dyu_Latn", | |
| 261 | + "dz": "dzo_Tibt", | |
| 262 | + "ee": "ewe_Latn", | |
| 263 | + "el": "ell_Grek", | |
| 264 | + "en": "eng_Latn", | |
| 265 | + "eo": "epo_Latn", | |
| 266 | + "es": "spa_Latn", | |
| 267 | + "et": "est_Latn", | |
| 268 | + "eu": "eus_Latn", | |
| 269 | + "fi": "fin_Latn", | |
| 270 | + "fj": "fij_Latn", | |
| 271 | + "fo": "fao_Latn", | |
| 272 | + "fon": "fon_Latn", | |
| 273 | + "fr": "fra_Latn", | |
| 274 | + "fur": "fur_Latn", | |
| 275 | + "fuv": "fuv_Latn", | |
| 276 | + "ga": "gle_Latn", | |
| 277 | + "gaz": "gaz_Latn", | |
| 278 | + "gd": "gla_Latn", | |
| 279 | + "gl": "glg_Latn", | |
| 280 | + "gn": "grn_Latn", | |
| 281 | + "gu": "guj_Gujr", | |
| 282 | + "ha": "hau_Latn", | |
| 283 | + "he": "heb_Hebr", | |
| 284 | + "hi": "hin_Deva", | |
| 285 | + "hne": "hne_Deva", | |
| 286 | + "hr": "hrv_Latn", | |
| 287 | + "ht": "hat_Latn", | |
| 288 | + "hu": "hun_Latn", | |
| 289 | + "hy": "hye_Armn", | |
| 290 | + "id": "ind_Latn", | |
| 291 | + "ig": "ibo_Latn", | |
| 292 | + "ilo": "ilo_Latn", | |
| 293 | + "is": "isl_Latn", | |
| 294 | + "it": "ita_Latn", | |
| 295 | + "ja": "jpn_Jpan", | |
| 296 | + "jv": "jav_Latn", | |
| 297 | + "ka": "kat_Geor", | |
| 298 | + "kab": "kab_Latn", | |
| 299 | + "kac": "kac_Latn", | |
| 300 | + "kam": "kam_Latn", | |
| 301 | + "kbp": "kbp_Latn", | |
| 302 | + "kea": "kea_Latn", | |
| 303 | + "kg": "kon_Latn", | |
| 304 | + "khk": "khk_Cyrl", | |
| 305 | + "ki": "kik_Latn", | |
| 306 | + "kk": "kaz_Cyrl", | |
| 307 | + "km": "khm_Khmr", | |
| 308 | + "kmb": "kmb_Latn", | |
| 309 | + "kmr": "kmr_Latn", | |
| 310 | + "kn": "kan_Knda", | |
| 311 | + "knc": "knc_Latn", | |
| 312 | + "ko": "kor_Hang", | |
| 313 | + "ks": "kas_Arab", | |
| 314 | + "ky": "kir_Cyrl", | |
| 315 | + "lb": "ltz_Latn", | |
| 316 | + "lg": "lug_Latn", | |
| 317 | + "li": "lim_Latn", | |
| 318 | + "lij": "lij_Latn", | |
| 319 | + "lmo": "lmo_Latn", | |
| 320 | + "ln": "lin_Latn", | |
| 321 | + "lo": "lao_Laoo", | |
| 322 | + "lt": "lit_Latn", | |
| 323 | + "ltg": "ltg_Latn", | |
| 324 | + "lua": "lua_Latn", | |
| 325 | + "luo": "luo_Latn", | |
| 326 | + "lus": "lus_Latn", | |
| 327 | + "lvs": "lvs_Latn", | |
| 328 | + "mag": "mag_Deva", | |
| 329 | + "mai": "mai_Deva", | |
| 330 | + "mar": "mar_Deva", | |
| 331 | + "mi": "mri_Latn", | |
| 332 | + "min": "min_Latn", | |
| 333 | + "mk": "mkd_Cyrl", | |
| 334 | + "ml": "mal_Mlym", | |
| 335 | + "mni": "mni_Beng", | |
| 336 | + "mos": "mos_Latn", | |
| 337 | + "mt": "mlt_Latn", | |
| 338 | + "my": "mya_Mymr", | |
| 339 | + "nb": "nob_Latn", | |
| 340 | + "nl": "nld_Latn", | |
| 341 | + "nn": "nno_Latn", | |
| 342 | + "no": "nob_Latn", | |
| 343 | + "npi": "npi_Deva", | |
| 344 | + "nso": "nso_Latn", | |
| 345 | + "nus": "nus_Latn", | |
| 346 | + "ny": "nya_Latn", | |
| 347 | + "oc": "oci_Latn", | |
| 348 | + "ory": "ory_Orya", | |
| 349 | + "pa": "pan_Guru", | |
| 350 | + "pag": "pag_Latn", | |
| 351 | + "pap": "pap_Latn", | |
| 352 | + "pbt": "pbt_Arab", | |
| 353 | + "pes": "pes_Arab", | |
| 354 | + "pl": "pol_Latn", | |
| 355 | + "plt": "plt_Latn", | |
| 356 | + "prs": "prs_Arab", | |
| 357 | + "pt": "por_Latn", | |
| 358 | + "quy": "quy_Latn", | |
| 359 | + "rn": "run_Latn", | |
| 360 | + "ro": "ron_Latn", | |
| 361 | + "ru": "rus_Cyrl", | |
| 362 | + "rw": "kin_Latn", | |
| 363 | + "sa": "san_Deva", | |
| 364 | + "sat": "sat_Beng", | |
| 365 | + "sc": "srd_Latn", | |
| 366 | + "scn": "scn_Latn", | |
| 367 | + "sd": "snd_Arab", | |
| 368 | + "sg": "sag_Latn", | |
| 369 | + "shn": "shn_Mymr", | |
| 370 | + "si": "sin_Sinh", | |
| 371 | + "sk": "slk_Latn", | |
| 372 | + "sl": "slv_Latn", | |
| 373 | + "sm": "smo_Latn", | |
| 374 | + "sn": "sna_Latn", | |
| 375 | + "so": "som_Latn", | |
| 376 | + "sr": "srp_Cyrl", | |
| 377 | + "ss": "ssw_Latn", | |
| 378 | + "st": "sot_Latn", | |
| 379 | + "su": "sun_Latn", | |
| 380 | + "sv": "swe_Latn", | |
| 381 | + "swh": "swh_Latn", | |
| 382 | + "szl": "szl_Latn", | |
| 383 | + "ta": "tam_Taml", | |
| 384 | + "taq": "taq_Latn", | |
| 385 | + "te": "tel_Telu", | |
| 386 | + "tg": "tgk_Cyrl", | |
| 387 | + "th": "tha_Thai", | |
| 388 | + "ti": "tir_Ethi", | |
| 389 | + "tk": "tuk_Latn", | |
| 390 | + "tl": "tgl_Latn", | |
| 391 | + "tn": "tsn_Latn", | |
| 392 | + "tpi": "tpi_Latn", | |
| 393 | + "tr": "tur_Latn", | |
| 394 | + "ts": "tso_Latn", | |
| 395 | + "tt": "tat_Cyrl", | |
| 396 | + "tum": "tum_Latn", | |
| 397 | + "tw": "twi_Latn", | |
| 398 | + "tzm": "tzm_Tfng", | |
| 399 | + "ug": "uig_Arab", | |
| 400 | + "uk": "ukr_Cyrl", | |
| 401 | + "umb": "umb_Latn", | |
| 402 | + "ur": "urd_Arab", | |
| 403 | + "uzn": "uzn_Latn", | |
| 404 | + "vec": "vec_Latn", | |
| 405 | + "vi": "vie_Latn", | |
| 406 | + "war": "war_Latn", | |
| 407 | + "wo": "wol_Latn", | |
| 408 | + "xh": "xho_Latn", | |
| 409 | + "ydd": "ydd_Hebr", | |
| 410 | + "yo": "yor_Latn", | |
| 411 | + "yue": "yue_Hant", | |
| 412 | + "zh": "zho_Hans", | |
| 413 | + "zsm": "zsm_Latn", | |
| 414 | + "zu": "zul_Latn", | |
| 415 | +} | |
| 416 | + | ... | ... |