0fd2f875
tangwang
translate
|
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
"ru": "Russian",
"ar": "Arabic",
"ja": "Japanese",
"es": "Spanish",
"de": "German",
"fr": "French",
"it": "Italian",
"pt": "Portuguese",
}
# Short language key -> English language name.
# NOTE(review): presumably the names the Qwen translation backend expects
# in its requests — confirm against the caller.
QWEN_LANGUAGE_CODES: Dict[str, str] = {
    "zh": "Chinese",
    "en": "English",
    "ru": "Russian",
    "ar": "Arabic",
    "ja": "Japanese",
    "es": "Spanish",
    "de": "German",
    "fr": "French",
    "it": "Italian",
    "pt": "Portuguese",
}
# Short lower-case language key -> upper-case two-letter code.
# NOTE(review): presumably the language codes sent to the DeepL API —
# confirm against the caller.
DEEPL_LANGUAGE_CODES: Dict[str, str] = {
    "zh": "ZH",
    "en": "EN",
    "ru": "RU",
    "ar": "AR",
    "ja": "JA",
    "es": "ES",
    "de": "DE",
    "fr": "FR",
    "it": "IT",
    "pt": "PT",
}
NLLB_LANGUAGE_CODES: Dict[str, str] = {
"en": "eng_Latn",
|
0fd2f875
tangwang
translate
|
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
"zh": "zho_Hans",
"ru": "rus_Cyrl",
"ar": "arb_Arab",
"ja": "jpn_Jpan",
"es": "spa_Latn",
"de": "deu_Latn",
"fr": "fra_Latn",
"it": "ita_Latn",
"pt": "por_Latn",
}
# Model name -> (source language key, target language key).
# Each entry's tuple matches the language pair embedded in its model name.
MARIAN_LANGUAGE_DIRECTIONS: Dict[str, Tuple[str, str]] = {
    "opus-mt-zh-en": ("zh", "en"),
    "opus-mt-en-zh": ("en", "zh"),
}
|
14e67b71
tangwang
分句后的 batching 现在是...
|
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
# Alternative spellings of a language key -> the canonical short key.
# Keys are written in normalized form (lower-case, "_" instead of "-"),
# because lookups are done on the output of normalize_language_key().
NLLB_LANGUAGE_ALIASES: Dict[str, str] = {
    "fi_fi": "fi",
    "fin": "fi",
    "fin_fin": "fi",
    "zh_cn": "zh",
    "zh_hans": "zh",
}
def normalize_language_key(language: Optional[str]) -> str:
    """Return *language* in canonical lookup form.

    Falsy input (``None``, ``""``) becomes the empty string. Otherwise the
    value is stringified, stripped of surrounding whitespace, lower-cased,
    and every ``-`` is replaced by ``_``.
    """
    raw = str(language or "")
    trimmed = raw.strip()
    lowered = trimmed.lower()
    return lowered.replace("-", "_")
def build_nllb_language_catalog(
    overrides: Optional[Mapping[str, str]] = None,
) -> Dict[str, str]:
    """Build the normalized-key -> NLLB code mapping.

    Starts from ``NLLB_LANGUAGE_CODES`` and then layers *overrides* on top,
    so an override replaces a built-in entry with the same normalized key.

    Args:
        overrides: Optional extra or replacement entries. Keys are passed
            through ``normalize_language_key``; entries whose key is empty
            after normalization are ignored.

    Returns:
        A new dict whose keys are normalized language keys and whose values
        are the stringified, whitespace-stripped codes.
    """
    merged: Dict[str, str] = {}

    # Built-in table first; skip entries whose key is blank.
    for builtin_key, builtin_code in NLLB_LANGUAGE_CODES.items():
        if not str(builtin_key).strip():
            continue
        merged[normalize_language_key(builtin_key)] = str(builtin_code).strip()

    # Caller-supplied entries win over the built-ins.
    if overrides:
        for override_key, override_code in overrides.items():
            lookup_key = normalize_language_key(override_key)
            if not lookup_key:
                continue
            merged[lookup_key] = str(override_code).strip()

    return merged
def resolve_nllb_language_code(
    language: Optional[str],
    language_codes: Optional[Mapping[str, str]] = None,
) -> Optional[str]:
    """Resolve *language* to a code from the NLLB catalog.

    Resolution order, after normalizing *language*:

    1. exact catalog key;
    2. catalog key reached through ``NLLB_LANGUAGE_ALIASES``;
    3. a catalog value that, once normalized, equals the input (so a code
       can be passed in directly, in any letter case).

    Args:
        language: Language key, alias, or code to resolve.
        language_codes: Optional overrides forwarded to
            ``build_nllb_language_catalog``.

    Returns:
        The matching catalog value, or ``None`` when *language* is empty
        or nothing matches.
    """
    wanted = normalize_language_key(language)
    if not wanted:
        return None

    known = build_nllb_language_catalog(language_codes)

    # Keys to try, in priority order: the input itself, then its alias target.
    candidate_keys = [wanted]
    alias_target = NLLB_LANGUAGE_ALIASES.get(wanted)
    if alias_target is not None:
        candidate_keys.append(normalize_language_key(alias_target))

    for candidate in candidate_keys:
        found = known.get(candidate)
        if found is not None:
            return found

    # Last resort: the caller may have supplied a code rather than a key.
    return next(
        (code for code in known.values() if normalize_language_key(code) == wanted),
        None,
    )
|