Commit 8140e942c1712fe5f307552183bd2c8df1852a29

Authored by tangwang
1 parent 86d0e83d

translator model priority

config/config.yaml
@@ -209,6 +209,17 @@ services: @@ -209,6 +209,17 @@ services:
209 cache: 209 cache:
210 ttl_seconds: 62208000 210 ttl_seconds: 62208000
211 sliding_expiration: true 211 sliding_expiration: true
  212 + # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups).
  213 + enable_model_quality_tier_cache: true
  214 + # Higher tier = better quality. Multiple models may share the same tier.
  215 + # A request to model A may reuse Redis keys written by models whose tier is >= A's tier (never by lower tiers).
  216 + model_quality_tiers:
  217 + deepl: 30
  218 + qwen-mt: 30
  219 + llm: 30
  220 + nllb-200-distilled-600m: 20
  221 + opus-mt-zh-en: 10
  222 + opus-mt-en-zh: 10
212 capabilities: 223 capabilities:
213 qwen-mt: 224 qwen-mt:
214 enabled: true 225 enabled: true
@@ -89,32 +89,6 @@ image_embedding改为,一个spu有多个sku向量,每个向量内部properti @@ -89,32 +89,6 @@ image_embedding改为,一个spu有多个sku向量,每个向量内部properti
89 89
90 90
91 91
92 -2026-03-21 10:29:23,698 - elastic_transport.transport - INFO - POST http://localhost:9200/search_products_tenant_163/_search?include_named_queries_score=false [status:200 duration:0.009s]  
93 -2026-03-21 10:29:23,700 - request_context - INFO - 分页详情回填 | ids=20 | filled=20 | took=7ms  
94 -2026-03-21 10:29:23,700 - request_context - INFO - 重排分页切片 | from=20, size=20, 返回=20条  
95 -2026-03-21 10:29:23,720 - embeddings.text_encoder - ERROR - TextEmbeddingEncoder service request failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1  
96 -Traceback (most recent call last):  
97 - File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service  
98 - response.raise_for_status()  
99 - File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status  
100 - raise HTTPError(http_error_msg, response=self)  
101 -requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1  
102 -2026-03-21 10:29:23,720 - search.searcher - WARNING - Failed to encode SKU option1 values for final-page sorting: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1  
103 -Traceback (most recent call last):  
104 - File "/data/saas-search/search/searcher.py", line 448, in _apply_sku_sorting_for_page_hits  
105 - encoded_option_vectors = text_encoder.encode(option1_values_to_encode, priority=1)  
106 - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^  
107 - File "/data/saas-search/embeddings/text_encoder.py", line 112, in encode  
108 - response_data = self._call_service(  
109 - ^^^^^^^^^^^^^^^^^^^  
110 - File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service  
111 - response.raise_for_status()  
112 - File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status  
113 - raise HTTPError(http_error_msg, response=self)  
114 -requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1  
115 -2026-03-21 10:29:23,721 - request_context - WARNING - SKU option embedding failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1  
116 -  
117 -  
118 92
119 93
120 是否需要: 94 是否需要:
tests/test_translator_failure_semantics.py
1 import logging 1 import logging
2 2
  3 +import pytest
  4 +
3 from translation.cache import TranslationCache 5 from translation.cache import TranslationCache
4 from translation.logging_utils import ( 6 from translation.logging_utils import (
5 TranslationRequestFilter, 7 TranslationRequestFilter,
@@ -7,6 +9,7 @@ from translation.logging_utils import ( @@ -7,6 +9,7 @@ from translation.logging_utils import (
7 reset_translation_request_id, 9 reset_translation_request_id,
8 ) 10 )
9 from translation.service import TranslationService 11 from translation.service import TranslationService
  12 +from translation.settings import build_translation_config, translation_cache_probe_models
10 13
11 14
12 class _FakeCache: 15 class _FakeCache:
@@ -16,7 +19,8 @@ class _FakeCache: @@ -16,7 +19,8 @@ class _FakeCache:
16 self.get_calls = [] 19 self.get_calls = []
17 self.set_calls = [] 20 self.set_calls = []
18 21
19 - def get(self, *, model, target_lang, source_text): 22 + def get(self, *, model, target_lang, source_text, log_lookup=True):
  23 + del log_lookup
20 self.get_calls.append((model, target_lang, source_text)) 24 self.get_calls.append((model, target_lang, source_text))
21 return self.storage.get((model, target_lang, source_text)) 25 return self.storage.get((model, target_lang, source_text))
22 26
@@ -191,3 +195,262 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): @@ -191,3 +195,262 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog):
191 assert route_messages == [ 195 assert route_messages == [
192 "Translation route | backend=llm request_type=single use_cache=True cache_available=False" 196 "Translation route | backend=llm request_type=single use_cache=True cache_available=False"
193 ] 197 ]
  198 +
  199 +
def test_translation_cache_probe_models_order():
    """Probe list is ordered by tier, highest quality first; unknown models probe only themselves."""
    config = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}}
    expectations = {
        "low": ["high", "mid", "low"],
        "mid": ["high", "mid"],
        "high": ["high"],
        "unknown": ["unknown"],
    }
    for requested, expected in expectations.items():
        assert translation_cache_probe_models(config, requested) == expected
  206 +
  207 +
def test_translation_cache_probe_models_respects_enable_switch():
    """With the tier switch off, only the request model's own key is probed."""
    config = {
        "cache": {
            "enable_model_quality_tier_cache": False,
            "model_quality_tiers": {"peer-a": 50, "peer-b": 50, "top": 100},
        }
    }
    probed = translation_cache_probe_models(config, "peer-a")
    assert probed == ["peer-a"]
  216 +
  217 +
def test_translation_cache_probe_models_same_tier_included():
    """Same numeric tier: all peers are probed (higher tier first, then name order)."""
    config = {"cache": {"model_quality_tiers": {"peer-a": 50, "peer-b": 50, "top": 100}}}
    for requester, expected in (
        ("peer-a", ["top", "peer-a", "peer-b"]),
        ("peer-b", ["top", "peer-b", "peer-a"]),
    ):
        assert translation_cache_probe_models(config, requester) == expected
  223 +
  224 +
def test_model_quality_tiers_unknown_capability_raises():
    """A tier entry naming a capability that is not configured must be rejected."""
    raw_config = {
        "service_url": "http://127.0.0.1:6006",
        "timeout_sec": 10.0,
        "default_model": "llm",
        "default_scene": "general",
        "cache": {
            "ttl_seconds": 60,
            "sliding_expiration": True,
            # "ghost" is deliberately absent from capabilities below.
            "model_quality_tiers": {"ghost": 1},
        },
        "capabilities": {
            "llm": {
                "enabled": True,
                "backend": "llm",
                "model": "dummy-llm",
                "base_url": "https://example.com",
                "timeout_sec": 10.0,
                "use_cache": True,
            }
        },
    }
    with pytest.raises(ValueError, match="unknown capability"):
        build_translation_config(raw_config)
  250 +
  251 +
def test_tiered_cache_reuses_higher_tier_entry(monkeypatch):
    """A request to a low-tier model is served from a higher-tier model's cache entry."""
    monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
    backend_calls = []

    def _stub_create_backend(self, *, name, backend_type, cfg):
        del self, backend_type, cfg

        class _StubBackend:
            model = name

            @property
            def supports_batch(self):
                return True

            def translate(self, text, target_lang, source_lang=None, scene=None):
                del target_lang, source_lang, scene
                backend_calls.append((name, text))
                if isinstance(text, list):
                    return [f"{name}:{item}" for item in text]
                return f"{name}:{text}"

        return _StubBackend()

    monkeypatch.setattr(TranslationService, "_create_backend", _stub_create_backend)
    config = {
        "service_url": "http://127.0.0.1:6006",
        "timeout_sec": 10.0,
        "default_model": "opus-mt-zh-en",
        "default_scene": "general",
        "capabilities": {
            "deepl": {
                "enabled": True,
                "backend": "deepl",
                "api_url": "https://api.deepl.com/v2/translate",
                "timeout_sec": 10.0,
                "use_cache": True,
            },
            "opus-mt-zh-en": {
                "enabled": True,
                "backend": "local_marian",
                "model_id": "dummy",
                "model_dir": "dummy",
                "device": "cpu",
                "torch_dtype": "float32",
                "batch_size": 8,
                "max_input_length": 16,
                "max_new_tokens": 16,
                "num_beams": 1,
                "use_cache": True,
            },
        },
        "cache": {
            "ttl_seconds": 60,
            "sliding_expiration": True,
            "model_quality_tiers": {"deepl": 100, "opus-mt-zh-en": 40},
        },
    }

    service = TranslationService(config)
    cache_double = _FakeCache()
    # Seed the cache under the higher-tier model only.
    cache_double.storage[("deepl", "en", "商品标题")] = "from-deepl"
    service._translation_cache = cache_double

    result = service.translate("商品标题", target_lang="en", source_lang="zh", model="opus-mt-zh-en")
    assert result == "from-deepl"
    assert backend_calls == []
    assert cache_double.get_calls == [("deepl", "en", "商品标题")]
  319 +
  320 +
def test_tiered_cache_reuses_same_tier_peer(monkeypatch):
    """Model A may use cache written under model B when both share the same tier."""
    monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
    backend_calls = []

    def _stub_create_backend(self, *, name, backend_type, cfg):
        del self, backend_type, cfg

        class _StubBackend:
            model = name

            @property
            def supports_batch(self):
                return True

            def translate(self, text, target_lang, source_lang=None, scene=None):
                del target_lang, source_lang, scene
                backend_calls.append((name, text))
                if isinstance(text, list):
                    return [f"{name}:{item}" for item in text]
                return f"{name}:{text}"

        return _StubBackend()

    monkeypatch.setattr(TranslationService, "_create_backend", _stub_create_backend)
    shared_marian_capability = {
        "enabled": True,
        "backend": "local_marian",
        "model_id": "dummy",
        "model_dir": "dummy",
        "device": "cpu",
        "torch_dtype": "float32",
        "batch_size": 8,
        "max_input_length": 16,
        "max_new_tokens": 16,
        "num_beams": 1,
        "use_cache": True,
    }
    config = {
        "service_url": "http://127.0.0.1:6006",
        "timeout_sec": 10.0,
        "default_model": "opus-mt-en-zh",
        "default_scene": "general",
        "capabilities": {
            "opus-mt-zh-en": dict(shared_marian_capability),
            "opus-mt-en-zh": dict(shared_marian_capability),
        },
        "cache": {
            "ttl_seconds": 60,
            "sliding_expiration": True,
            "model_quality_tiers": {"opus-mt-zh-en": 50, "opus-mt-en-zh": 50},
        },
    }

    service = TranslationService(config)
    cache_double = _FakeCache()
    # Seed the cache under the same-tier peer, not under the request model.
    cache_double.storage[("opus-mt-zh-en", "en", "hello")] = "from-zh-en"
    service._translation_cache = cache_double

    result = service.translate("hello", target_lang="en", source_lang="zh", model="opus-mt-en-zh")
    assert result == "from-zh-en"
    assert backend_calls == []
    # The request model's own key is probed first, then the peer's.
    assert cache_double.get_calls == [
        ("opus-mt-en-zh", "en", "hello"),
        ("opus-mt-zh-en", "en", "hello"),
    ]
  387 +
  388 +
def test_tiered_cache_switch_off_uses_exact_model_only(monkeypatch):
    """With tiered lookups disabled, only the request model's own cache key is consulted."""
    monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
    backend_calls = []

    def _stub_create_backend(self, *, name, backend_type, cfg):
        del self, backend_type, cfg

        class _StubBackend:
            model = name

            @property
            def supports_batch(self):
                return True

            def translate(self, text, target_lang, source_lang=None, scene=None):
                del target_lang, source_lang, scene
                backend_calls.append((name, text))
                if isinstance(text, list):
                    return [f"{name}:{item}" for item in text]
                return f"{name}:{text}"

        return _StubBackend()

    monkeypatch.setattr(TranslationService, "_create_backend", _stub_create_backend)
    config = {
        "service_url": "http://127.0.0.1:6006",
        "timeout_sec": 10.0,
        "default_model": "opus-mt-zh-en",
        "default_scene": "general",
        "capabilities": {
            "deepl": {
                "enabled": True,
                "backend": "deepl",
                "api_url": "https://api.deepl.com/v2/translate",
                "timeout_sec": 10.0,
                "use_cache": True,
            },
            "opus-mt-zh-en": {
                "enabled": True,
                "backend": "local_marian",
                "model_id": "dummy",
                "model_dir": "dummy",
                "device": "cpu",
                "torch_dtype": "float32",
                "batch_size": 8,
                "max_input_length": 16,
                "max_new_tokens": 16,
                "num_beams": 1,
                "use_cache": True,
            },
        },
        "cache": {
            "ttl_seconds": 60,
            "sliding_expiration": True,
            "enable_model_quality_tier_cache": False,
            "model_quality_tiers": {"deepl": 100, "opus-mt-zh-en": 40},
        },
    }

    service = TranslationService(config)
    cache_double = _FakeCache()
    # Higher-tier entry exists but must be ignored while the switch is off.
    cache_double.storage[("deepl", "en", "商品标题")] = "from-deepl"
    service._translation_cache = cache_double

    result = service.translate("商品标题", target_lang="en", source_lang="zh", model="opus-mt-zh-en")
    assert result == "opus-mt-zh-en:商品标题"
    assert backend_calls == [("opus-mt-zh-en", "商品标题")]
    assert cache_double.get_calls == [("opus-mt-zh-en", "en", "商品标题")]
translation/cache.py
@@ -36,7 +36,13 @@ class TranslationCache: @@ -36,7 +36,13 @@ class TranslationCache:
36 digest = hashlib.sha256(text.encode("utf-8")).hexdigest() 36 digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
37 return f"trans:{normalized_model}:{normalized_target_lang}:{text_prefix}{digest}" 37 return f"trans:{normalized_model}:{normalized_target_lang}:{text_prefix}{digest}"
38 38
39 - def get(self, *, model: str, target_lang: str, source_text: str) -> Optional[str]: 39 + def get(
  40 + self,
  41 + *,
  42 + model: str,
  43 + target_lang: str,
  44 + source_text: str
  45 + ) -> Optional[str]:
40 if self.redis_client is None: 46 if self.redis_client is None:
41 return None 47 return None
42 key = self.build_key(model=model, target_lang=target_lang, source_text=source_text) 48 key = self.build_key(model=model, target_lang=target_lang, source_text=source_text)
translation/service.py
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 from __future__ import annotations 3 from __future__ import annotations
4 4
5 import logging 5 import logging
6 -from typing import Dict, List, Optional 6 +from typing import Dict, List, Optional, Tuple
7 7
8 from config.loader import get_app_config 8 from config.loader import get_app_config
9 from config.schema import AppConfig 9 from config.schema import AppConfig
@@ -15,6 +15,7 @@ from translation.settings import ( @@ -15,6 +15,7 @@ from translation.settings import (
15 get_translation_capability, 15 get_translation_capability,
16 normalize_translation_model, 16 normalize_translation_model,
17 normalize_translation_scene, 17 normalize_translation_scene,
  18 + translation_cache_probe_models,
18 ) 19 )
19 20
20 logger = logging.getLogger(__name__) 21 logger = logging.getLogger(__name__)
@@ -247,7 +248,11 @@ class TranslationService: @@ -247,7 +248,11 @@ class TranslationService:
247 ) -> Optional[str]: 248 ) -> Optional[str]:
248 if not text.strip(): 249 if not text.strip():
249 return text 250 return text
250 - cached = self._translation_cache.get(model=model, target_lang=target_lang, source_text=text) 251 + cached, _served = self._tiered_cache_get(
  252 + request_model=model,
  253 + target_lang=target_lang,
  254 + source_text=text,
  255 + )
251 if cached is not None: 256 if cached is not None:
252 logger.info( 257 logger.info(
253 "Translation cache served | request_type=single text_len=%s", 258 "Translation cache served | request_type=single text_len=%s",
@@ -279,6 +284,30 @@ class TranslationService: @@ -279,6 +284,30 @@ class TranslationService:
279 ) 284 )
280 return translated 285 return translated
281 286
  287 + def _tiered_cache_get(
  288 + self,
  289 + *,
  290 + request_model: str,
  291 + target_lang: str,
  292 + source_text: str,
  293 + ) -> Tuple[Optional[str], Optional[str]]:
  294 + """Redis lookup: cache from higher-tier or **same-tier** models may satisfy A.
  295 +
  296 + Lower-tier entries are never read. Returns ``(translated, served_model)``.
  297 + """
  298 + probe_models = translation_cache_probe_models(self.config, request_model)
  299 +
  300 + for probe_model in probe_models:
  301 + hit = self._translation_cache.get(
  302 + model=probe_model,
  303 + target_lang=target_lang,
  304 + source_text=source_text,
  305 + )
  306 + if hit is not None:
  307 + return hit, probe_model
  308 +
  309 + return None, None
  310 +
282 def _translate_batch_with_cache( 311 def _translate_batch_with_cache(
283 self, 312 self,
284 *, 313 *,
@@ -300,8 +329,8 @@ class TranslationService: @@ -300,8 +329,8 @@ class TranslationService:
300 if not normalized_text.strip(): 329 if not normalized_text.strip():
301 results[idx] = normalized_text 330 results[idx] = normalized_text
302 continue 331 continue
303 - cached = self._translation_cache.get(  
304 - model=model, 332 + cached, _served = self._tiered_cache_get(
  333 + request_model=model,
305 target_lang=target_lang, 334 target_lang=target_lang,
306 source_text=normalized_text, 335 source_text=normalized_text,
307 ) 336 )
translation/settings.py
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 2
3 from __future__ import annotations 3 from __future__ import annotations
4 4
5 -from typing import Any, Dict, List, Mapping, Optional 5 +from typing import Any, Dict, List, Mapping, Optional, Tuple
6 6
7 from translation.scenes import normalize_scene_name 7 from translation.scenes import normalize_scene_name
8 8
@@ -38,6 +38,7 @@ def build_translation_config(raw_cfg: Mapping[str, Any]) -> TranslationConfig: @@ -38,6 +38,7 @@ def build_translation_config(raw_cfg: Mapping[str, Any]) -> TranslationConfig:
38 if not get_enabled_translation_models(config): 38 if not get_enabled_translation_models(config):
39 raise ValueError("At least one translation capability must be enabled") 39 raise ValueError("At least one translation capability must be enabled")
40 40
  41 + _validate_model_quality_tiers(config)
41 return config 42 return config
42 43
43 44
@@ -86,18 +87,107 @@ def get_translation_cache(config: Mapping[str, Any]) -> Dict[str, Any]: @@ -86,18 +87,107 @@ def get_translation_cache(config: Mapping[str, Any]) -> Dict[str, Any]:
86 return dict(cache) 87 return dict(cache)
87 88
88 89
def translation_cache_probe_models(config: Mapping[str, Any], request_model: str) -> List[str]:
    """Return the ordered list of Redis cache-key models to try for *request_model*.

    Sort order: (1) tier descending (higher quality first); (2) within the same
    tier, the request model before other peers; (3) remaining ties by model name.

    For a request to model A with tier T, every configured model whose tier is
    greater than or equal to T is probed. Lower tiers are never used.

    When ``enable_model_quality_tier_cache`` is false, only the request model is
    probed. When ``model_quality_tiers`` is empty or does not list the request
    model, only the request model is probed (legacy exact-match behavior).

    Model names are matched case-insensitively: both the request model and the
    configured tier keys are normalized to lowercase before comparison.
    """
    rm = str(request_model or "").strip().lower()
    cache = config.get("cache")
    if not isinstance(cache, Mapping):
        return [rm]
    if not bool(cache.get("enable_model_quality_tier_cache", True)):
        return [rm]
    raw_tiers = cache.get("model_quality_tiers")
    if not isinstance(raw_tiers, Mapping) or not raw_tiers:
        return [rm]
    # Fix: normalize the tier keys BEFORE the membership/threshold lookup.
    # Previously `rm` was lowercased but `rm not in tiers` / `tiers[rm]` used
    # the raw keys, so a mixed-case key in a raw config (e.g. "DeepL") silently
    # disabled tiered probing for that model. Normalized configs built by
    # `_build_model_quality_tiers` are unaffected (already lowercase).
    tiers: Dict[str, int] = {}
    for name, tier_val in raw_tiers.items():
        tiers[str(name).strip().lower()] = int(tier_val)
    if rm not in tiers:
        return [rm]
    threshold = tiers[rm]
    eligible = [(tier, name) for name, tier in tiers.items() if tier >= threshold]
    eligible.sort(
        key=lambda item: (
            -item[0],  # higher tier first
            0 if item[1] == rm else 1,  # request model before same-tier peers
            item[1],  # stable name order for remaining ties
        )
    )
    # Keys are unique after normalization, so no explicit de-duplication needed.
    return [name for _tier, name in eligible]
  136 +
  137 +
89 def _build_cache_config(raw_cache: Any) -> Dict[str, Any]: 138 def _build_cache_config(raw_cache: Any) -> Dict[str, Any]:
90 if not isinstance(raw_cache, Mapping): 139 if not isinstance(raw_cache, Mapping):
91 raise ValueError("services.translation.cache must be a mapping") 140 raise ValueError("services.translation.cache must be a mapping")
  141 + if "enable_model_quality_tier_cache" in raw_cache:
  142 + enable_tier_cache = _require_bool(
  143 + raw_cache["enable_model_quality_tier_cache"],
  144 + "services.translation.cache.enable_model_quality_tier_cache",
  145 + )
  146 + else:
  147 + enable_tier_cache = True
92 return { 148 return {
93 "ttl_seconds": _require_positive_int(raw_cache.get("ttl_seconds"), "services.translation.cache.ttl_seconds"), 149 "ttl_seconds": _require_positive_int(raw_cache.get("ttl_seconds"), "services.translation.cache.ttl_seconds"),
94 "sliding_expiration": _require_bool( 150 "sliding_expiration": _require_bool(
95 raw_cache.get("sliding_expiration"), 151 raw_cache.get("sliding_expiration"),
96 "services.translation.cache.sliding_expiration", 152 "services.translation.cache.sliding_expiration",
97 ), 153 ),
  154 + "enable_model_quality_tier_cache": enable_tier_cache,
  155 + "model_quality_tiers": _build_model_quality_tiers(raw_cache.get("model_quality_tiers")),
98 } 156 }
99 157
100 158
def _build_model_quality_tiers(raw: Any) -> Dict[str, int]:
    """Normalize the optional model_quality_tiers config into lowercase-name -> int tier."""
    if raw is None:
        return {}
    if not isinstance(raw, Mapping):
        raise ValueError("services.translation.cache.model_quality_tiers must be a mapping")
    tiers: Dict[str, int] = {}
    for raw_name, raw_tier in raw.items():
        model = _require_string(raw_name, "services.translation.cache.model_quality_tiers key").lower()
        tiers[model] = _require_non_negative_int(
            raw_tier, f"services.translation.cache.model_quality_tiers.{model}"
        )
    return tiers
  170 +
  171 +
def _validate_model_quality_tiers(config: TranslationConfig) -> None:
    """Ensure every model named in model_quality_tiers refers to a configured capability."""
    tiers = config["cache"].get("model_quality_tiers")
    if not isinstance(tiers, Mapping) or not tiers:
        return
    known_capabilities = config["capabilities"]
    unknown = [name for name in tiers if name not in known_capabilities]
    if unknown:
        raise ValueError(
            f"services.translation.cache.model_quality_tiers references unknown capability '{unknown[0]}'"
        )
  182 +
  183 +
def _require_non_negative_int(value: Any, field_name: str) -> int:
    """Parse *value* as an int via ``_require_int`` and reject negative results."""
    result = _require_int(value, field_name)
    if result < 0:
        raise ValueError(f"{field_name} must be >= 0")
    return result
  189 +
  190 +
101 def _build_capabilities(raw_capabilities: Any) -> Dict[str, Dict[str, Any]]: 191 def _build_capabilities(raw_capabilities: Any) -> Dict[str, Dict[str, Any]]:
102 if not isinstance(raw_capabilities, Mapping): 192 if not isinstance(raw_capabilities, Mapping):
103 raise ValueError("services.translation.capabilities must be a mapping") 193 raise ValueError("services.translation.capabilities must be a mapping")