Commit 8140e942c1712fe5f307552183bd2c8df1852a29
1 parent
86d0e83d
translator model priority
Showing
6 changed files
with
406 additions
and
33 deletions
Show diff stats
config/config.yaml
| ... | ... | @@ -209,6 +209,17 @@ services: |
| 209 | 209 | cache: |
| 210 | 210 | ttl_seconds: 62208000 |
| 211 | 211 | sliding_expiration: true |
| 212 | + # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups). | |
| 213 | + enable_model_quality_tier_cache: true | |
| 214 | + # Higher tier = better quality. Multiple models may share one tier (tier peers). | |
| 215 | + # A request to model A may reuse Redis keys written by models whose tier is >= A's tier (never from lower tiers). | |
| 216 | + model_quality_tiers: | |
| 217 | + deepl: 30 | |
| 218 | + qwen-mt: 30 | |
| 219 | + llm: 30 | |
| 220 | + nllb-200-distilled-600m: 20 | |
| 221 | + opus-mt-zh-en: 10 | |
| 222 | + opus-mt-en-zh: 10 | |
| 212 | 223 | capabilities: |
| 213 | 224 | qwen-mt: |
| 214 | 225 | enabled: true | ... | ... |
docs/TODO.txt
| ... | ... | @@ -89,32 +89,6 @@ image_embedding改为,一个spu有多个sku向量,每个向量内部properti |
| 89 | 89 | |
| 90 | 90 | |
| 91 | 91 | |
| 92 | -2026-03-21 10:29:23,698 - elastic_transport.transport - INFO - POST http://localhost:9200/search_products_tenant_163/_search?include_named_queries_score=false [status:200 duration:0.009s] | |
| 93 | -2026-03-21 10:29:23,700 - request_context - INFO - 分页详情回填 | ids=20 | filled=20 | took=7ms | |
| 94 | -2026-03-21 10:29:23,700 - request_context - INFO - 重排分页切片 | from=20, size=20, 返回=20条 | |
| 95 | -2026-03-21 10:29:23,720 - embeddings.text_encoder - ERROR - TextEmbeddingEncoder service request failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 96 | -Traceback (most recent call last): | |
| 97 | - File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service | |
| 98 | - response.raise_for_status() | |
| 99 | - File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status | |
| 100 | - raise HTTPError(http_error_msg, response=self) | |
| 101 | -requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 102 | -2026-03-21 10:29:23,720 - search.searcher - WARNING - Failed to encode SKU option1 values for final-page sorting: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 103 | -Traceback (most recent call last): | |
| 104 | - File "/data/saas-search/search/searcher.py", line 448, in _apply_sku_sorting_for_page_hits | |
| 105 | - encoded_option_vectors = text_encoder.encode(option1_values_to_encode, priority=1) | |
| 106 | - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| 107 | - File "/data/saas-search/embeddings/text_encoder.py", line 112, in encode | |
| 108 | - response_data = self._call_service( | |
| 109 | - ^^^^^^^^^^^^^^^^^^^ | |
| 110 | - File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service | |
| 111 | - response.raise_for_status() | |
| 112 | - File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status | |
| 113 | - raise HTTPError(http_error_msg, response=self) | |
| 114 | -requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 115 | -2026-03-21 10:29:23,721 - request_context - WARNING - SKU option embedding failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 116 | - | |
| 117 | - | |
| 118 | 92 | |
| 119 | 93 | |
| 120 | 94 | 是否需要: | ... | ... |
tests/test_translator_failure_semantics.py
| 1 | 1 | import logging |
| 2 | 2 | |
| 3 | +import pytest | |
| 4 | + | |
| 3 | 5 | from translation.cache import TranslationCache |
| 4 | 6 | from translation.logging_utils import ( |
| 5 | 7 | TranslationRequestFilter, |
| ... | ... | @@ -7,6 +9,7 @@ from translation.logging_utils import ( |
| 7 | 9 | reset_translation_request_id, |
| 8 | 10 | ) |
| 9 | 11 | from translation.service import TranslationService |
| 12 | +from translation.settings import build_translation_config, translation_cache_probe_models | |
| 10 | 13 | |
| 11 | 14 | |
| 12 | 15 | class _FakeCache: |
| ... | ... | @@ -16,7 +19,8 @@ class _FakeCache: |
| 16 | 19 | self.get_calls = [] |
| 17 | 20 | self.set_calls = [] |
| 18 | 21 | |
| 19 | - def get(self, *, model, target_lang, source_text): | |
| 22 | + def get(self, *, model, target_lang, source_text, log_lookup=True): | |
| 23 | + del log_lookup | |
| 20 | 24 | self.get_calls.append((model, target_lang, source_text)) |
| 21 | 25 | return self.storage.get((model, target_lang, source_text)) |
| 22 | 26 | |
| ... | ... | @@ -191,3 +195,262 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): |
| 191 | 195 | assert route_messages == [ |
| 192 | 196 | "Translation route | backend=llm request_type=single use_cache=True cache_available=False" |
| 193 | 197 | ] |
| 198 | + | |
| 199 | + | |
| 200 | +def test_translation_cache_probe_models_order(): | |
| 201 | + cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}} | |
| 202 | + assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"] | |
| 203 | + assert translation_cache_probe_models(cfg, "mid") == ["high", "mid"] | |
| 204 | + assert translation_cache_probe_models(cfg, "high") == ["high"] | |
| 205 | + assert translation_cache_probe_models(cfg, "unknown") == ["unknown"] | |
| 206 | + | |
| 207 | + | |
| 208 | +def test_translation_cache_probe_models_respects_enable_switch(): | |
| 209 | + cfg = { | |
| 210 | + "cache": { | |
| 211 | + "enable_model_quality_tier_cache": False, | |
| 212 | + "model_quality_tiers": {"peer-a": 50, "peer-b": 50, "top": 100}, | |
| 213 | + } | |
| 214 | + } | |
| 215 | + assert translation_cache_probe_models(cfg, "peer-a") == ["peer-a"] | |
| 216 | + | |
| 217 | + | |
| 218 | +def test_translation_cache_probe_models_same_tier_included(): | |
| 219 | + """Same numeric tier: all peers are probed (higher tier first, then name order).""" | |
| 220 | + cfg = {"cache": {"model_quality_tiers": {"peer-a": 50, "peer-b": 50, "top": 100}}} | |
| 221 | + assert translation_cache_probe_models(cfg, "peer-a") == ["top", "peer-a", "peer-b"] | |
| 222 | + assert translation_cache_probe_models(cfg, "peer-b") == ["top", "peer-b", "peer-a"] | |
| 223 | + | |
| 224 | + | |
| 225 | +def test_model_quality_tiers_unknown_capability_raises(): | |
| 226 | + with pytest.raises(ValueError, match="unknown capability"): | |
| 227 | + build_translation_config( | |
| 228 | + { | |
| 229 | + "service_url": "http://127.0.0.1:6006", | |
| 230 | + "timeout_sec": 10.0, | |
| 231 | + "default_model": "llm", | |
| 232 | + "default_scene": "general", | |
| 233 | + "cache": { | |
| 234 | + "ttl_seconds": 60, | |
| 235 | + "sliding_expiration": True, | |
| 236 | + "model_quality_tiers": {"ghost": 1}, | |
| 237 | + }, | |
| 238 | + "capabilities": { | |
| 239 | + "llm": { | |
| 240 | + "enabled": True, | |
| 241 | + "backend": "llm", | |
| 242 | + "model": "dummy-llm", | |
| 243 | + "base_url": "https://example.com", | |
| 244 | + "timeout_sec": 10.0, | |
| 245 | + "use_cache": True, | |
| 246 | + } | |
| 247 | + }, | |
| 248 | + } | |
| 249 | + ) | |
| 250 | + | |
| 251 | + | |
| 252 | +def test_tiered_cache_reuses_higher_tier_entry(monkeypatch): | |
| 253 | + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None)) | |
| 254 | + translate_calls = [] | |
| 255 | + | |
| 256 | + def _fake_create_backend(self, *, name, backend_type, cfg): | |
| 257 | + del self, backend_type, cfg | |
| 258 | + | |
| 259 | + class _Backend: | |
| 260 | + model = name | |
| 261 | + | |
| 262 | + @property | |
| 263 | + def supports_batch(self): | |
| 264 | + return True | |
| 265 | + | |
| 266 | + def translate(self, text, target_lang, source_lang=None, scene=None): | |
| 267 | + del target_lang, source_lang, scene | |
| 268 | + translate_calls.append((name, text)) | |
| 269 | + if isinstance(text, list): | |
| 270 | + return [f"{name}:{item}" for item in text] | |
| 271 | + return f"{name}:{text}" | |
| 272 | + | |
| 273 | + return _Backend() | |
| 274 | + | |
| 275 | + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend) | |
| 276 | + config = { | |
| 277 | + "service_url": "http://127.0.0.1:6006", | |
| 278 | + "timeout_sec": 10.0, | |
| 279 | + "default_model": "opus-mt-zh-en", | |
| 280 | + "default_scene": "general", | |
| 281 | + "capabilities": { | |
| 282 | + "deepl": { | |
| 283 | + "enabled": True, | |
| 284 | + "backend": "deepl", | |
| 285 | + "api_url": "https://api.deepl.com/v2/translate", | |
| 286 | + "timeout_sec": 10.0, | |
| 287 | + "use_cache": True, | |
| 288 | + }, | |
| 289 | + "opus-mt-zh-en": { | |
| 290 | + "enabled": True, | |
| 291 | + "backend": "local_marian", | |
| 292 | + "model_id": "dummy", | |
| 293 | + "model_dir": "dummy", | |
| 294 | + "device": "cpu", | |
| 295 | + "torch_dtype": "float32", | |
| 296 | + "batch_size": 8, | |
| 297 | + "max_input_length": 16, | |
| 298 | + "max_new_tokens": 16, | |
| 299 | + "num_beams": 1, | |
| 300 | + "use_cache": True, | |
| 301 | + }, | |
| 302 | + }, | |
| 303 | + "cache": { | |
| 304 | + "ttl_seconds": 60, | |
| 305 | + "sliding_expiration": True, | |
| 306 | + "model_quality_tiers": {"deepl": 100, "opus-mt-zh-en": 40}, | |
| 307 | + }, | |
| 308 | + } | |
| 309 | + | |
| 310 | + service = TranslationService(config) | |
| 311 | + fake_cache = _FakeCache() | |
| 312 | + fake_cache.storage[("deepl", "en", "商品标题")] = "from-deepl" | |
| 313 | + service._translation_cache = fake_cache | |
| 314 | + | |
| 315 | + out = service.translate("商品标题", target_lang="en", source_lang="zh", model="opus-mt-zh-en") | |
| 316 | + assert out == "from-deepl" | |
| 317 | + assert translate_calls == [] | |
| 318 | + assert fake_cache.get_calls == [("deepl", "en", "商品标题")] | |
| 319 | + | |
| 320 | + | |
| 321 | +def test_tiered_cache_reuses_same_tier_peer(monkeypatch): | |
| 322 | + """Model A may use cache written under model B when both share the same tier.""" | |
| 323 | + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None)) | |
| 324 | + translate_calls = [] | |
| 325 | + | |
| 326 | + def _fake_create_backend(self, *, name, backend_type, cfg): | |
| 327 | + del self, backend_type, cfg | |
| 328 | + | |
| 329 | + class _Backend: | |
| 330 | + model = name | |
| 331 | + | |
| 332 | + @property | |
| 333 | + def supports_batch(self): | |
| 334 | + return True | |
| 335 | + | |
| 336 | + def translate(self, text, target_lang, source_lang=None, scene=None): | |
| 337 | + del target_lang, source_lang, scene | |
| 338 | + translate_calls.append((name, text)) | |
| 339 | + if isinstance(text, list): | |
| 340 | + return [f"{name}:{item}" for item in text] | |
| 341 | + return f"{name}:{text}" | |
| 342 | + | |
| 343 | + return _Backend() | |
| 344 | + | |
| 345 | + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend) | |
| 346 | + marian_cap = { | |
| 347 | + "enabled": True, | |
| 348 | + "backend": "local_marian", | |
| 349 | + "model_id": "dummy", | |
| 350 | + "model_dir": "dummy", | |
| 351 | + "device": "cpu", | |
| 352 | + "torch_dtype": "float32", | |
| 353 | + "batch_size": 8, | |
| 354 | + "max_input_length": 16, | |
| 355 | + "max_new_tokens": 16, | |
| 356 | + "num_beams": 1, | |
| 357 | + "use_cache": True, | |
| 358 | + } | |
| 359 | + config = { | |
| 360 | + "service_url": "http://127.0.0.1:6006", | |
| 361 | + "timeout_sec": 10.0, | |
| 362 | + "default_model": "opus-mt-en-zh", | |
| 363 | + "default_scene": "general", | |
| 364 | + "capabilities": { | |
| 365 | + "opus-mt-zh-en": dict(marian_cap), | |
| 366 | + "opus-mt-en-zh": dict(marian_cap), | |
| 367 | + }, | |
| 368 | + "cache": { | |
| 369 | + "ttl_seconds": 60, | |
| 370 | + "sliding_expiration": True, | |
| 371 | + "model_quality_tiers": {"opus-mt-zh-en": 50, "opus-mt-en-zh": 50}, | |
| 372 | + }, | |
| 373 | + } | |
| 374 | + | |
| 375 | + service = TranslationService(config) | |
| 376 | + fake_cache = _FakeCache() | |
| 377 | + fake_cache.storage[("opus-mt-zh-en", "en", "hello")] = "from-zh-en" | |
| 378 | + service._translation_cache = fake_cache | |
| 379 | + | |
| 380 | + out = service.translate("hello", target_lang="en", source_lang="zh", model="opus-mt-en-zh") | |
| 381 | + assert out == "from-zh-en" | |
| 382 | + assert translate_calls == [] | |
| 383 | + assert fake_cache.get_calls == [ | |
| 384 | + ("opus-mt-en-zh", "en", "hello"), | |
| 385 | + ("opus-mt-zh-en", "en", "hello"), | |
| 386 | + ] | |
| 387 | + | |
| 388 | + | |
| 389 | +def test_tiered_cache_switch_off_uses_exact_model_only(monkeypatch): | |
| 390 | + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None)) | |
| 391 | + translate_calls = [] | |
| 392 | + | |
| 393 | + def _fake_create_backend(self, *, name, backend_type, cfg): | |
| 394 | + del self, backend_type, cfg | |
| 395 | + | |
| 396 | + class _Backend: | |
| 397 | + model = name | |
| 398 | + | |
| 399 | + @property | |
| 400 | + def supports_batch(self): | |
| 401 | + return True | |
| 402 | + | |
| 403 | + def translate(self, text, target_lang, source_lang=None, scene=None): | |
| 404 | + del target_lang, source_lang, scene | |
| 405 | + translate_calls.append((name, text)) | |
| 406 | + if isinstance(text, list): | |
| 407 | + return [f"{name}:{item}" for item in text] | |
| 408 | + return f"{name}:{text}" | |
| 409 | + | |
| 410 | + return _Backend() | |
| 411 | + | |
| 412 | + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend) | |
| 413 | + config = { | |
| 414 | + "service_url": "http://127.0.0.1:6006", | |
| 415 | + "timeout_sec": 10.0, | |
| 416 | + "default_model": "opus-mt-zh-en", | |
| 417 | + "default_scene": "general", | |
| 418 | + "capabilities": { | |
| 419 | + "deepl": { | |
| 420 | + "enabled": True, | |
| 421 | + "backend": "deepl", | |
| 422 | + "api_url": "https://api.deepl.com/v2/translate", | |
| 423 | + "timeout_sec": 10.0, | |
| 424 | + "use_cache": True, | |
| 425 | + }, | |
| 426 | + "opus-mt-zh-en": { | |
| 427 | + "enabled": True, | |
| 428 | + "backend": "local_marian", | |
| 429 | + "model_id": "dummy", | |
| 430 | + "model_dir": "dummy", | |
| 431 | + "device": "cpu", | |
| 432 | + "torch_dtype": "float32", | |
| 433 | + "batch_size": 8, | |
| 434 | + "max_input_length": 16, | |
| 435 | + "max_new_tokens": 16, | |
| 436 | + "num_beams": 1, | |
| 437 | + "use_cache": True, | |
| 438 | + }, | |
| 439 | + }, | |
| 440 | + "cache": { | |
| 441 | + "ttl_seconds": 60, | |
| 442 | + "sliding_expiration": True, | |
| 443 | + "enable_model_quality_tier_cache": False, | |
| 444 | + "model_quality_tiers": {"deepl": 100, "opus-mt-zh-en": 40}, | |
| 445 | + }, | |
| 446 | + } | |
| 447 | + | |
| 448 | + service = TranslationService(config) | |
| 449 | + fake_cache = _FakeCache() | |
| 450 | + fake_cache.storage[("deepl", "en", "商品标题")] = "from-deepl" | |
| 451 | + service._translation_cache = fake_cache | |
| 452 | + | |
| 453 | + out = service.translate("商品标题", target_lang="en", source_lang="zh", model="opus-mt-zh-en") | |
| 454 | + assert out == "opus-mt-zh-en:商品标题" | |
| 455 | + assert translate_calls == [("opus-mt-zh-en", "商品标题")] | |
| 456 | + assert fake_cache.get_calls == [("opus-mt-zh-en", "en", "商品标题")] | ... | ... |
translation/cache.py
| ... | ... | @@ -36,7 +36,13 @@ class TranslationCache: |
| 36 | 36 | digest = hashlib.sha256(text.encode("utf-8")).hexdigest() |
| 37 | 37 | return f"trans:{normalized_model}:{normalized_target_lang}:{text_prefix}{digest}" |
| 38 | 38 | |
| 39 | - def get(self, *, model: str, target_lang: str, source_text: str) -> Optional[str]: | |
| 39 | + def get( | |
| 40 | + self, | |
| 41 | + *, | |
| 42 | + model: str, | |
| 43 | + target_lang: str, | |
| 44 | + source_text: str | |
| 45 | + ) -> Optional[str]: | |
| 40 | 46 | if self.redis_client is None: |
| 41 | 47 | return None |
| 42 | 48 | key = self.build_key(model=model, target_lang=target_lang, source_text=source_text) | ... | ... |
translation/service.py
| ... | ... | @@ -3,7 +3,7 @@ |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | 5 | import logging |
| 6 | -from typing import Dict, List, Optional | |
| 6 | +from typing import Dict, List, Optional, Tuple | |
| 7 | 7 | |
| 8 | 8 | from config.loader import get_app_config |
| 9 | 9 | from config.schema import AppConfig |
| ... | ... | @@ -15,6 +15,7 @@ from translation.settings import ( |
| 15 | 15 | get_translation_capability, |
| 16 | 16 | normalize_translation_model, |
| 17 | 17 | normalize_translation_scene, |
| 18 | + translation_cache_probe_models, | |
| 18 | 19 | ) |
| 19 | 20 | |
| 20 | 21 | logger = logging.getLogger(__name__) |
| ... | ... | @@ -247,7 +248,11 @@ class TranslationService: |
| 247 | 248 | ) -> Optional[str]: |
| 248 | 249 | if not text.strip(): |
| 249 | 250 | return text |
| 250 | - cached = self._translation_cache.get(model=model, target_lang=target_lang, source_text=text) | |
| 251 | + cached, _served = self._tiered_cache_get( | |
| 252 | + request_model=model, | |
| 253 | + target_lang=target_lang, | |
| 254 | + source_text=text, | |
| 255 | + ) | |
| 251 | 256 | if cached is not None: |
| 252 | 257 | logger.info( |
| 253 | 258 | "Translation cache served | request_type=single text_len=%s", |
| ... | ... | @@ -279,6 +284,30 @@ class TranslationService: |
| 279 | 284 | ) |
| 280 | 285 | return translated |
| 281 | 286 | |
| 287 | + def _tiered_cache_get( | |
| 288 | + self, | |
| 289 | + *, | |
| 290 | + request_model: str, | |
| 291 | + target_lang: str, | |
| 292 | + source_text: str, | |
| 293 | + ) -> Tuple[Optional[str], Optional[str]]: | |
| 294 | + """Redis lookup: cache entries from higher-tier or **same-tier** models may satisfy the request. | |
| 295 | + | |
| 296 | + Lower-tier entries are never read. Returns ``(translated, served_model)``. | |
| 297 | + """ | |
| 298 | + probe_models = translation_cache_probe_models(self.config, request_model) | |
| 299 | + | |
| 300 | + for probe_model in probe_models: | |
| 301 | + hit = self._translation_cache.get( | |
| 302 | + model=probe_model, | |
| 303 | + target_lang=target_lang, | |
| 304 | + source_text=source_text, | |
| 305 | + ) | |
| 306 | + if hit is not None: | |
| 307 | + return hit, probe_model | |
| 308 | + | |
| 309 | + return None, None | |
| 310 | + | |
| 282 | 311 | def _translate_batch_with_cache( |
| 283 | 312 | self, |
| 284 | 313 | *, |
| ... | ... | @@ -300,8 +329,8 @@ class TranslationService: |
| 300 | 329 | if not normalized_text.strip(): |
| 301 | 330 | results[idx] = normalized_text |
| 302 | 331 | continue |
| 303 | - cached = self._translation_cache.get( | |
| 304 | - model=model, | |
| 332 | + cached, _served = self._tiered_cache_get( | |
| 333 | + request_model=model, | |
| 305 | 334 | target_lang=target_lang, |
| 306 | 335 | source_text=normalized_text, |
| 307 | 336 | ) | ... | ... |
translation/settings.py
| ... | ... | @@ -2,7 +2,7 @@ |
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | -from typing import Any, Dict, List, Mapping, Optional | |
| 5 | +from typing import Any, Dict, List, Mapping, Optional, Tuple | |
| 6 | 6 | |
| 7 | 7 | from translation.scenes import normalize_scene_name |
| 8 | 8 | |
| ... | ... | @@ -38,6 +38,7 @@ def build_translation_config(raw_cfg: Mapping[str, Any]) -> TranslationConfig: |
| 38 | 38 | if not get_enabled_translation_models(config): |
| 39 | 39 | raise ValueError("At least one translation capability must be enabled") |
| 40 | 40 | |
| 41 | + _validate_model_quality_tiers(config) | |
| 41 | 42 | return config |
| 42 | 43 | |
| 43 | 44 | |
| ... | ... | @@ -86,18 +87,107 @@ def get_translation_cache(config: Mapping[str, Any]) -> Dict[str, Any]: |
| 86 | 87 | return dict(cache) |
| 87 | 88 | |
| 88 | 89 | |
| 90 | +def translation_cache_probe_models(config: Mapping[str, Any], request_model: str) -> List[str]: | |
| 91 | + """Redis cache key models to try. | |
| 92 | + | |
| 93 | + Sort order: (1) **tier** descending (higher quality first); (2) within the same tier, | |
| 94 | + the **request model** before other peers; (3) remaining ties by model name. | |
| 95 | + | |
| 96 | + For a request to model A with tier T, probes every configured model whose tier is | |
| 97 | + **greater than or equal to** T. Lower tiers are never used. | |
| 98 | + | |
| 99 | + When ``enable_model_quality_tier_cache`` is false, only the request model is probed. | |
| 100 | + | |
| 101 | + When ``model_quality_tiers`` is empty or ``request_model`` is not listed, only the | |
| 102 | + request model is probed (legacy exact-match behavior). | |
| 103 | + """ | |
| 104 | + rm = str(request_model or "").strip().lower() | |
| 105 | + cache = config.get("cache") | |
| 106 | + if not isinstance(cache, Mapping): | |
| 107 | + return [rm] | |
| 108 | + if not bool(cache.get("enable_model_quality_tier_cache", True)): | |
| 109 | + return [rm] | |
| 110 | + tiers = cache.get("model_quality_tiers") | |
| 111 | + if not isinstance(tiers, Mapping) or not tiers: | |
| 112 | + return [rm] | |
| 113 | + if rm not in tiers: | |
| 114 | + return [rm] | |
| 115 | + threshold = int(tiers[rm]) | |
| 116 | + scored: List[Tuple[int, str]] = [] | |
| 117 | + for name, tier_val in tiers.items(): | |
| 118 | + n = str(name).strip().lower() | |
| 119 | + t = int(tier_val) | |
| 120 | + if t >= threshold: | |
| 121 | + scored.append((t, n)) | |
| 122 | + scored.sort( | |
| 123 | + key=lambda item: ( | |
| 124 | + -item[0], | |
| 125 | + 0 if item[1] == rm else 1, | |
| 126 | + item[1], | |
| 127 | + ) | |
| 128 | + ) | |
| 129 | + out: List[str] = [] | |
| 130 | + seen: set[str] = set() | |
| 131 | + for _t, n in scored: | |
| 132 | + if n not in seen: | |
| 133 | + seen.add(n) | |
| 134 | + out.append(n) | |
| 135 | + return out | |
| 136 | + | |
| 137 | + | |
| 89 | 138 | def _build_cache_config(raw_cache: Any) -> Dict[str, Any]: |
| 90 | 139 | if not isinstance(raw_cache, Mapping): |
| 91 | 140 | raise ValueError("services.translation.cache must be a mapping") |
| 141 | + if "enable_model_quality_tier_cache" in raw_cache: | |
| 142 | + enable_tier_cache = _require_bool( | |
| 143 | + raw_cache["enable_model_quality_tier_cache"], | |
| 144 | + "services.translation.cache.enable_model_quality_tier_cache", | |
| 145 | + ) | |
| 146 | + else: | |
| 147 | + enable_tier_cache = True | |
| 92 | 148 | return { |
| 93 | 149 | "ttl_seconds": _require_positive_int(raw_cache.get("ttl_seconds"), "services.translation.cache.ttl_seconds"), |
| 94 | 150 | "sliding_expiration": _require_bool( |
| 95 | 151 | raw_cache.get("sliding_expiration"), |
| 96 | 152 | "services.translation.cache.sliding_expiration", |
| 97 | 153 | ), |
| 154 | + "enable_model_quality_tier_cache": enable_tier_cache, | |
| 155 | + "model_quality_tiers": _build_model_quality_tiers(raw_cache.get("model_quality_tiers")), | |
| 98 | 156 | } |
| 99 | 157 | |
| 100 | 158 | |
| 159 | +def _build_model_quality_tiers(raw: Any) -> Dict[str, int]: | |
| 160 | + if raw is None: | |
| 161 | + return {} | |
| 162 | + if not isinstance(raw, Mapping): | |
| 163 | + raise ValueError("services.translation.cache.model_quality_tiers must be a mapping") | |
| 164 | + resolved: Dict[str, int] = {} | |
| 165 | + for name, tier_val in raw.items(): | |
| 166 | + cap = _require_string(name, "services.translation.cache.model_quality_tiers key").lower() | |
| 167 | + field = f"services.translation.cache.model_quality_tiers.{cap}" | |
| 168 | + resolved[cap] = _require_non_negative_int(tier_val, field) | |
| 169 | + return resolved | |
| 170 | + | |
| 171 | + | |
| 172 | +def _validate_model_quality_tiers(config: TranslationConfig) -> None: | |
| 173 | + tiers = config["cache"].get("model_quality_tiers") | |
| 174 | + if not isinstance(tiers, Mapping) or not tiers: | |
| 175 | + return | |
| 176 | + caps = config["capabilities"] | |
| 177 | + for name in tiers: | |
| 178 | + if name not in caps: | |
| 179 | + raise ValueError( | |
| 180 | + f"services.translation.cache.model_quality_tiers references unknown capability '{name}'" | |
| 181 | + ) | |
| 182 | + | |
| 183 | + | |
| 184 | +def _require_non_negative_int(value: Any, field_name: str) -> int: | |
| 185 | + parsed = _require_int(value, field_name) | |
| 186 | + if parsed < 0: | |
| 187 | + raise ValueError(f"{field_name} must be >= 0") | |
| 188 | + return parsed | |
| 189 | + | |
| 190 | + | |
| 101 | 191 | def _build_capabilities(raw_capabilities: Any) -> Dict[str, Dict[str, Any]]: |
| 102 | 192 | if not isinstance(raw_capabilities, Mapping): |
| 103 | 193 | raise ValueError("services.translation.capabilities must be a mapping") | ... | ... |