42e3aea6
tangwang
tidy
|
1
|
"""
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
2
|
Services configuration - single source for translation, embedding, rerank.
|
42e3aea6
tangwang
tidy
|
3
|
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
4
5
6
|
Translation is modeled as:
- one translator service endpoint used by business callers
- multiple translation capabilities loaded inside the translator service
|
42e3aea6
tangwang
tidy
|
7
8
9
10
11
12
13
14
|
"""
from __future__ import annotations
import os
from dataclasses import dataclass, field
from functools import lru_cache
from pathlib import Path
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
15
|
from typing import Any, Dict, List, Optional
|
42e3aea6
tangwang
tidy
|
16
17
|
import yaml
|
0fd2f875
tangwang
translate
|
18
|
from translation.settings import TranslationConfig, build_translation_config, get_translation_cache
|
42e3aea6
tangwang
tidy
|
19
20
21
22
|
@dataclass
class ServiceConfig:
    """Config for one capability (embedding/rerank).

    Attributes:
        provider: Name of the selected provider.
        providers: Mapping of provider name to that provider's settings.
    """

    provider: str
    providers: Dict[str, Any] = field(default_factory=dict)

    def get_provider_cfg(self) -> Dict[str, Any]:
        """Return the settings mapping of the selected provider.

        The provider name is stripped and lower-cased before the lookup.

        Returns:
            The provider's settings, or ``{}`` when ``providers`` is not a
            mapping, when the provider has no entry, or when the entry itself
            is not a mapping (for example an empty YAML key loaded as None).
        """
        if not isinstance(self.providers, dict):
            return {}
        key = (self.provider or "").strip().lower()
        entry = self.providers.get(key)
        # Honour the declared return type: callers chain ``.get`` on the result.
        return entry if isinstance(entry, dict) else {}
|
42e3aea6
tangwang
tidy
|
33
|
def _load_services_raw(config_path: Optional[Path] = None) -> Dict[str, Any]:
    """Load the ``services`` mapping from the YAML config file.

    Args:
        config_path: File to read. Defaults to ``config.yaml`` located next to
            this module.

    Returns:
        The value stored under the top-level ``services`` key.

    Raises:
        FileNotFoundError: The config file does not exist.
        RuntimeError: The file cannot be read or parsed, its root is not a
            mapping, or ``services`` is missing or not a mapping.
    """
    if config_path is None:
        path = Path(__file__).parent / "config.yaml"
    else:
        path = Path(config_path)

    if not path.exists():
        raise FileNotFoundError(f"services config file not found: {path}")

    try:
        with open(path, "r", encoding="utf-8") as handle:
            document = yaml.safe_load(handle)
    except Exception as exc:
        raise RuntimeError(f"failed to parse services config from {path}: {exc}") from exc

    if not isinstance(document, dict):
        raise RuntimeError(f"invalid config format in {path}: expected mapping root")

    services = document.get("services")
    if isinstance(services, dict):
        return services
    raise RuntimeError("config.yaml must contain a valid 'services' mapping")
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
52
|
def _resolve_provider_name(env_name: str, config_provider: Any, capability: str) -> str:
    """Pick the provider name for a capability, preferring the env override.

    Each candidate is stripped and lower-cased before it is considered, so a
    blank or whitespace-only environment variable no longer shadows the
    configured provider and can never be returned as an empty name.

    Args:
        env_name: Environment variable that may override the configured value.
        config_provider: Provider value read from the config file (may be None).
        capability: Capability name, used only in the error message.

    Returns:
        The normalized (stripped, lower-cased) provider name.

    Raises:
        ValueError: Neither the environment nor the config supplies a
            non-blank provider.
    """
    for candidate in (os.getenv(env_name), config_provider):
        if not candidate:
            continue
        normalized = str(candidate).strip().lower()
        if normalized:
            return normalized
    raise ValueError(
        f"services.{capability}.provider is required "
        f"(or set env override {env_name})"
    )
|
42e3aea6
tangwang
tidy
|
60
61
|
|
0fd2f875
tangwang
translate
|
62
|
def _resolve_translation() -> TranslationConfig:
    """Build the translation config from the ``services.translation`` section.

    A missing or non-mapping section is passed to
    ``build_translation_config`` as an empty dict.
    """
    section = _load_services_raw().get("translation")
    if not isinstance(section, dict):
        section = {}
    return build_translation_config(section)
|
42e3aea6
tangwang
tidy
|
66
67
68
69
70
71
72
|
def _resolve_embedding() -> ServiceConfig:
    """Resolve the embedding service config from file plus env overrides.

    ``EMBEDDING_PROVIDER`` overrides the configured provider and
    ``EMBEDDING_SERVICE_URL`` overrides the HTTP provider's ``base_url``
    (trailing slashes removed).

    Raises:
        ValueError: No provider is configured, or it is not ``"http"``.
    """
    section = _load_services_raw().get("embedding")
    if not isinstance(section, dict):
        section = {}

    provider_map = section.get("providers")
    if not isinstance(provider_map, dict):
        provider_map = {}

    provider = _resolve_provider_name(
        env_name="EMBEDDING_PROVIDER",
        config_provider=section.get("provider"),
        capability="embedding",
    )
    if provider != "http":
        raise ValueError(f"Unsupported embedding provider: {provider}")

    # Only "http" can reach this point, so the override always targets it.
    override = os.getenv("EMBEDDING_SERVICE_URL")
    if override:
        # Work on copies so the mapping loaded from the file is not mutated.
        provider_map = dict(provider_map)
        http_cfg = dict(provider_map.get("http", {}))
        http_cfg["base_url"] = override.rstrip("/")
        provider_map["http"] = http_cfg

    return ServiceConfig(provider=provider, providers=provider_map)
def _split_rerank_url(raw_url: str) -> tuple[str, str]:
    """Split a reranker URL into ``(base_url, service_url)``.

    Only the path is inspected, so a host whose name starts with "rerank"
    (e.g. ``http://reranker:6007``) is never mistaken for the endpoint.
    When the path already has a ``rerank`` segment, the URL is kept as the
    service URL and the base URL is everything before that segment.
    Otherwise ``/rerank`` is appended to form the service URL.
    """
    url = raw_url.rstrip("/")
    scheme, sep, rest = url.partition("://")
    if not sep:
        # No scheme present: treat the whole string as "authority/path".
        scheme, rest = "", url
    authority, _, path = rest.partition("/")
    segments = path.split("/") if path else []
    if "rerank" not in segments:
        return url, f"{url}/rerank"
    leading = "/".join(segments[: segments.index("rerank")]).rstrip("/")
    root = f"{scheme}{sep}{authority}"
    return (f"{root}/{leading}" if leading else root), url


def _resolve_rerank() -> ServiceConfig:
    """Resolve the rerank service config from file plus env overrides.

    ``RERANK_PROVIDER`` overrides the configured provider.
    ``RERANKER_SERVICE_URL`` overrides both ``base_url`` and ``service_url``
    of the HTTP provider; the service URL always points at the ``/rerank``
    endpoint and the base URL is the same address without it.

    Raises:
        ValueError: No provider is configured, or it is not ``"http"``.
    """
    raw = _load_services_raw()
    cfg = raw.get("rerank")
    if not isinstance(cfg, dict):
        cfg = {}
    providers = cfg.get("providers")
    if not isinstance(providers, dict):
        providers = {}

    provider = _resolve_provider_name(
        env_name="RERANK_PROVIDER",
        config_provider=cfg.get("provider"),
        capability="rerank",
    )
    if provider != "http":
        raise ValueError(f"Unsupported rerank provider: {provider}")

    env_url = os.getenv("RERANKER_SERVICE_URL")
    if env_url:
        base_url, service_url = _split_rerank_url(env_url)
        # Work on copies so the mapping loaded from the file is not mutated.
        providers = dict(providers)
        http_cfg = providers.get("http")
        # An empty YAML key loads as None; start from a clean mapping then.
        http_cfg = dict(http_cfg) if isinstance(http_cfg, dict) else {}
        http_cfg["base_url"] = base_url
        http_cfg["service_url"] = service_url
        providers["http"] = http_cfg

    return ServiceConfig(provider=provider, providers=providers)
|
701ae503
tangwang
docs
|
116
|
def get_rerank_backend_config() -> tuple[str, dict]:
    """Return the selected rerank backend name and its settings.

    The backend name comes from ``RERANK_BACKEND`` when set, otherwise from
    ``services.rerank.backend``; it is stripped and lower-cased before the
    lookup in ``services.rerank.backends``.

    Raises:
        ValueError: No backend is selected, or the selected backend has no
            non-empty mapping under ``backends``.
    """
    section = _load_services_raw().get("rerank")
    if not isinstance(section, dict):
        section = {}

    backend_map = section.get("backends")
    if not isinstance(backend_map, dict):
        backend_map = {}

    selected = os.getenv("RERANK_BACKEND") or section.get("backend")
    if not selected:
        raise ValueError("services.rerank.backend is required (or env RERANK_BACKEND)")
    selected = str(selected).strip().lower()

    settings = backend_map.get(selected)
    if not isinstance(settings, dict) or not settings:
        raise ValueError(f"services.rerank.backends.{selected} is required")
    return selected, settings
def get_embedding_backend_config() -> tuple[str, dict]:
    """Return the selected embedding backend name and its settings.

    The backend name comes from ``EMBEDDING_BACKEND`` when set, otherwise
    from ``services.embedding.backend``; it is stripped and lower-cased
    before the lookup in ``services.embedding.backends``.

    Raises:
        ValueError: No backend is selected, or the selected backend has no
            non-empty mapping under ``backends``.
    """
    section = _load_services_raw().get("embedding")
    if not isinstance(section, dict):
        section = {}

    backend_map = section.get("backends")
    if not isinstance(backend_map, dict):
        backend_map = {}

    selected = os.getenv("EMBEDDING_BACKEND") or section.get("backend")
    if not selected:
        raise ValueError("services.embedding.backend is required (or env EMBEDDING_BACKEND)")
    selected = str(selected).strip().lower()

    settings = backend_map.get(selected)
    if not isinstance(settings, dict) or not settings:
        raise ValueError(f"services.embedding.backends.{selected} is required")
    return selected, settings
|
42e3aea6
tangwang
tidy
|
144
|
@lru_cache(maxsize=1)
def get_translation_config() -> TranslationConfig:
    """Return the translation config; the first result is cached and reused."""
    resolved = _resolve_translation()
    return resolved
@lru_cache(maxsize=1)
def get_embedding_config() -> ServiceConfig:
    """Return the embedding config; the first result is cached and reused."""
    resolved = _resolve_embedding()
    return resolved
@lru_cache(maxsize=1)
def get_rerank_config() -> ServiceConfig:
    """Return the rerank config; the first result is cached and reused."""
    resolved = _resolve_rerank()
    return resolved
def get_translation_base_url() -> str:
    """Return the ``service_url`` entry of the translation config as a string."""
    service_url = get_translation_config()["service_url"]
    return str(service_url)
|
42e3aea6
tangwang
tidy
|
161
162
|
|
d4cadc13
tangwang
翻译重构
|
163
|
def get_translation_cache_config() -> Dict[str, Any]:
    """Return what ``get_translation_cache`` derives from the translation config."""
    translation_cfg = get_translation_config()
    return get_translation_cache(translation_cfg)
|
d4cadc13
tangwang
翻译重构
|
165
166
|
|
42e3aea6
tangwang
tidy
|
167
|
def get_embedding_base_url() -> str:
    """Return the embedding HTTP base URL without trailing slashes.

    ``EMBEDDING_SERVICE_URL`` takes precedence over the ``base_url`` of the
    configured HTTP provider.

    Raises:
        ValueError: Neither source provides a URL.
    """
    base = os.getenv("EMBEDDING_SERVICE_URL")
    if not base:
        # The config is only consulted when the env override is absent.
        base = get_embedding_config().providers.get("http", {}).get("base_url")
    if not base:
        raise ValueError("Embedding HTTP base_url is not configured")
    return str(base).rstrip("/")
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
174
|
def get_rerank_base_url() -> str:
    """Return the rerank HTTP URL without trailing slashes.

    Lookup order: ``RERANKER_SERVICE_URL``, then the HTTP provider's
    ``service_url``, then its ``base_url``.

    Raises:
        ValueError: None of the sources provides a URL.
    """
    base = os.getenv("RERANKER_SERVICE_URL")
    if not base:
        # The config is only consulted when the env override is absent.
        http_cfg = get_rerank_config().providers.get("http", {})
        base = http_cfg.get("service_url") or http_cfg.get("base_url")
    if not base:
        raise ValueError("Rerank HTTP base_url is not configured")
    return str(base).rstrip("/")
|
42e3aea6
tangwang
tidy
|
183
184
|
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
185
186
187
|
def get_rerank_service_url() -> str:
    """Backward-compatible alias that delegates to ``get_rerank_base_url``."""
    url = get_rerank_base_url()
    return url
|