42e3aea6
tangwang
tidy
|
1
|
"""
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
2
|
Services configuration - single source for translation, embedding, rerank.
|
42e3aea6
tangwang
tidy
|
3
|
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
4
5
6
|
Translation is modeled as:
- one translator service endpoint used by business callers
- multiple translation capabilities loaded inside the translator service
|
42e3aea6
tangwang
tidy
|
7
8
9
10
11
12
13
14
|
"""
from __future__ import annotations
import os
from dataclasses import dataclass, field
from functools import lru_cache
from pathlib import Path
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
15
|
from typing import Any, Dict, List, Optional
|
42e3aea6
tangwang
tidy
|
16
17
|
import yaml
|
0fd2f875
tangwang
translate
|
18
|
from translation.settings import TranslationConfig, build_translation_config, get_translation_cache
|
42e3aea6
tangwang
tidy
|
19
20
21
22
|
@dataclass
class ServiceConfig:
    """Config for one capability (embedding/rerank)."""

    # Name of the selected provider; looked up (trimmed, lower-cased) in
    # ``providers`` by ``get_provider_cfg``.
    provider: str
    # Mapping of provider name -> provider-specific settings.
    providers: Dict[str, Any] = field(default_factory=dict)

    def get_provider_cfg(self) -> Dict[str, Any]:
        """Return the settings mapping of the selected provider.

        The provider name is trimmed and lower-cased before the lookup.

        Returns:
            The selected provider's settings, or an empty dict when
            ``providers`` is not a mapping, the provider has no entry, or the
            entry is not a mapping (for example a null YAML value).
        """
        if not isinstance(self.providers, dict):
            return {}
        key = (self.provider or "").strip().lower()
        entry = self.providers.get(key)
        # Honour the declared return type: never hand back None or a scalar.
        return entry if isinstance(entry, dict) else {}
|
42e3aea6
tangwang
tidy
|
33
|
def _load_services_raw(config_path: Optional[Path] = None) -> Dict[str, Any]:
    """Load the YAML config file and return its ``services`` mapping.

    Args:
        config_path: Path of the YAML file. When ``None``, ``config.yaml``
            located next to this module is used.

    Returns:
        The mapping stored under the top-level ``services`` key.

    Raises:
        FileNotFoundError: The config file does not exist.
        RuntimeError: The file cannot be read or parsed, its root is not a
            mapping, or it has no ``services`` mapping.
    """
    if config_path is None:
        path = Path(Path(__file__).parent / "config.yaml")
    else:
        path = Path(config_path)

    if not path.exists():
        raise FileNotFoundError(f"services config file not found: {path}")

    try:
        with open(path, "r", encoding="utf-8") as handle:
            document = yaml.safe_load(handle)
    except Exception as exc:
        # Any read/parse failure is reported uniformly, with the cause chained.
        raise RuntimeError(f"failed to parse services config from {path}: {exc}") from exc

    if not isinstance(document, dict):
        raise RuntimeError(f"invalid config format in {path}: expected mapping root")

    services = document.get("services")
    if isinstance(services, dict):
        return services
    raise RuntimeError("config.yaml must contain a valid 'services' mapping")
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
52
|
def _resolve_provider_name(env_name: str, config_provider: Any, capability: str) -> str:
    """Pick the provider name for a capability, preferring the env override.

    Each candidate is trimmed and lower-cased before it is considered, so a
    blank or whitespace-only environment variable no longer shadows the value
    from the config file or yields an empty provider name.

    Args:
        env_name: Name of the environment variable that overrides the config.
        config_provider: Value of ``services.<capability>.provider`` from the
            config file (may be ``None``).
        capability: Capability name, used only in the error message.

    Returns:
        The normalised (trimmed, lower-cased) provider name.

    Raises:
        ValueError: Neither the environment variable nor the config supplies
            a non-empty provider name.
    """
    for candidate in (os.getenv(env_name), config_provider):
        if not candidate:
            continue
        name = str(candidate).strip().lower()
        if name:
            return name
    raise ValueError(
        f"services.{capability}.provider is required "
        f"(or set env override {env_name})"
    )
|
42e3aea6
tangwang
tidy
|
60
61
|
|
0fd2f875
tangwang
translate
|
62
|
def _resolve_translation() -> TranslationConfig:
    """Build the translation config from the ``translation`` services section.

    A missing or non-mapping section is treated as an empty mapping before it
    is handed to ``build_translation_config``.
    """
    section = _load_services_raw().get("translation")
    if not isinstance(section, dict):
        section = {}
    return build_translation_config(section)
|
42e3aea6
tangwang
tidy
|
66
67
68
69
70
71
72
|
def _resolve_embedding() -> ServiceConfig:
    """Resolve the embedding capability config, applying env overrides.

    The provider name comes from ``EMBEDDING_PROVIDER`` or
    ``services.embedding.provider``; only ``http`` is accepted. The variables
    ``EMBEDDING_SERVICE_URL``, ``EMBEDDING_TEXT_SERVICE_URL`` and
    ``EMBEDDING_IMAGE_SERVICE_URL`` override ``base_url``, ``text_base_url``
    and ``image_base_url`` of the ``http`` provider, with trailing slashes
    removed.

    Raises:
        ValueError: The resolved provider is not ``http``.
    """
    section = _load_services_raw().get("embedding")
    if not isinstance(section, dict):
        section = {}
    providers = section.get("providers")
    if not isinstance(providers, dict):
        providers = {}

    provider = _resolve_provider_name(
        env_name="EMBEDDING_PROVIDER",
        config_provider=section.get("provider"),
        capability="embedding",
    )
    if provider != "http":
        raise ValueError(f"Unsupported embedding provider: {provider}")

    env_values = {
        "base_url": os.getenv("EMBEDDING_SERVICE_URL"),
        "text_base_url": os.getenv("EMBEDDING_TEXT_SERVICE_URL"),
        "image_base_url": os.getenv("EMBEDDING_IMAGE_SERVICE_URL"),
    }
    overrides = {key: value.rstrip("/") for key, value in env_values.items() if value}
    if overrides:
        # Copy before mutating so the mapping loaded from YAML stays untouched.
        providers = dict(providers)
        http_cfg = dict(providers.get("http", {}))
        http_cfg.update(overrides)
        providers["http"] = http_cfg

    return ServiceConfig(provider=provider, providers=providers)
def _split_rerank_url(raw_url: str) -> tuple[str, str]:
    """Derive ``(base_url, service_url)`` from a reranker URL override.

    Only whole path segments named ``rerank`` are recognised. The previous
    substring check on ``"/rerank"`` also matched host names such as
    ``http://reranker:6007``, which skipped appending the endpoint and then
    corrupted the base URL when the substring was stripped.
    """
    from urllib.parse import urlsplit  # local import keeps this block self-contained

    url = raw_url.rstrip("/")
    parts = urlsplit(url)
    segments = parts.path.split("/")

    if "rerank" not in segments:
        # Plain base URL: the service endpoint is <base>/rerank.
        return url, f"{url}/rerank"
    if segments[-1] == "rerank" and url.endswith("/rerank"):
        # Full endpoint URL: the base is everything before the last segment.
        return url[: -len("/rerank")], url
    # ``rerank`` appears mid-path (or a query/fragment follows): keep the URL
    # as the service endpoint and drop the segment(s) to obtain the base.
    kept = [segment for segment in segments if segment != "rerank"]
    return parts._replace(path="/".join(kept)).geturl(), url


def _resolve_rerank() -> ServiceConfig:
    """Resolve the rerank capability config, applying env overrides.

    The provider name comes from ``RERANK_PROVIDER`` or
    ``services.rerank.provider``; only ``http`` is accepted. When
    ``RERANKER_SERVICE_URL`` is set, it overrides ``base_url`` and
    ``service_url`` of the ``http`` provider. The variable may hold either the
    base URL or the full ``.../rerank`` endpoint.

    Raises:
        ValueError: The resolved provider is not ``http``.
    """
    raw = _load_services_raw()
    cfg = raw.get("rerank")
    if not isinstance(cfg, dict):
        cfg = {}
    providers = cfg.get("providers")
    if not isinstance(providers, dict):
        providers = {}

    provider = _resolve_provider_name(
        env_name="RERANK_PROVIDER",
        config_provider=cfg.get("provider"),
        capability="rerank",
    )
    if provider != "http":
        raise ValueError(f"Unsupported rerank provider: {provider}")

    env_url = os.getenv("RERANKER_SERVICE_URL")
    if env_url:
        base_url, service_url = _split_rerank_url(env_url)
        # Copy before mutating so the mapping loaded from YAML stays untouched.
        providers = dict(providers)
        providers["http"] = dict(providers.get("http", {}))
        providers["http"]["base_url"] = base_url
        providers["http"]["service_url"] = service_url
    return ServiceConfig(provider=provider, providers=providers)
|
701ae503
tangwang
docs
|
123
|
def get_rerank_backend_config() -> tuple[str, dict]:
    """Return ``(backend_name, backend_settings)`` for the rerank capability.

    The backend name comes from ``RERANK_BACKEND`` or
    ``services.rerank.backend`` and is trimmed and lower-cased before it is
    looked up under ``services.rerank.backends``.

    Raises:
        ValueError: No backend name is configured, or the selected backend
            has no non-empty mapping under ``services.rerank.backends``.
    """
    section = _load_services_raw().get("rerank")
    if not isinstance(section, dict):
        section = {}
    backends = section.get("backends")
    if not isinstance(backends, dict):
        backends = {}

    selected = os.getenv("RERANK_BACKEND") or section.get("backend")
    if not selected:
        raise ValueError("services.rerank.backend is required (or env RERANK_BACKEND)")
    selected = str(selected).strip().lower()

    settings = backends.get(selected)
    if not isinstance(settings, dict) or not settings:
        raise ValueError(f"services.rerank.backends.{selected} is required")
    return selected, settings
def get_embedding_backend_config() -> tuple[str, dict]:
    """Return ``(backend_name, backend_settings)`` for the embedding capability.

    The backend name comes from ``EMBEDDING_BACKEND`` or
    ``services.embedding.backend`` and is trimmed and lower-cased before it is
    looked up under ``services.embedding.backends``.

    Raises:
        ValueError: No backend name is configured, or the selected backend
            has no non-empty mapping under ``services.embedding.backends``.
    """
    section = _load_services_raw().get("embedding")
    if not isinstance(section, dict):
        section = {}
    backends = section.get("backends")
    if not isinstance(backends, dict):
        backends = {}

    selected = os.getenv("EMBEDDING_BACKEND") or section.get("backend")
    if not selected:
        raise ValueError("services.embedding.backend is required (or env EMBEDDING_BACKEND)")
    selected = str(selected).strip().lower()

    settings = backends.get(selected)
    if not isinstance(settings, dict) or not settings:
        raise ValueError(f"services.embedding.backends.{selected} is required")
    return selected, settings
|
42e3aea6
tangwang
tidy
|
151
|
@lru_cache(maxsize=1)
def get_translation_config() -> TranslationConfig:
    """Return the translation config, resolved once and then served from cache."""
    resolved = _resolve_translation()
    return resolved
@lru_cache(maxsize=1)
def get_embedding_config() -> ServiceConfig:
    """Return the embedding config, resolved once and then served from cache."""
    resolved = _resolve_embedding()
    return resolved
@lru_cache(maxsize=1)
def get_rerank_config() -> ServiceConfig:
    """Return the rerank config, resolved once and then served from cache."""
    resolved = _resolve_rerank()
    return resolved
def get_translation_base_url() -> str:
    """Return the ``service_url`` entry of the translation config as a string."""
    translation_cfg = get_translation_config()
    return str(translation_cfg["service_url"])
|
42e3aea6
tangwang
tidy
|
168
169
|
|
d4cadc13
tangwang
翻译重构
|
170
|
def get_translation_cache_config() -> Dict[str, Any]:
    """Return the result of ``get_translation_cache`` for the translation config."""
    translation_cfg = get_translation_config()
    return get_translation_cache(translation_cfg)
|
d4cadc13
tangwang
翻译重构
|
172
173
|
|
42e3aea6
tangwang
tidy
|
174
|
def get_embedding_base_url() -> str:
    """Return the embedding HTTP base URL without trailing slashes.

    Lookup order: ``EMBEDDING_SERVICE_URL``, then the ``http`` provider's
    ``base_url``, ``text_base_url`` and ``image_base_url``.

    Raises:
        ValueError: None of the sources provides a value.
    """
    http_cfg = get_embedding_config().providers.get("http", {})

    base = os.getenv("EMBEDDING_SERVICE_URL")
    if not base:
        # Fall back to the configured URLs, first non-empty entry wins.
        for key in ("base_url", "text_base_url", "image_base_url"):
            base = http_cfg.get(key)
            if base:
                break

    if not base:
        raise ValueError("Embedding HTTP base_url is not configured")
    return str(base).rstrip("/")
|
7214c2e7
tangwang
mplemented**
|
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
def get_embedding_text_base_url() -> str:
    """Return the text-embedding HTTP base URL without trailing slashes.

    Lookup order: ``EMBEDDING_TEXT_SERVICE_URL``, the ``http`` provider's
    ``text_base_url``, ``EMBEDDING_SERVICE_URL``, then the provider's
    ``base_url``.

    Raises:
        ValueError: None of the sources provides a value.
    """
    http_cfg = get_embedding_config().providers.get("http", {})

    # Text-specific settings take precedence over the shared ones.
    base = os.getenv("EMBEDDING_TEXT_SERVICE_URL") or http_cfg.get("text_base_url")
    if not base:
        base = os.getenv("EMBEDDING_SERVICE_URL") or http_cfg.get("base_url")

    if not base:
        raise ValueError("Embedding text HTTP base_url is not configured")
    return str(base).rstrip("/")
def get_embedding_image_base_url() -> str:
    """Return the image-embedding HTTP base URL without trailing slashes.

    Lookup order: ``EMBEDDING_IMAGE_SERVICE_URL``, the ``http`` provider's
    ``image_base_url``, ``EMBEDDING_SERVICE_URL``, then the provider's
    ``base_url``.

    Raises:
        ValueError: None of the sources provides a value.
    """
    http_cfg = get_embedding_config().providers.get("http", {})

    # Image-specific settings take precedence over the shared ones.
    base = os.getenv("EMBEDDING_IMAGE_SERVICE_URL") or http_cfg.get("image_base_url")
    if not base:
        base = os.getenv("EMBEDDING_SERVICE_URL") or http_cfg.get("base_url")

    if not base:
        raise ValueError("Embedding image HTTP base_url is not configured")
    return str(base).rstrip("/")
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
213
|
def get_rerank_base_url() -> str:
    """Return the rerank HTTP URL without trailing slashes.

    Lookup order: ``RERANKER_SERVICE_URL``, then the ``http`` provider's
    ``service_url``, then its ``base_url``. The rerank config is consulted
    only when the environment variable is unset or empty.

    Raises:
        ValueError: None of the sources provides a value.
    """
    base = os.getenv("RERANKER_SERVICE_URL")
    if not base:
        http_cfg = get_rerank_config().providers.get("http", {})
        base = http_cfg.get("service_url") or http_cfg.get("base_url")

    if not base:
        raise ValueError("Rerank HTTP base_url is not configured")
    return str(base).rstrip("/")
|
42e3aea6
tangwang
tidy
|
222
223
|
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
224
225
226
|
def get_rerank_service_url() -> str:
    """Backward-compatible alias of ``get_rerank_base_url``."""
    url = get_rerank_base_url()
    return url
|