Commit 26b910bd6bb837fc328a2c4f0f45305a0bf31b79
1 parent
54ccf28c
refactor service init and tighten multi-tenant search contracts
Showing
18 changed files
with
332 additions
and
137 deletions
Show diff stats
api/app.py
| ... | ... | @@ -206,7 +206,7 @@ async def startup_event(): |
| 206 | 206 | logger.info("Service initialized successfully") |
| 207 | 207 | except Exception as e: |
| 208 | 208 | logger.error(f"Failed to initialize service: {e}", exc_info=True) |
| 209 | - logger.warning("Service will start but may not function correctly") | |
| 209 | + raise | |
| 210 | 210 | |
| 211 | 211 | |
| 212 | 212 | @app.on_event("shutdown") | ... | ... |
api/routes/admin.py
| ... | ... | @@ -2,10 +2,11 @@ |
| 2 | 2 | Admin API routes for configuration and management. |
| 3 | 3 | """ |
| 4 | 4 | |
| 5 | -from fastapi import APIRouter, HTTPException | |
| 5 | +from fastapi import APIRouter, HTTPException, Request | |
| 6 | 6 | from typing import Dict |
| 7 | 7 | |
| 8 | 8 | from ..models import HealthResponse, ErrorResponse |
| 9 | +from indexer.mapping_generator import get_tenant_index_name | |
| 9 | 10 | |
| 10 | 11 | router = APIRouter(prefix="/admin", tags=["admin"]) |
| 11 | 12 | |
| ... | ... | @@ -57,6 +58,8 @@ async def get_configuration(): |
| 57 | 58 | "spu_enabled": config.spu_config.enabled |
| 58 | 59 | } |
| 59 | 60 | |
| 61 | + except HTTPException: | |
| 62 | + raise | |
| 60 | 63 | except Exception as e: |
| 61 | 64 | raise HTTPException(status_code=500, detail=str(e)) |
| 62 | 65 | |
| ... | ... | @@ -105,29 +108,50 @@ async def get_rewrite_rules(): |
| 105 | 108 | |
| 106 | 109 | |
| 107 | 110 | @router.get("/stats") |
| 108 | -async def get_index_stats(): | |
| 111 | +async def get_index_stats(http_request: Request): | |
| 109 | 112 | """ |
| 110 | 113 | Get index statistics. |
| 111 | 114 | """ |
| 112 | 115 | try: |
| 113 | - from ..app import get_es_client, get_config | |
| 116 | + from urllib.parse import parse_qs | |
| 117 | + from ..app import get_es_client | |
| 114 | 118 | |
| 115 | 119 | es_client = get_es_client() |
| 116 | - config = get_config() | |
| 120 | + | |
| 121 | + tenant_id = http_request.headers.get("X-Tenant-ID") | |
| 122 | + if not tenant_id: | |
| 123 | + query_string = http_request.url.query | |
| 124 | + if query_string: | |
| 125 | + params = parse_qs(query_string) | |
| 126 | + tenant_id = params.get("tenant_id", [None])[0] | |
| 127 | + | |
| 128 | + if not tenant_id: | |
| 129 | + raise HTTPException( | |
| 130 | + status_code=400, | |
| 131 | + detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'", | |
| 132 | + ) | |
| 133 | + | |
| 134 | + index_name = get_tenant_index_name(tenant_id) | |
| 135 | + if not es_client.client.indices.exists(index=index_name): | |
| 136 | + raise HTTPException( | |
| 137 | + status_code=404, | |
| 138 | + detail=f"Tenant index not found: {index_name}", | |
| 139 | + ) | |
| 117 | 140 | |
| 118 | 141 | # Get document count |
| 119 | - doc_count = es_client.count(config.es_index_name) | |
| 142 | + doc_count = es_client.client.count(index=index_name).get("count", 0) | |
| 120 | 143 | |
| 121 | 144 | # Get index size (if available) |
| 122 | 145 | try: |
| 123 | - stats = es_client.client.indices.stats(index=config.es_index_name) | |
| 124 | - size_in_bytes = stats['indices'][config.es_index_name]['total']['store']['size_in_bytes'] | |
| 146 | + stats = es_client.client.indices.stats(index=index_name) | |
| 147 | + size_in_bytes = stats["indices"][index_name]["total"]["store"]["size_in_bytes"] | |
| 125 | 148 | size_mb = size_in_bytes / (1024 * 1024) |
| 126 | - except: | |
| 149 | + except Exception: | |
| 127 | 150 | size_mb = None |
| 128 | 151 | |
| 129 | 152 | return { |
| 130 | - "index_name": config.es_index_name, | |
| 153 | + "tenant_id": str(tenant_id), | |
| 154 | + "index_name": index_name, | |
| 131 | 155 | "document_count": doc_count, |
| 132 | 156 | "size_mb": round(size_mb, 2) if size_mb else None |
| 133 | 157 | } | ... | ... |
api/routes/search.py
| ... | ... | @@ -327,7 +327,6 @@ async def search_suggestions( |
| 327 | 327 | async def instant_search( |
| 328 | 328 | q: str = Query(..., min_length=2, description="搜索查询"), |
| 329 | 329 | size: int = Query(5, ge=1, le=20, description="结果数量"), |
| 330 | - tenant_id: str = Query(..., description="租户ID") | |
| 331 | 330 | ): |
| 332 | 331 | """ |
| 333 | 332 | 即时搜索(Instant Search)。 |
| ... | ... | @@ -336,32 +335,15 @@ async def instant_search( |
| 336 | 335 | - 边输入边搜索,无需点击搜索按钮 |
| 337 | 336 | - 返回简化的搜索结果 |
| 338 | 337 | |
| 339 | - 注意:此功能暂未实现,调用标准搜索接口。 | |
| 340 | - TODO: 优化即时搜索性能 | |
| 341 | - - 添加防抖/节流 | |
| 342 | - - 实现结果缓存 | |
| 343 | - - 简化返回字段 | |
| 338 | + 注意:此功能暂未开放,当前明确返回 501。 | |
| 344 | 339 | """ |
| 345 | - from api.app import get_searcher | |
| 346 | - searcher = get_searcher() | |
| 347 | - | |
| 348 | - result = searcher.search( | |
| 349 | - query=q, | |
| 350 | - tenant_id=tenant_id, | |
| 351 | - size=size, | |
| 352 | - from_=0, | |
| 353 | - sku_filter_dimension=None # Instant search doesn't support SKU filtering | |
| 354 | - ) | |
| 355 | - | |
| 356 | - return SearchResponse( | |
| 357 | - results=result.results, | |
| 358 | - total=result.total, | |
| 359 | - max_score=result.max_score, | |
| 360 | - took_ms=result.took_ms, | |
| 361 | - facets=result.facets, | |
| 362 | - query_info=result.query_info, | |
| 363 | - suggestions=result.suggestions, | |
| 364 | - related_searches=result.related_searches | |
| 340 | + # 明确暴露当前接口尚未完成实现,避免调用不完整逻辑导致隐式运行时错误。 | |
| 341 | + raise HTTPException( | |
| 342 | + status_code=501, | |
| 343 | + detail=( | |
| 344 | + "/search/instant is not implemented yet. " | |
| 345 | + "Use POST /search/ for production traffic." | |
| 346 | + ), | |
| 365 | 347 | ) |
| 366 | 348 | |
| 367 | 349 | ... | ... |
api/translator_app.py
| ... | ... | @@ -187,6 +187,7 @@ async def startup_event(): |
| 187 | 187 | logger.info(f"Translation service ready with default model: {default_model}") |
| 188 | 188 | except Exception as e: |
| 189 | 189 | logger.error(f"Failed to initialize translator: {e}", exc_info=True) |
| 190 | + raise | |
| 190 | 191 | |
| 191 | 192 | |
| 192 | 193 | @app.get("/health") |
| ... | ... | @@ -310,4 +311,3 @@ if __name__ == "__main__": |
| 310 | 311 | port=args.port, |
| 311 | 312 | reload=args.reload |
| 312 | 313 | ) |
| 313 | - | ... | ... |
config/config_loader.py
| ... | ... | @@ -367,6 +367,31 @@ class ConfigLoader: |
| 367 | 367 | f"not in supported languages: {config.query_config.supported_languages}" |
| 368 | 368 | ) |
| 369 | 369 | |
| 370 | + # Validate source_fields tri-state semantics | |
| 371 | + source_fields = config.query_config.source_fields | |
| 372 | + if source_fields is not None: | |
| 373 | + if not isinstance(source_fields, list): | |
| 374 | + errors.append("query_config.source_fields must be null or list[str]") | |
| 375 | + else: | |
| 376 | + for idx, field_name in enumerate(source_fields): | |
| 377 | + if not isinstance(field_name, str) or not field_name.strip(): | |
| 378 | + errors.append( | |
| 379 | + f"query_config.source_fields[{idx}] must be a non-empty string" | |
| 380 | + ) | |
| 381 | + | |
| 382 | + # Validate tenant config shape (default must exist in config) | |
| 383 | + tenant_cfg = config.tenant_config | |
| 384 | + if not isinstance(tenant_cfg, dict): | |
| 385 | + errors.append("tenant_config must be an object") | |
| 386 | + else: | |
| 387 | + default_cfg = tenant_cfg.get("default") | |
| 388 | + if not isinstance(default_cfg, dict): | |
| 389 | + errors.append("tenant_config.default must be configured") | |
| 390 | + else: | |
| 391 | + index_languages = default_cfg.get("index_languages") | |
| 392 | + if not isinstance(index_languages, list) or len(index_languages) == 0: | |
| 393 | + errors.append("tenant_config.default.index_languages must be a non-empty list") | |
| 394 | + | |
| 370 | 395 | return errors |
| 371 | 396 | |
| 372 | 397 | def to_dict(self, config: SearchConfig) -> Dict[str, Any]: | ... | ... |
config/services_config.py
| ... | ... | @@ -2,7 +2,7 @@ |
| 2 | 2 | Services configuration - single source for translation, embedding, rerank providers. |
| 3 | 3 | |
| 4 | 4 | All provider selection and endpoint resolution is centralized here. |
| 5 | -Priority: env vars > config.yaml > defaults. | |
| 5 | +Priority: env vars > config.yaml. | |
| 6 | 6 | """ |
| 7 | 7 | |
| 8 | 8 | from __future__ import annotations |
| ... | ... | @@ -34,14 +34,32 @@ def _load_services_raw(config_path: Optional[Path] = None) -> Dict[str, Any]: |
| 34 | 34 | config_path = Path(__file__).parent / "config.yaml" |
| 35 | 35 | path = Path(config_path) |
| 36 | 36 | if not path.exists(): |
| 37 | - return {} | |
| 37 | + raise FileNotFoundError(f"services config file not found: {path}") | |
| 38 | 38 | try: |
| 39 | 39 | with open(path, "r", encoding="utf-8") as f: |
| 40 | 40 | data = yaml.safe_load(f) |
| 41 | - except Exception: | |
| 42 | - return {} | |
| 43 | - services = data.get("services") if isinstance(data, dict) else {} | |
| 44 | - return services if isinstance(services, dict) else {} | |
| 41 | + except Exception as exc: | |
| 42 | + raise RuntimeError(f"failed to parse services config from {path}: {exc}") from exc | |
| 43 | + if not isinstance(data, dict): | |
| 44 | + raise RuntimeError(f"invalid config format in {path}: expected mapping root") | |
| 45 | + services = data.get("services") | |
| 46 | + if not isinstance(services, dict): | |
| 47 | + raise RuntimeError("config.yaml must contain a valid 'services' mapping") | |
| 48 | + return services | |
| 49 | + | |
| 50 | + | |
| 51 | +def _resolve_provider_name( | |
| 52 | + env_name: str, | |
| 53 | + config_provider: Any, | |
| 54 | + capability: str, | |
| 55 | +) -> str: | |
| 56 | + provider = os.getenv(env_name) or config_provider | |
| 57 | + if not provider: | |
| 58 | + raise ValueError( | |
| 59 | + f"services.{capability}.provider is required " | |
| 60 | + f"(or set env override {env_name})" | |
| 61 | + ) | |
| 62 | + return str(provider).strip().lower() | |
| 45 | 63 | |
| 46 | 64 | |
| 47 | 65 | def _resolve_translation() -> ServiceConfig: |
| ... | ... | @@ -49,12 +67,13 @@ def _resolve_translation() -> ServiceConfig: |
| 49 | 67 | cfg = raw.get("translation", {}) if isinstance(raw.get("translation"), dict) else {} |
| 50 | 68 | providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {} |
| 51 | 69 | |
| 52 | - provider = ( | |
| 53 | - os.getenv("TRANSLATION_PROVIDER") | |
| 54 | - or cfg.get("provider") | |
| 55 | - or "direct" | |
| 70 | + provider = _resolve_provider_name( | |
| 71 | + env_name="TRANSLATION_PROVIDER", | |
| 72 | + config_provider=cfg.get("provider"), | |
| 73 | + capability="translation", | |
| 56 | 74 | ) |
| 57 | - provider = str(provider).strip().lower() | |
| 75 | + if provider not in ("direct", "local", "inprocess", "http", "service"): | |
| 76 | + raise ValueError(f"Unsupported translation provider: {provider}") | |
| 58 | 77 | |
| 59 | 78 | # Env override for http base_url |
| 60 | 79 | env_url = os.getenv("TRANSLATION_SERVICE_URL") |
| ... | ... | @@ -71,12 +90,13 @@ def _resolve_embedding() -> ServiceConfig: |
| 71 | 90 | cfg = raw.get("embedding", {}) if isinstance(raw.get("embedding"), dict) else {} |
| 72 | 91 | providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {} |
| 73 | 92 | |
| 74 | - provider = ( | |
| 75 | - os.getenv("EMBEDDING_PROVIDER") | |
| 76 | - or cfg.get("provider") | |
| 77 | - or "http" | |
| 93 | + provider = _resolve_provider_name( | |
| 94 | + env_name="EMBEDDING_PROVIDER", | |
| 95 | + config_provider=cfg.get("provider"), | |
| 96 | + capability="embedding", | |
| 78 | 97 | ) |
| 79 | - provider = str(provider).strip().lower() | |
| 98 | + if provider != "http": | |
| 99 | + raise ValueError(f"Unsupported embedding provider: {provider}") | |
| 80 | 100 | |
| 81 | 101 | env_url = os.getenv("EMBEDDING_SERVICE_URL") |
| 82 | 102 | if env_url and provider == "http": |
| ... | ... | @@ -92,12 +112,13 @@ def _resolve_rerank() -> ServiceConfig: |
| 92 | 112 | cfg = raw.get("rerank", {}) if isinstance(raw.get("rerank"), dict) else {} |
| 93 | 113 | providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {} |
| 94 | 114 | |
| 95 | - provider = ( | |
| 96 | - os.getenv("RERANK_PROVIDER") | |
| 97 | - or cfg.get("provider") | |
| 98 | - or "http" | |
| 115 | + provider = _resolve_provider_name( | |
| 116 | + env_name="RERANK_PROVIDER", | |
| 117 | + config_provider=cfg.get("provider"), | |
| 118 | + capability="rerank", | |
| 99 | 119 | ) |
| 100 | - provider = str(provider).strip().lower() | |
| 120 | + if provider != "http": | |
| 121 | + raise ValueError(f"Unsupported rerank provider: {provider}") | |
| 101 | 122 | |
| 102 | 123 | env_url = os.getenv("RERANKER_SERVICE_URL") |
| 103 | 124 | if env_url: |
| ... | ... | @@ -124,10 +145,13 @@ def get_rerank_backend_config() -> tuple[str, dict]: |
| 124 | 145 | name = ( |
| 125 | 146 | os.getenv("RERANK_BACKEND") |
| 126 | 147 | or cfg.get("backend") |
| 127 | - or "qwen3_vllm" | |
| 128 | 148 | ) |
| 149 | + if not name: | |
| 150 | + raise ValueError("services.rerank.backend is required (or env RERANK_BACKEND)") | |
| 129 | 151 | name = str(name).strip().lower() |
| 130 | 152 | backend_cfg = backends.get(name, {}) if isinstance(backends.get(name), dict) else {} |
| 153 | + if not backend_cfg: | |
| 154 | + raise ValueError(f"services.rerank.backends.{name} is required") | |
| 131 | 155 | return name, backend_cfg |
| 132 | 156 | |
| 133 | 157 | |
| ... | ... | @@ -143,10 +167,13 @@ def get_embedding_backend_config() -> tuple[str, dict]: |
| 143 | 167 | name = ( |
| 144 | 168 | os.getenv("EMBEDDING_BACKEND") |
| 145 | 169 | or cfg.get("backend") |
| 146 | - or "tei" | |
| 147 | 170 | ) |
| 171 | + if not name: | |
| 172 | + raise ValueError("services.embedding.backend is required (or env EMBEDDING_BACKEND)") | |
| 148 | 173 | name = str(name).strip().lower() |
| 149 | 174 | backend_cfg = backends.get(name, {}) if isinstance(backends.get(name), dict) else {} |
| 175 | + if not backend_cfg: | |
| 176 | + raise ValueError(f"services.embedding.backends.{name} is required") | |
| 150 | 177 | return name, backend_cfg |
| 151 | 178 | |
| 152 | 179 | |
| ... | ... | @@ -173,8 +200,9 @@ def get_translation_base_url() -> str: |
| 173 | 200 | base = ( |
| 174 | 201 | os.getenv("TRANSLATION_SERVICE_URL") |
| 175 | 202 | or get_translation_config().providers.get("http", {}).get("base_url") |
| 176 | - or "http://127.0.0.1:6006" | |
| 177 | 203 | ) |
| 204 | + if not base: | |
| 205 | + raise ValueError("Translation HTTP base_url is not configured") | |
| 178 | 206 | return str(base).rstrip("/") |
| 179 | 207 | |
| 180 | 208 | |
| ... | ... | @@ -183,8 +211,9 @@ def get_embedding_base_url() -> str: |
| 183 | 211 | base = ( |
| 184 | 212 | os.getenv("EMBEDDING_SERVICE_URL") |
| 185 | 213 | or get_embedding_config().providers.get("http", {}).get("base_url") |
| 186 | - or "http://127.0.0.1:6005" | |
| 187 | 214 | ) |
| 215 | + if not base: | |
| 216 | + raise ValueError("Embedding HTTP base_url is not configured") | |
| 188 | 217 | return str(base).rstrip("/") |
| 189 | 218 | |
| 190 | 219 | |
| ... | ... | @@ -194,8 +223,9 @@ def get_rerank_service_url() -> str: |
| 194 | 223 | os.getenv("RERANKER_SERVICE_URL") |
| 195 | 224 | or get_rerank_config().providers.get("http", {}).get("service_url") |
| 196 | 225 | or get_rerank_config().providers.get("http", {}).get("base_url") |
| 197 | - or "http://127.0.0.1:6007" | |
| 198 | 226 | ) |
| 227 | + if not base: | |
| 228 | + raise ValueError("Rerank HTTP service_url/base_url is not configured") | |
| 199 | 229 | base = str(base).rstrip("/") |
| 200 | 230 | return base if base.endswith("/rerank") else f"{base}/rerank" |
| 201 | 231 | ... | ... |
config/tenant_config_loader.py
| ... | ... | @@ -51,18 +51,16 @@ SUPPORTED_INDEX_LANGUAGES: Dict[str, str] = { |
| 51 | 51 | "bg": "Bulgarian", |
| 52 | 52 | } |
| 53 | 53 | |
| 54 | -DEFAULT_INDEX_LANGUAGES: List[str] = ["en", "zh"] | |
| 55 | - | |
| 56 | - | |
| 57 | 54 | def normalize_index_languages(value: Any, primary_language: str = "en") -> List[str]: |
| 58 | 55 | """ |
| 59 | 56 | 将 index_languages 配置规范化为合法语言代码列表。 |
| 60 | - None 或空时返回 DEFAULT_INDEX_LANGUAGES。 | |
| 57 | + 仅做规范化,不做默认值兜底。 | |
| 61 | 58 | """ |
| 59 | + del primary_language | |
| 62 | 60 | if value is None: |
| 63 | - return list(DEFAULT_INDEX_LANGUAGES) | |
| 61 | + return [] | |
| 64 | 62 | if not isinstance(value, (list, tuple)): |
| 65 | - return list(DEFAULT_INDEX_LANGUAGES) | |
| 63 | + return [] | |
| 66 | 64 | valid: List[str] = [] |
| 67 | 65 | seen: set = set() |
| 68 | 66 | for item in value: |
| ... | ... | @@ -72,19 +70,23 @@ def normalize_index_languages(value: Any, primary_language: str = "en") -> List[ |
| 72 | 70 | if code in SUPPORTED_INDEX_LANGUAGES: |
| 73 | 71 | valid.append(code) |
| 74 | 72 | seen.add(code) |
| 75 | - return valid if valid else list(DEFAULT_INDEX_LANGUAGES) | |
| 73 | + return valid | |
| 76 | 74 | |
| 77 | 75 | |
| 78 | -def resolve_index_languages(tenant_config: Dict[str, Any]) -> List[str]: | |
| 76 | +def resolve_index_languages( | |
| 77 | + tenant_config: Dict[str, Any], | |
| 78 | + default_index_languages: List[str], | |
| 79 | +) -> List[str]: | |
| 79 | 80 | """ |
| 80 | 81 | 从租户配置解析 index_languages。 |
| 81 | 82 | 若存在 index_languages 则用之;否则按旧配置 translate_to_en / translate_to_zh 推导。 |
| 82 | 83 | """ |
| 83 | 84 | if "index_languages" in tenant_config: |
| 84 | - return normalize_index_languages( | |
| 85 | + normalized = normalize_index_languages( | |
| 85 | 86 | tenant_config["index_languages"], |
| 86 | 87 | tenant_config.get("primary_language") or "en", |
| 87 | 88 | ) |
| 89 | + return normalized if normalized else list(default_index_languages) | |
| 88 | 90 | primary = (tenant_config.get("primary_language") or "en").strip().lower() |
| 89 | 91 | to_en = bool(tenant_config.get("translate_to_en")) |
| 90 | 92 | to_zh = bool(tenant_config.get("translate_to_zh")) |
| ... | ... | @@ -95,7 +97,7 @@ def resolve_index_languages(tenant_config: Dict[str, Any]) -> List[str]: |
| 95 | 97 | if code not in langs and ((code == "en" and to_en) or (code == "zh" and to_zh)): |
| 96 | 98 | if code in SUPPORTED_INDEX_LANGUAGES: |
| 97 | 99 | langs.append(code) |
| 98 | - return langs if langs else list(DEFAULT_INDEX_LANGUAGES) | |
| 100 | + return langs if langs else list(default_index_languages) | |
| 99 | 101 | |
| 100 | 102 | |
| 101 | 103 | class TenantConfigLoader: |
| ... | ... | @@ -117,18 +119,43 @@ class TenantConfigLoader: |
| 117 | 119 | |
| 118 | 120 | try: |
| 119 | 121 | from config import ConfigLoader |
| 122 | + | |
| 120 | 123 | config_loader = ConfigLoader() |
| 121 | 124 | search_config = config_loader.load_config() |
| 122 | - self._config = search_config.tenant_config | |
| 125 | + tenant_cfg = search_config.tenant_config | |
| 126 | + if not isinstance(tenant_cfg, dict): | |
| 127 | + raise RuntimeError("tenant_config must be an object") | |
| 128 | + | |
| 129 | + default_cfg = tenant_cfg.get("default") | |
| 130 | + if not isinstance(default_cfg, dict): | |
| 131 | + raise RuntimeError("tenant_config.default must be configured in config.yaml") | |
| 132 | + default_primary = (default_cfg.get("primary_language") or "en").strip().lower() | |
| 133 | + default_index_languages = normalize_index_languages( | |
| 134 | + default_cfg.get("index_languages"), | |
| 135 | + default_primary, | |
| 136 | + ) | |
| 137 | + if not default_index_languages: | |
| 138 | + raise RuntimeError( | |
| 139 | + "tenant_config.default.index_languages must include at least one supported language" | |
| 140 | + ) | |
| 141 | + | |
| 142 | + tenants_cfg = tenant_cfg.get("tenants", {}) | |
| 143 | + if not isinstance(tenants_cfg, dict): | |
| 144 | + raise RuntimeError("tenant_config.tenants must be an object") | |
| 145 | + | |
| 146 | + normalized_default = dict(default_cfg) | |
| 147 | + normalized_default["primary_language"] = default_primary | |
| 148 | + normalized_default["index_languages"] = default_index_languages | |
| 149 | + | |
| 150 | + self._config = { | |
| 151 | + "default": normalized_default, | |
| 152 | + "tenants": tenants_cfg, | |
| 153 | + } | |
| 123 | 154 | logger.info("Loaded tenant config from unified config.yaml") |
| 124 | 155 | return self._config |
| 125 | 156 | except Exception as e: |
| 126 | 157 | logger.error(f"Failed to load tenant config: {e}", exc_info=True) |
| 127 | - self._config = { | |
| 128 | - "default": {"primary_language": "en", "index_languages": ["en", "zh"]}, | |
| 129 | - "tenants": {}, | |
| 130 | - } | |
| 131 | - return self._config | |
| 158 | + raise | |
| 132 | 159 | |
| 133 | 160 | def get_tenant_config(self, tenant_id: str) -> Dict[str, Any]: |
| 134 | 161 | """ |
| ... | ... | @@ -142,13 +169,20 @@ class TenantConfigLoader: |
| 142 | 169 | """ |
| 143 | 170 | config = self.load_config() |
| 144 | 171 | tenant_id_str = str(tenant_id) |
| 145 | - default = config.get("default", {"primary_language": "en", "index_languages": ["en", "zh"]}) | |
| 172 | + default = config["default"] | |
| 146 | 173 | tenants = config.get("tenants", {}) |
| 147 | - raw = tenants[tenant_id_str] if tenant_id_str in tenants else default | |
| 174 | + raw = tenants[tenant_id_str] if tenant_id_str in tenants else {} | |
| 175 | + if raw and not isinstance(raw, dict): | |
| 176 | + raise RuntimeError(f"tenant_config.tenants.{tenant_id_str} must be an object") | |
| 148 | 177 | if tenant_id_str not in tenants: |
| 149 | 178 | logger.debug(f"Tenant {tenant_id} not found in config, using default") |
| 150 | - out = dict(raw) | |
| 151 | - out["index_languages"] = resolve_index_languages(raw) | |
| 179 | + merged = dict(default) | |
| 180 | + merged.update(raw) | |
| 181 | + out = dict(merged) | |
| 182 | + out["index_languages"] = resolve_index_languages( | |
| 183 | + merged, | |
| 184 | + default_index_languages=default["index_languages"], | |
| 185 | + ) | |
| 152 | 186 | return out |
| 153 | 187 | |
| 154 | 188 | def reload(self): |
| ... | ... | @@ -167,4 +201,3 @@ def get_tenant_config_loader() -> TenantConfigLoader: |
| 167 | 201 | if _tenant_config_loader is None: |
| 168 | 202 | _tenant_config_loader = TenantConfigLoader() |
| 169 | 203 | return _tenant_config_loader |
| 170 | - | ... | ... |
docs/DEVELOPER_GUIDE.md
| ... | ... | @@ -218,9 +218,18 @@ docs/ # 文档(含本指南) |
| 218 | 218 | |
| 219 | 219 | ### 5.7 错误处理 |
| 220 | 220 | |
| 221 | -- 外部能力(翻译、向量、重排)调用失败时应立即报错并中止请求,禁止静默降级;通过日志与监控尽早暴露问题并修复。 | |
| 221 | +- **启动期(fail-fast)**:配置加载、依赖初始化、模型/资源加载失败必须直接启动失败,不允许“继续启动但不可用”。 | |
| 222 | +- **运行期(分层处理)**: | |
| 223 | + - 核心链路(ES 主检索)失败:请求直接失败并返回错误,禁止伪装成空结果。 | |
| 224 | + - 增强链路(翻译/向量/重排)失败:记录 warning,不应拖垮主检索流程。 | |
| 225 | +- 禁止通过“静默回退到另一套实现”掩盖问题;默认行为必须由配置显式给出。 | |
| 222 | 226 | |
| 223 | -### 5.8 环境隔离 | |
| 227 | +### 5.8 启动初始化约束 | |
| 228 | + | |
| 229 | +- 重资源与关键依赖(如 translator、text/image encoder)应在服务启动期初始化一次并复用,避免请求期懒加载。 | |
| 230 | +- 若配置声明启用某能力(例如 GPU 后端),但运行资源不满足,应直接启动失败,不自动降级为其它后端。 | |
| 231 | + | |
| 232 | +### 5.9 环境隔离 | |
| 224 | 233 | |
| 225 | 234 | - 主程序(backend/indexer/frontend)只使用 `.venv`,不引入 `clip-as-service`/`vllm` 的重依赖。 |
| 226 | 235 | - embedding 服务使用 `.venv-embedding`;reranker 服务使用 `.venv-reranker`;CN-CLIP 使用 `.venv-cnclip`;TEI 使用 Docker 容器。 | ... | ... |
docs/QUICKSTART.md
| ... | ... | @@ -102,7 +102,7 @@ curl "http://localhost:6002/search/suggestions?q=玩&size=5" \ |
| 102 | 102 | |
| 103 | 103 | # 健康与统计 |
| 104 | 104 | curl http://localhost:6002/admin/health |
| 105 | -curl http://localhost:6002/admin/stats | |
| 105 | +curl http://localhost:6002/admin/stats -H "X-Tenant-ID: 162" | |
| 106 | 106 | ``` |
| 107 | 107 | |
| 108 | 108 | API 文档:`http://localhost:6002/docs` | ... | ... |
docs/Usage-Guide.md
docs/搜索API对接指南.md
| ... | ... | @@ -139,7 +139,7 @@ curl -X POST "http://43.166.252.75:6002/search/" \ |
| 139 | 139 | |------|------|------|------| |
| 140 | 140 | | 搜索 | POST | `/search/` | 执行搜索查询 | |
| 141 | 141 | | 搜索建议 | GET | `/search/suggestions` | 搜索建议(自动补全/热词,多语言 + 结果直达) | |
| 142 | -| 即时搜索 | GET | `/search/instant` | 边输入边搜索(框架) ⚠️ TODO | | |
| 142 | +| 即时搜索 | GET | `/search/instant` | 即时搜索预留接口(当前返回 `501 Not Implemented`) | | |
| 143 | 143 | | 获取文档 | GET | `/search/{doc_id}` | 获取单个文档 | |
| 144 | 144 | | 全量索引 | POST | `/indexer/reindex` | 全量索引接口(导入数据,不删除索引,仅推荐自测使用) | |
| 145 | 145 | | 增量索引 | POST | `/indexer/index` | 增量索引接口(指定SPU ID列表进行索引,支持自动检测删除和显式删除,仅推荐自测使用) | |
| ... | ... | @@ -149,7 +149,7 @@ curl -X POST "http://43.166.252.75:6002/search/" \ |
| 149 | 149 | | 索引健康检查 | GET | `/indexer/health` | 检查索引服务状态 | |
| 150 | 150 | | 健康检查 | GET | `/admin/health` | 服务健康检查 | |
| 151 | 151 | | 获取配置 | GET | `/admin/config` | 获取租户配置 | |
| 152 | -| 索引统计 | GET | `/admin/stats` | 获取索引统计信息 | | |
| 152 | +| 索引统计 | GET | `/admin/stats` | 获取租户索引统计信息(需 tenant_id) | | |
| 153 | 153 | |
| 154 | 154 | **微服务(独立端口,外部可直连)**: |
| 155 | 155 | |
| ... | ... | @@ -633,10 +633,10 @@ curl "http://localhost:6002/search/suggestions?q=芭&size=5&language=zh&with_res |
| 633 | 633 | |
| 634 | 634 | ### 3.8 即时搜索接口 |
| 635 | 635 | |
| 636 | -> ⚠️ **TODO**: 此接口当前为框架实现,功能暂未实现,调用标准搜索接口。后续需要优化即时搜索性能(添加防抖/节流、实现结果缓存、简化返回字段)。 | |
| 636 | +> ⚠️ 当前版本未开放该能力。接口会明确返回 `501 Not Implemented`,避免误用未完成实现。 | |
| 637 | 637 | |
| 638 | 638 | - **端点**: `GET /search/instant` |
| 639 | -- **描述**: 边输入边搜索,采用轻量参数响应当前输入。底层复用标准搜索能力。 | |
| 639 | +- **描述**: 即时搜索预留端点,后续会在独立实现完成后开放。 | |
| 640 | 640 | |
| 641 | 641 | #### 查询参数 |
| 642 | 642 | |
| ... | ... | @@ -651,6 +651,15 @@ curl "http://localhost:6002/search/suggestions?q=芭&size=5&language=zh&with_res |
| 651 | 651 | curl "http://localhost:6002/search/instant?q=玩具&size=5" |
| 652 | 652 | ``` |
| 653 | 653 | |
| 654 | +#### 当前响应 | |
| 655 | + | |
| 656 | +```json | |
| 657 | +{ | |
| 658 | + "error": "/search/instant is not implemented yet. Use POST /search/ for production traffic.", | |
| 659 | + "status_code": 501 | |
| 660 | +} | |
| 661 | +``` | |
| 662 | + | |
| 654 | 663 | ### 3.9 获取单个文档 |
| 655 | 664 | |
| 656 | 665 | - **端点**: `GET /search/{doc_id}` |
| ... | ... | @@ -1534,11 +1543,13 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ |
| 1534 | 1543 | ### 6.3 索引统计 |
| 1535 | 1544 | |
| 1536 | 1545 | - **端点**: `GET /admin/stats` |
| 1537 | -- **描述**: 获取索引文档数量与磁盘大小,方便监控。 | |
| 1546 | +- **描述**: 获取指定租户索引文档数量与磁盘大小,方便监控。 | |
| 1547 | +- **租户标识**:通过请求头 `X-Tenant-ID` 或 query 参数 `tenant_id` 传递(必填)。 | |
| 1538 | 1548 | |
| 1539 | 1549 | ```json |
| 1540 | 1550 | { |
| 1541 | - "index_name": "search_tenant1", | |
| 1551 | + "tenant_id": "162", | |
| 1552 | + "index_name": "search_products_tenant_162", | |
| 1542 | 1553 | "document_count": 10000, |
| 1543 | 1554 | "size_mb": 523.45 |
| 1544 | 1555 | } | ... | ... |
docs/搜索API速查表.md
| ... | ... | @@ -296,13 +296,13 @@ POST /search/image |
| 296 | 296 | |
| 297 | 297 | GET /search/suggestions?q=芭&size=5&language=zh&with_results=true |
| 298 | 298 | |
| 299 | -GET /search/instant?q=玩具&size=5 | |
| 299 | +GET /search/instant?q=玩具&size=5 # 当前返回 501 Not Implemented | |
| 300 | 300 | |
| 301 | 301 | GET /search/{doc_id} |
| 302 | 302 | |
| 303 | 303 | GET /admin/health |
| 304 | 304 | GET /admin/config |
| 305 | -GET /admin/stats | |
| 305 | +GET /admin/stats # 需 X-Tenant-ID 或 tenant_id | |
| 306 | 306 | ``` |
| 307 | 307 | |
| 308 | 308 | --- | ... | ... |
providers/embedding.py
| ... | ... | @@ -22,15 +22,19 @@ class EmbeddingProvider: |
| 22 | 22 | |
| 23 | 23 | def __init__(self) -> None: |
| 24 | 24 | self._base_url = get_embedding_base_url() |
| 25 | + from embeddings.text_encoder import TextEmbeddingEncoder | |
| 26 | + from embeddings.image_encoder import CLIPImageEncoder | |
| 27 | + | |
| 28 | + # Initialize once; avoid per-access instantiation. | |
| 29 | + self._text_encoder = TextEmbeddingEncoder(service_url=self._base_url) | |
| 30 | + self._image_encoder = CLIPImageEncoder(service_url=self._base_url) | |
| 25 | 31 | |
| 26 | 32 | @property |
| 27 | 33 | def text_encoder(self): |
| 28 | - """Lazy-created text encoder (TextEmbeddingEncoder).""" | |
| 29 | - from embeddings.text_encoder import TextEmbeddingEncoder | |
| 30 | - return TextEmbeddingEncoder(service_url=self._base_url) | |
| 34 | + """Pre-initialized text encoder (TextEmbeddingEncoder).""" | |
| 35 | + return self._text_encoder | |
| 31 | 36 | |
| 32 | 37 | @property |
| 33 | 38 | def image_encoder(self): |
| 34 | - """Lazy-created image encoder (CLIPImageEncoder).""" | |
| 35 | - from embeddings.image_encoder import CLIPImageEncoder | |
| 36 | - return CLIPImageEncoder(service_url=self._base_url) | |
| 39 | + """Pre-initialized image encoder (CLIPImageEncoder).""" | |
| 40 | + return self._image_encoder | ... | ... |
query/query_parser.py
| ... | ... | @@ -85,8 +85,8 @@ class QueryParser: |
| 85 | 85 | |
| 86 | 86 | Args: |
| 87 | 87 | config: SearchConfig instance |
| 88 | - text_encoder: Text embedding encoder (lazy loaded if not provided) | |
| 89 | - translator: Translator instance (lazy loaded if not provided) | |
| 88 | + text_encoder: Text embedding encoder (initialized at startup if not provided) | |
| 89 | + translator: Translator instance (initialized at startup if not provided) | |
| 90 | 90 | """ |
| 91 | 91 | self.config = config |
| 92 | 92 | self._text_encoder = text_encoder |
| ... | ... | @@ -114,22 +114,24 @@ class QueryParser: |
| 114 | 114 | else: |
| 115 | 115 | logger.info("HanLP not installed; using simple tokenizer") |
| 116 | 116 | |
| 117 | + # Eager initialization (startup-time failure visibility, no lazy init in request path) | |
| 118 | + if self.config.query_config.enable_text_embedding and self._text_encoder is None: | |
| 119 | + logger.info("Initializing text encoder at QueryParser construction...") | |
| 120 | + self._text_encoder = TextEmbeddingEncoder() | |
| 121 | + if self._translator is None: | |
| 122 | + from config.services_config import get_translation_config | |
| 123 | + cfg = get_translation_config() | |
| 124 | + logger.info("Initializing translator at QueryParser construction (provider=%s)...", cfg.provider) | |
| 125 | + self._translator = create_translation_provider(self.config.query_config) | |
| 126 | + | |
| 117 | 127 | @property |
| 118 | 128 | def text_encoder(self) -> TextEmbeddingEncoder: |
| 119 | - """Lazy load text encoder.""" | |
| 120 | - if self._text_encoder is None and self.config.query_config.enable_text_embedding: | |
| 121 | - logger.info("Initializing text encoder (lazy load)...") | |
| 122 | - self._text_encoder = TextEmbeddingEncoder() | |
| 129 | + """Return pre-initialized text encoder.""" | |
| 123 | 130 | return self._text_encoder |
| 124 | 131 | |
| 125 | 132 | @property |
| 126 | 133 | def translator(self) -> Any: |
| 127 | - """Lazy load translator.""" | |
| 128 | - if self._translator is None: | |
| 129 | - from config.services_config import get_translation_config | |
| 130 | - cfg = get_translation_config() | |
| 131 | - logger.info("Initializing translator (provider=%s)...", cfg.provider) | |
| 132 | - self._translator = create_translation_provider(self.config.query_config) | |
| 134 | + """Return pre-initialized translator.""" | |
| 133 | 135 | return self._translator |
| 134 | 136 | |
| 135 | 137 | def _simple_tokenize(self, text: str) -> List[str]: |
| ... | ... | @@ -343,6 +345,8 @@ class QueryParser: |
| 343 | 345 | encoding_executor = None |
| 344 | 346 | if should_generate_embedding: |
| 345 | 347 | try: |
| 348 | + if self.text_encoder is None: | |
| 349 | + raise RuntimeError("Text embedding is enabled but text encoder is not initialized") | |
| 346 | 350 | log_debug("Starting query vector generation (async)") |
| 347 | 351 | # Submit encoding task to thread pool for async execution |
| 348 | 352 | encoding_executor = ThreadPoolExecutor(max_workers=1) | ... | ... |
search/es_query_builder.py
| ... | ... | @@ -50,6 +50,22 @@ class ESQueryBuilder: |
| 50 | 50 | self.default_language = default_language |
| 51 | 51 | self.knn_boost = knn_boost |
| 52 | 52 | |
| 53 | + def _apply_source_filter(self, es_query: Dict[str, Any]) -> None: | |
| 54 | + """ | |
| 55 | + Apply tri-state _source semantics: | |
| 56 | + - None: do not set _source (return all source fields) | |
| 57 | + - []: _source=false | |
| 58 | + - [..]: _source.includes=[..] | |
| 59 | + """ | |
| 60 | + if self.source_fields is None: | |
| 61 | + return | |
| 62 | + if not isinstance(self.source_fields, list): | |
| 63 | + raise ValueError("query_config.source_fields must be null or list[str]") | |
| 64 | + if len(self.source_fields) == 0: | |
| 65 | + es_query["_source"] = False | |
| 66 | + return | |
| 67 | + es_query["_source"] = {"includes": self.source_fields} | |
| 68 | + | |
| 53 | 69 | def _split_filters_for_faceting( |
| 54 | 70 | self, |
| 55 | 71 | filters: Optional[Dict[str, Any]], |
| ... | ... | @@ -146,11 +162,8 @@ class ESQueryBuilder: |
| 146 | 162 | "from": from_ |
| 147 | 163 | } |
| 148 | 164 | |
| 149 | - # Add _source filtering if source_fields are configured | |
| 150 | - if self.source_fields: | |
| 151 | - es_query["_source"] = { | |
| 152 | - "includes": self.source_fields | |
| 153 | - } | |
| 165 | + # Add _source filtering with explicit tri-state semantics. | |
| 166 | + self._apply_source_filter(es_query) | |
| 154 | 167 | |
| 155 | 168 | # 1. Build recall queries (text or embedding) |
| 156 | 169 | recall_clauses = [] | ... | ... |
search/searcher.py
| ... | ... | @@ -82,7 +82,8 @@ class Searcher: |
| 82 | 82 | self, |
| 83 | 83 | es_client: ESClient, |
| 84 | 84 | config: SearchConfig, |
| 85 | - query_parser: Optional[QueryParser] = None | |
| 85 | + query_parser: Optional[QueryParser] = None, | |
| 86 | + image_encoder: Optional[CLIPImageEncoder] = None, | |
| 86 | 87 | ): |
| 87 | 88 | """ |
| 88 | 89 | Initialize searcher. |
| ... | ... | @@ -91,6 +92,7 @@ class Searcher: |
| 91 | 92 | es_client: Elasticsearch client |
| 92 | 93 | config: SearchConfig instance |
| 93 | 94 | query_parser: Query parser (created if not provided) |
| 95 | + image_encoder: Optional pre-initialized image encoder | |
| 94 | 96 | """ |
| 95 | 97 | self.es_client = es_client |
| 96 | 98 | self.config = config |
| ... | ... | @@ -103,8 +105,12 @@ class Searcher: |
| 103 | 105 | # Get match fields from config |
| 104 | 106 | self.match_fields = get_match_fields_for_index(config, "default") |
| 105 | 107 | self.text_embedding_field = config.query_config.text_embedding_field or "title_embedding" |
| 106 | - self.image_embedding_field = config.query_config.image_embedding_field or "image_embedding" | |
| 107 | - self.source_fields = config.query_config.source_fields or [] | |
| 108 | + self.image_embedding_field = config.query_config.image_embedding_field | |
| 109 | + if self.image_embedding_field and image_encoder is None: | |
| 110 | + self.image_encoder = CLIPImageEncoder() | |
| 111 | + else: | |
| 112 | + self.image_encoder = image_encoder | |
| 113 | + self.source_fields = config.query_config.source_fields | |
| 108 | 114 | |
| 109 | 115 | # Query builder - simplified single-layer architecture |
| 110 | 116 | self.query_builder = ESQueryBuilder( |
| ... | ... | @@ -118,6 +124,22 @@ class Searcher: |
| 118 | 124 | knn_boost=self.config.query_config.knn_boost |
| 119 | 125 | ) |
| 120 | 126 | |
| 127 | + def _apply_source_filter(self, es_query: Dict[str, Any]) -> None: | |
| 128 | + """ | |
| 129 | + Apply tri-state _source semantics: | |
| 130 | + - None: do not set _source (return full source) | |
| 131 | + - []: _source=false (return no source fields) | |
| 132 | + - [..]: _source.includes=[..] | |
| 133 | + """ | |
| 134 | + if self.source_fields is None: | |
| 135 | + return | |
| 136 | + if not isinstance(self.source_fields, list): | |
| 137 | + raise ValueError("query_config.source_fields must be null or list[str]") | |
| 138 | + if len(self.source_fields) == 0: | |
| 139 | + es_query["_source"] = False | |
| 140 | + return | |
| 141 | + es_query["_source"] = {"includes": self.source_fields} | |
| 142 | + | |
| 121 | 143 | def search( |
| 122 | 144 | self, |
| 123 | 145 | query: str, |
| ... | ... | @@ -568,8 +590,9 @@ class Searcher: |
| 568 | 590 | raise ValueError("Image embedding field not configured") |
| 569 | 591 | |
| 570 | 592 | # Generate image embedding |
| 571 | - image_encoder = CLIPImageEncoder() | |
| 572 | - image_vector = image_encoder.encode_image_from_url(image_url) | |
| 593 | + if self.image_encoder is None: | |
| 594 | + raise RuntimeError("Image encoder is not initialized at startup") | |
| 595 | + image_vector = self.image_encoder.encode_image_from_url(image_url) | |
| 573 | 596 | |
| 574 | 597 | if image_vector is None: |
| 575 | 598 | raise ValueError(f"Failed to encode image: {image_url}") |
| ... | ... | @@ -590,11 +613,8 @@ class Searcher: |
| 590 | 613 | } |
| 591 | 614 | } |
| 592 | 615 | |
| 593 | - # Add _source filtering if source_fields are configured | |
| 594 | - if self.source_fields: | |
| 595 | - es_query["_source"] = { | |
| 596 | - "includes": self.source_fields | |
| 597 | - } | |
| 616 | + # Apply source filtering semantics (None / [] / list) | |
| 617 | + self._apply_source_filter(es_query) | |
| 598 | 618 | |
| 599 | 619 | if filters or range_filters: |
| 600 | 620 | filter_clauses = self.query_builder._build_filters(filters, range_filters) | ... | ... |
tests/ci/test_service_api_contracts.py
| ... | ... | @@ -95,6 +95,52 @@ def test_suggestion_api_contract(search_client: TestClient): |
| 95 | 95 | assert len(data["suggestions"]) == 1 |
| 96 | 96 | |
| 97 | 97 | |
| 98 | +def test_instant_search_not_implemented(search_client: TestClient): | |
| 99 | + response = search_client.get("/search/instant?q=iph&size=5") | |
| 100 | + assert response.status_code == 501 | |
| 101 | + | |
| 102 | + | |
| 103 | +def test_admin_stats_contract(search_client: TestClient, monkeypatch): | |
| 104 | + import api.app as search_app | |
| 105 | + | |
| 106 | + class _FakeIndices: | |
| 107 | + @staticmethod | |
| 108 | + def exists(index: str) -> bool: | |
| 109 | + return index.endswith("search_products_tenant_162") | |
| 110 | + | |
| 111 | + @staticmethod | |
| 112 | + def stats(index: str): | |
| 113 | + return { | |
| 114 | + "indices": { | |
| 115 | + index: { | |
| 116 | + "total": { | |
| 117 | + "store": { | |
| 118 | + "size_in_bytes": 2 * 1024 * 1024, | |
| 119 | + } | |
| 120 | + } | |
| 121 | + } | |
| 122 | + } | |
| 123 | + } | |
| 124 | + | |
| 125 | + class _FakeClient: | |
| 126 | + indices = _FakeIndices() | |
| 127 | + | |
| 128 | + @staticmethod | |
| 129 | + def count(index: str): | |
| 130 | + assert index.endswith("search_products_tenant_162") | |
| 131 | + return {"count": 123} | |
| 132 | + | |
| 133 | + monkeypatch.setattr(search_app, "get_es_client", lambda: SimpleNamespace(client=_FakeClient())) | |
| 134 | + | |
| 135 | + response = search_client.get("/admin/stats", headers={"X-Tenant-ID": "162"}) | |
| 136 | + assert response.status_code == 200 | |
| 137 | + data = response.json() | |
| 138 | + assert data["tenant_id"] == "162" | |
| 139 | + assert data["index_name"].endswith("search_products_tenant_162") | |
| 140 | + assert data["document_count"] == 123 | |
| 141 | + assert data["size_mb"] == 2.0 | |
| 142 | + | |
| 143 | + | |
| 98 | 144 | class _FakeBulkService: |
| 99 | 145 | def bulk_index(self, tenant_id: str, recreate_index: bool, batch_size: int): |
| 100 | 146 | return { | ... | ... |
utils/es_client.py
| ... | ... | @@ -193,13 +193,7 @@ class ESClient: |
| 193 | 193 | ) |
| 194 | 194 | except Exception as e: |
| 195 | 195 | logger.error(f"Search failed: {e}", exc_info=True) |
| 196 | - return { | |
| 197 | - 'hits': { | |
| 198 | - 'total': {'value': 0}, | |
| 199 | - 'hits': [] | |
| 200 | - }, | |
| 201 | - 'error': str(e) | |
| 202 | - } | |
| 196 | + raise RuntimeError(f"Elasticsearch search failed for index '{index_name}': {e}") from e | |
| 203 | 197 | |
| 204 | 198 | def get_mapping(self, index_name: str) -> Dict[str, Any]: |
| 205 | 199 | """Get index mapping.""" |
| ... | ... | @@ -234,7 +228,7 @@ class ESClient: |
| 234 | 228 | return result['count'] |
| 235 | 229 | except Exception as e: |
| 236 | 230 | logger.error(f"Count failed: {e}", exc_info=True) |
| 237 | - return 0 | |
| 231 | + raise RuntimeError(f"Elasticsearch count failed for index '{index_name}': {e}") from e | |
| 238 | 232 | |
| 239 | 233 | |
| 240 | 234 | def get_es_client_from_env() -> ESClient: | ... | ... |