Commit 26b910bd6bb837fc328a2c4f0f45305a0bf31b79

Authored by tangwang
1 parent 54ccf28c

refactor service init and tighten multi-tenant search contracts

@@ -206,7 +206,7 @@ async def startup_event(): @@ -206,7 +206,7 @@ async def startup_event():
206 logger.info("Service initialized successfully") 206 logger.info("Service initialized successfully")
207 except Exception as e: 207 except Exception as e:
208 logger.error(f"Failed to initialize service: {e}", exc_info=True) 208 logger.error(f"Failed to initialize service: {e}", exc_info=True)
209 - logger.warning("Service will start but may not function correctly") 209 + raise
210 210
211 211
212 @app.on_event("shutdown") 212 @app.on_event("shutdown")
api/routes/admin.py
@@ -2,10 +2,11 @@ @@ -2,10 +2,11 @@
2 Admin API routes for configuration and management. 2 Admin API routes for configuration and management.
3 """ 3 """
4 4
5 -from fastapi import APIRouter, HTTPException 5 +from fastapi import APIRouter, HTTPException, Request
6 from typing import Dict 6 from typing import Dict
7 7
8 from ..models import HealthResponse, ErrorResponse 8 from ..models import HealthResponse, ErrorResponse
  9 +from indexer.mapping_generator import get_tenant_index_name
9 10
10 router = APIRouter(prefix="/admin", tags=["admin"]) 11 router = APIRouter(prefix="/admin", tags=["admin"])
11 12
@@ -57,6 +58,8 @@ async def get_configuration(): @@ -57,6 +58,8 @@ async def get_configuration():
57 "spu_enabled": config.spu_config.enabled 58 "spu_enabled": config.spu_config.enabled
58 } 59 }
59 60
  61 + except HTTPException:
  62 + raise
60 except Exception as e: 63 except Exception as e:
61 raise HTTPException(status_code=500, detail=str(e)) 64 raise HTTPException(status_code=500, detail=str(e))
62 65
@@ -105,29 +108,50 @@ async def get_rewrite_rules(): @@ -105,29 +108,50 @@ async def get_rewrite_rules():
105 108
106 109
107 @router.get("/stats") 110 @router.get("/stats")
108 -async def get_index_stats(): 111 +async def get_index_stats(http_request: Request):
109 """ 112 """
110 Get index statistics. 113 Get index statistics.
111 """ 114 """
112 try: 115 try:
113 - from ..app import get_es_client, get_config 116 + from urllib.parse import parse_qs
  117 + from ..app import get_es_client
114 118
115 es_client = get_es_client() 119 es_client = get_es_client()
116 - config = get_config() 120 +
  121 + tenant_id = http_request.headers.get("X-Tenant-ID")
  122 + if not tenant_id:
  123 + query_string = http_request.url.query
  124 + if query_string:
  125 + params = parse_qs(query_string)
  126 + tenant_id = params.get("tenant_id", [None])[0]
  127 +
  128 + if not tenant_id:
  129 + raise HTTPException(
  130 + status_code=400,
  131 + detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'",
  132 + )
  133 +
  134 + index_name = get_tenant_index_name(tenant_id)
  135 + if not es_client.client.indices.exists(index=index_name):
  136 + raise HTTPException(
  137 + status_code=404,
  138 + detail=f"Tenant index not found: {index_name}",
  139 + )
117 140
118 # Get document count 141 # Get document count
119 - doc_count = es_client.count(config.es_index_name) 142 + doc_count = es_client.client.count(index=index_name).get("count", 0)
120 143
121 # Get index size (if available) 144 # Get index size (if available)
122 try: 145 try:
123 - stats = es_client.client.indices.stats(index=config.es_index_name)  
124 - size_in_bytes = stats['indices'][config.es_index_name]['total']['store']['size_in_bytes'] 146 + stats = es_client.client.indices.stats(index=index_name)
  147 + size_in_bytes = stats["indices"][index_name]["total"]["store"]["size_in_bytes"]
125 size_mb = size_in_bytes / (1024 * 1024) 148 size_mb = size_in_bytes / (1024 * 1024)
126 - except: 149 + except Exception:
127 size_mb = None 150 size_mb = None
128 151
129 return { 152 return {
130 - "index_name": config.es_index_name, 153 + "tenant_id": str(tenant_id),
  154 + "index_name": index_name,
131 "document_count": doc_count, 155 "document_count": doc_count,
132 "size_mb": round(size_mb, 2) if size_mb else None 156 "size_mb": round(size_mb, 2) if size_mb else None
133 } 157 }
api/routes/search.py
@@ -327,7 +327,6 @@ async def search_suggestions( @@ -327,7 +327,6 @@ async def search_suggestions(
327 async def instant_search( 327 async def instant_search(
328 q: str = Query(..., min_length=2, description="搜索查询"), 328 q: str = Query(..., min_length=2, description="搜索查询"),
329 size: int = Query(5, ge=1, le=20, description="结果数量"), 329 size: int = Query(5, ge=1, le=20, description="结果数量"),
330 - tenant_id: str = Query(..., description="租户ID")  
331 ): 330 ):
332 """ 331 """
333 即时搜索(Instant Search)。 332 即时搜索(Instant Search)。
@@ -336,32 +335,15 @@ async def instant_search( @@ -336,32 +335,15 @@ async def instant_search(
336 - 边输入边搜索,无需点击搜索按钮 335 - 边输入边搜索,无需点击搜索按钮
337 - 返回简化的搜索结果 336 - 返回简化的搜索结果
338 337
339 - 注意:此功能暂未实现,调用标准搜索接口。  
340 - TODO: 优化即时搜索性能  
341 - - 添加防抖/节流  
342 - - 实现结果缓存  
343 - - 简化返回字段 338 + 注意:此功能暂未开放,当前明确返回 501。
344 """ 339 """
345 - from api.app import get_searcher  
346 - searcher = get_searcher()  
347 -  
348 - result = searcher.search(  
349 - query=q,  
350 - tenant_id=tenant_id,  
351 - size=size,  
352 - from_=0,  
353 - sku_filter_dimension=None # Instant search doesn't support SKU filtering  
354 - )  
355 -  
356 - return SearchResponse(  
357 - results=result.results,  
358 - total=result.total,  
359 - max_score=result.max_score,  
360 - took_ms=result.took_ms,  
361 - facets=result.facets,  
362 - query_info=result.query_info,  
363 - suggestions=result.suggestions,  
364 - related_searches=result.related_searches 340 + # 明确暴露当前接口尚未完成实现,避免调用不完整逻辑导致隐式运行时错误。
  341 + raise HTTPException(
  342 + status_code=501,
  343 + detail=(
  344 + "/search/instant is not implemented yet. "
  345 + "Use POST /search/ for production traffic."
  346 + ),
365 ) 347 )
366 348
367 349
api/translator_app.py
@@ -187,6 +187,7 @@ async def startup_event(): @@ -187,6 +187,7 @@ async def startup_event():
187 logger.info(f"Translation service ready with default model: {default_model}") 187 logger.info(f"Translation service ready with default model: {default_model}")
188 except Exception as e: 188 except Exception as e:
189 logger.error(f"Failed to initialize translator: {e}", exc_info=True) 189 logger.error(f"Failed to initialize translator: {e}", exc_info=True)
  190 + raise
190 191
191 192
192 @app.get("/health") 193 @app.get("/health")
@@ -310,4 +311,3 @@ if __name__ == "__main__": @@ -310,4 +311,3 @@ if __name__ == "__main__":
310 port=args.port, 311 port=args.port,
311 reload=args.reload 312 reload=args.reload
312 ) 313 )
313 -  
config/config_loader.py
@@ -367,6 +367,31 @@ class ConfigLoader: @@ -367,6 +367,31 @@ class ConfigLoader:
367 f"not in supported languages: {config.query_config.supported_languages}" 367 f"not in supported languages: {config.query_config.supported_languages}"
368 ) 368 )
369 369
  370 + # Validate source_fields tri-state semantics
  371 + source_fields = config.query_config.source_fields
  372 + if source_fields is not None:
  373 + if not isinstance(source_fields, list):
  374 + errors.append("query_config.source_fields must be null or list[str]")
  375 + else:
  376 + for idx, field_name in enumerate(source_fields):
  377 + if not isinstance(field_name, str) or not field_name.strip():
  378 + errors.append(
  379 + f"query_config.source_fields[{idx}] must be a non-empty string"
  380 + )
  381 +
  382 + # Validate tenant config shape (default must exist in config)
  383 + tenant_cfg = config.tenant_config
  384 + if not isinstance(tenant_cfg, dict):
  385 + errors.append("tenant_config must be an object")
  386 + else:
  387 + default_cfg = tenant_cfg.get("default")
  388 + if not isinstance(default_cfg, dict):
  389 + errors.append("tenant_config.default must be configured")
  390 + else:
  391 + index_languages = default_cfg.get("index_languages")
  392 + if not isinstance(index_languages, list) or len(index_languages) == 0:
  393 + errors.append("tenant_config.default.index_languages must be a non-empty list")
  394 +
370 return errors 395 return errors
371 396
372 def to_dict(self, config: SearchConfig) -> Dict[str, Any]: 397 def to_dict(self, config: SearchConfig) -> Dict[str, Any]:
config/services_config.py
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 Services configuration - single source for translation, embedding, rerank providers. 2 Services configuration - single source for translation, embedding, rerank providers.
3 3
4 All provider selection and endpoint resolution is centralized here. 4 All provider selection and endpoint resolution is centralized here.
5 -Priority: env vars > config.yaml > defaults. 5 +Priority: env vars > config.yaml.
6 """ 6 """
7 7
8 from __future__ import annotations 8 from __future__ import annotations
@@ -34,14 +34,32 @@ def _load_services_raw(config_path: Optional[Path] = None) -> Dict[str, Any]: @@ -34,14 +34,32 @@ def _load_services_raw(config_path: Optional[Path] = None) -> Dict[str, Any]:
34 config_path = Path(__file__).parent / "config.yaml" 34 config_path = Path(__file__).parent / "config.yaml"
35 path = Path(config_path) 35 path = Path(config_path)
36 if not path.exists(): 36 if not path.exists():
37 - return {} 37 + raise FileNotFoundError(f"services config file not found: {path}")
38 try: 38 try:
39 with open(path, "r", encoding="utf-8") as f: 39 with open(path, "r", encoding="utf-8") as f:
40 data = yaml.safe_load(f) 40 data = yaml.safe_load(f)
41 - except Exception:  
42 - return {}  
43 - services = data.get("services") if isinstance(data, dict) else {}  
44 - return services if isinstance(services, dict) else {} 41 + except Exception as exc:
  42 + raise RuntimeError(f"failed to parse services config from {path}: {exc}") from exc
  43 + if not isinstance(data, dict):
  44 + raise RuntimeError(f"invalid config format in {path}: expected mapping root")
  45 + services = data.get("services")
  46 + if not isinstance(services, dict):
  47 + raise RuntimeError("config.yaml must contain a valid 'services' mapping")
  48 + return services
  49 +
  50 +
  51 +def _resolve_provider_name(
  52 + env_name: str,
  53 + config_provider: Any,
  54 + capability: str,
  55 +) -> str:
  56 + provider = os.getenv(env_name) or config_provider
  57 + if not provider:
  58 + raise ValueError(
  59 + f"services.{capability}.provider is required "
  60 + f"(or set env override {env_name})"
  61 + )
  62 + return str(provider).strip().lower()
45 63
46 64
47 def _resolve_translation() -> ServiceConfig: 65 def _resolve_translation() -> ServiceConfig:
@@ -49,12 +67,13 @@ def _resolve_translation() -> ServiceConfig: @@ -49,12 +67,13 @@ def _resolve_translation() -> ServiceConfig:
49 cfg = raw.get("translation", {}) if isinstance(raw.get("translation"), dict) else {} 67 cfg = raw.get("translation", {}) if isinstance(raw.get("translation"), dict) else {}
50 providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {} 68 providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {}
51 69
52 - provider = (  
53 - os.getenv("TRANSLATION_PROVIDER")  
54 - or cfg.get("provider")  
55 - or "direct" 70 + provider = _resolve_provider_name(
  71 + env_name="TRANSLATION_PROVIDER",
  72 + config_provider=cfg.get("provider"),
  73 + capability="translation",
56 ) 74 )
57 - provider = str(provider).strip().lower() 75 + if provider not in ("direct", "local", "inprocess", "http", "service"):
  76 + raise ValueError(f"Unsupported translation provider: {provider}")
58 77
59 # Env override for http base_url 78 # Env override for http base_url
60 env_url = os.getenv("TRANSLATION_SERVICE_URL") 79 env_url = os.getenv("TRANSLATION_SERVICE_URL")
@@ -71,12 +90,13 @@ def _resolve_embedding() -> ServiceConfig: @@ -71,12 +90,13 @@ def _resolve_embedding() -> ServiceConfig:
71 cfg = raw.get("embedding", {}) if isinstance(raw.get("embedding"), dict) else {} 90 cfg = raw.get("embedding", {}) if isinstance(raw.get("embedding"), dict) else {}
72 providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {} 91 providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {}
73 92
74 - provider = (  
75 - os.getenv("EMBEDDING_PROVIDER")  
76 - or cfg.get("provider")  
77 - or "http" 93 + provider = _resolve_provider_name(
  94 + env_name="EMBEDDING_PROVIDER",
  95 + config_provider=cfg.get("provider"),
  96 + capability="embedding",
78 ) 97 )
79 - provider = str(provider).strip().lower() 98 + if provider != "http":
  99 + raise ValueError(f"Unsupported embedding provider: {provider}")
80 100
81 env_url = os.getenv("EMBEDDING_SERVICE_URL") 101 env_url = os.getenv("EMBEDDING_SERVICE_URL")
82 if env_url and provider == "http": 102 if env_url and provider == "http":
@@ -92,12 +112,13 @@ def _resolve_rerank() -> ServiceConfig: @@ -92,12 +112,13 @@ def _resolve_rerank() -> ServiceConfig:
92 cfg = raw.get("rerank", {}) if isinstance(raw.get("rerank"), dict) else {} 112 cfg = raw.get("rerank", {}) if isinstance(raw.get("rerank"), dict) else {}
93 providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {} 113 providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {}
94 114
95 - provider = (  
96 - os.getenv("RERANK_PROVIDER")  
97 - or cfg.get("provider")  
98 - or "http" 115 + provider = _resolve_provider_name(
  116 + env_name="RERANK_PROVIDER",
  117 + config_provider=cfg.get("provider"),
  118 + capability="rerank",
99 ) 119 )
100 - provider = str(provider).strip().lower() 120 + if provider != "http":
  121 + raise ValueError(f"Unsupported rerank provider: {provider}")
101 122
102 env_url = os.getenv("RERANKER_SERVICE_URL") 123 env_url = os.getenv("RERANKER_SERVICE_URL")
103 if env_url: 124 if env_url:
@@ -124,10 +145,13 @@ def get_rerank_backend_config() -> tuple[str, dict]: @@ -124,10 +145,13 @@ def get_rerank_backend_config() -> tuple[str, dict]:
124 name = ( 145 name = (
125 os.getenv("RERANK_BACKEND") 146 os.getenv("RERANK_BACKEND")
126 or cfg.get("backend") 147 or cfg.get("backend")
127 - or "qwen3_vllm"  
128 ) 148 )
  149 + if not name:
  150 + raise ValueError("services.rerank.backend is required (or env RERANK_BACKEND)")
129 name = str(name).strip().lower() 151 name = str(name).strip().lower()
130 backend_cfg = backends.get(name, {}) if isinstance(backends.get(name), dict) else {} 152 backend_cfg = backends.get(name, {}) if isinstance(backends.get(name), dict) else {}
  153 + if not backend_cfg:
  154 + raise ValueError(f"services.rerank.backends.{name} is required")
131 return name, backend_cfg 155 return name, backend_cfg
132 156
133 157
@@ -143,10 +167,13 @@ def get_embedding_backend_config() -> tuple[str, dict]: @@ -143,10 +167,13 @@ def get_embedding_backend_config() -> tuple[str, dict]:
143 name = ( 167 name = (
144 os.getenv("EMBEDDING_BACKEND") 168 os.getenv("EMBEDDING_BACKEND")
145 or cfg.get("backend") 169 or cfg.get("backend")
146 - or "tei"  
147 ) 170 )
  171 + if not name:
  172 + raise ValueError("services.embedding.backend is required (or env EMBEDDING_BACKEND)")
148 name = str(name).strip().lower() 173 name = str(name).strip().lower()
149 backend_cfg = backends.get(name, {}) if isinstance(backends.get(name), dict) else {} 174 backend_cfg = backends.get(name, {}) if isinstance(backends.get(name), dict) else {}
  175 + if not backend_cfg:
  176 + raise ValueError(f"services.embedding.backends.{name} is required")
150 return name, backend_cfg 177 return name, backend_cfg
151 178
152 179
@@ -173,8 +200,9 @@ def get_translation_base_url() -> str: @@ -173,8 +200,9 @@ def get_translation_base_url() -> str:
173 base = ( 200 base = (
174 os.getenv("TRANSLATION_SERVICE_URL") 201 os.getenv("TRANSLATION_SERVICE_URL")
175 or get_translation_config().providers.get("http", {}).get("base_url") 202 or get_translation_config().providers.get("http", {}).get("base_url")
176 - or "http://127.0.0.1:6006"  
177 ) 203 )
  204 + if not base:
  205 + raise ValueError("Translation HTTP base_url is not configured")
178 return str(base).rstrip("/") 206 return str(base).rstrip("/")
179 207
180 208
@@ -183,8 +211,9 @@ def get_embedding_base_url() -> str: @@ -183,8 +211,9 @@ def get_embedding_base_url() -> str:
183 base = ( 211 base = (
184 os.getenv("EMBEDDING_SERVICE_URL") 212 os.getenv("EMBEDDING_SERVICE_URL")
185 or get_embedding_config().providers.get("http", {}).get("base_url") 213 or get_embedding_config().providers.get("http", {}).get("base_url")
186 - or "http://127.0.0.1:6005"  
187 ) 214 )
  215 + if not base:
  216 + raise ValueError("Embedding HTTP base_url is not configured")
188 return str(base).rstrip("/") 217 return str(base).rstrip("/")
189 218
190 219
@@ -194,8 +223,9 @@ def get_rerank_service_url() -> str: @@ -194,8 +223,9 @@ def get_rerank_service_url() -> str:
194 os.getenv("RERANKER_SERVICE_URL") 223 os.getenv("RERANKER_SERVICE_URL")
195 or get_rerank_config().providers.get("http", {}).get("service_url") 224 or get_rerank_config().providers.get("http", {}).get("service_url")
196 or get_rerank_config().providers.get("http", {}).get("base_url") 225 or get_rerank_config().providers.get("http", {}).get("base_url")
197 - or "http://127.0.0.1:6007"  
198 ) 226 )
  227 + if not base:
  228 + raise ValueError("Rerank HTTP service_url/base_url is not configured")
199 base = str(base).rstrip("/") 229 base = str(base).rstrip("/")
200 return base if base.endswith("/rerank") else f"{base}/rerank" 230 return base if base.endswith("/rerank") else f"{base}/rerank"
201 231
config/tenant_config_loader.py
@@ -51,18 +51,16 @@ SUPPORTED_INDEX_LANGUAGES: Dict[str, str] = { @@ -51,18 +51,16 @@ SUPPORTED_INDEX_LANGUAGES: Dict[str, str] = {
51 "bg": "Bulgarian", 51 "bg": "Bulgarian",
52 } 52 }
53 53
54 -DEFAULT_INDEX_LANGUAGES: List[str] = ["en", "zh"]  
55 -  
56 -  
57 def normalize_index_languages(value: Any, primary_language: str = "en") -> List[str]: 54 def normalize_index_languages(value: Any, primary_language: str = "en") -> List[str]:
58 """ 55 """
59 将 index_languages 配置规范化为合法语言代码列表。 56 将 index_languages 配置规范化为合法语言代码列表。
60 - None 或空时返回 DEFAULT_INDEX_LANGUAGES 57 + 仅做规范化,不做默认值兜底
61 """ 58 """
  59 + del primary_language
62 if value is None: 60 if value is None:
63 - return list(DEFAULT_INDEX_LANGUAGES) 61 + return []
64 if not isinstance(value, (list, tuple)): 62 if not isinstance(value, (list, tuple)):
65 - return list(DEFAULT_INDEX_LANGUAGES) 63 + return []
66 valid: List[str] = [] 64 valid: List[str] = []
67 seen: set = set() 65 seen: set = set()
68 for item in value: 66 for item in value:
@@ -72,19 +70,23 @@ def normalize_index_languages(value: Any, primary_language: str = "en") -> List[ @@ -72,19 +70,23 @@ def normalize_index_languages(value: Any, primary_language: str = "en") -> List[
72 if code in SUPPORTED_INDEX_LANGUAGES: 70 if code in SUPPORTED_INDEX_LANGUAGES:
73 valid.append(code) 71 valid.append(code)
74 seen.add(code) 72 seen.add(code)
75 - return valid if valid else list(DEFAULT_INDEX_LANGUAGES) 73 + return valid
76 74
77 75
78 -def resolve_index_languages(tenant_config: Dict[str, Any]) -> List[str]: 76 +def resolve_index_languages(
  77 + tenant_config: Dict[str, Any],
  78 + default_index_languages: List[str],
  79 +) -> List[str]:
79 """ 80 """
80 从租户配置解析 index_languages。 81 从租户配置解析 index_languages。
81 若存在 index_languages 则用之;否则按旧配置 translate_to_en / translate_to_zh 推导。 82 若存在 index_languages 则用之;否则按旧配置 translate_to_en / translate_to_zh 推导。
82 """ 83 """
83 if "index_languages" in tenant_config: 84 if "index_languages" in tenant_config:
84 - return normalize_index_languages( 85 + normalized = normalize_index_languages(
85 tenant_config["index_languages"], 86 tenant_config["index_languages"],
86 tenant_config.get("primary_language") or "en", 87 tenant_config.get("primary_language") or "en",
87 ) 88 )
  89 + return normalized if normalized else list(default_index_languages)
88 primary = (tenant_config.get("primary_language") or "en").strip().lower() 90 primary = (tenant_config.get("primary_language") or "en").strip().lower()
89 to_en = bool(tenant_config.get("translate_to_en")) 91 to_en = bool(tenant_config.get("translate_to_en"))
90 to_zh = bool(tenant_config.get("translate_to_zh")) 92 to_zh = bool(tenant_config.get("translate_to_zh"))
@@ -95,7 +97,7 @@ def resolve_index_languages(tenant_config: Dict[str, Any]) -> List[str]: @@ -95,7 +97,7 @@ def resolve_index_languages(tenant_config: Dict[str, Any]) -> List[str]:
95 if code not in langs and ((code == "en" and to_en) or (code == "zh" and to_zh)): 97 if code not in langs and ((code == "en" and to_en) or (code == "zh" and to_zh)):
96 if code in SUPPORTED_INDEX_LANGUAGES: 98 if code in SUPPORTED_INDEX_LANGUAGES:
97 langs.append(code) 99 langs.append(code)
98 - return langs if langs else list(DEFAULT_INDEX_LANGUAGES) 100 + return langs if langs else list(default_index_languages)
99 101
100 102
101 class TenantConfigLoader: 103 class TenantConfigLoader:
@@ -117,18 +119,43 @@ class TenantConfigLoader: @@ -117,18 +119,43 @@ class TenantConfigLoader:
117 119
118 try: 120 try:
119 from config import ConfigLoader 121 from config import ConfigLoader
  122 +
120 config_loader = ConfigLoader() 123 config_loader = ConfigLoader()
121 search_config = config_loader.load_config() 124 search_config = config_loader.load_config()
122 - self._config = search_config.tenant_config 125 + tenant_cfg = search_config.tenant_config
  126 + if not isinstance(tenant_cfg, dict):
  127 + raise RuntimeError("tenant_config must be an object")
  128 +
  129 + default_cfg = tenant_cfg.get("default")
  130 + if not isinstance(default_cfg, dict):
  131 + raise RuntimeError("tenant_config.default must be configured in config.yaml")
  132 + default_primary = (default_cfg.get("primary_language") or "en").strip().lower()
  133 + default_index_languages = normalize_index_languages(
  134 + default_cfg.get("index_languages"),
  135 + default_primary,
  136 + )
  137 + if not default_index_languages:
  138 + raise RuntimeError(
  139 + "tenant_config.default.index_languages must include at least one supported language"
  140 + )
  141 +
  142 + tenants_cfg = tenant_cfg.get("tenants", {})
  143 + if not isinstance(tenants_cfg, dict):
  144 + raise RuntimeError("tenant_config.tenants must be an object")
  145 +
  146 + normalized_default = dict(default_cfg)
  147 + normalized_default["primary_language"] = default_primary
  148 + normalized_default["index_languages"] = default_index_languages
  149 +
  150 + self._config = {
  151 + "default": normalized_default,
  152 + "tenants": tenants_cfg,
  153 + }
123 logger.info("Loaded tenant config from unified config.yaml") 154 logger.info("Loaded tenant config from unified config.yaml")
124 return self._config 155 return self._config
125 except Exception as e: 156 except Exception as e:
126 logger.error(f"Failed to load tenant config: {e}", exc_info=True) 157 logger.error(f"Failed to load tenant config: {e}", exc_info=True)
127 - self._config = {  
128 - "default": {"primary_language": "en", "index_languages": ["en", "zh"]},  
129 - "tenants": {},  
130 - }  
131 - return self._config 158 + raise
132 159
133 def get_tenant_config(self, tenant_id: str) -> Dict[str, Any]: 160 def get_tenant_config(self, tenant_id: str) -> Dict[str, Any]:
134 """ 161 """
@@ -142,13 +169,20 @@ class TenantConfigLoader: @@ -142,13 +169,20 @@ class TenantConfigLoader:
142 """ 169 """
143 config = self.load_config() 170 config = self.load_config()
144 tenant_id_str = str(tenant_id) 171 tenant_id_str = str(tenant_id)
145 - default = config.get("default", {"primary_language": "en", "index_languages": ["en", "zh"]}) 172 + default = config["default"]
146 tenants = config.get("tenants", {}) 173 tenants = config.get("tenants", {})
147 - raw = tenants[tenant_id_str] if tenant_id_str in tenants else default 174 + raw = tenants[tenant_id_str] if tenant_id_str in tenants else {}
  175 + if raw and not isinstance(raw, dict):
  176 + raise RuntimeError(f"tenant_config.tenants.{tenant_id_str} must be an object")
148 if tenant_id_str not in tenants: 177 if tenant_id_str not in tenants:
149 logger.debug(f"Tenant {tenant_id} not found in config, using default") 178 logger.debug(f"Tenant {tenant_id} not found in config, using default")
150 - out = dict(raw)  
151 - out["index_languages"] = resolve_index_languages(raw) 179 + merged = dict(default)
  180 + merged.update(raw)
  181 + out = dict(merged)
  182 + out["index_languages"] = resolve_index_languages(
  183 + merged,
  184 + default_index_languages=default["index_languages"],
  185 + )
152 return out 186 return out
153 187
154 def reload(self): 188 def reload(self):
@@ -167,4 +201,3 @@ def get_tenant_config_loader() -> TenantConfigLoader: @@ -167,4 +201,3 @@ def get_tenant_config_loader() -> TenantConfigLoader:
167 if _tenant_config_loader is None: 201 if _tenant_config_loader is None:
168 _tenant_config_loader = TenantConfigLoader() 202 _tenant_config_loader = TenantConfigLoader()
169 return _tenant_config_loader 203 return _tenant_config_loader
170 -  
docs/DEVELOPER_GUIDE.md
@@ -218,9 +218,18 @@ docs/ # 文档(含本指南) @@ -218,9 +218,18 @@ docs/ # 文档(含本指南)
218 218
219 ### 5.7 错误处理 219 ### 5.7 错误处理
220 220
221 -- 外部能力(翻译、向量、重排)调用失败时应立即报错并中止请求,禁止静默降级;通过日志与监控尽早暴露问题并修复。 221 +- **启动期(fail-fast)**:配置加载、依赖初始化、模型/资源加载失败必须直接启动失败,不允许“继续启动但不可用”。
  222 +- **运行期(分层处理)**:
  223 + - 核心链路(ES 主检索)失败:请求直接失败并返回错误,禁止伪装成空结果。
  224 + - 增强链路(翻译/向量/重排)失败:记录 warning,不应拖垮主检索流程。
  225 +- 禁止通过“静默回退到另一套实现”掩盖问题;默认行为必须由配置显式给出。
222 226
223 -### 5.8 环境隔离 227 +### 5.8 启动初始化约束
  228 +
  229 +- 重资源与关键依赖(如 translator、text/image encoder)应在服务启动期初始化一次并复用,避免请求期懒加载。
  230 +- 若配置声明启用某能力(例如 GPU 后端),但运行资源不满足,应直接启动失败,不自动降级为其它后端。
  231 +
  232 +### 5.9 环境隔离
224 233
225 - 主程序(backend/indexer/frontend)只使用 `.venv`,不引入 `clip-as-service`/`vllm` 的重依赖。 234 - 主程序(backend/indexer/frontend)只使用 `.venv`,不引入 `clip-as-service`/`vllm` 的重依赖。
226 - embedding 服务使用 `.venv-embedding`;reranker 服务使用 `.venv-reranker`;CN-CLIP 使用 `.venv-cnclip`;TEI 使用 Docker 容器。 235 - embedding 服务使用 `.venv-embedding`;reranker 服务使用 `.venv-reranker`;CN-CLIP 使用 `.venv-cnclip`;TEI 使用 Docker 容器。
docs/QUICKSTART.md
@@ -102,7 +102,7 @@ curl "http://localhost:6002/search/suggestions?q=玩&size=5" \ @@ -102,7 +102,7 @@ curl "http://localhost:6002/search/suggestions?q=玩&size=5" \
102 102
103 # 健康与统计 103 # 健康与统计
104 curl http://localhost:6002/admin/health 104 curl http://localhost:6002/admin/health
105 -curl http://localhost:6002/admin/stats 105 +curl http://localhost:6002/admin/stats -H "X-Tenant-ID: 162"
106 ``` 106 ```
107 107
108 API 文档:`http://localhost:6002/docs` 108 API 文档:`http://localhost:6002/docs`
docs/Usage-Guide.md
@@ -385,7 +385,7 @@ curl http://localhost:6002/admin/health @@ -385,7 +385,7 @@ curl http://localhost:6002/admin/health
385 ### 2. 索引统计 385 ### 2. 索引统计
386 386
387 ```bash 387 ```bash
388 -curl http://localhost:6002/admin/stats 388 +curl http://localhost:6002/admin/stats -H "X-Tenant-ID: 162"
389 ``` 389 ```
390 390
391 ### 3. 简单搜索测试 391 ### 3. 简单搜索测试
docs/搜索API对接指南.md
@@ -139,7 +139,7 @@ curl -X POST "http://43.166.252.75:6002/search/" \ @@ -139,7 +139,7 @@ curl -X POST "http://43.166.252.75:6002/search/" \
139 |------|------|------|------| 139 |------|------|------|------|
140 | 搜索 | POST | `/search/` | 执行搜索查询 | 140 | 搜索 | POST | `/search/` | 执行搜索查询 |
141 | 搜索建议 | GET | `/search/suggestions` | 搜索建议(自动补全/热词,多语言 + 结果直达) | 141 | 搜索建议 | GET | `/search/suggestions` | 搜索建议(自动补全/热词,多语言 + 结果直达) |
142 -| 即时搜索 | GET | `/search/instant` | 边输入边搜索(框架) ⚠️ TODO | 142 +| 即时搜索 | GET | `/search/instant` | 即时搜索预留接口(当前返回 `501 Not Implemented`) |
143 | 获取文档 | GET | `/search/{doc_id}` | 获取单个文档 | 143 | 获取文档 | GET | `/search/{doc_id}` | 获取单个文档 |
144 | 全量索引 | POST | `/indexer/reindex` | 全量索引接口(导入数据,不删除索引,仅推荐自测使用) | 144 | 全量索引 | POST | `/indexer/reindex` | 全量索引接口(导入数据,不删除索引,仅推荐自测使用) |
145 | 增量索引 | POST | `/indexer/index` | 增量索引接口(指定SPU ID列表进行索引,支持自动检测删除和显式删除,仅推荐自测使用) | 145 | 增量索引 | POST | `/indexer/index` | 增量索引接口(指定SPU ID列表进行索引,支持自动检测删除和显式删除,仅推荐自测使用) |
@@ -149,7 +149,7 @@ curl -X POST "http://43.166.252.75:6002/search/" \ @@ -149,7 +149,7 @@ curl -X POST "http://43.166.252.75:6002/search/" \
149 | 索引健康检查 | GET | `/indexer/health` | 检查索引服务状态 | 149 | 索引健康检查 | GET | `/indexer/health` | 检查索引服务状态 |
150 | 健康检查 | GET | `/admin/health` | 服务健康检查 | 150 | 健康检查 | GET | `/admin/health` | 服务健康检查 |
151 | 获取配置 | GET | `/admin/config` | 获取租户配置 | 151 | 获取配置 | GET | `/admin/config` | 获取租户配置 |
152 -| 索引统计 | GET | `/admin/stats` | 获取索引统计信息 | 152 +| 索引统计 | GET | `/admin/stats` | 获取租户索引统计信息(需 tenant_id) |
153 153
154 **微服务(独立端口,外部可直连)**: 154 **微服务(独立端口,外部可直连)**:
155 155
@@ -633,10 +633,10 @@ curl "http://localhost:6002/search/suggestions?q=芭&size=5&language=zh&with_res @@ -633,10 +633,10 @@ curl "http://localhost:6002/search/suggestions?q=芭&size=5&language=zh&with_res
633 633
634 ### 3.8 即时搜索接口 634 ### 3.8 即时搜索接口
635 635
636 -> ⚠️ **TODO**: 此接口当前为框架实现,功能暂未实现,调用标准搜索接口。后续需要优化即时搜索性能(添加防抖/节流、实现结果缓存、简化返回字段) 636 +> ⚠️ 当前版本未开放该能力。接口会明确返回 `501 Not Implemented`,避免误用未完成实现
637 637
638 - **端点**: `GET /search/instant` 638 - **端点**: `GET /search/instant`
639 -- **描述**: 边输入边搜索,采用轻量参数响应当前输入。底层复用标准搜索能力 639 +- **描述**: 即时搜索预留端点,后续会在独立实现完成后开放
640 640
641 #### 查询参数 641 #### 查询参数
642 642
@@ -651,6 +651,15 @@ curl "http://localhost:6002/search/suggestions?q=芭&size=5&language=zh&with_res @@ -651,6 +651,15 @@ curl "http://localhost:6002/search/suggestions?q=芭&size=5&language=zh&with_res
651 curl "http://localhost:6002/search/instant?q=玩具&size=5" 651 curl "http://localhost:6002/search/instant?q=玩具&size=5"
652 ``` 652 ```
653 653
  654 +#### 当前响应
  655 +
  656 +```json
  657 +{
  658 + "error": "/search/instant is not implemented yet. Use POST /search/ for production traffic.",
  659 + "status_code": 501
  660 +}
  661 +```
  662 +
654 ### 3.9 获取单个文档 663 ### 3.9 获取单个文档
655 664
656 - **端点**: `GET /search/{doc_id}` 665 - **端点**: `GET /search/{doc_id}`
@@ -1534,11 +1543,13 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ @@ -1534,11 +1543,13 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
1534 ### 6.3 索引统计 1543 ### 6.3 索引统计
1535 1544
1536 - **端点**: `GET /admin/stats` 1545 - **端点**: `GET /admin/stats`
1537 -- **描述**: 获取索引文档数量与磁盘大小,方便监控。 1546 +- **描述**: 获取指定租户索引文档数量与磁盘大小,方便监控。
  1547 +- **租户标识**:通过请求头 `X-Tenant-ID` 或 query 参数 `tenant_id` 传递(必填)。
1538 1548
1539 ```json 1549 ```json
1540 { 1550 {
1541 - "index_name": "search_tenant1", 1551 + "tenant_id": "162",
  1552 + "index_name": "search_products_tenant_162",
1542 "document_count": 10000, 1553 "document_count": 10000,
1543 "size_mb": 523.45 1554 "size_mb": 523.45
1544 } 1555 }
docs/搜索API速查表.md
@@ -296,13 +296,13 @@ POST /search/image @@ -296,13 +296,13 @@ POST /search/image
296 296
297 GET /search/suggestions?q=芭&size=5&language=zh&with_results=true 297 GET /search/suggestions?q=芭&size=5&language=zh&with_results=true
298 298
299 -GET /search/instant?q=玩具&size=5 299 +GET /search/instant?q=玩具&size=5 # 当前返回 501 Not Implemented
300 300
301 GET /search/{doc_id} 301 GET /search/{doc_id}
302 302
303 GET /admin/health 303 GET /admin/health
304 GET /admin/config 304 GET /admin/config
305 -GET /admin/stats 305 +GET /admin/stats # 需 X-Tenant-ID 或 tenant_id
306 ``` 306 ```
307 307
308 --- 308 ---
providers/embedding.py
@@ -22,15 +22,19 @@ class EmbeddingProvider: @@ -22,15 +22,19 @@ class EmbeddingProvider:
22 22
23 def __init__(self) -> None: 23 def __init__(self) -> None:
24 self._base_url = get_embedding_base_url() 24 self._base_url = get_embedding_base_url()
  25 + from embeddings.text_encoder import TextEmbeddingEncoder
  26 + from embeddings.image_encoder import CLIPImageEncoder
  27 +
  28 + # Initialize once; avoid per-access instantiation.
  29 + self._text_encoder = TextEmbeddingEncoder(service_url=self._base_url)
  30 + self._image_encoder = CLIPImageEncoder(service_url=self._base_url)
25 31
26 @property 32 @property
27 def text_encoder(self): 33 def text_encoder(self):
28 - """Lazy-created text encoder (TextEmbeddingEncoder)."""  
29 - from embeddings.text_encoder import TextEmbeddingEncoder  
30 - return TextEmbeddingEncoder(service_url=self._base_url) 34 + """Pre-initialized text encoder (TextEmbeddingEncoder)."""
  35 + return self._text_encoder
31 36
32 @property 37 @property
33 def image_encoder(self): 38 def image_encoder(self):
34 - """Lazy-created image encoder (CLIPImageEncoder)."""  
35 - from embeddings.image_encoder import CLIPImageEncoder  
36 - return CLIPImageEncoder(service_url=self._base_url) 39 + """Pre-initialized image encoder (CLIPImageEncoder)."""
  40 + return self._image_encoder
query/query_parser.py
@@ -85,8 +85,8 @@ class QueryParser: @@ -85,8 +85,8 @@ class QueryParser:
85 85
86 Args: 86 Args:
87 config: SearchConfig instance 87 config: SearchConfig instance
88 - text_encoder: Text embedding encoder (lazy loaded if not provided)  
89 - translator: Translator instance (lazy loaded if not provided) 88 + text_encoder: Text embedding encoder (initialized at startup if not provided)
  89 + translator: Translator instance (initialized at startup if not provided)
90 """ 90 """
91 self.config = config 91 self.config = config
92 self._text_encoder = text_encoder 92 self._text_encoder = text_encoder
@@ -114,22 +114,24 @@ class QueryParser: @@ -114,22 +114,24 @@ class QueryParser:
114 else: 114 else:
115 logger.info("HanLP not installed; using simple tokenizer") 115 logger.info("HanLP not installed; using simple tokenizer")
116 116
  117 + # Eager initialization (startup-time failure visibility, no lazy init in request path)
  118 + if self.config.query_config.enable_text_embedding and self._text_encoder is None:
  119 + logger.info("Initializing text encoder at QueryParser construction...")
  120 + self._text_encoder = TextEmbeddingEncoder()
  121 + if self._translator is None:
  122 + from config.services_config import get_translation_config
  123 + cfg = get_translation_config()
  124 + logger.info("Initializing translator at QueryParser construction (provider=%s)...", cfg.provider)
  125 + self._translator = create_translation_provider(self.config.query_config)
  126 +
117 @property 127 @property
118 def text_encoder(self) -> TextEmbeddingEncoder: 128 def text_encoder(self) -> TextEmbeddingEncoder:
119 - """Lazy load text encoder."""  
120 - if self._text_encoder is None and self.config.query_config.enable_text_embedding:  
121 - logger.info("Initializing text encoder (lazy load)...")  
122 - self._text_encoder = TextEmbeddingEncoder() 129 + """Return pre-initialized text encoder."""
123 return self._text_encoder 130 return self._text_encoder
124 131
125 @property 132 @property
126 def translator(self) -> Any: 133 def translator(self) -> Any:
127 - """Lazy load translator."""  
128 - if self._translator is None:  
129 - from config.services_config import get_translation_config  
130 - cfg = get_translation_config()  
131 - logger.info("Initializing translator (provider=%s)...", cfg.provider)  
132 - self._translator = create_translation_provider(self.config.query_config) 134 + """Return pre-initialized translator."""
133 return self._translator 135 return self._translator
134 136
135 def _simple_tokenize(self, text: str) -> List[str]: 137 def _simple_tokenize(self, text: str) -> List[str]:
@@ -343,6 +345,8 @@ class QueryParser: @@ -343,6 +345,8 @@ class QueryParser:
343 encoding_executor = None 345 encoding_executor = None
344 if should_generate_embedding: 346 if should_generate_embedding:
345 try: 347 try:
  348 + if self.text_encoder is None:
  349 + raise RuntimeError("Text embedding is enabled but text encoder is not initialized")
346 log_debug("Starting query vector generation (async)") 350 log_debug("Starting query vector generation (async)")
347 # Submit encoding task to thread pool for async execution 351 # Submit encoding task to thread pool for async execution
348 encoding_executor = ThreadPoolExecutor(max_workers=1) 352 encoding_executor = ThreadPoolExecutor(max_workers=1)
search/es_query_builder.py
@@ -50,6 +50,22 @@ class ESQueryBuilder: @@ -50,6 +50,22 @@ class ESQueryBuilder:
50 self.default_language = default_language 50 self.default_language = default_language
51 self.knn_boost = knn_boost 51 self.knn_boost = knn_boost
52 52
  53 + def _apply_source_filter(self, es_query: Dict[str, Any]) -> None:
  54 + """
  55 + Apply tri-state _source semantics:
  56 + - None: do not set _source (return all source fields)
  57 + - []: _source=false
  58 + - [..]: _source.includes=[..]
  59 + """
  60 + if self.source_fields is None:
  61 + return
  62 + if not isinstance(self.source_fields, list):
  63 + raise ValueError("query_config.source_fields must be null or list[str]")
  64 + if len(self.source_fields) == 0:
  65 + es_query["_source"] = False
  66 + return
  67 + es_query["_source"] = {"includes": self.source_fields}
  68 +
53 def _split_filters_for_faceting( 69 def _split_filters_for_faceting(
54 self, 70 self,
55 filters: Optional[Dict[str, Any]], 71 filters: Optional[Dict[str, Any]],
@@ -146,11 +162,8 @@ class ESQueryBuilder: @@ -146,11 +162,8 @@ class ESQueryBuilder:
146 "from": from_ 162 "from": from_
147 } 163 }
148 164
149 - # Add _source filtering if source_fields are configured  
150 - if self.source_fields:  
151 - es_query["_source"] = {  
152 - "includes": self.source_fields  
153 - } 165 + # Add _source filtering with explicit tri-state semantics.
  166 + self._apply_source_filter(es_query)
154 167
155 # 1. Build recall queries (text or embedding) 168 # 1. Build recall queries (text or embedding)
156 recall_clauses = [] 169 recall_clauses = []
search/searcher.py
@@ -82,7 +82,8 @@ class Searcher: @@ -82,7 +82,8 @@ class Searcher:
82 self, 82 self,
83 es_client: ESClient, 83 es_client: ESClient,
84 config: SearchConfig, 84 config: SearchConfig,
85 - query_parser: Optional[QueryParser] = None 85 + query_parser: Optional[QueryParser] = None,
  86 + image_encoder: Optional[CLIPImageEncoder] = None,
86 ): 87 ):
87 """ 88 """
88 Initialize searcher. 89 Initialize searcher.
@@ -91,6 +92,7 @@ class Searcher: @@ -91,6 +92,7 @@ class Searcher:
91 es_client: Elasticsearch client 92 es_client: Elasticsearch client
92 config: SearchConfig instance 93 config: SearchConfig instance
93 query_parser: Query parser (created if not provided) 94 query_parser: Query parser (created if not provided)
  95 + image_encoder: Optional pre-initialized image encoder
94 """ 96 """
95 self.es_client = es_client 97 self.es_client = es_client
96 self.config = config 98 self.config = config
@@ -103,8 +105,12 @@ class Searcher: @@ -103,8 +105,12 @@ class Searcher:
103 # Get match fields from config 105 # Get match fields from config
104 self.match_fields = get_match_fields_for_index(config, "default") 106 self.match_fields = get_match_fields_for_index(config, "default")
105 self.text_embedding_field = config.query_config.text_embedding_field or "title_embedding" 107 self.text_embedding_field = config.query_config.text_embedding_field or "title_embedding"
106 - self.image_embedding_field = config.query_config.image_embedding_field or "image_embedding"  
107 - self.source_fields = config.query_config.source_fields or [] 108 + self.image_embedding_field = config.query_config.image_embedding_field
  109 + if self.image_embedding_field and image_encoder is None:
  110 + self.image_encoder = CLIPImageEncoder()
  111 + else:
  112 + self.image_encoder = image_encoder
  113 + self.source_fields = config.query_config.source_fields
108 114
109 # Query builder - simplified single-layer architecture 115 # Query builder - simplified single-layer architecture
110 self.query_builder = ESQueryBuilder( 116 self.query_builder = ESQueryBuilder(
@@ -118,6 +124,22 @@ class Searcher: @@ -118,6 +124,22 @@ class Searcher:
118 knn_boost=self.config.query_config.knn_boost 124 knn_boost=self.config.query_config.knn_boost
119 ) 125 )
120 126
  127 + def _apply_source_filter(self, es_query: Dict[str, Any]) -> None:
  128 + """
  129 + Apply tri-state _source semantics:
  130 + - None: do not set _source (return full source)
  131 + - []: _source=false (return no source fields)
  132 + - [..]: _source.includes=[..]
  133 + """
  134 + if self.source_fields is None:
  135 + return
  136 + if not isinstance(self.source_fields, list):
  137 + raise ValueError("query_config.source_fields must be null or list[str]")
  138 + if len(self.source_fields) == 0:
  139 + es_query["_source"] = False
  140 + return
  141 + es_query["_source"] = {"includes": self.source_fields}
  142 +
121 def search( 143 def search(
122 self, 144 self,
123 query: str, 145 query: str,
@@ -568,8 +590,9 @@ class Searcher: @@ -568,8 +590,9 @@ class Searcher:
568 raise ValueError("Image embedding field not configured") 590 raise ValueError("Image embedding field not configured")
569 591
570 # Generate image embedding 592 # Generate image embedding
571 - image_encoder = CLIPImageEncoder()  
572 - image_vector = image_encoder.encode_image_from_url(image_url) 593 + if self.image_encoder is None:
  594 + raise RuntimeError("Image encoder is not initialized at startup")
  595 + image_vector = self.image_encoder.encode_image_from_url(image_url)
573 596
574 if image_vector is None: 597 if image_vector is None:
575 raise ValueError(f"Failed to encode image: {image_url}") 598 raise ValueError(f"Failed to encode image: {image_url}")
@@ -590,11 +613,8 @@ class Searcher: @@ -590,11 +613,8 @@ class Searcher:
590 } 613 }
591 } 614 }
592 615
593 - # Add _source filtering if source_fields are configured  
594 - if self.source_fields:  
595 - es_query["_source"] = {  
596 - "includes": self.source_fields  
597 - } 616 + # Apply source filtering semantics (None / [] / list)
  617 + self._apply_source_filter(es_query)
598 618
599 if filters or range_filters: 619 if filters or range_filters:
600 filter_clauses = self.query_builder._build_filters(filters, range_filters) 620 filter_clauses = self.query_builder._build_filters(filters, range_filters)
tests/ci/test_service_api_contracts.py
@@ -95,6 +95,52 @@ def test_suggestion_api_contract(search_client: TestClient): @@ -95,6 +95,52 @@ def test_suggestion_api_contract(search_client: TestClient):
95 assert len(data["suggestions"]) == 1 95 assert len(data["suggestions"]) == 1
96 96
97 97
  98 +def test_instant_search_not_implemented(search_client: TestClient):
  99 + response = search_client.get("/search/instant?q=iph&size=5")
  100 + assert response.status_code == 501
  101 +
  102 +
  103 +def test_admin_stats_contract(search_client: TestClient, monkeypatch):
  104 + import api.app as search_app
  105 +
  106 + class _FakeIndices:
  107 + @staticmethod
  108 + def exists(index: str) -> bool:
  109 + return index.endswith("search_products_tenant_162")
  110 +
  111 + @staticmethod
  112 + def stats(index: str):
  113 + return {
  114 + "indices": {
  115 + index: {
  116 + "total": {
  117 + "store": {
  118 + "size_in_bytes": 2 * 1024 * 1024,
  119 + }
  120 + }
  121 + }
  122 + }
  123 + }
  124 +
  125 + class _FakeClient:
  126 + indices = _FakeIndices()
  127 +
  128 + @staticmethod
  129 + def count(index: str):
  130 + assert index.endswith("search_products_tenant_162")
  131 + return {"count": 123}
  132 +
  133 + monkeypatch.setattr(search_app, "get_es_client", lambda: SimpleNamespace(client=_FakeClient()))
  134 +
  135 + response = search_client.get("/admin/stats", headers={"X-Tenant-ID": "162"})
  136 + assert response.status_code == 200
  137 + data = response.json()
  138 + assert data["tenant_id"] == "162"
  139 + assert data["index_name"].endswith("search_products_tenant_162")
  140 + assert data["document_count"] == 123
  141 + assert data["size_mb"] == 2.0
  142 +
  143 +
98 class _FakeBulkService: 144 class _FakeBulkService:
99 def bulk_index(self, tenant_id: str, recreate_index: bool, batch_size: int): 145 def bulk_index(self, tenant_id: str, recreate_index: bool, batch_size: int):
100 return { 146 return {
utils/es_client.py
@@ -193,13 +193,7 @@ class ESClient: @@ -193,13 +193,7 @@ class ESClient:
193 ) 193 )
194 except Exception as e: 194 except Exception as e:
195 logger.error(f"Search failed: {e}", exc_info=True) 195 logger.error(f"Search failed: {e}", exc_info=True)
196 - return {  
197 - 'hits': {  
198 - 'total': {'value': 0},  
199 - 'hits': []  
200 - },  
201 - 'error': str(e)  
202 - } 196 + raise RuntimeError(f"Elasticsearch search failed for index '{index_name}': {e}") from e
203 197
204 def get_mapping(self, index_name: str) -> Dict[str, Any]: 198 def get_mapping(self, index_name: str) -> Dict[str, Any]:
205 """Get index mapping.""" 199 """Get index mapping."""
@@ -234,7 +228,7 @@ class ESClient: @@ -234,7 +228,7 @@ class ESClient:
234 return result['count'] 228 return result['count']
235 except Exception as e: 229 except Exception as e:
236 logger.error(f"Count failed: {e}", exc_info=True) 230 logger.error(f"Count failed: {e}", exc_info=True)
237 - return 0 231 + raise RuntimeError(f"Elasticsearch count failed for index '{index_name}': {e}") from e
238 232
239 233
240 def get_es_client_from_env() -> ESClient: 234 def get_es_client_from_env() -> ESClient: