tangwang · tangwang · tangwang · tangwang
Showing 21 changed files Show diff stats
api/app.py
api/models.py
api/result_formatter.py
api/routes/search.py
config/__init__.py
config/config.yaml
config/config_loader.py
config/utils.py
context/request_context.py
docs/常用查询 - ES.md
docs/搜索API对接指南.md
frontend/index.html
frontend/static/css/style.css
frontend/static/js/app.js
main.py
query/query_parser.py
search/es_query_builder.py
search/query_config.py
search/searcher.py
tests/conftest.py
@@ -41,15 +41,16 @@ limiter = Limiter(key_func=get_remote_address)
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
 from config.env_config import ES_CONFIG
+from config import ConfigLoader
 from utils import ESClient
 from search import Searcher
-from search.query_config import DEFAULT_INDEX_NAME
 from query import QueryParser
  
 # Global instances
 _es_client: Optional[ESClient] = None
 _searcher: Optional[Searcher] = None
 _query_parser: Optional[QueryParser] = None
+_config = None
  
  
 def init_service(es_host: str = "http://localhost:9200"):
@@ -59,11 +60,17 @@ def init_service(es_host: str = "http://localhost:9200"):
     Args:
         es_host: Elasticsearch host URL
     """
-    global _es_client, _searcher, _query_parser
+    global _es_client, _searcher, _query_parser, _config
  
     start_time = time.time()
     logger.info("Initializing search service (multi-tenant)")
  
+    # Load configuration
+    logger.info("Loading configuration...")
+    config_loader = ConfigLoader("config/config.yaml")
+    _config = config_loader.load_config()
+    logger.info("Configuration loaded")
+
     # Get ES credentials
     es_username = os.getenv('ES_USERNAME') or ES_CONFIG.get('username')
     es_password = os.getenv('ES_PASSWORD') or ES_CONFIG.get('password')
@@ -81,13 +88,13 @@ def init_service(es_host: str = "http://localhost:9200"):
  
     # Initialize components
     logger.info("Initializing query parser...")
-    _query_parser = QueryParser()
+    _query_parser = QueryParser(_config)
  
     logger.info("Initializing searcher...")
-    _searcher = Searcher(_es_client, _query_parser, index_name=DEFAULT_INDEX_NAME)
+    _searcher = Searcher(_es_client, _config, _query_parser)
  
     elapsed = time.time() - start_time
-    logger.info(f"Search service ready! (took {elapsed:.2f}s) | Index: {DEFAULT_INDEX_NAME}")
+    logger.info(f"Search service ready! (took {elapsed:.2f}s) | Index: {_config.es_index_name}")
  
  
  
@@ -113,6 +120,13 @@ def get_query_parser() -> QueryParser:
     return _query_parser
  
  
+def get_config():
+    """Return the module-level configuration object.
+
+    The value is the global ``_config`` that ``init_service()`` assigns
+    after loading the configuration.
+
+    Raises:
+        RuntimeError: If ``_config`` is still ``None``, i.e. the service
+            has not been initialized yet.
+    """
+    if _config is not None:
+        return _config
+    raise RuntimeError("Service not initialized")
+
+
 # Create FastAPI app with enhanced configuration
 app = FastAPI(
     title="E-Commerce Search API",
@@ -146,9 +146,14 @@ class SearchRequest(BaseModel):
     debug: bool = Field(False, description="是否返回调试信息")
  
     # SKU筛选参数
-    sku_filter_dimension: Optional[str] = Field(
+    sku_filter_dimension: Optional[List[str]] = Field(
         None,
-        description="子SKU筛选维度（店铺配置）。指定后，每个SPU下的SKU将按该维度分组，每组选择第一个SKU返回。例如：'color'表示按颜色分组，每种颜色选一款。支持的值：'option1'、'option2'、'option3'或specifications中的name（如'color'、'size'）"
+        description=(
+            "子SKU筛选维度（店铺配置），为字符串列表。"
+            "指定后，每个SPU下的SKU将按这些维度的组合进行分组，每个维度组合只保留一个SKU返回。"
+            "例如：['color'] 表示按颜色分组，每种颜色选一款；['color', 'size'] 表示按颜色+尺码组合分组。"
+            "支持的值：'option1'、'option2'、'option3' 或选项名称（如 'color'、'size'，将通过 option1_name/2_name/3_name 匹配）。"
+        )
     )
  
     # 个性化参数（预留）
@@ -14,7 +14,7 @@ class ResultFormatter:
         es_hits: List[Dict[str, Any]],
         max_score: float = 1.0,
         language: str = "zh",
-        sku_filter_dimension: Optional[str] = None
+        sku_filter_dimension: Optional[List[str]] = None
     ) -> List[SpuResult]:
         """
         Convert ES hits to SpuResult list.
@@ -85,10 +85,10 @@ class ResultFormatter:
                     )
                     skus.append(sku)
  
-            # Apply SKU filtering if dimension is specified
+            # Apply SKU filtering if dimension list is specified
             if sku_filter_dimension and skus:
-                skus = ResultFormatter._filter_skus_by_dimension(
-                    skus, 
+                skus = ResultFormatter._filter_skus_by_dimensions(
+                    skus,
                     sku_filter_dimension,
                     source.get('option1_name'),
                     source.get('option2_name'),
@@ -138,22 +138,22 @@ class ResultFormatter:
         return results
  
     @staticmethod
-    def _filter_skus_by_dimension(
+    def _filter_skus_by_dimensions(
         skus: List[SkuResult],
-        dimension: str,
+        dimensions: List[str],
         option1_name: Optional[str] = None,
         option2_name: Optional[str] = None,
         option3_name: Optional[str] = None,
         specifications: Optional[List[Dict[str, Any]]] = None
     ) -> List[SkuResult]:
         """
-        Filter SKUs by dimension, keeping only one SKU per dimension value.
+        Filter SKUs by one or more dimensions, keeping only one SKU per dimension value combination.
  
         Args:
             skus: List of SKU results to filter
-            dimension: Filter dimension, can be:
+            dimensions: Filter dimensions, each dimension can be:
                 - 'option1', 'option2', 'option3': Direct option field
-                - A specification name (e.g., 'color', 'size'): Match by option name
+                - A specification/option name (e.g., 'color', 'size'): Match by option name
             option1_name: Name of option1 (e.g., 'color')
             option2_name: Name of option2 (e.g., 'size')
             option3_name: Name of option3
@@ -162,54 +162,59 @@ class ResultFormatter:
         Returns:
             Filtered list of SKUs (one per dimension value)
         """
-        if not skus:
+        if not skus or not dimensions:
             return skus
-        
-        # Determine which field to use for filtering
-        filter_field = None
-        
-        # Direct option field (option1, option2, option3)
-        if dimension.lower() == 'option1':
-            filter_field = 'option1_value'
-        elif dimension.lower() == 'option2':
-            filter_field = 'option2_value'
-        elif dimension.lower() == 'option3':
-            filter_field = 'option3_value'
-        else:
-            # Try to match by option name
-            dimension_lower = dimension.lower()
-            if option1_name and option1_name.lower() == dimension_lower:
-                filter_field = 'option1_value'
-            elif option2_name and option2_name.lower() == dimension_lower:
-                filter_field = 'option2_value'
-            elif option3_name and option3_name.lower() == dimension_lower:
-                filter_field = 'option3_value'
-        
-        # If no matching field found, return all SKUs (no filtering)
-        if not filter_field:
-            return skus
-        
-        # Group SKUs by dimension value and select first one from each group
-        dimension_groups: Dict[str, SkuResult] = {}
-        
+
+        # Resolve each dimension to an underlying SKU field (option1_value / option2_value / option3_value)
+        filter_fields: List[str] = []
+
+        for dim in dimensions:
+            if not dim:
+                continue
+            dim_lower = dim.lower()
+
+            field_name: Optional[str] = None
+            # Direct option field (option1, option2, option3)
+            if dim_lower == 'option1':
+                field_name = 'option1_value'
+            elif dim_lower == 'option2':
+                field_name = 'option2_value'
+            elif dim_lower == 'option3':
+                field_name = 'option3_value'
+            else:
+                # Try to match by option name
+                if option1_name and option1_name.lower() == dim_lower:
+                    field_name = 'option1_value'
+                elif option2_name and option2_name.lower() == dim_lower:
+                    field_name = 'option2_value'
+                elif option3_name and option3_name.lower() == dim_lower:
+                    field_name = 'option3_value'
+
+            if field_name and field_name not in filter_fields:
+                filter_fields.append(field_name)
+
+        # If no matching field found for all dimensions, do not return any child SKUs
+        if not filter_fields:
+            return []
+
+        # Group SKUs by dimension value combination and select first one from each group
+        dimension_groups: Dict[tuple, SkuResult] = {}
+
         for sku in skus:
-            # Get dimension value from the determined field
-            dimension_value = None
-            if filter_field == 'option1_value':
-                dimension_value = sku.option1_value
-            elif filter_field == 'option2_value':
-                dimension_value = sku.option2_value
-            elif filter_field == 'option3_value':
-                dimension_value = sku.option3_value
-            
-            # Use empty string as key for None values
-            key = str(dimension_value) if dimension_value is not None else ''
-            
-            # Keep first SKU for each dimension value
+            # Build key as combination of all dimension values
+            key_values: List[str] = []
+            for field in filter_fields:
+                dimension_value = getattr(sku, field, None)
+                # Use empty string as key part for None values
+                key_values.append(str(dimension_value) if dimension_value is not None else '')
+
+            key = tuple(key_values)
+
+            # Keep first SKU for each dimension combination
             if key not in dimension_groups:
                 dimension_groups[key] = sku
-        
-        # Return filtered SKUs (one per dimension value)
+
+        # Return filtered SKUs (one per dimension combination)
         return list(dimension_groups.values())
  
     @staticmethod
@@ -24,8 +24,8 @@ def extract_request_info(request: Request) -> tuple[str, str]:
     # Try to get request ID from headers
     reqid = request.headers.get('X-Request-ID') or str(uuid.uuid4())[:8]
  
-    # Try to get user ID from headers or default to anonymous
-    uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or 'anonymous'
+    # Try to get user ID from headers; if not found, use "-1" for correlation
+    uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or "-1"
  
     return reqid, uid
  
@@ -70,10 +70,24 @@ async def search(request: SearchRequest, http_request: Request):
     set_current_request_context(context)
  
     try:
-        # Log request start
+        # Log request start (English logs, with key search parameters)
+        client_ip = http_request.client.host if http_request.client else "unknown"
+        user_agent = http_request.headers.get("User-Agent", "unknown")[:200]
         context.logger.info(
-            f"收到搜索请求 | Tenant: {tenant_id} | IP: {http_request.client.host if http_request.client else 'unknown'} | "
-            f"用户代理: {http_request.headers.get('User-Agent', 'unknown')[:100]}",
+            "Received search request | "
+            f"Tenant: {tenant_id} | "
+            f"Query: {request.query} | "
+            f"IP: {client_ip} | "
+            f"User agent: {user_agent} | "
+            f"size: {request.size} | from: {request.from_} | "
+            f"sort_by: {request.sort_by} | sort_order: {request.sort_order} | "
+            f"min_score: {request.min_score} | "
+            f"language: {request.language} | "
+            f"debug: {request.debug} | "
+            f"sku_filter_dimension: {request.sku_filter_dimension} | "
+            f"filters: {request.filters} | "
+            f"range_filters: {request.range_filters} | "
+            f"facets: {request.facets}",
             extra={'reqid': context.reqid, 'uid': context.uid}
         )
  
@@ -121,7 +135,7 @@ async def search(request: SearchRequest, http_request: Request):
         if context:
             context.set_error(e)
             context.logger.error(
-                f"搜索请求失败 | 错误: {str(e)}",
+                f"Search request failed | error: {str(e)}",
                 extra={'reqid': context.reqid, 'uid': context.uid}
             )
         raise HTTPException(status_code=500, detail=str(e))
@@ -164,10 +178,13 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
     set_current_request_context(context)
  
     try:
-        # Log request start
+        # Log request start for image search (English)
+        client_ip = http_request.client.host if http_request.client else "unknown"
         context.logger.info(
-            f"收到图片搜索请求 | Tenant: {tenant_id} | 图片URL: {request.image_url} | "
-            f"IP: {http_request.client.host if http_request.client else 'unknown'}",
+            "Received image search request | "
+            f"Tenant: {tenant_id} | "
+            f"Image URL: {request.image_url} | "
+            f"IP: {client_ip}",
             extra={'reqid': context.reqid, 'uid': context.uid}
         )
  
@@ -202,7 +219,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
         if context:
             context.set_error(e)
             context.logger.error(
-                f"图片搜索请求参数错误 | 错误: {str(e)}",
+                f"Image search request parameter error | error: {str(e)}",
                 extra={'reqid': context.reqid, 'uid': context.uid}
             )
         raise HTTPException(status_code=400, detail=str(e))
@@ -210,7 +227,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
         if context:
             context.set_error(e)
             context.logger.error(
-                f"图片搜索请求失败 | 错误: {str(e)}",
+                f"Image search request failed | error: {str(e)}",
                 extra={'reqid': context.reqid, 'uid': context.uid}
             )
         raise HTTPException(status_code=500, detail=str(e))
@@ -23,6 +23,10 @@ from .config_loader import (
     RerankConfig,
     ConfigurationError
 )
+from .utils import (
+    get_match_fields_for_index,
+    get_domain_fields
+)
  
 __all__ = [
     # Field types
@@ -46,4 +50,6 @@ __all__ = [
     'FunctionScoreConfig',
     'RerankConfig',
     'ConfigurationError',
+    'get_match_fields_for_index',
+    'get_domain_fields',
 ]
@@ -412,6 +412,11 @@ query_config:
   text_embedding_field: "title_embedding"  # Field name for text embeddings
   image_embedding_field: null  # Field name for image embeddings (if not set, will auto-detect)
  
+  # Embedding disable thresholds (disable vector search for short queries)
+  embedding_disable_thresholds:
+    chinese_char_limit: 4  # Disable embedding for Chinese queries with <= 4 characters
+    english_word_limit: 3  # Disable embedding for English queries with <= 3 words
+
   # Translation API (DeepL)
   translation_service: "deepl"
   translation_api_key: null  # Set via environment variable
@@ -58,6 +58,10 @@ class QueryConfig:
     text_embedding_field: Optional[str] = None  # Field name for text embeddings (e.g., "title_embedding")
     image_embedding_field: Optional[str] = None  # Field name for image embeddings (e.g., "image_embedding")
  
+    # Embedding disable thresholds (disable vector search for short queries)
+    embedding_disable_chinese_char_limit: int = 4  # Disable embedding for Chinese queries with <= this many characters
+    embedding_disable_english_word_limit: int = 3  # Disable embedding for English queries with <= this many words
+
     # ES source fields configuration - fields to return in search results
     # If None, auto-collect from field configs (fields with return_in_source=True)
     # If empty list, return all fields. Otherwise, only return specified fields.
@@ -165,15 +169,18 @@ class ConfigLoader:
  
         return rewrite_dict
  
-    def load_config(self) -> SearchConfig:
+    def load_config(self, validate: bool = True) -> SearchConfig:
         """
         Load unified configuration from YAML file.
  
+        Args:
+            validate: Whether to validate configuration after loading (default: True)
+
         Returns:
             SearchConfig object
  
         Raises:
-            ConfigurationError: If config file not found or invalid
+            ConfigurationError: If config file not found, invalid, or validation fails
         """
         if not self.config_file.exists():
             raise ConfigurationError(f"Configuration file not found: {self.config_file}")
@@ -184,7 +191,16 @@ class ConfigLoader:
         except yaml.YAMLError as e:
             raise ConfigurationError(f"Invalid YAML in {self.config_file}: {e}")
  
-        return self._parse_config(config_data)
+        config = self._parse_config(config_data)
+        
+        # Auto-validate configuration
+        if validate:
+            errors = self.validate_config(config)
+            if errors:
+                error_msg = "Configuration validation failed:\n" + "\n".join(f"  - {err}" for err in errors)
+                raise ConfigurationError(error_msg)
+        
+        return config
  
     def _parse_config(self, config_data: Dict[str, Any]) -> SearchConfig:
         """Parse configuration dictionary into SearchConfig object."""
@@ -214,43 +230,48 @@ class ConfigLoader:
                 if field.return_in_source
             ]
  
+        # Parse embedding disable thresholds
+        embedding_thresholds = query_config_data.get("embedding_disable_thresholds", {})
+        
         query_config = QueryConfig(
-            supported_languages=query_config_data.get("supported_languages", ["zh", "en"]),
-            default_language=query_config_data.get("default_language", "zh"),
+            supported_languages=query_config_data.get("supported_languages") or ["zh", "en"],
+            default_language=query_config_data.get("default_language") or "zh",
             enable_translation=query_config_data.get("enable_translation", True),
             enable_text_embedding=query_config_data.get("enable_text_embedding", True),
             enable_query_rewrite=query_config_data.get("enable_query_rewrite", True),
             rewrite_dictionary=rewrite_dictionary,
             translation_api_key=query_config_data.get("translation_api_key"),
-            translation_service=query_config_data.get("translation_service", "deepl"),
+            translation_service=query_config_data.get("translation_service") or "deepl",
             translation_glossary_id=query_config_data.get("translation_glossary_id"),
-            translation_context=query_config_data.get("translation_context", "e-commerce product search"),
+            translation_context=query_config_data.get("translation_context") or "e-commerce product search",
             text_embedding_field=query_config_data.get("text_embedding_field"),
             image_embedding_field=query_config_data.get("image_embedding_field"),
+            embedding_disable_chinese_char_limit=embedding_thresholds.get("chinese_char_limit", 4),
+            embedding_disable_english_word_limit=embedding_thresholds.get("english_word_limit", 3),
             source_fields=source_fields
         )
  
         # Parse ranking config
         ranking_data = config_data.get("ranking", {})
         ranking = RankingConfig(
-            expression=ranking_data.get("expression", "bm25() + 0.2*text_embedding_relevance()"),
-            description=ranking_data.get("description", "Default BM25 + text embedding ranking")
+            expression=ranking_data.get("expression") or "bm25() + 0.2*text_embedding_relevance()",
+            description=ranking_data.get("description") or "Default BM25 + text embedding ranking"
         )
  
         # Parse Function Score configuration
         fs_data = config_data.get("function_score", {})
         function_score = FunctionScoreConfig(
-            score_mode=fs_data.get("score_mode", "sum"),
-            boost_mode=fs_data.get("boost_mode", "multiply"),
-            functions=fs_data.get("functions", [])
+            score_mode=fs_data.get("score_mode") or "sum",
+            boost_mode=fs_data.get("boost_mode") or "multiply",
+            functions=fs_data.get("functions") or []
         )
  
         # Parse Rerank configuration
         rerank_data = config_data.get("rerank", {})
         rerank = RerankConfig(
             enabled=rerank_data.get("enabled", False),
-            expression=rerank_data.get("expression", ""),
-            description=rerank_data.get("description", "")
+            expression=rerank_data.get("expression") or "",
+            description=rerank_data.get("description") or ""
         )
  
         # Parse SPU config
@@ -447,21 +468,43 @@ class ConfigLoader:
             output_path = Path(output_path)
  
         # Convert config back to dictionary format
+        query_config_dict = {
+            "supported_languages": config.query_config.supported_languages,
+            "default_language": config.query_config.default_language,
+            "enable_translation": config.query_config.enable_translation,
+            "enable_text_embedding": config.query_config.enable_text_embedding,
+            "enable_query_rewrite": config.query_config.enable_query_rewrite,
+            "translation_service": config.query_config.translation_service,
+        }
+        
+        # Add optional fields only if they are set
+        if config.query_config.translation_api_key:
+            query_config_dict["translation_api_key"] = config.query_config.translation_api_key
+        if config.query_config.translation_glossary_id:
+            query_config_dict["translation_glossary_id"] = config.query_config.translation_glossary_id
+        if config.query_config.translation_context:
+            query_config_dict["translation_context"] = config.query_config.translation_context
+        if config.query_config.text_embedding_field:
+            query_config_dict["text_embedding_field"] = config.query_config.text_embedding_field
+        if config.query_config.image_embedding_field:
+            query_config_dict["image_embedding_field"] = config.query_config.image_embedding_field
+        if config.query_config.source_fields:
+            query_config_dict["source_fields"] = config.query_config.source_fields
+        
+        # Add embedding disable thresholds
+        if (config.query_config.embedding_disable_chinese_char_limit != 4 or 
+            config.query_config.embedding_disable_english_word_limit != 3):
+            query_config_dict["embedding_disable_thresholds"] = {
+                "chinese_char_limit": config.query_config.embedding_disable_chinese_char_limit,
+                "english_word_limit": config.query_config.embedding_disable_english_word_limit
+            }
+        
         config_dict = {
             "es_index_name": config.es_index_name,
             "es_settings": config.es_settings,
             "fields": [self._field_to_dict(field) for field in config.fields],
             "indexes": [self._index_to_dict(index) for index in config.indexes],
-            "query_config": {
-                "supported_languages": config.query_config.supported_languages,
-                "default_language": config.query_config.default_language,
-                "enable_translation": config.query_config.enable_translation,
-                "enable_text_embedding": config.query_config.enable_text_embedding,
-                "enable_query_rewrite": config.query_config.enable_query_rewrite,
-                # rewrite_dictionary is stored in separate file, not in config
-                "translation_api_key": config.query_config.translation_api_key,
-                "translation_service": config.query_config.translation_service,
-            },
+            "query_config": query_config_dict,
             "ranking": {
                 "expression": config.ranking.expression,
                 "description": config.ranking.description
@@ -505,7 +548,7 @@ class ConfigLoader:
                 f.write(f"{key}\t{value}\n")
  
     def _field_to_dict(self, field: FieldConfig) -> Dict[str, Any]:
-        """Convert FieldConfig to dictionary."""
+        """Convert FieldConfig to dictionary, preserving all fields."""
         result = {
             "name": field.name,
             "type": field.field_type.value,
@@ -513,36 +556,49 @@ class ConfigLoader:
             "boost": field.boost,
             "store": field.store,
             "index": field.index,
+            "return_in_source": field.return_in_source,
         }
  
+        # Add optional fields only if they differ from defaults or are set
         if field.analyzer:
             result["analyzer"] = field.analyzer.value
         if field.search_analyzer:
             result["search_analyzer"] = field.search_analyzer.value
         if field.multi_language:
             result["multi_language"] = field.multi_language
-            result["languages"] = field.languages
+            if field.languages:
+                result["languages"] = field.languages
         if field.embedding_dims != 1024:
             result["embedding_dims"] = field.embedding_dims
         if field.embedding_similarity != "dot_product":
             result["embedding_similarity"] = field.embedding_similarity
         if field.nested:
             result["nested"] = field.nested
-            result["nested_properties"] = field.nested_properties
+            if field.nested_properties:
+                result["nested_properties"] = field.nested_properties
+        if field.keyword_subfield:
+            result["keyword_subfield"] = field.keyword_subfield
+            if field.keyword_ignore_above != 256:
+                result["keyword_ignore_above"] = field.keyword_ignore_above
+            if field.keyword_normalizer:
+                result["keyword_normalizer"] = field.keyword_normalizer
  
         return result
  
     def _index_to_dict(self, index: IndexConfig) -> Dict[str, Any]:
-        """Convert IndexConfig to dictionary."""
+        """Convert IndexConfig to dictionary, preserving all fields."""
         result = {
             "name": index.name,
             "label": index.label,
             "fields": index.fields,
             "analyzer": index.analyzer.value,
-            "boost": index.boost,
-            "example": index.example
         }
-
+        
+        # Add optional fields only if they differ from defaults or are set
+        if index.boost != 1.0:
+            result["boost"] = index.boost
+        if index.example:
+            result["example"] = index.example
         if index.language_field_mapping:
             result["language_field_mapping"] = index.language_field_mapping
  
@@ -0,0 +1,70 @@
+"""
+Configuration utility functions.
+
+Helper functions for working with SearchConfig objects.
+"""
+
+from typing import Dict, List
+from .config_loader import SearchConfig
+
+
+def get_match_fields_for_index(config: SearchConfig, index_name: str = "default") -> List[str]:
+    """
+    Build the boosted match-field list for one index domain.
+
+    Each field listed on the matching entry of ``config.indexes`` is emitted
+    either as a bare field name (effective boost of exactly 1.0) or as
+    ``"<field>^<boost>"``. The effective boost is the index boost multiplied
+    by the field's own boost when the field is present in ``config.fields``;
+    otherwise the index boost alone is used.
+
+    Args:
+        config: SearchConfig instance
+        index_name: Name of the index domain (default: "default")
+
+    Returns:
+        List of field names with boost, e.g., ["title_zh^3.0", "brief_zh^1.5"].
+        Empty list when no index entry is named ``index_name``.
+    """
+    # First index entry whose name matches, or None when there is none
+    index_config = next(
+        (candidate for candidate in config.indexes if candidate.name == index_name),
+        None,
+    )
+    if not index_config:
+        return []
+
+    # Field name -> FieldConfig lookup table
+    fields_by_name = {field.name: field for field in config.fields}
+
+    match_fields = []
+    for field_name in index_config.fields:
+        field_config = fields_by_name.get(field_name)
+        if field_config:
+            # Known field: combine index boost and field boost
+            boost = index_config.boost * field_config.boost
+        else:
+            # Field not found in config, use index boost only
+            boost = index_config.boost
+
+        # A boost of exactly 1.0 is left implicit (no "^" suffix)
+        match_fields.append(field_name if boost == 1.0 else f"{field_name}^{boost}")
+
+    return match_fields
+
+
+def get_domain_fields(config: SearchConfig) -> Dict[str, List[str]]:
+    """
+    Build the match-field list for every index domain in the configuration.
+
+    Calls ``get_match_fields_for_index`` once per entry in ``config.indexes``
+    and keys each result by that entry's name.
+
+    Args:
+        config: SearchConfig instance
+
+    Returns:
+        Dictionary mapping domain name to list of match fields
+    """
+    return {
+        index_config.name: get_match_fields_for_index(config, index_config.name)
+        for index_config in config.indexes
+    }
+
@@ -59,9 +59,10 @@ class RequestContext:
     """
  
     def __init__(self, reqid: str = None, uid: str = None):
-        # 生成唯一请求ID
+        # 生成唯一请求ID；如果外部未提供，则自动生成
+        # 如果无法获取到 uid，则使用 "-1" 作为占位，用于日志关联
         self.reqid = reqid or str(uuid.uuid4())[:8]
-        self.uid = uid or 'anonymous'
+        self.uid = uid or "-1"
  
         # 查询分析结果
         self.query_analysis = QueryAnalysisResult()
@@ -111,7 +112,10 @@ class RequestContext:
         """
         start_time = time.time()
         self.performance_metrics.stage_start_times[stage.value] = start_time
-        self.logger.debug(f"开始阶段 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid})
+        self.logger.debug(
+            f"Start stage | {stage.value}",
+            extra={'reqid': self.reqid, 'uid': self.uid}
+        )
         return start_time
  
     def end_stage(self, stage: RequestContextStage) -> float:
@@ -125,7 +129,10 @@ class RequestContext:
             阶段耗时（毫秒）
         """
         if stage.value not in self.performance_metrics.stage_start_times:
-            self.logger.warning(f"阶段未开始计时 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid})
+            self.logger.warning(
+                f"Stage not started | {stage.value}",
+                extra={'reqid': self.reqid, 'uid': self.uid}
+            )
             return 0.0
  
         start_time = self.performance_metrics.stage_start_times[stage.value]
@@ -133,7 +140,7 @@ class RequestContext:
         self.performance_metrics.stage_timings[stage.value] = duration_ms
  
         self.logger.debug(
-            f"结束阶段 | {stage.value} | 耗时: {duration_ms:.2f}ms",
+            f"End stage | {stage.value} | duration: {duration_ms:.2f}ms",
             extra={'reqid': self.reqid, 'uid': self.uid}
         )
         return duration_ms
@@ -162,7 +169,7 @@ class RequestContext:
                 setattr(self.query_analysis, key, value)
             else:
                 self.logger.warning(
-                    f"未知的查询分析字段 | {key}",
+                    f"Unknown query analysis field | {key}",
                     extra={'reqid': self.reqid, 'uid': self.uid}
                 )
  
@@ -175,7 +182,10 @@ class RequestContext:
             value: 结果值
         """
         self.intermediate_results[key] = value
-        self.logger.debug(f"存储中间结果 | {key}", extra={'reqid': self.reqid, 'uid': self.uid})
+        self.logger.debug(
+            f"Store intermediate result | {key}",
+            extra={'reqid': self.reqid, 'uid': self.uid}
+        )
  
     def get_intermediate_result(self, key: str, default: Any = None) -> Any:
         """
@@ -213,7 +223,7 @@ class RequestContext:
             'details': {}
         }
         self.logger.error(
-            f"设置错误信息 | {type(error).__name__}: {str(error)}",
+            f"Set error info | {type(error).__name__}: {str(error)}",
             extra={'reqid': self.reqid, 'uid': self.uid}
         )
  
@@ -286,13 +296,13 @@ class RequestContext:
  
         # 构建详细的日志消息
         msg_parts = [
-            f"搜索请求性能摘要 | reqid: {self.reqid}",
-            f"总耗时: {summary['performance']['total_duration_ms']:.2f}ms"
+            f"Search request performance summary | reqid: {self.reqid}",
+            f"Total duration: {summary['performance']['total_duration_ms']:.2f}ms"
         ]
  
         # 添加各阶段耗时
         if summary['performance']['stage_timings_ms']:
-            msg_parts.append("阶段耗时:")
+            msg_parts.append("Stage durations:")
             for stage, duration in summary['performance']['stage_timings_ms'].items():
                 percentage = summary['performance']['stage_percentages'].get(stage, 0)
                 msg_parts.append(f"  - {stage}: {duration:.2f}ms ({percentage}%)")
@@ -300,25 +310,26 @@ class RequestContext:
         # 添加查询信息
         if summary['query_analysis']['original_query']:
             msg_parts.append(
-                f"查询: '{summary['query_analysis']['original_query']}' "
+                "Query: "
+                f"'{summary['query_analysis']['original_query']}' "
                 f"-> '{summary['query_analysis']['rewritten_query']}' "
                 f"({summary['query_analysis']['detected_language']})"
             )
  
         # 添加结果统计
         msg_parts.append(
-            f"结果: {summary['results']['total_hits']} hits "
-            f"ES查询: {summary['results']['es_query_size']} chars"
+            f"Results: {summary['results']['total_hits']} hits "
+            f"ES query size: {summary['results']['es_query_size']} chars"
         )
  
         # 添加错误信息（如果有）
         if summary['request_info']['has_error']:
             error_info = self.metadata['error_info']
-            msg_parts.append(f"错误: {error_info['type']}: {error_info['message']}")
+            msg_parts.append(f"Error: {error_info['type']}: {error_info['message']}")
  
         # 添加警告信息（如果有）
         if summary['request_info']['warnings_count'] > 0:
-            msg_parts.append(f"警告: {summary['request_info']['warnings_count']} 个")
+            msg_parts.append(f"Warnings: {summary['request_info']['warnings_count']}")
  
         log_message = " | ".join(msg_parts)
  
@@ -17,4 +17,19 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39;   -X GET &#39;http://localhost:9200/search_products/
         ]
       }
     }
-  }'
 \ No newline at end of file
+  }'
+
+
+
+curl -u 'essa:4hOaLaf41y2VuI8y'   -X GET 'http://localhost:9200/search_products/_search?pretty'   -H 'Content-Type: application/json'   -d '{
+    "size": 5,
+    "query": {
+      "bool": {
+        "filter": [
+          { "term": { "spu_id": "74123" } }
+        ]
+      }
+    }
+  }'
+
+
@@ -104,7 +104,7 @@ curl -X POST &quot;http://120.76.41.98:6002/search/&quot; \
   "sort_by": "string",
   "sort_order": "desc",
   "min_score": 0.0,
-  "sku_filter_dimension": "string",
+  "sku_filter_dimension": ["string"],
   "debug": false,
   "user_id": "string",
   "session_id": "string"
@@ -127,7 +127,7 @@ curl -X POST &quot;http://120.76.41.98:6002/search/&quot; \
 | `sort_by` | string | N | null | 排序字段名（如 `min_price`, `max_price`） |
 | `sort_order` | string | N | "desc" | 排序方向：`asc`（升序）或 `desc`（降序） |
 | `min_score` | float | N | null | 最小相关性分数阈值 |
-| `sku_filter_dimension` | string | N | null | 子SKU筛选维度（店铺配置）。指定后，每个SPU下的SKU将按该维度分组，每组选择第一个SKU返回。支持的值：`option1`、`option2`、`option3` 或 specifications 中的 name（如 `color`、`size`）。详见下文说明 |
+| `sku_filter_dimension` | array[string] | N | null | 子SKU筛选维度列表（店铺配置）。指定后，每个SPU下的SKU将按这些维度的组合进行分组，每个组合只返回第一个SKU。支持的值：`option1`、`option2`、`option3` 或选项名称（如 `color`、`size`）。详见下文说明 |
 | `debug` | boolean | N | false | 是否返回调试信息 |
 | `user_id` | string | N | null | 用户ID（用于个性化，预留） |
 | `session_id` | string | N | null | 会话ID（用于分析，预留） |
@@ -349,7 +349,7 @@ curl -X POST &quot;http://120.76.41.98:6002/search/&quot; \
 ### SKU筛选维度 (sku_filter_dimension)
  
 **功能说明**:
-`sku_filter_dimension` 用于控制每个SPU下返回的SKU数量。当指定此参数后，系统会按指定维度对SKU进行分组，每个分组只返回第一个SKU（从简实现，选择该维度下的第一款）。
+`sku_filter_dimension` 是一个字符串列表，用于控制每个SPU下返回的SKU数量。当指定此参数后，系统会按指定维度的**组合**对SKU进行分组，每个维度组合只返回第一个SKU（从简实现，选择该组合下的第一款）。
  
 **使用场景**:
 - 店铺配置了SKU筛选维度（如 `color`），希望每个SPU下每种颜色只显示一个SKU
@@ -360,8 +360,8 @@ curl -X POST &quot;http://120.76.41.98:6002/search/&quot; \
 1. **直接选项字段**: `option1`、`option2`、`option3`
    - 直接使用对应的 `option1_value`、`option2_value`、`option3_value` 字段进行分组
  
-2. **规格名称**: 通过 `option1_name`、`option2_name`、`option3_name` 匹配
-   - 例如：如果 `option1_name` 为 `"color"`，则可以使用 `sku_filter_dimension: "color"` 来按颜色分组
+2. **规格/选项名称**: 通过 `option1_name`、`option2_name`、`option3_name` 匹配
+   - 例如：如果 `option1_name` 为 `"color"`，则可以使用 `sku_filter_dimension: ["color"]` 来按颜色分组
  
 **示例**:
  
@@ -369,7 +369,7 @@ curl -X POST &quot;http://120.76.41.98:6002/search/&quot; \
 ```json
 {
   "query": "芭比娃娃",
-  "sku_filter_dimension": "color"
+  "sku_filter_dimension": ["color"]
 }
 ```
  
@@ -377,7 +377,7 @@ curl -X POST &quot;http://120.76.41.98:6002/search/&quot; \
 ```json
 {
   "query": "芭比娃娃",
-  "sku_filter_dimension": "option1"
+  "sku_filter_dimension": ["option1"]
 }
 ```
  
@@ -385,7 +385,15 @@ curl -X POST &quot;http://120.76.41.98:6002/search/&quot; \
 ```json
 {
   "query": "芭比娃娃",
-  "sku_filter_dimension": "option2"
+  "sku_filter_dimension": ["option2"]
+}
+```
+
+**按颜色 + 尺寸组合筛选（假设 option1_name = "color", option2_name = "size"）**:
+```json
+{
+  "query": "芭比娃娃",
+  "sku_filter_dimension": ["color", "size"]
 }
 ```
  
@@ -25,6 +25,10 @@
                 <label for="tenantInput">tenant ID:</label>
                 <input type="text" id="tenantInput" placeholder="请输入租户ID" value="1">
             </div>
+            <div class="tenant-input-wrapper">
+                <label for="skuFilterDimension">sku_filter_dimension:</label>
+                <input type="text" id="skuFilterDimension" placeholder="SKU筛选维度" value="color">
+            </div>
             <input type="text" id="searchInput" placeholder="输入搜索关键词... (支持中文、英文、俄文)"
                    onkeypress="handleKeyPress(event)">
             <button onclick="performSearch()" class="search-btn">Search</button>
@@ -100,9 +104,10 @@
  
             <div class="sort-right">
                 <select id="resultSize" onchange="performSearch()">
-                    <option value="10">10 per page</option>
-                    <option value="20" selected>20 per page</option>
-                    <option value="50">50 per page</option>
+                    <option value="20">20 per page</option>
+                    <option value="50" selected>50 per page</option>
+                    <option value="100">100 per page</option>
+                    <option value="200">200 per page</option>
                 </select>
             </div>
         </div>
@@ -130,6 +135,6 @@
         <p>SearchEngine © 2025 | API: <span id="apiUrl">Loading...</span></p>
     </footer>
  
-    <script src="/static/js/app.js?v=3.2"></script>
+    <script src="/static/js/app.js?v=3.4"></script>
 </body>
 </html>
@@ -84,7 +84,8 @@ body {
     white-space: nowrap;
 }
  
-#tenantInput {
+#tenantInput,
+#skuFilterDimension {
     width: 120px;
     padding: 10px 15px;
     font-size: 14px;
@@ -93,7 +94,8 @@ body {
     outline: none;
 }
  
-#tenantInput:focus {
+#tenantInput:focus,
+#skuFilterDimension:focus {
     border-color: #e74c3c;
 }
  
@@ -14,6 +14,21 @@ function getTenantId() {
     return '1'; // Default fallback
 }
  
+// Get sku_filter_dimension (as list) from input
+function getSkuFilterDimension() {
+    const skuFilterInput = document.getElementById('skuFilterDimension');
+    if (skuFilterInput) {
+        const value = skuFilterInput.value.trim();
+        if (!value.length) {
+            return null;
+        }
+        // Multiple dimensions may be comma-separated, e.g. color,size or option1,color
+        const parts = value.split(',').map(v => v.trim()).filter(v => v.length > 0);
+        return parts.length > 0 ? parts : null;
+    }
+    return null;
+}
+
 // State Management
 let state = {
     query: '',
@@ -51,6 +66,7 @@ function toggleFilters() {
 async function performSearch(page = 1) {
     const query = document.getElementById('searchInput').value.trim();
     const tenantId = getTenantId();
+    const skuFilterDimension = getSkuFilterDimension();
  
     if (!query) {
         alert('Please enter search keywords');
@@ -96,6 +112,7 @@ async function performSearch(page = 1) {
                 facets: facets,
                 sort_by: state.sortBy || null,
                 sort_order: state.sortOrder,
+                sku_filter_dimension: skuFilterDimension,
                 debug: state.debug
             })
         });
@@ -93,7 +93,7 @@ def cmd_search(args):
  
     from query import QueryParser
     query_parser = QueryParser(config)
-    searcher = Searcher(config, es_client, query_parser)
+    searcher = Searcher(es_client, config, query_parser)
  
     # Execute search
     print(f"Searching for: '{args.query}' (tenant: {args.tenant_id})")
@@ -9,13 +9,7 @@ import numpy as np
 import logging
  
 from embeddings import BgeEncoder
-from search.query_config import (
-    ENABLE_TEXT_EMBEDDING,
-    ENABLE_TRANSLATION,
-    REWRITE_DICTIONARY,
-    TRANSLATION_API_KEY,
-    TRANSLATION_SERVICE
-)
+from config import SearchConfig
 from .language_detector import LanguageDetector
 from .translator import Translator
 from .query_rewriter import QueryRewriter, QueryNormalizer
@@ -70,6 +64,7 @@ class QueryParser:
  
     def __init__(
         self,
+        config: SearchConfig,
         text_encoder: Optional[BgeEncoder] = None,
         translator: Optional[Translator] = None
     ):
@@ -77,21 +72,23 @@ class QueryParser:
         Initialize query parser.
  
         Args:
+            config: SearchConfig instance
             text_encoder: Text embedding encoder (lazy loaded if not provided)
             translator: Translator instance (lazy loaded if not provided)
         """
+        self.config = config
         self._text_encoder = text_encoder
         self._translator = translator
  
         # Initialize components
         self.normalizer = QueryNormalizer()
         self.language_detector = LanguageDetector()
-        self.rewriter = QueryRewriter(REWRITE_DICTIONARY)
+        self.rewriter = QueryRewriter(config.query_config.rewrite_dictionary)
  
     @property
     def text_encoder(self) -> BgeEncoder:
         """Lazy load text encoder."""
-        if self._text_encoder is None and ENABLE_TEXT_EMBEDDING:
+        if self._text_encoder is None and self.config.query_config.enable_text_embedding:
             logger.info("Initializing text encoder (lazy load)...")
             self._text_encoder = BgeEncoder()
         return self._text_encoder
@@ -99,13 +96,13 @@ class QueryParser:
     @property
     def translator(self) -> Translator:
         """Lazy load translator."""
-        if self._translator is None and ENABLE_TRANSLATION:
+        if self._translator is None and self.config.query_config.enable_translation:
             logger.info("Initializing translator (lazy load)...")
             self._translator = Translator(
-                api_key=TRANSLATION_API_KEY,
+                api_key=self.config.query_config.translation_api_key,
                 use_cache=True,
-                glossary_id=None,  # Can be added to query_config if needed
-                translation_context='e-commerce product search'
+                glossary_id=self.config.query_config.translation_glossary_id,
+                translation_context=self.config.query_config.translation_context
             )
         return self._translator
  
@@ -156,7 +153,7 @@ class QueryParser:
  
         # Stage 2: Query rewriting
         rewritten = None
-        if REWRITE_DICTIONARY:  # Enable rewrite if dictionary exists
+        if self.config.query_config.rewrite_dictionary:  # Enable rewrite if dictionary exists
             rewritten = self.rewriter.rewrite(query_text)
             if rewritten != query_text:
                 log_info(f"查询重写 | '{query_text}' -> '{rewritten}'")
@@ -173,7 +170,7 @@ class QueryParser:
  
         # Stage 4: Translation
         translations = {}
-        if ENABLE_TRANSLATION:
+        if self.config.query_config.enable_translation:
             try:
                 # Determine target languages for translation
                 # Simplified: always translate to Chinese and English
@@ -210,19 +207,47 @@ class QueryParser:
         # Stage 5: Text embedding
         query_vector = None
         if (generate_vector and
-            ENABLE_TEXT_EMBEDDING and
+            self.config.query_config.enable_text_embedding and
             domain == "default"):  # Only generate vector for default domain
-            try:
-                log_debug("开始生成查询向量")
-                query_vector = self.text_encoder.encode([query_text])[0]
-                log_debug(f"查询向量生成完成 | 形状: {query_vector.shape}")
-                if context:
-                    context.store_intermediate_result('query_vector_shape', query_vector.shape)
-            except Exception as e:
-                error_msg = f"查询向量生成失败 | 错误: {str(e)}"
-                log_info(error_msg)
-                if context:
-                    context.add_warning(error_msg)
+            # Get thresholds from config
+            chinese_limit = self.config.query_config.embedding_disable_chinese_char_limit
+            english_limit = self.config.query_config.embedding_disable_english_word_limit
+            
+            # Check if embedding should be disabled for short queries
+            should_disable_embedding = False
+            disable_reason = None
+            
+            if detected_lang == 'zh':
+                # For Chinese: disable embedding if character count <= threshold
+                char_count = len(query_text.strip())
+                if char_count <= chinese_limit:
+                    should_disable_embedding = True
+                    disable_reason = f"中文查询字数({char_count}) <= {chinese_limit}，禁用向量搜索"
+                    log_info(disable_reason)
+                    if context:
+                        context.store_intermediate_result('embedding_disabled_reason', disable_reason)
+            else:
+                # For non-Chinese queries (whitespace-delimited, e.g. English): disable embedding if word count <= threshold
+                word_count = len(query_text.strip().split())
+                if word_count <= english_limit:
+                    should_disable_embedding = True
+                    disable_reason = f"英文查询单词数({word_count}) <= {english_limit}，禁用向量搜索"
+                    log_info(disable_reason)
+                    if context:
+                        context.store_intermediate_result('embedding_disabled_reason', disable_reason)
+            
+            if not should_disable_embedding:
+                try:
+                    log_debug("开始生成查询向量")
+                    query_vector = self.text_encoder.encode([query_text])[0]
+                    log_debug(f"查询向量生成完成 | 形状: {query_vector.shape}")
+                    if context:
+                        context.store_intermediate_result('query_vector_shape', query_vector.shape)
+                except Exception as e:
+                    error_msg = f"查询向量生成失败 | 错误: {str(e)}"
+                    log_info(error_msg)
+                    if context:
+                        context.add_warning(error_msg)
  
         # Build result
         result = ParsedQuery(
@@ -11,7 +11,7 @@ Simplified architecture:
 from typing import Dict, Any, List, Optional, Union
 import numpy as np
 from .boolean_parser import QueryNode
-from .query_config import FUNCTION_SCORE_CONFIG
+from config import FunctionScoreConfig
  
  
 class ESQueryBuilder:
@@ -23,7 +23,8 @@ class ESQueryBuilder:
         match_fields: List[str],
         text_embedding_field: Optional[str] = None,
         image_embedding_field: Optional[str] = None,
-        source_fields: Optional[List[str]] = None
+        source_fields: Optional[List[str]] = None,
+        function_score_config: Optional[FunctionScoreConfig] = None
     ):
         """
         Initialize query builder.
@@ -34,12 +35,14 @@ class ESQueryBuilder:
             text_embedding_field: Field name for text embeddings
             image_embedding_field: Field name for image embeddings
             source_fields: Fields to return in search results (_source includes)
+            function_score_config: Function score configuration
         """
         self.index_name = index_name
         self.match_fields = match_fields
         self.text_embedding_field = text_embedding_field
         self.image_embedding_field = image_embedding_field
         self.source_fields = source_fields
+        self.function_score_config = function_score_config
  
     def build_query(
         self,
@@ -182,12 +185,15 @@ class ESQueryBuilder:
             return query
  
         # Build function_score query
+        score_mode = self.function_score_config.score_mode if self.function_score_config else "sum"
+        boost_mode = self.function_score_config.boost_mode if self.function_score_config else "multiply"
+        
         function_score_query = {
             "function_score": {
                 "query": query,
                 "functions": functions,
-                "score_mode": FUNCTION_SCORE_CONFIG.get("score_mode", "sum"),
-                "boost_mode": FUNCTION_SCORE_CONFIG.get("boost_mode", "multiply")
+                "score_mode": score_mode,
+                "boost_mode": boost_mode
             }
         }
  
@@ -201,7 +207,10 @@ class ESQueryBuilder:
             List of function score functions
         """
         functions = []
-        config_functions = FUNCTION_SCORE_CONFIG.get("functions", [])
+        if not self.function_score_config:
+            return functions
+        
+        config_functions = self.function_score_config.functions or []
  
         for func_config in config_functions:
             func_type = func_config.get("type")
@@ -5,7 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting.
 """
  
 from typing import Dict, Any, List, Optional, Union
-import time
+import time, json
 import logging
  
 from utils.es_client import ESClient
@@ -14,16 +14,8 @@ from embeddings import CLIPImageEncoder
 from .boolean_parser import BooleanParser, QueryNode
 from .es_query_builder import ESQueryBuilder
 from .rerank_engine import RerankEngine
-from .query_config import (
-    DEFAULT_INDEX_NAME,
-    DEFAULT_MATCH_FIELDS,
-    TEXT_EMBEDDING_FIELD,
-    IMAGE_EMBEDDING_FIELD,
-    SOURCE_FIELDS,
-    ENABLE_TRANSLATION,
-    ENABLE_TEXT_EMBEDDING,
-    RANKING_EXPRESSION
-)
+from config import SearchConfig
+from config.utils import get_match_fields_for_index
 from context.request_context import RequestContext, RequestContextStage, create_request_context
 from api.models import FacetResult, FacetValue
 from api.result_formatter import ResultFormatter
@@ -87,37 +79,40 @@ class Searcher:
     def __init__(
         self,
         es_client: ESClient,
-        query_parser: Optional[QueryParser] = None,
-        index_name: str = DEFAULT_INDEX_NAME
+        config: SearchConfig,
+        query_parser: Optional[QueryParser] = None
     ):
         """
         Initialize searcher.
  
         Args:
             es_client: Elasticsearch client
+            config: SearchConfig instance
             query_parser: Query parser (created if not provided)
-            index_name: ES index name (default: search_products)
         """
         self.es_client = es_client
-        self.index_name = index_name
-        self.query_parser = query_parser or QueryParser()
+        self.config = config
+        self.index_name = config.es_index_name
+        self.query_parser = query_parser or QueryParser(config)
  
         # Initialize components
         self.boolean_parser = BooleanParser()
-        self.rerank_engine = RerankEngine(RANKING_EXPRESSION, enabled=False)
+        self.rerank_engine = RerankEngine(config.ranking.expression, enabled=False)
  
-        # Use constants from query_config
-        self.match_fields = DEFAULT_MATCH_FIELDS
-        self.text_embedding_field = TEXT_EMBEDDING_FIELD
-        self.image_embedding_field = IMAGE_EMBEDDING_FIELD
+        # Get match fields from config
+        self.match_fields = get_match_fields_for_index(config, "default")
+        self.text_embedding_field = config.query_config.text_embedding_field or "title_embedding"
+        self.image_embedding_field = config.query_config.image_embedding_field or "image_embedding"
+        self.source_fields = config.query_config.source_fields or []
  
         # Query builder - simplified single-layer architecture
         self.query_builder = ESQueryBuilder(
-            index_name=index_name,
+            index_name=self.index_name,
             match_fields=self.match_fields,
             text_embedding_field=self.text_embedding_field,
             image_embedding_field=self.image_embedding_field,
-            source_fields=SOURCE_FIELDS
+            source_fields=self.source_fields,
+            function_score_config=self.config.function_score
         )
  
     def search(
@@ -135,7 +130,7 @@ class Searcher:
         sort_order: Optional[str] = "desc",
         debug: bool = False,
         language: str = "zh",
-        sku_filter_dimension: Optional[str] = None,
+        sku_filter_dimension: Optional[List[str]] = None,
     ) -> SearchResult:
         """
         Execute search query (外部友好格式).
@@ -162,8 +157,8 @@ class Searcher:
             context = create_request_context()
  
         # Always use config defaults (these are backend configuration, not user parameters)
-        enable_translation = ENABLE_TRANSLATION
-        enable_embedding = ENABLE_TEXT_EMBEDDING
+        enable_translation = self.config.query_config.enable_translation
+        enable_embedding = self.config.query_config.enable_text_embedding
         enable_rerank = False  # Temporarily disabled
  
         # Start timing
@@ -305,14 +300,14 @@ class Searcher:
             context.store_intermediate_result('es_query', es_query)
             context.store_intermediate_result('es_body_for_search', body_for_es)
  
+            # Serialize ES query as a compact JSON string (no spaces or newlines)
+            es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(',', ':'))
+
             context.logger.info(
-                f"ES查询构建完成 | 大小: {len(str(es_query))}字符 | "
-                f"KNN: {'是' if enable_embedding and parsed_query.query_vector is not None else '否'} | "
-                f"分面: {'是' if facets else '否'}",
-                extra={'reqid': context.reqid, 'uid': context.uid}
-            )
-            context.logger.debug(
-                f"ES查询详情: {es_query}",
+                f"ES query built | size: {len(es_query_compact)} chars | "
+                f"KNN: {'yes' if enable_embedding and parsed_query.query_vector is not None else 'no'} | "
+                f"facets: {'yes' if facets else 'no'} | "
+                f"query: {es_query_compact}",
                 extra={'reqid': context.reqid, 'uid': context.uid}
             )
         except Exception as e:
@@ -508,9 +503,9 @@ class Searcher:
         }
  
         # Add _source filtering if source_fields are configured
-        if SOURCE_FIELDS:
+        if self.source_fields:
             es_query["_source"] = {
-                "includes": SOURCE_FIELDS
+                "includes": self.source_fields
             }
  
         if filters or range_filters:
@@ -137,8 +137,8 @@ def mock_es_client() -&gt; Mock:
 def test_searcher(sample_search_config, mock_es_client) -> Searcher:
     """测试用Searcher实例"""
     return Searcher(
-        config=sample_search_config,
-        es_client=mock_es_client
+        es_client=mock_es_client,
+        config=sample_search_config
     )
  
  
@@ -38,10 +38,11 @@ class StructuredFormatter(logging.Formatter):
         # Add request context if available
         reqid = getattr(record, 'reqid', None)
         uid = getattr(record, 'uid', None)
-        if reqid or uid:
+        if reqid is not None or uid is not None:
+            # Normalize missing values to "-1" for easier correlation
             log_entry['request_context'] = {
-                'reqid': reqid,
-                'uid': uid
+                'reqid': reqid if reqid is not None else "-1",
+                'uid': uid if uid is not None else "-1"
             }
  
         # Add extra data if available
@@ -98,13 +99,31 @@ class RequestContextFilter(logging.Filter):
             from context.request_context import get_current_request_context
             context = get_current_request_context()
             if context:
-                record.reqid = context.reqid
-                record.uid = context.uid
+                # Ensure every request-scoped log record carries reqid/uid.
+                # If they are missing in the context, fall back to "-1".
+                record.reqid = getattr(context, "reqid", None) or "-1"
+                record.uid = getattr(context, "uid", None) or "-1"
         except (ImportError, AttributeError):
             pass
         return True
  
  
+class ContextAwareConsoleFormatter(logging.Formatter):
+    """
+    Console formatter that injects reqid/uid into the log line.
+
+    For non-request logs (no context), reqid/uid will be "-1".
+    """
+
+    def format(self, record: logging.LogRecord) -> str:
+        # Provide safe defaults so format string never fails
+        if not hasattr(record, "reqid"):
+            record.reqid = "-1"
+        if not hasattr(record, "uid"):
+            record.uid = "-1"
+        return super().format(record)
+
+
 def setup_logging(
     log_level: str = "INFO",
     log_dir: str = "logs",
@@ -137,8 +156,8 @@ def setup_logging(
  
     # Create formatters
     structured_formatter = StructuredFormatter()
-    console_formatter = logging.Formatter(
-        '%(asctime)s | %(levelname)-8s | %(name)-15s | %(message)s'
+    console_formatter = ContextAwareConsoleFormatter(
+        '%(asctime)s | reqid:%(reqid)s | uid:%(uid)s | %(levelname)-8s | %(name)-15s | %(message)s'
     )
  
     # Add console handler