From 9cb7528e728624c7f2b361f6a8696f655ab4cc99 Mon Sep 17 00:00:00 2001 From: tangwang Date: Thu, 13 Nov 2025 15:13:26 +0800 Subject: [PATCH] 店匠体系数据的搜索:mock data -> mysql, mysql->ES --- api/app.py | 6 +++--- config/__init__.py | 4 ++-- config/config_loader.py | 24 ++++++++++++------------ config/env_config.py | 11 ----------- indexer/data_transformer.py | 6 +++--- indexer/mapping_generator.py | 8 ++++---- query/query_parser.py | 6 +++--- search/multilang_query_builder.py | 4 ++-- search/searcher.py | 6 +++--- test_source_fields.py | 2 +- tests/conftest.py | 16 ++++++++-------- 11 files changed, 41 insertions(+), 52 deletions(-) diff --git a/api/app.py b/api/app.py index 16eab02..2f06c51 100644 --- a/api/app.py +++ b/api/app.py @@ -39,13 +39,13 @@ limiter = Limiter(key_func=get_remote_address) # Add parent directory to path sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from config import ConfigLoader, CustomerConfig +from config import ConfigLoader, SearchConfig from utils import ESClient from search import Searcher from query import QueryParser # Global instances -_config: Optional[CustomerConfig] = None +_config: Optional[SearchConfig] = None _es_client: Optional[ESClient] = None _searcher: Optional[Searcher] = None _query_parser: Optional[QueryParser] = None @@ -111,7 +111,7 @@ def init_service(es_host: str = "http://localhost:9200"): print("Search service ready!") -def get_config() -> CustomerConfig: +def get_config() -> SearchConfig: """Get search engine configuration.""" if _config is None: raise RuntimeError("Service not initialized") diff --git a/config/__init__.py b/config/__init__.py index 249e38c..d8c5d12 100644 --- a/config/__init__.py +++ b/config/__init__.py @@ -14,7 +14,7 @@ from .field_types import ( from .config_loader import ( ConfigLoader, - CustomerConfig, + SearchConfig, IndexConfig, RankingConfig, QueryConfig, @@ -38,7 +38,7 @@ __all__ = [ # Config loader 'ConfigLoader', - 'CustomerConfig', + 'SearchConfig', 'IndexConfig', 'RankingConfig', 'QueryConfig', diff --git a/config/config_loader.py b/config/config_loader.py index 40c19b0..27ae6c2 100644 --- a/config/config_loader.py +++ b/config/config_loader.py @@ -1,8 +1,8 @@ """ -Configuration loader and validator for customer-specific search configurations. +Configuration loader and validator for search engine configurations. This module handles loading, parsing, and validating YAML configuration files -that define how each customer's data should be indexed and searched. +that define how search engine data should be indexed and searched. """ import yaml @@ -85,7 +85,7 @@ class RerankConfig: @dataclass -class CustomerConfig: +class SearchConfig: """Complete configuration for search engine (multi-tenant).""" # Field definitions fields: List[FieldConfig] @@ -161,12 +161,12 @@ class ConfigLoader: return rewrite_dict - def load_config(self) -> CustomerConfig: + def load_config(self) -> SearchConfig: """ Load unified configuration from YAML file. Returns: - CustomerConfig object + SearchConfig object Raises: ConfigurationError: If config file not found or invalid @@ -182,8 +182,8 @@ class ConfigLoader: return self._parse_config(config_data) - def _parse_config(self, config_data: Dict[str, Any]) -> CustomerConfig: - """Parse configuration dictionary into CustomerConfig object.""" + def _parse_config(self, config_data: Dict[str, Any]) -> SearchConfig: + """Parse configuration dictionary into SearchConfig object.""" # Parse fields fields = [] @@ -243,7 +243,7 @@ class ConfigLoader: inner_hits_size=spu_data.get("inner_hits_size", 3) ) - return CustomerConfig( + return SearchConfig( fields=fields, indexes=indexes, query_config=query_config, @@ -312,12 +312,12 @@ class ConfigLoader: language_field_mapping=language_field_mapping ) - def validate_config(self, config: CustomerConfig) -> List[str]: + def validate_config(self, config: SearchConfig) -> List[str]: """ - Validate customer configuration. + Validate search configuration. Args: - config: Customer configuration to validate + config: Search configuration to validate Returns: List of validation error messages (empty if valid) @@ -407,7 +407,7 @@ class ConfigLoader: return errors - def save_config(self, config: CustomerConfig, output_path: Optional[str] = None) -> None: + def save_config(self, config: SearchConfig, output_path: Optional[str] = None) -> None: """ Save configuration to YAML file. diff --git a/config/env_config.py b/config/env_config.py index 2e5bb45..6d6c82c 100644 --- a/config/env_config.py +++ b/config/env_config.py @@ -31,9 +31,6 @@ REDIS_CONFIG = { # DeepL API Key DEEPL_AUTH_KEY = os.getenv('DEEPL_AUTH_KEY') -# Customer Configuration -CUSTOMER_ID = os.getenv('CUSTOMER_ID', 'customer1') - # API Service Configuration API_HOST = os.getenv('API_HOST', '0.0.0.0') API_PORT = int(os.getenv('API_PORT', 6002)) @@ -74,11 +71,6 @@ def get_deepl_key() -> str: return DEEPL_AUTH_KEY -def get_customer_id() -> str: - """Get default customer ID.""" - return CUSTOMER_ID - - def get_db_config() -> Dict[str, Any]: """Get MySQL database configuration.""" return DB_CONFIG.copy() @@ -103,9 +95,6 @@ def print_config(): print("\nDeepL:") print(f" API Key: {'*' * 10 if DEEPL_AUTH_KEY else 'None (translation disabled)'}") - print("\nCustomer:") - print(f" Customer ID: {CUSTOMER_ID}") - print("\nAPI Service:") print(f" Host: {API_HOST}") print(f" Port: {API_PORT}") diff --git a/indexer/data_transformer.py b/indexer/data_transformer.py index bfb6832..af00113 100644 --- a/indexer/data_transformer.py +++ b/indexer/data_transformer.py @@ -8,7 +8,7 @@ import pandas as pd import numpy as np import datetime from typing import Dict, Any, List, Optional -from config import CustomerConfig, FieldConfig, FieldType +from config import SearchConfig, FieldConfig, FieldType from embeddings import BgeEncoder, CLIPImageEncoder from utils.cache import EmbeddingCache @@ -18,7 +18,7 @@ class DataTransformer: def __init__( self, - config: CustomerConfig, + config: SearchConfig, text_encoder: Optional[BgeEncoder] = None, image_encoder: Optional[CLIPImageEncoder] = None, use_cache: bool = True @@ -27,7 +27,7 @@ class DataTransformer: Initialize data transformer. Args: - config: Customer configuration + config: Search configuration text_encoder: Text embedding encoder (lazy loaded if not provided) image_encoder: Image embedding encoder (lazy loaded if not provided) use_cache: Whether to use embedding cache diff --git a/indexer/mapping_generator.py b/indexer/mapping_generator.py index 34f1f86..e5b387f 100644 --- a/indexer/mapping_generator.py +++ b/indexer/mapping_generator.py @@ -1,12 +1,12 @@ """ Elasticsearch mapping generator. -Generates Elasticsearch index mappings from customer configuration. +Generates Elasticsearch index mappings from search configuration. """ from typing import Dict, Any from config import ( - CustomerConfig, + SearchConfig, FieldConfig, get_es_mapping_for_field, get_default_analyzers, @@ -15,9 +15,9 @@ from config import ( class MappingGenerator: - """Generates Elasticsearch mapping from customer configuration.""" + """Generates Elasticsearch mapping from search configuration.""" - def __init__(self, config: CustomerConfig): + def __init__(self, config: SearchConfig): self.config = config def generate_mapping(self) -> Dict[str, Any]: diff --git a/query/query_parser.py b/query/query_parser.py index 242da9a..a31b4c5 100644 --- a/query/query_parser.py +++ b/query/query_parser.py @@ -7,7 +7,7 @@ Handles query rewriting, translation, and embedding generation. from typing import Dict, List, Optional, Any import numpy as np -from config import CustomerConfig, QueryConfig +from config import SearchConfig, QueryConfig from embeddings import BgeEncoder from .language_detector import LanguageDetector from .translator import Translator @@ -61,7 +61,7 @@ class QueryParser: def __init__( self, - config: CustomerConfig, + config: SearchConfig, text_encoder: Optional[BgeEncoder] = None, translator: Optional[Translator] = None ): @@ -69,7 +69,7 @@ class QueryParser: Initialize query parser. Args: - config: Customer configuration + config: Search configuration text_encoder: Text embedding encoder (lazy loaded if not provided) translator: Translator instance (lazy loaded if not provided) """ diff --git a/search/multilang_query_builder.py b/search/multilang_query_builder.py index a8d27dc..2338f19 100644 --- a/search/multilang_query_builder.py +++ b/search/multilang_query_builder.py @@ -9,7 +9,7 @@ maintaining a unified external interface. from typing import Dict, Any, List, Optional import numpy as np -from config import CustomerConfig, IndexConfig +from config import SearchConfig, IndexConfig from query import ParsedQuery from .es_query_builder import ESQueryBuilder @@ -26,7 +26,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): def __init__( self, - config: CustomerConfig, + config: SearchConfig, index_name: str, text_embedding_field: Optional[str] = None, image_embedding_field: Optional[str] = None, diff --git a/search/searcher.py b/search/searcher.py index 969f61f..99bdf9c 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ -7,7 +7,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. from typing import Dict, Any, List, Optional, Union import time -from config import CustomerConfig +from config import SearchConfig from utils.es_client import ESClient from query import QueryParser, ParsedQuery from indexer import MappingGenerator @@ -75,7 +75,7 @@ class Searcher: def __init__( self, - config: CustomerConfig, + config: SearchConfig, es_client: ESClient, query_parser: Optional[QueryParser] = None ): @@ -83,7 +83,7 @@ class Searcher: Initialize searcher. Args: - config: Customer configuration + config: Search configuration es_client: Elasticsearch client query_parser: Query parser (created if not provided) """ diff --git a/test_source_fields.py b/test_source_fields.py index b129bd6..e4897ce 100644 --- a/test_source_fields.py +++ b/test_source_fields.py @@ -7,7 +7,7 @@ import sys import os sys.path.append(os.path.dirname(os.path.abspath(__file__))) -from config import ConfigLoader, CustomerConfig +from config import ConfigLoader, SearchConfig def test_source_fields_config(): """测试source_fields配置是否正确加载""" diff --git a/tests/conftest.py b/tests/conftest.py index 28646c8..7e1421d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,7 +15,7 @@ from unittest.mock import Mock, MagicMock project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, project_root) -from config import CustomerConfig, QueryConfig, IndexConfig, FieldConfig, SPUConfig, RankingConfig, FunctionScoreConfig, RerankConfig +from config import SearchConfig, QueryConfig, IndexConfig, FieldConfig, SPUConfig, RankingConfig, FunctionScoreConfig, RerankConfig from config.field_types import FieldType, AnalyzerType from utils.es_client import ESClient from search import Searcher @@ -51,8 +51,8 @@ def sample_index_config() -> IndexConfig: @pytest.fixture -def sample_customer_config(sample_index_config) -> CustomerConfig: - """样例客户配置""" +def sample_search_config(sample_index_config) -> SearchConfig: + """样例搜索配置""" query_config = QueryConfig( enable_query_rewrite=True, enable_translation=True, @@ -74,7 +74,7 @@ def sample_customer_config(sample_index_config) -> CustomerConfig: function_score_config = FunctionScoreConfig() rerank_config = RerankConfig() - return CustomerConfig( + return SearchConfig( es_index_name="test_products", fields=[ FieldConfig(name="tenant_id", field_type=FieldType.KEYWORD, required=True), @@ -134,18 +134,18 @@ def mock_es_client() -> Mock: @pytest.fixture -def test_searcher(sample_customer_config, mock_es_client) -> Searcher: +def test_searcher(sample_search_config, mock_es_client) -> Searcher: """测试用Searcher实例""" return Searcher( - config=sample_customer_config, + config=sample_search_config, es_client=mock_es_client ) @pytest.fixture -def test_query_parser(sample_customer_config) -> QueryParser: +def test_query_parser(sample_search_config) -> QueryParser: """测试用QueryParser实例""" - return QueryParser(sample_customer_config) + return QueryParser(sample_search_config) @pytest.fixture -- libgit2 0.21.2