"""
Indexing utility functions.

Collects shared logic in one place to avoid code duplication.
"""

import logging
from typing import Dict, Any, Optional

from sqlalchemy import Engine, text

from config import ConfigLoader
from config.tenant_config_loader import get_tenant_config_loader
from indexer.document_transformer import SPUDocumentTransformer

logger = logging.getLogger(__name__)


def load_category_mapping(db_engine: Engine) -> Dict[str, str]:
    """
    Load the category-ID-to-name mapping (global, shared by all tenants).

    Rows whose name is empty or whitespace-only are skipped with a warning.
    Any exception raised while querying or converting rows is logged and
    swallowed; whatever was collected before the failure is returned.

    Args:
        db_engine: SQLAlchemy database engine

    Returns:
        Dictionary mapping category_id to category_name
    """
    statement = text(
        " SELECT DISTINCT category_id, category"
        " FROM shoplazza_product_spu"
        " WHERE deleted = 0 AND category_id IS NOT NULL "
    )

    id_to_name: Dict[str, str] = {}
    try:
        with db_engine.connect() as connection:
            for record in connection.execute(statement):
                # Round-trip through int() so the key is a plain integer string.
                key = str(int(record.category_id))
                name = record.category
                if name and name.strip():
                    id_to_name[key] = name
                else:
                    logger.warning(f"Category ID {key} has empty name, skipping")
    except Exception as exc:
        logger.error(f"Failed to load category mapping: {exc}", exc_info=True)

    return id_to_name


def create_document_transformer(
    category_id_to_name: Dict[str, str],
    tenant_id: str,
    searchable_option_dimensions: Optional[list] = None,
    translator: Optional[Any] = None,
    translation_prompts: Optional[Dict[str, str]] = None
) -> SPUDocumentTransformer:
    """
    Create a document transformer (one shared initialisation path).

    Args:
        category_id_to_name: Mapping from category ID to category name
        tenant_id: Tenant ID
        searchable_option_dimensions: Searchable option dimensions
            (loaded from config when None)
        translator: Translator instance (when None, one is built from config
            if config.query_config.enable_translation is truthy)
        translation_prompts: Translation prompt configuration
            (loaded from config when None)

    Returns:
        SPUDocumentTransformer instance
    """
    # Tenant configuration is always looked up, regardless of the other arguments.
    tenant_config = get_tenant_config_loader().get_tenant_config(tenant_id)

    # The search config is only loaded when at least one argument was left unset.
    unresolved = (searchable_option_dimensions, translator, translation_prompts)
    if any(value is None for value in unresolved):
        try:
            config = ConfigLoader().load_config()

            if searchable_option_dimensions is None:
                searchable_option_dimensions = config.spu_config.searchable_option_dimensions

            if translator is None:
                query_settings = config.query_config
                if query_settings.enable_translation:
                    # Imported lazily: only needed when a translator must be built here.
                    from query.translator import Translator

                    translator = Translator(
                        api_key=query_settings.translation_api_key,
                        use_cache=True,
                        glossary_id=query_settings.translation_glossary_id,
                        translation_context=query_settings.translation_context
                    )

            if translation_prompts is None:
                translation_prompts = config.query_config.translation_prompts
        except Exception as exc:
            logger.warning(f"Failed to load config, using defaults: {exc}")
            # NOTE(review): these fallbacks run only on the failure path; a config
            # that loads fine but yields None passes None through — confirm intended.
            if searchable_option_dimensions is None:
                searchable_option_dimensions = ['option1', 'option2', 'option3']
            if translation_prompts is None:
                translation_prompts = {}

    return SPUDocumentTransformer(
        category_id_to_name=category_id_to_name,
        searchable_option_dimensions=searchable_option_dimensions,
        tenant_config=tenant_config,
        translator=translator,
        translation_prompts=translation_prompts
    )