# indexing_utils.py
"""
Indexing utility functions.

Extracts shared logic to avoid code duplication.
"""

import logging
from typing import Dict, Any, Optional
from sqlalchemy import Engine, text
from config import ConfigLoader
from config.tenant_config_loader import get_tenant_config_loader
from indexer.document_transformer import SPUDocumentTransformer

logger = logging.getLogger(__name__)


def load_category_mapping(db_engine: Engine) -> Dict[str, str]:
    """
    Load the category-ID-to-name mapping (global, shared by all tenants).

    Selects the distinct (category_id, category) pairs of non-deleted rows in
    ``shoplazza_product_spu`` whose category_id is not NULL. Rows whose name is
    empty or whitespace-only are skipped with a warning. Any exception raised
    while querying or processing rows is logged and swallowed, and whatever
    was collected up to that point is returned.

    Args:
        db_engine: SQLAlchemy database engine

    Returns:
        Dictionary mapping category_id (the string form of its integer value)
        to the category name exactly as stored (not stripped)
    """
    category_sql = text("""
        SELECT DISTINCT
            category_id,
            category
        FROM shoplazza_product_spu
        WHERE deleted = 0 AND category_id IS NOT NULL
    """)

    id_to_name: Dict[str, str] = {}
    try:
        with db_engine.connect() as connection:
            for record in connection.execute(category_sql):
                # Normalize the key through int() so the string form has no
                # decimal part or padding.
                category_id = str(int(record.category_id))
                name = record.category

                if name and name.strip():
                    id_to_name[category_id] = name
                else:
                    logger.warning(f"Category ID {category_id} has empty name, skipping")
    except Exception as e:
        # Best effort: report the failure and fall through with whatever was
        # gathered before it happened.
        logger.error(f"Failed to load category mapping: {e}", exc_info=True)

    return id_to_name


def create_document_transformer(
    category_id_to_name: Dict[str, str],
    tenant_id: str,
    searchable_option_dimensions: Optional[list] = None,
    translator: Optional[Any] = None,
    translation_prompts: Optional[Dict[str, str]] = None
) -> SPUDocumentTransformer:
    """
    Create a document transformer (single place for the initialization logic).

    The tenant configuration is always looked up for ``tenant_id``. The search
    configuration is loaded only when at least one of the optional arguments
    is None; each argument that is None is then filled in from it. If loading
    the configuration (or building the translator) raises, a warning is logged
    and built-in defaults are used for the dimensions and prompts; a translator
    that was not supplied stays None in that case.

    Args:
        category_id_to_name: Mapping from category ID to category name
        tenant_id: Tenant ID
        searchable_option_dimensions: List of searchable option dimensions
            (loaded from config if None)
        translator: Translator instance (if None, created from config when
            translation is enabled there)
        translation_prompts: Translation prompt configuration (loaded from
            config if None)

    Returns:
        An SPUDocumentTransformer instance
    """
    # Tenant configuration is required regardless of the optional arguments.
    tenant_config = get_tenant_config_loader().get_tenant_config(tenant_id)

    # Only touch the search configuration when something is actually missing.
    needs_config = any(
        value is None
        for value in (searchable_option_dimensions, translator, translation_prompts)
    )
    if needs_config:
        try:
            settings = ConfigLoader().load_config()

            if searchable_option_dimensions is None:
                searchable_option_dimensions = settings.spu_config.searchable_option_dimensions

            if translator is None:
                query_settings = settings.query_config
                if query_settings.enable_translation:
                    # Imported lazily so the translator module is only needed
                    # when translation is enabled.
                    from query.translator import Translator
                    translator = Translator(
                        api_key=query_settings.translation_api_key,
                        use_cache=True,
                        glossary_id=query_settings.translation_glossary_id,
                        translation_context=query_settings.translation_context
                    )

            if translation_prompts is None:
                translation_prompts = settings.query_config.translation_prompts
        except Exception as e:
            logger.warning(f"Failed to load config, using defaults: {e}")
            # Fall back to built-in defaults for whatever is still unset.
            searchable_option_dimensions = (
                ['option1', 'option2', 'option3']
                if searchable_option_dimensions is None
                else searchable_option_dimensions
            )
            translation_prompts = {} if translation_prompts is None else translation_prompts

    return SPUDocumentTransformer(
        category_id_to_name=category_id_to_name,
        searchable_option_dimensions=searchable_option_dimensions,
        tenant_config=tenant_config,
        translator=translator,
        translation_prompts=translation_prompts
    )