diff --git a/config/config.yaml b/config/config.yaml index 465bf65..65936a5 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,11 +1,3 @@ -# Unified Configuration for Multi-Tenant Search Engine -# 统一配置文件,所有租户共用一套配置 -# 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为 -# -# 约定:下列键为必填;进程环境变量可覆盖 infrastructure / runtime 中同名语义项 -#(如 ES_HOST、API_PORT 等),未设置环境变量时使用本文件中的值。 - -# Process / bind addresses (环境变量 APP_ENV、RUNTIME_ENV、ES_INDEX_NAMESPACE 可覆盖前两者的语义) runtime: environment: prod index_namespace: '' @@ -21,8 +13,6 @@ runtime: translator_port: 6006 reranker_host: 0.0.0.0 reranker_port: 6007 - -# 基础设施连接(敏感项优先读环境变量:ES_*、REDIS_*、DB_*、DASHSCOPE_API_KEY、DEEPL_AUTH_KEY) infrastructure: elasticsearch: host: http://localhost:9200 @@ -49,23 +39,12 @@ infrastructure: secrets: dashscope_api_key: null deepl_auth_key: null - -# Elasticsearch Index es_index_name: search_products - -# 检索域 / 索引列表(可为空列表;每项字段均需显式给出) indexes: [] - -# Config assets assets: query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict - -# Product content understanding (LLM enrich-content) configuration product_enrich: max_workers: 40 - -# 离线 / Web 相关性评估(scripts/evaluation、eval-web) -# CLI 未显式传参时使用此处默认值;search_base_url 未配置时自动为 http://127.0.0.1:{runtime.api_port} search_evaluation: artifact_root: artifacts/search_evaluation queries_file: scripts/evaluation/queries/queries.txt @@ -98,23 +77,18 @@ search_evaluation: rebuild_irrelevant_stop_ratio: 0.799 rebuild_irrel_low_combined_stop_ratio: 0.959 rebuild_irrelevant_stop_streak: 3 - -# ES Index Settings (基础设置) es_settings: number_of_shards: 1 number_of_replicas: 0 refresh_interval: 30s -# 字段权重配置(用于搜索时的字段boost) -# 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang}。 -# 若需要按某个语言单独调权,也可以加显式 key(例如 title.de: 3.2)。 +# 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang} field_boosts: title: 3.0 # qanchors enriched_tags 在 enriched_attributes.value中也存在,所以其实他的权重为自身权重+enriched_attributes.value的权重 qanchors: 1.0 enriched_tags: 1.0 enriched_attributes.value: 1.5 - # 
enriched_taxonomy_attributes.value: 0.3 category_name_text: 2.0 category_path: 2.0 keywords: 2.0 @@ -126,38 +100,25 @@ field_boosts: description: 1.0 vendor: 1.0 -# Query Configuration(查询配置) query_config: - # 支持的语言 supported_languages: - zh - en default_language: en - - # 功能开关(翻译开关由tenant_config控制) enable_text_embedding: true enable_query_rewrite: true - # 查询翻译模型(须与 services.translation.capabilities 中某项一致) - # 源语种在租户 index_languages 内:主召回可打在源语种字段,用下面三项。 - zh_to_en_model: nllb-200-distilled-600m # "opus-mt-zh-en" - en_to_zh_model: nllb-200-distilled-600m # "opus-mt-en-zh" - default_translation_model: nllb-200-distilled-600m - # zh_to_en_model: deepl - # en_to_zh_model: deepl - # default_translation_model: deepl - # 源语种不在 index_languages:翻译对可检索文本更关键,可单独指定(缺省则与上一组相同) - zh_to_en_model__source_not_in_index: nllb-200-distilled-600m - en_to_zh_model__source_not_in_index: nllb-200-distilled-600m - default_translation_model__source_not_in_index: nllb-200-distilled-600m - # zh_to_en_model__source_not_in_index: deepl - # en_to_zh_model__source_not_in_index: deepl - # default_translation_model__source_not_in_index: deepl + zh_to_en_model: deepl # nllb-200-distilled-600m + en_to_zh_model: deepl + default_translation_model: deepl + # 源语种不在 index_languages时翻译质量比较重要,因此单独配置 + zh_to_en_model__source_not_in_index: deepl + en_to_zh_model__source_not_in_index: deepl + default_translation_model__source_not_in_index: deepl - # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)。 - # 检测语言已在租户 index_languages 内:较短;不在索引语言内:较长(翻译对召回更关键)。 - translation_embedding_wait_budget_ms_source_in_index: 300 # 80 - translation_embedding_wait_budget_ms_source_not_in_index: 400 # 200 + # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒) + translation_embedding_wait_budget_ms_source_in_index: 300 + translation_embedding_wait_budget_ms_source_not_in_index: 400 style_intent: enabled: true selected_sku_boost: 1.2 @@ -184,11 +145,8 @@ query_config: product_title_exclusion: enabled: true dictionary_path: 
config/dictionaries/product_title_exclusion.tsv - - # 动态多语言检索字段配置 - # multilingual_fields 会被拼成 title.{lang}/brief.{lang}/... 形式; - # shared_fields 为无语言后缀字段。 search_fields: + # 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang} multilingual_fields: - title - keywords @@ -205,13 +163,14 @@ query_config: # - description # - vendor # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values + shared_fields: null core_multilingual_fields: - title - qanchors - category_name_text - # 统一文本召回策略(主查询 + 翻译查询) + # 文本召回(主查询 + 翻译查询) text_query_strategy: base_minimum_should_match: 60% translation_minimum_should_match: 60% @@ -226,14 +185,10 @@ query_config: title: 5.0 qanchors: 4.0 phrase_match_boost: 3.0 - - # Embedding字段名称 text_embedding_field: title_embedding image_embedding_field: image_embedding.vector - # 返回字段配置(_source includes) - # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段 - # 下列字段与 api/result_formatter.py(SpuResult 填充)及 search/searcher.py(SKU 排序/主图替换)一致 + # null表示返回所有字段,[]表示不返回任何字段 source_fields: - spu_id - handle @@ -255,6 +210,7 @@ query_config: # - enriched_tags # - enriched_attributes # - # enriched_taxonomy_attributes.value + - min_price - compare_at_price - image_url @@ -274,22 +230,17 @@ query_config: # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates) knn_text_boost: 4 knn_image_boost: 4 - - # knn_text_num_candidates = k * 3.4 knn_text_k: 160 - knn_text_num_candidates: 560 + knn_text_num_candidates: 560 # k * 3.5 knn_text_k_long: 400 knn_text_num_candidates_long: 1200 knn_image_k: 400 knn_image_num_candidates: 1200 -# Function Score配置(ES层打分规则) function_score: score_mode: sum boost_mode: multiply functions: [] - -# 粗排配置(仅融合 ES 文本/向量信号,不调用模型) coarse_rank: enabled: true input_window: 480 @@ -305,24 +256,20 @@ coarse_rank: knn_text_weight: 1.0 knn_image_weight: 2.0 knn_tie_breaker: 0.3 - knn_bias: 0.6 - knn_exponent: 0.4 - -# 精排配置(轻量 reranker) -# enabled=false 时仍进入 fine 阶段,但保序透传,不调用 fine 模型服务 + knn_bias: 0.0 + knn_exponent: 5.6 + knn_text_exponent: 0.0 + 
knn_image_exponent: 0.0 fine_rank: - enabled: false + enabled: false # false 时保序透传 input_window: 160 output_window: 80 timeout_sec: 10.0 rerank_query_template: '{query}' rerank_doc_template: '{title}' service_profile: fine - -# 重排配置(provider/URL 在 services.rerank) -# enabled=false 时仍进入 rerank 阶段,但保序透传,不调用最终 rerank 服务 rerank: - enabled: true + enabled: false # false 时保序透传 rerank_window: 160 exact_knn_rescore_enabled: true exact_knn_rescore_window: 160 @@ -332,7 +279,6 @@ rerank: rerank_query_template: '{query}' rerank_doc_template: '{title}' service_profile: default - # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn) # 其中 knn_score 先做一层 dis_max: # max(knn_text_weight * text_knn, knn_image_weight * image_knn) @@ -345,30 +291,28 @@ rerank: fine_bias: 0.1 fine_exponent: 1.0 text_bias: 0.1 - text_exponent: 0.25 # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) + text_exponent: 0.25 text_translation_weight: 0.8 knn_text_weight: 1.0 knn_image_weight: 2.0 knn_tie_breaker: 0.3 - knn_bias: 0.6 - knn_exponent: 0.4 + knn_bias: 0.0 + knn_exponent: 5.6 -# 可扩展服务/provider 注册表(单一配置源) services: translation: service_url: http://127.0.0.1:6006 - # default_model: nllb-200-distilled-600m default_model: nllb-200-distilled-600m default_scene: general timeout_sec: 10.0 cache: ttl_seconds: 62208000 sliding_expiration: true - # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups). - enable_model_quality_tier_cache: true + # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups) # Higher tier = better quality. Multiple models may share one tier (同级). # A request may reuse Redis keys from models with tier > A or tier == A (not from lower tiers). 
+ enable_model_quality_tier_cache: true model_quality_tiers: deepl: 30 qwen-mt: 30 @@ -462,13 +406,12 @@ services: num_beams: 1 use_cache: true embedding: - provider: http # http + provider: http providers: http: text_base_url: http://127.0.0.1:6005 image_base_url: http://127.0.0.1:6008 - # 服务内文本后端(embedding 进程启动时读取) - backend: tei # tei | local_st + backend: tei backends: tei: base_url: http://127.0.0.1:8080 @@ -508,8 +451,8 @@ services: request: max_docs: 1000 normalize: true - default_instance: default # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。 + default_instance: default instances: default: host: 0.0.0.0 @@ -551,6 +494,7 @@ services: enforce_eager: false infer_batch_size: 100 sort_by_doc_length: true + # standard=_format_instruction__standard(固定 yes/no system);compact=_format_instruction(instruction 作 system 且 user 内重复 Instruct) instruction_format: standard # compact standard # instruction: "Given a query, score the product for relevance" @@ -564,6 +508,7 @@ services: # instruction: "Rank products by query with category & style match prioritized" # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query" instruction: rank products by given query + # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。 qwen3_vllm_score: @@ -591,15 +536,10 @@ services: qwen3_transformers: model_name: Qwen/Qwen3-Reranker-0.6B instruction: rank products by given query - # instruction: "Score the product’s relevance to the given query" max_length: 8192 batch_size: 64 use_fp16: true - # sdpa:默认无需 flash-attn;若已安装 flash_attn 可改为 flash_attention_2 attn_implementation: sdpa - # Packed Transformers backend: shared query prefix + custom position_ids/attention_mask. 
- # For 1 query + many short docs (for example 400 product titles), this usually reduces - # repeated prefix work and padding waste compared with pairwise batching. qwen3_transformers_packed: model_name: Qwen/Qwen3-Reranker-0.6B instruction: Rank products by query with category & style match prioritized @@ -608,8 +548,6 @@ services: max_docs_per_pack: 0 use_fp16: true sort_by_doc_length: true - # Packed mode relies on a custom 4D attention mask. "eager" is the safest default. - # If your torch/transformers stack validates it, you can benchmark "sdpa". attn_implementation: eager qwen3_gguf: repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF @@ -617,7 +555,6 @@ services: cache_dir: ./model_cache local_dir: ./models/reranker/qwen3-reranker-4b-gguf instruction: Rank products by query with category & style match prioritized - # T4 16GB / 性能优先配置:全量层 offload,实测比保守配置明显更快 n_ctx: 512 n_batch: 512 n_ubatch: 512 @@ -640,8 +577,6 @@ services: cache_dir: ./model_cache local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf instruction: Rank products by query with category & style match prioritized - # 0.6B GGUF / online rerank baseline: - # 实测 400 titles 单请求约 265s,因此它更适合作为低显存功能后备,不适合在线低延迟主路由。 n_ctx: 256 n_batch: 256 n_ubatch: 256 @@ -661,20 +596,15 @@ services: verbose: false dashscope_rerank: model_name: qwen3-rerank - # 按地域选择 endpoint: - # 中国: https://dashscope.aliyuncs.com/compatible-api/v1/reranks - # 新加坡: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks - # 美国: https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks api_key_env: RERANK_DASHSCOPE_API_KEY_CN timeout_sec: 10.0 - top_n_cap: 0 # 0 表示 top_n=当前请求文档数;>0 则限制 top_n 上限 - batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) + top_n_cap: 0 # 0 表示 top_n=当前请求文档数 + batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) instruct: Given a shopping query, rank product titles by relevance max_retries: 2 retry_backoff_sec: 0.2 -# 
SPU配置(已启用,使用嵌套skus) spu_config: enabled: true spu_field: spu_id @@ -686,7 +616,6 @@ spu_config: - option2 - option3 -# 租户配置(Tenant Configuration) # 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选) # 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集 tenant_config: diff --git a/config/loader.py b/config/loader.py index 919c725..25e012c 100644 --- a/config/loader.py +++ b/config/loader.py @@ -587,6 +587,14 @@ class AppConfigLoader: knn_tie_breaker=float(coarse_fusion_raw.get("knn_tie_breaker", 0.0)), knn_bias=float(coarse_fusion_raw.get("knn_bias", 0.6)), knn_exponent=float(coarse_fusion_raw.get("knn_exponent", 0.2)), + knn_text_bias=float( + coarse_fusion_raw.get("knn_text_bias", coarse_fusion_raw.get("knn_bias", 0.6)) + ), + knn_text_exponent=float(coarse_fusion_raw.get("knn_text_exponent", 0.0)), + knn_image_bias=float( + coarse_fusion_raw.get("knn_image_bias", coarse_fusion_raw.get("knn_bias", 0.6)) + ), + knn_image_exponent=float(coarse_fusion_raw.get("knn_image_exponent", 0.0)), text_translation_weight=float( coarse_fusion_raw.get("text_translation_weight", 0.8) ), @@ -636,6 +644,14 @@ class AppConfigLoader: knn_tie_breaker=float(fusion_raw.get("knn_tie_breaker", 0.0)), knn_bias=float(fusion_raw.get("knn_bias", 0.6)), knn_exponent=float(fusion_raw.get("knn_exponent", 0.2)), + knn_text_bias=float( + fusion_raw.get("knn_text_bias", fusion_raw.get("knn_bias", 0.6)) + ), + knn_text_exponent=float(fusion_raw.get("knn_text_exponent", 0.0)), + knn_image_bias=float( + fusion_raw.get("knn_image_bias", fusion_raw.get("knn_bias", 0.6)) + ), + knn_image_exponent=float(fusion_raw.get("knn_image_exponent", 0.0)), fine_bias=float(fusion_raw.get("fine_bias", 0.00001)), fine_exponent=float(fusion_raw.get("fine_exponent", 1.0)), text_translation_weight=float( diff --git a/config/schema.py b/config/schema.py index 72d81c3..1864384 100644 --- a/config/schema.py +++ b/config/schema.py @@ -119,6 +119,18 @@ class RerankFusionConfig: knn_tie_breaker: 
float = 0.0 knn_bias: float = 0.6 knn_exponent: float = 0.2 + #: Optional additive floor for the weighted text KNN term. + #: Falls back to knn_bias when omitted in config loading. + knn_text_bias: float = 0.6 + #: Optional extra multiplicative term on weighted text KNN. + #: Uses knn_text_bias as the additive floor. + knn_text_exponent: float = 0.0 + #: Optional additive floor for the weighted image KNN term. + #: Falls back to knn_bias when omitted in config loading. + knn_image_bias: float = 0.6 + #: Optional extra multiplicative term on weighted image KNN. + #: Uses knn_image_bias as the additive floor. + knn_image_exponent: float = 0.0 fine_bias: float = 0.00001 fine_exponent: float = 1.0 #: 翻译子句 named query 分数相对原文 base_query 的权重(加权后再与原文做 dismax 融合) @@ -143,6 +155,18 @@ class CoarseRankFusionConfig: knn_tie_breaker: float = 0.0 knn_bias: float = 0.6 knn_exponent: float = 0.2 + #: Optional additive floor for the weighted text KNN term. + #: Falls back to knn_bias when omitted in config loading. + knn_text_bias: float = 0.6 + #: Optional extra multiplicative term on weighted text KNN. + #: Uses knn_text_bias as the additive floor. + knn_text_exponent: float = 0.0 + #: Optional additive floor for the weighted image KNN term. + #: Falls back to knn_bias when omitted in config loading. + knn_image_bias: float = 0.6 + #: Optional extra multiplicative term on weighted image KNN. + #: Uses knn_image_bias as the additive floor. 
+ knn_image_exponent: float = 0.0 #: 翻译子句 named query 分数相对原文 base_query 的权重(加权后再与原文做 dismax 融合) text_translation_weight: float = 0.8 diff --git a/docs/caches-inventory.md b/docs/caches-inventory.md new file mode 100644 index 0000000..3838279 --- /dev/null +++ b/docs/caches-inventory.md @@ -0,0 +1,133 @@ +# 本项目缓存一览 + +本文档梳理仓库内**与业务相关的各类缓存**:说明用途、键与过期策略,并汇总运维脚本。按「分布式(Redis)→ 进程内 → 磁盘/模型 → 第三方」组织。 + +--- + +## 一、Redis 集中式缓存(生产主路径) + +所有下列缓存默认连接 **`infrastructure.redis`**(`config/config.yaml` 与 `REDIS_*` 环境变量),**数据库编号一般为 `db=0`**(脚本可通过参数覆盖)。`snapshot_db` 仅在配置中存在,供快照/运维场景选用,应用代码未按该字段切换业务缓存的 DB。 + +### 1. 文本 / 图像向量缓存(Embedding) + +- **作用**:缓存 BGE/TEI 文本向量与 CN-CLIP 图像向量、CLIP 文本塔向量,避免重复推理。 +- **实现**:`embeddings/redis_embedding_cache.py` 的 `RedisEmbeddingCache`;键构造见 `embeddings/cache_keys.py`。 +- **Key 形态**(最终 Redis 键 = `前缀` + `可选 namespace` + `逻辑键`): + - **前缀**:`infrastructure.redis.embedding_cache_prefix`(默认 `embedding`,可用 `REDIS_EMBEDDING_CACHE_PREFIX` 覆盖)。 + - **命名空间**:`embeddings/server.py` 与客户端中分为: + - 文本:`namespace=""` → `{prefix}:{embed:norm0|1:...}` + - 图像:`namespace="image"` → `{prefix}:image:{embed:模型名:txt:norm0|1:...}` + - CLIP 文本:`namespace="clip_text"` → `{prefix}:clip_text:{embed:模型名:img:norm0|1:...}` + - 逻辑键段含 `embed:`、`norm0/1`、模型名(多模态)、过长文本/URL 时用 `h:sha256:...` 摘要(见 `cache_keys.py` 注释)。 +- **值格式**:BF16 压缩后的字节(`embeddings/bf16.py`),非 JSON。 +- **TTL**:`infrastructure.redis.cache_expire_days`(默认 **720 天**,`REDIS_CACHE_EXPIRE_DAYS`)。写入用 `SETEX`;**命中时滑动续期**(`EXPIRE` 刷新为同一时长)。 +- **Redis 客户端**:`decode_responses=False`(二进制)。 + +**主要代码**:`embeddings/server.py`、`embeddings/text_encoder.py`、`embeddings/image_encoder.py`。 + +--- + +### 2. 
翻译结果缓存(Translation) + +- **作用**:按「翻译模型 + 目标语言 + 原文」缓存译文;支持**模型质量分层探测**(高 tier 模型写入的缓存可被同 tier 或更低 tier 模型的请求命中,低 tier 模型写入的缓存不会被更高 tier 的请求复用,见 `translation/settings.py` 中 `translation_cache_probe_models`)。 +- **Key 形态**:`trans:{model}:{target_lang}:{text前4字符}{sha256全文}`(`translation/cache.py` 的 `build_key`)。 +- **值格式**:UTF-8 译文字符串。 +- **TTL**:`services.translation.cache.ttl_seconds`(默认 **62208000 秒 = 720 天**)。若 `sliding_expiration: true`,命中时刷新 TTL。 +- **能力级开关**:各 `capabilities.*.use_cache` 为 `false` 时该后端不落 Redis。 +- **Redis 客户端**:`decode_responses=True`。 + +**主要代码**:`translation/cache.py`、`translation/service.py`;翻译 HTTP 服务:`api/translator_app.py`(`get_translation_service()` 使用 `lru_cache` 单例,见下文进程内缓存)。 + +--- + +### 3. 商品内容理解 / Anchors 与语义分析缓存(Indexer) + +- **作用**:缓存 LLM 对商品标题等拼出的 **prompt 输入** 所做的分析结果(anchors、语义属性等),避免重复调用大模型。键与 `analysis_kind`、`prompt` 契约版本、`target_lang` 及输入摘要相关。 +- **Key 形态**:`{anchor_cache_prefix}:{analysis_kind}:{prompt_contract_hash[:12]}:{target_lang}:{prompt_input[:4]}{md5}`(`indexer/product_enrich.py` 中 `_make_analysis_cache_key`)。 +- **前缀**:`infrastructure.redis.anchor_cache_prefix`(默认 `product_anchors`,`REDIS_ANCHOR_CACHE_PREFIX`)。 +- **值格式**:JSON 字符串(规范化后的分析结果)。 +- **TTL**:`anchor_cache_expire_days`(默认 **30 天**),以秒写入 `SETEX`(**非滑动**,与向量/翻译不同)。 +- **读逻辑**:无 TTL 刷新;仅校验内容是否「有意义」再返回。 + +**主要代码**:`indexer/product_enrich.py`;与 HTTP 侧对齐说明见 `api/routes/indexer.py` 注释。 + +--- + +## 二、进程内缓存(非共享、随进程重启失效) + +| 名称 | 用途 | 范围/生命周期 | +|------|------|----------------| +| **`get_app_config()`** | 解析并缓存全局 `AppConfig` | `config/loader.py`:`@lru_cache(maxsize=1)`;`reload_app_config()` 可 `cache_clear()` | +| **`TranslationService` 单例** | 翻译服务进程内复用后端与 Redis 客户端 | `api/translator_app.py`:`get_translation_service()` | +| **`_nllb_tokenizer_code_by_normalized_key`** | NLLB tokenizer 语言码映射 | `translation/languages.py`:`@lru_cache(maxsize=1)` | +| **`QueryTextAnalysisCache`** | 单次查询解析内复用分词、tokenizer 结果 | `query/tokenization.py`,随 `QueryParser` 一次 parse | +| **`_SelectionContext`(SKU 意图)** | 
归一化文本、分词、匹配布尔等小字典 | `search/sku_intent_selector.py`,单次选择流程 | +| **`incremental_service` transformer 缓存** | 按 `tenant_id` 缓存文档转换器 | `indexer/incremental_service.py`,**无界**、多租户进程长期存活时需注意内存 | +| **NLLB batch 内 `token_count_cache`** | 同一 batch 内避免重复计 token | `translation/backends/local_ctranslate2.py` | +| **CLIP 分词器 `@lru_cache`**(第三方) | 简单 tokenizer 缓存 | `third-party/clip-as-service/.../simple_tokenizer.py` | + +**说明**:`utils/cache.py` 中的 **`DictCache`**(文件 JSON:默认 `.cache/dict_cache.json`)已导出,但仓库内**无直接 `DictCache(` 调用**,视为可复用工具/预留,非当前主路径。 + +--- + +## 三、磁盘与模型相关「缓存」(非 Redis) + +| 名称 | 用途 | 配置/位置 | +|------|------|-----------| +| **Hugging Face / 本地模型目录** | 重排器、翻译本地模型等权重下载与缓存 | `services.rerank.backends.*.cache_dir` 等,常见默认 **`./model_cache`**(`config/config.yaml`) | +| **vLLM `enable_prefix_caching`** | 重排服务内 **Prefix KV 缓存**(加速同前缀批推理) | `services.rerank.backends.qwen3_vllm*`、`reranker/backends/qwen3_vllm*.py` | +| **运行时目录** | 重排服务状态/引擎文件 | `services.rerank.instances.*.runtime_dir`(如 `./.runtime/reranker/...`) | + +翻译能力里的 **`use_cache: true`**(如 NLLB、Marian)在多数后端指 **推理时的 KV cache(Transformer)**,与 Redis 译文缓存是不同层次;Redis 译文缓存仍由 `TranslationCache` 控制。 + +--- + +## 四、Elasticsearch 内部缓存 + +索引设置中的 `refresh_interval` 等影响近实时可见性,但**不属于应用层键值缓存**。若需调优 ES 查询缓存、节点堆等,见运维文档与集群配置,此处不展开。 + +--- + +## 五、运维与巡检脚本(Redis) + +| 脚本 | 作用 | +|------|------| +| `scripts/redis/redis_cache_health_check.py` | 按 **embedding / translation / anchors** 三类前缀巡检:key 数量估算、TTL 采样、`IDLETIME` 等 | +| `scripts/redis/redis_cache_prefix_stats.py` | 按前缀统计 key 数量与 **MEMORY USAGE**(可多 DB) | +| `scripts/redis/redis_memory_heavy_keys.py` | 扫描占用内存最大的 key,辅助排查「统计与总内存不一致」 | +| `scripts/redis/monitor_eviction.py` | 实时监控 **eviction** 相关事件,用于容量与驱逐策略排查 | + +使用前需加载项目配置(如 `source activate.sh`)以保证 `REDIS_CONFIG` 与生产一致。脚本注释中给出了 **`redis-cli` 手工统计**示例(按前缀 `wc -l`、`MEMORY STATS` 等)。 + +--- + +## 六、总表(Redis 与各层缓存) + +| 缓存名称 | 业务模块 | 存储 | Key 前缀 / 命名模式 | 过期时间 | 过期策略 | 值摘要 | 配置键 / 环境变量 | 
+|----------|----------|------|---------------------|----------|----------|--------|-------------------| +| 文本向量 | 检索 / 索引 / Embedding 服务 | Redis db≈0 | `{embedding_cache_prefix}:*`(逻辑键以 `embed:norm…` 开头) | `cache_expire_days`(默认 720 天) | 写入 TTL + 命中滑动续期 | BF16 字节向量 | `infrastructure.redis.*`;`REDIS_EMBEDDING_CACHE_PREFIX`、`REDIS_CACHE_EXPIRE_DAYS` | +| 图像向量(CLIP 图) | 图搜 / 多模态 | 同上 | `{prefix}:image:*` | 同上 | 同上 | BF16 字节 | 同上 | +| CLIP 文本塔向量 | 图搜文本侧 | 同上 | `{prefix}:clip_text:*` | 同上 | 同上 | BF16 字节 | 同上 | +| 翻译译文 | 查询翻译、翻译服务 | 同上 | `trans:{model}:{lang}:*` | `services.translation.cache.ttl_seconds`(默认 720 天) | 可配置滑动(`sliding_expiration`) | UTF-8 字符串 | `services.translation.cache.*`;各能力 `use_cache` | +| 商品分析 / Anchors | 索引富化、LLM 内容理解 | 同上 | `{anchor_cache_prefix}:{kind}:{hash}:{lang}:*` | `anchor_cache_expire_days`(默认 30 天) | 固定 TTL,不滑动 | JSON 字符串 | `anchor_cache_prefix`、`anchor_cache_expire_days`;`REDIS_ANCHOR_*` | +| 应用配置 | 全栈 | 进程内存 | N/A(单例) | 进程生命周期 | `reload_app_config` 清除 | `AppConfig` 对象 | `config/loader.py` | +| 翻译服务实例 | 翻译 API | 进程内存 | N/A | 进程生命周期 | 单例 | `TranslationService` | `api/translator_app.py` | +| 查询分词缓存 | 查询解析 | 单次请求内 | N/A | 单次 parse | — | 分词与中间结果 | `query/tokenization.py` | +| SKU 意图辅助字典 | 搜索排序辅助 | 单次请求内 | N/A | 单次选择 | — | 小 dict | `search/sku_intent_selector.py` | +| 增量索引 Transformer | 索引管道 | 进程内存 | `tenant_id` 字符串键 | 长期(无界) | 无自动淘汰 | Transformer 元组 | `indexer/incremental_service.py` | +| 重排 / 翻译模型权重 | 推理服务 | 本地磁盘 | 目录路径 | 无自动删除(人工清理) | — | 模型文件 | `cache_dir: ./model_cache` 等 | +| vLLM Prefix 缓存 | 重排(Qwen3 等) | GPU/引擎内 | 引擎内部 | 引擎管理 | — | KV Cache | `enable_prefix_caching` | +| 文件 Dict 缓存(可选) | 通用 | `.cache/dict_cache.json` | 分类 + 自定义 key | 持久直至删除 | — | JSON 可序列化值 | `utils/cache.py`(当前无调用方) | + +--- + +## 七、维护建议(简要) + +1. **容量**:三类 Redis 缓存(embedding / trans / anchors)可共用同一实例;大租户或图搜多时 **embedding** 与 **trans** 往往占主要内存,可用 `redis_cache_prefix_stats.py` 分前缀观察。 +2. 
**键迁移**:变更 `embedding_cache_prefix`、CLIP `model_name` 或 prompt 契约会自然**隔离新键空间**;旧键依赖 TTL 或人工批量删除。 +3. **一致性**:向量缓存对异常向量会 **delete key**(`RedisEmbeddingCache.get`);anchors 依赖 `cache_version` 与契约 hash 防止错误复用。 +4. **监控**:除脚本外,Embedding HTTP 服务健康检查会报告各 lane 的 **`cache_enabled`**(`embeddings/server.py`)。 + +--- + +*文档随代码扫描生成;若新增 Redis 用途,请同步更新本文件与 `scripts/redis/redis_cache_health_check.py` 中的 `_load_known_cache_types()`。* diff --git a/search/es_query_builder.py b/search/es_query_builder.py index 4c20e93..56fdec5 100644 --- a/search/es_query_builder.py +++ b/search/es_query_builder.py @@ -8,6 +8,7 @@ Simplified architecture: - function_score wrapper for boosting fields """ +from dataclasses import dataclass from typing import Dict, Any, List, Optional, Tuple import numpy as np @@ -114,6 +115,171 @@ class ESQueryBuilder: self.phrase_match_tie_breaker = float(phrase_match_tie_breaker) self.phrase_match_boost = float(phrase_match_boost) + @dataclass(frozen=True) + class KNNClausePlan: + field: str + boost: float + k: Optional[int] = None + num_candidates: Optional[int] = None + nested_path: Optional[str] = None + + @staticmethod + def _vector_to_list(vector: Any) -> List[float]: + if vector is None: + return [] + if hasattr(vector, "tolist"): + values = vector.tolist() + else: + values = list(vector) + return [float(v) for v in values] + + @staticmethod + def _query_token_count(parsed_query: Optional[Any]) -> int: + if parsed_query is None: + return 0 + query_tokens = getattr(parsed_query, "query_tokens", None) or [] + return len(query_tokens) + + def get_text_knn_plan(self, parsed_query: Optional[Any] = None) -> Optional[KNNClausePlan]: + if not self.text_embedding_field: + return None + boost = self.knn_text_boost + final_knn_k = self.knn_text_k + final_knn_num_candidates = self.knn_text_num_candidates + if self._query_token_count(parsed_query) >= 5: + final_knn_k = self.knn_text_k_long + final_knn_num_candidates = self.knn_text_num_candidates_long + boost = 
self.knn_text_boost * 1.4 + return self.KNNClausePlan( + field=str(self.text_embedding_field), + boost=float(boost), + k=int(final_knn_k), + num_candidates=int(final_knn_num_candidates), + ) + + def get_image_knn_plan(self) -> Optional[KNNClausePlan]: + if not self.image_embedding_field: + return None + nested_path, _, _ = str(self.image_embedding_field).rpartition(".") + return self.KNNClausePlan( + field=str(self.image_embedding_field), + boost=float(self.knn_image_boost), + k=int(self.knn_image_k), + num_candidates=int(self.knn_image_num_candidates), + nested_path=nested_path or None, + ) + + def build_text_knn_clause( + self, + query_vector: Any, + *, + parsed_query: Optional[Any] = None, + query_name: str = "knn_query", + ) -> Optional[Dict[str, Any]]: + plan = self.get_text_knn_plan(parsed_query) + if plan is None or query_vector is None: + return None + return { + "knn": { + "field": plan.field, + "query_vector": self._vector_to_list(query_vector), + "k": plan.k, + "num_candidates": plan.num_candidates, + "boost": plan.boost, + "_name": query_name, + } + } + + def build_image_knn_clause( + self, + image_query_vector: Any, + *, + query_name: str = "image_knn_query", + ) -> Optional[Dict[str, Any]]: + plan = self.get_image_knn_plan() + if plan is None or image_query_vector is None: + return None + image_knn_query = { + "field": plan.field, + "query_vector": self._vector_to_list(image_query_vector), + "k": plan.k, + "num_candidates": plan.num_candidates, + "boost": plan.boost, + } + if plan.nested_path: + return { + "nested": { + "path": plan.nested_path, + "_name": query_name, + "query": {"knn": image_knn_query}, + "score_mode": "max", + } + } + return { + "knn": { + **image_knn_query, + "_name": query_name, + } + } + + def build_exact_text_knn_rescore_clause( + self, + query_vector: Any, + *, + parsed_query: Optional[Any] = None, + query_name: str = "exact_text_knn_query", + ) -> Optional[Dict[str, Any]]: + plan = self.get_text_knn_plan(parsed_query) + if 
plan is None or query_vector is None: + return None + return { + "script_score": { + "_name": query_name, + "query": {"exists": {"field": plan.field}}, + "script": { + "source": ( + f"((dotProduct(params.query_vector, '{plan.field}') + 1.0) / 2.0) * params.boost" + ), + "params": { + "query_vector": self._vector_to_list(query_vector), + "boost": float(plan.boost), + }, + }, + } + } + + def build_exact_image_knn_rescore_clause( + self, + image_query_vector: Any, + *, + query_name: str = "exact_image_knn_query", + ) -> Optional[Dict[str, Any]]: + plan = self.get_image_knn_plan() + if plan is None or image_query_vector is None: + return None + script_score_query = { + "query": {"exists": {"field": plan.field}}, + "script": { + "source": ( + f"((dotProduct(params.query_vector, '{plan.field}') + 1.0) / 2.0) * params.boost" + ), + "params": { + "query_vector": self._vector_to_list(image_query_vector), + "boost": float(plan.boost), + }, + }, + } + if plan.nested_path: + return { + "nested": { + "path": plan.nested_path, + "_name": query_name, + "score_mode": "max", + "query": {"script_score": script_score_query}, + } + } + return {"script_score": {"_name": query_name, **script_score_query}} + def _apply_source_filter(self, es_query: Dict[str, Any]) -> None: """ Apply tri-state _source semantics: @@ -250,52 +416,21 @@ class ESQueryBuilder: # 3. 
Add KNN search clauses alongside lexical clauses under the same bool.should # Text KNN: k / num_candidates from config; long queries use *_long and higher boost if has_embedding: - text_knn_boost = self.knn_text_boost - final_knn_k = self.knn_text_k - final_knn_num_candidates = self.knn_text_num_candidates - if parsed_query: - query_tokens = getattr(parsed_query, 'query_tokens', None) or [] - token_count = len(query_tokens) - if token_count >= 5: - final_knn_k = self.knn_text_k_long - final_knn_num_candidates = self.knn_text_num_candidates_long - text_knn_boost = self.knn_text_boost * 1.4 - recall_clauses.append({ - "knn": { - "field": self.text_embedding_field, - "query_vector": query_vector.tolist(), - "k": final_knn_k, - "num_candidates": final_knn_num_candidates, - "boost": text_knn_boost, - "_name": "knn_query", - } - }) + text_knn_clause = self.build_text_knn_clause( + query_vector, + parsed_query=parsed_query, + query_name="knn_query", + ) + if text_knn_clause: + recall_clauses.append(text_knn_clause) if has_image_embedding: - nested_path, _, _ = str(self.image_embedding_field).rpartition(".") - image_knn_query = { - "field": self.image_embedding_field, - "query_vector": image_query_vector.tolist(), - "k": self.knn_image_k, - "num_candidates": self.knn_image_num_candidates, - "boost": self.knn_image_boost, - } - if nested_path: - recall_clauses.append({ - "nested": { - "path": nested_path, - "_name": "image_knn_query", - "query": {"knn": image_knn_query}, - "score_mode": "max", - } - }) - else: - recall_clauses.append({ - "knn": { - **image_knn_query, - "_name": "image_knn_query", - } - }) + image_knn_clause = self.build_image_knn_clause( + image_query_vector, + query_name="image_knn_query", + ) + if image_knn_clause: + recall_clauses.append(image_knn_clause) # 4. 
Build main query structure: filters and recall if recall_clauses: diff --git a/search/rerank_client.py b/search/rerank_client.py index aa9b2c6..a236196 100644 --- a/search/rerank_client.py +++ b/search/rerank_client.py @@ -396,12 +396,50 @@ def _build_ltr_feature_block( } +def _maybe_append_weighted_knn_terms( + *, + term_rows: List[Dict[str, Any]], + fusion: CoarseRankFusionConfig | RerankFusionConfig, + knn_components: Optional[Dict[str, Any]], +) -> None: + if not knn_components: + return + + weighted_text_knn_score = _to_score(knn_components.get("weighted_text_knn_score")) + weighted_image_knn_score = _to_score(knn_components.get("weighted_image_knn_score")) + + if float(getattr(fusion, "knn_text_exponent", 0.0)) != 0.0: + text_bias = float(getattr(fusion, "knn_text_bias", fusion.knn_bias)) + term_rows.append( + { + "name": "weighted_text_knn_score", + "raw_score": weighted_text_knn_score, + "bias": text_bias, + "exponent": float(fusion.knn_text_exponent), + "factor": (max(weighted_text_knn_score, 0.0) + text_bias) ** float(fusion.knn_text_exponent), + } + ) + if float(getattr(fusion, "knn_image_exponent", 0.0)) != 0.0: + image_bias = float(getattr(fusion, "knn_image_bias", fusion.knn_bias)) + term_rows.append( + { + "name": "weighted_image_knn_score", + "raw_score": weighted_image_knn_score, + "bias": image_bias, + "exponent": float(fusion.knn_image_exponent), + "factor": (max(weighted_image_knn_score, 0.0) + image_bias) + ** float(fusion.knn_image_exponent), + } + ) + + def _compute_multiplicative_fusion( *, es_score: float, text_score: float, knn_score: float, fusion: RerankFusionConfig, + knn_components: Optional[Dict[str, Any]] = None, rerank_score: Optional[float] = None, fine_score: Optional[float] = None, style_boost: float = 1.0, @@ -427,6 +465,7 @@ def _compute_multiplicative_fusion( _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent) _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent) 
_add_term("knn_score", knn_score, fusion.knn_bias, fusion.knn_exponent) + _maybe_append_weighted_knn_terms(term_rows=term_rows, fusion=fusion, knn_components=knn_components) fused = 1.0 factors: Dict[str, float] = {} @@ -450,12 +489,30 @@ def _multiply_coarse_fusion_factors( es_score: float, text_score: float, knn_score: float, + knn_components: Dict[str, Any], fusion: CoarseRankFusionConfig, -) -> Tuple[float, float, float, float]: +) -> Tuple[float, float, float, float, float, float]: es_factor = (max(es_score, 0.0) + fusion.es_bias) ** fusion.es_exponent text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent - return es_factor, text_factor, knn_factor, es_factor * text_factor * knn_factor + text_knn_bias = float(getattr(fusion, "knn_text_bias", fusion.knn_bias)) + image_knn_bias = float(getattr(fusion, "knn_image_bias", fusion.knn_bias)) + text_knn_factor = ( + (max(_to_score(knn_components.get("weighted_text_knn_score")), 0.0) + text_knn_bias) + ** float(getattr(fusion, "knn_text_exponent", 0.0)) + ) + image_knn_factor = ( + (max(_to_score(knn_components.get("weighted_image_knn_score")), 0.0) + image_knn_bias) + ** float(getattr(fusion, "knn_image_exponent", 0.0)) + ) + return ( + es_factor, + text_factor, + knn_factor, + text_knn_factor, + image_knn_factor, + es_factor * text_factor * knn_factor * text_knn_factor * image_knn_factor, + ) def _has_selected_sku(hit: Dict[str, Any]) -> bool: @@ -481,10 +538,18 @@ def coarse_resort_hits( knn_components = signal_bundle["knn_components"] text_score = signal_bundle["text_score"] knn_score = signal_bundle["knn_score"] - es_factor, text_factor, knn_factor, coarse_score = _multiply_coarse_fusion_factors( + ( + es_factor, + text_factor, + knn_factor, + text_knn_factor, + image_knn_factor, + coarse_score, + ) = _multiply_coarse_fusion_factors( es_score=es_score, text_score=text_score, knn_score=knn_score, + 
knn_components=knn_components, fusion=f, ) @@ -535,6 +600,8 @@ def coarse_resort_hits( "coarse_es_factor": es_factor, "coarse_text_factor": text_factor, "coarse_knn_factor": knn_factor, + "coarse_text_knn_factor": text_knn_factor, + "coarse_image_knn_factor": image_knn_factor, "coarse_score": coarse_score, "matched_queries": matched_queries, "ltr_features": ltr_features, @@ -576,7 +643,7 @@ def fuse_scores_and_resort( - _rerank_score: 重排服务返回的分数 - _fused_score: 融合分数 - _text_score: 文本相关性分数(优先取 named queries 的 base_query 分数) - - _knn_score: KNN 分数(优先取 named queries 的 knn_query 分数) + - _knn_score: KNN 分数(优先取 exact named queries,缺失时回退 ANN named queries) Args: es_hits: ES hits 列表(会被原地修改) @@ -612,6 +679,7 @@ def fuse_scores_and_resort( text_score=text_score, knn_score=knn_score, fusion=f, + knn_components=knn_components, style_boost=style_boost, ) fused = fusion_result["score"] @@ -678,6 +746,8 @@ def fuse_scores_and_resort( "es_factor": fusion_result["factors"].get("es_score"), "text_factor": fusion_result["factors"].get("text_score"), "knn_factor": fusion_result["factors"].get("knn_score"), + "text_knn_factor": fusion_result["factors"].get("weighted_text_knn_score"), + "image_knn_factor": fusion_result["factors"].get("weighted_image_knn_score"), "style_intent_selected_sku": sku_selected, "style_intent_selected_sku_boost": style_boost, "matched_queries": signal_bundle["matched_queries"], @@ -810,6 +880,7 @@ def run_lightweight_rerank( text_score=text_score, knn_score=knn_score, fusion=f, + knn_components=signal_bundle["knn_components"], style_boost=style_boost, ) @@ -846,6 +917,8 @@ def run_lightweight_rerank( "es_factor": fusion_result["factors"].get("es_score"), "text_factor": fusion_result["factors"].get("text_score"), "knn_factor": fusion_result["factors"].get("knn_score"), + "text_knn_factor": fusion_result["factors"].get("weighted_text_knn_score"), + "image_knn_factor": fusion_result["factors"].get("weighted_image_knn_score"), "style_intent_selected_sku": 
sku_selected, "style_intent_selected_sku_boost": style_boost, "ltr_features": ltr_features, diff --git a/search/searcher.py b/search/searcher.py index 03fae01..f2e068c 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ -242,67 +242,29 @@ class Searcher: return configured return int(self.config.rerank.rerank_window) - @staticmethod - def _vector_to_list(vector: Any) -> List[float]: - if vector is None: - return [] - if hasattr(vector, "tolist"): - values = vector.tolist() - else: - values = list(vector) - return [float(v) for v in values] - def _build_exact_knn_rescore( self, *, query_vector: Any, image_query_vector: Any, + parsed_query: Optional[ParsedQuery] = None, ) -> Optional[Dict[str, Any]]: clauses: List[Dict[str, Any]] = [] - if query_vector is not None and self.text_embedding_field: - clauses.append( - { - "script_score": { - "_name": "exact_text_knn_query", - "query": {"exists": {"field": self.text_embedding_field}}, - "script": { - # Keep exact score on the same [0, 1]-ish scale as KNN dot_product recall. - "source": ( - f"(dotProduct(params.query_vector, '{self.text_embedding_field}') + 1.0) / 2.0" - ), - "params": {"query_vector": self._vector_to_list(query_vector)}, - }, - } - } - ) + text_clause = self.query_builder.build_exact_text_knn_rescore_clause( + query_vector, + parsed_query=parsed_query, + query_name="exact_text_knn_query", + ) + if text_clause: + clauses.append(text_clause) - if image_query_vector is not None and self.image_embedding_field: - nested_path, _, _ = str(self.image_embedding_field).rpartition(".") - if nested_path: - clauses.append( - { - "nested": { - "path": nested_path, - "_name": "exact_image_knn_query", - "score_mode": "max", - "query": { - "script_score": { - "query": {"exists": {"field": self.image_embedding_field}}, - "script": { - # Keep exact score on the same [0, 1]-ish scale as KNN dot_product recall. 
- "source": ( - f"(dotProduct(params.query_vector, '{self.image_embedding_field}') + 1.0) / 2.0" - ), - "params": { - "query_vector": self._vector_to_list(image_query_vector), - }, - }, - } - }, - } - } - ) + image_clause = self.query_builder.build_exact_image_knn_rescore_clause( + image_query_vector, + query_name="exact_image_knn_query", + ) + if image_clause: + clauses.append(image_clause) if not clauses: return None @@ -330,12 +292,14 @@ class Searcher: in_rank_window: bool, query_vector: Any, image_query_vector: Any, + parsed_query: Optional[ParsedQuery] = None, ) -> None: if not in_rank_window or not self.config.rerank.exact_knn_rescore_enabled: return rescore = self._build_exact_knn_rescore( query_vector=query_vector, image_query_vector=image_query_vector, + parsed_query=parsed_query, ) if not rescore: return @@ -689,6 +653,7 @@ class Searcher: in_rank_window=in_rank_window, query_vector=parsed_query.query_vector if enable_embedding else None, image_query_vector=image_query_vector, + parsed_query=parsed_query, ) # Add facets for faceted search diff --git a/tests/test_es_query_builder.py b/tests/test_es_query_builder.py index 4e35cff..9be8393 100644 --- a/tests/test_es_query_builder.py +++ b/tests/test_es_query_builder.py @@ -208,3 +208,36 @@ def test_image_knn_clause_is_added_alongside_base_translation_and_text_knn(): assert image_knn["path"] == "image_embedding" assert image_knn["score_mode"] == "max" assert image_knn["query"]["knn"]["field"] == "image_embedding.vector" + + +def test_text_knn_plan_is_reused_for_ann_and_exact_rescore(): + qb = _builder() + parsed_query = SimpleNamespace(query_tokens=["a", "b", "c", "d", "e"]) + + ann_clause = qb.build_text_knn_clause( + np.array([0.1, 0.2, 0.3]), + parsed_query=parsed_query, + ) + exact_clause = qb.build_exact_text_knn_rescore_clause( + np.array([0.1, 0.2, 0.3]), + parsed_query=parsed_query, + ) + + assert ann_clause is not None + assert exact_clause is not None + assert ann_clause["knn"]["k"] == 
qb.knn_text_k_long + assert ann_clause["knn"]["num_candidates"] == qb.knn_text_num_candidates_long + assert ann_clause["knn"]["boost"] == qb.knn_text_boost * 1.4 + assert exact_clause["script_score"]["script"]["params"]["boost"] == qb.knn_text_boost * 1.4 + + +def test_image_knn_plan_is_reused_for_ann_and_exact_rescore(): + qb = _builder() + + ann_clause = qb.build_image_knn_clause(np.array([0.4, 0.5, 0.6])) + exact_clause = qb.build_exact_image_knn_rescore_clause(np.array([0.4, 0.5, 0.6])) + + assert ann_clause is not None + assert exact_clause is not None + assert ann_clause["nested"]["query"]["knn"]["boost"] == qb.knn_image_boost + assert exact_clause["nested"]["query"]["script_score"]["script"]["params"]["boost"] == qb.knn_image_boost diff --git a/tests/test_rerank_client.py b/tests/test_rerank_client.py index 77a1d85..b77a2a9 100644 --- a/tests/test_rerank_client.py +++ b/tests/test_rerank_client.py @@ -1,7 +1,7 @@ from math import isclose -from config.schema import RerankFusionConfig -from search.rerank_client import fuse_scores_and_resort, run_lightweight_rerank +from config.schema import CoarseRankFusionConfig, RerankFusionConfig +from search.rerank_client import coarse_resort_hits, fuse_scores_and_resort, run_lightweight_rerank def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_primary(): @@ -257,6 +257,96 @@ def test_fuse_scores_and_resort_applies_knn_dismax_weights_and_tie_breaker(): assert isclose(debug[0]["knn_support_score"], 0.5, rel_tol=1e-9) +def test_fuse_scores_and_resort_can_add_weighted_text_and_image_knn_factors(): + hits = [ + { + "_id": "a", + "_score": 1.0, + "matched_queries": { + "base_query": 2.0, + "knn_query": 0.4, + "image_knn_query": 0.5, + }, + } + ] + fusion = RerankFusionConfig( + rerank_bias=0.0, + rerank_exponent=1.0, + text_bias=0.0, + text_exponent=1.0, + knn_text_weight=2.0, + knn_image_weight=1.0, + knn_tie_breaker=0.25, + knn_bias=0.1, + knn_exponent=1.0, + knn_text_exponent=2.0, + 
knn_image_exponent=3.0, + ) + + debug = fuse_scores_and_resort(hits, [0.8], fusion=fusion, debug=True) + + weighted_text_knn = 0.8 + weighted_image_knn = 0.5 + expected_knn = weighted_text_knn + 0.25 * weighted_image_knn + expected_fused = ( + 0.8 + * 2.0 + * (expected_knn + 0.1) + * ((weighted_text_knn + 0.1) ** 2.0) + * ((weighted_image_knn + 0.1) ** 3.0) + ) + + assert isclose(hits[0]["_fused_score"], expected_fused, rel_tol=1e-9) + assert isclose(debug[0]["text_knn_factor"], (weighted_text_knn + 0.1) ** 2.0, rel_tol=1e-9) + assert isclose(debug[0]["image_knn_factor"], (weighted_image_knn + 0.1) ** 3.0, rel_tol=1e-9) + assert "weighted_text_knn_score=" in debug[0]["fusion_summary"] + assert "weighted_image_knn_score=" in debug[0]["fusion_summary"] + + +def test_coarse_resort_hits_can_add_weighted_text_and_image_knn_factors(): + hits = [ + { + "_id": "coarse-a", + "_score": 1.0, + "matched_queries": { + "base_query": 2.0, + "knn_query": 0.4, + "image_knn_query": 0.5, + }, + } + ] + fusion = CoarseRankFusionConfig( + es_bias=0.0, + es_exponent=1.0, + text_bias=0.0, + text_exponent=1.0, + knn_text_weight=2.0, + knn_image_weight=1.0, + knn_tie_breaker=0.25, + knn_bias=0.1, + knn_exponent=1.0, + knn_text_exponent=2.0, + knn_image_exponent=3.0, + ) + + debug = coarse_resort_hits(hits, fusion=fusion, debug=True) + + weighted_text_knn = 0.8 + weighted_image_knn = 0.5 + expected_knn = weighted_text_knn + 0.25 * weighted_image_knn + expected_coarse = ( + 1.0 + * 2.0 + * (expected_knn + 0.1) + * ((weighted_text_knn + 0.1) ** 2.0) + * ((weighted_image_knn + 0.1) ** 3.0) + ) + + assert isclose(hits[0]["_coarse_score"], expected_coarse, rel_tol=1e-9) + assert isclose(debug[0]["coarse_text_knn_factor"], (weighted_text_knn + 0.1) ** 2.0, rel_tol=1e-9) + assert isclose(debug[0]["coarse_image_knn_factor"], (weighted_image_knn + 0.1) ** 3.0, rel_tol=1e-9) + + def test_run_lightweight_rerank_sorts_by_fused_stage_score(monkeypatch): hits = [ { diff --git 
a/tests/test_search_rerank_window.py b/tests/test_search_rerank_window.py index b8396ef..08ffd00 100644 --- a/tests/test_search_rerank_window.py +++ b/tests/test_search_rerank_window.py @@ -1055,6 +1055,7 @@ def test_searcher_attaches_exact_knn_rescore_for_rank_window(monkeypatch): translations={}, query_vector=np.array([0.1, 0.2, 0.3], dtype=np.float32), image_query_vector=np.array([0.4, 0.5, 0.6], dtype=np.float32), + query_tokens=["dress", "formal", "spring", "summer", "floral"], ) es_client = _FakeESClient(total_hits=5) @@ -1081,8 +1082,12 @@ def test_searcher_attaches_exact_knn_rescore_for_rank_window(monkeypatch): es_index_name=base.es_index_name, es_settings=base.es_settings, ) - searcher = _build_searcher(config, es_client) - searcher.query_parser = _VectorQueryParser() + searcher = Searcher( + es_client=es_client, + config=config, + query_parser=_VectorQueryParser(), + image_encoder=SimpleNamespace(), + ) context = create_request_context(reqid="exact-rescore", uid="u-exact") monkeypatch.setattr( @@ -1112,6 +1117,36 @@ def test_searcher_attaches_exact_knn_rescore_for_rank_window(monkeypatch): elif "nested" in clause: names.append(clause["nested"]["_name"]) assert names == ["exact_text_knn_query", "exact_image_knn_query"] + recall_query = body["query"] + if "bool" in recall_query and recall_query["bool"].get("must"): + recall_query = recall_query["bool"]["must"][0] + if "function_score" in recall_query: + recall_query = recall_query["function_score"]["query"] + recall_should = recall_query["bool"]["should"] + text_knn_clause = next( + clause["knn"] + for clause in recall_should + if clause.get("knn", {}).get("_name") == "knn_query" + ) + image_knn_clause = next( + clause["nested"]["query"]["knn"] + for clause in recall_should + if clause.get("nested", {}).get("_name") == "image_knn_query" + ) + exact_text_clause = next( + clause["script_score"] + for clause in should + if clause.get("script_score", {}).get("_name") == "exact_text_knn_query" + ) + 
exact_image_clause = next( + clause["nested"]["query"]["script_score"] + for clause in should + if clause.get("nested", {}).get("_name") == "exact_image_knn_query" + ) + assert text_knn_clause["boost"] == 28.0 + assert exact_text_clause["script"]["params"]["boost"] == text_knn_clause["boost"] + assert image_knn_clause["boost"] == 20.0 + assert exact_image_clause["script"]["params"]["boost"] == image_knn_clause["boost"] def test_searcher_skips_exact_knn_rescore_outside_rank_window(monkeypatch): -- libgit2 0.21.2