Compare View
Commits (8)
-
## 背景 多语言索引下,用户查询常中英混写;需在解析阶段显式标记脚本类型,并在 BM25 子句中同时覆盖对应语言字段。 ## 方案 ### 1. Query 分析(query_parser.ParsedQuery) - 新增 `contains_chinese`:query 文本含 CJK(沿用 _contains_cjk)。 - 新增 `contains_english`:分词结果中存在「纯英文、len>=3」token(fullmatch 字母及可选连字符)。 - 写入 to_dict、请求 context 中间结果,便于调试与 API 透出。 ### 2. ES 文本召回(es_query_builder._build_advanced_text_query) - 对每个 search_lang 子句:若含英文且子句语言非 en(且租户 index_languages 含 en),合并 en 列字段;若含中文且子句语言非 zh(且含 zh),合并 zh 列字段。 - 合并进来的字段 boost 乘以 `mixed_script_merged_field_boost_scale`(默认 0.8,可在 ESQueryBuilder 构造参数调整)。 - fallback_original_query_* 分支同样应用上述逻辑。 ### 3. 实现整理 - 引入 `MatchFieldSpec = (field_path, boost)`:`_build_match_field_specs` 为唯一权重来源;`_merge_supplemental_lang_field_specs` / `_expand_match_field_specs_for_mixed_script` 在 tuple 上合并与缩放;最后 `_format_match_field_specs` 再格式化为 ES `path^boost`,避免先拼字符串再解析。 ## 测试 - tests/test_query_parser_mixed_language.py:脚本标记与 token 规则。 - tests/test_es_query_builder.py:合并字段、0.8 缩放、index_languages 限制。 Made-with: Cursor
-
ResultFormatter.format_search_results() runs. What changed: For each final paginated SPU hit, the searcher now scans skus[].option1_value against the query text set built from the original query, normalized query, rewritten query, and translations. If no option1_value matches textually, it falls back to embedding similarity and picks the SKU with the highest inner product against the query embedding. The matched SKU is promoted to the front of the SPU’s skus list. The SPU-level image_url is replaced with that matched SKU’s image_src. I left api/result_formatter.py unchanged because it already preserves the SKU order and reads image_url from _source; updating the page hits in searcher makes the formatter return the desired result automatically. Verification: ReadLints on the edited files: no errors Passed targeted tests: pytest tests/test_search_rerank_window.py -k "translated_query or no_direct_option_match"
Showing
29 changed files
Show diff stats
.env.example
| ... | ... | @@ -0,0 +1,17 @@ |
| 1 | +docs | |
| 2 | +# Please enter the commit message for your changes. Lines starting | |
| 3 | +# with '#' will be ignored, and an empty message aborts the commit. | |
| 4 | +# | |
| 5 | +# On branch master | |
| 6 | +# Your branch is ahead of 'origin/master' by 5 commits. | |
| 7 | +# (use "git push" to publish your local commits) | |
| 8 | +# | |
| 9 | +# Changes to be committed: | |
| 10 | +# modified: config/config.yaml | |
| 11 | +# modified: docs/TODO.txt | |
| 12 | +# modified: "docs/\346\220\234\347\264\242API\345\257\271\346\216\245\346\214\207\345\215\227-07-\345\276\256\346\234\215\345\212\241\346\216\245\345\217\243\357\274\210Embedding-Reranker-Translation\357\274\211.md" | |
| 13 | +# modified: "docs/\347\233\270\345\205\263\346\200\247\346\243\200\347\264\242\344\274\230\345\214\226\350\257\264\346\230\216.md" | |
| 14 | +# | |
| 15 | +# Changes not staged for commit: | |
| 16 | +# modified: third-party/clip-as-service (untracked content) | |
| 17 | +# | ... | ... |
config/config.yaml
| ... | ... | @@ -31,9 +31,9 @@ field_boosts: |
| 31 | 31 | category_path: 1.5 |
| 32 | 32 | category_name_text: 1.5 |
| 33 | 33 | tags: 1.0 |
| 34 | - option1_values: 0.5 | |
| 35 | - option2_values: 0.5 | |
| 36 | - option3_values: 0.5 | |
| 34 | + option1_values: 0.6 | |
| 35 | + option2_values: 0.4 | |
| 36 | + option3_values: 0.4 | |
| 37 | 37 | |
| 38 | 38 | # Query Configuration(查询配置) |
| 39 | 39 | query_config: |
| ... | ... | @@ -47,6 +47,11 @@ query_config: |
| 47 | 47 | enable_text_embedding: true |
| 48 | 48 | enable_query_rewrite: true |
| 49 | 49 | |
| 50 | + # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)。 | |
| 51 | + # 检测语言已在租户 index_languages 内:较短;不在索引语言内:较长(翻译对召回更关键)。 | |
| 52 | + translation_embedding_wait_budget_ms_source_in_index: 80 | |
| 53 | + translation_embedding_wait_budget_ms_source_not_in_index: 200 | |
| 54 | + | |
| 50 | 55 | # 动态多语言检索字段配置 |
| 51 | 56 | # multilingual_fields 会被拼成 title.{lang}/brief.{lang}/... 形式; |
| 52 | 57 | # shared_fields 为无语言后缀字段。 |
| ... | ... | @@ -85,7 +90,34 @@ query_config: |
| 85 | 90 | |
| 86 | 91 | # 返回字段配置(_source includes) |
| 87 | 92 | # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段 |
| 88 | - source_fields: null | |
| 93 | + # 下列字段与 api/result_formatter.py(SpuResult 填充)及 search/searcher.py(SKU 排序/主图替换)一致 | |
| 94 | + source_fields: | |
| 95 | + - spu_id | |
| 96 | + - handle | |
| 97 | + - title | |
| 98 | + - brief | |
| 99 | + - description | |
| 100 | + - vendor | |
| 101 | + - category_name | |
| 102 | + - category_name_text | |
| 103 | + - category_path | |
| 104 | + - category_id | |
| 105 | + - category_level | |
| 106 | + - category1_name | |
| 107 | + - category2_name | |
| 108 | + - category3_name | |
| 109 | + - tags | |
| 110 | + - min_price | |
| 111 | + - compare_at_price | |
| 112 | + - image_url | |
| 113 | + - sku_prices | |
| 114 | + - sku_weights | |
| 115 | + - sku_weight_units | |
| 116 | + - total_inventory | |
| 117 | + - option1_name | |
| 118 | + - option1_values | |
| 119 | + - specifications | |
| 120 | + - skus | |
| 89 | 121 | |
| 90 | 122 | # KNN boost配置(向量召回的boost值) |
| 91 | 123 | knn_boost: 0.25 # Lower boost for embedding recall |
| ... | ... | @@ -110,7 +142,7 @@ rerank: |
| 110 | 142 | services: |
| 111 | 143 | translation: |
| 112 | 144 | service_url: "http://127.0.0.1:6006" |
| 113 | - default_model: "llm" | |
| 145 | + default_model: "nllb-200-distilled-600m" | |
| 114 | 146 | default_scene: "general" |
| 115 | 147 | timeout_sec: 10.0 |
| 116 | 148 | cache: | ... | ... |
config/loader.py
| ... | ... | @@ -297,6 +297,12 @@ class AppConfigLoader: |
| 297 | 297 | default_translation_model=str( |
| 298 | 298 | query_cfg.get("default_translation_model") or "nllb-200-distilled-600m" |
| 299 | 299 | ), |
| 300 | + translation_embedding_wait_budget_ms_source_in_index=int( | |
| 301 | + query_cfg.get("translation_embedding_wait_budget_ms_source_in_index", 80) | |
| 302 | + ), | |
| 303 | + translation_embedding_wait_budget_ms_source_not_in_index=int( | |
| 304 | + query_cfg.get("translation_embedding_wait_budget_ms_source_not_in_index", 200) | |
| 305 | + ), | |
| 300 | 306 | ) |
| 301 | 307 | |
| 302 | 308 | function_score_cfg = raw.get("function_score") if isinstance(raw.get("function_score"), dict) else {} |
| ... | ... | @@ -445,7 +451,7 @@ class AppConfigLoader: |
| 445 | 451 | api_port=int(os.getenv("API_PORT", 6002)), |
| 446 | 452 | indexer_host=os.getenv("INDEXER_HOST", "0.0.0.0"), |
| 447 | 453 | indexer_port=int(os.getenv("INDEXER_PORT", 6004)), |
| 448 | - embedding_host=os.getenv("EMBEDDING_HOST", "127.0.0.1"), | |
| 454 | + embedding_host=os.getenv("EMBEDDING_HOST", "0.0.0.0"), | |
| 449 | 455 | embedding_port=int(os.getenv("EMBEDDING_PORT", 6005)), |
| 450 | 456 | embedding_text_port=int(os.getenv("EMBEDDING_TEXT_PORT", 6005)), |
| 451 | 457 | embedding_image_port=int(os.getenv("EMBEDDING_IMAGE_PORT", 6008)), | ... | ... |
config/schema.py
| ... | ... | @@ -61,6 +61,11 @@ class QueryConfig: |
| 61 | 61 | zh_to_en_model: str = "opus-mt-zh-en" |
| 62 | 62 | en_to_zh_model: str = "opus-mt-en-zh" |
| 63 | 63 | default_translation_model: str = "nllb-200-distilled-600m" |
| 64 | + # 查询阶段:翻译与向量生成并发提交后,共用同一等待预算(毫秒)。 | |
| 65 | + # 检测语言已在租户 index_languages 内:偏快返回,预算较短。 | |
| 66 | + # 检测语言不在 index_languages 内:翻译对召回更关键,预算较长。 | |
| 67 | + translation_embedding_wait_budget_ms_source_in_index: int = 80 | |
| 68 | + translation_embedding_wait_budget_ms_source_not_in_index: int = 200 | |
| 64 | 69 | |
| 65 | 70 | |
| 66 | 71 | @dataclass(frozen=True) |
| ... | ... | @@ -254,13 +259,13 @@ class RuntimeConfig: |
| 254 | 259 | api_port: int = 6002 |
| 255 | 260 | indexer_host: str = "0.0.0.0" |
| 256 | 261 | indexer_port: int = 6004 |
| 257 | - embedding_host: str = "127.0.0.1" | |
| 262 | + embedding_host: str = "0.0.0.0" | |
| 258 | 263 | embedding_port: int = 6005 |
| 259 | 264 | embedding_text_port: int = 6005 |
| 260 | 265 | embedding_image_port: int = 6008 |
| 261 | - translator_host: str = "127.0.0.1" | |
| 266 | + translator_host: str = "0.0.0.0" | |
| 262 | 267 | translator_port: int = 6006 |
| 263 | - reranker_host: str = "127.0.0.1" | |
| 268 | + reranker_host: str = "0.0.0.0" | |
| 264 | 269 | reranker_port: int = 6007 |
| 265 | 270 | |
| 266 | 271 | ... | ... |
docs/TODO.txt
| 1 | 1 | |
| 2 | 2 | |
| 3 | 3 | |
| 4 | +先阅读文本embedding相关的代码: | |
| 5 | +@embeddings/README.md @embeddings/server.py @docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md @embeddings/text_encoder.py | |
| 6 | +目前有TEXT_MAX_INFLIGHT / IMAGE_MAX_INFLIGHT 准入限制,超限返回过载状态码。 | |
| 7 | + | |
| 8 | +文本embedding服务,要支持 priority 查询参数,priority > 0:不会因准入被拒绝、但仍计入上述 inflight(图片embedding不需要支持,因为只有离线需要用到图片embedding) | 
| 9 | +priority == 0(默认,适合做索引之类的离线任务):仍走原有 TEXT_MAX_INFLIGHT / IMAGE_MAX_INFLIGHT 准入;超限返回过载状态码。 | |
| 10 | +priority > 0(或者==1)(适合在线请求):不会因准入被拒绝,但是仍然需要占用inflight,这样保证在线请求不被限制,并且在线请求很多的时候可以拒绝掉离线的请求。 | |
| 11 | + | |
| 12 | +除了限制规则的修改,更进一步的,也需要保证这种请求是优先处理的(priority=1的相比=0的更优先被处理)。 | |
| 13 | +关于技术方案,有Worker + 双队列、PriorityMutex等等,除此之外,也请你思考合适的方案。 | |
| 14 | +成熟稳定、不带来复杂度、性能、稳定性方面的副作用,是最重要的。请先了解代码、需求,深度思考解决方案 | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | +配置体系的重构。 | |
| 19 | + | |
| 20 | +Referring to @docs/config-system-review-and-redesign.md , most of the modifications have been completed. Could you conduct a review to check what else needs improvement in the configuration documentation system? Are there any outstanding issues? | |
| 21 | + | |
| 22 | +一、仍然存在大量通过环境变量获取配置的地方 | |
| 23 | +_SERVICE_KIND = (os.getenv("EMBEDDING_SERVICE_KIND", "all") or "all").strip().lower() | |
| 24 | +if _SERVICE_KIND not in {"all", "text", "image"}: | |
| 25 | + raise RuntimeError( | |
| 26 | + f"Invalid EMBEDDING_SERVICE_KIND={_SERVICE_KIND!r}; expected all, text, or image" | |
| 27 | + ) | |
| 28 | +_TEXT_ENABLED_BY_ENV = os.getenv("EMBEDDING_ENABLE_TEXT_MODEL", "true").lower() in ("1", "true", "yes") | |
| 29 | +_IMAGE_ENABLED_BY_ENV = os.getenv("EMBEDDING_ENABLE_IMAGE_MODEL", "true").lower() in ("1", "true", "yes") | |
| 30 | +open_text_model = _TEXT_ENABLED_BY_ENV and _SERVICE_KIND in {"all", "text"} | |
| 31 | +open_image_model = _IMAGE_ENABLED_BY_ENV and _SERVICE_KIND in {"all", "image"} | |
| 32 | + | |
| 33 | +_text_encode_lock = threading.Lock() | |
| 34 | +_image_encode_lock = threading.Lock() | |
| 35 | + | |
| 36 | +_TEXT_MICROBATCH_WINDOW_SEC = max( | |
| 37 | + 0.0, float(os.getenv("TEXT_MICROBATCH_WINDOW_MS", "4")) / 1000.0 | |
| 38 | +) | |
| 39 | +_TEXT_REQUEST_TIMEOUT_SEC = max( | |
| 40 | + 1.0, float(os.getenv("TEXT_REQUEST_TIMEOUT_SEC", "30")) | |
| 41 | +) | |
| 42 | +_TEXT_MAX_INFLIGHT = max(1, int(os.getenv("TEXT_MAX_INFLIGHT", "32"))) | |
| 43 | +_IMAGE_MAX_INFLIGHT = max(1, int(os.getenv("IMAGE_MAX_INFLIGHT", "1"))) | |
| 44 | +_OVERLOAD_STATUS_CODE = int(os.getenv("EMBEDDING_OVERLOAD_STATUS_CODE", "503")) | |
| 45 | +_LOG_PREVIEW_COUNT = max(1, int(os.getenv("EMBEDDING_LOG_PREVIEW_COUNT", "3"))) | |
| 46 | +_LOG_TEXT_PREVIEW_CHARS = max(32, int(os.getenv("EMBEDDING_LOG_TEXT_PREVIEW_CHARS", "120"))) | |
| 47 | +_LOG_IMAGE_PREVIEW_CHARS = max(32, int(os.getenv("EMBEDDING_LOG_IMAGE_PREVIEW_CHARS", "180"))) | |
| 48 | +_VECTOR_PREVIEW_DIMS = max(1, int(os.getenv("EMBEDDING_VECTOR_PREVIEW_DIMS", "6"))) | |
| 49 | +_CACHE_PREFIX = str(REDIS_CONFIG.get("embedding_cache_prefix", "embedding")).strip() or "embedding" | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | +还有这些写死的地址 @embedding/config.py | |
| 56 | + | |
| 57 | +self.TEI_BASE_URL = str(text_backend.get("base_url") or "http://127.0.0.1:8080") | |
| 58 | +self.TEI_TIMEOUT_SEC = int(text_backend.get("timeout_sec", 60)) | |
| 59 | + | |
| 60 | +self.USE_CLIP_AS_SERVICE = services.image_backend == "clip_as_service" | |
| 61 | +self.CLIP_AS_SERVICE_SERVER = str(image_backend.get("server") or "grpc://127.0.0.1:51000") | |
| 62 | + | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
| 66 | +看起来似乎并没有完全遵循这些原则? | |
| 67 | +4. 重新设计的设计原则 | |
| 68 | +重新设计应遵循以下规则。 | |
| 69 | + | |
| 70 | +4.1 单一逻辑配置系统 | |
| 71 | +可以有多个文件,但不能有多个职责重叠的加载器。 | |
| 72 | +必须有一个加载器管道,能够生成一个类型化的 AppConfig 对象。 | |
| 73 | + | |
| 74 | +4.2 配置文件负责声明,解析代码负责解释,环境变量负责运行时注入 | |
| 75 | +职责应明确如下: | |
| 76 | +配置文件 | |
| 77 | +声明非敏感的目标行为和可部署的非敏感设置 | |
| 78 | +解析逻辑 | |
| 79 | +加载、合并、验证、规范化并暴露类型化的配置 | |
| 80 | +绝不发明隐藏的业务行为 | |
| 81 | +环境变量 | |
| 82 | +承载密钥和少量运行时/进程相关的值 | |
| 83 | +不随意地重新定义业务行为 | |
| 84 | + | |
| 85 | +4.3 整个系统采用单一的优先级规则 | |
| 86 | +除非明确豁免,否则每个配置类别都应遵循相同的合并模型。 | |
| 87 | + | |
| 88 | +4.4 业务行为不得有静默的隐式后备 | |
| 89 | +在启动时,如果必需的配置缺失或无效,应快速失败。 | |
| 90 | +不要静默地回退到诸如硬编码语言列表之类的遗留行为。 | |
| 91 | + | |
| 92 | +4.5 有效配置必须可观测 | |
| 93 | +每个服务都应能够展示: | |
| 94 | +配置版本或哈希值 | |
| 95 | +加载的源文件 | |
| 96 | +环境名称 | |
| 97 | +经过清理的有效配置 | |
| 98 | + | |
| 99 | +5. 推荐的目标设计 | |
| 100 | + | |
| 101 | +5.1 边界模型 | |
| 102 | +使用三个清晰的层级。 | |
| 103 | +层级 1:代码仓库管理的静态配置 | |
| 104 | +目的: | |
| 105 | +搜索行为 | |
| 106 | +租户行为 | |
| 107 | +提供商/后端注册表 | |
| 108 | +非敏感的服务拓扑默认值 | |
| 109 | +功能开关 | |
| 110 | +示例: | |
| 111 | +字段权重 | |
| 112 | +查询策略 | |
| 113 | +重排序融合参数 | |
| 114 | +租户语言方案 | |
| 115 | +翻译能力注册表 | |
| 116 | +嵌入后端选择默认值 | |
| 117 | + | |
| 118 | +层级 2:特定环境的层叠配置 | |
| 119 | +目的: | |
| 120 | +按环境区分的非敏感差异 | |
| 121 | +按环境区分的服务端点 | |
| 122 | +按环境区分的资源大小默认值 | |
| 123 | +开发/测试/生产环境的运维差异 | |
| 124 | +示例: | |
| 125 | +本地嵌入 URL 与生产环境嵌入 URL | |
| 126 | +开发环境重排序后端与生产环境重排序后端 | |
| 127 | +本地开发环境中较低的并发度 | |
| 128 | + | |
| 129 | +层级 3:环境变量 | |
| 130 | +目的: | |
| 131 | +密钥 | |
| 132 | +绑定主机/端口 | |
| 133 | +外部基础设施凭证 | |
| 134 | +容器编排器的最后一步注入 | |
| 135 | +示例: | |
| 136 | +ES_HOST, ES_USERNAME, ES_PASSWORD | |
| 137 | +DB_HOST, DB_USERNAME, DB_PASSWORD | |
| 138 | +REDIS_HOST, REDIS_PASSWORD | |
| 139 | +DASHSCOPE_API_KEY, DEEPL_AUTH_KEY | |
| 140 | +API_HOST, API_PORT, INDEXER_PORT, TRANSLATION_PORT | |
| 141 | +规则: | |
| 142 | +环境变量不应成为选择业务行为(如翻译模型、嵌入后端或租户语言策略)的常规途径 | |
| 143 | +如果允许对非敏感字段进行环境变量覆盖,则必须将其明确列出并记录为运维覆盖手段,而非隐藏的约定 | |
| 144 | + | |
| 145 | +5.2 统一的优先级 | |
| 146 | +推荐的优先级顺序: | |
| 147 | +代码中的模式(schema)默认值 | |
| 148 | +config/base.yaml | |
| 149 | +config/environments/<env>.yaml | |
| 150 | +来自 config/tenants/ 的租户层叠配置 | |
| 151 | +针对明确允许的运行时键的环境变量 | |
| 152 | +仅适用于当前进程的命令行标志 | |
| 153 | +重要规则: | |
| 154 | +只能有一个模块实现此合并逻辑 | |
| 155 | +任何业务模块不得直接调用 os.getenv() 来获取配置 | |
| 156 | + | |
| 157 | + | |
| 158 | + | |
| 159 | +二、cursor(composer2)帮我审查的 | |
| 160 | + | |
| 161 | +## 配置文档系统审查 | |
| 162 | + | |
| 163 | +这是对**配置文档系统**的简要审查,对照 [`docs/config-system-review-and-redesign.md`](docs/config-system-review-and-redesign.md) 和当前代码库进行。 | |
| 164 | + | |
| 165 | +### 实现已匹配的部分 | |
| 166 | + | |
| 167 | +代码库已朝着重新设计的目标迈进了很大一步: | |
| 168 | + | |
| 169 | +- **单一类型化根节点**:`AppConfig` 位于 [`config/schema.py`](config/schema.py),通过 [`config/loader.py`](config/loader.py) 和 [`get_app_config()`](config/loader.py) 加载。 | |
| 170 | +- **精简的 `services_config`**:[`config/services_config.py`](config/services_config.py) 委托给 `get_app_config()`(解决了服务层的发现 A)。 | |
| 171 | +- **租户目录**:[`config/tenant_config_loader.py`](config/tenant_config_loader.py) 使用 `get_app_config().tenants`。 | |
| 172 | +- **重写资产路径**:`config/dictionaries/query_rewrite.dict` 已存在(解决了发现 E 中的文件名不匹配问题)。 | |
| 173 | +- **可观测性**:[`GET /admin/config`](api/routes/admin.py) 返回**经过脱敏处理的有效配置树**;[`GET /admin/config/meta`](api/routes/admin.py) 暴露环境信息、`config_hash`、`loaded_files`、`deprecated_keys`(涵盖了 §5.10 的意图;文档中提议的是 `/admin/config/effective`,但实际实现位于 `/admin/config`)。 | |
| 174 | + | |
| 175 | +因此,**"单一加载器 + 有效配置可见性"** 的故事在代码中已基本实现;文档尚未完全跟上。 | |
| 176 | + | |
| 177 | +--- | |
| 178 | + | |
| 179 | +## 文档问题(影响最大) | |
| 180 | + | |
| 181 | +### 1. 管理 API 文档中关于 `/admin/config` 的描述错误 | |
| 182 | + | |
| 183 | +[`docs/搜索API对接指南.md`](docs/搜索API对接指南.md)(管理部分附近)和 [`docs/搜索API对接指南-06-管理接口(Admin).md`](docs/搜索API对接指南-06-管理接口(Admin).md) 仍将 `/admin/config` 描述为**按租户**的 JSON(包含 `tenant_id`、`es_index_name`、`supported_languages` 等字段)。实际实现返回的是 **`AppConfig.sanitized_dict()`**(完整的应用配置,敏感信息已脱敏),而不是租户摘要字段。 | |
| 184 | + | |
| 185 | +**这些指南中还缺少:** `GET /admin/config/meta`。 | |
| 186 | + | |
| 187 | +**健康检查:** 拆分指南中的示例包含了 [`HealthResponse`](api/models.py) 中不存在的字段(只有 `status` 和 `elasticsearch`)。 | |
| 188 | + | |
| 189 | +对于任何仅根据文档进行 API 集成的人来说,这是最明显的"未解决问题"。 | |
| 190 | + | |
| 191 | +### 2. 面向开发者的指南仍将 `services_config` 作为"配置解析器"的核心 | |
| 192 | + | |
| 193 | +[`docs/DEVELOPER_GUIDE.md`](docs/DEVELOPER_GUIDE.md) §5.2 仍说搜索配置由 **`ConfigLoader`** 加载,服务由 **`config/services_config`** "解析"。§6.2 仍将 **`config/services_config.py`** 列为主要的"解析入口"。[`docs/QUICKSTART.md`](docs/QUICKSTART.md) §3.1 仍说"配置解析:`config/services_config.py`"。 | |
| 194 | + | |
| 195 | +文档中准确的说法应该是:**规范入口是 `config/loader.py` + `get_app_config()`**;[`config/config_loader.py`](config/config_loader.py) 中的 `ConfigLoader` 包装了统一加载器;`services_config` 是现有调用点的**兼容性外观**。 | |
| 196 | + | |
| 197 | +### 3. 重新设计文档本身不是"活的"状态文档 | |
| 198 | + | |
| 199 | +[`docs/config-system-review-and-redesign.md`](docs/config-system-review-and-redesign.md) 读起来仍是**纯粹的问题陈述 + 目标**,没有简短的**"已实现 vs 剩余"**部分。这很容易让人假设什么都没做,或者重复工作。添加一个小附录(或一页 `config/README.md` —— 见下文)可以解决这个问题。 | |
| 200 | + | |
| 201 | +### 4. 缺少 `config/README.md`(§5.3 中推荐) | |
| 202 | + | |
| 203 | +仍然没有专门的 **`config/README.md`** 来描述:加载器入口点、高级优先级、字典存放位置、指向 `/admin/config` + `/admin/config/meta` 的链接,以及重新设计文档的链接。这是重新设计中明确的交付物,可以锚定"文档系统"。 | |
| 204 | + | |
| 205 | +### 5. 轻微的文档整洁问题 | |
| 206 | + | |
| 207 | +- [`docs/QUICKSTART.md`](docs/QUICKSTART.md) §1.9 环境变量项目后的行有一个多余字符:`---·`(可能是打字错误)。 | |
| 208 | +- [`docs/DEVELOPER_GUIDE.md`](docs/DEVELOPER_GUIDE.md) §10 **文档索引**没有列出 `config-system-review-and-redesign.md` 或未来的 `config/README.md`。 | |
| 209 | + | |
| 210 | +--- | |
| 211 | + | |
| 212 | +## 重新设计目标与当前代码之间的差距(文档不应声称"已完成") | |
| 213 | + | |
| 214 | +这些影响文档的诚实度: | |
| 215 | + | |
| 216 | +| 主题 | 状态 | | |
| 217 | +|--------|--------| | |
| 218 | +| **`config dump` CLI**(§5.10) | `main.py` 中不存在;运维人员依赖 HTTP 或临时脚本。 | | |
| 219 | +| **隐藏的 `["en", "zh"]` 回退**(阶段 3 / 发现 D) | 仍在 [`indexer/document_transformer.py`](indexer/document_transformer.py)、[`suggestion/builder.py`](suggestion/builder.py) 等中使用。 | | |
| 220 | +| **加载器外的 `os.getenv`**(规则 1–2) | 仍在例如 [`embeddings/server.py`](embeddings/server.py)、[`reranker/server.py`](reranker/server.py)、[`api/app.py`](api/app.py) 中使用 —— 文档声称"仅加载器"将是夸大其词。 | | |
| 221 | +| **拆分 `base.yaml` / `environments/` / `tenants/*.yaml`**(阶段 5) | 未采用;仍是单一的 [`config/config.yaml`](config/config.yaml)。 | | |
| 222 | +| **遗留租户标志**(阶段 6 / 发现 H) | [`indexer/README.md`](indexer/README.md) 仍描述上游 MySQL 的 `translate_to_en` / `translate_to_zh`(这可能作为**上游模式**文档保留;需与 Python `tenant_config` 模型区分开来)。 | | |
| 223 | + | |
| 224 | +--- | |
| 225 | + | |
| 226 | +## 推荐的后续步骤(仅文档,按优先级排序) | |
| 227 | + | |
| 228 | +1. **修复管理 API 文档**(合并指南 + `-06-` 拆分):`/admin/config` 的响应格式,添加 `/admin/config/meta`,使健康检查示例与 [`HealthResponse`](api/models.py) 一致。 | |
| 229 | +2. **更新 DEVELOPER_GUIDE §5–§6 和 QUICKSTART §1.9 / §3.1**,将 **`get_app_config()` / `loader.py`** 描述为主要入口,将 **`services_config`** 描述为适配器。 | |
| 230 | +3. **添加 `config/README.md`**(简短的操作 + 开发者入口)。 | |
| 231 | +4. **在 `config-system-review-and-redesign.md` 中添加**带日期的**实现状态表**(已交付 vs 推迟的内容),使审查文档不与现实矛盾。 | |
| 232 | +5. **DEVELOPER_GUIDE §9 检查清单**:将"配置来自 `services_config`"替换为允许 `get_app_config()` 或精简适配器的语言,与 §6 保持一致。 | |
| 233 | + | |
| 234 | +如果需要,我可以在后续处理中为项目 1–3 和重新设计文档中的简短状态块应用补丁。 | |
| 235 | + | |
| 236 | + | |
| 237 | + | |
| 238 | + | |
| 239 | + | |
| 240 | + | |
| 241 | + | |
| 242 | + | |
| 243 | + | |
| 244 | + | |
| 245 | + | |
| 246 | + | |
| 247 | + | |
| 248 | + | |
| 249 | + | |
| 250 | +检索相关性优化: | |
| 251 | +原始搜索词和翻译的词,都需要有对应的主干分析 | |
| 252 | +这个主干可以根据词性简单提取名词即可 | |
| 253 | +在搜索时,原始词和主干都成对地出现,原始词和trunk_keywords一起组成一个或查询。 | |
| 254 | +有一种方案是把原始词和主干词拼接起来。但是bm25要调tf系数。 | |
| 255 | + | |
| 256 | + | |
| 257 | + | |
| 258 | + | |
| 4 | 259 | nllb-200-distilled-600M性能优化 |
| 5 | 260 | 已完成(2026-03) |
| 6 | 261 | - CTranslate2 迁移 + float16 转换 | ... | ... |
docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md
| ... | ... | @@ -38,6 +38,10 @@ |
| 38 | 38 | - `TEXT_MAX_INFLIGHT` |
| 39 | 39 | - `IMAGE_MAX_INFLIGHT` |
| 40 | 40 | - 当超过处理能力时,服务会直接返回过载错误,而不是无限排队。 |
| 41 | +- 文本与图片服务均支持 `priority` query 参数(图片不做队列插队,仅 admission 规则与文本一致): | |
| 42 | + - `priority=0`(默认):适合离线索引,仍分别受 `TEXT_MAX_INFLIGHT` / `IMAGE_MAX_INFLIGHT` admission control 约束。 | |
| 43 | + - `priority>0`(建议在线请求用 `1`):不会因 admission control 被拒绝,但仍会占用对应 text/image 的 inflight。 | |
| 44 | + - 文本服务端会优先处理高优先级文本请求;图片端不实现插队,顺序按请求到达处理即可。 | |
| 41 | 45 | - `GET /health` 会返回各自的 `limits`、`stats`、`cache_enabled` 等状态;`GET /ready` 用于就绪探针。 |
| 42 | 46 | |
| 43 | 47 | #### 7.1.1 `POST /embed/text` — 文本向量化 |
| ... | ... | @@ -59,11 +63,15 @@ |
| 59 | 63 | **完整 curl 示例**: |
| 60 | 64 | |
| 61 | 65 | ```bash |
| 62 | -curl -X POST "http://localhost:6005/embed/text?normalize=true" \ | |
| 66 | +curl -X POST "http://localhost:6005/embed/text?normalize=true&priority=1" \ | |
| 63 | 67 | -H "Content-Type: application/json" \ |
| 64 | 68 | -d '["芭比娃娃 儿童玩具", "纯棉T恤 短袖"]' |
| 65 | 69 | ``` |
| 66 | 70 | |
| 71 | +说明: | |
| 72 | +- 在线 query / 实时请求:建议显式传 `priority=1` | |
| 73 | +- 离线索引 / 批量回填:保持默认 `priority=0` 即可 | |
| 74 | + | |
| 67 | 75 | #### 7.1.2 `POST /embed/image` — 图片向量化 |
| 68 | 76 | |
| 69 | 77 | 将图片 URL 或路径转为向量,用于以图搜图。 |
| ... | ... | @@ -85,11 +93,13 @@ curl -X POST "http://localhost:6005/embed/text?normalize=true" \ |
| 85 | 93 | **完整 curl 示例**: |
| 86 | 94 | |
| 87 | 95 | ```bash |
| 88 | -curl -X POST "http://localhost:6008/embed/image?normalize=true" \ | |
| 96 | +curl -X POST "http://localhost:6008/embed/image?normalize=true&priority=1" \ | |
| 89 | 97 | -H "Content-Type: application/json" \ |
| 90 | 98 | -d '["https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg"]' |
| 91 | 99 | ``` |
| 92 | 100 | |
| 101 | +在线以图搜图等实时场景可传 `priority=1`;离线索引回填保持默认 `priority=0`。 | |
| 102 | + | |
| 93 | 103 | #### 7.1.3 `GET /health` — 健康检查 |
| 94 | 104 | |
| 95 | 105 | ```bash |
| ... | ... | @@ -118,6 +128,8 @@ curl "http://localhost:6008/ready" |
| 118 | 128 | - cache key 已区分 `normalize=true/false`,避免不同归一化策略命中同一条缓存。 |
| 119 | 129 | - 当服务端发现请求是 **full-cache-hit** 时,会直接返回,不占用模型并发槽位。 |
| 120 | 130 | - 当服务端发现超过 `TEXT_MAX_INFLIGHT` / `IMAGE_MAX_INFLIGHT` 时,会直接拒绝,而不是无限排队。 |
| 131 | +- 其中 `POST /embed/text` 的 `priority=0` 会按上面的 inflight 规则直接拒绝;`priority>0` 不会被 admission 拒绝,但仍计入 inflight,并在服务端排队时优先于 `priority=0` 请求。 | |
| 132 | +- `POST /embed/image` 的 `priority=0` 受 `IMAGE_MAX_INFLIGHT` 约束;`priority>0` 不会被 admission 拒绝,但仍计入 inflight(无插队)。 | |
| 121 | 133 | |
| 122 | 134 | #### 7.1.6 TEI 统一调优建议(主服务) |
| 123 | 135 | |
| ... | ... | @@ -252,9 +264,9 @@ curl "http://localhost:6007/health" |
| 252 | 264 | - 如果是en-zh互译、期待更高的速度,可以考虑`opus-mt-zh-en` / `opus-mt-en-zh`。(质量未详细评测,一些文章说比nllb-200-600m更好,但是我看了些case感觉要差不少) |
| 253 | 265 | |
| 254 | 266 | **实时翻译选型建议**: |
| 255 | -- 在线 query 翻译如果只是 `en/zh` 互译,优先使用 `opus-mt-zh-en` 或 `opus-mt-en-zh`,它们是当前已测本地模型里延迟最低的一档。 | |
| 267 | +- 在线 query 翻译如果只是 `en/zh` 互译,优先使用 `opus-mt-zh-en` 或 `opus-mt-en-zh`。 | |
| 256 | 268 | - 如果涉及其他语言,或对质量要求高于本地轻量模型,优先考虑 `deepl`。 |
| 257 | -- `nllb-200-distilled-600m` 不建议作为在线 query 翻译默认方案;我们在 `Tesla T4` 上测到 `batch_size=1` 时,`zh -> en` p50 约 `292.54 ms`、p95 约 `624.12 ms`,`en -> zh` p50 约 `481.61 ms`、p95 约 `1171.71 ms`。 | |
| 269 | +`nllb-200-distilled-600m` 经优化后可作为在线 query 翻译默认方案;我们在 `Tesla T4` 上测到 `batch_size=1` 时,根据 query 长短,耗时大概在 70-150ms 之间。 | 
| 258 | 270 | |
| 259 | 271 | **Batch Size / 调用方式建议**: |
| 260 | 272 | - 本接口支持 `text: string[]`;离线或批量索引翻译时,应尽量合并请求,让底层 backend 发挥批处理能力。 | ... | ... |
docs/搜索API速查表.md renamed to docs/搜索API对接指南-速查表.md
docs/相关性检索优化说明.md
| ... | ... | @@ -17,9 +17,9 @@ |
| 17 | 17 | 查询链路(文本相关): |
| 18 | 18 | |
| 19 | 19 | 1. `QueryParser.parse()` |
| 20 | - 输出 `detected_language`、`query_text_by_lang`、`search_langs`、`index_languages`、`source_in_index_languages`。 | |
| 20 | + 输出 `detected_language`、`query_text_by_lang`、`search_langs`、`index_languages`、`source_in_index_languages`;另输出 `contains_chinese` / `contains_english`(仅服务混写辅助召回,见 §4 末)。 | |
| 21 | 21 | 2. `ESQueryBuilder._build_advanced_text_query()` |
| 22 | - 按 `search_langs` 动态拼接 `title/brief/description/vendor/category_*` 的 `.{lang}` 字段,叠加 shared 字段(`tags`、`option*_values`)。 | |
| 22 | + 按 `search_langs` 动态拼接 `title/brief/description/vendor/category_*` 的 `.{lang}` 字段,叠加 shared 字段(`tags`、`option*_values`);若命中混写辅助条件,在同一子句内并入另一语种列(§4 末)。 | |
| 23 | 23 | 3. `build_query()` |
| 24 | 24 | 统一走文本策略,不再有布尔 AST 枝路。 |
| 25 | 25 | |
| ... | ... | @@ -40,14 +40,20 @@ |
| 40 | 40 | 3. 若第 2 步翻译部分失败或全部失败: |
| 41 | 41 | 对缺失翻译的 `index_languages` 字段,追加“原文低权重兜底”子句,避免完全丢失这些语种索引面的召回机会。 |
| 42 | 42 | |
| 43 | -### 3.2 翻译等待策略 | |
| 43 | +### 3.2 翻译与向量:并发提交与共享超时 | |
| 44 | 44 | |
| 45 | -`QueryParser.parse()` 中: | |
| 45 | +`QueryParser.parse()` 内(Stage 4–6)对**离线调用**采用线程池提交 + **一次** `concurrent.futures.wait`: | |
| 46 | 46 | |
| 47 | -- 当源语种不在 `index_languages`:使用 `translate_multi_async(...)` 并等待 futures 收敛 | |
| 48 | -- 当源语种在 `index_languages`:使用 `translate_multi(..., async_mode=True)`,优先缓存命中,未命中可后台补齐 | |
| 47 | +- **翻译**:对 `index_languages` 中除 `detected_language` 外的每个目标语种各提交一个 `translator.translate` 任务(多目标时并发执行)。 | |
| 48 | +- **查询向量**(若开启 `enable_text_embedding` 且域为 default):再提交一个 `text_encoder.encode` 任务。 | |
| 49 | +- 上述任务进入**同一** future 集合;例如租户索引为 `[zh, en]` 且检测语种**不在**索引内时,常为 **2 路翻译 + 1 路向量,共 3 个任务并发**,共用超时。 | |
| 49 | 50 | |
| 50 | -这保证了“必须翻译才能检索”的场景不会直接空跑。 | |
| 51 | +**等待预算(毫秒)**由 `detected_language` 是否属于租户 `index_languages` 决定(`query_config`): | |
| 52 | + | |
| 53 | +- **在索引内**:`translation_embedding_wait_budget_ms_source_in_index`(默认较短,如 80ms)— 主召回已能打在源语种字段,翻译/向量稍慢可容忍。 | |
| 54 | +- **不在索引内**:`translation_embedding_wait_budget_ms_source_not_in_index`(默认较长,如 200ms)— 翻译对可检索文本更关键,给足时间。 | |
| 55 | + | |
| 56 | +超时未完成的任务会被丢弃并记 warning,解析继续(可能缺少部分译文或查询向量)。 | 
| 51 | 57 | |
| 52 | 58 | ## 4. 统一文本召回表达式 |
| 53 | 59 | |
| ... | ... | @@ -68,8 +74,16 @@ |
| 68 | 74 | |
| 69 | 75 | 最终按 `bool.should` 组合,`minimum_should_match: 1`。 |
| 70 | 76 | |
| 77 | +> **附 — 混写辅助召回** | |
| 78 | +> 当中英(或多脚本)混写时,为略抬召回:`QueryParser` 用 `contains_chinese`(文中有汉字)、`contains_english`(分词中有长度 ≥3 的纯英文 token)打标;`ESQueryBuilder` 在某一语言的 `multi_match` 上,按规则把**另一语种**的同类字段并入同一 `fields`(受 `index_languages` 限制),并入列的 boost 为配置值再乘 **`mixed_script_merged_field_boost_scale`(默认 0.8,`ESQueryBuilder` 构造参数)**。`fallback_original_query_*` 同样适用。字段在内部以 `(path, boost)` 列表合并后再格式化为 ES 字符串。 | |
| 79 | + | |
| 71 | 80 | ## 5. 关键配置项(文本策略) |
| 72 | 81 | |
| 82 | +`query_config` 下与解析等待相关的项: | |
| 83 | + | |
| 84 | +- `translation_embedding_wait_budget_ms_source_in_index` | |
| 85 | +- `translation_embedding_wait_budget_ms_source_not_in_index` | |
| 86 | + | |
| 73 | 87 | 位于 `config/config.yaml -> query_config.text_query_strategy`: |
| 74 | 88 | |
| 75 | 89 | - `base_minimum_should_match` |
| ... | ... | @@ -137,6 +151,7 @@ |
| 137 | 151 | - `query_text_by_lang` |
| 138 | 152 | - `source_in_index_languages` |
| 139 | 153 | - `index_languages` |
| 154 | + - `contains_chinese` / `contains_english` | |
| 140 | 155 | - `ESQueryBuilder` 负责“表达式展开”: |
| 141 | 156 | - 动态字段组装 |
| 142 | 157 | - 子句权重分配 | ... | ... |
embeddings/README.md
| ... | ... | @@ -30,13 +30,13 @@ |
| 30 | 30 | - 文本服务(默认 `6005`) |
| 31 | 31 | - `POST /embed/text` |
| 32 | 32 | - 请求体:`["文本1", "文本2", ...]` |
| 33 | - - 可选 query 参数:`normalize=true|false` | |
| 33 | + - 可选 query 参数:`normalize=true|false`、`priority=0|1` | |
| 34 | 34 | - 返回:`[[...], [...], ...]` |
| 35 | 35 | - 健康接口:`GET /health`、`GET /ready` |
| 36 | 36 | - 图片服务(默认 `6008`) |
| 37 | 37 | - `POST /embed/image` |
| 38 | 38 | - 请求体:`["url或本地路径1", ...]` |
| 39 | - - 可选 query 参数:`normalize=true|false` | |
| 39 | + - 可选 query 参数:`normalize=true|false`、`priority=0|1` | |
| 40 | 40 | - 返回:`[[...], [...], ...]` |
| 41 | 41 | - 健康接口:`GET /health`、`GET /ready` |
| 42 | 42 | |
| ... | ... | @@ -61,6 +61,11 @@ |
| 61 | 61 | - 图片服务可以配置得比文本更严格。 |
| 62 | 62 | - 请求若是 full-cache-hit,会在服务端直接返回,不占用模型并发槽位。 |
| 63 | 63 | - 超过处理能力时直接拒绝,比无限排队更稳定。 |
| 64 | +- 文本服务支持 `priority`: | |
| 65 | + - `priority=0`(默认,适合离线索引)仍受 `TEXT_MAX_INFLIGHT` 限制,超限直接返回 overload。 | |
| 66 | + - `priority>0`(建议在线 query 用 `1`)不会因 admission control 被拒绝,但仍会计入 inflight。 | |
| 67 | + - 文本服务内部使用双队列调度,处理时会优先消费高优先级请求,避免在线请求长期排在离线批量任务后面。 | |
| 68 | +- 图片服务同样支持 `priority`(语义与文本一致,按 `IMAGE_MAX_INFLIGHT` 计数;不做队列插队,仅 admission 规则与文本一致)。 | 
| 64 | 69 | |
| 65 | 70 | ### 图片向量:clip-as-service(推荐) |
| 66 | 71 | |
| ... | ... | @@ -86,6 +91,14 @@ |
| 86 | 91 | - `CLIP_AS_SERVICE_MODEL_NAME=CN-CLIP/ViT-L-14` |
| 87 | 92 | - `scripts/start_cnclip_service.sh` 默认会读取同一个 `CLIP_AS_SERVICE_MODEL_NAME`,也可用 `CNCLIP_MODEL_NAME` 或 `--model-name` 临时覆盖 |
| 88 | 93 | |
| 94 | +### 性能与压测(沿用仓库脚本) | |
| 95 | + | |
| 96 | +- 接口级压测(与 `perf_reports/2026-03-12/matrix_report/` 等方法一致):`scripts/perf_api_benchmark.py` | |
| 97 | + - 示例:`python scripts/perf_api_benchmark.py --scenario embed_text --duration 30 --concurrency 20` | |
| 98 | + - 文本/图片向量可带 `priority`(与线上 admission 语义一致):`--embed-text-priority 1`、`--embed-image-priority 1` | |
| 99 | + - 自定义请求模板:`--cases-file scripts/perf_cases.json.example` | |
| 100 | +- 历史矩阵结果与说明见 `perf_reports/2026-03-12/matrix_report/summary.md`。 | |
| 101 | + | |
| 89 | 102 | ### 启动服务 |
| 90 | 103 | |
| 91 | 104 | 使用仓库脚本启动: | ... | ... |
embeddings/image_encoder.py
| ... | ... | @@ -35,7 +35,12 @@ class CLIPImageEncoder: |
| 35 | 35 | namespace="image", |
| 36 | 36 | ) |
| 37 | 37 | |
| 38 | - def _call_service(self, request_data: List[str], normalize_embeddings: bool = True) -> List[Any]: | |
| 38 | + def _call_service( | |
| 39 | + self, | |
| 40 | + request_data: List[str], | |
| 41 | + normalize_embeddings: bool = True, | |
| 42 | + priority: int = 0, | |
| 43 | + ) -> List[Any]: | |
| 39 | 44 | """ |
| 40 | 45 | Call the embedding service API. |
| 41 | 46 | |
| ... | ... | @@ -48,7 +53,10 @@ class CLIPImageEncoder: |
| 48 | 53 | try: |
| 49 | 54 | response = requests.post( |
| 50 | 55 | self.endpoint, |
| 51 | - params={"normalize": "true" if normalize_embeddings else "false"}, | |
| 56 | + params={ | |
| 57 | + "normalize": "true" if normalize_embeddings else "false", | |
| 58 | + "priority": max(0, int(priority)), | |
| 59 | + }, | |
| 52 | 60 | json=request_data, |
| 53 | 61 | timeout=60 |
| 54 | 62 | ) |
| ... | ... | @@ -66,7 +74,12 @@ class CLIPImageEncoder: |
| 66 | 74 | """ |
| 67 | 75 | raise NotImplementedError("encode_image with PIL Image is not supported by embedding service") |
| 68 | 76 | |
| 69 | - def encode_image_from_url(self, url: str, normalize_embeddings: bool = True) -> np.ndarray: | |
| 77 | + def encode_image_from_url( | |
| 78 | + self, | |
| 79 | + url: str, | |
| 80 | + normalize_embeddings: bool = True, | |
| 81 | + priority: int = 0, | |
| 82 | + ) -> np.ndarray: | |
| 70 | 83 | """ |
| 71 | 84 | Generate image embedding via network service using URL. |
| 72 | 85 | |
| ... | ... | @@ -81,7 +94,11 @@ class CLIPImageEncoder: |
| 81 | 94 | if cached is not None: |
| 82 | 95 | return cached |
| 83 | 96 | |
| 84 | - response_data = self._call_service([url], normalize_embeddings=normalize_embeddings) | |
| 97 | + response_data = self._call_service( | |
| 98 | + [url], | |
| 99 | + normalize_embeddings=normalize_embeddings, | |
| 100 | + priority=priority, | |
| 101 | + ) | |
| 85 | 102 | if not response_data or len(response_data) != 1 or response_data[0] is None: |
| 86 | 103 | raise RuntimeError(f"No image embedding returned for URL: {url}") |
| 87 | 104 | vec = np.array(response_data[0], dtype=np.float32) |
| ... | ... | @@ -95,6 +112,7 @@ class CLIPImageEncoder: |
| 95 | 112 | images: List[Union[str, Image.Image]], |
| 96 | 113 | batch_size: int = 8, |
| 97 | 114 | normalize_embeddings: bool = True, |
| 115 | + priority: int = 0, | |
| 98 | 116 | ) -> List[np.ndarray]: |
| 99 | 117 | """ |
| 100 | 118 | Encode a batch of images efficiently via network service. |
| ... | ... | @@ -129,7 +147,11 @@ class CLIPImageEncoder: |
| 129 | 147 | |
| 130 | 148 | for i in range(0, len(pending_urls), batch_size): |
| 131 | 149 | batch_urls = pending_urls[i : i + batch_size] |
| 132 | - response_data = self._call_service(batch_urls, normalize_embeddings=normalize_embeddings) | |
| 150 | + response_data = self._call_service( | |
| 151 | + batch_urls, | |
| 152 | + normalize_embeddings=normalize_embeddings, | |
| 153 | + priority=priority, | |
| 154 | + ) | |
| 133 | 155 | if not response_data or len(response_data) != len(batch_urls): |
| 134 | 156 | raise RuntimeError( |
| 135 | 157 | f"Image embedding response length mismatch: expected {len(batch_urls)}, " |
| ... | ... | @@ -153,6 +175,7 @@ class CLIPImageEncoder: |
| 153 | 175 | urls: List[str], |
| 154 | 176 | batch_size: Optional[int] = None, |
| 155 | 177 | normalize_embeddings: bool = True, |
| 178 | + priority: int = 0, | |
| 156 | 179 | ) -> List[np.ndarray]: |
| 157 | 180 | """ |
| 158 | 181 | 与 ClipImageModel / ClipAsServiceImageEncoder 一致的接口,供索引器 document_transformer 调用。 |
| ... | ... | @@ -168,4 +191,5 @@ class CLIPImageEncoder: |
| 168 | 191 | urls, |
| 169 | 192 | batch_size=batch_size or 8, |
| 170 | 193 | normalize_embeddings=normalize_embeddings, |
| 194 | + priority=priority, | |
| 171 | 195 | ) | ... | ... |
embeddings/server.py
| ... | ... | @@ -129,7 +129,7 @@ _TEXT_REQUEST_TIMEOUT_SEC = max( |
| 129 | 129 | 1.0, float(os.getenv("TEXT_REQUEST_TIMEOUT_SEC", "30")) |
| 130 | 130 | ) |
| 131 | 131 | _TEXT_MAX_INFLIGHT = max(1, int(os.getenv("TEXT_MAX_INFLIGHT", "32"))) |
| 132 | -_IMAGE_MAX_INFLIGHT = max(1, int(os.getenv("IMAGE_MAX_INFLIGHT", "1"))) | |
| 132 | +_IMAGE_MAX_INFLIGHT = max(1, int(os.getenv("IMAGE_MAX_INFLIGHT", "20"))) | |
| 133 | 133 | _OVERLOAD_STATUS_CODE = int(os.getenv("EMBEDDING_OVERLOAD_STATUS_CODE", "503")) |
| 134 | 134 | _LOG_PREVIEW_COUNT = max(1, int(os.getenv("EMBEDDING_LOG_PREVIEW_COUNT", "3"))) |
| 135 | 135 | _LOG_TEXT_PREVIEW_CHARS = max(32, int(os.getenv("EMBEDDING_LOG_TEXT_PREVIEW_CHARS", "120"))) |
| ... | ... | @@ -206,23 +206,24 @@ class _InflightLimiter: |
| 206 | 206 | def __init__(self, name: str, limit: int): |
| 207 | 207 | self.name = name |
| 208 | 208 | self.limit = max(1, int(limit)) |
| 209 | - self._sem = threading.BoundedSemaphore(self.limit) | |
| 210 | 209 | self._lock = threading.Lock() |
| 211 | 210 | self._active = 0 |
| 212 | 211 | self._rejected = 0 |
| 213 | 212 | self._completed = 0 |
| 214 | 213 | self._failed = 0 |
| 215 | 214 | self._max_active = 0 |
| 215 | + self._priority_bypass_total = 0 | |
| 216 | 216 | |
| 217 | - def try_acquire(self) -> tuple[bool, int]: | |
| 218 | - if not self._sem.acquire(blocking=False): | |
| 219 | - with self._lock: | |
| 217 | + def try_acquire(self, *, bypass_limit: bool = False) -> tuple[bool, int]: | |
| 218 | + with self._lock: | |
| 219 | + if not bypass_limit and self._active >= self.limit: | |
| 220 | 220 | self._rejected += 1 |
| 221 | 221 | active = self._active |
| 222 | - return False, active | |
| 223 | - with self._lock: | |
| 222 | + return False, active | |
| 224 | 223 | self._active += 1 |
| 225 | 224 | self._max_active = max(self._max_active, self._active) |
| 225 | + if bypass_limit: | |
| 226 | + self._priority_bypass_total += 1 | |
| 226 | 227 | active = self._active |
| 227 | 228 | return True, active |
| 228 | 229 | |
| ... | ... | @@ -234,7 +235,6 @@ class _InflightLimiter: |
| 234 | 235 | else: |
| 235 | 236 | self._failed += 1 |
| 236 | 237 | active = self._active |
| 237 | - self._sem.release() | |
| 238 | 238 | return active |
| 239 | 239 | |
| 240 | 240 | def snapshot(self) -> Dict[str, int]: |
| ... | ... | @@ -246,9 +246,157 @@ class _InflightLimiter: |
| 246 | 246 | "completed_total": self._completed, |
| 247 | 247 | "failed_total": self._failed, |
| 248 | 248 | "max_active": self._max_active, |
| 249 | + "priority_bypass_total": self._priority_bypass_total, | |
| 249 | 250 | } |
| 250 | 251 | |
| 251 | 252 | |
| 253 | +def _effective_priority(priority: int) -> int: | |
| 254 | + return 1 if int(priority) > 0 else 0 | |
| 255 | + | |
| 256 | + | |
def _priority_label(priority: int) -> str:
    """Return the human-readable log label for a priority: "high" or "normal".

    Mirrors _effective_priority: any positive value counts as high.
    """
    return "high" if int(priority) > 0 else "normal"
| 259 | + | |
| 260 | + | |
@dataclass
class _TextDispatchTask:
    """A queued text-embedding request handed to the dispatch worker pool.

    The submitting thread fills the input fields, enqueues the task, and blocks
    on `done`; a dispatch worker runs the embedding and stores `result` or
    `error` before setting the event.
    """

    normalized: List[str]  # pre-normalized input texts to embed
    effective_normalize: bool  # whether the backend should L2-normalize vectors
    request_id: str  # correlation id propagated into worker log lines
    priority: int  # 0 = normal, 1 = high (see _effective_priority)
    created_at: float  # perf_counter timestamp at enqueue; used for queue-wait metrics
    done: threading.Event  # set by the worker once result/error is populated
    result: Optional[_EmbedResult] = None  # populated on success
    error: Optional[Exception] = None  # populated on failure; re-raised by submitter
| 271 | + | |
| 272 | + | |
# FIFO queues feeding the text dispatch workers; the high queue is always drained first.
_text_dispatch_high_queue: "deque[_TextDispatchTask]" = deque()
_text_dispatch_normal_queue: "deque[_TextDispatchTask]" = deque()
# Guards both queues plus the stop flag; workers block on it waiting for work.
_text_dispatch_cv = threading.Condition()
# Worker-pool state, managed by _start/_stop_text_dispatch_workers.
_text_dispatch_workers: List[threading.Thread] = []
_text_dispatch_worker_stop = False
_text_dispatch_worker_count = 0
| 279 | + | |
| 280 | + | |
def _text_dispatch_queue_depth() -> Dict[str, int]:
    """Snapshot the dispatch queue sizes, read under the queue condition lock."""
    with _text_dispatch_cv:
        high_depth = len(_text_dispatch_high_queue)
        normal_depth = len(_text_dispatch_normal_queue)
    return {
        "high": high_depth,
        "normal": normal_depth,
        "total": high_depth + normal_depth,
    }
| 288 | + | |
| 289 | + | |
def _pop_text_dispatch_task_locked() -> Optional["_TextDispatchTask"]:
    """Pop the next dispatch task, preferring the high-priority queue.

    Caller must already hold ``_text_dispatch_cv``. Returns None when both
    queues are empty.
    """
    for pending in (_text_dispatch_high_queue, _text_dispatch_normal_queue):
        if pending:
            return pending.popleft()
    return None
| 296 | + | |
| 297 | + | |
def _start_text_dispatch_workers() -> None:
    """(Re)start the text dispatch worker pool, idempotently.

    No-ops when the text model is not loaded or the pool already has exactly
    the target number of live workers. ``local_st`` backends get a single
    worker (serializing in-process inference); other backends get
    ``_TEXT_MAX_INFLIGHT`` workers.
    """
    global _text_dispatch_workers, _text_dispatch_worker_stop, _text_dispatch_worker_count
    if _text_model is None:
        return
    # One worker serializes local in-process inference; remote backends fan out.
    target_worker_count = 1 if _text_backend_name == "local_st" else _TEXT_MAX_INFLIGHT
    alive_workers = [worker for worker in _text_dispatch_workers if worker.is_alive()]
    if len(alive_workers) == target_worker_count:
        # Pool already at target size: just drop references to dead threads.
        _text_dispatch_workers = alive_workers
        _text_dispatch_worker_count = target_worker_count
        return
    # NOTE(review): when some (but not all) old workers are still alive they are
    # abandoned, not stopped — they keep draining the shared queues alongside the
    # new pool, so the effective worker count can exceed the target. Clearing the
    # stop flag here also races with a concurrent _stop_text_dispatch_workers();
    # confirm callers never overlap start and stop.
    _text_dispatch_worker_stop = False
    _text_dispatch_worker_count = target_worker_count
    _text_dispatch_workers = []
    for idx in range(target_worker_count):
        worker = threading.Thread(
            target=_text_dispatch_worker_loop,
            args=(idx,),
            name=f"embed-text-dispatch-{idx}",
            daemon=True,  # never block interpreter shutdown
        )
        worker.start()
        _text_dispatch_workers.append(worker)
    logger.info(
        "Started text dispatch workers | backend=%s workers=%d",
        _text_backend_name,
        target_worker_count,
    )
| 325 | + | |
| 326 | + | |
def _stop_text_dispatch_workers() -> None:
    """Signal all dispatch workers to exit and wake any that are waiting.

    Workers observe the flag in _text_dispatch_worker_loop and return. This
    does not join the threads or drain tasks still sitting in the queues.
    """
    global _text_dispatch_worker_stop
    with _text_dispatch_cv:
        _text_dispatch_worker_stop = True
        _text_dispatch_cv.notify_all()
| 332 | + | |
| 333 | + | |
def _text_dispatch_worker_loop(worker_idx: int) -> None:
    """Worker-thread body: pop queued text tasks (high priority first) and run them.

    Loops until _text_dispatch_worker_stop is set. Each task is executed via
    _embed_text_impl outside the condition lock; the result or the raised
    exception is stored on the task, and ``task.done`` is always set so the
    submitting request thread resumes even on failure.
    """
    while True:
        with _text_dispatch_cv:
            # Sleep until there is work in either queue or a shutdown request.
            while (
                not _text_dispatch_high_queue
                and not _text_dispatch_normal_queue
                and not _text_dispatch_worker_stop
            ):
                _text_dispatch_cv.wait()
            if _text_dispatch_worker_stop:
                return
            task = _pop_text_dispatch_task_locked()
        if task is None:
            # Another worker drained the queue between wakeup and pop.
            continue
        try:
            queue_wait_ms = (time.perf_counter() - task.created_at) * 1000.0
            logger.info(
                "text dispatch start | worker=%d priority=%s inputs=%d queue_wait_ms=%.2f",
                worker_idx,
                _priority_label(task.priority),
                len(task.normalized),
                queue_wait_ms,
                extra=_request_log_extra(task.request_id),
            )
            task.result = _embed_text_impl(
                task.normalized,
                task.effective_normalize,
                task.request_id,
                task.priority,
            )
        except Exception as exc:
            # Hand the failure back to the submitter instead of killing the worker.
            task.error = exc
        finally:
            # Always wake the submitter, success or failure.
            task.done.set()
| 368 | + | |
| 369 | + | |
def _submit_text_dispatch_and_wait(
    normalized: List[str],
    effective_normalize: bool,
    request_id: str,
    priority: int,
) -> _EmbedResult:
    """Enqueue a text-embedding task for the dispatch workers and block until done.

    Args:
        normalized: Pre-normalized input texts to embed.
        effective_normalize: Whether the backend should L2-normalize vectors.
        request_id: Request correlation id, propagated into worker log lines.
        priority: Raw requested priority; collapsed to 0/1 via
            _effective_priority. High-priority tasks are drained first.

    Returns:
        The _EmbedResult produced by a dispatch worker.

    Raises:
        RuntimeError: If no worker completes the task within
            _TEXT_REQUEST_TIMEOUT_SEC, or a worker finished without a result.
        Exception: Re-raises whatever _embed_text_impl raised in the worker.
    """
    # Self-heal: restart the pool if every worker thread has died.
    if not any(worker.is_alive() for worker in _text_dispatch_workers):
        _start_text_dispatch_workers()
    task = _TextDispatchTask(
        normalized=normalized,
        effective_normalize=effective_normalize,
        request_id=request_id,
        priority=_effective_priority(priority),
        created_at=time.perf_counter(),
        done=threading.Event(),
    )
    with _text_dispatch_cv:
        if task.priority > 0:
            _text_dispatch_high_queue.append(task)
        else:
            _text_dispatch_normal_queue.append(task)
        _text_dispatch_cv.notify()
    # Bound the wait the same way _encode_single_text_with_microbatch does: an
    # unbounded wait() would hang the request forever if the worker pool dies
    # after the task is enqueued.
    if not task.done.wait(timeout=_TEXT_REQUEST_TIMEOUT_SEC):
        with _text_dispatch_cv:
            pending_queue = (
                _text_dispatch_high_queue if task.priority > 0 else _text_dispatch_normal_queue
            )
            try:
                pending_queue.remove(task)
            except ValueError:
                # A worker already popped it and is mid-flight; give up anyway.
                pass
        raise RuntimeError(
            f"Text dispatch did not complete within {_TEXT_REQUEST_TIMEOUT_SEC:.1f}s "
            f"(request_id={request_id})"
        )
    if task.error is not None:
        raise task.error
    if task.result is None:
        raise RuntimeError("Text dispatch worker returned empty result")
    return task.result
| 398 | + | |
| 399 | + | |
| 252 | 400 | _text_request_limiter = _InflightLimiter(name="text", limit=_TEXT_MAX_INFLIGHT) |
| 253 | 401 | _image_request_limiter = _InflightLimiter(name="image", limit=_IMAGE_MAX_INFLIGHT) |
| 254 | 402 | _text_stats = _EndpointStats(name="text") |
| ... | ... | @@ -261,6 +409,7 @@ _image_cache = RedisEmbeddingCache(key_prefix=_CACHE_PREFIX, namespace="image") |
| 261 | 409 | class _SingleTextTask: |
| 262 | 410 | text: str |
| 263 | 411 | normalize: bool |
| 412 | + priority: int | |
| 264 | 413 | created_at: float |
| 265 | 414 | request_id: str |
| 266 | 415 | done: threading.Event |
| ... | ... | @@ -268,12 +417,30 @@ class _SingleTextTask: |
| 268 | 417 | error: Optional[Exception] = None |
| 269 | 418 | |
| 270 | 419 | |
| 271 | -_text_single_queue: "deque[_SingleTextTask]" = deque() | |
| 420 | +_text_single_high_queue: "deque[_SingleTextTask]" = deque() | |
| 421 | +_text_single_normal_queue: "deque[_SingleTextTask]" = deque() | |
| 272 | 422 | _text_single_queue_cv = threading.Condition() |
| 273 | 423 | _text_batch_worker: Optional[threading.Thread] = None |
| 274 | 424 | _text_batch_worker_stop = False |
| 275 | 425 | |
| 276 | 426 | |
def _text_microbatch_queue_depth() -> Dict[str, int]:
    """Snapshot the single-text microbatch queue sizes under the queue lock."""
    with _text_single_queue_cv:
        high_depth = len(_text_single_high_queue)
        normal_depth = len(_text_single_normal_queue)
    return {
        "high": high_depth,
        "normal": normal_depth,
        "total": high_depth + normal_depth,
    }
| 434 | + | |
| 435 | + | |
def _pop_single_text_task_locked() -> Optional["_SingleTextTask"]:
    """Pop the next single-text task, preferring the high-priority queue.

    Caller must already hold ``_text_single_queue_cv``. Returns None when both
    queues are empty.
    """
    for pending in (_text_single_high_queue, _text_single_normal_queue):
        if pending:
            return pending.popleft()
    return None
| 442 | + | |
| 443 | + | |
| 277 | 444 | def _compact_preview(text: str, max_chars: int) -> str: |
| 278 | 445 | compact = " ".join((text or "").split()) |
| 279 | 446 | if len(compact) <= max_chars: |
| ... | ... | @@ -356,30 +523,41 @@ def _text_batch_worker_loop() -> None: |
| 356 | 523 | max_batch = max(1, int(CONFIG.TEXT_BATCH_SIZE)) |
| 357 | 524 | while True: |
| 358 | 525 | with _text_single_queue_cv: |
| 359 | - while not _text_single_queue and not _text_batch_worker_stop: | |
| 526 | + while ( | |
| 527 | + not _text_single_high_queue | |
| 528 | + and not _text_single_normal_queue | |
| 529 | + and not _text_batch_worker_stop | |
| 530 | + ): | |
| 360 | 531 | _text_single_queue_cv.wait() |
| 361 | 532 | if _text_batch_worker_stop: |
| 362 | 533 | return |
| 363 | 534 | |
| 364 | - batch: List[_SingleTextTask] = [_text_single_queue.popleft()] | |
| 535 | + first_task = _pop_single_text_task_locked() | |
| 536 | + if first_task is None: | |
| 537 | + continue | |
| 538 | + batch: List[_SingleTextTask] = [first_task] | |
| 365 | 539 | deadline = time.perf_counter() + _TEXT_MICROBATCH_WINDOW_SEC |
| 366 | 540 | |
| 367 | 541 | while len(batch) < max_batch: |
| 368 | 542 | remaining = deadline - time.perf_counter() |
| 369 | 543 | if remaining <= 0: |
| 370 | 544 | break |
| 371 | - if not _text_single_queue: | |
| 545 | + if not _text_single_high_queue and not _text_single_normal_queue: | |
| 372 | 546 | _text_single_queue_cv.wait(timeout=remaining) |
| 373 | 547 | continue |
| 374 | - while _text_single_queue and len(batch) < max_batch: | |
| 375 | - batch.append(_text_single_queue.popleft()) | |
| 548 | + while len(batch) < max_batch: | |
| 549 | + next_task = _pop_single_text_task_locked() | |
| 550 | + if next_task is None: | |
| 551 | + break | |
| 552 | + batch.append(next_task) | |
| 376 | 553 | |
| 377 | 554 | try: |
| 378 | 555 | queue_wait_ms = [(time.perf_counter() - task.created_at) * 1000.0 for task in batch] |
| 379 | 556 | reqids = [task.request_id for task in batch] |
| 380 | 557 | logger.info( |
| 381 | - "text microbatch dispatch | size=%d queue_wait_ms_min=%.2f queue_wait_ms_max=%.2f reqids=%s preview=%s", | |
| 558 | + "text microbatch dispatch | size=%d priority=%s queue_wait_ms_min=%.2f queue_wait_ms_max=%.2f reqids=%s preview=%s", | |
| 382 | 559 | len(batch), |
| 560 | + _priority_label(max(task.priority for task in batch)), | |
| 383 | 561 | min(queue_wait_ms) if queue_wait_ms else 0.0, |
| 384 | 562 | max(queue_wait_ms) if queue_wait_ms else 0.0, |
| 385 | 563 | reqids, |
| ... | ... | @@ -423,22 +601,32 @@ def _text_batch_worker_loop() -> None: |
| 423 | 601 | task.done.set() |
| 424 | 602 | |
| 425 | 603 | |
| 426 | -def _encode_single_text_with_microbatch(text: str, normalize: bool, request_id: str) -> List[float]: | |
| 604 | +def _encode_single_text_with_microbatch( | |
| 605 | + text: str, | |
| 606 | + normalize: bool, | |
| 607 | + request_id: str, | |
| 608 | + priority: int, | |
| 609 | +) -> List[float]: | |
| 427 | 610 | task = _SingleTextTask( |
| 428 | 611 | text=text, |
| 429 | 612 | normalize=normalize, |
| 613 | + priority=_effective_priority(priority), | |
| 430 | 614 | created_at=time.perf_counter(), |
| 431 | 615 | request_id=request_id, |
| 432 | 616 | done=threading.Event(), |
| 433 | 617 | ) |
| 434 | 618 | with _text_single_queue_cv: |
| 435 | - _text_single_queue.append(task) | |
| 619 | + if task.priority > 0: | |
| 620 | + _text_single_high_queue.append(task) | |
| 621 | + else: | |
| 622 | + _text_single_normal_queue.append(task) | |
| 436 | 623 | _text_single_queue_cv.notify() |
| 437 | 624 | |
| 438 | 625 | if not task.done.wait(timeout=_TEXT_REQUEST_TIMEOUT_SEC): |
| 439 | 626 | with _text_single_queue_cv: |
| 627 | + queue = _text_single_high_queue if task.priority > 0 else _text_single_normal_queue | |
| 440 | 628 | try: |
| 441 | - _text_single_queue.remove(task) | |
| 629 | + queue.remove(task) | |
| 442 | 630 | except ValueError: |
| 443 | 631 | pass |
| 444 | 632 | raise RuntimeError( |
| ... | ... | @@ -489,6 +677,7 @@ def load_models(): |
| 489 | 677 | f"Unsupported embedding backend: {backend_name}. " |
| 490 | 678 | "Supported: tei, local_st" |
| 491 | 679 | ) |
| 680 | + _start_text_dispatch_workers() | |
| 492 | 681 | logger.info("Text backend loaded successfully: %s", _text_backend_name) |
| 493 | 682 | except Exception as e: |
| 494 | 683 | logger.error("Failed to load text model: %s", e, exc_info=True) |
| ... | ... | @@ -532,6 +721,7 @@ def load_models(): |
| 532 | 721 | @app.on_event("shutdown") |
| 533 | 722 | def stop_workers() -> None: |
| 534 | 723 | _stop_text_batch_worker() |
| 724 | + _stop_text_dispatch_workers() | |
| 535 | 725 | |
| 536 | 726 | |
| 537 | 727 | def _normalize_vector(vec: np.ndarray) -> np.ndarray: |
| ... | ... | @@ -602,6 +792,8 @@ def _try_full_image_cache_hit( |
| 602 | 792 | def health() -> Dict[str, Any]: |
| 603 | 793 | """Health check endpoint. Returns status and current throttling stats.""" |
| 604 | 794 | ready = (not open_text_model or _text_model is not None) and (not open_image_model or _image_model is not None) |
| 795 | + text_dispatch_depth = _text_dispatch_queue_depth() | |
| 796 | + text_microbatch_depth = _text_microbatch_queue_depth() | |
| 605 | 797 | return { |
| 606 | 798 | "status": "ok" if ready else "degraded", |
| 607 | 799 | "service_kind": _SERVICE_KIND, |
| ... | ... | @@ -620,9 +812,18 @@ def health() -> Dict[str, Any]: |
| 620 | 812 | "text": _text_stats.snapshot(), |
| 621 | 813 | "image": _image_stats.snapshot(), |
| 622 | 814 | }, |
| 815 | + "text_dispatch": { | |
| 816 | + "workers": _text_dispatch_worker_count, | |
| 817 | + "workers_alive": sum(1 for worker in _text_dispatch_workers if worker.is_alive()), | |
| 818 | + "queue_depth": text_dispatch_depth["total"], | |
| 819 | + "queue_depth_high": text_dispatch_depth["high"], | |
| 820 | + "queue_depth_normal": text_dispatch_depth["normal"], | |
| 821 | + }, | |
| 623 | 822 | "text_microbatch": { |
| 624 | 823 | "window_ms": round(_TEXT_MICROBATCH_WINDOW_SEC * 1000.0, 3), |
| 625 | - "queue_depth": len(_text_single_queue), | |
| 824 | + "queue_depth": text_microbatch_depth["total"], | |
| 825 | + "queue_depth_high": text_microbatch_depth["high"], | |
| 826 | + "queue_depth_normal": text_microbatch_depth["normal"], | |
| 626 | 827 | "worker_alive": bool(_text_batch_worker is not None and _text_batch_worker.is_alive()), |
| 627 | 828 | "request_timeout_sec": _TEXT_REQUEST_TIMEOUT_SEC, |
| 628 | 829 | }, |
| ... | ... | @@ -654,6 +855,7 @@ def _embed_text_impl( |
| 654 | 855 | normalized: List[str], |
| 655 | 856 | effective_normalize: bool, |
| 656 | 857 | request_id: str, |
| 858 | + priority: int = 0, | |
| 657 | 859 | ) -> _EmbedResult: |
| 658 | 860 | if _text_model is None: |
| 659 | 861 | raise RuntimeError("Text model not loaded") |
| ... | ... | @@ -703,6 +905,7 @@ def _embed_text_impl( |
| 703 | 905 | missing_texts[0], |
| 704 | 906 | normalize=effective_normalize, |
| 705 | 907 | request_id=request_id, |
| 908 | + priority=priority, | |
| 706 | 909 | ) |
| 707 | 910 | ] |
| 708 | 911 | mode = "microbatch-single" |
| ... | ... | @@ -777,6 +980,7 @@ async def embed_text( |
| 777 | 980 | http_request: Request, |
| 778 | 981 | response: Response, |
| 779 | 982 | normalize: Optional[bool] = None, |
| 983 | + priority: int = 0, | |
| 780 | 984 | ) -> List[Optional[List[float]]]: |
| 781 | 985 | if _text_model is None: |
| 782 | 986 | raise HTTPException(status_code=503, detail="Text embedding model not loaded in this service") |
| ... | ... | @@ -784,6 +988,9 @@ async def embed_text( |
| 784 | 988 | request_id = _resolve_request_id(http_request) |
| 785 | 989 | response.headers["X-Request-ID"] = request_id |
| 786 | 990 | |
| 991 | + if priority < 0: | |
| 992 | + raise HTTPException(status_code=400, detail="priority must be >= 0") | |
| 993 | + effective_priority = _effective_priority(priority) | |
| 787 | 994 | effective_normalize = bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS) if normalize is None else bool(normalize) |
| 788 | 995 | normalized: List[str] = [] |
| 789 | 996 | for i, t in enumerate(texts): |
| ... | ... | @@ -806,8 +1013,9 @@ async def embed_text( |
| 806 | 1013 | cache_misses=0, |
| 807 | 1014 | ) |
| 808 | 1015 | logger.info( |
| 809 | - "embed_text response | backend=%s mode=cache-only inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=0 first_vector=%s latency_ms=%.2f", | |
| 1016 | + "embed_text response | backend=%s mode=cache-only priority=%s inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=0 first_vector=%s latency_ms=%.2f", | |
| 810 | 1017 | _text_backend_name, |
| 1018 | + _priority_label(effective_priority), | |
| 811 | 1019 | len(normalized), |
| 812 | 1020 | effective_normalize, |
| 813 | 1021 | len(cache_only.vectors[0]) if cache_only.vectors and cache_only.vectors[0] is not None else 0, |
| ... | ... | @@ -818,13 +1026,14 @@ async def embed_text( |
| 818 | 1026 | ) |
| 819 | 1027 | return cache_only.vectors |
| 820 | 1028 | |
| 821 | - accepted, active = _text_request_limiter.try_acquire() | |
| 1029 | + accepted, active = _text_request_limiter.try_acquire(bypass_limit=effective_priority > 0) | |
| 822 | 1030 | if not accepted: |
| 823 | 1031 | _text_stats.record_rejected() |
| 824 | 1032 | logger.warning( |
| 825 | - "embed_text rejected | client=%s backend=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", | |
| 1033 | + "embed_text rejected | client=%s backend=%s priority=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", | |
| 826 | 1034 | _request_client(http_request), |
| 827 | 1035 | _text_backend_name, |
| 1036 | + _priority_label(effective_priority), | |
| 828 | 1037 | len(normalized), |
| 829 | 1038 | effective_normalize, |
| 830 | 1039 | active, |
| ... | ... | @@ -834,7 +1043,10 @@ async def embed_text( |
| 834 | 1043 | ) |
| 835 | 1044 | raise HTTPException( |
| 836 | 1045 | status_code=_OVERLOAD_STATUS_CODE, |
| 837 | - detail=f"Text embedding service busy: active={active}, limit={_TEXT_MAX_INFLIGHT}", | |
| 1046 | + detail=( | |
| 1047 | + "Text embedding service busy for priority=0 requests: " | |
| 1048 | + f"active={active}, limit={_TEXT_MAX_INFLIGHT}" | |
| 1049 | + ), | |
| 838 | 1050 | ) |
| 839 | 1051 | |
| 840 | 1052 | request_started = time.perf_counter() |
| ... | ... | @@ -844,9 +1056,10 @@ async def embed_text( |
| 844 | 1056 | cache_misses = 0 |
| 845 | 1057 | try: |
| 846 | 1058 | logger.info( |
| 847 | - "embed_text request | client=%s backend=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", | |
| 1059 | + "embed_text request | client=%s backend=%s priority=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", | |
| 848 | 1060 | _request_client(http_request), |
| 849 | 1061 | _text_backend_name, |
| 1062 | + _priority_label(effective_priority), | |
| 850 | 1063 | len(normalized), |
| 851 | 1064 | effective_normalize, |
| 852 | 1065 | active, |
| ... | ... | @@ -855,13 +1068,20 @@ async def embed_text( |
| 855 | 1068 | extra=_request_log_extra(request_id), |
| 856 | 1069 | ) |
| 857 | 1070 | verbose_logger.info( |
| 858 | - "embed_text detail | payload=%s normalize=%s backend=%s", | |
| 1071 | + "embed_text detail | payload=%s normalize=%s backend=%s priority=%s", | |
| 859 | 1072 | normalized, |
| 860 | 1073 | effective_normalize, |
| 861 | 1074 | _text_backend_name, |
| 1075 | + _priority_label(effective_priority), | |
| 862 | 1076 | extra=_request_log_extra(request_id), |
| 863 | 1077 | ) |
| 864 | - result = await run_in_threadpool(_embed_text_impl, normalized, effective_normalize, request_id) | |
| 1078 | + result = await run_in_threadpool( | |
| 1079 | + _submit_text_dispatch_and_wait, | |
| 1080 | + normalized, | |
| 1081 | + effective_normalize, | |
| 1082 | + request_id, | |
| 1083 | + effective_priority, | |
| 1084 | + ) | |
| 865 | 1085 | success = True |
| 866 | 1086 | backend_elapsed_ms = result.backend_elapsed_ms |
| 867 | 1087 | cache_hits = result.cache_hits |
| ... | ... | @@ -875,9 +1095,10 @@ async def embed_text( |
| 875 | 1095 | cache_misses=cache_misses, |
| 876 | 1096 | ) |
| 877 | 1097 | logger.info( |
| 878 | - "embed_text response | backend=%s mode=%s inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=%d first_vector=%s latency_ms=%.2f", | |
| 1098 | + "embed_text response | backend=%s mode=%s priority=%s inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=%d first_vector=%s latency_ms=%.2f", | |
| 879 | 1099 | _text_backend_name, |
| 880 | 1100 | result.mode, |
| 1101 | + _priority_label(effective_priority), | |
| 881 | 1102 | len(normalized), |
| 882 | 1103 | effective_normalize, |
| 883 | 1104 | len(result.vectors[0]) if result.vectors and result.vectors[0] is not None else 0, |
| ... | ... | @@ -888,8 +1109,9 @@ async def embed_text( |
| 888 | 1109 | extra=_request_log_extra(request_id), |
| 889 | 1110 | ) |
| 890 | 1111 | verbose_logger.info( |
| 891 | - "embed_text result detail | count=%d first_vector=%s latency_ms=%.2f", | |
| 1112 | + "embed_text result detail | count=%d priority=%s first_vector=%s latency_ms=%.2f", | |
| 892 | 1113 | len(result.vectors), |
| 1114 | + _priority_label(effective_priority), | |
| 893 | 1115 | result.vectors[0][: _VECTOR_PREVIEW_DIMS] |
| 894 | 1116 | if result.vectors and result.vectors[0] is not None |
| 895 | 1117 | else [], |
| ... | ... | @@ -909,8 +1131,9 @@ async def embed_text( |
| 909 | 1131 | cache_misses=cache_misses, |
| 910 | 1132 | ) |
| 911 | 1133 | logger.error( |
| 912 | - "embed_text failed | backend=%s inputs=%d normalize=%s latency_ms=%.2f error=%s", | |
| 1134 | + "embed_text failed | backend=%s priority=%s inputs=%d normalize=%s latency_ms=%.2f error=%s", | |
| 913 | 1135 | _text_backend_name, |
| 1136 | + _priority_label(effective_priority), | |
| 914 | 1137 | len(normalized), |
| 915 | 1138 | effective_normalize, |
| 916 | 1139 | latency_ms, |
| ... | ... | @@ -922,8 +1145,9 @@ async def embed_text( |
| 922 | 1145 | finally: |
| 923 | 1146 | remaining = _text_request_limiter.release(success=success) |
| 924 | 1147 | logger.info( |
| 925 | - "embed_text finalize | success=%s active_after=%d", | |
| 1148 | + "embed_text finalize | success=%s priority=%s active_after=%d", | |
| 926 | 1149 | success, |
| 1150 | + _priority_label(effective_priority), | |
| 927 | 1151 | remaining, |
| 928 | 1152 | extra=_request_log_extra(request_id), |
| 929 | 1153 | ) |
| ... | ... | @@ -1019,6 +1243,7 @@ async def embed_image( |
| 1019 | 1243 | http_request: Request, |
| 1020 | 1244 | response: Response, |
| 1021 | 1245 | normalize: Optional[bool] = None, |
| 1246 | + priority: int = 0, | |
| 1022 | 1247 | ) -> List[Optional[List[float]]]: |
| 1023 | 1248 | if _image_model is None: |
| 1024 | 1249 | raise HTTPException(status_code=503, detail="Image embedding model not loaded in this service") |
| ... | ... | @@ -1026,6 +1251,10 @@ async def embed_image( |
| 1026 | 1251 | request_id = _resolve_request_id(http_request) |
| 1027 | 1252 | response.headers["X-Request-ID"] = request_id |
| 1028 | 1253 | |
| 1254 | + if priority < 0: | |
| 1255 | + raise HTTPException(status_code=400, detail="priority must be >= 0") | |
| 1256 | + effective_priority = _effective_priority(priority) | |
| 1257 | + | |
| 1029 | 1258 | effective_normalize = bool(CONFIG.IMAGE_NORMALIZE_EMBEDDINGS) if normalize is None else bool(normalize) |
| 1030 | 1259 | urls: List[str] = [] |
| 1031 | 1260 | for i, url_or_path in enumerate(images): |
| ... | ... | @@ -1048,7 +1277,8 @@ async def embed_image( |
| 1048 | 1277 | cache_misses=0, |
| 1049 | 1278 | ) |
| 1050 | 1279 | logger.info( |
| 1051 | - "embed_image response | mode=cache-only inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=0 first_vector=%s latency_ms=%.2f", | |
| 1280 | + "embed_image response | mode=cache-only priority=%s inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=0 first_vector=%s latency_ms=%.2f", | |
| 1281 | + _priority_label(effective_priority), | |
| 1052 | 1282 | len(urls), |
| 1053 | 1283 | effective_normalize, |
| 1054 | 1284 | len(cache_only.vectors[0]) if cache_only.vectors and cache_only.vectors[0] is not None else 0, |
| ... | ... | @@ -1059,12 +1289,13 @@ async def embed_image( |
| 1059 | 1289 | ) |
| 1060 | 1290 | return cache_only.vectors |
| 1061 | 1291 | |
| 1062 | - accepted, active = _image_request_limiter.try_acquire() | |
| 1292 | + accepted, active = _image_request_limiter.try_acquire(bypass_limit=effective_priority > 0) | |
| 1063 | 1293 | if not accepted: |
| 1064 | 1294 | _image_stats.record_rejected() |
| 1065 | 1295 | logger.warning( |
| 1066 | - "embed_image rejected | client=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", | |
| 1296 | + "embed_image rejected | client=%s priority=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", | |
| 1067 | 1297 | _request_client(http_request), |
| 1298 | + _priority_label(effective_priority), | |
| 1068 | 1299 | len(urls), |
| 1069 | 1300 | effective_normalize, |
| 1070 | 1301 | active, |
| ... | ... | @@ -1074,7 +1305,10 @@ async def embed_image( |
| 1074 | 1305 | ) |
| 1075 | 1306 | raise HTTPException( |
| 1076 | 1307 | status_code=_OVERLOAD_STATUS_CODE, |
| 1077 | - detail=f"Image embedding service busy: active={active}, limit={_IMAGE_MAX_INFLIGHT}", | |
| 1308 | + detail=( | |
| 1309 | + "Image embedding service busy for priority=0 requests: " | |
| 1310 | + f"active={active}, limit={_IMAGE_MAX_INFLIGHT}" | |
| 1311 | + ), | |
| 1078 | 1312 | ) |
| 1079 | 1313 | |
| 1080 | 1314 | request_started = time.perf_counter() |
| ... | ... | @@ -1084,8 +1318,9 @@ async def embed_image( |
| 1084 | 1318 | cache_misses = 0 |
| 1085 | 1319 | try: |
| 1086 | 1320 | logger.info( |
| 1087 | - "embed_image request | client=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", | |
| 1321 | + "embed_image request | client=%s priority=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", | |
| 1088 | 1322 | _request_client(http_request), |
| 1323 | + _priority_label(effective_priority), | |
| 1089 | 1324 | len(urls), |
| 1090 | 1325 | effective_normalize, |
| 1091 | 1326 | active, |
| ... | ... | @@ -1094,9 +1329,10 @@ async def embed_image( |
| 1094 | 1329 | extra=_request_log_extra(request_id), |
| 1095 | 1330 | ) |
| 1096 | 1331 | verbose_logger.info( |
| 1097 | - "embed_image detail | payload=%s normalize=%s", | |
| 1332 | + "embed_image detail | payload=%s normalize=%s priority=%s", | |
| 1098 | 1333 | urls, |
| 1099 | 1334 | effective_normalize, |
| 1335 | + _priority_label(effective_priority), | |
| 1100 | 1336 | extra=_request_log_extra(request_id), |
| 1101 | 1337 | ) |
| 1102 | 1338 | result = await run_in_threadpool(_embed_image_impl, urls, effective_normalize, request_id) |
| ... | ... | @@ -1113,8 +1349,9 @@ async def embed_image( |
| 1113 | 1349 | cache_misses=cache_misses, |
| 1114 | 1350 | ) |
| 1115 | 1351 | logger.info( |
| 1116 | - "embed_image response | mode=%s inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=%d first_vector=%s latency_ms=%.2f", | |
| 1352 | + "embed_image response | mode=%s priority=%s inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=%d first_vector=%s latency_ms=%.2f", | |
| 1117 | 1353 | result.mode, |
| 1354 | + _priority_label(effective_priority), | |
| 1118 | 1355 | len(urls), |
| 1119 | 1356 | effective_normalize, |
| 1120 | 1357 | len(result.vectors[0]) if result.vectors and result.vectors[0] is not None else 0, |
| ... | ... | @@ -1146,7 +1383,8 @@ async def embed_image( |
| 1146 | 1383 | cache_misses=cache_misses, |
| 1147 | 1384 | ) |
| 1148 | 1385 | logger.error( |
| 1149 | - "embed_image failed | inputs=%d normalize=%s latency_ms=%.2f error=%s", | |
| 1386 | + "embed_image failed | priority=%s inputs=%d normalize=%s latency_ms=%.2f error=%s", | |
| 1387 | + _priority_label(effective_priority), | |
| 1150 | 1388 | len(urls), |
| 1151 | 1389 | effective_normalize, |
| 1152 | 1390 | latency_ms, |
| ... | ... | @@ -1158,8 +1396,9 @@ async def embed_image( |
| 1158 | 1396 | finally: |
| 1159 | 1397 | remaining = _image_request_limiter.release(success=success) |
| 1160 | 1398 | logger.info( |
| 1161 | - "embed_image finalize | success=%s active_after=%d", | |
| 1399 | + "embed_image finalize | success=%s priority=%s active_after=%d", | |
| 1162 | 1400 | success, |
| 1401 | + _priority_label(effective_priority), | |
| 1163 | 1402 | remaining, |
| 1164 | 1403 | extra=_request_log_extra(request_id), |
| 1165 | 1404 | ) | ... | ... |
embeddings/text_encoder.py
| ... | ... | @@ -35,7 +35,12 @@ class TextEmbeddingEncoder: |
| 35 | 35 | expire_time=self.expire_time, |
| 36 | 36 | ) |
| 37 | 37 | |
| 38 | - def _call_service(self, request_data: List[str], normalize_embeddings: bool = True) -> List[Any]: | |
| 38 | + def _call_service( | |
| 39 | + self, | |
| 40 | + request_data: List[str], | |
| 41 | + normalize_embeddings: bool = True, | |
| 42 | + priority: int = 0, | |
| 43 | + ) -> List[Any]: | |
| 39 | 44 | """ |
| 40 | 45 | Call the embedding service API. |
| 41 | 46 | |
| ... | ... | @@ -48,7 +53,10 @@ class TextEmbeddingEncoder: |
| 48 | 53 | try: |
| 49 | 54 | response = requests.post( |
| 50 | 55 | self.endpoint, |
| 51 | - params={"normalize": "true" if normalize_embeddings else "false"}, | |
| 56 | + params={ | |
| 57 | + "normalize": "true" if normalize_embeddings else "false", | |
| 58 | + "priority": max(0, int(priority)), | |
| 59 | + }, | |
| 52 | 60 | json=request_data, |
| 53 | 61 | timeout=60 |
| 54 | 62 | ) |
| ... | ... | @@ -62,6 +70,7 @@ class TextEmbeddingEncoder: |
| 62 | 70 | self, |
| 63 | 71 | sentences: Union[str, List[str]], |
| 64 | 72 | normalize_embeddings: bool = True, |
| 73 | + priority: int = 0, | |
| 65 | 74 | device: str = 'cpu', |
| 66 | 75 | batch_size: int = 32 |
| 67 | 76 | ) -> np.ndarray: |
| ... | ... | @@ -100,7 +109,11 @@ class TextEmbeddingEncoder: |
| 100 | 109 | |
| 101 | 110 | # If there are uncached texts, call service |
| 102 | 111 | if uncached_texts: |
| 103 | - response_data = self._call_service(request_data, normalize_embeddings=normalize_embeddings) | |
| 112 | + response_data = self._call_service( | |
| 113 | + request_data, | |
| 114 | + normalize_embeddings=normalize_embeddings, | |
| 115 | + priority=priority, | |
| 116 | + ) | |
| 104 | 117 | |
| 105 | 118 | # Process response |
| 106 | 119 | for i, text in enumerate(uncached_texts): | ... | ... |
frontend/static/css/style.css
| ... | ... | @@ -371,9 +371,61 @@ body { |
| 371 | 371 | margin-bottom: 2px; |
| 372 | 372 | } |
| 373 | 373 | |
| 374 | +.product-debug-actions { | |
| 375 | + display: flex; | |
| 376 | + flex-wrap: wrap; | |
| 377 | + align-items: center; | |
| 378 | + gap: 10px 14px; | |
| 379 | + margin-top: 8px; | |
| 380 | +} | |
| 381 | + | |
| 382 | +.product-debug-inline-es-btn { | |
| 383 | + font-family: inherit; | |
| 384 | + font-size: 12px; | |
| 385 | + padding: 4px 10px; | |
| 386 | + border: 1px solid #ccc; | |
| 387 | + border-radius: 4px; | |
| 388 | + background: #fafafa; | |
| 389 | + color: #333; | |
| 390 | + cursor: pointer; | |
| 391 | +} | |
| 392 | + | |
| 393 | +.product-debug-inline-es-btn:hover { | |
| 394 | + background: #f0f0f0; | |
| 395 | + border-color: #bbb; | |
| 396 | +} | |
| 397 | + | |
| 398 | +.product-debug--es-expanded { | |
| 399 | + max-height: min(70vh, 720px); | |
| 400 | +} | |
| 401 | + | |
| 402 | +.product-es-doc-panel { | |
| 403 | + margin-top: 10px; | |
| 404 | + padding-top: 8px; | |
| 405 | + border-top: 1px dashed #e8e8e8; | |
| 406 | +} | |
| 407 | + | |
| 408 | +.product-es-doc-panel-status { | |
| 409 | + font-size: 12px; | |
| 410 | + color: #888; | |
| 411 | +} | |
| 412 | + | |
| 413 | +.product-es-doc-pre { | |
| 414 | + margin: 6px 0 0; | |
| 415 | + padding: 10px; | |
| 416 | + background: #f5f5f5; | |
| 417 | + border-radius: 4px; | |
| 418 | + overflow: auto; | |
| 419 | + max-height: 50vh; | |
| 420 | + font-size: 11px; | |
| 421 | + line-height: 1.35; | |
| 422 | + white-space: pre-wrap; | |
| 423 | + word-break: break-word; | |
| 424 | +} | |
| 425 | + | |
| 374 | 426 | .product-debug-link { |
| 375 | 427 | display: inline-block; |
| 376 | - margin-top: 6px; | |
| 428 | + margin-top: 0; | |
| 377 | 429 | font-size: 12px; |
| 378 | 430 | color: #e67e22; |
| 379 | 431 | text-decoration: none; | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -68,12 +68,85 @@ function initializeApp() { |
| 68 | 68 | // 初始化租户下拉框和分面面板 |
| 69 | 69 | console.log('Initializing app...'); |
| 70 | 70 | initTenantSelect(); |
| 71 | + setupProductGridEsDocToggle(); | |
| 71 | 72 | const searchInput = document.getElementById('searchInput'); |
| 72 | 73 | if (searchInput) { |
| 73 | 74 | searchInput.focus(); |
| 74 | 75 | } |
| 75 | 76 | } |
| 76 | 77 | |
| 78 | +/** Delegated handler: toggle inline ES raw response under each result card (survives innerHTML refresh on re-search). */ | |
| 79 | +function setupProductGridEsDocToggle() { | |
| 80 | + const grid = document.getElementById('productGrid'); | |
| 81 | + if (!grid || grid.dataset.esDocToggleBound === '1') { | |
| 82 | + return; | |
| 83 | + } | |
| 84 | + grid.dataset.esDocToggleBound = '1'; | |
| 85 | + grid.addEventListener('click', onProductGridEsDocToggleClick); | |
| 86 | +} | |
| 87 | + | |
| 88 | +async function onProductGridEsDocToggleClick(event) { | |
| 89 | + const btn = event.target.closest('[data-action="toggle-es-inline-doc"]'); | |
| 90 | + if (!btn) { | |
| 91 | + return; | |
| 92 | + } | |
| 93 | + event.preventDefault(); | |
| 94 | + const debugRoot = btn.closest('.product-debug'); | |
| 95 | + if (!debugRoot) { | |
| 96 | + return; | |
| 97 | + } | |
| 98 | + const panel = debugRoot.querySelector('.product-es-doc-panel'); | |
| 99 | + const pre = debugRoot.querySelector('.product-es-doc-pre'); | |
| 100 | + const statusEl = debugRoot.querySelector('.product-es-doc-panel-status'); | |
| 101 | + if (!panel || !pre || !statusEl) { | |
| 102 | + return; | |
| 103 | + } | |
| 104 | + | |
| 105 | + const spuId = btn.getAttribute('data-spu-id') || ''; | |
| 106 | + const tenantId = getTenantId(); | |
| 107 | + const url = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; | |
| 108 | + | |
| 109 | + if (debugRoot.dataset.esInlineOpen === '1') { | |
| 110 | + panel.setAttribute('hidden', ''); | |
| 111 | + debugRoot.classList.remove('product-debug--es-expanded'); | |
| 112 | + debugRoot.dataset.esInlineOpen = '0'; | |
| 113 | + btn.textContent = '在结果中显示 ES 文档'; | |
| 114 | + return; | |
| 115 | + } | |
| 116 | + | |
| 117 | + panel.removeAttribute('hidden'); | |
| 118 | + debugRoot.classList.add('product-debug--es-expanded'); | |
| 119 | + debugRoot.dataset.esInlineOpen = '1'; | |
| 120 | + btn.textContent = '隐藏 ES 文档'; | |
| 121 | + | |
| 122 | + if (pre.textContent.length > 0) { | |
| 123 | + panel.scrollIntoView({ behavior: 'smooth', block: 'nearest' }); | |
| 124 | + return; | |
| 125 | + } | |
| 126 | + | |
| 127 | + statusEl.style.display = ''; | |
| 128 | + statusEl.textContent = '加载中…'; | |
| 129 | + pre.style.display = 'none'; | |
| 130 | + | |
| 131 | + try { | |
| 132 | + const response = await fetch(url); | |
| 133 | + if (!response.ok) { | |
| 134 | + const errText = await response.text(); | |
| 135 | + throw new Error(`HTTP ${response.status}: ${errText.slice(0, 200)}`); | |
| 136 | + } | |
| 137 | + const data = await response.json(); | |
| 138 | + pre.textContent = customStringify(data); | |
| 139 | + statusEl.style.display = 'none'; | |
| 140 | + pre.style.display = 'block'; | |
| 141 | + } catch (err) { | |
| 142 | + console.error('ES doc fetch failed', err); | |
| 143 | + statusEl.textContent = `加载失败: ${err.message || err}`; | |
| 144 | + pre.style.display = 'none'; | |
| 145 | + } | |
| 146 | + | |
| 147 | + panel.scrollIntoView({ behavior: 'smooth', block: 'nearest' }); | |
| 148 | +} | |
| 149 | + | |
| 77 | 150 | // 在 DOM 加载完成后初始化 |
| 78 | 151 | if (document.readyState === 'loading') { |
| 79 | 152 | document.addEventListener('DOMContentLoaded', initializeApp); |
| ... | ... | @@ -401,9 +474,20 @@ function displayResults(data) { |
| 401 | 474 | <div class="product-debug-line">Rerank score: ${rerankScore}</div> |
| 402 | 475 | <div class="product-debug-line">Fused score: ${fusedScore}</div> |
| 403 | 476 | ${titleLines} |
| 404 | - <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer"> | |
| 405 | - 查看 ES 原始文档 | |
| 406 | - </a> | |
| 477 | + <div class="product-debug-actions"> | |
| 478 | + <button type="button" class="product-debug-inline-es-btn" | |
| 479 | + data-action="toggle-es-inline-doc" | |
| 480 | + data-spu-id="${escapeAttr(String(spuId || ''))}"> | |
| 481 | + 在结果中显示 ES 文档 | |
| 482 | + </button> | |
| 483 | + <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer"> | |
| 484 | + 查看 ES 原始文档 | |
| 485 | + </a> | |
| 486 | + </div> | |
| 487 | + <div class="product-es-doc-panel" hidden> | |
| 488 | + <div class="product-es-doc-panel-status"></div> | |
| 489 | + <pre class="product-es-doc-pre"></pre> | |
| 490 | + </div> | |
| 407 | 491 | </div> |
| 408 | 492 | `; |
| 409 | 493 | } | ... | ... |
| ... | ... | @@ -0,0 +1,34 @@ |
| 1 | +# 性能测试报告索引 | |
| 2 | + | |
| 3 | +本目录存放各次压测/矩阵的原始 JSON 与说明。**推荐复用**仓库脚本,避免重复造轮子: | |
| 4 | + | |
| 5 | +| 脚本 | 用途 | | |
| 6 | +|------|------| | |
| 7 | +| `scripts/perf_api_benchmark.py` | 搜索后端、向量、翻译、重排等 HTTP 接口压测;支持 `--embed-text-priority` / `--embed-image-priority` 与 `scripts/perf_cases.json.example` | | |
| 8 | + | |
| 9 | +历史矩阵示例(并发扫描): | |
| 10 | + | |
| 11 | +- `2026-03-12/matrix_report/summary.md` — 与 `summary.json` 同目录 | |
| 12 | + | |
| 13 | +## 2026-03-20 — 向量服务 `priority` 参数冒烟测试 | |
| 14 | + | |
| 15 | +环境:本机 `127.0.0.1:6005`(文本)、`127.0.0.1:6008`(图片),命令与结果见同目录 JSON: | |
| 16 | + | |
| 17 | +| 报告文件 | 场景 | 说明 | | |
| 18 | +|----------|------|------| | |
| 19 | +| `2026-03-20_embed_text_p0.json` | `embed_text` | `priority=0`(默认),8s,并发 10 | | |
| 20 | +| `2026-03-20_embed_text_p1.json` | `embed_text` | `--embed-text-priority 1`,8s,并发 10 | | |
| 21 | +| `2026-03-20_embed_image_p0.json` | `embed_image` | `priority=0`,8s,并发 5 | | |
| 22 | +| `2026-03-20_embed_image_p1.json` | `embed_image` | `--embed-image-priority 1`,8s,并发 5 | | |
| 23 | + | |
| 24 | +复现示例: | |
| 25 | + | |
| 26 | +```bash | |
| 27 | +source activate.sh | |
| 28 | +python scripts/perf_api_benchmark.py --scenario embed_text --duration 8 --concurrency 10 --timeout 30 --output perf_reports/2026-03-20_embed_text_p0.json | |
| 29 | +python scripts/perf_api_benchmark.py --scenario embed_text --duration 8 --concurrency 10 --embed-text-priority 1 --output perf_reports/2026-03-20_embed_text_p1.json | |
| 30 | +python scripts/perf_api_benchmark.py --scenario embed_image --duration 8 --concurrency 5 --timeout 60 --output perf_reports/2026-03-20_embed_image_p0.json | |
| 31 | +python scripts/perf_api_benchmark.py --scenario embed_image --duration 8 --concurrency 5 --embed-image-priority 1 --output perf_reports/2026-03-20_embed_image_p1.json | |
| 32 | +``` | |
| 33 | + | |
| 34 | +说明:本次为 **8 秒冒烟测试(smoke)**,与 `2026-03-12` 矩阵的时长/并发不可直接横向对比;仅验证 `priority` 参数下服务仍返回 200 且 payload 校验通过。 | ... | ... |
query/query_parser.py
| ... | ... | @@ -8,7 +8,7 @@ from typing import Dict, List, Optional, Any, Union |
| 8 | 8 | import numpy as np |
| 9 | 9 | import logging |
| 10 | 10 | import re |
| 11 | -from concurrent.futures import ThreadPoolExecutor, as_completed, wait | |
| 11 | +from concurrent.futures import ThreadPoolExecutor, wait | |
| 12 | 12 | |
| 13 | 13 | from embeddings.text_encoder import TextEmbeddingEncoder |
| 14 | 14 | from config import SearchConfig |
| ... | ... | @@ -42,6 +42,8 @@ class ParsedQuery: |
| 42 | 42 | search_langs: Optional[List[str]] = None, |
| 43 | 43 | index_languages: Optional[List[str]] = None, |
| 44 | 44 | source_in_index_languages: bool = True, |
| 45 | + contains_chinese: bool = False, | |
| 46 | + contains_english: bool = False, | |
| 45 | 47 | ): |
| 46 | 48 | self.original_query = original_query |
| 47 | 49 | self.query_normalized = query_normalized |
| ... | ... | @@ -58,6 +60,8 @@ class ParsedQuery: |
| 58 | 60 | self.search_langs = search_langs or [] |
| 59 | 61 | self.index_languages = index_languages or [] |
| 60 | 62 | self.source_in_index_languages = bool(source_in_index_languages) |
| 63 | + self.contains_chinese = bool(contains_chinese) | |
| 64 | + self.contains_english = bool(contains_english) | |
| 61 | 65 | |
| 62 | 66 | def to_dict(self) -> Dict[str, Any]: |
| 63 | 67 | """Convert to dictionary representation.""" |
| ... | ... | @@ -73,6 +77,8 @@ class ParsedQuery: |
| 73 | 77 | result["search_langs"] = self.search_langs |
| 74 | 78 | result["index_languages"] = self.index_languages |
| 75 | 79 | result["source_in_index_languages"] = self.source_in_index_languages |
| 80 | + result["contains_chinese"] = self.contains_chinese | |
| 81 | + result["contains_english"] = self.contains_english | |
| 76 | 82 | return result |
| 77 | 83 | |
| 78 | 84 | |
| ... | ... | @@ -139,7 +145,6 @@ class QueryParser: |
| 139 | 145 | cfg.get("default_model"), |
| 140 | 146 | ) |
| 141 | 147 | self._translator = create_translation_client() |
| 142 | - self._translation_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="query-translation") | |
| 143 | 148 | |
| 144 | 149 | @property |
| 145 | 150 | def text_encoder(self) -> TextEmbeddingEncoder: |
| ... | ... | @@ -218,6 +223,16 @@ class QueryParser: |
| 218 | 223 | return bool(re.search(r"[\u4e00-\u9fff]", text or "")) |
| 219 | 224 | |
| 220 | 225 | @staticmethod |
| 226 | + def _is_pure_english_word_token(token: str) -> bool: | |
| 227 | + """ | |
| 228 | + A tokenizer token counts as English iff it is letters only (optional internal hyphens) | |
| 229 | + and length >= 3. | |
| 230 | + """ | |
| 231 | + if not token or len(token) < 3: | |
| 232 | + return False | |
| 233 | + return bool(re.fullmatch(r"[A-Za-z]+(?:-[A-Za-z]+)*", token)) | |
| 234 | + | |
| 235 | + @staticmethod | |
| 221 | 236 | def _extract_latin_tokens(text: str) -> List[str]: |
| 222 | 237 | """Extract latin word tokens from query text.""" |
| 223 | 238 | return re.findall(r"[A-Za-z]+(?:-[A-Za-z]+)*", text or "") |
| ... | ... | @@ -332,11 +347,14 @@ class QueryParser: |
| 332 | 347 | if context: |
| 333 | 348 | context.store_intermediate_result('detected_language', detected_lang) |
| 334 | 349 | |
| 335 | - # Stage 4: Translation (with async support and conditional waiting) | |
| 336 | - translations = {} | |
| 337 | - translation_futures = {} | |
| 338 | - translation_executor = None | |
| 350 | + # Stage 4: Translation — always submit to thread pool; results are collected together with | |
| 351 | + # embedding in one wait() that uses a configurable budget (short vs long by source-in-index). | |
| 352 | + translations: Dict[str, str] = {} | |
| 353 | + translation_futures: Dict[str, Any] = {} | |
| 354 | + translation_executor: Optional[ThreadPoolExecutor] = None | |
| 339 | 355 | index_langs: List[str] = [] |
| 356 | + detected_norm = str(detected_lang or "").strip().lower() | |
| 357 | + | |
| 340 | 358 | try: |
| 341 | 359 | # 根据租户配置的 index_languages 决定翻译目标语言 |
| 342 | 360 | from config.tenant_config_loader import get_tenant_config_loader |
| ... | ... | @@ -352,59 +370,32 @@ class QueryParser: |
| 352 | 370 | seen_langs.add(norm_lang) |
| 353 | 371 | index_langs.append(norm_lang) |
| 354 | 372 | |
| 355 | - target_langs_for_translation = [lang for lang in index_langs if lang != detected_lang] | |
| 373 | + target_langs_for_translation = [lang for lang in index_langs if lang != detected_norm] | |
| 356 | 374 | |
| 357 | 375 | if target_langs_for_translation: |
| 358 | - target_langs = target_langs_for_translation | |
| 359 | - | |
| 360 | - if target_langs: | |
| 361 | - # Determine if we need to wait for translation results | |
| 362 | - # If detected_lang is not in index_languages, we must wait for translation | |
| 363 | - need_wait_translation = detected_lang not in index_langs | |
| 364 | - | |
| 365 | - if need_wait_translation: | |
| 366 | - translation_executor = ThreadPoolExecutor( | |
| 367 | - max_workers=max(1, min(len(target_langs), 4)), | |
| 368 | - thread_name_prefix="query-translation-wait", | |
| 369 | - ) | |
| 370 | - for lang in target_langs: | |
| 371 | - model_name = self._pick_query_translation_model(detected_lang, lang, self.config) | |
| 372 | - log_debug( | |
| 373 | - f"Submitting query translation | source={detected_lang} target={lang} model={model_name}" | |
| 374 | - ) | |
| 375 | - translation_futures[lang] = translation_executor.submit( | |
| 376 | - self.translator.translate, | |
| 377 | - query_text, | |
| 378 | - lang, | |
| 379 | - detected_lang, | |
| 380 | - "ecommerce_search_query", | |
| 381 | - model_name, | |
| 382 | - ) | |
| 383 | - else: | |
| 384 | - for lang in target_langs: | |
| 385 | - model_name = self._pick_query_translation_model(detected_lang, lang, self.config) | |
| 386 | - log_debug( | |
| 387 | - f"Submitting query translation | source={detected_lang} target={lang} model={model_name}" | |
| 388 | - ) | |
| 389 | - self._translation_executor.submit( | |
| 390 | - self.translator.translate, | |
| 391 | - query_text, | |
| 392 | - lang, | |
| 393 | - detected_lang, | |
| 394 | - "ecommerce_search_query", | |
| 395 | - model_name, | |
| 396 | - ) | |
| 376 | + translation_executor = ThreadPoolExecutor( | |
| 377 | + max_workers=max(1, min(len(target_langs_for_translation), 4)), | |
| 378 | + thread_name_prefix="query-translation", | |
| 379 | + ) | |
| 380 | + for lang in target_langs_for_translation: | |
| 381 | + model_name = self._pick_query_translation_model(detected_lang, lang, self.config) | |
| 382 | + log_debug( | |
| 383 | + f"Submitting query translation | source={detected_lang} target={lang} model={model_name}" | |
| 384 | + ) | |
| 385 | + translation_futures[lang] = translation_executor.submit( | |
| 386 | + self.translator.translate, | |
| 387 | + query_text, | |
| 388 | + lang, | |
| 389 | + detected_lang, | |
| 390 | + "ecommerce_search_query", | |
| 391 | + model_name, | |
| 392 | + ) | |
| 397 | 393 | |
| 398 | - if translations: | |
| 399 | - log_info(f"Translation completed (cache hit) | Query text: '{query_text}' | Results: {translations}") | |
| 400 | - if translation_futures: | |
| 401 | - log_debug(f"Translation in progress, waiting for results... | Query text: '{query_text}' | Languages: {list(translation_futures.keys())}") | |
| 402 | - | |
| 403 | - if context: | |
| 404 | - context.store_intermediate_result('translations', translations) | |
| 405 | - for lang, translation in translations.items(): | |
| 406 | - if translation: | |
| 407 | - context.store_intermediate_result(f'translation_{lang}', translation) | |
| 394 | + if context: | |
| 395 | + context.store_intermediate_result('translations', translations) | |
| 396 | + for lang, translation in translations.items(): | |
| 397 | + if translation: | |
| 398 | + context.store_intermediate_result(f'translation_{lang}', translation) | |
| 408 | 399 | |
| 409 | 400 | except Exception as e: |
| 410 | 401 | error_msg = f"Translation failed | Error: {str(e)}" |
| ... | ... | @@ -416,13 +407,18 @@ class QueryParser: |
| 416 | 407 | keywords = self._extract_keywords(query_text) |
| 417 | 408 | query_tokens = self._get_query_tokens(query_text) |
| 418 | 409 | token_count = len(query_tokens) |
| 410 | + contains_chinese = self._contains_cjk(query_text) | |
| 411 | + contains_english = any(self._is_pure_english_word_token(t) for t in query_tokens) | |
| 419 | 412 | |
| 420 | 413 | log_debug(f"Query analysis | Keywords: {keywords} | Token count: {token_count} | " |
| 421 | - f"Query tokens: {query_tokens}") | |
| 414 | + f"Query tokens: {query_tokens} | contains_chinese={contains_chinese} | " | |
| 415 | + f"contains_english={contains_english}") | |
| 422 | 416 | if context: |
| 423 | 417 | context.store_intermediate_result('keywords', keywords) |
| 424 | 418 | context.store_intermediate_result('token_count', token_count) |
| 425 | 419 | context.store_intermediate_result('query_tokens', query_tokens) |
| 420 | + context.store_intermediate_result('contains_chinese', contains_chinese) | |
| 421 | + context.store_intermediate_result('contains_english', contains_english) | |
| 426 | 422 | |
| 427 | 423 | # Stage 6: Text embedding (only for non-short queries) - async execution |
| 428 | 424 | query_vector = None |
| ... | ... | @@ -442,7 +438,7 @@ class QueryParser: |
| 442 | 438 | # Submit encoding task to thread pool for async execution |
| 443 | 439 | encoding_executor = ThreadPoolExecutor(max_workers=1) |
| 444 | 440 | def _encode_query_vector() -> Optional[np.ndarray]: |
| 445 | - arr = self.text_encoder.encode([query_text]) | |
| 441 | + arr = self.text_encoder.encode([query_text], priority=1) | |
| 446 | 442 | if arr is None or len(arr) == 0: |
| 447 | 443 | return None |
| 448 | 444 | vec = arr[0] |
| ... | ... | @@ -458,45 +454,66 @@ class QueryParser: |
| 458 | 454 | encoding_executor = None |
| 459 | 455 | embedding_future = None |
| 460 | 456 | |
| 461 | - # Wait for all async tasks to complete (translation and embedding) | |
| 457 | + # Wait for translation + embedding concurrently; shared budget (ms) depends on whether | |
| 458 | + # the detected language is in tenant index_languages. | |
| 459 | + qc = self.config.query_config | |
| 460 | + source_in_index_for_budget = detected_norm in index_langs | |
| 461 | + budget_ms = ( | |
| 462 | + qc.translation_embedding_wait_budget_ms_source_in_index | |
| 463 | + if source_in_index_for_budget | |
| 464 | + else qc.translation_embedding_wait_budget_ms_source_not_in_index | |
| 465 | + ) | |
| 466 | + budget_sec = max(0.0, float(budget_ms) / 1000.0) | |
| 467 | + | |
| 468 | + if translation_futures: | |
| 469 | + log_info( | |
| 470 | + f"Translation+embedding shared wait budget | budget_ms={budget_ms} | " | |
| 471 | + f"source_in_index_languages={source_in_index_for_budget} | " | |
| 472 | + f"translation_targets={list(translation_futures.keys())}" | |
| 473 | + ) | |
| 474 | + | |
| 462 | 475 | if translation_futures or embedding_future: |
| 463 | - log_debug("Waiting for async tasks to complete...") | |
| 464 | - | |
| 465 | - # Collect all futures with their identifiers | |
| 466 | - all_futures = [] | |
| 467 | - future_to_lang = {} | |
| 476 | + log_debug( | |
| 477 | + f"Waiting for async tasks (translation+embedding) | budget_ms={budget_ms} | " | |
| 478 | + f"source_in_index_languages={source_in_index_for_budget}" | |
| 479 | + ) | |
| 480 | + | |
| 481 | + all_futures: List[Any] = [] | |
| 482 | + future_to_lang: Dict[Any, tuple] = {} | |
| 468 | 483 | for lang, future in translation_futures.items(): |
| 469 | 484 | all_futures.append(future) |
| 470 | - future_to_lang[future] = ('translation', lang) | |
| 471 | - | |
| 485 | + future_to_lang[future] = ("translation", lang) | |
| 486 | + | |
| 472 | 487 | if embedding_future: |
| 473 | 488 | all_futures.append(embedding_future) |
| 474 | - future_to_lang[embedding_future] = ('embedding', None) | |
| 475 | - | |
| 476 | - # Enforce a hard timeout for translation-related work (300ms budget) | |
| 477 | - done, not_done = wait(all_futures, timeout=0.3) | |
| 489 | + future_to_lang[embedding_future] = ("embedding", None) | |
| 490 | + | |
| 491 | + done, not_done = wait(all_futures, timeout=budget_sec) | |
| 478 | 492 | for future in done: |
| 479 | 493 | task_type, lang = future_to_lang[future] |
| 480 | 494 | try: |
| 481 | 495 | result = future.result() |
| 482 | - if task_type == 'translation': | |
| 496 | + if task_type == "translation": | |
| 483 | 497 | if result: |
| 484 | 498 | translations[lang] = result |
| 485 | 499 | log_info( |
| 486 | - f"Translation completed | Query text: '{query_text}' | Target language: {lang} | Translation result: '{result}'" | |
| 500 | + f"Translation completed | Query text: '{query_text}' | " | |
| 501 | + f"Target language: {lang} | Translation result: '{result}'" | |
| 487 | 502 | ) |
| 488 | 503 | if context: |
| 489 | - context.store_intermediate_result(f'translation_{lang}', result) | |
| 490 | - elif task_type == 'embedding': | |
| 504 | + context.store_intermediate_result(f"translation_{lang}", result) | |
| 505 | + elif task_type == "embedding": | |
| 491 | 506 | query_vector = result |
| 492 | 507 | if query_vector is not None: |
| 493 | 508 | log_debug(f"Query vector generation completed | Shape: {query_vector.shape}") |
| 494 | 509 | if context: |
| 495 | - context.store_intermediate_result('query_vector_shape', query_vector.shape) | |
| 510 | + context.store_intermediate_result("query_vector_shape", query_vector.shape) | |
| 496 | 511 | else: |
| 497 | - log_info("Query vector generation completed but result is None, will process without vector") | |
| 512 | + log_info( | |
| 513 | + "Query vector generation completed but result is None, will process without vector" | |
| 514 | + ) | |
| 498 | 515 | except Exception as e: |
| 499 | - if task_type == 'translation': | |
| 516 | + if task_type == "translation": | |
| 500 | 517 | error_msg = f"Translation failed | Language: {lang} | Error: {str(e)}" |
| 501 | 518 | else: |
| 502 | 519 | error_msg = f"Query vector generation failed | Error: {str(e)}" |
| ... | ... | @@ -504,30 +521,29 @@ class QueryParser: |
| 504 | 521 | if context: |
| 505 | 522 | context.add_warning(error_msg) |
| 506 | 523 | |
| 507 | - # Log timeouts for any futures that did not finish within 300ms | |
| 508 | 524 | if not_done: |
| 509 | 525 | for future in not_done: |
| 510 | 526 | task_type, lang = future_to_lang[future] |
| 511 | - if task_type == 'translation': | |
| 527 | + if task_type == "translation": | |
| 512 | 528 | timeout_msg = ( |
| 513 | - f"Translation timeout (>300ms) | Language: {lang} | " | |
| 529 | + f"Translation timeout (>{budget_ms}ms) | Language: {lang} | " | |
| 514 | 530 | f"Query text: '{query_text}'" |
| 515 | 531 | ) |
| 516 | 532 | else: |
| 517 | - timeout_msg = "Query vector generation timeout (>300ms), proceeding without embedding result" | |
| 533 | + timeout_msg = ( | |
| 534 | + f"Query vector generation timeout (>{budget_ms}ms), proceeding without embedding result" | |
| 535 | + ) | |
| 518 | 536 | log_info(timeout_msg) |
| 519 | 537 | if context: |
| 520 | 538 | context.add_warning(timeout_msg) |
| 521 | 539 | |
| 522 | - # Clean up encoding executor | |
| 523 | 540 | if encoding_executor: |
| 524 | 541 | encoding_executor.shutdown(wait=False) |
| 525 | 542 | if translation_executor: |
| 526 | 543 | translation_executor.shutdown(wait=False) |
| 527 | - | |
| 528 | - # Update translations in context after all are complete | |
| 544 | + | |
| 529 | 545 | if translations and context: |
| 530 | - context.store_intermediate_result('translations', translations) | |
| 546 | + context.store_intermediate_result("translations", translations) | |
| 531 | 547 | |
| 532 | 548 | # Build language-scoped query plan: source language + available translations |
| 533 | 549 | query_text_by_lang: Dict[str, str] = {} |
| ... | ... | @@ -547,7 +563,7 @@ class QueryParser: |
| 547 | 563 | # Use the original mixed-script query as a robust fallback probe for that language field set. |
| 548 | 564 | query_text_by_lang[lang] = query_text |
| 549 | 565 | |
| 550 | - source_in_index_languages = detected_lang in index_langs | |
| 566 | + source_in_index_languages = detected_norm in index_langs | |
| 551 | 567 | ordered_search_langs: List[str] = [] |
| 552 | 568 | seen_order = set() |
| 553 | 569 | if detected_lang in query_text_by_lang: |
| ... | ... | @@ -583,6 +599,8 @@ class QueryParser: |
| 583 | 599 | search_langs=ordered_search_langs, |
| 584 | 600 | index_languages=index_langs, |
| 585 | 601 | source_in_index_languages=source_in_index_languages, |
| 602 | + contains_chinese=contains_chinese, | |
| 603 | + contains_english=contains_english, | |
| 586 | 604 | ) |
| 587 | 605 | |
| 588 | 606 | if context and hasattr(context, 'logger'): | ... | ... |
scripts/perf_api_benchmark.py
| ... | ... | @@ -15,6 +15,9 @@ Examples: |
| 15 | 15 | python scripts/perf_api_benchmark.py --scenario backend_suggest --duration 30 --concurrency 50 --tenant-id 162 |
| 16 | 16 | python scripts/perf_api_benchmark.py --scenario all --duration 60 --concurrency 80 --tenant-id 162 |
| 17 | 17 | python scripts/perf_api_benchmark.py --scenario all --cases-file scripts/perf_cases.json.example --output perf_result.json |
| 18 | + # Embedding admission / priority (query param `priority`; same semantics as embedding service): | |
| 19 | + python scripts/perf_api_benchmark.py --scenario embed_text --embed-text-priority 1 --duration 30 --concurrency 20 | |
| 20 | + python scripts/perf_api_benchmark.py --scenario embed_image --embed-image-priority 1 --duration 30 --concurrency 10 | |
| 18 | 21 | """ |
| 19 | 22 | |
| 20 | 23 | from __future__ import annotations |
| ... | ... | @@ -72,9 +75,9 @@ def validate_response_payload( |
| 72 | 75 | ) -> Tuple[bool, str]: |
| 73 | 76 | """ |
| 74 | 77 | Lightweight payload validation for correctness-aware perf tests. |
| 75 | - Currently strict for embed_text to catch NaN/null vector regressions. | |
| 78 | + Strict for embed_text / embed_image to catch NaN/null vector regressions. | |
| 76 | 79 | """ |
| 77 | - if scenario_name != "embed_text": | |
| 80 | + if scenario_name not in ("embed_text", "embed_image"): | |
| 78 | 81 | return True, "" |
| 79 | 82 | |
| 80 | 83 | expected_len = len(tpl.json_body) if isinstance(tpl.json_body, list) else None |
| ... | ... | @@ -219,6 +222,43 @@ def load_cases_from_file(path: Path, tenant_id: str) -> Dict[str, List[RequestTe |
| 219 | 222 | return out |
| 220 | 223 | |
| 221 | 224 | |
| 225 | +def apply_embed_priority_params( | |
| 226 | + scenarios: Dict[str, Scenario], | |
| 227 | + embed_text_priority: int, | |
| 228 | + embed_image_priority: int, | |
| 229 | +) -> None: | |
| 230 | + """ | |
| 231 | + Merge default `priority` query param into embed templates when absent. | |
| 232 | + `scripts/perf_cases.json` may set per-request `params.priority` to override. | |
| 233 | + """ | |
| 234 | + mapping = { | |
| 235 | + "embed_text": max(0, int(embed_text_priority)), | |
| 236 | + "embed_image": max(0, int(embed_image_priority)), | |
| 237 | + } | |
| 238 | + for name, pri in mapping.items(): | |
| 239 | + if name not in scenarios: | |
| 240 | + continue | |
| 241 | + scen = scenarios[name] | |
| 242 | + new_templates: List[RequestTemplate] = [] | |
| 243 | + for t in scen.templates: | |
| 244 | + params = dict(t.params or {}) | |
| 245 | + params.setdefault("priority", str(pri)) | |
| 246 | + new_templates.append( | |
| 247 | + RequestTemplate( | |
| 248 | + method=t.method, | |
| 249 | + path=t.path, | |
| 250 | + params=params, | |
| 251 | + json_body=t.json_body, | |
| 252 | + headers=t.headers, | |
| 253 | + ) | |
| 254 | + ) | |
| 255 | + scenarios[name] = Scenario( | |
| 256 | + name=scen.name, | |
| 257 | + templates=new_templates, | |
| 258 | + timeout_sec=scen.timeout_sec, | |
| 259 | + ) | |
| 260 | + | |
| 261 | + | |
| 222 | 262 | def build_scenarios(args: argparse.Namespace) -> Dict[str, Scenario]: |
| 223 | 263 | defaults = make_default_templates(args.tenant_id) |
| 224 | 264 | if args.cases_file: |
| ... | ... | @@ -252,6 +292,11 @@ def build_scenarios(args: argparse.Namespace) -> Dict[str, Scenario]: |
| 252 | 292 | ) |
| 253 | 293 | ) |
| 254 | 294 | scenarios[name] = Scenario(name=name, templates=rewritten, timeout_sec=args.timeout) |
| 295 | + apply_embed_priority_params( | |
| 296 | + scenarios, | |
| 297 | + embed_text_priority=args.embed_text_priority, | |
| 298 | + embed_image_priority=args.embed_image_priority, | |
| 299 | + ) | |
| 255 | 300 | return scenarios |
| 256 | 301 | |
| 257 | 302 | |
| ... | ... | @@ -483,6 +528,18 @@ def parse_args() -> argparse.Namespace: |
| 483 | 528 | default=0, |
| 484 | 529 | help="Optional top_n for rerank requests in dynamic docs mode (0 means omit top_n).", |
| 485 | 530 | ) |
| 531 | + parser.add_argument( | |
| 532 | + "--embed-text-priority", | |
| 533 | + type=int, | |
| 534 | + default=0, | |
| 535 | + help="Default query param priority= for embed_text (0=offline admission; >0 bypasses rejection). Merged into params unless set in --cases-file.", | |
| 536 | + ) | |
| 537 | + parser.add_argument( | |
| 538 | + "--embed-image-priority", | |
| 539 | + type=int, | |
| 540 | + default=0, | |
| 541 | + help="Default query param priority= for embed_image (same semantics as embed-text-priority).", | |
| 542 | + ) | |
| 486 | 543 | return parser.parse_args() |
| 487 | 544 | |
| 488 | 545 | |
| ... | ... | @@ -609,6 +666,8 @@ async def main_async() -> int: |
| 609 | 666 | print(f" embedding_image_base={args.embedding_image_base}") |
| 610 | 667 | print(f" translator_base={args.translator_base}") |
| 611 | 668 | print(f" reranker_base={args.reranker_base}") |
| 669 | + print(f" embed_text_priority={args.embed_text_priority}") | |
| 670 | + print(f" embed_image_priority={args.embed_image_priority}") | |
| 612 | 671 | if args.rerank_dynamic_docs: |
| 613 | 672 | print(" rerank_dynamic_docs=True") |
| 614 | 673 | print(f" rerank_doc_count={args.rerank_doc_count}") |
| ... | ... | @@ -667,6 +726,8 @@ async def main_async() -> int: |
| 667 | 726 | "rerank_query": args.rerank_query, |
| 668 | 727 | "rerank_seed": args.rerank_seed, |
| 669 | 728 | "rerank_top_n": args.rerank_top_n, |
| 729 | + "embed_text_priority": args.embed_text_priority, | |
| 730 | + "embed_image_priority": args.embed_image_priority, | |
| 670 | 731 | }, |
| 671 | 732 | "results": results, |
| 672 | 733 | "overall": aggregate_results(results), | ... | ... |
scripts/perf_cases.json.example
| ... | ... | @@ -32,9 +32,18 @@ |
| 32 | 32 | { |
| 33 | 33 | "method": "POST", |
| 34 | 34 | "path": "/embed/text", |
| 35 | + "params": {"priority": "0"}, | |
| 35 | 36 | "json": ["wireless mouse", "gaming keyboard", "USB-C cable", "barbie doll"] |
| 36 | 37 | } |
| 37 | 38 | ], |
| 39 | + "embed_image": [ | |
| 40 | + { | |
| 41 | + "method": "POST", | |
| 42 | + "path": "/embed/image", | |
| 43 | + "params": {"normalize": "true", "priority": "0"}, | |
| 44 | + "json": ["/data/saas-search/docs/image-dress1.png"] | |
| 45 | + } | |
| 46 | + ], | |
| 38 | 47 | "translate": [ |
| 39 | 48 | { |
| 40 | 49 | "method": "POST", | ... | ... |
search/es_query_builder.py
| ... | ... | @@ -9,9 +9,13 @@ Simplified architecture: |
| 9 | 9 | """ |
| 10 | 10 | |
| 11 | 11 | from typing import Dict, Any, List, Optional, Union, Tuple |
| 12 | + | |
| 12 | 13 | import numpy as np |
| 13 | 14 | from config import FunctionScoreConfig |
| 14 | 15 | |
| 16 | +# (Elasticsearch field path, boost before formatting as "path^boost") | |
| 17 | +MatchFieldSpec = Tuple[str, float] | |
| 18 | + | |
| 15 | 19 | |
| 16 | 20 | class ESQueryBuilder: |
| 17 | 21 | """Builds Elasticsearch DSL queries.""" |
| ... | ... | @@ -36,6 +40,7 @@ class ESQueryBuilder: |
| 36 | 40 | source_boost_when_missing: float = 0.6, |
| 37 | 41 | original_query_fallback_boost_when_translation_missing: float = 0.2, |
| 38 | 42 | tie_breaker_base_query: float = 0.9, |
| 43 | + mixed_script_merged_field_boost_scale: float = 0.6, | |
| 39 | 44 | ): |
| 40 | 45 | """ |
| 41 | 46 | Initialize query builder. |
| ... | ... | @@ -51,6 +56,7 @@ class ESQueryBuilder: |
| 51 | 56 | function_score_config: Function score configuration |
| 52 | 57 | default_language: Default language to use when detection fails or returns "unknown" |
| 53 | 58 | knn_boost: Boost value for KNN (embedding recall) |
| 59 | + mixed_script_merged_field_boost_scale: Multiply per-field ^boost for cross-script merged fields | |
| 54 | 60 | """ |
| 55 | 61 | self.match_fields = match_fields |
| 56 | 62 | self.field_boosts = field_boosts or {} |
| ... | ... | @@ -74,6 +80,7 @@ class ESQueryBuilder: |
| 74 | 80 | original_query_fallback_boost_when_translation_missing |
| 75 | 81 | ) |
| 76 | 82 | self.tie_breaker_base_query = float(tie_breaker_base_query) |
| 83 | + self.mixed_script_merged_field_boost_scale = float(mixed_script_merged_field_boost_scale) | |
| 77 | 84 | |
| 78 | 85 | def _apply_source_filter(self, es_query: Dict[str, Any]) -> None: |
| 79 | 86 | """ |
| ... | ... | @@ -414,7 +421,7 @@ class ESQueryBuilder: |
| 414 | 421 | def _format_field_with_boost(self, field_name: str, boost: float) -> str: |
| 415 | 422 | if abs(float(boost) - 1.0) < 1e-9: |
| 416 | 423 | return field_name |
| 417 | - return f"{field_name}^{boost}" | |
| 424 | + return f"{field_name}^{round(boost, 2)}" | |
| 418 | 425 | |
| 419 | 426 | def _get_field_boost(self, base_field: str, language: Optional[str] = None) -> float: |
| 420 | 427 | # Language-specific override first (e.g. title.de), then base field (e.g. title) |
| ... | ... | @@ -426,36 +433,74 @@ class ESQueryBuilder: |
| 426 | 433 | return float(self.field_boosts[base_field]) |
| 427 | 434 | return 1.0 |
| 428 | 435 | |
| 429 | - def _get_match_fields(self, language: str) -> Tuple[List[str], List[str]]: | |
| 436 | + def _build_match_field_specs(self, language: str) -> Tuple[List[MatchFieldSpec], List[MatchFieldSpec]]: | |
| 430 | 437 | """ |
| 431 | - Build dynamic match fields for one language. | |
| 432 | - | |
| 433 | - Args: | |
| 434 | - language: Language code (e.g. zh/en/de/fr/...) | |
| 435 | - | |
| 436 | - Returns: | |
| 437 | - (all_fields, core_fields) - core_fields are for phrase/keyword queries | |
| 438 | + Per-language match targets as (field_path, boost). Single source of truth before string formatting. | |
| 439 | + Returns (all_fields, core_fields); core_fields are for phrase/keyword strategies elsewhere. | |
| 438 | 440 | """ |
| 439 | 441 | lang = (language or "").strip().lower() |
| 440 | - all_fields: List[str] = [] | |
| 441 | - core_fields: List[str] = [] | |
| 442 | + all_specs: List[MatchFieldSpec] = [] | |
| 443 | + core_specs: List[MatchFieldSpec] = [] | |
| 442 | 444 | |
| 443 | 445 | for base in self.multilingual_fields: |
| 444 | 446 | field = f"{base}.{lang}" |
| 445 | - boost = self._get_field_boost(base, lang) | |
| 446 | - all_fields.append(self._format_field_with_boost(field, boost)) | |
| 447 | + all_specs.append((field, self._get_field_boost(base, lang))) | |
| 447 | 448 | |
| 448 | 449 | for shared in self.shared_fields: |
| 449 | - boost = self._get_field_boost(shared, None) | |
| 450 | - all_fields.append(self._format_field_with_boost(shared, boost)) | |
| 450 | + all_specs.append((shared, self._get_field_boost(shared, None))) | |
| 451 | 451 | |
| 452 | 452 | for base in self.core_multilingual_fields: |
| 453 | 453 | field = f"{base}.{lang}" |
| 454 | - boost = self._get_field_boost(base, lang) | |
| 455 | - core_fields.append(self._format_field_with_boost(field, boost)) | |
| 454 | + core_specs.append((field, self._get_field_boost(base, lang))) | |
| 455 | + | |
| 456 | + return all_specs, core_specs | |
| 457 | + | |
| 458 | + def _format_match_field_specs(self, specs: List[MatchFieldSpec]) -> List[str]: | |
| 459 | + """Format (field_path, boost) pairs for Elasticsearch multi_match ``fields``.""" | |
| 460 | + return [self._format_field_with_boost(path, boost) for path, boost in specs] | |
| 461 | + | |
| 462 | + def _merge_supplemental_lang_field_specs( | |
| 463 | + self, | |
| 464 | + specs: List[MatchFieldSpec], | |
| 465 | + supplemental_lang: str, | |
| 466 | + ) -> List[MatchFieldSpec]: | |
| 467 | + """Append supplemental-language columns; boosts multiplied by mixed_script scale.""" | |
| 468 | + scale = float(self.mixed_script_merged_field_boost_scale) | |
| 469 | + extra_all, _ = self._build_match_field_specs(supplemental_lang) | |
| 470 | + seen = {path for path, _ in specs} | |
| 471 | + out = list(specs) | |
| 472 | + for path, boost in extra_all: | |
| 473 | + if path not in seen: | |
| 474 | + out.append((path, boost * scale)) | |
| 475 | + seen.add(path) | |
| 476 | + return out | |
| 477 | + | |
| 478 | + def _expand_match_field_specs_for_mixed_script( | |
| 479 | + self, | |
| 480 | + lang: str, | |
| 481 | + specs: List[MatchFieldSpec], | |
| 482 | + contains_chinese: bool, | |
| 483 | + contains_english: bool, | |
| 484 | + index_languages: List[str], | |
| 485 | + ) -> List[MatchFieldSpec]: | |
| 486 | + """ | |
| 487 | + When the query mixes scripts, widen each clause to indexed fields for the other script | |
| 488 | + (e.g. zh clause also searches title.en when the query contains an English word token). | |
| 489 | + """ | |
| 490 | + norm = {str(x or "").strip().lower() for x in (index_languages or []) if str(x or "").strip()} | |
| 491 | + allow = norm or {"zh", "en"} | |
| 492 | + | |
| 493 | + def can_use(lcode: str) -> bool: | |
| 494 | + return lcode in allow if norm else True | |
| 495 | + | |
| 496 | + out = list(specs) | |
| 497 | + lnorm = (lang or "").strip().lower() | |
| 498 | + if contains_english and lnorm != "en" and can_use("en"): | |
| 499 | + out = self._merge_supplemental_lang_field_specs(out, "en") | |
| 500 | + if contains_chinese and lnorm != "zh" and can_use("zh"): | |
| 501 | + out = self._merge_supplemental_lang_field_specs(out, "zh") | |
| 502 | + return out | |
| 456 | 503 | |
| 457 | - return all_fields, core_fields | |
| 458 | - | |
| 459 | 504 | def _get_embedding_field(self, language: str) -> str: |
| 460 | 505 | """Get embedding field name for a language.""" |
| 461 | 506 | # Currently using unified embedding field |
| ... | ... | @@ -486,6 +531,8 @@ class ESQueryBuilder: |
| 486 | 531 | source_in_index_languages = True |
| 487 | 532 | index_languages: List[str] = [] |
| 488 | 533 | |
| 534 | + contains_chinese = False | |
| 535 | + contains_english = False | |
| 489 | 536 | if parsed_query: |
| 490 | 537 | query_text_by_lang = getattr(parsed_query, "query_text_by_lang", None) or {} |
| 491 | 538 | search_langs = getattr(parsed_query, "search_langs", None) or [] |
| ... | ... | @@ -495,6 +542,8 @@ class ESQueryBuilder: |
| 495 | 542 | getattr(parsed_query, "source_in_index_languages", True) |
| 496 | 543 | ) |
| 497 | 544 | index_languages = getattr(parsed_query, "index_languages", None) or [] |
| 545 | + contains_chinese = bool(getattr(parsed_query, "contains_chinese", False)) | |
| 546 | + contains_english = bool(getattr(parsed_query, "contains_english", False)) | |
| 498 | 547 | |
| 499 | 548 | if not query_text_by_lang: |
| 500 | 549 | query_text_by_lang = {source_lang: query_text} |
| ... | ... | @@ -508,7 +557,15 @@ class ESQueryBuilder: |
| 508 | 557 | lang_query = query_text_by_lang.get(lang) |
| 509 | 558 | if not lang_query: |
| 510 | 559 | continue |
| 511 | - match_fields, _ = self._get_match_fields(lang) | |
| 560 | + all_specs, _ = self._build_match_field_specs(lang) | |
| 561 | + expanded_specs = self._expand_match_field_specs_for_mixed_script( | |
| 562 | + lang, | |
| 563 | + all_specs, | |
| 564 | + contains_chinese, | |
| 565 | + contains_english, | |
| 566 | + index_languages, | |
| 567 | + ) | |
| 568 | + match_fields = self._format_match_field_specs(expanded_specs) | |
| 512 | 569 | if not match_fields: |
| 513 | 570 | continue |
| 514 | 571 | |
| ... | ... | @@ -559,7 +616,15 @@ class ESQueryBuilder: |
| 559 | 616 | continue |
| 560 | 617 | if lang in query_text_by_lang: |
| 561 | 618 | continue |
| 562 | - match_fields, _ = self._get_match_fields(lang) | |
| 619 | + fb_specs, _ = self._build_match_field_specs(lang) | |
| 620 | + expanded_fb = self._expand_match_field_specs_for_mixed_script( | |
| 621 | + lang, | |
| 622 | + fb_specs, | |
| 623 | + contains_chinese, | |
| 624 | + contains_english, | |
| 625 | + index_languages, | |
| 626 | + ) | |
| 627 | + match_fields = self._format_match_field_specs(expanded_fb) | |
| 563 | 628 | if not match_fields: |
| 564 | 629 | continue |
| 565 | 630 | should_clauses.append({ | ... | ... |
search/searcher.py
| ... | ... | @@ -4,12 +4,13 @@ Main Searcher module - executes search queries against Elasticsearch. |
| 4 | 4 | Handles query parsing, ranking, and result formatting. |
| 5 | 5 | """ |
| 6 | 6 | |
| 7 | -from typing import Dict, Any, List, Optional, Union | |
| 7 | +from typing import Dict, Any, List, Optional, Union, Tuple | |
| 8 | 8 | import os |
| 9 | 9 | import time, json |
| 10 | 10 | import logging |
| 11 | 11 | import hashlib |
| 12 | 12 | from string import Formatter |
| 13 | +import numpy as np | |
| 13 | 14 | |
| 14 | 15 | from utils.es_client import ESClient |
| 15 | 16 | from query import QueryParser, ParsedQuery |
| ... | ... | @@ -224,6 +225,265 @@ class Searcher: |
| 224 | 225 | hits_by_id[str(hid)] = hit |
| 225 | 226 | return hits_by_id, int(resp.get("took", 0) or 0) |
| 226 | 227 | |
| 228 | + @staticmethod | |
| 229 | + def _normalize_sku_match_text(value: Optional[str]) -> str: | |
| 230 | + """Normalize free text for lightweight SKU option matching.""" | |
| 231 | + if value is None: | |
| 232 | + return "" | |
| 233 | + return " ".join(str(value).strip().casefold().split()) | |
| 234 | + | |
| 235 | + @staticmethod | |
| 236 | + def _sku_option1_embedding_key( | |
| 237 | + sku: Dict[str, Any], | |
| 238 | + spu_option1_name: Optional[Any] = None, | |
| 239 | + ) -> Optional[str]: | |
| 240 | + """ | |
| 241 | + Text sent to the embedding service for option1 must be "name:value" | |
| 242 | + (option name from SKU row or SPU-level option1_name). | |
| 243 | + """ | |
| 244 | + value_raw = sku.get("option1_value") | |
| 245 | + if value_raw is None: | |
| 246 | + return None | |
| 247 | + value = str(value_raw).strip() | |
| 248 | + if not value: | |
| 249 | + return None | |
| 250 | + name = sku.get("option1_name") | |
| 251 | + if name is None or not str(name).strip(): | |
| 252 | + name = spu_option1_name | |
| 253 | + name_str = str(name).strip() if name is not None and str(name).strip() else "" | |
| 254 | + if name_str: | |
| 255 | + value = f"{name_str}:{value}" | |
| 256 | + return value.casefold() | |
| 257 | + | |
| 258 | + def _build_sku_query_texts(self, parsed_query: ParsedQuery) -> List[str]: | |
| 259 | + """Collect original and translated query texts for SKU option matching.""" | |
| 260 | + candidates: List[str] = [] | |
| 261 | + for text in ( | |
| 262 | + getattr(parsed_query, "original_query", None), | |
| 263 | + getattr(parsed_query, "query_normalized", None), | |
| 264 | + getattr(parsed_query, "rewritten_query", None), | |
| 265 | + ): | |
| 266 | + normalized = self._normalize_sku_match_text(text) | |
| 267 | + if normalized: | |
| 268 | + candidates.append(normalized) | |
| 269 | + | |
| 270 | + query_text_by_lang = getattr(parsed_query, "query_text_by_lang", {}) or {} | |
| 271 | + if isinstance(query_text_by_lang, dict): | |
| 272 | + for text in query_text_by_lang.values(): | |
| 273 | + normalized = self._normalize_sku_match_text(text) | |
| 274 | + if normalized: | |
| 275 | + candidates.append(normalized) | |
| 276 | + | |
| 277 | + translations = getattr(parsed_query, "translations", {}) or {} | |
| 278 | + if isinstance(translations, dict): | |
| 279 | + for text in translations.values(): | |
| 280 | + normalized = self._normalize_sku_match_text(text) | |
| 281 | + if normalized: | |
| 282 | + candidates.append(normalized) | |
| 283 | + | |
| 284 | + deduped: List[str] = [] | |
| 285 | + seen = set() | |
| 286 | + for text in candidates: | |
| 287 | + if text in seen: | |
| 288 | + continue | |
| 289 | + seen.add(text) | |
| 290 | + deduped.append(text) | |
| 291 | + return deduped | |
| 292 | + | |
| 293 | + def _find_query_matching_sku_index( | |
| 294 | + self, | |
| 295 | + skus: List[Dict[str, Any]], | |
| 296 | + query_texts: List[str], | |
| 297 | + spu_option1_name: Optional[Any] = None, | |
| 298 | + ) -> Optional[int]: | |
| 299 | + """Return the first SKU whose option1_value (or name:value) appears in query texts.""" | |
| 300 | + if not skus or not query_texts: | |
| 301 | + return None | |
| 302 | + | |
| 303 | + for index, sku in enumerate(skus): | |
| 304 | + option1_value = self._normalize_sku_match_text(sku.get("option1_value")) | |
| 305 | + if not option1_value: | |
| 306 | + continue | |
| 307 | + if any(option1_value in query_text for query_text in query_texts): | |
| 308 | + return index | |
| 309 | + embed_key = self._sku_option1_embedding_key(sku, spu_option1_name) | |
| 310 | + if embed_key and embed_key != option1_value: | |
| 311 | + composite_norm = self._normalize_sku_match_text(embed_key.replace(":", " ")) | |
| 312 | + if any(composite_norm in query_text for query_text in query_texts): | |
| 313 | + return index | |
| 314 | + if any(embed_key.casefold() in query_text for query_text in query_texts): | |
| 315 | + return index | |
| 316 | + return None | |
| 317 | + | |
| 318 | + def _encode_query_vector_for_sku_matching( | |
| 319 | + self, | |
| 320 | + parsed_query: ParsedQuery, | |
| 321 | + context: Optional[RequestContext] = None, | |
| 322 | + ) -> Optional[np.ndarray]: | |
| 323 | + """Best-effort fallback query embedding for final-page SKU matching.""" | |
| 324 | + query_text = ( | |
| 325 | + getattr(parsed_query, "rewritten_query", None) | |
| 326 | + or getattr(parsed_query, "query_normalized", None) | |
| 327 | + or getattr(parsed_query, "original_query", None) | |
| 328 | + ) | |
| 329 | + if not query_text: | |
| 330 | + return None | |
| 331 | + | |
| 332 | + text_encoder = getattr(self.query_parser, "text_encoder", None) | |
| 333 | + if text_encoder is None: | |
| 334 | + return None | |
| 335 | + | |
| 336 | + try: | |
| 337 | + vectors = text_encoder.encode([query_text], priority=1) | |
| 338 | + except Exception as exc: | |
| 339 | + logger.warning("Failed to encode query vector for SKU matching: %s", exc, exc_info=True) | |
| 340 | + if context is not None: | |
| 341 | + context.add_warning(f"SKU query embedding failed: {exc}") | |
| 342 | + return None | |
| 343 | + | |
| 344 | + if vectors is None or len(vectors) == 0: | |
| 345 | + return None | |
| 346 | + | |
| 347 | + vector = vectors[0] | |
| 348 | + if vector is None: | |
| 349 | + return None | |
| 350 | + return np.asarray(vector, dtype=np.float32) | |
| 351 | + | |
| 352 | + def _select_sku_by_embedding( | |
| 353 | + self, | |
| 354 | + skus: List[Dict[str, Any]], | |
| 355 | + option1_vectors: Dict[str, np.ndarray], | |
| 356 | + query_vector: np.ndarray, | |
| 357 | + spu_option1_name: Optional[Any] = None, | |
| 358 | + ) -> Tuple[Optional[int], Optional[float]]: | |
| 359 | + """Select the SKU whose option1 embedding key (name:value) is most similar to the query.""" | |
| 360 | + best_index: Optional[int] = None | |
| 361 | + best_score: Optional[float] = None | |
| 362 | + | |
| 363 | + for index, sku in enumerate(skus): | |
| 364 | + embed_key = self._sku_option1_embedding_key(sku, spu_option1_name) | |
| 365 | + if not embed_key: | |
| 366 | + continue | |
| 367 | + option_vector = option1_vectors.get(embed_key) | |
| 368 | + if option_vector is None: | |
| 369 | + continue | |
| 370 | + score = float(np.inner(query_vector, option_vector)) | |
| 371 | + if best_score is None or score > best_score: | |
| 372 | + best_index = index | |
| 373 | + best_score = score | |
| 374 | + | |
| 375 | + return best_index, best_score | |
| 376 | + | |
| 377 | + @staticmethod | |
| 378 | + def _promote_matching_sku(source: Dict[str, Any], match_index: int) -> Optional[Dict[str, Any]]: | |
| 379 | + """Move the matched SKU to the front and swap the SPU image.""" | |
| 380 | + skus = source.get("skus") | |
| 381 | + if not isinstance(skus, list) or match_index < 0 or match_index >= len(skus): | |
| 382 | + return None | |
| 383 | + | |
| 384 | + matched_sku = skus.pop(match_index) | |
| 385 | + skus.insert(0, matched_sku) | |
| 386 | + | |
| 387 | + image_src = matched_sku.get("image_src") or matched_sku.get("imageSrc") | |
| 388 | + if image_src: | |
| 389 | + source["image_url"] = image_src | |
| 390 | + return matched_sku | |
| 391 | + | |
| 392 | + def _apply_sku_sorting_for_page_hits( | |
| 393 | + self, | |
| 394 | + es_hits: List[Dict[str, Any]], | |
| 395 | + parsed_query: ParsedQuery, | |
| 396 | + context: Optional[RequestContext] = None, | |
| 397 | + ) -> None: | |
| 398 | + """Sort each page hit's SKUs so the best-matching SKU is first.""" | |
| 399 | + if not es_hits: | |
| 400 | + return | |
| 401 | + | |
| 402 | + query_texts = self._build_sku_query_texts(parsed_query) | |
| 403 | + unmatched_hits: List[Dict[str, Any]] = [] | |
| 404 | + option1_values_to_encode: List[str] = [] | |
| 405 | + seen_option1_values = set() | |
| 406 | + text_matched = 0 | |
| 407 | + embedding_matched = 0 | |
| 408 | + | |
| 409 | + for hit in es_hits: | |
| 410 | + source = hit.get("_source") | |
| 411 | + if not isinstance(source, dict): | |
| 412 | + continue | |
| 413 | + skus = source.get("skus") | |
| 414 | + if not isinstance(skus, list) or not skus: | |
| 415 | + continue | |
| 416 | + | |
| 417 | + spu_option1_name = source.get("option1_name") | |
| 418 | + match_index = self._find_query_matching_sku_index( | |
| 419 | + skus, query_texts, spu_option1_name=spu_option1_name | |
| 420 | + ) | |
| 421 | + if match_index is not None: | |
| 422 | + self._promote_matching_sku(source, match_index) | |
| 423 | + text_matched += 1 | |
| 424 | + continue | |
| 425 | + | |
| 426 | + unmatched_hits.append(hit) | |
| 427 | + for sku in skus: | |
| 428 | + embed_key = self._sku_option1_embedding_key(sku, spu_option1_name) | |
| 429 | + if not embed_key or embed_key in seen_option1_values: | |
| 430 | + continue | |
| 431 | + seen_option1_values.add(embed_key) | |
| 432 | + option1_values_to_encode.append(embed_key) | |
| 433 | + | |
| 434 | + if not unmatched_hits or not option1_values_to_encode: | |
| 435 | + return | |
| 436 | + | |
| 437 | + query_vector = getattr(parsed_query, "query_vector", None) | |
| 438 | + if query_vector is None: | |
| 439 | + query_vector = self._encode_query_vector_for_sku_matching(parsed_query, context=context) | |
| 440 | + if query_vector is None: | |
| 441 | + return | |
| 442 | + | |
| 443 | + text_encoder = getattr(self.query_parser, "text_encoder", None) | |
| 444 | + if text_encoder is None: | |
| 445 | + return | |
| 446 | + | |
| 447 | + try: | |
| 448 | + encoded_option_vectors = text_encoder.encode(option1_values_to_encode, priority=1) | |
| 449 | + except Exception as exc: | |
| 450 | + logger.warning("Failed to encode SKU option1 values for final-page sorting: %s", exc, exc_info=True) | |
| 451 | + if context is not None: | |
| 452 | + context.add_warning(f"SKU option embedding failed: {exc}") | |
| 453 | + return | |
| 454 | + | |
| 455 | + option1_vectors: Dict[str, np.ndarray] = {} | |
| 456 | + for option1_value, vector in zip(option1_values_to_encode, encoded_option_vectors): | |
| 457 | + if vector is None: | |
| 458 | + continue | |
| 459 | + option1_vectors[option1_value] = np.asarray(vector, dtype=np.float32) | |
| 460 | + | |
| 461 | + query_vector_array = np.asarray(query_vector, dtype=np.float32) | |
| 462 | + for hit in unmatched_hits: | |
| 463 | + source = hit.get("_source") | |
| 464 | + if not isinstance(source, dict): | |
| 465 | + continue | |
| 466 | + skus = source.get("skus") | |
| 467 | + if not isinstance(skus, list) or not skus: | |
| 468 | + continue | |
| 469 | + match_index, _ = self._select_sku_by_embedding( | |
| 470 | + skus, | |
| 471 | + option1_vectors, | |
| 472 | + query_vector_array, | |
| 473 | + spu_option1_name=source.get("option1_name"), | |
| 474 | + ) | |
| 475 | + if match_index is None: | |
| 476 | + continue | |
| 477 | + self._promote_matching_sku(source, match_index) | |
| 478 | + embedding_matched += 1 | |
| 479 | + | |
| 480 | + if text_matched or embedding_matched: | |
| 481 | + logger.info( | |
| 482 | + "Final-page SKU sorting completed | text_matched=%s | embedding_matched=%s", | |
| 483 | + text_matched, | |
| 484 | + embedding_matched, | |
| 485 | + ) | |
| 486 | + | |
| 227 | 487 | def search( |
| 228 | 488 | self, |
| 229 | 489 | query: str, |
| ... | ... | @@ -622,6 +882,8 @@ class Searcher: |
| 622 | 882 | continue |
| 623 | 883 | rerank_debug_by_doc[str(doc_id)] = item |
| 624 | 884 | |
| 885 | + self._apply_sku_sorting_for_page_hits(es_hits, parsed_query, context=context) | |
| 886 | + | |
| 625 | 887 | # Format results using ResultFormatter |
| 626 | 888 | formatted_results = ResultFormatter.format_search_results( |
| 627 | 889 | es_hits, |
| ... | ... | @@ -791,7 +1053,7 @@ class Searcher: |
| 791 | 1053 | # Generate image embedding |
| 792 | 1054 | if self.image_encoder is None: |
| 793 | 1055 | raise RuntimeError("Image encoder is not initialized at startup") |
| 794 | - image_vector = self.image_encoder.encode_image_from_url(image_url) | |
| 1056 | + image_vector = self.image_encoder.encode_image_from_url(image_url, priority=1) | |
| 795 | 1057 | |
| 796 | 1058 | if image_vector is None: |
| 797 | 1059 | raise ValueError(f"Failed to encode image: {image_url}") | ... | ... |
tests/ci/test_service_api_contracts.py
| ... | ... | @@ -540,7 +540,15 @@ def test_indexer_index_validation_max_delete_spu_ids(indexer_client: TestClient) |
| 540 | 540 | |
| 541 | 541 | |
| 542 | 542 | class _FakeTextModel: |
| 543 | - def encode_batch(self, texts, batch_size=32, device="cpu", normalize_embeddings=True): | |
| 543 | + """Matches TEI / server path: `_text_model.encode(...)` (not encode_batch).""" | |
| 544 | + | |
| 545 | + def encode( | |
| 546 | + self, | |
| 547 | + texts, | |
| 548 | + batch_size=32, | |
| 549 | + device="cpu", | |
| 550 | + normalize_embeddings=True, | |
| 551 | + ): | |
| 544 | 552 | return [np.array([0.1, 0.2, 0.3], dtype=np.float32) for _ in texts] |
| 545 | 553 | |
| 546 | 554 | |
| ... | ... | @@ -549,6 +557,18 @@ class _FakeImageModel: |
| 549 | 557 | return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls] |
| 550 | 558 | |
| 551 | 559 | |
| 560 | +class _EmbeddingCacheMiss: | |
| 561 | + """Avoid Redis/module cache hits so contract tests exercise the encode path.""" | |
| 562 | + | |
| 563 | + redis_client = None | |
| 564 | + | |
| 565 | + def get(self, key): | |
| 566 | + return None | |
| 567 | + | |
| 568 | + def set(self, key, value): | |
| 569 | + return True | |
| 570 | + | |
| 571 | + | |
| 552 | 572 | @pytest.fixture |
| 553 | 573 | def embedding_module(): |
| 554 | 574 | import embeddings.server as emb_server |
| ... | ... | @@ -556,17 +576,31 @@ def embedding_module(): |
| 556 | 576 | emb_server.app.router.on_startup.clear() |
| 557 | 577 | emb_server._text_model = _FakeTextModel() |
| 558 | 578 | emb_server._image_model = _FakeImageModel() |
| 579 | + emb_server._text_backend_name = "tei" | |
| 580 | + emb_server._text_cache = _EmbeddingCacheMiss() | |
| 581 | + emb_server._image_cache = _EmbeddingCacheMiss() | |
| 559 | 582 | yield emb_server |
| 560 | 583 | |
| 561 | 584 | |
| 562 | 585 | def test_embedding_text_contract(embedding_module): |
| 563 | - data = embedding_module.embed_text(["hello", "world"]) | |
| 586 | + """Contract via HTTP like production; route handlers require Request/Response.""" | |
| 587 | + from fastapi.testclient import TestClient | |
| 588 | + | |
| 589 | + with TestClient(embedding_module.app) as client: | |
| 590 | + resp = client.post("/embed/text", json=["hello", "world"]) | |
| 591 | + assert resp.status_code == 200 | |
| 592 | + data = resp.json() | |
| 564 | 593 | assert len(data) == 2 |
| 565 | 594 | assert len(data[0]) == 3 |
| 566 | 595 | |
| 567 | 596 | |
| 568 | 597 | def test_embedding_image_contract(embedding_module): |
| 569 | - data = embedding_module.embed_image(["https://example.com/a.jpg"]) | |
| 598 | + from fastapi.testclient import TestClient | |
| 599 | + | |
| 600 | + with TestClient(embedding_module.app) as client: | |
| 601 | + resp = client.post("/embed/image", json=["https://example.com/a.jpg"]) | |
| 602 | + assert resp.status_code == 200 | |
| 603 | + data = resp.json() | |
| 570 | 604 | assert len(data[0]) == 3 |
| 571 | 605 | |
| 572 | 606 | ... | ... |
tests/test_embedding_pipeline.py
| ... | ... | @@ -63,7 +63,11 @@ class _FakeTranslator: |
| 63 | 63 | |
| 64 | 64 | |
| 65 | 65 | class _FakeQueryEncoder: |
| 66 | + def __init__(self): | |
| 67 | + self.calls = [] | |
| 68 | + | |
| 66 | 69 | def encode(self, sentences, **kwargs): |
| 70 | + self.calls.append({"sentences": sentences, "kwargs": dict(kwargs)}) | |
| 67 | 71 | if isinstance(sentences, str): |
| 68 | 72 | sentences = [sentences] |
| 69 | 73 | return np.array([np.array([0.11, 0.22, 0.33], dtype=np.float32) for _ in sentences], dtype=object) |
| ... | ... | @@ -98,9 +102,7 @@ def _build_test_config() -> SearchConfig: |
| 98 | 102 | rerank=RerankConfig(), |
| 99 | 103 | spu_config=SPUConfig(enabled=True, spu_field="spu_id", inner_hits_size=3), |
| 100 | 104 | es_index_name="test_products", |
| 101 | - tenant_config={}, | |
| 102 | 105 | es_settings={}, |
| 103 | - services={}, | |
| 104 | 106 | ) |
| 105 | 107 | |
| 106 | 108 | |
| ... | ... | @@ -111,6 +113,7 @@ def test_text_embedding_encoder_response_alignment(monkeypatch): |
| 111 | 113 | def _fake_post(url, json, timeout, **kwargs): |
| 112 | 114 | assert url.endswith("/embed/text") |
| 113 | 115 | assert json == ["hello", "world"] |
| 116 | + assert kwargs["params"]["priority"] == 0 | |
| 114 | 117 | return _FakeResponse([[0.1, 0.2], [0.3, 0.4]]) |
| 115 | 118 | |
| 116 | 119 | monkeypatch.setattr("embeddings.text_encoder.requests.post", _fake_post) |
| ... | ... | @@ -172,6 +175,7 @@ def test_image_embedding_encoder_cache_hit(monkeypatch): |
| 172 | 175 | |
| 173 | 176 | def _fake_post(url, params, json, timeout, **kwargs): |
| 174 | 177 | calls["count"] += 1 |
| 178 | + assert params["priority"] == 0 | |
| 175 | 179 | return _FakeResponse([[0.1, 0.2]]) |
| 176 | 180 | |
| 177 | 181 | monkeypatch.setattr("embeddings.image_encoder.requests.post", _fake_post) |
| ... | ... | @@ -184,16 +188,35 @@ def test_image_embedding_encoder_cache_hit(monkeypatch): |
| 184 | 188 | assert np.allclose(out[1], np.array([0.1, 0.2], dtype=np.float32)) |
| 185 | 189 | |
| 186 | 190 | |
| 191 | +def test_image_embedding_encoder_passes_priority(monkeypatch): | |
| 192 | + fake_cache = _FakeEmbeddingCache() | |
| 193 | + monkeypatch.setattr("embeddings.image_encoder.RedisEmbeddingCache", lambda **kwargs: fake_cache) | |
| 194 | + | |
| 195 | + def _fake_post(url, params, json, timeout, **kwargs): | |
| 196 | + assert params["priority"] == 1 | |
| 197 | + return _FakeResponse([[0.1, 0.2]]) | |
| 198 | + | |
| 199 | + monkeypatch.setattr("embeddings.image_encoder.requests.post", _fake_post) | |
| 200 | + | |
| 201 | + encoder = CLIPImageEncoder(service_url="http://127.0.0.1:6008") | |
| 202 | + out = encoder.encode_batch(["https://example.com/a.jpg"], priority=1) | |
| 203 | + assert len(out) == 1 | |
| 204 | + assert np.allclose(out[0], np.array([0.1, 0.2], dtype=np.float32)) | |
| 205 | + | |
| 206 | + | |
| 187 | 207 | def test_query_parser_generates_query_vector_with_encoder(): |
| 208 | + encoder = _FakeQueryEncoder() | |
| 188 | 209 | parser = QueryParser( |
| 189 | 210 | config=_build_test_config(), |
| 190 | - text_encoder=_FakeQueryEncoder(), | |
| 211 | + text_encoder=encoder, | |
| 191 | 212 | translator=_FakeTranslator(), |
| 192 | 213 | ) |
| 193 | 214 | |
| 194 | 215 | parsed = parser.parse("red dress", tenant_id="162", generate_vector=True) |
| 195 | 216 | assert parsed.query_vector is not None |
| 196 | 217 | assert parsed.query_vector.shape == (3,) |
| 218 | + assert encoder.calls | |
| 219 | + assert encoder.calls[0]["kwargs"]["priority"] == 1 | |
| 197 | 220 | |
| 198 | 221 | |
| 199 | 222 | def test_query_parser_skips_query_vector_when_disabled(): | ... | ... |
tests/test_embedding_service_limits.py
| ... | ... | @@ -69,6 +69,8 @@ def test_health_exposes_limit_stats(monkeypatch): |
| 69 | 69 | |
| 70 | 70 | |
| 71 | 71 | def test_embed_image_rejects_when_image_lane_is_full(monkeypatch): |
| 72 | + # Ensure no cache hit (module-level Redis cache may contain this URL from other tests). | |
| 73 | + monkeypatch.setattr(embedding_server, "_image_cache", _FakeCache({})) | |
| 72 | 74 | limiter = embedding_server._InflightLimiter("image", 1) |
| 73 | 75 | acquired, _ = limiter.try_acquire() |
| 74 | 76 | assert acquired is True | ... | ... |
| ... | ... | @@ -0,0 +1,81 @@ |
| 1 | +import threading | |
| 2 | + | |
| 3 | +import embeddings.server as emb_server | |
| 4 | + | |
| 5 | + | |
def test_text_inflight_limiter_priority_bypass():
    """A saturated text lane still admits requests flagged bypass_limit."""
    limiter = emb_server._InflightLimiter(name="text", limit=1)

    ok, in_flight = limiter.try_acquire()
    assert ok is True
    assert in_flight == 1

    # Lane is at capacity: an ordinary acquire must be rejected.
    ok, in_flight = limiter.try_acquire()
    assert ok is False
    assert in_flight == 1

    # Priority traffic bypasses the cap and is tracked in the stats.
    ok, in_flight = limiter.try_acquire(bypass_limit=True)
    assert ok is True
    assert in_flight == 2

    assert limiter.snapshot()["priority_bypass_total"] == 1

    limiter.release(success=True)
    limiter.release(success=True)
| 27 | + | |
def test_text_dispatch_prefers_high_priority_queue():
    """High-priority dispatch tasks are popped before normal-priority ones.

    Fix over the previous version: the module-level dispatch queues are
    shared global state, so they are now restored in a ``finally`` block —
    a failing pop can no longer leak tasks into other tests. The pops are
    performed while holding ``_text_dispatch_cv``, matching the ``_locked``
    suffix contract of ``_pop_text_dispatch_task_locked``.
    """
    high_task = emb_server._TextDispatchTask(
        normalized=["online"],
        effective_normalize=True,
        request_id="high",
        priority=1,
        created_at=0.0,
        done=threading.Event(),
    )
    normal_task = emb_server._TextDispatchTask(
        normalized=["offline"],
        effective_normalize=True,
        request_id="normal",
        priority=0,
        created_at=0.0,
        done=threading.Event(),
    )

    try:
        with emb_server._text_dispatch_cv:
            emb_server._text_dispatch_high_queue.clear()
            emb_server._text_dispatch_normal_queue.clear()
            # Enqueue the normal task first so plain FIFO order alone would
            # return it first; the high-priority queue must still win.
            emb_server._text_dispatch_normal_queue.append(normal_task)
            emb_server._text_dispatch_high_queue.append(high_task)

            first = emb_server._pop_text_dispatch_task_locked()
            second = emb_server._pop_text_dispatch_task_locked()
    finally:
        # Always leave the global queues empty for subsequent tests.
        with emb_server._text_dispatch_cv:
            emb_server._text_dispatch_high_queue.clear()
            emb_server._text_dispatch_normal_queue.clear()

    assert first is high_task
    assert second is normal_task
| 60 | + | |
| 61 | + | |
def test_image_inflight_limiter_priority_bypass():
    """A saturated image lane still admits requests flagged bypass_limit."""
    limiter = emb_server._InflightLimiter(name="image", limit=1)

    ok, in_flight = limiter.try_acquire()
    assert ok is True
    assert in_flight == 1

    # Lane is at capacity: an ordinary acquire must be rejected.
    ok, in_flight = limiter.try_acquire()
    assert ok is False
    assert in_flight == 1

    # Priority traffic bypasses the cap and is tracked in the stats.
    ok, in_flight = limiter.try_acquire(bypass_limit=True)
    assert ok is True
    assert in_flight == 2

    assert limiter.snapshot()["priority_bypass_total"] == 1

    limiter.release(success=True)
    limiter.release(success=True)
tests/test_es_query_builder.py
| ... | ... | @@ -80,3 +80,102 @@ def test_text_query_contains_only_base_translation_and_fallback_named_queries(): |
| 80 | 80 | names = [clause["multi_match"]["_name"] for clause in should] |
| 81 | 81 | |
| 82 | 82 | assert names == ["base_query", "base_query_trans_zh", "fallback_original_query_fr"] |
| 83 | + | |
| 84 | + | |
def test_mixed_script_merges_en_fields_into_zh_clause():
    """A zh clause on a mixed-script query also covers the en columns."""
    builder = ESQueryBuilder(
        match_fields=["title.en^3.0"],
        multilingual_fields=["title", "brief"],
        shared_fields=[],
        text_embedding_field="title_embedding",
        default_language="en",
    )
    parsed = SimpleNamespace(
        query_text_by_lang={"zh": "法式 dress"},
        search_langs=["zh"],
        detected_language="zh",
        source_in_index_languages=True,
        index_languages=["zh", "en"],
        contains_chinese=True,
        contains_english=True,
    )

    query = builder.build_query(
        query_text="法式 dress", parsed_query=parsed, enable_knn=False
    )

    fields = query["query"]["multi_match"]["fields"]
    field_names = {spec.split("^", 1)[0] for spec in fields}
    assert {"title.zh", "title.en", "brief.zh", "brief.en"} <= field_names
    # Merged supplemental-language fields get boost * 0.8 (implicit 1.0 -> ^0.8).
    assert "title.en^0.8" in fields
    assert "brief.en^0.8" in fields
| 110 | + | |
| 111 | + | |
def test_mixed_script_merges_zh_fields_into_en_clause():
    """An en clause on a mixed-script query also covers the zh columns."""
    builder = ESQueryBuilder(
        match_fields=["title.en^3.0"],
        multilingual_fields=["title"],
        shared_fields=[],
        text_embedding_field="title_embedding",
        default_language="en",
    )
    parsed = SimpleNamespace(
        query_text_by_lang={"en": "red 连衣裙"},
        search_langs=["en"],
        detected_language="en",
        source_in_index_languages=True,
        index_languages=["zh", "en"],
        contains_chinese=True,
        contains_english=True,
    )

    query = builder.build_query(
        query_text="red 连衣裙", parsed_query=parsed, enable_knn=False
    )

    fields = query["query"]["multi_match"]["fields"]
    field_names = {spec.split("^", 1)[0] for spec in fields}
    assert {"title.en", "title.zh"} <= field_names
    # The merged zh field carries the default boost scaled by 0.8.
    assert "title.zh^0.8" in fields
| 134 | + | |
| 135 | + | |
def test_mixed_script_merged_fields_scale_configured_boosts():
    """Configured boosts survive on the primary clause; merged ones scale by 0.8."""
    builder = ESQueryBuilder(
        match_fields=["title.en^3.0"],
        multilingual_fields=["title"],
        shared_fields=[],
        field_boosts={"title.zh": 5.0, "title.en": 10.0},
        text_embedding_field="title_embedding",
        default_language="en",
    )
    parsed = SimpleNamespace(
        query_text_by_lang={"zh": "法式 dress"},
        search_langs=["zh"],
        detected_language="zh",
        source_in_index_languages=True,
        index_languages=["zh", "en"],
        contains_chinese=True,
        contains_english=True,
    )

    query = builder.build_query(
        query_text="法式 dress", parsed_query=parsed, enable_knn=False
    )

    fields = query["query"]["multi_match"]["fields"]
    assert "title.zh^5.0" in fields   # primary language keeps its boost
    assert "title.en^8.0" in fields   # merged: 10.0 * 0.8
| 158 | + | |
| 159 | + | |
def test_mixed_script_does_not_merge_en_when_not_in_index_languages():
    """English columns are only merged when the tenant actually indexes en."""
    builder = ESQueryBuilder(
        match_fields=["title.zh^3.0"],
        multilingual_fields=["title"],
        shared_fields=[],
        text_embedding_field="title_embedding",
        default_language="zh",
    )
    parsed = SimpleNamespace(
        query_text_by_lang={"zh": "法式 dress"},
        search_langs=["zh"],
        detected_language="zh",
        source_in_index_languages=True,
        index_languages=["zh"],
        contains_chinese=True,
        contains_english=True,
    )

    query = builder.build_query(
        query_text="法式 dress", parsed_query=parsed, enable_knn=False
    )

    fields = query["query"]["multi_match"]["fields"]
    field_names = {spec.split("^", 1)[0] for spec in fields}
    assert "title.zh" in field_names
    assert "title.en" not in field_names
tests/test_query_parser_mixed_language.py
| ... | ... | @@ -9,6 +9,14 @@ class _DummyTranslator: |
| 9 | 9 | return f"{text}-{target_lang}" |
| 10 | 10 | |
| 11 | 11 | |
def test_pure_english_word_token_length_and_script():
    """Pure-English tokens need >= 3 chars of letters (optional hyphen)."""
    is_english = QueryParser._is_pure_english_word_token
    assert is_english("ab") is False      # below the 3-char minimum
    assert is_english("abc") is True
    assert is_english("wi-fi") is True    # hyphenated words still qualify
    assert is_english("连衣裙") is False   # CJK script
    assert is_english("ab12") is False    # digits disqualify
| 18 | + | |
| 19 | + | |
| 12 | 20 | def _build_config() -> SearchConfig: |
| 13 | 21 | return SearchConfig( |
| 14 | 22 | es_index_name="test_products", |
| ... | ... | @@ -38,8 +46,11 @@ def test_parse_adds_en_fields_for_mixed_chinese_query_with_meaningful_english(mo |
| 38 | 46 | result = parser.parse("法式 dress 连衣裙", tenant_id="162", generate_vector=False) |
| 39 | 47 | |
| 40 | 48 | assert result.detected_language == "zh" |
| 49 | + assert result.contains_chinese is True | |
| 50 | + assert result.contains_english is True | |
| 41 | 51 | assert "en" in result.search_langs |
| 42 | - assert result.query_text_by_lang["en"] == "法式 dress 连衣裙" | |
| 52 | + # 翻译在预算内完成时会写入目标语言字段(优于仅用原文做 supplemental 探测) | |
| 53 | + assert result.query_text_by_lang["en"] == "法式 dress 连衣裙-en" | |
| 43 | 54 | assert result.query_text_by_lang["zh"] == "法式 dress 连衣裙" |
| 44 | 55 | |
| 45 | 56 | |
| ... | ... | @@ -55,6 +66,28 @@ def test_parse_adds_zh_fields_for_english_query_when_cjk_present(monkeypatch): |
| 55 | 66 | result = parser.parse("red 连衣裙", tenant_id="0", generate_vector=False) |
| 56 | 67 | |
| 57 | 68 | assert result.detected_language == "en" |
| 69 | + assert result.contains_chinese is True | |
| 70 | + assert result.contains_english is True | |
| 58 | 71 | assert "zh" in result.search_langs |
| 59 | - assert result.query_text_by_lang["zh"] == "red 连衣裙" | |
| 72 | + assert result.query_text_by_lang["zh"] == "red 连衣裙-zh" | |
| 60 | 73 | assert result.query_text_by_lang["en"] == "red 连衣裙" |
| 74 | + | |
| 75 | + | |
def test_parse_waits_for_translation_when_source_in_index_languages(monkeypatch):
    """Even when en is in index_languages, parse waits for the en->zh
    translation (sharing the vector budget) and adopts its result."""
    parser = QueryParser(_build_config(), translator=_DummyTranslator())
    monkeypatch.setattr(parser.language_detector, "detect", lambda text: "en")
    monkeypatch.setattr(
        "query.query_parser.get_tenant_config_loader",
        lambda: SimpleNamespace(
            get_tenant_config=lambda tenant_id: {"index_languages": ["en", "zh"]}
        ),
        raising=False,
    )

    parsed = parser.parse("off shoulder top", tenant_id="0", generate_vector=False)

    assert parsed.detected_language == "en"
    assert parsed.contains_chinese is False
    assert parsed.contains_english is True
    assert parsed.translations.get("zh") == "off shoulder top-zh"
    assert parsed.query_text_by_lang.get("zh") == "off shoulder top-zh"
    assert parsed.source_in_index_languages is True
tests/test_search_rerank_window.py
| ... | ... | @@ -5,6 +5,7 @@ from pathlib import Path |
| 5 | 5 | from types import SimpleNamespace |
| 6 | 6 | from typing import Any, Dict, List |
| 7 | 7 | |
| 8 | +import numpy as np | |
| 8 | 9 | import yaml |
| 9 | 10 | |
| 10 | 11 | from config import ( |
| ... | ... | @@ -157,9 +158,7 @@ def _build_search_config(*, rerank_enabled: bool = True, rerank_window: int = 38 |
| 157 | 158 | rerank=RerankConfig(enabled=rerank_enabled, rerank_window=rerank_window), |
| 158 | 159 | spu_config=SPUConfig(enabled=False), |
| 159 | 160 | es_index_name="test_products", |
| 160 | - tenant_config={}, | |
| 161 | 161 | es_settings={}, |
| 162 | - services={}, | |
| 163 | 162 | ) |
| 164 | 163 | |
| 165 | 164 | |
| ... | ... | @@ -173,6 +172,19 @@ def _build_searcher(config: SearchConfig, es_client: _FakeESClient) -> Searcher: |
| 173 | 172 | return searcher |
| 174 | 173 | |
| 175 | 174 | |
| 175 | +class _FakeTextEncoder: | |
| 176 | + def __init__(self, vectors: Dict[str, List[float]]): | |
| 177 | + self.vectors = { | |
| 178 | + key: np.array(value, dtype=np.float32) | |
| 179 | + for key, value in vectors.items() | |
| 180 | + } | |
| 181 | + | |
| 182 | + def encode(self, sentences, priority: int = 0, **kwargs): | |
| 183 | + if isinstance(sentences, str): | |
| 184 | + sentences = [sentences] | |
| 185 | + return np.array([self.vectors[text] for text in sentences], dtype=object) | |
| 186 | + | |
| 187 | + | |
| 176 | 188 | def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path): |
| 177 | 189 | config_data = { |
| 178 | 190 | "es_index_name": "test_products", |
| ... | ... | @@ -327,3 +339,118 @@ def test_searcher_skips_rerank_when_page_exceeds_window(monkeypatch): |
| 327 | 339 | assert es_client.calls[0]["size"] == 10 |
| 328 | 340 | assert es_client.calls[0]["include_named_queries_score"] is False |
| 329 | 341 | assert len(es_client.calls) == 1 |
| 342 | + | |
| 343 | + | |
def test_searcher_promotes_sku_when_option1_matches_translated_query(monkeypatch):
    """A SKU whose option1_value textually matches the translated query is
    promoted to the front of skus and its image replaces the SPU image."""
    es_client = _FakeESClient(total_hits=1)
    searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client)
    context = create_request_context(reqid="sku-text", uid="u-sku-text")

    monkeypatch.setattr(
        "search.searcher.get_tenant_config_loader",
        lambda: SimpleNamespace(
            get_tenant_config=lambda tenant_id: {"index_languages": ["en", "zh"]}
        ),
    )

    class _TranslatedQueryParser:
        # No encoder: the text-match path must succeed without embeddings.
        text_encoder = None

        def parse(self, query: str, tenant_id: str, generate_vector: bool, context: Any):
            return _FakeParsedQuery(
                original_query=query,
                query_normalized=query,
                rewritten_query=query,
                translations={"en": "black dress"},
            )

    searcher.query_parser = _TranslatedQueryParser()

    def _full_source_with_skus(doc_id: str) -> Dict[str, Any]:
        return {
            "spu_id": doc_id,
            "title": {"en": f"product-{doc_id}"},
            "brief": {"en": f"brief-{doc_id}"},
            "vendor": {"en": f"vendor-{doc_id}"},
            "option1_name": "Color",
            "image_url": "https://img/default.jpg",
            "skus": [
                {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"},
                {"sku_id": "sku-black", "option1_value": "Black", "image_src": "https://img/black.jpg"},
            ],
        }

    monkeypatch.setattr(_FakeESClient, "_full_source", staticmethod(_full_source_with_skus))

    result = searcher.search(
        query="黑色 连衣裙",
        tenant_id="162",
        from_=0,
        size=1,
        context=context,
        enable_rerank=False,
    )

    assert len(result.results) == 1
    top_spu = result.results[0]
    # "black dress" (translation) matches option1_value "Black".
    assert top_spu.skus[0].sku_id == "sku-black"
    assert top_spu.image_url == "https://img/black.jpg"
| 395 | + | |
| 396 | + | |
def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_match(monkeypatch):
    """With no textual option1_value match, the SKU with the highest
    embedding inner product against the query vector is promoted."""
    es_client = _FakeESClient(total_hits=1)
    searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client)
    context = create_request_context(reqid="sku-embed", uid="u-sku-embed")

    monkeypatch.setattr(
        "search.searcher.get_tenant_config_loader",
        lambda: SimpleNamespace(
            get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}
        ),
    )

    # Query vector [0, 1] is closest to "color:Blue" ([0, 1]) by inner product.
    encoder = _FakeTextEncoder(
        {
            "linen summer dress": [0.8, 0.2],
            "color:Red": [1.0, 0.0],
            "color:Blue": [0.0, 1.0],
        }
    )

    class _EmbeddingQueryParser:
        text_encoder = encoder

        def parse(self, query: str, tenant_id: str, generate_vector: bool, context: Any):
            return _FakeParsedQuery(
                original_query=query,
                query_normalized=query,
                rewritten_query=query,
                translations={},
                query_vector=np.array([0.0, 1.0], dtype=np.float32),
            )

    searcher.query_parser = _EmbeddingQueryParser()

    def _full_source_with_skus(doc_id: str) -> Dict[str, Any]:
        return {
            "spu_id": doc_id,
            "title": {"en": f"product-{doc_id}"},
            "brief": {"en": f"brief-{doc_id}"},
            "vendor": {"en": f"vendor-{doc_id}"},
            "option1_name": "Color",
            "image_url": "https://img/default.jpg",
            "skus": [
                {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"},
                {"sku_id": "sku-blue", "option1_value": "Blue", "image_src": "https://img/blue.jpg"},
            ],
        }

    monkeypatch.setattr(_FakeESClient, "_full_source", staticmethod(_full_source_with_skus))

    result = searcher.search(
        query="linen summer dress",
        tenant_id="162",
        from_=0,
        size=1,
        context=context,
        enable_rerank=False,
    )

    assert len(result.results) == 1
    top_spu = result.results[0]
    assert top_spu.skus[0].sku_id == "sku-blue"
    assert top_spu.image_url == "https://img/blue.jpg"