Commit ccbdf87013773107a41f81410c45cbc9f52bfe77
1 parent
639bee0a
enriched_attributes.value字段参与搜索
Showing
6 changed files
with
422 additions
and
51 deletions
Show diff stats
config/config.yaml
| @@ -110,18 +110,19 @@ es_settings: | @@ -110,18 +110,19 @@ es_settings: | ||
| 110 | # 若需要按某个语言单独调权,也可以加显式 key(例如 title.de: 3.2)。 | 110 | # 若需要按某个语言单独调权,也可以加显式 key(例如 title.de: 3.2)。 |
| 111 | field_boosts: | 111 | field_boosts: |
| 112 | title: 3.0 | 112 | title: 3.0 |
| 113 | - qanchors: 2.3 | ||
| 114 | - enriched_tags: 2.3 | ||
| 115 | - keywords: 2.0 | ||
| 116 | - tags: 2.0 | 113 | + qanchors: 2.5 |
| 114 | + enriched_tags: 2.5 | ||
| 115 | + enriched_attributes.value: 2.3 | ||
| 117 | category_name_text: 2.0 | 116 | category_name_text: 2.0 |
| 118 | category_path: 2.0 | 117 | category_path: 2.0 |
| 119 | - brief: 1.5 | ||
| 120 | - description: 1.5 | ||
| 121 | - vendor: 1.5 | ||
| 122 | - option1_values: 1.5 | ||
| 123 | - option2_values: 1.5 | ||
| 124 | - option3_values: 1.5 | 118 | + keywords: 2.0 |
| 119 | + tags: 2.0 | ||
| 120 | + option1_values: 1.7 | ||
| 121 | + option2_values: 1.7 | ||
| 122 | + option3_values: 1.7 | ||
| 123 | + brief: 1.0 | ||
| 124 | + description: 1.0 | ||
| 125 | + vendor: 1.0 | ||
| 125 | 126 | ||
| 126 | # Query Configuration(查询配置) | 127 | # Query Configuration(查询配置) |
| 127 | query_config: | 128 | query_config: |
| @@ -188,17 +189,18 @@ query_config: | @@ -188,17 +189,18 @@ query_config: | ||
| 188 | search_fields: | 189 | search_fields: |
| 189 | multilingual_fields: | 190 | multilingual_fields: |
| 190 | - title | 191 | - title |
| 191 | - - qanchors | ||
| 192 | - keywords | 192 | - keywords |
| 193 | + - qanchors | ||
| 193 | - enriched_tags | 194 | - enriched_tags |
| 195 | + - enriched_attributes.value | ||
| 194 | - option1_values | 196 | - option1_values |
| 195 | - option2_values | 197 | - option2_values |
| 196 | - option3_values | 198 | - option3_values |
| 197 | - category_path | 199 | - category_path |
| 198 | - category_name_text | 200 | - category_name_text |
| 199 | - - brief | ||
| 200 | - - description | ||
| 201 | - - vendor | 201 | + # - brief |
| 202 | + # - description | ||
| 203 | + # - vendor | ||
| 202 | # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values | 204 | # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values |
| 203 | shared_fields: null | 205 | shared_fields: null |
| 204 | core_multilingual_fields: | 206 | core_multilingual_fields: |
| @@ -244,7 +246,11 @@ query_config: | @@ -244,7 +246,11 @@ query_config: | ||
| 244 | - category1_name | 246 | - category1_name |
| 245 | - category2_name | 247 | - category2_name |
| 246 | - category3_name | 248 | - category3_name |
| 247 | - - tags | 249 | + # - tags |
| 250 | + # - keywords | ||
| 251 | + # - qanchors | ||
| 252 | + # - enriched_tags | ||
| 253 | + # - enriched_attributes | ||
| 248 | - min_price | 254 | - min_price |
| 249 | - compare_at_price | 255 | - compare_at_price |
| 250 | - image_url | 256 | - image_url |
| @@ -0,0 +1,181 @@ | @@ -0,0 +1,181 @@ | ||
| 1 | + | ||
| 2 | +特征工程: | ||
| 3 | +对因子做非线性处理,作为重排阶段的乘法融合 / 特征融合,和LR、FM、浅层 MLP的输入。 | ||
| 4 | + | ||
| 5 | +分成两张表: | ||
| 6 | + | ||
| 7 | +1. **常见非线性映射总表** | ||
| 8 | +2. **按“你想达到什么效果”来选方法** | ||
| 9 | + | ||
| 10 | +--- | ||
| 11 | + | ||
| 12 | +# 一、常见非线性映射对比表 | ||
| 13 | + | ||
| 14 | +> 记号说明: | ||
| 15 | +> | ||
| 16 | +> * (x):原始因子,默认 (x\ge 0) | ||
| 17 | +> * (y=f(x)):映射后特征 | ||
| 18 | +> * “头部/高值/尾部”分别指:高质量区、较大数值区、低影响区 | ||
| 19 | +> * “弹性”可理解为乘法融合里更接近“相对重要性”的量 | ||
| 20 | + | ||
| 21 | +| 方法 | 公式 | 单调性 | 斜率/敏感性特征 | 弹性/相对敏感性特征 | 主要效果 | 适合场景 | 风险/注意点 | | ||
| 22 | +| ----------------------- | ---------------------------------- | ----: | -------------------- | ------------------------- | ---------------------- | ------------------ | ------------------ | | ||
| 23 | +| 加偏置 | (y=x+b) | 单调增 | (\frac{dy}{dx}=1) 不变 | (\frac{x}{x+b}),随(x)增大而增大 | **主要削弱低值区相对重要性** | 防止小值/0值过度伤害乘法分数 | 对高值区抑制很弱 | | ||
| 24 | +| 对数 | (y=\log(1+x)) | 单调增 | 高值区越来越平 | 高值相对敏感性下降明显 | **强压缩大值/长尾** | 计数、曝光、频次、热度 | 小值区区分度有时不够 | | ||
| 25 | +| 幂次(0~1) | (y=x^\gamma,\ 0<\gamma<1) | 单调增 | 高值区变平 | 弹性恒为(\gamma<1) | **整体降权,压缩高值** | 重尾分布、避免大值碾压 | 需要处理0附近/尺度问题 | | ||
| 26 | +| 幂次(>1) | (y=x^\gamma,\ \gamma>1) | 单调增 | 高值区更陡 | 弹性恒为(\gamma>1) | **整体放大,强调高值** | 想突出强信号头部 | 容易放大异常值、过拟合 | | ||
| 27 | +| 负幂 / 倒数幂 | (y=(x+\epsilon)^\gamma,\ \gamma<0) | 单调减 | 小(x)处敏感 | 适合“坏度/距离/排名”类反向量 | **把大值变小、小值变大** | 距离、惩罚项、rank-like特征 | 必须防0,数值稳定要小心 | | ||
| 28 | +| Michaelis-Menten / 饱和分式 | (y=\frac{x}{x+K}) | 单调增 | 前陡后平 | (\frac{K}{x+K}),随(x)增大而减小 | **高值区重要性显著衰减** | 相似度、质量分、置信度饱和 | 需选(K),解释阈值要清晰 | | ||
| 29 | +| 指数饱和 | (y=1-e^{-\alpha x}) | 单调增 | 初期快,后期快饱和 | 高值区快速失敏 | **前期收益大,后期收益递减** | “够好即可”的因子 | (\alpha)过大易太早饱和 | | ||
| 30 | +| 倒数/双曲线 | (y=\frac{1}{x+c}) | 单调减 | **头部陡、尾部平** | 对小(x)更敏感 | **强头部区分,尾部压平** | rank、距离、位置惩罚 | 语义是反向量时更自然 | | ||
| 31 | +| RRF | (y=\frac{1}{k+r}) | 单调减 | **头部下降快,尾部下降慢** | 离散相邻rank差在头部更大 | **强调Top位置,压缩长尾rank差异** | 多路召回排序融合 | 更适合rank,不适合已校准连续分数 | | ||
| 32 | +| Sigmoid | (y=\sigma(\alpha(x-c))) | 单调增 | **中间陡,两端平** | 阈值附近最敏感 | **中段阈值化,低高两端压缩** | 有明显阈值的质量因子 | 容易过压缩,参数敏感 | | ||
| 33 | +| Tanh | (y=\tanh(\alpha(x-c))) | 单调增 | **中间陡,两端平** | 以中心点为对称中枢 | **适合有“好/坏偏离”语义** | 偏离均值、标准化后特征 | 要先中心化更合理 | | ||
| 34 | +| Softplus | (y=\log(1+e^{\alpha(x-c)})) | 单调增 | 平滑版ReLU | 阈值后近似线性 | **软阈值激活** | 希望“超过阈值才开始起作用” | 不如硬阈值直观 | | ||
| 35 | +| ReLU/铰链 | (y=\max(0,x-c)) | 单调增 | 阈值前0,后线性 | 明确阈值激活 | **只让超过阈值部分生效** | 明确业务门槛 | 不连续,不够平滑 | | ||
| 36 | +| 截断/裁剪 | (y=\min(\max(x,L),U)) | 单调 | 两端直接压平 | 控制极值影响 | **抗异常值,防爆** | 样本少、分布脏 | 可能损失极值信息 | | ||
| 37 | +| 分段线性 | 分段定义 | 单调可控 | 可手工指定各段斜率 | 可按业务调敏感区间 | **可解释、稳、好控** | 规则清晰的业务场景 | 需要人工定阈值 | | ||
| 38 | +| Arctan | (y=\arctan(\alpha(x-c))) | 单调增 | 类S形但更柔和 | 中间敏感、两端平 | **温和版S型压缩** | 不想用太激进sigmoid时 | 解释性略弱 | | ||
| 39 | +| 分位数/Percentile | (y=\text{percentile}(x)) | 单调增 | 基于排序,不看绝对差值 | 消除原始尺度影响 | **保序、抗异常、跨源统一尺度** | 多源分数难校准 | 丢失绝对量纲信息 | | ||
| 40 | +| 分桶/Binning | 区间映射到桶 | 不一定连续 | 桶内不敏感,桶间跳变 | 强离散化 | **把非线性变成离散模式** | 样本少、LR很常见 | 桶边界敏感 | | ||
| 41 | +| Box-Cox | (\frac{x^\lambda-1}{\lambda}) | 单调增 | 介于log与power之间 | 可调分布形态 | **系统化连续压缩族** | 想系统试幂/log家族 | 解释性不如手工映射 | | ||
| 42 | +| Yeo-Johnson | 可处理负值 | 单调 | 类似Box-Cox | 可处理(\le 0) | **负值/零值也能做分布矫正** | 特征可能有负值 | 工程解释性一般 | | ||
| 43 | + | ||
| 44 | +--- | ||
| 45 | + | ||
| 46 | +# 二、按“想达到什么效果”选方法 | ||
| 47 | + | ||
| 48 | +这个表更适合你做特征工程时快速决策。 | ||
| 49 | + | ||
| 50 | +| 目标 | 推荐方法 | 核心机制 | 典型用途 | | ||
| 51 | +| -------------- | ----------------------------------------------------------------------- | --------------- | ---------------- | | ||
| 52 | +| 防止小值/0值把乘法分数打穿 | (x+b) | 给低值加保护垫 | 质量分、置信分、召回弱信号保护 | | ||
| 53 | +| 压制大值主导、做收益递减 | (\log(1+x)), (x^\gamma(0<\gamma<1)), (\frac{x}{x+K}), (1-e^{-\alpha x}) | 高值区斜率变小 | 热度、频次、相似度、历史点击率 | | ||
| 54 | +| 明显削弱高值区重要性 | (\frac{x}{x+K}) | 弹性随(x)增大而下降 | “高了以后别再太影响排序” | | ||
| 55 | +| 整体降低某因子的乘法重要性 | (x^\gamma,\ 0<\gamma<1) | 弹性恒定缩小到(\gamma) | 统一降权某类因子 | | ||
| 56 | +| 整体放大某因子的乘法重要性 | (x^\gamma,\ \gamma>1) | 弹性恒定放大到(\gamma) | 想强化强信号 | | ||
| 57 | +| 强调Top,压平长尾 | (\frac{1}{x+c}), RRF | 头部陡、尾部平 | rank融合、位置因子、多路召回 | | ||
| 58 | +| 只在阈值附近最敏感 | Sigmoid / Tanh / Arctan | 中间陡、两端平 | 质量过线、置信阈值、门控因子 | | ||
| 59 | +| 超过阈值才起作用 | ReLU / Hinge / Softplus | 阈值激活 | “达到一定水平才算有效” | | ||
| 60 | +| 抗异常值、防极值爆炸 | Clip / Winsorize / Log | 直接限幅或压缩长尾 | 脏数据、样本少、稳定性优先 | | ||
| 61 | +| 分布跨源不一致、量纲不统一 | Quantile / Percentile | 保序统一尺度 | 多路打分融合、异构召回分 | | ||
| 62 | +| 业务规则清晰、想强可解释 | 分段线性 / 分桶 | 手工指定各区间作用方式 | 规则强、可解释要求高 | | ||
| 63 | +| 距离/惩罚/坏度越大越差 | 倒数、负幂、指数衰减 | 反向单调映射 | 距离、时延、惩罚项 | | ||
| 64 | + | ||
| 65 | +--- | ||
| 66 | + | ||
| 67 | +# 三、几个你当前最关心的方法,单独再压缩成小表 | ||
| 68 | + | ||
| 69 | +## 1)加偏置 vs Michaelis-Menten | ||
| 70 | + | ||
| 71 | +| 方法 | 公式 | 低值区 | 高值区 | 适合作用 | | ||
| 72 | +| ---- | --------------- | -------- | -------- | -------- | | ||
| 73 | +| 加偏置 | (x+b) | **削弱更多** | 基本保留 | 防止低值过分伤害 | | ||
| 74 | +| MM饱和 | (\frac{x}{x+K}) | 保留较强敏感性 | **削弱更多** | 防止高值持续主导 | | ||
| 75 | + | ||
| 76 | +一句话区别: | ||
| 77 | + | ||
| 78 | +* **加偏置**:主要“救低值” | ||
| 79 | +* **MM饱和**:主要“压高值” | ||
| 80 | + | ||
| 81 | +--- | ||
| 82 | + | ||
| 83 | +## 2)RRF 的特征 | ||
| 84 | + | ||
| 85 | +| 维度 | 结论 | | ||
| 86 | +| -------- | --------------------- | | ||
| 87 | +| 公式 | (\frac{1}{k+r}) | | ||
| 88 | +| 单调性 | 随rank变差单调下降 | | ||
| 89 | +| 头部变化 | **更陡** | | ||
| 90 | +| 尾部变化 | **更平** | | ||
| 91 | +| 相邻rank差异 | rank越靠前,相邻差越大 | | ||
| 92 | +| 适合 | 多路召回融合、强调Top结果 | | ||
| 93 | +| 本质 | 是一种“头部敏感、尾部压缩”的rank映射 | | ||
| 94 | + | ||
| 95 | +你的判断是对的:**RRF 确实是头部变化陡、尾部变化平。** | ||
| 96 | + | ||
| 97 | +--- | ||
| 98 | + | ||
| 99 | +## 3)Sigmoid / Tanh / ReLU 三者区别 | ||
| 100 | + | ||
| 101 | +| 方法 | 形状 | 最敏感区域 | 适用语义 | | ||
| 102 | +| ---------- | -------------- | ------ | ------------ | | ||
| 103 | +| Sigmoid | S型,输出(0\sim1) | 中心阈值附近 | 质量是否过线、概率型因子 | | ||
| 104 | +| Tanh | S型,输出(-1\sim1) | 中心附近 | 正负偏离、相对均值偏差 | | ||
| 105 | +| ReLU/Hinge | 折线 | 阈值以上 | 超过门槛才开始加分 | | ||
| 106 | + | ||
| 107 | +--- | ||
| 108 | + | ||
| 109 | +# 四、用于 LR / FM 输入时,最推荐的一组“低风险特征字典” | ||
| 110 | + | ||
| 111 | +如果你现在是要做**有限样本下的工程化输入**,建议不要一下上太多复杂函数,而是每个核心因子先派生一个**小型稳定字典**: | ||
| 112 | + | ||
| 113 | +| 变换类别 | 推荐形式 | 作用 | | ||
| 114 | +| ----- | -------------------------------- | ------------ | | ||
| 115 | +| 原始 | (x) | 保留原信息 | | ||
| 116 | +| 稳定 | (x+b) | 防止低值过伤 | | ||
| 117 | +| 压缩 | (\log(1+x)) | 压长尾 | | ||
| 118 | +| 弱压缩 | (\sqrt{x}) | 温和收益递减 | | ||
| 119 | +| 饱和 | (\frac{x}{x+K}) | 明确高值衰减 | | ||
| 120 | +| 截断 | (\min(x,U)) | 防极值爆炸 | | ||
| 121 | +| 阈值 | (\mathbf 1[x>t]) 或 (\max(0,x-t)) | 强化门槛效应 | | ||
| 122 | +| rank型 | (\frac{1}{x+c}) / RRF | 处理位置、rank、距离 | | ||
| 123 | + | ||
| 124 | +--- | ||
| 125 | + | ||
| 126 | +# 五、如果你要在实际工程里优先试哪些 | ||
| 127 | + | ||
| 128 | +我建议优先级这样排: | ||
| 129 | + | ||
| 130 | +## 第一梯队:最稳 | ||
| 131 | + | ||
| 132 | +* (x) | ||
| 133 | +* (x+b) | ||
| 134 | +* (\log(1+x)) | ||
| 135 | +* (\sqrt{x}) | ||
| 136 | +* (\frac{x}{x+K}) | ||
| 137 | +* clip | ||
| 138 | +* rank倒数 / RRF | ||
| 139 | + | ||
| 140 | +## 第二梯队:有明确业务阈值时 | ||
| 141 | + | ||
| 142 | +* sigmoid | ||
| 143 | +* tanh | ||
| 144 | +* relu / hinge | ||
| 145 | +* softplus | ||
| 146 | +* 分桶 | ||
| 147 | + | ||
| 148 | +## 第三梯队:谨慎使用 | ||
| 149 | + | ||
| 150 | +* 指数放大 | ||
| 151 | +* 高次幂 (x^3,x^4) | ||
| 152 | +* 很激进的负幂 | ||
| 153 | +* 过多参数化函数族 | ||
| 154 | + | ||
| 155 | +因为样本少时,最怕的是: | ||
| 156 | + | ||
| 157 | +* 变换过激 | ||
| 158 | +* 参数太多 | ||
| 159 | +* 极值被放大 | ||
| 160 | +* 输入间强共线后模型不稳 | ||
| 161 | + | ||
| 162 | +--- | ||
| 163 | + | ||
| 164 | +# 六、最后给你一个超短总结版 | ||
| 165 | + | ||
| 166 | +| 方法族 | 代表公式 | 本质效果 | | ||
| 167 | +| --- | ---------------------------------- | ---------- | | ||
| 168 | +| 偏置族 | (x+b) | 救低值 | | ||
| 169 | +| 压缩族 | (\log(1+x), \sqrt{x}) | 压大值、减长尾 | | ||
| 170 | +| 饱和族 | (\frac{x}{x+K}, 1-e^{-\alpha x}) | 高值收益递减 | | ||
| 171 | +| 放大族 | (x^\gamma,\gamma>1) | 强化高值 | | ||
| 172 | +| 反向族 | (\frac{1}{x+c}, x^\gamma,\gamma<0) | 距离/排名/惩罚映射 | | ||
| 173 | +| S型族 | sigmoid, tanh, arctan | 中间敏感、两端变平 | | ||
| 174 | +| 阈值族 | ReLU, hinge, softplus | 超过门槛才生效 | | ||
| 175 | +| 稳定族 | clip, winsorize, quantile | 抗异常、保稳 | | ||
| 176 | +| 离散族 | binning, percentile | 保序/分段表达 | | ||
| 177 | + | ||
| 178 | +--- | ||
| 179 | + | ||
| 180 | +如果你愿意,我下一步可以继续帮你整理成一版**“适合直接放进方案文档/评审PPT”的表格版**,再补一列: | ||
| 181 | +**“推荐用于哪些 rerank 因子(相关性、质量、热度、位置、先验)”**。 |
docs/issues/issue-2026-04-01-评估框架-四级label-done-0402.md
| @@ -168,12 +168,6 @@ Typical examples: | @@ -168,12 +168,6 @@ Typical examples: | ||
| 168 | - Barely acceptable substitute → **Low Relevant** | 168 | - Barely acceptable substitute → **Low Relevant** |
| 169 | - Hardly substitutable at all → **Irrelevant** | 169 | - Hardly substitutable at all → **Irrelevant** |
| 170 | 170 | ||
| 171 | -6. **When product information is insufficient, do not treat “cannot confirm” as “conflict”.** | ||
| 172 | - If a product does not mention an attribute, that does not mean the attribute is definitely violated. | ||
| 173 | - Therefore: | ||
| 174 | - - If the attribute is not mentioned or cannot be confirmed, prefer **High Relevant**; | ||
| 175 | - - Only treat it as a conflict when the product information clearly shows the opposite of the query requirement. | ||
| 176 | - | ||
| 177 | Query: {query} | 171 | Query: {query} |
| 178 | {intent_suffix} | 172 | {intent_suffix} |
| 179 | 173 | ||
| @@ -312,12 +306,6 @@ _CLASSIFY_TEMPLATE_ZH = """ä½ æ˜¯ä¸€ä¸ªæœé¥°ç”µå•†æœç´¢ç³»ç»Ÿä¸çš„ç›¸å…³æ€§åˆ | @@ -312,12 +306,6 @@ _CLASSIFY_TEMPLATE_ZH = """ä½ æ˜¯ä¸€ä¸ªæœé¥°ç”µå•†æœç´¢ç³»ç»Ÿä¸çš„ç›¸å…³æ€§åˆ | ||
| 312 | - 勉强替代品 → **弱相关** | 306 | - 勉强替代品 → **弱相关** |
| 313 | - 几乎不可替代 → **不相关** | 307 | - 几乎不可替代 → **不相关** |
| 314 | 308 | ||
| 315 | -6. **若商品信息不足,不要把“无法确认”误判为“冲突”。** | ||
| 316 | - 商品未写明某属性,不等于该属性一定不符合。 | ||
| 317 | - 因此: | ||
| 318 | - - 未提及 / 无法确认,优先按“基本相关”处理; | ||
| 319 | - - 只有当商品信息明确显示与查询要求相反时,才视为属性冲突。 | ||
| 320 | - | ||
| 321 | 查询:{query} | 309 | 查询:{query} |
| 322 | {intent_suffix} | 310 | {intent_suffix} |
| 323 | 311 |
docs/常用查询 - ES.md
| @@ -348,7 +348,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -348,7 +348,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 348 | 348 | ||
| 349 | #### 1.1 查询特定租户的商品,显示分面相关字段 | 349 | #### 1.1 查询特定租户的商品,显示分面相关字段 |
| 350 | ```bash | 350 | ```bash |
| 351 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 351 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 352 | "query": { | 352 | "query": { |
| 353 | - "term": { "tenant_id": "162" } | 353 | + "term": { "tenant_id": "163" } |
| 354 | }, | 354 | }, |
| @@ -363,7 +363,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -363,7 +363,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 363 | 363 | ||
| 364 | #### 1.2 验证 category1_name 字段是否有数据 | 364 | #### 1.2 验证 category1_name 字段是否有数据 |
| 365 | ```bash | 365 | ```bash |
| 366 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 366 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 367 | "query": { | 367 | "query": { |
| 368 | "bool": { | 368 | "bool": { |
| 369 | "filter": [ | 369 | "filter": [ |
| @@ -378,7 +378,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -378,7 +378,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 378 | 378 | ||
| 379 | #### 1.3 验证 specifications 字段是否有数据 | 379 | #### 1.3 验证 specifications 字段是否有数据 |
| 380 | ```bash | 380 | ```bash |
| 381 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 381 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 382 | "query": { | 382 | "query": { |
| 383 | "bool": { | 383 | "bool": { |
| 384 | "filter": [ | 384 | "filter": [ |
| @@ -397,7 +397,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -397,7 +397,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 397 | 397 | ||
| 398 | #### 2.1 category1_name 分面聚合 | 398 | #### 2.1 category1_name 分面聚合 |
| 399 | ```bash | 399 | ```bash |
| 400 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 400 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 401 | "query": { "match_all": {} }, | 401 | "query": { "match_all": {} }, |
| 402 | "size": 0, | 402 | "size": 0, |
| 403 | "aggs": { | 403 | "aggs": { |
| @@ -410,7 +410,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -410,7 +410,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 410 | 410 | ||
| 411 | #### 2.2 specifications.color 分面聚合 | 411 | #### 2.2 specifications.color 分面聚合 |
| 412 | ```bash | 412 | ```bash |
| 413 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 413 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 414 | "query": { "match_all": {} }, | 414 | "query": { "match_all": {} }, |
| 415 | "size": 0, | 415 | "size": 0, |
| 416 | "aggs": { | 416 | "aggs": { |
| @@ -431,7 +431,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -431,7 +431,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 431 | 431 | ||
| 432 | #### 2.3 specifications.size 分面聚合 | 432 | #### 2.3 specifications.size 分面聚合 |
| 433 | ```bash | 433 | ```bash |
| 434 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 434 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 435 | "query": { "match_all": {} }, | 435 | "query": { "match_all": {} }, |
| 436 | "size": 0, | 436 | "size": 0, |
| 437 | "aggs": { | 437 | "aggs": { |
| @@ -452,7 +452,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -452,7 +452,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 452 | 452 | ||
| 453 | #### 2.4 specifications.material 分面聚合 | 453 | #### 2.4 specifications.material 分面聚合 |
| 454 | ```bash | 454 | ```bash |
| 455 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 455 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 456 | "query": { "match_all": {} }, | 456 | "query": { "match_all": {} }, |
| 457 | "size": 0, | 457 | "size": 0, |
| 458 | "aggs": { | 458 | "aggs": { |
| @@ -473,7 +473,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -473,7 +473,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 473 | 473 | ||
| 474 | #### 2.5 综合分面聚合(category + color + size + material) | 474 | #### 2.5 综合分面聚合(category + color + size + material) |
| 475 | ```bash | 475 | ```bash |
| 476 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 476 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 477 | "query": { "match_all": {} }, | 477 | "query": { "match_all": {} }, |
| 478 | "size": 0, | 478 | "size": 0, |
| 479 | "aggs": { | 479 | "aggs": { |
| @@ -545,13 +545,172 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | @@ -545,13 +545,172 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | ||
| 545 | }' | 545 | }' |
| 546 | ``` | 546 | ``` |
| 547 | 547 | ||
| 548 | +#### 3.3 `enriched_attributes`:`.value.zh` / `.value.en` 的 keyword 精确匹配与 text 全文匹配 | ||
| 549 | + | ||
| 550 | +> `enriched_attributes` 为 **nested**,检索需包在 `nested` 里。`.keyword` 子字段带 `lowercase` normalizer,英文词建议用小写做 `term`。 | ||
| 551 | + | ||
| 552 | +**keyword 精确匹配**(示例词:中文 `法式风格`,英文 `long skirt`) | ||
| 553 | + | ||
| 554 | +```bash | ||
| 555 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 556 | + "size": 1, | ||
| 557 | + "_source": ["spu_id", "title", "enriched_attributes"], | ||
| 558 | + "query": { | ||
| 559 | + "nested": { | ||
| 560 | + "path": "enriched_attributes", | ||
| 561 | + "query": { | ||
| 562 | + "bool": { | ||
| 563 | + "should": [ | ||
| 564 | + { "term": { "enriched_attributes.value.zh.keyword": "法式风格" } }, | ||
| 565 | + { "term": { "enriched_attributes.value.en.keyword": "long skirt" } } | ||
| 566 | + ], | ||
| 567 | + "minimum_should_match": 2 | ||
| 568 | + } | ||
| 569 | + } | ||
| 570 | + } | ||
| 571 | + } | ||
| 572 | +}' | ||
| 573 | +``` | ||
| 574 | + | ||
| 575 | +**text 全文匹配**(经 `index_ik` / `english` 分词;可与上式对照) | ||
| 576 | + | ||
| 577 | +```bash | ||
| 578 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 579 | + "size": 1, | ||
| 580 | + "_source": ["spu_id", "title", "enriched_attributes"], | ||
| 581 | + "query": { | ||
| 582 | + "nested": { | ||
| 583 | + "path": "enriched_attributes", | ||
| 584 | + "query": { | ||
| 585 | + "bool": { | ||
| 586 | + "should": [ | ||
| 587 | + { "match": { "enriched_attributes.value.zh": "法式风格" } }, | ||
| 588 | + { "match": { "enriched_attributes.value.en": "long skirt" } } | ||
| 589 | + ], | ||
| 590 | + "minimum_should_match": 2 | ||
| 591 | + } | ||
| 592 | + } | ||
| 593 | + } | ||
| 594 | + } | ||
| 595 | +}' | ||
| 596 | +``` | ||
| 597 | + | ||
| 598 | +若需要 **拼写容错**,可在 `match` 上增加 `"fuzziness": "AUTO"`(对英文更常见)。 | ||
| 599 | + | ||
| 600 | +#### 3.4 `option1_values`:keyword 与 text 分别查 `蓝色` / `blue` | ||
| 601 | + | ||
| 602 | +**keyword 精确匹配** | ||
| 603 | + | ||
| 604 | +```bash | ||
| 605 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 606 | + "size": 1, | ||
| 607 | + "_source": ["spu_id", "title", "option1_values"], | ||
| 608 | + "query": { | ||
| 609 | + "bool": { | ||
| 610 | + "should": [ | ||
| 611 | + { "term": { "option1_values.zh.keyword": "蓝色" } }, | ||
| 612 | + { "term": { "option1_values.en.keyword": "blue" } } | ||
| 613 | + ], | ||
| 614 | + "minimum_should_match": 2 | ||
| 615 | + } | ||
| 616 | + } | ||
| 617 | +}' | ||
| 618 | +``` | ||
| 619 | + | ||
| 620 | +**text 全文匹配** | ||
| 621 | + | ||
| 622 | +```bash | ||
| 623 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 624 | + "size": 1, | ||
| 625 | + "_source": ["spu_id", "title", "option1_values"], | ||
| 626 | + "query": { | ||
| 627 | + "bool": { | ||
| 628 | + "should": [ | ||
| 629 | + { "match": { "option1_values.zh": "蓝色" } }, | ||
| 630 | + { "match": { "option1_values.en": "blue" } } | ||
| 631 | + ], | ||
| 632 | + "minimum_should_match": 2 | ||
| 633 | + } | ||
| 634 | + } | ||
| 635 | +}' | ||
| 636 | +``` | ||
| 637 | + | ||
| 638 | +#### 3.5 `enriched_tags.zh` / `enriched_tags.en`:keyword 与 text(`高腰` / `high waist`) | ||
| 639 | + | ||
| 640 | +**keyword 精确匹配** | ||
| 641 | + | ||
| 642 | +```bash | ||
| 643 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 644 | + "size": 1, | ||
| 645 | + "_source": ["spu_id", "title", "enriched_tags"], | ||
| 646 | + "query": { | ||
| 647 | + "bool": { | ||
| 648 | + "should": [ | ||
| 649 | + { "term": { "enriched_tags.zh.keyword": "高腰" } }, | ||
| 650 | + { "term": { "enriched_tags.en.keyword": "high waist" } } | ||
| 651 | + ], | ||
| 652 | + "minimum_should_match": 2 | ||
| 653 | + } | ||
| 654 | + } | ||
| 655 | +}' | ||
| 656 | +``` | ||
| 657 | + | ||
| 658 | +**text 全文匹配** | ||
| 659 | + | ||
| 660 | +```bash | ||
| 661 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 662 | + "size": 1, | ||
| 663 | + "_source": ["spu_id", "title", "enriched_tags"], | ||
| 664 | + "query": { | ||
| 665 | + "bool": { | ||
| 666 | + "should": [ | ||
| 667 | + { "match": { "enriched_tags.zh": "高腰" } }, | ||
| 668 | + { "match": { "enriched_tags.en": "high waist" } } | ||
| 669 | + ], | ||
| 670 | + "minimum_should_match": 2 | ||
| 671 | + } | ||
| 672 | + } | ||
| 673 | +}' | ||
| 674 | +``` | ||
| 675 | + | ||
| 676 | +#### 3.6 `specifications`:`value_keyword` 与 `value_text.zh` / `value_text.en`(`蓝色` / `blue`) | ||
| 677 | + | ||
| 678 | +> `specifications` 为 **nested**,`value_keyword` 为整词匹配;`value_text.*` 可同时 `term` 子字段或 `match` 主 text。 | ||
| 679 | + | ||
| 680 | +```bash | ||
| 681 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 682 | + "size": 1, | ||
| 683 | + "_source": ["spu_id", "title", "specifications"], | ||
| 684 | + "query": { | ||
| 685 | + "nested": { | ||
| 686 | + "path": "specifications", | ||
| 687 | + "query": { | ||
| 688 | + "bool": { | ||
| 689 | + "should": [ | ||
| 690 | + { "term": { "specifications.value_keyword": "蓝色" } }, | ||
| 691 | + { "term": { "specifications.value_keyword": "blue" } }, | ||
| 692 | + { "term": { "specifications.value_text.zh.keyword": "蓝色" } }, | ||
| 693 | + { "term": { "specifications.value_text.en.keyword": "blue" } }, | ||
| 694 | + { "match": { "specifications.value_text.zh": "蓝色" } }, | ||
| 695 | + { "match": { "specifications.value_text.en": "blue" } } | ||
| 696 | + ], | ||
| 697 | + "minimum_should_match": 5 | ||
| 698 | + } | ||
| 699 | + } | ||
| 700 | + } | ||
| 701 | + } | ||
| 702 | +}' | ||
| 703 | +``` | ||
| 704 | + | ||
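| 705 | +仅查 **keyword 类**(`value_keyword` + `value_text.*.keyword`)时可从上面 `should` 里删掉两条 `match`;仅 **全文** 时可只保留两条 `match`。两种情况都要同步调小 `minimum_should_match`:前者剩 4 条 `should`,其中两条 `value_keyword` 通常互斥,最多设为 3;后者剩 2 条,最多设为 2。否则要求命中的条数超过可满足的条数,查询不会返回结果。 | ||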
| 706 | + | ||
| 548 | --- | 707 | --- |
| 549 | 708 | ||
| 550 | ### 4. 统计查询 | 709 | ### 4. 统计查询 |
| 551 | 710 | ||
| 552 | #### 4.1 统计有 category1_name 的文档数量 | 711 | #### 4.1 统计有 category1_name 的文档数量 |
| 553 | ```bash | 712 | ```bash |
| 554 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ | 713 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_count?pretty' -H 'Content-Type: application/json' -d '{ |
| 555 | "query": { | 714 | "query": { |
| 556 | "bool": { | 715 | "bool": { |
| 557 | "filter": [ | 716 | "filter": [ |
| @@ -564,7 +723,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -564,7 +723,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 564 | 723 | ||
| 565 | #### 4.2 统计有 specifications 的文档数量 | 724 | #### 4.2 统计有 specifications 的文档数量 |
| 566 | ```bash | 725 | ```bash |
| 567 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ | 726 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_count?pretty' -H 'Content-Type: application/json' -d '{ |
| 568 | "query": { | 727 | "query": { |
| 569 | "bool": { | 728 | "bool": { |
| 570 | "filter": [ | 729 | "filter": [ |
| @@ -581,7 +740,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -581,7 +740,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 581 | 740 | ||
| 582 | #### 5.1 查找没有 category1_name 但有 category 的文档(MySQL 有数据但 ES 没有) | 741 | #### 5.1 查找没有 category1_name 但有 category 的文档(MySQL 有数据但 ES 没有) |
| 583 | ```bash | 742 | ```bash |
| 584 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 743 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 585 | "query": { | 744 | "query": { |
| 586 | "bool": { | 745 | "bool": { |
| 587 | "filter": [ | 746 | "filter": [ |
| @@ -599,7 +758,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -599,7 +758,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 599 | 758 | ||
| 600 | #### 5.2 查找有 option 但没有 specifications 的文档(数据转换问题) | 759 | #### 5.2 查找有 option 但没有 specifications 的文档(数据转换问题) |
| 601 | ```bash | 760 | ```bash |
| 602 | -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 761 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 603 | "query": { | 762 | "query": { |
| 604 | "bool": { | 763 | "bool": { |
| 605 | "filter": [ | 764 | "filter": [ |
docs/相关性检索优化说明.md
| @@ -850,3 +850,52 @@ title.zh: Dockers 男士ç»å…¸ç‰ˆåž‹å·¥ä½œæ—¥å¡å…¶è‰²æ™ºèƒ½360度弹力裤(æ | @@ -850,3 +850,52 @@ title.zh: Dockers 男士ç»å…¸ç‰ˆåž‹å·¥ä½œæ—¥å¡å…¶è‰²æ™ºèƒ½360度弹力裤(æ | ||
| 850 | Rerank score: 0.0981 | 850 | Rerank score: 0.0981 |
| 851 | title.en: Lazy One Pajama Shorts for Men, Men's Pajama Bottoms, Sleepwear | 851 | title.en: Lazy One Pajama Shorts for Men, Men's Pajama Bottoms, Sleepwear |
| 852 | title.zh: 懒人男士睡裤,男式家居裤,睡眠服饰 | 852 | title.zh: 懒人男士睡裤,男式家居裤,睡眠服饰 |
| 853 | + | ||
| 854 | + | ||
| 855 | + | ||
| 856 | + | ||
| 857 | +q=修身牛仔裤 | ||
| 858 | + | ||
| 859 | +这些好结果得分很低: | ||
| 860 | + | ||
| 861 | +rerank_score:0.0564 | ||
| 862 | + "en": "Judy Blue Women's High Waist Button Fly Skinny Jeans 82319", | ||
| 863 | + "zh": "Judy Blue 女士高腰纽扣开叉修身牛仔裤 82319" | ||
| 864 | + | ||
| 865 | + | ||
| 866 | +rerank_score:0.0790 | ||
| 867 | + "en": "2025 New Fashion European and American Women's Jeans High-Waisted Slim Straight Denim Pants Popular Floor-Length Pants", | ||
| 868 | + "zh": "2025新款欧美风女式高腰显瘦直筒牛仔裤 时尚及地长裤" | ||
| 869 | + | ||
| 870 | + | ||
| 871 | +rerank_score:0.0822 | ||
| 872 | + "en": "roswear Women's Trendy Stretchy Flare Jeans Mid Rise Bootcut Curvy Denim Pants", | ||
| 873 | + "zh": "Roswear 女士时尚弹力喇叭牛仔裤 中腰高腰修身直筒牛仔裤" | ||
| 874 | + | ||
| 875 | + | ||
| 876 | +rerank_score:0.0956 | ||
| 877 | + "en": "POSHGLAM Women's Maternity Jeans Over Belly 29'' Skinny Denim Jeggings Comfy Stretch Clearance Pregnancy Pants", | ||
| 878 | + "zh": "POSHGLAM 女士孕产期高腰显瘦牛仔紧身裤 29英寸 紧身弹力孕妇裤 休闲舒适 清仓特价" | ||
| 879 | + | ||
| 880 | +(带有 Slim Stretch Jeans,但是打分只有0.0135,极低) | ||
| 881 | +rerank_score:0.0135 | ||
| 882 | + "en": "European and American Export Temu American Retro Sexy Bell-Bottomed Pants Slim Slim Stretch Jeans Women's Pants", | ||
| 883 | + "zh": "欧美出口 蒂姆美国复古性感喇叭裤 修身弹力女裤" | ||
| 884 | + | ||
| 885 | + | ||
| 886 | +这几个结果比较差,但是得分很高: | ||
| 887 | + | ||
| 888 | +rerank_score:0.4692 | ||
| 889 | + "en": "American Vintage Low Waist Non-Elastic Washed Straight-Leg Jeans Women's Autumn New Street Wide Leg Denim Women's Pants", | ||
| 890 | + "zh": "美式复古低腰无弹洗水直筒阔腿牛仔裤 女士秋季新款阔腿牛仔裤" | ||
| 891 | + | ||
| 892 | + | ||
| 893 | +rerank_score:0.4784 | ||
| 894 | + "en": "Europe and the United States cross-border foreign trade 2025 spring and summer new Amazon independent station washed waist adjustable Denim pants", | ||
| 895 | + "zh": "欧美跨境外贸2025春夏新款亚马逊独立站洗水腰 adjustable 牛仔裤" | ||
| 896 | + | ||
| 897 | + | ||
| 898 | +rerank_score:0.5849 | ||
| 899 | + "zh": "新款女士修身仿旧牛仔短裤 – 休闲性感磨边水洗牛仔短裤,时尚舒", | ||
| 900 | + "en": "New Women's Slim-fit Vintage Washed Denim Shorts – Casual Sexy Frayed Hem, Fashionable & Comfortable" | ||
| 901 | + |
scripts/evaluation/eval_framework/prompts.py
| @@ -175,12 +175,6 @@ Typical examples: | @@ -175,12 +175,6 @@ Typical examples: | ||
| 175 | - Barely acceptable substitute → **Low Relevant** | 175 | - Barely acceptable substitute → **Low Relevant** |
| 176 | - Hardly substitutable at all → **Irrelevant** | 176 | - Hardly substitutable at all → **Irrelevant** |
| 177 | 177 | ||
| 178 | -6. **When product information is insufficient, do not treat “cannot confirm” as “conflict”.** | ||
| 179 | - If a product does not mention an attribute, that does not mean the attribute is definitely violated. | ||
| 180 | - Therefore: | ||
| 181 | - - If the attribute is not mentioned or cannot be confirmed, prefer **High Relevant**; | ||
| 182 | - - Only treat it as a conflict when the product information clearly shows the opposite of the query requirement. | ||
| 183 | - | ||
| 184 | Query: {query} | 178 | Query: {query} |
| 185 | {intent_suffix} | 179 | {intent_suffix} |
| 186 | 180 | ||
| @@ -319,12 +313,6 @@ _CLASSIFY_TEMPLATE_ZH = """ä½ æ˜¯ä¸€ä¸ªæœé¥°ç”µå•†æœç´¢ç³»ç»Ÿä¸çš„ç›¸å…³æ€§åˆ | @@ -319,12 +313,6 @@ _CLASSIFY_TEMPLATE_ZH = """ä½ æ˜¯ä¸€ä¸ªæœé¥°ç”µå•†æœç´¢ç³»ç»Ÿä¸çš„ç›¸å…³æ€§åˆ | ||
| 319 | - 勉强替代品 → **弱相关** | 313 | - 勉强替代品 → **弱相关** |
| 320 | - 几乎不可替代 → **不相关** | 314 | - 几乎不可替代 → **不相关** |
| 321 | 315 | ||
| 322 | -6. **若商品信息不足,不要把“无法确认”误判为“冲突”。** | ||
| 323 | - 商品未写明某属性,不等于该属性一定不符合。 | ||
| 324 | - 因此: | ||
| 325 | - - 未提及 / 无法确认,优先按“基本相关”处理; | ||
| 326 | - - 只有当商品信息明确显示与查询要求相反时,才视为属性冲突。 | ||
| 327 | - | ||
| 328 | 查询:{query} | 316 | 查询:{query} |
| 329 | {intent_suffix} | 317 | {intent_suffix} |
| 330 | 318 |