Commit 92d5eb07e1ec77734508e28fc57de39afe5df903
1 parent
b2e50710
fix: 前端直接显示了类目ID。原因:类目表中不存在该类目ID,找不到对应的类目名称,索引时直接把ID当作名称写入。修复:类目ID在映射中缺失时只记录错误日志,不再写入类目相关字段(视为无类目)。
Showing
1 changed file
with
16 additions
and
4 deletions
Show diff stats
indexer/document_transformer.py
| @@ -299,20 +299,32 @@ class SPUDocumentTransformer: | @@ -299,20 +299,32 @@ class SPUDocumentTransformer: | ||
| 299 | 299 | ||
| 300 | def _fill_category_fields(self, doc: Dict[str, Any], spu_row: pd.Series): | 300 | def _fill_category_fields(self, doc: Dict[str, Any], spu_row: pd.Series): |
| 301 | """填充类目相关字段。""" | 301 | """填充类目相关字段。""" |
| 302 | + # 数据质量兜底: | ||
| 303 | + # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目) | ||
| 304 | + # - 仅记录错误日志,不阻塞索引流程 | ||
| 305 | + | ||
| 302 | if pd.notna(spu_row.get('category_path')): | 306 | if pd.notna(spu_row.get('category_path')): |
| 303 | category_path = str(spu_row['category_path']) | 307 | category_path = str(spu_row['category_path']) |
| 304 | 308 | ||
| 305 | # 解析category_path - 这是逗号分隔的类目ID列表 | 309 | # 解析category_path - 这是逗号分隔的类目ID列表 |
| 306 | category_ids = [cid.strip() for cid in category_path.split(',') if cid.strip()] | 310 | category_ids = [cid.strip() for cid in category_path.split(',') if cid.strip()] |
| 307 | - | ||
| 308 | - # 将ID映射为名称 | 311 | + # 将ID映射为名称,如果找不到映射则记录错误并跳过 |
| 309 | category_names = [] | 312 | category_names = [] |
| 313 | + missing_ids = [] | ||
| 310 | for cid in category_ids: | 314 | for cid in category_ids: |
| 311 | if cid in self.category_id_to_name: | 315 | if cid in self.category_id_to_name: |
| 312 | category_names.append(self.category_id_to_name[cid]) | 316 | category_names.append(self.category_id_to_name[cid]) |
| 313 | else: | 317 | else: |
| 314 | - logger.error(f"Category ID {cid} not found in mapping for SPU {spu_row['id']} (title: {spu_row.get('title', 'N/A')}), category_path={category_path}") | ||
| 315 | - category_names.append(cid) # 使用ID作为备选 | 318 | + missing_ids.append(cid) |
| 319 | + | ||
| 320 | + # 如果有缺失的类目ID,记录错误日志,不写入类目字段(当成没有类目) | ||
| 321 | + if missing_ids: | ||
| 322 | + logger.error( | ||
| 323 | + f"Category ID(s) not found in mapping for SPU {spu_row.get('id')} " | ||
| 324 | + f"(title: {spu_row.get('title', 'N/A')}), missing_ids={missing_ids}, " | ||
| 325 | + f"category_path={category_path}. Treating as no-category." | ||
| 326 | + ) | ||
| 327 | + return | ||
| 316 | 328 | ||
| 317 | # 构建类目路径字符串(用于搜索) | 329 | # 构建类目路径字符串(用于搜索) |
| 318 | if category_names: | 330 | if category_names: |