Commit 53fd5f08170ef9f554e8f8f5929364e919250d54
1 parent
0b73c877
部分文件id格式不对 fix
Showing 4 changed files with 20 additions and 0 deletions
Show diff stats
offline_tasks/scripts/generate_session.py
| ... | ... | @@ -190,6 +190,10 @@ def main(): |
| 190 | 190 | df = pd.read_sql(sql_query, engine) |
| 191 | 191 | logger.info(f"获取到 {len(df)} 条记录") |
| 192 | 192 | |
| 193 | + # Normalize ID column types: item_id as int, user_id as str | |
| 194 | + df['item_id'] = df['item_id'].astype(int) | |
| 195 | + df['user_id'] = df['user_id'].astype(str) | |
| 196 | + | |
| 193 | 197 | # Debug: 显示数据详情 |
| 194 | 198 | if args.debug: |
| 195 | 199 | log_dataframe_info(logger, df, "用户行为数据", sample_size=10) | ... | ... |
offline_tasks/scripts/i2i_content_similar.py
offline_tasks/scripts/interest_aggregation.py
| ... | ... | @@ -303,6 +303,14 @@ def main(): |
| 303 | 303 | df = pd.read_sql(sql_query, engine) |
| 304 | 304 | logger.info(f"获取到 {len(df)} 条记录") |
| 305 | 305 | |
| 306 | + # Normalize ID column types: item_id/category_id/supplier_id as int, user_id as str | |
| 307 | + df['item_id'] = df['item_id'].astype(int) | |
| 308 | + df['user_id'] = df['user_id'].astype(str) | |
| 309 | + if 'category_id' in df.columns: | |
| 310 | + df['category_id'] = df['category_id'].astype(int) | |
| 311 | + if 'supplier_id' in df.columns: | |
| 312 | + df['supplier_id'] = df['supplier_id'].astype(int) | |
| 313 | + | |
| 306 | 314 | # 记录数据信息 |
| 307 | 315 | log_dataframe_info(logger, df, "用户行为数据") |
| 308 | 316 | ... | ... |
offline_tasks/scripts/tag_category_similar.py
| ... | ... | @@ -58,6 +58,12 @@ if args.debug: |
| 58 | 58 | # 执行 SQL 查询并将结果加载到 pandas DataFrame |
| 59 | 59 | df = pd.read_sql(sql_query, engine) |
| 60 | 60 | |
| 61 | +# 确保ID为整数类型 | |
| 62 | +if 'category_id' in df.columns: | |
| 63 | + df['category_id'] = df['category_id'].astype(int) | |
| 64 | +if 'supplier_id' in df.columns: | |
| 65 | + df['supplier_id'] = df['supplier_id'].astype(int) | |
| 66 | + | |
| 61 | 67 | if args.debug: |
| 62 | 68 | print(f"[DEBUG] 查询完成,共 {len(df)} 条订单记录") |
| 63 | 69 | ... | ... |