Commit 53fd5f08170ef9f554e8f8f5929364e919250d54
1 parent
0b73c877
修复:部分文件中 ID 的数据类型/格式不正确
Showing
4 changed files
with
20 additions
and
0 deletions
Show diff stats
offline_tasks/scripts/generate_session.py
| @@ -190,6 +190,10 @@ def main(): | @@ -190,6 +190,10 @@ def main(): | ||
| 190 | df = pd.read_sql(sql_query, engine) | 190 | df = pd.read_sql(sql_query, engine) |
| 191 | logger.info(f"获取到 {len(df)} 条记录") | 191 | logger.info(f"获取到 {len(df)} 条记录") |
| 192 | 192 | ||
| 193 | + # Normalize ID dtypes: item_id as int, user_id as str | ||
| 194 | + df['item_id'] = df['item_id'].astype(int) | ||
| 195 | + df['user_id'] = df['user_id'].astype(str) | ||
| 196 | + | ||
| 193 | # Debug: 显示数据详情 | 197 | # Debug: 显示数据详情 |
| 194 | if args.debug: | 198 | if args.debug: |
| 195 | log_dataframe_info(logger, df, "用户行为数据", sample_size=10) | 199 | log_dataframe_info(logger, df, "用户行为数据", sample_size=10) |
offline_tasks/scripts/i2i_content_similar.py
| @@ -49,6 +49,8 @@ def get_active_items(engine): | @@ -49,6 +49,8 @@ def get_active_items(engine): | ||
| 49 | """ | 49 | """ |
| 50 | 50 | ||
| 51 | df = pd.read_sql(sql_query, engine) | 51 | df = pd.read_sql(sql_query, engine) |
| 52 | + # 确保ID为整数类型 | ||
| 53 | + df['item_id'] = df['item_id'].astype(int) | ||
| 52 | return df['item_id'].tolist() | 54 | return df['item_id'].tolist() |
| 53 | 55 | ||
| 54 | 56 |
offline_tasks/scripts/interest_aggregation.py
| @@ -303,6 +303,14 @@ def main(): | @@ -303,6 +303,14 @@ def main(): | ||
| 303 | df = pd.read_sql(sql_query, engine) | 303 | df = pd.read_sql(sql_query, engine) |
| 304 | logger.info(f"获取到 {len(df)} 条记录") | 304 | logger.info(f"获取到 {len(df)} 条记录") |
| 305 | 305 | ||
| 306 | + # Normalize ID dtypes: item_id/category_id/supplier_id as int, user_id as str | ||
| 307 | + df['item_id'] = df['item_id'].astype(int) | ||
| 308 | + df['user_id'] = df['user_id'].astype(str) | ||
| 309 | + if 'category_id' in df.columns: | ||
| 310 | + df['category_id'] = df['category_id'].astype(int) | ||
| 311 | + if 'supplier_id' in df.columns: | ||
| 312 | + df['supplier_id'] = df['supplier_id'].astype(int) | ||
| 313 | + | ||
| 306 | # 记录数据信息 | 314 | # 记录数据信息 |
| 307 | log_dataframe_info(logger, df, "用户行为数据") | 315 | log_dataframe_info(logger, df, "用户行为数据") |
| 308 | 316 |
offline_tasks/scripts/tag_category_similar.py
| @@ -58,6 +58,12 @@ if args.debug: | @@ -58,6 +58,12 @@ if args.debug: | ||
| 58 | # 执行 SQL 查询并将结果加载到 pandas DataFrame | 58 | # 执行 SQL 查询并将结果加载到 pandas DataFrame |
| 59 | df = pd.read_sql(sql_query, engine) | 59 | df = pd.read_sql(sql_query, engine) |
| 60 | 60 | ||
| 61 | +# 确保ID为整数类型 | ||
| 62 | +if 'category_id' in df.columns: | ||
| 63 | + df['category_id'] = df['category_id'].astype(int) | ||
| 64 | +if 'supplier_id' in df.columns: | ||
| 65 | + df['supplier_id'] = df['supplier_id'].astype(int) | ||
| 66 | + | ||
| 61 | if args.debug: | 67 | if args.debug: |
| 62 | print(f"[DEBUG] 查询完成,共 {len(df)} 条订单记录") | 68 | print(f"[DEBUG] 查询完成,共 {len(df)} 条订单记录") |
| 63 | 69 |