diff --git a/offline_tasks/scripts/generate_session.py b/offline_tasks/scripts/generate_session.py index e49f8e0..e3779ba 100644 --- a/offline_tasks/scripts/generate_session.py +++ b/offline_tasks/scripts/generate_session.py @@ -190,6 +190,10 @@ def main(): df = pd.read_sql(sql_query, engine) logger.info(f"获取到 {len(df)} 条记录") + # Normalize ID dtypes: item_id to int, user_id to str. NOTE(review): astype(int) raises on NULL/NaN ids; confirm the query never returns them + df['item_id'] = df['item_id'].astype(int) + df['user_id'] = df['user_id'].astype(str) + # Debug: 显示数据详情 if args.debug: log_dataframe_info(logger, df, "用户行为数据", sample_size=10) diff --git a/offline_tasks/scripts/i2i_content_similar.py b/offline_tasks/scripts/i2i_content_similar.py index bf90f4d..d73c91b 100644 --- a/offline_tasks/scripts/i2i_content_similar.py +++ b/offline_tasks/scripts/i2i_content_similar.py @@ -49,6 +49,8 @@ def get_active_items(engine): """ df = pd.read_sql(sql_query, engine) + # Cast item_id to int before building the returned list. NOTE(review): astype(int) raises on NULL/NaN ids; confirm the query never returns them + df['item_id'] = df['item_id'].astype(int) return df['item_id'].tolist() diff --git a/offline_tasks/scripts/interest_aggregation.py b/offline_tasks/scripts/interest_aggregation.py index 763f27c..6c220ef 100644 --- a/offline_tasks/scripts/interest_aggregation.py +++ b/offline_tasks/scripts/interest_aggregation.py @@ -303,6 +303,14 @@ def main(): df = pd.read_sql(sql_query, engine) logger.info(f"获取到 {len(df)} 条记录") + # Normalize ID dtypes: item_id (and category_id/supplier_id when present) to int, user_id to str. NOTE(review): astype(int) raises on NULL/NaN ids; confirm the query never returns them + df['item_id'] = df['item_id'].astype(int) + df['user_id'] = df['user_id'].astype(str) + if 'category_id' in df.columns: + df['category_id'] = df['category_id'].astype(int) + if 'supplier_id' in df.columns: + df['supplier_id'] = df['supplier_id'].astype(int) + # 记录数据信息 log_dataframe_info(logger, df, "用户行为数据") diff --git a/offline_tasks/scripts/tag_category_similar.py b/offline_tasks/scripts/tag_category_similar.py index c48549f..2f40f9f 100644 --- a/offline_tasks/scripts/tag_category_similar.py +++ b/offline_tasks/scripts/tag_category_similar.py @@ -58,6 +58,12 @@ if args.debug: # 执行 SQL 查询并将结果加载到 pandas DataFrame df = pd.read_sql(sql_query, engine) +# Cast category_id/supplier_id to int when those columns are present. NOTE(review): astype(int) raises on NULL/NaN ids; confirm the query never returns them +if 'category_id' in df.columns: + 
df['category_id'] = df['category_id'].astype(int) +if 'supplier_id' in df.columns: + df['supplier_id'] = df['supplier_id'].astype(int) + if args.debug: print(f"[DEBUG] 查询完成,共 {len(df)} 条订单记录") -- libgit2 0.21.2