From 0b73c877f36d43093c7b4fcd4cf662a40a58a95e Mon Sep 17 00:00:00 2001 From: tangwang Date: Thu, 23 Oct 2025 12:10:24 +0800 Subject: [PATCH] fix --- offline_tasks/a.py | 7 +++++++ offline_tasks/scripts/a.py | 36 ------------------------------------ offline_tasks/scripts/i2i_deepwalk.py | 4 ++++ offline_tasks/scripts/i2i_item_behavior.py | 4 ++++ offline_tasks/scripts/i2i_session_w2v.py | 4 ++++ offline_tasks/scripts/i2i_swing.py | 6 +++++- 6 files changed, 24 insertions(+), 37 deletions(-) create mode 100644 offline_tasks/a.py delete mode 100644 offline_tasks/scripts/a.py diff --git a/offline_tasks/a.py b/offline_tasks/a.py new file mode 100644 index 0000000..caacc46 --- /dev/null +++ b/offline_tasks/a.py @@ -0,0 +1,7 @@ +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.outputs import OutputKeys + +img_captioning = pipeline(Tasks.image_captioning, model='iic/ofa_image-caption_coco_distilled_en', model_revision='master') +result = img_captioning('https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-captioning/donuts.jpeg') +print(result[OutputKeys.CAPTION]) # 'a wooden table topped with different types of donuts' \ No newline at end of file diff --git a/offline_tasks/scripts/a.py b/offline_tasks/scripts/a.py deleted file mode 100644 index b5e53fe..0000000 --- a/offline_tasks/scripts/a.py +++ /dev/null @@ -1,36 +0,0 @@ -from modelscope import AutoProcessor, Gemma3nForConditionalGeneration -from PIL import Image -import requests -import torch -model_id = "google/gemma-3n-e4b-it" -model = Gemma3nForConditionalGeneration.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16,).eval() -processor = AutoProcessor.from_pretrained(model_id) -messages = [ - { - "role": "system", - "content": [{"type": "text", "text": "You are a helpful assistant."}] - }, - { - "role": "user", - "content": [ - {"type": "image", "image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"}, - {"type": "text", "text": "Describe this image in detail."} - ] - } -] -inputs = processor.apply_chat_template( - messages, - add_generation_prompt=True, - tokenize=True, - return_dict=True, - return_tensors="pt", -).to(model.device) -input_len = inputs["input_ids"].shape[-1] -with torch.inference_mode(): - generation = model.generate(**inputs, max_new_tokens=100, do_sample=False) - generation = generation[0][input_len:] -decoded = processor.decode(generation, skip_special_tokens=True) -print(decoded) -# **Overall Impression:** The image is a close-up shot of a vibrant garden scene, -# focusing on a cluster of pink cosmos flowers and a busy bumblebee. -# It has a slightly soft, natural feel, likely captured in daylight. \ No newline at end of file diff --git a/offline_tasks/scripts/i2i_deepwalk.py b/offline_tasks/scripts/i2i_deepwalk.py index 035cab8..973cdae 100644 --- a/offline_tasks/scripts/i2i_deepwalk.py +++ b/offline_tasks/scripts/i2i_deepwalk.py @@ -280,6 +280,10 @@ def main(): df = pd.read_sql(sql_query, engine) logger.info(f"获取到 {len(df)} 条记录") + # 确保ID为整数类型 + df['item_id'] = df['item_id'].astype(int) + df['user_id'] = df['user_id'].astype(str) + # 记录数据信息 log_dataframe_info(logger, df, "用户行为数据") diff --git a/offline_tasks/scripts/i2i_item_behavior.py b/offline_tasks/scripts/i2i_item_behavior.py index 6920a69..07ae174 100644 --- a/offline_tasks/scripts/i2i_item_behavior.py +++ b/offline_tasks/scripts/i2i_item_behavior.py @@ -56,6 +56,10 @@ if args.debug: # 执行 SQL 查询并将结果加载到 pandas DataFrame df = pd.read_sql(sql_query, engine) +# 确保ID为整数类型 +df['item_id'] = df['item_id'].astype(int) +df['user_id'] = df['user_id'].astype(str) # user_id保持为字符串 + if args.debug: print(f"[DEBUG] 查询完成,共 {len(df)} 条记录") print(f"[DEBUG] 唯一用户数: {df['user_id'].nunique()}") diff --git a/offline_tasks/scripts/i2i_session_w2v.py b/offline_tasks/scripts/i2i_session_w2v.py index ae9c634..a65be3a 100644 --- a/offline_tasks/scripts/i2i_session_w2v.py +++ b/offline_tasks/scripts/i2i_session_w2v.py @@ -227,6 +227,10 @@ def main(): df = pd.read_sql(sql_query, engine) logger.info(f"获取到 {len(df)} 条记录") + # 确保ID为整数类型 + df['item_id'] = df['item_id'].astype(int) + df['user_id'] = df['user_id'].astype(str) + # 记录数据信息 log_dataframe_info(logger, df, "用户行为数据") diff --git a/offline_tasks/scripts/i2i_swing.py b/offline_tasks/scripts/i2i_swing.py index b6e7cb8..d2dc1d7 100644 --- a/offline_tasks/scripts/i2i_swing.py +++ b/offline_tasks/scripts/i2i_swing.py @@ -18,7 +18,7 @@ from config.offline_config import ( from scripts.debug_utils import ( setup_debug_logger, log_dataframe_info, log_dict_stats, save_readable_index, load_name_mappings_from_file, log_algorithm_params, - log_processing_step, clean_item_name + log_processing_step ) @@ -285,6 +285,10 @@ def main(): df = pd.read_sql(sql_query, engine) logger.info(f"获取到 {len(df)} 条记录") + # 确保ID为整数类型 + df['item_id'] = df['item_id'].astype(int) + df['user_id'] = df['user_id'].astype(str) + # Debug: 显示数据详情 if args.debug: log_dataframe_info(logger, df, "用户行为数据", sample_size=10) -- libgit2 0.21.2