Commit 0b73c877f36d43093c7b4fcd4cf662a40a58a95e

Authored by tangwang
1 parent d35d18eb

fix

offline_tasks/a.py 0 → 100644
@@ -0,0 +1,7 @@ @@ -0,0 +1,7 @@
  1 +from modelscope.pipelines import pipeline
  2 +from modelscope.utils.constant import Tasks
  3 +from modelscope.outputs import OutputKeys
  4 +
  5 +img_captioning = pipeline(Tasks.image_captioning, model='iic/ofa_image-caption_coco_distilled_en', model_revision='master')
  6 +result = img_captioning('https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-captioning/donuts.jpeg')
  7 +print(result[OutputKeys.CAPTION]) # 'a wooden table topped with different types of donuts'
0 \ No newline at end of file 8 \ No newline at end of file
offline_tasks/scripts/a.py deleted
@@ -1,36 +0,0 @@ @@ -1,36 +0,0 @@
1 -from modelscope import AutoProcessor, Gemma3nForConditionalGeneration  
2 -from PIL import Image  
3 -import requests  
4 -import torch  
5 -model_id = "google/gemma-3n-e4b-it"  
6 -model = Gemma3nForConditionalGeneration.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16,).eval()  
7 -processor = AutoProcessor.from_pretrained(model_id)  
8 -messages = [  
9 - {  
10 - "role": "system",  
11 - "content": [{"type": "text", "text": "You are a helpful assistant."}]  
12 - },  
13 - {  
14 - "role": "user",  
15 - "content": [  
16 - {"type": "image", "image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},  
17 - {"type": "text", "text": "Describe this image in detail."}  
18 - ]  
19 - }  
20 -]  
21 -inputs = processor.apply_chat_template(  
22 - messages,  
23 - add_generation_prompt=True,  
24 - tokenize=True,  
25 - return_dict=True,  
26 - return_tensors="pt",  
27 -).to(model.device)  
28 -input_len = inputs["input_ids"].shape[-1]  
29 -with torch.inference_mode():  
30 - generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)  
31 - generation = generation[0][input_len:]  
32 -decoded = processor.decode(generation, skip_special_tokens=True)  
33 -print(decoded)  
34 -# **Overall Impression:** The image is a close-up shot of a vibrant garden scene,  
35 -# focusing on a cluster of pink cosmos flowers and a busy bumblebee.  
36 -# It has a slightly soft, natural feel, likely captured in daylight.  
37 \ No newline at end of file 0 \ No newline at end of file
offline_tasks/scripts/i2i_deepwalk.py
@@ -280,6 +280,10 @@ def main(): @@ -280,6 +280,10 @@ def main():
280 df = pd.read_sql(sql_query, engine) 280 df = pd.read_sql(sql_query, engine)
281 logger.info(f"获取到 {len(df)} 条记录") 281 logger.info(f"获取到 {len(df)} 条记录")
282 282
  283 + # Normalize ID dtypes: item_id as int, user_id as str
  284 + df['item_id'] = df['item_id'].astype(int)
  285 + df['user_id'] = df['user_id'].astype(str)
  286 +
283 # 记录数据信息 287 # 记录数据信息
284 log_dataframe_info(logger, df, "用户行为数据") 288 log_dataframe_info(logger, df, "用户行为数据")
285 289
offline_tasks/scripts/i2i_item_behavior.py
@@ -56,6 +56,10 @@ if args.debug: @@ -56,6 +56,10 @@ if args.debug:
56 # 执行 SQL 查询并将结果加载到 pandas DataFrame 56 # 执行 SQL 查询并将结果加载到 pandas DataFrame
57 df = pd.read_sql(sql_query, engine) 57 df = pd.read_sql(sql_query, engine)
58 58
  59 +# Normalize ID dtypes: item_id as int, user_id as str
  60 +df['item_id'] = df['item_id'].astype(int)
  61 +df['user_id'] = df['user_id'].astype(str) # user_id保持为字符串
  62 +
59 if args.debug: 63 if args.debug:
60 print(f"[DEBUG] 查询完成,共 {len(df)} 条记录") 64 print(f"[DEBUG] 查询完成,共 {len(df)} 条记录")
61 print(f"[DEBUG] 唯一用户数: {df['user_id'].nunique()}") 65 print(f"[DEBUG] 唯一用户数: {df['user_id'].nunique()}")
offline_tasks/scripts/i2i_session_w2v.py
@@ -227,6 +227,10 @@ def main(): @@ -227,6 +227,10 @@ def main():
227 df = pd.read_sql(sql_query, engine) 227 df = pd.read_sql(sql_query, engine)
228 logger.info(f"获取到 {len(df)} 条记录") 228 logger.info(f"获取到 {len(df)} 条记录")
229 229
  230 + # Normalize ID dtypes: item_id as int, user_id as str
  231 + df['item_id'] = df['item_id'].astype(int)
  232 + df['user_id'] = df['user_id'].astype(str)
  233 +
230 # 记录数据信息 234 # 记录数据信息
231 log_dataframe_info(logger, df, "用户行为数据") 235 log_dataframe_info(logger, df, "用户行为数据")
232 236
offline_tasks/scripts/i2i_swing.py
@@ -18,7 +18,7 @@ from config.offline_config import ( @@ -18,7 +18,7 @@ from config.offline_config import (
18 from scripts.debug_utils import ( 18 from scripts.debug_utils import (
19 setup_debug_logger, log_dataframe_info, log_dict_stats, 19 setup_debug_logger, log_dataframe_info, log_dict_stats,
20 save_readable_index, load_name_mappings_from_file, log_algorithm_params, 20 save_readable_index, load_name_mappings_from_file, log_algorithm_params,
21 - log_processing_step, clean_item_name 21 + log_processing_step
22 ) 22 )
23 23
24 24
@@ -285,6 +285,10 @@ def main(): @@ -285,6 +285,10 @@ def main():
285 df = pd.read_sql(sql_query, engine) 285 df = pd.read_sql(sql_query, engine)
286 logger.info(f"获取到 {len(df)} 条记录") 286 logger.info(f"获取到 {len(df)} 条记录")
287 287
  288 + # Normalize ID dtypes: item_id as int, user_id as str
  289 + df['item_id'] = df['item_id'].astype(int)
  290 + df['user_id'] = df['user_id'].astype(str)
  291 +
288 # Debug: 显示数据详情 292 # Debug: 显示数据详情
289 if args.debug: 293 if args.debug:
290 log_dataframe_info(logger, df, "用户行为数据", sample_size=10) 294 log_dataframe_info(logger, df, "用户行为数据", sample_size=10)