Commit d9914b07103098d741932700baa30385a020e1a0

Authored by tangwang
1 parent 7e37f9e2

add cos sim

Showing 1 changed file with 0 additions and 210 deletions   Show diff stats
offline_tasks/run_all.py deleted
@@ -1,210 +0,0 @@ @@ -1,210 +0,0 @@
"""
Unified scheduler script for the offline tasks.
Runs all offline tasks in order, generating the various indexes required by the recommendation system.
"""
5 -import os  
6 -import sys  
7 -import subprocess  
8 -import argparse  
9 -import logging  
10 -from datetime import datetime  
11 -  
# Add the parent directory to sys.path so the project configuration can be imported
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, parent_dir)
15 -  
16 -from offline_tasks.config.offline_config import (  
17 - DEFAULT_LOOKBACK_DAYS,  
18 - DEFAULT_I2I_TOP_N,  
19 - DEFAULT_INTEREST_TOP_N  
20 -)  
21 -  
# Logging setup: records go to a per-day file under ./logs (next to this
# script) and are mirrored to the console through a StreamHandler.
LOG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'logs')
os.makedirs(LOG_DIR, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8: this script logs Chinese text and emoji, which a
        # non-UTF-8 locale default encoding cannot write to the log file.
        logging.FileHandler(
            os.path.join(LOG_DIR, f'run_all_{datetime.now().strftime("%Y%m%d")}.log'),
            encoding='utf-8',
        ),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Directory containing the individual task scripts launched by this module
SCRIPTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'scripts')
38 -  
39 -  
def run_script(script_name, args=None):
    """
    Run one Python script from SCRIPTS_DIR as a child process.

    The script is launched with the current interpreter (sys.executable).
    Its stdout and stderr are captured instead of being streamed to the
    console: stdout is logged at DEBUG level on success, stderr is logged at
    ERROR level on failure.

    Args:
        script_name: File name of the script, relative to SCRIPTS_DIR.
        args: Optional list of command-line arguments for the script.
            Items are converted to str before being passed on.

    Returns:
        bool: True if the script exited with return code 0; False if the
        script file does not exist, it exited non-zero, or it could not be
        launched.
    """
    script_path = os.path.join(SCRIPTS_DIR, script_name)

    if not os.path.exists(script_path):
        logger.error("Script not found: %s", script_path)
        return False

    cmd = [sys.executable, script_path]
    if args:
        cmd.extend(str(arg) for arg in args)

    logger.info("Running: %s", ' '.join(cmd))

    try:
        result = subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
            # Without this, output bytes that are invalid in the locale
            # encoding raise UnicodeDecodeError inside run(), and a script
            # that exited 0 would be reported as failed below.
            errors='replace',
        )
    except subprocess.CalledProcessError as e:
        logger.error("Script %s failed with return code %s", script_name, e.returncode)
        logger.error("Error output: %s", e.stderr)
        return False
    except Exception as e:
        # Boundary handler: one task failing to launch must not abort the
        # whole run; the traceback is kept in the log for diagnosis.
        logger.exception("Unexpected error running %s: %s", script_name, e)
        return False

    logger.info("Script %s completed successfully", script_name)
    logger.debug(result.stdout)
    return True
80 -  
81 -  
def _log_banner(title):
    """Log a task title framed by separator lines, preceded by a blank line."""
    logger.info("\n" + "="*80)
    logger.info(title)
    logger.info("="*80)


def _build_task_plan(debug):
    """
    Build the ordered task list for one run.

    Args:
        debug: When true, '--debug' is appended to the arguments of every
            task except the content-based similarity task.

    Returns:
        list: (title, script_name, script_args, failure_message) tuples in
        execution order; failure_message is None when no task-specific error
        is logged on failure.
    """
    lookback = ['--lookback_days', str(DEFAULT_LOOKBACK_DAYS)]
    i2i_top_n = ['--top_n', str(DEFAULT_I2I_TOP_N)]
    interest_top_n = ['--top_n', str(DEFAULT_INTEREST_TOP_N)]
    debug_flag = ['--debug'] if debug else []

    return [
        # Prerequisite 1: fetch basic item attributes
        (
            "前置任务1: 获取商品基础属性(ID->名称映射)",
            'fetch_item_attributes.py',
            list(debug_flag),
            "获取商品属性失败,后续任务可能无法生成可读文件",
        ),
        # Prerequisite 2: generate the session file
        (
            "前置任务2: 生成用户行为Session文件",
            'generate_session.py',
            lookback + ['--format', 'both'] + debug_flag,
            "生成session文件失败",
        ),
        # i2i behavior-similarity tasks
        (
            "Task 1: Running Swing algorithm for i2i similarity",
            'i2i_swing.py',
            lookback + i2i_top_n + ['--time_decay'] + debug_flag,
            None,
        ),
        (
            "Task 2: Running Session Word2Vec for i2i similarity",
            'i2i_session_w2v.py',
            lookback + i2i_top_n + ['--save_model'] + debug_flag,
            None,
        ),
        (
            "Task 3: Running DeepWalk for i2i similarity",
            'i2i_deepwalk.py',
            lookback + i2i_top_n + ['--save_model', '--save_graph'] + debug_flag,
            None,
        ),
        # This task has always been invoked with no arguments, not even
        # --debug. NOTE(review): presumably the script does not accept
        # --debug; confirm before changing.
        (
            "Task 4: Running Content-based similarity (ES vectors)",
            'i2i_content_similar.py',
            [],
            None,
        ),
        # Interest aggregation task
        (
            "Task 5: Running interest aggregation",
            'interest_aggregation.py',
            lookback + interest_top_n + debug_flag,
            None,
        ),
    ]


def main():
    """
    Run every offline recommendation task in sequence and report the result.

    Parses the optional --debug command-line flag, runs each task script via
    run_script, and keeps going when a task fails so that the remaining
    tasks still run.

    Returns:
        int: 0 if every task succeeded, 1 if at least one task failed.
    """
    parser = argparse.ArgumentParser(description='Run all offline recommendation tasks')
    parser.add_argument('--debug', action='store_true',
                        help='Enable debug mode for all tasks (detailed logs + readable output files)')

    args = parser.parse_args()

    logger.info("="*80)
    logger.info("Starting offline recommendation tasks")
    if args.debug:
        logger.info("🐛 DEBUG MODE ENABLED - 详细日志 + 明文输出")
    logger.info("="*80)

    tasks = _build_task_plan(args.debug)
    total_count = len(tasks)
    success_count = 0

    for title, script_name, script_args, failure_message in tasks:
        _log_banner(title)
        if run_script(script_name, script_args):
            success_count += 1
        elif failure_message is not None:
            logger.error(failure_message)

    # Summary
    logger.info("\n" + "="*80)
    logger.info(f"All tasks completed: {success_count}/{total_count} succeeded")
    logger.info("="*80)

    if success_count == total_count:
        logger.info("✓ All tasks completed successfully!")
        return 0

    logger.warning(f"✗ {total_count - success_count} task(s) failed")
    return 1
206 -  
207 -  
# Script entry point: the process exit status is main()'s return value
# (0 when every task succeeded, 1 otherwise).
if __name__ == '__main__':
    sys.exit(main())
210 -