Commit d9914b07103098d741932700baa30385a020e1a0

Authored by tangwang
1 parent 7e37f9e2

add cos sim

Showing 1 changed file with 0 additions and 210 deletions   Show diff stats
offline_tasks/run_all.py deleted
... ... @@ -1,210 +0,0 @@
1   -"""
2   -离线任务统一调度脚本
3   -按顺序运行所有离线任务,生成推荐系统所需的各种索引
4   -"""
5   -import os
6   -import sys
7   -import subprocess
8   -import argparse
9   -import logging
10   -from datetime import datetime
11   -
# Put the repository root (the directory above this package) on sys.path
# so that the `offline_tasks.config` import below resolves.
_this_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(_this_dir)
sys.path.insert(0, parent_dir)
15   -
16   -from offline_tasks.config.offline_config import (
17   - DEFAULT_LOOKBACK_DAYS,
18   - DEFAULT_I2I_TOP_N,
19   - DEFAULT_INTEREST_TOP_N
20   -)
21   -
# Directory containing this file; both the log and script folders live here.
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))

# Logging: one file per calendar day under ./logs, mirrored to the console.
LOG_DIR = os.path.join(_MODULE_DIR, 'logs')
os.makedirs(LOG_DIR, exist_ok=True)

_log_file = os.path.join(LOG_DIR, f'run_all_{datetime.now().strftime("%Y%m%d")}.log')
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(_log_file),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

# Directory holding the individual task scripts.
SCRIPTS_DIR = os.path.join(_MODULE_DIR, 'scripts')
38   -
39   -
def run_script(script_name, args=None):
    """Run one task script from SCRIPTS_DIR as a child Python process.

    The script is launched with the current interpreter (``sys.executable``)
    and its stdout/stderr are captured rather than streamed.

    Args:
        script_name: File name of the script, relative to SCRIPTS_DIR.
        args: Optional iterable of command-line arguments. Each item is
            converted with ``str()`` so numeric values may be passed as-is.

    Returns:
        bool: True if the script exists and exited with status 0;
        False if it is missing, exited non-zero, or could not be started.
    """
    script_path = os.path.join(SCRIPTS_DIR, script_name)

    if not os.path.exists(script_path):
        logger.error(f"Script not found: {script_path}")
        return False

    cmd = [sys.executable, script_path]
    if args:
        # Coerce to str: a stray int/Path would otherwise raise TypeError in
        # the join below, outside the try block, and abort the whole run.
        cmd.extend(str(arg) for arg in args)

    logger.info(f"Running: {' '.join(cmd)}")

    try:
        result = subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"Script {script_name} failed with return code {e.returncode}")
        logger.error(f"Error output: {e.stderr}")
        return False
    except Exception as e:
        # Top-level boundary for one task: keep the traceback in the log so
        # launch failures (permissions, bad interpreter, ...) are diagnosable.
        logger.exception(f"Unexpected error running {script_name}: {e}")
        return False

    logger.info(f"Script {script_name} completed successfully")
    logger.debug(result.stdout)
    return True
80   -
81   -
def main():
    """Run every offline task script in order and report the outcome.

    Parses the ``--debug`` flag, runs the two prerequisite scripts followed
    by the five numbered tasks via ``run_script``, and logs a summary. A
    failing task does not stop the remaining tasks from running.

    Returns:
        int: 0 if every task succeeded, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description='Run all offline recommendation tasks')
    parser.add_argument(
        '--debug',
        action='store_true',
        help='Enable debug mode for all tasks (detailed logs + readable output files)',
    )
    options = parser.parse_args()

    banner = "=" * 80
    lookback = ['--lookback_days', str(DEFAULT_LOOKBACK_DAYS)]
    i2i_top_n = ['--top_n', str(DEFAULT_I2I_TOP_N)]

    # Each entry: (log title, script file, base arguments,
    #              whether --debug is forwarded, extra message on failure).
    pipeline = (
        # Prerequisite 1: fetch basic item attributes
        (
            "前置任务1: 获取商品基础属性(ID->名称映射)",
            'fetch_item_attributes.py',
            [],
            True,
            "获取商品属性失败,后续任务可能无法生成可读文件",
        ),
        # Prerequisite 2: generate session files
        (
            "前置任务2: 生成用户行为Session文件",
            'generate_session.py',
            lookback + ['--format', 'both'],
            True,
            "生成session文件失败",
        ),
        # i2i behavioural similarity
        (
            "Task 1: Running Swing algorithm for i2i similarity",
            'i2i_swing.py',
            lookback + i2i_top_n + ['--time_decay'],
            True,
            None,
        ),
        (
            "Task 2: Running Session Word2Vec for i2i similarity",
            'i2i_session_w2v.py',
            lookback + i2i_top_n + ['--save_model'],
            True,
            None,
        ),
        (
            "Task 3: Running DeepWalk for i2i similarity",
            'i2i_deepwalk.py',
            lookback + i2i_top_n + ['--save_model', '--save_graph'],
            True,
            None,
        ),
        # Content-based similarity is always invoked without arguments.
        (
            "Task 4: Running Content-based similarity (ES vectors)",
            'i2i_content_similar.py',
            [],
            False,
            None,
        ),
        # Interest aggregation
        (
            "Task 5: Running interest aggregation",
            'interest_aggregation.py',
            lookback + ['--top_n', str(DEFAULT_INTEREST_TOP_N)],
            True,
            None,
        ),
    )

    logger.info(banner)
    logger.info("Starting offline recommendation tasks")
    if options.debug:
        logger.info("🐛 DEBUG MODE ENABLED - 详细日志 + 明文输出")
    logger.info(banner)

    success_count = 0
    total_count = 0

    for title, script, base_args, forwards_debug, failure_note in pipeline:
        logger.info("\n" + banner)
        logger.info(title)
        logger.info(banner)
        total_count += 1

        script_args = list(base_args)
        if forwards_debug and options.debug:
            script_args.append('--debug')

        if run_script(script, script_args):
            success_count += 1
        elif failure_note is not None:
            logger.error(failure_note)

    # Summary
    logger.info("\n" + banner)
    logger.info(f"All tasks completed: {success_count}/{total_count} succeeded")
    logger.info(banner)

    if success_count != total_count:
        logger.warning(f"✗ {total_count - success_count} task(s) failed")
        return 1

    logger.info("✓ All tasks completed successfully!")
    return 0
206   -
207   -
# Script entry point: propagate main()'s status as the process exit code.
if __name__ == '__main__':
    exit_code = main()
    sys.exit(exit_code)
210   -