Blame view

offline_tasks/run_all.py 7.32 KB
5ab1c29c   tangwang   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  """
  离线任务统一调度脚本
  按顺序运行所有离线任务,生成推荐系统所需的各种索引
  """
  import os
  import sys
  import subprocess
  import argparse
  import logging
  from datetime import datetime
  
  # 添加父目录到路径以导入配置
  parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
  sys.path.insert(0, parent_dir)
  
  from offline_tasks.config.offline_config import (
      DEFAULT_LOOKBACK_DAYS,
      DEFAULT_I2I_TOP_N,
      DEFAULT_INTEREST_TOP_N
  )
  
  # Logging setup: messages go both to a per-day log file and to the console.
  _BASE_DIR = os.path.dirname(os.path.abspath(__file__))
  LOG_DIR = os.path.join(_BASE_DIR, 'logs')
  os.makedirs(LOG_DIR, exist_ok=True)
  
  # The log file name carries the date on which this module was loaded.
  _log_file = os.path.join(LOG_DIR, f'run_all_{datetime.now().strftime("%Y%m%d")}.log')
  _log_handlers = [logging.FileHandler(_log_file), logging.StreamHandler()]
  logging.basicConfig(
      level=logging.INFO,
      format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
      handlers=_log_handlers,
  )
  logger = logging.getLogger(__name__)
  
  # Directory holding the individual task scripts.
  SCRIPTS_DIR = os.path.join(_BASE_DIR, 'scripts')
  
  
  def run_script(script_name, args=None):
      """
      Run one task script from SCRIPTS_DIR in a child Python interpreter.

      The script is started with the interpreter running this process
      (``sys.executable``). Its stdout and stderr are captured as text
      rather than streamed to the console.

      Args:
          script_name: File name of the script, relative to SCRIPTS_DIR.
          args: Optional iterable of command-line arguments. Each item is
              converted with ``str()`` before being passed to the script.

      Returns:
          bool: True if the script exited with status 0; False if the script
          path does not exist, the script exits non-zero, or launching it
          raises an exception.
      """
      script_path = os.path.join(SCRIPTS_DIR, script_name)

      if not os.path.exists(script_path):
          logger.error(f"Script not found: {script_path}")
          return False

      cmd = [sys.executable, script_path]
      if args:
          # Stringify every argument so that a numeric value cannot break the
          # log line below (str.join) or the subprocess call.
          cmd.extend(str(arg) for arg in args)

      logger.info(f"Running: {' '.join(cmd)}")

      try:
          result = subprocess.run(
              cmd,
              check=True,
              capture_output=True,
              text=True
          )
      except subprocess.CalledProcessError as e:
          logger.error(f"Script {script_name} failed with return code {e.returncode}")
          logger.error(f"Error output: {e.stderr}")
          if e.stdout:
              # Keep whatever the script printed before failing; it is often
              # needed to see how far the task got.
              logger.debug(f"Standard output: {e.stdout}")
          return False
      except Exception as e:
          # logger.exception records the traceback along with the message.
          logger.exception(f"Unexpected error running {script_name}: {e}")
          return False

      logger.info(f"Script {script_name} completed successfully")
      logger.debug(result.stdout)
      return True
  
  
  def main(argv=None):
      """
      Parse command-line options and run the selected offline tasks in order.

      Tasks are executed sequentially through ``run_script``. A failing task
      does not stop the remaining tasks; failures are only counted.

      Task selection:
          * ``--skip-i2i`` disables the four i2i tasks (Swing, Session W2V,
            DeepWalk, content-based similarity).
          * ``--skip-interest`` disables the interest aggregation task.
          * With no ``--only-*`` flag, every task that is not skipped runs.
          * With one or more ``--only-*`` flags, only the named tasks run
            (still subject to the ``--skip-*`` flags).

      Args:
          argv: Optional list of command-line arguments. When None, argparse
              reads them from ``sys.argv``.

      Returns:
          int: 0 if every executed task succeeded (or no task was selected),
          1 if at least one task failed.
      """
      parser = argparse.ArgumentParser(description='Run all offline recommendation tasks')
      parser.add_argument('--skip-i2i', action='store_true', help='Skip i2i tasks')
      parser.add_argument('--skip-interest', action='store_true', help='Skip interest aggregation')
      parser.add_argument('--only-swing', action='store_true', help='Run only Swing algorithm')
      parser.add_argument('--only-w2v', action='store_true', help='Run only Session W2V')
      parser.add_argument('--only-deepwalk', action='store_true', help='Run only DeepWalk')
      parser.add_argument('--only-content', action='store_true', help='Run only Content-based similarity')
      parser.add_argument('--only-interest', action='store_true', help='Run only interest aggregation')
      parser.add_argument('--lookback_days', type=int, default=DEFAULT_LOOKBACK_DAYS,
                          help=f'Lookback days (default: {DEFAULT_LOOKBACK_DAYS}, adjust in offline_config.py)')
      parser.add_argument('--top_n', type=int, default=DEFAULT_I2I_TOP_N,
                          help=f'Top N similar items (default: {DEFAULT_I2I_TOP_N})')
      parser.add_argument('--debug', action='store_true',
                          help='Enable debug mode for all tasks (detailed logs + readable output files)')

      args = parser.parse_args(argv)

      logger.info("="*80)
      logger.info("Starting offline recommendation tasks")
      if args.debug:
          logger.info("🐛 DEBUG MODE ENABLED - 详细日志 + 明文输出")
      logger.info("="*80)

      lookback = ['--lookback_days', str(args.lookback_days)]
      i2i_top_n = ['--top_n', str(args.top_n)]

      # Ordered task table. Each entry is:
      # (--only-* attribute, group skipped?, banner text, script file, script arguments)
      tasks = [
          ('only_swing', args.skip_i2i,
           "Task 1: Running Swing algorithm for i2i similarity",
           'i2i_swing.py', lookback + i2i_top_n + ['--time_decay']),
          ('only_w2v', args.skip_i2i,
           "Task 2: Running Session Word2Vec for i2i similarity",
           'i2i_session_w2v.py', lookback + i2i_top_n + ['--save_model']),
          ('only_deepwalk', args.skip_i2i,
           "Task 3: Running DeepWalk for i2i similarity",
           'i2i_deepwalk.py', lookback + i2i_top_n + ['--save_model', '--save_graph']),
          ('only_content', args.skip_i2i,
           "Task 4: Running Content-based similarity",
           'i2i_content_similar.py', i2i_top_n + ['--method', 'hybrid']),
          ('only_interest', args.skip_interest,
           "Task 5: Running interest aggregation",
           'interest_aggregation.py', lookback + ['--top_n', str(DEFAULT_INTEREST_TOP_N)]),
      ]

      # Several --only-* flags form a union of tasks instead of excluding
      # each other (which would leave nothing to run).
      only_requested = {spec[0] for spec in tasks if getattr(args, spec[0])}

      success_count = 0
      total_count = 0

      for only_flag, group_skipped, banner, script_name, script_args in tasks:
          if group_skipped:
              continue
          if only_requested and only_flag not in only_requested:
              continue

          logger.info("\n" + "="*80)
          logger.info(banner)
          logger.info("="*80)
          total_count += 1

          if args.debug:
              script_args = script_args + ['--debug']
          if run_script(script_name, script_args):
              success_count += 1

      # Summary
      logger.info("\n" + "="*80)
      logger.info(f"All tasks completed: {success_count}/{total_count} succeeded")
      logger.info("="*80)

      if total_count == 0:
          # Nothing ran, so do not claim success; the exit code stays 0
          # because nothing failed either.
          logger.warning("No tasks were selected to run; check the --skip-* and --only-* flags")
          return 0

      if success_count == total_count:
          logger.info("✓ All tasks completed successfully!")
          return 0

      logger.warning(f"✗ {total_count - success_count} task(s) failed")
      return 1
  
  
  if __name__ == '__main__':
      sys.exit(main())