5ab1c29c
tangwang
first commit
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
"""
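Unified scheduler script for the offline tasks.
Runs every offline task in order to build the indexes required by the recommendation system.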
"""
import os
import sys
import subprocess
import argparse
import logging
from datetime import datetime
# Add the parent directory to sys.path so the configuration package can be imported
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, parent_dir)
from offline_tasks.config.offline_config import (
DEFAULT_LOOKBACK_DAYS,
DEFAULT_I2I_TOP_N,
DEFAULT_INTEREST_TOP_N
)
# Logging setup: records go to a per-day file under ./logs (next to this
# module) and to a StreamHandler (stderr by default).
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(_MODULE_DIR, 'logs')
os.makedirs(LOG_DIR, exist_ok=True)
_log_file_name = f'run_all_{datetime.now().strftime("%Y%m%d")}.log'
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(os.path.join(LOG_DIR, _log_file_name)),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

# Directory that holds the individual task scripts.
SCRIPTS_DIR = os.path.join(_MODULE_DIR, 'scripts')
def run_script(script_name, args=None):
    """
    Launch one task script from SCRIPTS_DIR in a child Python process.

    Args:
        script_name: File name of the script, resolved against SCRIPTS_DIR.
        args: Optional list of command-line arguments appended to the command.

    Returns:
        bool: True when the child process exits with status 0; False when the
        script file does not exist, the child exits non-zero, or running it
        raises any other exception.
    """
    target = os.path.join(SCRIPTS_DIR, script_name)
    if not os.path.exists(target):
        logger.error(f"Script not found: {target}")
        return False

    # Run with the same interpreter that is executing this scheduler.
    command = [sys.executable, target]
    if args:
        command += args
    logger.info(f"Running: {' '.join(command)}")

    succeeded = False
    try:
        # check=True turns a non-zero exit status into CalledProcessError;
        # the child's output is captured as text rather than streamed.
        completed = subprocess.run(
            command,
            text=True,
            capture_output=True,
            check=True,
        )
        logger.info(f"Script {script_name} completed successfully")
        logger.debug(completed.stdout)
        succeeded = True
    except subprocess.CalledProcessError as failure:
        logger.error(f"Script {script_name} failed with return code {failure.returncode}")
        logger.error(f"Error output: {failure.stderr}")
    except Exception as exc:
        logger.error(f"Unexpected error running {script_name}: {exc}")
    return succeeded
def main():
    """
    Parse the command line and run the selected offline tasks in order.

    Every task is a script launched through run_script(). Selection rules:
      * --skip-i2i disables the four i2i tasks (Swing, Session W2V, DeepWalk,
        Content-based); --skip-interest disables interest aggregation.
      * With no --only-* flag, every task that is not skipped runs.
      * With one or more --only-* flags, exactly the flagged tasks run (still
        subject to the skip flags). Previously, combining two --only-* flags
        ran nothing at all and reported success.

    Returns:
        int: 0 when every task that ran succeeded (also when no task was
        selected, in which case a warning is logged); 1 when at least one
        task failed.
    """
    parser = argparse.ArgumentParser(description='Run all offline recommendation tasks')
    parser.add_argument('--skip-i2i', action='store_true', help='Skip i2i tasks')
    parser.add_argument('--skip-interest', action='store_true', help='Skip interest aggregation')
    parser.add_argument('--only-swing', action='store_true', help='Run only Swing algorithm')
    parser.add_argument('--only-w2v', action='store_true', help='Run only Session W2V')
    parser.add_argument('--only-deepwalk', action='store_true', help='Run only DeepWalk')
    parser.add_argument('--only-content', action='store_true', help='Run only Content-based similarity')
    parser.add_argument('--only-interest', action='store_true', help='Run only interest aggregation')
    parser.add_argument('--lookback_days', type=int, default=DEFAULT_LOOKBACK_DAYS,
                        help=f'Lookback days (default: {DEFAULT_LOOKBACK_DAYS}, adjust in offline_config.py)')
    parser.add_argument('--top_n', type=int, default=DEFAULT_I2I_TOP_N,
                        help=f'Top N similar items (default: {DEFAULT_I2I_TOP_N})')
    parser.add_argument('--debug', action='store_true',
                        help='Enable debug mode for all tasks (detailed logs + readable output files)')
    args = parser.parse_args()

    logger.info("="*80)
    logger.info("Starting offline recommendation tasks")
    if args.debug:
        logger.info("🐛 DEBUG MODE ENABLED - 详细日志 + 明文输出")
    logger.info("="*80)

    lookback_args = ['--lookback_days', str(args.lookback_days)]
    top_n_args = ['--top_n', str(args.top_n)]

    # One row per task, in execution order:
    # (its --only-* attribute, the --skip-* attribute that disables it,
    #  log title, script file name, script arguments).
    tasks = [
        ('only_swing', 'skip_i2i',
         "Task 1: Running Swing algorithm for i2i similarity",
         'i2i_swing.py',
         lookback_args + top_n_args + ['--time_decay']),
        ('only_w2v', 'skip_i2i',
         "Task 2: Running Session Word2Vec for i2i similarity",
         'i2i_session_w2v.py',
         lookback_args + top_n_args + ['--save_model']),
        ('only_deepwalk', 'skip_i2i',
         "Task 3: Running DeepWalk for i2i similarity",
         'i2i_deepwalk.py',
         lookback_args + top_n_args + ['--save_model', '--save_graph']),
        ('only_content', 'skip_i2i',
         "Task 4: Running Content-based similarity",
         'i2i_content_similar.py',
         top_n_args + ['--method', 'hybrid']),
        # Interest aggregation uses its own configured top-N, not --top_n.
        ('only_interest', 'skip_interest',
         "Task 5: Running interest aggregation",
         'interest_aggregation.py',
         lookback_args + ['--top_n', str(DEFAULT_INTEREST_TOP_N)]),
    ]

    # The --only-* flags the user actually passed; empty means "no restriction".
    only_requested = {task[0] for task in tasks if getattr(args, task[0])}

    success_count = 0
    total_count = 0
    for only_attr, skip_attr, title, script, script_args in tasks:
        if getattr(args, skip_attr):
            continue
        if only_requested and only_attr not in only_requested:
            continue

        logger.info("\n" + "="*80)
        logger.info(title)
        logger.info("="*80)
        total_count += 1

        if args.debug:
            script_args = script_args + ['--debug']
        if run_script(script, script_args):
            success_count += 1

    # Summary
    logger.info("\n" + "="*80)
    logger.info(f"All tasks completed: {success_count}/{total_count} succeeded")
    logger.info("="*80)

    if total_count == 0:
        # Nothing ran (e.g. --skip-i2i together with --only-swing); do not
        # claim success, but keep the historical exit code of 0.
        logger.warning("No tasks were selected to run; check the --skip-* / --only-* flags")
        return 0
    if success_count == total_count:
        logger.info("✓ All tasks completed successfully!")
        return 0
    logger.warning(f"✗ {total_count - success_count} task(s) failed")
    return 1
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == '__main__':
    exit_code = main()
    sys.exit(exit_code)
|