#!/bin/bash
#
# Offline recommendation pipeline driver.
# Runs the prerequisite tasks, the i2i similarity tasks and the interest
# aggregation task under a memory watchdog, then loads the results into Redis.

# Project root; used both for PYTHONPATH and as the working directory.
OFFLINE_TASKS_DIR="/home/tw/recommendation/offline_tasks"

# Put the project root on PYTHONPATH so the scripts can import the
# db_service, config, deepwalk, ... modules. The ${VAR:+...} form avoids a
# dangling ':' when PYTHONPATH was previously unset or empty.
export PYTHONPATH="${OFFLINE_TASKS_DIR}${PYTHONPATH:+:${PYTHONPATH}}"

# Every later path (scripts/, logs/, output/, collaboration/) is relative to
# the project root, so refuse to continue if we cannot get there.
cd "$OFFLINE_TASKS_DIR" || {
  echo "cannot cd to $OFFLINE_TASKS_DIR" >&2
  exit 1
}

# Manual backup steps, kept for reference (disabled):
# mkdir bak___before_rm_run_all_py
# mv output logs nohup.out bak___before_rm_run_all_py/
# mkdir output
# mkdir logs

# ============================================================================
# Configuration
# ============================================================================

# Algorithm parameters
LOOKBACK_DAYS=768
TOP_N=50
DEBUG_MODE="--debug" # leave empty to disable debug mode

# Redis settings
REDIS_HOST="localhost"
REDIS_PORT=6379

# Memory watchdog thresholds, in GB
MEM_WARN_THRESHOLD=25 # log a warning at or above this
MEM_KILL_THRESHOLD=35 # force-kill the task at or above this

# ============================================================================
# Helper functions
# ============================================================================

#######################################
# Print one line describing the process tree rooted at a PID:
#   "<total RSS in kB> <pid> <pid> ..."
# Prints nothing when the root PID is not in the process table.
# Arguments: $1 - root PID
#######################################
_mem_tree_snapshot() {
  ps -e -o pid= -o ppid= -o rss= 2>/dev/null | awk -v root="$1" '
    { ppid[$1] = $2; rss[$1] = $3 }
    END {
      if (!(root in ppid)) exit
      member[root] = 1
      # Grow the set until no process with a parent in the set is left out.
      do {
        grew = 0
        for (p in ppid) {
          if (!(p in member) && (ppid[p] in member)) {
            member[p] = 1
            grew = 1
          }
        }
      } while (grew)
      total = 0
      list = ""
      for (p in member) {
        total += rss[p]
        list = list " " p
      }
      printf "%d%s\n", total, list
    }'
}

#######################################
# Memory watchdog: polls a task until it exits, warning or force-killing it
# when its resident memory crosses the configured thresholds.
#
# The RSS of the PID *and all of its descendants* is summed, and on overrun
# the whole tree is killed. The task may be started through a wrapper
# subshell, in which case the given PID is not the process that actually
# consumes the memory.
#
# Globals:
#   MEM_WARN_THRESHOLD  (read) warn at or above this many GB; unset = never
#   MEM_KILL_THRESHOLD  (read) kill -9 at or above this many GB; unset = never
#   MEM_CHECK_INTERVAL  (read, optional) seconds between polls, default 60
# Arguments:
#   $1 - PID of the task to watch
#   $2 - task name used in log messages
# Outputs:
#   Warning / kill messages on stdout, appended to logs/memory_monitor.log
#######################################
check_memory() {
  local pid=$1
  local task_name=$2
  local interval=${MEM_CHECK_INTERVAL:-60}
  local snapshot rss_kb mem_gb timestamp level
  local -a fields tree_pids

  while kill -0 "$pid" 2>/dev/null; do
    snapshot=$(_mem_tree_snapshot "$pid")
    if [[ -n "$snapshot" ]]; then
      read -r -a fields <<<"$snapshot"
      rss_kb=${fields[0]}
      tree_pids=("${fields[@]:1}")

      # awk does the float maths so the watchdog does not depend on bc.
      mem_gb=$(awk -v kb="$rss_kb" 'BEGIN { printf "%.2f", kb / 1048576 }')
      timestamp=$(date '+%Y-%m-%d %H:%M:%S')
      level=$(awk -v m="$mem_gb" \
        -v warn_gb="${MEM_WARN_THRESHOLD:-}" \
        -v kill_gb="${MEM_KILL_THRESHOLD:-}" '
        BEGIN {
          if (kill_gb != "" && m + 0 >= kill_gb + 0) print "kill"
          else if (warn_gb != "" && m + 0 >= warn_gb + 0) print "warn"
          else print "ok"
        }')

      case "$level" in
        kill)
          echo "[$timestamp] ❌ [$task_name] 内存超限!${mem_gb}GB, 强制终止" | tee -a logs/memory_monitor.log
          # Some members may already be gone; that is not an error here.
          kill -9 "${tree_pids[@]}" 2>/dev/null
          break
          ;;
        warn)
          echo "[$timestamp] ⚠️ [$task_name] 内存警告: ${mem_gb}GB" | tee -a logs/memory_monitor.log
          ;;
      esac
    fi
    sleep "$interval"
  done
}

#######################################
# Run one pipeline task in the background under the memory watchdog and
# report its outcome.
# Arguments:
#   $1 - task name used in the banner and result messages
#   $2 - command line to run, as a single string (evaluated with eval)
# Outputs:
#   Start banner and a success / failure line on stdout
# Returns:
#   The exit status of the task command
#######################################
run_task() {
  local task_name=$1
  local task_cmd=$2
  local pid monitor_pid exit_code

  echo ""
  echo "======================================================================"
  echo "[$task_name] 开始 - $(date '+%Y-%m-%d %H:%M:%S')"
  echo "======================================================================"

  # Quoted so the string reaches eval untouched; unquoted, it would be
  # word-split and glob-expanded first, corrupting quoted arguments.
  eval "$task_cmd" &
  pid=$!

  # Start the memory watchdog alongside the task.
  check_memory "$pid" "$task_name" &
  monitor_pid=$!

  # Wait for the task itself; its status is what we report.
  wait "$pid"
  exit_code=$?

  # Stop the watchdog and reap it so no job is left behind.
  kill "$monitor_pid" 2>/dev/null
  wait "$monitor_pid" 2>/dev/null

  if ((exit_code == 0)); then
    echo "✓ [$task_name] 完成"
  else
    echo "✗ [$task_name] 失败,退出码: $exit_code"
  fi
  return "$exit_code"
}

# ============================================================================
# Environment preparation
# ============================================================================

# Kill leftover python3 jobs from a previous run. The pattern matches any
# process whose command line contains "python3" followed later by "scripts".
# pkill is a no-op when nothing matches; stderr is silenced because a process
# may exit between the match and the signal.
pkill -9 -f 'python3.*scripts' 2>/dev/null

# Create the directories the tasks write to.
mkdir -p logs output

echo "======================================================================"
echo "开始运行离线推荐任务 - $(date '+%Y-%m-%d %H:%M:%S')"
echo "配置: lookback_days=$LOOKBACK_DAYS, top_n=$TOP_N"
echo "内存监控: 警告=${MEM_WARN_THRESHOLD}GB, 终止=${MEM_KILL_THRESHOLD}GB"
echo "======================================================================"

# ============================================================================
# Prerequisite tasks
# ============================================================================

# Prerequisite 1: fetch item attributes (failure is tolerated).
if ! run_task "前置任务1: 获取商品属性" \
  "python3 scripts/fetch_item_attributes.py $DEBUG_MODE"; then
  echo "⚠️ 商品属性获取失败,但继续执行"
fi

# Prerequisite 2: generate the session files (failure aborts the pipeline).
if ! run_task "前置任务2: 生成Session文件" \
  "python3 scripts/generate_session.py --lookback_days $LOOKBACK_DAYS --format both $DEBUG_MODE"; then
  echo "❌ Session文件生成失败,退出"
  exit 1
fi

# Prerequisite 3: C++ Swing algorithm (failure is tolerated).
echo ""
echo "======================================================================"
echo "[前置任务3: C++ Swing算法] 开始 - $(date '+%Y-%m-%d %H:%M:%S')"
echo "======================================================================"
# Run in a subshell: the working directory of this script is never changed,
# and run.sh is not started at all if collaboration/ cannot be entered.
(cd collaboration && bash run.sh)
SWING_EXIT=$?
if [ "$SWING_EXIT" -eq 0 ]; then
  echo "✓ [前置任务3: C++ Swing算法] 完成"
else
  echo "⚠️ [前置任务3: C++ Swing算法] 失败,但继续执行"
fi

# ============================================================================
# i2i similarity tasks (each failure is reported and the pipeline continues)
# ============================================================================

# Task 1: Python Swing
if ! run_task "Task 1: Python Swing算法" \
  "python3 scripts/i2i_swing.py --lookback_days $LOOKBACK_DAYS --top_n $TOP_N --use_daily_session $DEBUG_MODE"; then
  echo "⚠️ Python Swing失败,但继续执行"
fi

# Task 2: Session W2V
if ! run_task "Task 2: Session W2V" \
  "python3 scripts/i2i_session_w2v.py --lookback_days $LOOKBACK_DAYS --top_n $TOP_N --save_model $DEBUG_MODE"; then
  echo "⚠️ Session W2V失败,但继续执行"
fi

# Task 3: DeepWalk
if ! run_task "Task 3: DeepWalk" \
  "python3 scripts/i2i_deepwalk.py --lookback_days $LOOKBACK_DAYS --top_n $TOP_N --save_model --save_graph $DEBUG_MODE"; then
  echo "⚠️ DeepWalk失败,但继续执行"
fi

# Task 4: content similarity
if ! run_task "Task 4: 内容相似度" \
  "python3 scripts/i2i_content_similar.py --top_n $TOP_N $DEBUG_MODE"; then
  echo "⚠️ 内容相似度失败,但继续执行"
fi

# Task 5: item behavior similarity
if ! run_task "Task 5: Item行为相似度" \
  "python3 scripts/i2i_item_behavior.py --lookback_days $LOOKBACK_DAYS --top_n $TOP_N $DEBUG_MODE"; then
  echo "⚠️ Item行为相似度失败,但继续执行"
fi

# Task 6: tag / category similarity
if ! run_task "Task 6: Tag分类相似度" \
  "python3 scripts/tag_category_similar.py --lookback_days $LOOKBACK_DAYS --top_n $TOP_N $DEBUG_MODE"; then
  echo "⚠️ Tag分类相似度失败,但继续执行"
fi

# ============================================================================
# Interest aggregation task
# ============================================================================

# Task 7: interest aggregation. Uses a fixed --top_n of 1000 rather than
# $TOP_N; a failure is reported and the pipeline continues.
if ! run_task "Task 7: 兴趣聚合" \
  "python3 scripts/interest_aggregation.py --lookback_days $LOOKBACK_DAYS --top_n 1000 $DEBUG_MODE"; then
  echo "⚠️ 兴趣聚合失败,但继续执行"
fi

# ============================================================================
# Load results into Redis
# ============================================================================

echo ""
echo "======================================================================"
echo "[加载到Redis] 开始 - $(date '+%Y-%m-%d %H:%M:%S')"
echo "======================================================================"
# Runs in the foreground, without the memory watchdog.
python3 scripts/load_index_to_redis.py --redis-host "$REDIS_HOST" --redis-port "$REDIS_PORT"
LOAD_EXIT=$?

# A failed load aborts the pipeline.
if [ "$LOAD_EXIT" -eq 0 ]; then
  echo "✓ [加载到Redis] 完成"
else
  echo "❌ [加载到Redis] 失败,退出码: $LOAD_EXIT"
  exit 1
fi

# ============================================================================
# Done
# ============================================================================

echo ""
echo "======================================================================"
echo "所有任务完成 - $(date '+%Y-%m-%d %H:%M:%S')"
echo "======================================================================"

# Tell the operator where each stage writes its results.
echo ""
echo "输出文件位置:"
echo " - 商品属性: output/item_attributes_mappings.json"
echo " - Session文件: output/session.txt.*"
echo " - C++ Swing: collaboration/output/swing_similar.txt"
echo " - Python算法: output/i2i_*.txt"
echo " - Item行为相似度: output/i2i_item_behavior_*.txt"
echo " - Tag分类相似度: output/tag_category_similar_*.txt"
echo " - 兴趣聚合: output/interest_aggregation_*.txt"
echo " - 日志: logs/"
echo ""