run.sh
2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
# Enter the offline-task directory. Everything below uses paths relative to it
# (output/, logs/, run_all.py, scripts/), and the cleanup step removes
# output/* and logs/* recursively, so stop immediately if the directory cannot
# be entered instead of running those deletions somewhere else.
cd /home/tw/recommendation/offline_tasks || {
  echo "✗ 无法进入工作目录: /home/tw/recommendation/offline_tasks" >&2
  exit 1
}
#######################################
# Memory watchdog.
#
# Polls the resident set size (RSS) of a process for as long as the process
# exists. When usage reaches the warning threshold a line is printed and
# appended to logs/memory_monitor.log; when it reaches the kill threshold the
# process receives SIGKILL and monitoring stops.
#
# Usage is truncated to two decimals of a GB before it is compared, and all
# arithmetic is done by the shell itself, so no bc installation is required.
#
# Arguments:
#   $1 - PID of the process to watch
#   $2 - warning threshold in whole GB (optional, default 25)
#   $3 - kill threshold in whole GB (optional, default 35)
#   $4 - pause between polls, passed to sleep (optional, default 60)
# Outputs:
#   Warning / kill messages on stdout and in logs/memory_monitor.log
#   (path is relative to the current directory).
# Returns:
#   0 once the process is gone or has been killed, 2 on invalid thresholds.
#######################################
check_memory() {
  local pid=$1
  local threshold_warn=${2:-25}
  local threshold_kill=${3:-35}
  local interval=${4:-60}
  local mem_mb hundredths mem_gb timestamp

  # The comparisons below use integer arithmetic; reject anything else early
  # rather than failing on every poll.
  if ! [[ "$threshold_warn" =~ ^[0-9]+$ && "$threshold_kill" =~ ^[0-9]+$ ]]; then
    echo "check_memory: thresholds must be whole numbers of GB" >&2
    return 2
  fi

  # kill -0 sends no signal; it only tests whether the PID still exists.
  while kill -0 "$pid" 2>/dev/null; do
    # ps prints RSS in KiB; convert to whole MB. Empty when the process
    # disappeared between the liveness test and this call.
    mem_mb=$(ps -p "$pid" -o rss= 2>/dev/null | awk '{print int($1/1024)}')

    if [[ -n "$mem_mb" ]]; then
      # Usage in hundredths of a GB (truncated), rendered as e.g. "12.34".
      hundredths=$(( mem_mb * 100 / 1024 ))
      printf -v mem_gb '%d.%02d' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
      timestamp=$(date '+%Y-%m-%d %H:%M:%S')

      if (( hundredths >= threshold_kill * 100 )); then
        echo "[$timestamp] ❌ 内存超限!当前使用: ${mem_gb}GB (>= ${threshold_kill}GB), 强制终止进程 PID=$pid" | tee -a logs/memory_monitor.log
        # Hard limit reached: SIGKILL so the process cannot ignore the signal.
        kill -9 "$pid"
        break
      elif (( hundredths >= threshold_warn * 100 )); then
        echo "[$timestamp] ⚠️ 内存警告!当前使用: ${mem_gb}GB (>= ${threshold_warn}GB), PID=$pid" | tee -a logs/memory_monitor.log
      fi
    fi

    sleep "$interval"
  done

  return 0
}
# Clean up leftovers from previous runs: stale processes, old output, old logs.

# The deletions below are relative to the current directory, and run_all.py is
# launched from it later on. Refuse to continue if it is not here, so a wrong
# working directory can never lead to unrelated files being removed.
if [[ ! -f run_all.py ]]; then
  echo "✗ 当前目录下未找到 run_all.py,已中止: $PWD" >&2
  exit 1
fi

# Send SIGKILL to every process whose full command line matches the regular
# expression $1, except this script itself and its parent. The exclusion
# matters because the script lives under a path containing "recommendation",
# so it can match its own pattern when started by absolute path.
kill_stale() {
  local pattern=$1
  local pids pid
  # pgrep exits non-zero when nothing matches; that simply means nothing to do.
  pids=$(pgrep -f "$pattern") || return 0
  for pid in $pids; do
    if [[ "$pid" == "$$" || "$pid" == "$PPID" ]]; then
      continue
    fi
    kill -9 "$pid" 2>/dev/null
  done
  return 0
}

kill_stale 'run_all.py'
kill_stale 'recommendation'

# Remove previous results and logs (errors such as "nothing to delete" are
# deliberately silenced), then make sure the log directory exists again.
rm -rf -- output/* 2>/dev/null
rm -rf -- logs/* 2>/dev/null
mkdir -p logs
# Run run_all.py in the background under the memory watchdog and wait for it.
echo "======================================================================"
echo "开始运行离线任务 - $(date '+%Y-%m-%d %H:%M:%S')"
# The banner must state the limits check_memory actually enforces
# (warn at 25 GB, kill at 35 GB). It previously announced 30 GB for the kill
# limit. NOTE(review): confirm whether 30 GB was the intended limit; if so,
# change the threshold in check_memory and this line together.
echo "内存监控: 警告阈值=25GB, 强制终止阈值=35GB"
echo "======================================================================"
echo ""
echo ">>> run_all.py"
# Alternative invocation kept for reference:
# python3 run_all.py --lookback_days 400 --top_n 50 --debug &
python3 run_all.py --debug &
PID_PROD=$!
echo "生产任务 PID: $PID_PROD"

# Start the memory watchdog alongside the job.
check_memory "$PID_PROD" &
MONITOR_PID_2=$!

# Block until the job ends and keep its exit status.
wait "$PID_PROD"
PROD_EXIT_CODE=$?

# The job is gone, so the watchdog is no longer needed; it may already have
# exited on its own, hence the silenced error.
kill "$MONITOR_PID_2" 2>/dev/null

if [[ "$PROD_EXIT_CODE" -eq 0 ]]; then
  echo "✓ 生产模式完成"
else
  echo "✗ 生产模式失败,退出码: $PROD_EXIT_CODE"
  exit 1
fi
# Final stage: run scripts/load_index_to_redis.py against the Redis server on
# localhost, then print the closing banner.
printf '\n'
printf '%s\n' ">>> 步骤3: 加载到Redis"

# Record the loader's exit status: 0 on success, its real status otherwise.
LOAD_EXIT_CODE=0
python3 scripts/load_index_to_redis.py --redis-host localhost || LOAD_EXIT_CODE=$?

# Guard clause: report the failure and stop the script.
if (( LOAD_EXIT_CODE != 0 )); then
  printf '%s\n' "✗ Redis加载失败,退出码: $LOAD_EXIT_CODE"
  exit 1
fi
printf '%s\n' "✓ Redis加载完成"

# Blank line followed by the framed completion timestamp.
printf '%s\n' \
  "" \
  "======================================================================" \
  "所有任务完成 - $(date '+%F %T')" \
  "======================================================================"