test_memory_monitor.sh
2.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/bin/bash
# Smoke test for the memory-monitoring helper.

# Work from the project directory. Abort if it is missing: otherwise logs/
# and the monitor log would be created in whatever directory the script was
# launched from.
cd /home/tw/recommendation/offline_tasks || {
  echo "error: cannot cd to /home/tw/recommendation/offline_tasks" >&2
  exit 1
}
mkdir -p logs
echo "======================================================================"
echo "测试内存监控功能"
echo "======================================================================"
#######################################
# Memory monitor (copied from run.sh).
#
# Polls the resident set size (RSS) of a process for as long as the process
# exists. Every poll prints the current usage; at or above the warning
# threshold a warning is also appended to the log; at or above the kill
# threshold the process is sent SIGKILL and monitoring stops.
#
# Arguments:
#   $1 - PID of the process to watch
#   $2 - warning threshold in whole GB (optional, default 25)
#   $3 - kill threshold in whole GB (optional, default 30)
#   $4 - polling interval handed to sleep (optional, default 2)
# Outputs:
#   Status lines on stdout; warning and kill events are additionally
#   appended to logs/memory_monitor.log (relative to the current directory).
#######################################
check_memory() {
  local pid=$1
  local threshold_warn=${2:-25}
  local threshold_kill=${3:-30}
  local interval=${4:-2}
  local rss_kb mem_mb mem_gb timestamp

  echo "启动内存监控: PID=$pid, 警告阈值=${threshold_warn}GB, 终止阈值=${threshold_kill}GB"

  while kill -0 "$pid" 2>/dev/null; do
    # ps reports RSS in KB, padded with blanks; empty if the process vanished
    # between the liveness check above and this call.
    rss_kb=$(ps -p "$pid" -o rss= 2>/dev/null)
    rss_kb=${rss_kb//[[:space:]]/}

    if [[ "$rss_kb" =~ ^[0-9]+$ ]]; then
      # Integer arithmetic only: no dependency on bc, whose absence would
      # make the threshold checks fail silently. 10# guards against a value
      # being parsed as octal.
      mem_mb=$(( 10#$rss_kb / 1024 ))
      # GB with two decimals, truncated, always with a leading zero.
      printf -v mem_gb '%d.%02d' \
        "$(( mem_mb / 1024 ))" "$(( mem_mb % 1024 * 100 / 1024 ))"
      timestamp=$(date '+%Y-%m-%d %H:%M:%S')

      echo "[$timestamp] 📊 当前内存: ${mem_gb}GB, PID=$pid"

      if (( mem_mb >= threshold_kill * 1024 )); then
        echo "[$timestamp] ❌ 内存超限!当前使用: ${mem_gb}GB (>= ${threshold_kill}GB), 强制终止进程 PID=$pid" | tee -a logs/memory_monitor.log
        kill -9 "$pid"
        break
      elif (( mem_mb >= threshold_warn * 1024 )); then
        echo "[$timestamp] ⚠️ 内存警告!当前使用: ${mem_gb}GB (>= ${threshold_warn}GB), PID=$pid" | tee -a logs/memory_monitor.log
      fi
    fi

    sleep "$interval"
  done

  echo "内存监控结束: PID=$pid"
}
# Test 1: watch a trivial low-memory process (a plain sleep) for a few
# seconds, then tear down both the process and its monitor.
echo ""
echo ">>> 测试1: 启动一个简单进程(低内存)"
sleep 30 &
TEST_PID=$!
echo "测试进程 PID: $TEST_PID"

# Run the monitor in the background so this script can keep driving the test.
check_memory "$TEST_PID" &
MONITOR_PID=$!

# Let the monitor take a couple of samples, then stop the test process.
sleep 5
kill "$TEST_PID" 2>/dev/null
wait "$TEST_PID" 2>/dev/null

# Stop the monitor and reap it as well, so the shell does not emit a
# "Terminated" job notice for it later.
kill "$MONITOR_PID" 2>/dev/null
wait "$MONITOR_PID" 2>/dev/null
echo "✓ 测试1完成"
echo ""
# Test 2: list processes whose ps line mentions "python" (skipping any line
# that mentions "grep") and print PID, RSS converted from KB to GB, and the
# command column.
echo ">>> 测试2: 查看当前运行的Python进程内存"
ps aux \
  | awk '/python/ && !/grep/ {printf "PID: %s, 内存: %.2fGB, 命令: %s\n", $2, $6/1024/1024, $11}'
echo ""
# Test 3: show system-wide memory totals in human-readable units.
echo ">>> 测试3: 系统内存信息"
free -h

# Closing banner and usage hints, emitted verbatim (quoted delimiter: no
# expansion inside the here-doc).
cat <<'EOF'

======================================================================
测试完成
======================================================================

💡 提示:
 - 内存监控日志: logs/memory_monitor.log
 - 查看实时日志: tail -f logs/memory_monitor.log
 - 监控阈值可在 run.sh 中修改

EOF