# test_memory_monitor.sh (2.7 KB)
#!/bin/bash

# Smoke test for the memory-monitoring helper.

# Everything below uses the relative path logs/, so run from the offline_tasks
# directory and abort rather than continue in the wrong place if it is missing.
cd /home/tw/recommendation/offline_tasks || {
    echo "错误: 无法进入目录 /home/tw/recommendation/offline_tasks" >&2
    exit 1
}
mkdir -p logs

echo "======================================================================"
echo "测试内存监控功能"
echo "======================================================================"

#######################################
# Memory monitor (the original comment says it was copied from run.sh).
# Polls a process's resident set size until the process exits. Every sample is
# printed; once usage reaches the warning threshold the event is also logged,
# and once it reaches the kill threshold the process is sent SIGKILL and
# monitoring stops.
# Arguments:
#   $1 - PID of the process to watch
#   $2 - warning threshold in GB (optional, default 25)
#   $3 - kill threshold in GB (optional, default 30)
#   $4 - seconds to sleep between samples (optional, default 2)
# Outputs:
#   Status lines on stdout; warning and kill events are additionally appended
#   to logs/memory_monitor.log, relative to the current directory.
#######################################
check_memory() {
    local pid=$1
    local threshold_warn=${2:-25}
    local threshold_kill=${3:-30}
    local interval=${4:-2}
    local log_file=logs/memory_monitor.log
    local rss_kb mem_gb level timestamp

    echo "启动内存监控: PID=$pid, 警告阈值=${threshold_warn}GB, 终止阈值=${threshold_kill}GB"

    while kill -0 "$pid" 2>/dev/null; do
        # RSS in KB as reported by ps; empty if the process disappeared
        # between the liveness check above and this call.
        rss_kb=$(ps -p "$pid" -o rss= 2>/dev/null)
        rss_kb=${rss_kb//[[:space:]]/}

        if [[ "$rss_kb" =~ ^[0-9]+$ ]]; then
            # One awk call both formats the value (truncated to two decimals,
            # always with a leading digit) and classifies it against the
            # thresholds, so no bc dependency and no fragile `[ $(...) -eq 1 ]`.
            read -r mem_gb level < <(
                awk -v kb="$rss_kb" -v warn="$threshold_warn" -v limit="$threshold_kill" 'BEGIN {
                    gb = kb / 1048576
                    level = "ok"
                    if (gb >= limit) level = "kill"
                    else if (gb >= warn) level = "warn"
                    printf "%.2f %s\n", int(gb * 100) / 100, level
                }'
            )
            timestamp=$(date '+%Y-%m-%d %H:%M:%S')

            # Always show the current sample.
            echo "[$timestamp] 📊 当前内存: ${mem_gb}GB, PID=$pid"

            case "$level" in
                kill)
                    echo "[$timestamp] ❌ 内存超限!当前使用: ${mem_gb}GB (>= ${threshold_kill}GB), 强制终止进程 PID=$pid" | tee -a "$log_file"
                    kill -9 "$pid"
                    break
                    ;;
                warn)
                    echo "[$timestamp] ⚠️  内存警告!当前使用: ${mem_gb}GB (>= ${threshold_warn}GB), PID=$pid" | tee -a "$log_file"
                    ;;
            esac
        fi

        sleep "$interval"
    done

    echo "内存监控结束: PID=$pid"
}

# Test 1: monitor a short-lived, low-memory stand-in process.
echo ""
echo ">>> 测试1: 启动一个简单进程(低内存)"
sleep 30 &
TEST_PID=$!
echo "测试进程 PID: $TEST_PID"

# Run the monitor in the background so this script can stop the target itself.
check_memory "$TEST_PID" &
MONITOR_PID=$!

# Give the monitor time for a few samples, then stop the target process.
sleep 5
kill "$TEST_PID" 2>/dev/null
wait "$TEST_PID" 2>/dev/null
# Stop the monitor and reap it as well, mirroring the handling of the test
# process, so no background job is left uncollected when the script moves on.
kill "$MONITOR_PID" 2>/dev/null
wait "$MONITOR_PID" 2>/dev/null

echo "✓ 测试1完成"
echo ""

# Test 2: list processes whose ps line mentions "python" together with their
# resident memory. Column 6 of `ps aux` (RSS, in KB) is converted to GB; lines
# mentioning "grep" are skipped.
printf '%s\n' ">>> 测试2: 查看当前运行的Python进程内存"
ps aux | awk '/python/ && !/grep/ {printf "PID: %s, 内存: %.2fGB, 命令: %s\n", $2, $6/1024/1024, $11}'
printf '\n'

# Test 3: show the system-wide memory summary.
printf '%s\n' ">>> 测试3: 系统内存信息"
free -h
printf '\n'

# Closing banner plus hints on where the monitor log lives and how to follow it.
cat <<'EOF'
======================================================================
测试完成
======================================================================

💡 提示:
  - 内存监控日志: logs/memory_monitor.log
  - 查看实时日志: tail -f logs/memory_monitor.log
  - 监控阈值可在 run.sh 中修改

EOF