Commit 6409ab2c17a1d2d6d50515ccde6d492485b8b0db
1 parent
db578127
offline tasks: mem optimize
Showing
2 changed files
with
7 additions
and
32 deletions
Show diff stats
offline_tasks/config/offline_config.py
| @@ -28,8 +28,8 @@ os.makedirs(LOG_DIR, exist_ok=True) | @@ -28,8 +28,8 @@ os.makedirs(LOG_DIR, exist_ok=True) | ||
| 28 | # ============================================================================ | 28 | # ============================================================================ |
| 29 | 29 | ||
| 30 | # 时间配置(建议先用小数值调试,确认无误后再改为大数值) | 30 | # 时间配置(建议先用小数值调试,确认无误后再改为大数值) |
| 31 | -DEFAULT_LOOKBACK_DAYS = 30 # 默认回看天数(调试用30天,生产可改为730天) | ||
| 32 | -DEFAULT_RECENT_DAYS = 7 # 默认最近天数(调试用7天,生产可改为180天) | 31 | +DEFAULT_LOOKBACK_DAYS = 730 # 默认回看天数(生产用730天,调试可改为30天) |
| 32 | +DEFAULT_RECENT_DAYS = 180 # 默认最近天数(生产用180天,调试可改为7天) | ||
| 33 | 33 | ||
| 34 | # i2i算法默认参数 | 34 | # i2i算法默认参数 |
| 35 | DEFAULT_I2I_TOP_N = 50 # 默认返回Top N个相似商品 | 35 | DEFAULT_I2I_TOP_N = 50 # 默认返回Top N个相似商品 |
offline_tasks/run.sh
| @@ -6,7 +6,7 @@ cd /home/tw/recommendation/offline_tasks | @@ -6,7 +6,7 @@ cd /home/tw/recommendation/offline_tasks | ||
| 6 | check_memory() { | 6 | check_memory() { |
| 7 | local pid=$1 | 7 | local pid=$1 |
| 8 | local threshold_warn=25 # 25GB警告阈值 | 8 | local threshold_warn=25 # 25GB警告阈值 |
| 9 | - local threshold_kill=30 # 30GB强制kill阈值 | 9 | + local threshold_kill=35 # 35GB强制kill阈值 |
| 10 | 10 | ||
| 11 | while kill -0 $pid 2>/dev/null; do | 11 | while kill -0 $pid 2>/dev/null; do |
| 12 | # 获取进程内存使用(MB) | 12 | # 获取进程内存使用(MB) |
| @@ -25,7 +25,7 @@ check_memory() { | @@ -25,7 +25,7 @@ check_memory() { | ||
| 25 | fi | 25 | fi |
| 26 | fi | 26 | fi |
| 27 | 27 | ||
| 28 | - sleep 10 # 每10秒检查一次 | 28 | + sleep 60 |
| 29 | done | 29 | done |
| 30 | } | 30 | } |
| 31 | 31 | ||
| @@ -47,35 +47,10 @@ echo "开始运行离线任务 - $(date '+%Y-%m-%d %H:%M:%S')" | @@ -47,35 +47,10 @@ echo "开始运行离线任务 - $(date '+%Y-%m-%d %H:%M:%S')" | ||
| 47 | echo "内存监控: 警告阈值=25GB, 强制终止阈值=30GB" | 47 | echo "内存监控: 警告阈值=25GB, 强制终止阈值=30GB" |
| 48 | echo "======================================================================" | 48 | echo "======================================================================" |
| 49 | 49 | ||
| 50 | -# 3. 调试模式运行(小数据量) | ||
| 51 | -echo "" | ||
| 52 | -echo ">>> 步骤1: 调试模式运行(小数据量)" | ||
| 53 | -python3 run_all.py --debug & | ||
| 54 | -PID_DEBUG=$! | ||
| 55 | -echo "调试任务 PID: $PID_DEBUG" | ||
| 56 | 50 | ||
| 57 | -# 启动内存监控 | ||
| 58 | -check_memory $PID_DEBUG & | ||
| 59 | -MONITOR_PID_1=$! | ||
| 60 | - | ||
| 61 | -# 等待调试任务完成 | ||
| 62 | -wait $PID_DEBUG | ||
| 63 | -DEBUG_EXIT_CODE=$? | ||
| 64 | -kill $MONITOR_PID_1 2>/dev/null | ||
| 65 | - | ||
| 66 | -if [ $DEBUG_EXIT_CODE -eq 0 ]; then | ||
| 67 | - echo "✓ 调试模式完成" | ||
| 68 | - mv output output_debug 2>/dev/null | ||
| 69 | - mkdir output | ||
| 70 | -else | ||
| 71 | - echo "✗ 调试模式失败,退出码: $DEBUG_EXIT_CODE" | ||
| 72 | - exit 1 | ||
| 73 | -fi | ||
| 74 | - | ||
| 75 | -# 4. 生产模式运行(大数据量) | ||
| 76 | echo "" | 51 | echo "" |
| 77 | -echo ">>> 步骤2: 生产模式运行(大数据量)" | ||
| 78 | -python3 run_all.py --debug & | 52 | +echo ">>> run_all.py" |
| 53 | +python3 run_all.py --debug | ||
| 79 | PID_PROD=$! | 54 | PID_PROD=$! |
| 80 | echo "生产任务 PID: $PID_PROD" | 55 | echo "生产任务 PID: $PID_PROD" |
| 81 | 56 | ||
| @@ -95,7 +70,7 @@ else | @@ -95,7 +70,7 @@ else | ||
| 95 | exit 1 | 70 | exit 1 |
| 96 | fi | 71 | fi |
| 97 | 72 | ||
| 98 | -# 5. 加载到Redis | 73 | + |
| 99 | echo "" | 74 | echo "" |
| 100 | echo ">>> 步骤3: 加载到Redis" | 75 | echo ">>> 步骤3: 加载到Redis" |
| 101 | python3 scripts/load_index_to_redis.py --redis-host localhost | 76 | python3 scripts/load_index_to_redis.py --redis-host localhost |