Commit 6409ab2c17a1d2d6d50515ccde6d492485b8b0db
1 parent
db578127
offline tasks: mem optimize
Showing
2 changed files
with
7 additions
and
32 deletions
Show diff stats
offline_tasks/config/offline_config.py
| ... | ... | @@ -28,8 +28,8 @@ os.makedirs(LOG_DIR, exist_ok=True) |
| 28 | 28 | # ============================================================================ |
| 29 | 29 | |
| 30 | 30 | # 时间配置(建议先用小数值调试,确认无误后再改为大数值) |
| 31 | -DEFAULT_LOOKBACK_DAYS = 30 # 默认回看天数(调试用30天,生产可改为730天) | |
| 32 | -DEFAULT_RECENT_DAYS = 7 # 默认最近天数(调试用7天,生产可改为180天) | |
| 31 | +DEFAULT_LOOKBACK_DAYS = 730 # 默认回看天数(生产用730天,调试可改为30天) | |
| 32 | +DEFAULT_RECENT_DAYS = 180 # 默认最近天数(生产用180天,调试可改为7天) | |
| 33 | 33 | |
| 34 | 34 | # i2i算法默认参数 |
| 35 | 35 | DEFAULT_I2I_TOP_N = 50 # 默认返回Top N个相似商品 | ... | ... |
offline_tasks/run.sh
| ... | ... | @@ -6,7 +6,7 @@ cd /home/tw/recommendation/offline_tasks |
| 6 | 6 | check_memory() { |
| 7 | 7 | local pid=$1 |
| 8 | 8 | local threshold_warn=25 # 25GB警告阈值 |
| 9 | - local threshold_kill=30 # 30GB强制kill阈值 | |
| 9 | + local threshold_kill=35 # 35GB强制kill阈值 | |
| 10 | 10 | |
| 11 | 11 | while kill -0 $pid 2>/dev/null; do |
| 12 | 12 | # 获取进程内存使用(MB) |
| ... | ... | @@ -25,7 +25,7 @@ check_memory() { |
| 25 | 25 | fi |
| 26 | 26 | fi |
| 27 | 27 | |
| 28 | - sleep 10 # 每10秒检查一次 | |
| 28 | + sleep 60 | |
| 29 | 29 | done |
| 30 | 30 | } |
| 31 | 31 | |
| ... | ... | @@ -47,35 +47,10 @@ echo "开始运行离线任务 - $(date '+%Y-%m-%d %H:%M:%S')" |
| 47 | 47 | echo "内存监控: 警告阈值=25GB, 强制终止阈值=30GB" |
| 48 | 48 | echo "======================================================================" |
| 49 | 49 | |
| 50 | -# 3. 调试模式运行(小数据量) | |
| 51 | -echo "" | |
| 52 | -echo ">>> 步骤1: 调试模式运行(小数据量)" | |
| 53 | -python3 run_all.py --debug & | |
| 54 | -PID_DEBUG=$! | |
| 55 | -echo "调试任务 PID: $PID_DEBUG" | |
| 56 | 50 | |
| 57 | -# 启动内存监控 | |
| 58 | -check_memory $PID_DEBUG & | |
| 59 | -MONITOR_PID_1=$! | |
| 60 | - | |
| 61 | -# 等待调试任务完成 | |
| 62 | -wait $PID_DEBUG | |
| 63 | -DEBUG_EXIT_CODE=$? | |
| 64 | -kill $MONITOR_PID_1 2>/dev/null | |
| 65 | - | |
| 66 | -if [ $DEBUG_EXIT_CODE -eq 0 ]; then | |
| 67 | - echo "✓ 调试模式完成" | |
| 68 | - mv output output_debug 2>/dev/null | |
| 69 | - mkdir output | |
| 70 | -else | |
| 71 | - echo "✗ 调试模式失败,退出码: $DEBUG_EXIT_CODE" | |
| 72 | - exit 1 | |
| 73 | -fi | |
| 74 | - | |
| 75 | -# 4. 生产模式运行(大数据量) | |
| 76 | 51 | echo "" |
| 77 | -echo ">>> 步骤2: 生产模式运行(大数据量)" | |
| 78 | -python3 run_all.py --debug & | |
| 52 | +echo ">>> run_all.py" | |
| 53 | +python3 run_all.py --debug | |
| 79 | 54 | PID_PROD=$! |
| 80 | 55 | echo "生产任务 PID: $PID_PROD" |
| 81 | 56 | |
| ... | ... | @@ -95,7 +70,7 @@ else |
| 95 | 70 | exit 1 |
| 96 | 71 | fi |
| 97 | 72 | |
| 98 | -# 5. 加载到Redis | |
| 73 | + | |
| 99 | 74 | echo "" |
| 100 | 75 | echo ">>> 步骤3: 加载到Redis" |
| 101 | 76 | python3 scripts/load_index_to_redis.py --redis-host localhost | ... | ... |