Commit 7e37f9e2a6122504f55e8e19f1ea43a5dda9b726
1 parent
801fb682
add cpp swing for mem optimize
Showing
9 changed files
with
637 additions
and
140 deletions
Show diff stats
offline_tasks/README.md
| @@ -4,32 +4,36 @@ | @@ -4,32 +4,36 @@ | ||
| 4 | 4 | ||
| 5 | ## 🚀 快速开始 | 5 | ## 🚀 快速开始 |
| 6 | 6 | ||
| 7 | -### 运行所有任务(推荐) | 7 | +### 运行所有任务 |
| 8 | 8 | ||
| 9 | ```bash | 9 | ```bash |
| 10 | cd /home/tw/recommendation/offline_tasks | 10 | cd /home/tw/recommendation/offline_tasks |
| 11 | 11 | ||
| 12 | -# 运行全部离线任务(包括C++ Swing) | ||
| 13 | -python3 run_all.py | 12 | +# ⭐ 推荐:使用 run.sh(完整流程,包含Redis加载) |
| 13 | +bash run.sh | ||
| 14 | 14 | ||
| 15 | -# 开启debug模式(详细日志 + 可读文件) | 15 | +# 备用:使用 run_all.py(简化版,不含C++ Swing和Redis) |
| 16 | python3 run_all.py --debug | 16 | python3 run_all.py --debug |
| 17 | ``` | 17 | ``` |
| 18 | 18 | ||
| 19 | +**说明**: | ||
| 20 | +- `run.sh`: 主执行脚本,包含完整流程、内存监控、自动Redis加载 | ||
| 21 | +- `run_all.py`: Python简化版本,只包含Python算法任务 | ||
| 22 | + | ||
| 19 | ### 任务执行顺序 | 23 | ### 任务执行顺序 |
| 20 | 24 | ||
| 21 | ``` | 25 | ``` |
| 22 | 前置任务: | 26 | 前置任务: |
| 23 | -1. fetch_item_attributes.py → 获取商品属性映射 | ||
| 24 | -2. generate_session.py → 生成用户行为session | ||
| 25 | -3. C++ Swing算法 → 高性能i2i相似度计算 | 27 | +1. fetch_item_attributes.py → 获取商品属性映射 |
| 28 | +2. generate_session.py → 生成用户行为session | ||
| 29 | +3. collaboration/run.sh → C++ Swing算法(高性能) | ||
| 26 | 30 | ||
| 27 | 核心算法任务: | 31 | 核心算法任务: |
| 28 | -4. Python Swing算法 → 支持日期维度的i2i | ||
| 29 | -5. Session W2V → 基于序列的embedding | ||
| 30 | -6. DeepWalk → 图结构embedding | ||
| 31 | -7. 内容相似度 → 基于ES向量 | ||
| 32 | -8. 兴趣聚合 → 多维度商品聚合 | 32 | +4. i2i_swing.py → Python Swing(支持日期维度) |
| 33 | +5. i2i_session_w2v.py → Session W2V | ||
| 34 | +6. i2i_deepwalk.py → DeepWalk | ||
| 35 | +7. i2i_content_similar.py → 内容相似度 | ||
| 36 | +8. interest_aggregation.py → 兴趣聚合 | ||
| 33 | ``` | 37 | ``` |
| 34 | 38 | ||
| 35 | ## 📚 文档 | 39 | ## 📚 文档 |
| @@ -94,7 +98,7 @@ python3 scripts/generate_session.py --lookback_days 730 | @@ -94,7 +98,7 @@ python3 scripts/generate_session.py --lookback_days 730 | ||
| 94 | ### 3. C++ Swing | 98 | ### 3. C++ Swing |
| 95 | 99 | ||
| 96 | ```bash | 100 | ```bash |
| 97 | -cd ../collaboration | 101 | +cd collaboration |
| 98 | bash run.sh | 102 | bash run.sh |
| 99 | ``` | 103 | ``` |
| 100 | 104 | ||
| @@ -134,6 +138,11 @@ offline_tasks/ | @@ -134,6 +138,11 @@ offline_tasks/ | ||
| 134 | │ ├── interest_aggregation.py | 138 | │ ├── interest_aggregation.py |
| 135 | │ ├── add_names_to_swing.py | 139 | │ ├── add_names_to_swing.py |
| 136 | │ └── debug_utils.py | 140 | │ └── debug_utils.py |
| 141 | +├── collaboration/ # C++ Swing算法 | ||
| 142 | +│ ├── src/ | ||
| 143 | +│ ├── bin/ | ||
| 144 | +│ ├── run.sh | ||
| 145 | +│ └── output/ | ||
| 137 | ├── config/ # 配置文件 | 146 | ├── config/ # 配置文件 |
| 138 | │ └── offline_config.py | 147 | │ └── offline_config.py |
| 139 | ├── doc/ # 文档中心 | 148 | ├── doc/ # 文档中心 |
| @@ -146,7 +155,8 @@ offline_tasks/ | @@ -146,7 +155,8 @@ offline_tasks/ | ||
| 146 | │ ├── session.txt.* | 155 | │ ├── session.txt.* |
| 147 | │ └── *.txt | 156 | │ └── *.txt |
| 148 | ├── logs/ # 日志目录 | 157 | ├── logs/ # 日志目录 |
| 149 | -├── run_all.py # 统一入口 | 158 | +├── run.sh # 主执行脚本(推荐) |
| 159 | +├── run_all.py # Python版本(简化) | ||
| 150 | └── README.md # 本文件 | 160 | └── README.md # 本文件 |
| 151 | ``` | 161 | ``` |
| 152 | 162 | ||
| @@ -185,7 +195,7 @@ python3 scripts/generate_session.py | @@ -185,7 +195,7 @@ python3 scripts/generate_session.py | ||
| 185 | 195 | ||
| 186 | **3. C++ Swing编译失败** | 196 | **3. C++ Swing编译失败** |
| 187 | ```bash | 197 | ```bash |
| 188 | -cd ../collaboration | 198 | +cd collaboration |
| 189 | make clean | 199 | make clean |
| 190 | make | 200 | make |
| 191 | ``` | 201 | ``` |
offline_tasks/collaboration/run.sh
| @@ -7,7 +7,7 @@ source ~/.bash_profile | @@ -7,7 +7,7 @@ source ~/.bash_profile | ||
| 7 | 7 | ||
| 8 | # 数据路径配置 | 8 | # 数据路径配置 |
| 9 | # 修改这个路径指向实际的session文件位置 | 9 | # 修改这个路径指向实际的session文件位置 |
| 10 | -SESSION_DATA_DIR="../offline_tasks/output" | 10 | +SESSION_DATA_DIR="../output" |
| 11 | 11 | ||
| 12 | # Swing算法参数 | 12 | # Swing算法参数 |
| 13 | ALPHA=0.7 # Swing算法的alpha参数 | 13 | ALPHA=0.7 # Swing算法的alpha参数 |
| @@ -95,7 +95,7 @@ if [[ $? -eq 0 ]]; then | @@ -95,7 +95,7 @@ if [[ $? -eq 0 ]]; then | ||
| 95 | 95 | ||
| 96 | # 生成可读的debug文件(添加商品名称) | 96 | # 生成可读的debug文件(添加商品名称) |
| 97 | echo "生成可读的debug文件..." | 97 | echo "生成可读的debug文件..." |
| 98 | - DEBUG_SCRIPT="../offline_tasks/scripts/add_names_to_swing.py" | 98 | + DEBUG_SCRIPT="../scripts/add_names_to_swing.py" |
| 99 | 99 | ||
| 100 | if [[ -f ${DEBUG_SCRIPT} ]]; then | 100 | if [[ -f ${DEBUG_SCRIPT} ]]; then |
| 101 | ${PYTHON_CMD} ${DEBUG_SCRIPT} output/swing_similar.txt output/swing_similar_readable.txt --debug | 101 | ${PYTHON_CMD} ${DEBUG_SCRIPT} output/swing_similar.txt output/swing_similar_readable.txt --debug |
offline_tasks/doc/README.md
| @@ -30,6 +30,8 @@ | @@ -30,6 +30,8 @@ | ||
| 30 | ### 实现总结 | 30 | ### 实现总结 |
| 31 | 31 | ||
| 32 | - **[Swing实现总结.md](./Swing实现总结.md)** - C++ Swing集成实现的完整说明 | 32 | - **[Swing实现总结.md](./Swing实现总结.md)** - C++ Swing集成实现的完整说明 |
| 33 | +- **[系统改进总结-20241017.md](./系统改进总结-20241017.md)** - 2024-10-17系统改进汇总 | ||
| 34 | +- **[项目重构说明-20241017.md](./项目重构说明-20241017.md)** - 目录结构和执行脚本重构详解 🆕 | ||
| 33 | 35 | ||
| 34 | ### 维护文档 | 36 | ### 维护文档 |
| 35 | 37 |
offline_tasks/doc/Redis数据规范.md
| @@ -20,7 +20,7 @@ | @@ -20,7 +20,7 @@ | ||
| 20 | 20 | ||
| 21 | | 模块名称 | 源数据地址 | 格式描述 | RedisKey模板 | RedisValue格式 | TTL | | 21 | | 模块名称 | 源数据地址 | 格式描述 | RedisKey模板 | RedisValue格式 | TTL | |
| 22 | |---------|-----------|---------|-------------|---------------|-----| | 22 | |---------|-----------|---------|-------------|---------------|-----| |
| 23 | -| **i2i_swing_cpp** | `collaboration/output/swing_similar.txt` | `item_id\tsimilar_id1:score1,...` | `item:similar:swing_cpp:{item_id}` | `[[similar_id1,score1],[similar_id2,score2],...]` | 7天 | | 23 | +| **i2i_swing_cpp** | `offline_tasks/collaboration/output/swing_similar.txt` | `item_id\tsimilar_id1:score1,...` | `item:similar:swing_cpp:{item_id}` | `[[similar_id1,score1],[similar_id2,score2],...]` | 7天 | |
| 24 | | **i2i_swing** | `output/i2i_swing_YYYYMMDD.txt` | `item_id\titem_name\tsimilar_id1:score1,...` | `item:similar:swing:{item_id}` | `[[similar_id1,score1],[similar_id2,score2],...]` | 7天 | | 24 | | **i2i_swing** | `output/i2i_swing_YYYYMMDD.txt` | `item_id\titem_name\tsimilar_id1:score1,...` | `item:similar:swing:{item_id}` | `[[similar_id1,score1],[similar_id2,score2],...]` | 7天 | |
| 25 | | **i2i_session_w2v** | `output/i2i_session_w2v_YYYYMMDD.txt` | `item_id\titem_name\tsimilar_id1:score1,...` | `item:similar:w2v:{item_id}` | `[[similar_id1,score1],[similar_id2,score2],...]` | 7天 | | 25 | | **i2i_session_w2v** | `output/i2i_session_w2v_YYYYMMDD.txt` | `item_id\titem_name\tsimilar_id1:score1,...` | `item:similar:w2v:{item_id}` | `[[similar_id1,score1],[similar_id2,score2],...]` | 7天 | |
| 26 | | **i2i_deepwalk** | `output/i2i_deepwalk_YYYYMMDD.txt` | `item_id\titem_name\tsimilar_id1:score1,...` | `item:similar:deepwalk:{item_id}` | `[[similar_id1,score1],[similar_id2,score2],...]` | 7天 | | 26 | | **i2i_deepwalk** | `output/i2i_deepwalk_YYYYMMDD.txt` | `item_id\titem_name\tsimilar_id1:score1,...` | `item:similar:deepwalk:{item_id}` | `[[similar_id1,score1],[similar_id2,score2],...]` | 7天 | |
| @@ -305,7 +305,7 @@ python3 scripts/load_index_to_redis.py --load-i2i --redis-host localhost | @@ -305,7 +305,7 @@ python3 scripts/load_index_to_redis.py --load-i2i --redis-host localhost | ||
| 305 | 305 | ||
| 306 | # 只加载C++ Swing索引 | 306 | # 只加载C++ Swing索引 |
| 307 | python3 scripts/load_index_to_redis.py \ | 307 | python3 scripts/load_index_to_redis.py \ |
| 308 | - --file ../collaboration/output/swing_similar.txt \ | 308 | + --file collaboration/output/swing_similar.txt \ |
| 309 | --algorithm swing_cpp \ | 309 | --algorithm swing_cpp \ |
| 310 | --redis-host localhost | 310 | --redis-host localhost |
| 311 | 311 |
offline_tasks/doc/离线索引数据规范.md
| @@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
| 4 | 4 | ||
| 5 | | 模块名称 | 任务命令 | 调度频次 | 输出数据 | 格式和示例 | | 5 | | 模块名称 | 任务命令 | 调度频次 | 输出数据 | 格式和示例 | |
| 6 | |---------|---------|---------|---------|-----------| | 6 | |---------|---------|---------|---------|-----------| |
| 7 | -| **i2i_swing_cpp** | `cd collaboration && bash run.sh` | 每天 | `collaboration/output/swing_similar.txt` | `item_id \t similar_id1:score1,similar_id2:score2,...` | | 7 | +| **i2i_swing_cpp** | `cd offline_tasks/collaboration && bash run.sh` | 每天 | `offline_tasks/collaboration/output/swing_similar.txt` | `item_id \t similar_id1:score1,similar_id2:score2,...` | |
| 8 | | **i2i_swing** | `python3 scripts/i2i_swing.py` | 每天 | `output/i2i_swing_YYYYMMDD.txt` | `item_id \t item_name \t similar_id1:score1,similar_id2:score2,...` | | 8 | | **i2i_swing** | `python3 scripts/i2i_swing.py` | 每天 | `output/i2i_swing_YYYYMMDD.txt` | `item_id \t item_name \t similar_id1:score1,similar_id2:score2,...` | |
| 9 | | **i2i_session_w2v** | `python3 scripts/i2i_session_w2v.py` | 每天 | `output/i2i_session_w2v_YYYYMMDD.txt` | `item_id \t item_name \t similar_id1:score1,similar_id2:score2,...` | | 9 | | **i2i_session_w2v** | `python3 scripts/i2i_session_w2v.py` | 每天 | `output/i2i_session_w2v_YYYYMMDD.txt` | `item_id \t item_name \t similar_id1:score1,similar_id2:score2,...` | |
| 10 | | **i2i_deepwalk** | `python3 scripts/i2i_deepwalk.py` | 每天 | `output/i2i_deepwalk_YYYYMMDD.txt` | `item_id \t item_name \t similar_id1:score1,similar_id2:score2,...` | | 10 | | **i2i_deepwalk** | `python3 scripts/i2i_deepwalk.py` | 每天 | `output/i2i_deepwalk_YYYYMMDD.txt` | `item_id \t item_name \t similar_id1:score1,similar_id2:score2,...` | |
| @@ -40,8 +40,8 @@ item_id \t similar_id1:score1,similar_id2:score2,... | @@ -40,8 +40,8 @@ item_id \t similar_id1:score1,similar_id2:score2,... | ||
| 40 | - ⚡ **高性能**: C++实现,速度比Python快10-100倍 | 40 | - ⚡ **高性能**: C++实现,速度比Python快10-100倍 |
| 41 | - 📊 **大规模**: 适合处理10万+商品的相似度计算 | 41 | - 📊 **大规模**: 适合处理10万+商品的相似度计算 |
| 42 | - 🔢 **原始分数**: 输出Swing算法原始分数(未归一化) | 42 | - 🔢 **原始分数**: 输出Swing算法原始分数(未归一化) |
| 43 | -- 📁 **文件位置**: `collaboration/output/swing_similar.txt` | ||
| 44 | -- 📝 **可读版本**: `collaboration/output/swing_similar_readable.txt` (包含商品名称) | 43 | +- 📁 **文件位置**: `offline_tasks/collaboration/output/swing_similar.txt` |
| 44 | +- 📝 **可读版本**: `offline_tasks/collaboration/output/swing_similar_readable.txt` (包含商品名称) | ||
| 45 | 45 | ||
| 46 | #### 1.2 Python算法(标准版本) | 46 | #### 1.2 Python算法(标准版本) |
| 47 | 47 |
offline_tasks/doc/系统改进总结-20241017.md
| @@ -247,25 +247,31 @@ offline_tasks/doc/ | @@ -247,25 +247,31 @@ offline_tasks/doc/ | ||
| 247 | cd /home/tw/recommendation/offline_tasks | 247 | cd /home/tw/recommendation/offline_tasks |
| 248 | 248 | ||
| 249 | # 方式1: 运行全部任务(推荐) | 249 | # 方式1: 运行全部任务(推荐) |
| 250 | +bash run.sh | ||
| 251 | + | ||
| 252 | +# 方式2: 使用Python版本(简化版) | ||
| 250 | python3 run_all.py --debug | 253 | python3 run_all.py --debug |
| 251 | 254 | ||
| 252 | -# 方式2: 分步运行 | 255 | +# 方式3: 分步运行 |
| 253 | # 步骤1: 获取商品属性 | 256 | # 步骤1: 获取商品属性 |
| 254 | python3 scripts/fetch_item_attributes.py | 257 | python3 scripts/fetch_item_attributes.py |
| 255 | 258 | ||
| 256 | # 步骤2: 生成session文件 | 259 | # 步骤2: 生成session文件 |
| 257 | python3 scripts/generate_session.py --lookback_days 730 | 260 | python3 scripts/generate_session.py --lookback_days 730 |
| 258 | 261 | ||
| 259 | -# 步骤3: 运行Swing算法(启用日期维度) | 262 | +# 步骤3: 运行C++ Swing |
| 263 | +cd collaboration && bash run.sh && cd .. | ||
| 264 | + | ||
| 265 | +# 步骤4: 运行Python Swing(启用日期维度) | ||
| 260 | python3 scripts/i2i_swing.py --lookback_days 730 --use_daily_session --debug | 266 | python3 scripts/i2i_swing.py --lookback_days 730 --use_daily_session --debug |
| 261 | ``` | 267 | ``` |
| 262 | 268 | ||
| 263 | ### C++ Swing算法 | 269 | ### C++ Swing算法 |
| 264 | 270 | ||
| 265 | ```bash | 271 | ```bash |
| 266 | -# C++ Swing现已集成到run_all.py,会自动在session生成后执行 | 272 | +# C++ Swing现已集成到run.sh,会自动执行 |
| 267 | # 如需单独运行: | 273 | # 如需单独运行: |
| 268 | -cd /home/tw/recommendation/collaboration | 274 | +cd /home/tw/recommendation/offline_tasks/collaboration |
| 269 | bash run.sh | 275 | bash run.sh |
| 270 | 276 | ||
| 271 | # 查看结果 | 277 | # 查看结果 |
| @@ -301,6 +307,11 @@ recommendation/ | @@ -301,6 +307,11 @@ recommendation/ | ||
| 301 | │ │ ├── add_names_to_swing.py # 修改:使用本地映射 | 307 | │ │ ├── add_names_to_swing.py # 修改:使用本地映射 |
| 302 | │ │ ├── i2i_swing.py # 修改:支持日期维度 | 308 | │ │ ├── i2i_swing.py # 修改:支持日期维度 |
| 303 | │ │ └── debug_utils.py # 修改:添加加载函数 | 309 | │ │ └── debug_utils.py # 修改:添加加载函数 |
| 310 | +│ ├── collaboration/ # 移动:C++ Swing目录 | ||
| 311 | +│ │ ├── src/ | ||
| 312 | +│ │ ├── bin/ | ||
| 313 | +│ │ ├── run.sh # 修改:路径更新 | ||
| 314 | +│ │ └── output/ | ||
| 304 | │ ├── doc/ # 新增:文档中心 | 315 | │ ├── doc/ # 新增:文档中心 |
| 305 | │ │ ├── README.md | 316 | │ │ ├── README.md |
| 306 | │ │ ├── 快速开始.md | 317 | │ │ ├── 快速开始.md |
| @@ -310,12 +321,9 @@ recommendation/ | @@ -310,12 +321,9 @@ recommendation/ | ||
| 310 | │ │ ├── item_attributes_mappings.json # 新增:映射文件 | 321 | │ │ ├── item_attributes_mappings.json # 新增:映射文件 |
| 311 | │ │ ├── item_attributes_stats.txt # 新增:统计信息 | 322 | │ │ ├── item_attributes_stats.txt # 新增:统计信息 |
| 312 | │ │ └── session.txt.YYYYMMDD # session文件 | 323 | │ │ └── session.txt.YYYYMMDD # session文件 |
| 313 | -│ ├── run_all.py # 修改:添加前置任务 | 324 | +│ ├── run.sh # 新增:主执行脚本 |
| 325 | +│ ├── run_all.py # 修改:简化版本 | ||
| 314 | │ └── README.md | 326 | │ └── README.md |
| 315 | -└── collaboration/ | ||
| 316 | - ├── run.sh # 已修改:适配session | ||
| 317 | - ├── Swing快速开始.md # 重命名 | ||
| 318 | - └── ... | ||
| 319 | ``` | 327 | ``` |
| 320 | 328 | ||
| 321 | --- | 329 | --- |
| @@ -324,39 +332,44 @@ recommendation/ | @@ -324,39 +332,44 @@ recommendation/ | ||
| 324 | 332 | ||
| 325 | ### 改进内容 | 333 | ### 改进内容 |
| 326 | 334 | ||
| 327 | -**之前**: C++ Swing需要手动切换目录运行 | 335 | +**之前**: C++ Swing在外层目录,需要手动切换 |
| 328 | ```bash | 336 | ```bash |
| 329 | cd /home/tw/recommendation/collaboration | 337 | cd /home/tw/recommendation/collaboration |
| 330 | bash run.sh | 338 | bash run.sh |
| 331 | ``` | 339 | ``` |
| 332 | 340 | ||
| 333 | -**现在**: 已集成到`run_all.py`,自动执行 | 341 | +**现在**: 已移入offline_tasks,集成到`run.sh`自动执行 |
| 334 | 342 | ||
| 335 | ### 执行流程 | 343 | ### 执行流程 |
| 336 | 344 | ||
| 337 | ``` | 345 | ``` |
| 338 | -run_all.py: | 346 | +run.sh: |
| 339 | 1. fetch_item_attributes.py | 347 | 1. fetch_item_attributes.py |
| 340 | 2. generate_session.py ← 生成session.txt.YYYYMMDD.cpp | 348 | 2. generate_session.py ← 生成session.txt.YYYYMMDD.cpp |
| 341 | -3. run_cpp_swing() ← 自动调用 collaboration/run.sh | 349 | +3. collaboration/run.sh ← 直接调用C++ Swing |
| 342 | ├─ 编译C++程序 | 350 | ├─ 编译C++程序 |
| 343 | ├─ 读取session文件 | 351 | ├─ 读取session文件 |
| 344 | ├─ 运行Swing算法 | 352 | ├─ 运行Swing算法 |
| 345 | ├─ 合并多线程结果 | 353 | ├─ 合并多线程结果 |
| 346 | └─ 生成可读版本(自动添加商品名) | 354 | └─ 生成可读版本(自动添加商品名) |
| 347 | -4. 后续Python任务... | 355 | +4. i2i_swing.py ← Python Swing |
| 356 | +5. i2i_session_w2v.py ← Session W2V | ||
| 357 | +6. i2i_deepwalk.py ← DeepWalk | ||
| 358 | +7. i2i_content_similar.py ← 内容相似度 | ||
| 359 | +8. interest_aggregation.py ← 兴趣聚合 | ||
| 360 | +9. load_index_to_redis.py ← 加载到Redis | ||
| 348 | ``` | 361 | ``` |
| 349 | 362 | ||
| 350 | ### 输出结果 | 363 | ### 输出结果 |
| 351 | 364 | ||
| 352 | C++ Swing执行后,结果保存在: | 365 | C++ Swing执行后,结果保存在: |
| 353 | ``` | 366 | ``` |
| 354 | -collaboration/output_YYYYMMDD/ | 367 | +offline_tasks/collaboration/output_YYYYMMDD/ |
| 355 | ├── sim_matrx.* # 多线程输出 | 368 | ├── sim_matrx.* # 多线程输出 |
| 356 | ├── swing_similar.txt # 合并结果(ID格式) | 369 | ├── swing_similar.txt # 合并结果(ID格式) |
| 357 | └── swing_similar_readable.txt # 可读版本(ID:名称格式) | 370 | └── swing_similar_readable.txt # 可读版本(ID:名称格式) |
| 358 | 371 | ||
| 359 | -collaboration/output -> output_YYYYMMDD # 软链接 | 372 | +offline_tasks/collaboration/output -> output_YYYYMMDD # 软链接 |
| 360 | ``` | 373 | ``` |
| 361 | 374 | ||
| 362 | ### 优势 | 375 | ### 优势 |
| @@ -371,7 +384,7 @@ collaboration/output -> output_YYYYMMDD # 软链接 | @@ -371,7 +384,7 @@ collaboration/output -> output_YYYYMMDD # 软链接 | ||
| 371 | 384 | ||
| 372 | 如需单独运行C++ Swing(不执行其他任务): | 385 | 如需单独运行C++ Swing(不执行其他任务): |
| 373 | ```bash | 386 | ```bash |
| 374 | -cd /home/tw/recommendation/collaboration | 387 | +cd /home/tw/recommendation/offline_tasks/collaboration |
| 375 | bash run.sh | 388 | bash run.sh |
| 376 | ``` | 389 | ``` |
| 377 | 390 |
| @@ -0,0 +1,397 @@ | @@ -0,0 +1,397 @@ | ||
| 1 | +# 项目重构说明 - 2024-10-17 | ||
| 2 | + | ||
| 3 | +## ✅ 完成的重构 | ||
| 4 | + | ||
| 5 | +### 1. 目录结构调整 | ||
| 6 | + | ||
| 7 | +**改动**: 将`collaboration`目录移入`offline_tasks` | ||
| 8 | + | ||
| 9 | +**之前**: | ||
| 10 | +``` | ||
| 11 | +recommendation/ | ||
| 12 | +├── offline_tasks/ | ||
| 13 | +│ ├── scripts/ | ||
| 14 | +│ └── ... | ||
| 15 | +└── collaboration/ # 外层目录 | ||
| 16 | + ├── src/ | ||
| 17 | + └── run.sh | ||
| 18 | +``` | ||
| 19 | + | ||
| 20 | +**之后**: | ||
| 21 | +``` | ||
| 22 | +recommendation/ | ||
| 23 | +└── offline_tasks/ | ||
| 24 | + ├── scripts/ | ||
| 25 | + ├── collaboration/ # 移入内部 | ||
| 26 | + │ ├── src/ | ||
| 27 | + │ └── run.sh | ||
| 28 | + └── ... | ||
| 29 | +``` | ||
| 30 | + | ||
| 31 | +**优势**: | ||
| 32 | +- ✅ 统一目录结构,所有离线任务在同一目录 | ||
| 33 | +- ✅ 简化路径配置 | ||
| 34 | +- ✅ 便于统一管理和部署 | ||
| 35 | + | ||
| 36 | +--- | ||
| 37 | + | ||
| 38 | +### 2. 执行脚本简化 | ||
| 39 | + | ||
| 40 | +**改动**: 主执行脚本从`run_all.py`改为`run.sh`,直接调用各个脚本 | ||
| 41 | + | ||
| 42 | +**之前的流程**: | ||
| 43 | +```python | ||
| 44 | +# run_all.py (Python实现) | ||
| 45 | +run_script('fetch_item_attributes.py') | ||
| 46 | +run_script('generate_session.py') | ||
| 47 | +run_cpp_swing() # 调用collaboration/run.sh | ||
| 48 | +run_script('i2i_swing.py') | ||
| 49 | +# ... | ||
| 50 | +``` | ||
| 51 | + | ||
| 52 | +**现在的流程**: | ||
| 53 | +```bash | ||
| 54 | +# run.sh (Shell实现) | ||
| 55 | +python3 scripts/fetch_item_attributes.py | ||
| 56 | +python3 scripts/generate_session.py | ||
| 57 | +cd collaboration && bash run.sh && cd .. | ||
| 58 | +python3 scripts/i2i_swing.py | ||
| 59 | +# ... | ||
| 60 | +``` | ||
| 61 | + | ||
| 62 | +**优势**: | ||
| 63 | +- ✅ 代码更简洁,减少抽象层 | ||
| 64 | +- ✅ 直接调用,易于理解和调试 | ||
| 65 | +- ✅ 内存监控、错误处理更灵活 | ||
| 66 | +- ✅ 配置参数集中在顶部,便于修改 | ||
| 67 | + | ||
| 68 | +--- | ||
| 69 | + | ||
| 70 | +### 3. 路径更新 | ||
| 71 | + | ||
| 72 | +所有相关路径已更新: | ||
| 73 | + | ||
| 74 | +**collaboration/run.sh**: | ||
| 75 | +- `SESSION_DATA_DIR="../offline_tasks/output"` → `"../output"` | ||
| 76 | +- `DEBUG_SCRIPT="../offline_tasks/scripts/..."` → `"../scripts/..."` | ||
| 77 | + | ||
| 78 | +**文档更新**: | ||
| 79 | +- ✅ `README.md` | ||
| 80 | +- ✅ `doc/离线索引数据规范.md` | ||
| 81 | +- ✅ `doc/Redis数据规范.md` | ||
| 82 | +- ✅ `doc/系统改进总结-20241017.md` | ||
| 83 | + | ||
| 84 | +--- | ||
| 85 | + | ||
| 86 | +## 📋 新的项目结构 | ||
| 87 | + | ||
| 88 | +``` | ||
| 89 | +offline_tasks/ | ||
| 90 | +├── scripts/ # Python脚本 | ||
| 91 | +│ ├── fetch_item_attributes.py # 前置:获取商品属性 | ||
| 92 | +│ ├── generate_session.py # 前置:生成session | ||
| 93 | +│ ├── i2i_swing.py # Python Swing | ||
| 94 | +│ ├── i2i_session_w2v.py # Session W2V | ||
| 95 | +│ ├── i2i_deepwalk.py # DeepWalk | ||
| 96 | +│ ├── i2i_content_similar.py # 内容相似度 | ||
| 97 | +│ ├── interest_aggregation.py # 兴趣聚合 | ||
| 98 | +│ ├── load_index_to_redis.py # 加载到Redis | ||
| 99 | +│ ├── add_names_to_swing.py # 添加商品名 | ||
| 100 | +│ └── debug_utils.py # Debug工具 | ||
| 101 | +├── collaboration/ # C++ Swing算法 | ||
| 102 | +│ ├── src/ | ||
| 103 | +│ │ ├── swing.cc # Swing实现 | ||
| 104 | +│ │ ├── swing_symmetric.cc # 对称Swing | ||
| 105 | +│ │ ├── icf_simple.cc # 简单协同 | ||
| 106 | +│ │ └── ucf.py # 用户协同 | ||
| 107 | +│ ├── bin/ # 编译后的二进制 | ||
| 108 | +│ ├── include/ # 头文件 | ||
| 109 | +│ ├── utils/ # 工具函数 | ||
| 110 | +│ ├── run.sh # C++ Swing执行脚本 | ||
| 111 | +│ ├── Makefile # 编译配置 | ||
| 112 | +│ └── output/ # 输出目录 | ||
| 113 | +├── config/ | ||
| 114 | +│ └── offline_config.py # 配置文件 | ||
| 115 | +├── doc/ # 文档中心 | ||
| 116 | +│ ├── README.md | ||
| 117 | +│ ├── 快速开始.md | ||
| 118 | +│ ├── Swing算法使用指南.md | ||
| 119 | +│ ├── 离线索引数据规范.md | ||
| 120 | +│ ├── Redis数据规范.md | ||
| 121 | +│ └── ... | ||
| 122 | +├── output/ # 输出文件 | ||
| 123 | +│ ├── item_attributes_mappings.json | ||
| 124 | +│ ├── session.txt.* | ||
| 125 | +│ └── *.txt | ||
| 126 | +├── logs/ # 日志文件 | ||
| 127 | +├── run.sh # ⭐ 主执行脚本(推荐) | ||
| 128 | +├── run_all.py # Python版本(保留但简化) | ||
| 129 | +└── README.md | ||
| 130 | +``` | ||
| 131 | + | ||
| 132 | +--- | ||
| 133 | + | ||
| 134 | +## 🚀 使用方式 | ||
| 135 | + | ||
| 136 | +### 主要方式:run.sh(推荐) | ||
| 137 | + | ||
| 138 | +```bash | ||
| 139 | +cd /home/tw/recommendation/offline_tasks | ||
| 140 | + | ||
| 141 | +# 直接运行(使用默认配置) | ||
| 142 | +bash run.sh | ||
| 143 | + | ||
| 144 | +# 修改配置后运行 | ||
| 145 | +# 编辑 run.sh 顶部的配置区域 | ||
| 146 | +vim run.sh | ||
| 147 | + | ||
| 148 | +# 查看帮助 | ||
| 149 | +cat run.sh | head -40 # 查看配置说明 | ||
| 150 | +``` | ||
| 151 | + | ||
| 152 | +**run.sh配置项**: | ||
| 153 | +```bash | ||
| 154 | +# 算法参数 | ||
| 155 | +LOOKBACK_DAYS=730 | ||
| 156 | +TOP_N=50 | ||
| 157 | +DEBUG_MODE="--debug" # 留空则不开启debug | ||
| 158 | + | ||
| 159 | +# Redis配置 | ||
| 160 | +REDIS_HOST="localhost" | ||
| 161 | +REDIS_PORT=6379 | ||
| 162 | + | ||
| 163 | +# 内存监控阈值 | ||
| 164 | +MEM_WARN_THRESHOLD=25 # GB | ||
| 165 | +MEM_KILL_THRESHOLD=35 # GB | ||
| 166 | +``` | ||
| 167 | + | ||
| 168 | +### 备用方式:run_all.py(简化版) | ||
| 169 | + | ||
| 170 | +```bash | ||
| 171 | +cd /home/tw/recommendation/offline_tasks | ||
| 172 | + | ||
| 173 | +# 运行(不包括C++ Swing和Redis加载) | ||
| 174 | +python3 run_all.py --debug | ||
| 175 | +``` | ||
| 176 | + | ||
| 177 | +**注意**: `run_all.py`已简化,只包含: | ||
| 178 | +- 前置任务(商品属性、session) | ||
| 179 | +- Python算法任务(Swing、W2V、DeepWalk等) | ||
| 180 | +- 不包括C++ Swing和Redis加载 | ||
| 181 | + | ||
| 182 | +--- | ||
| 183 | + | ||
| 184 | +## 📊 执行流程对比 | ||
| 185 | + | ||
| 186 | +### run.sh(完整流程) | ||
| 187 | + | ||
| 188 | +``` | ||
| 189 | +1. 环境准备 | ||
| 190 | + ├─ 清理旧进程 | ||
| 191 | + └─ 创建必要目录 | ||
| 192 | + | ||
| 193 | +2. 前置任务 | ||
| 194 | + ├─ fetch_item_attributes.py → 商品属性映射 | ||
| 195 | + ├─ generate_session.py → 用户session | ||
| 196 | + └─ collaboration/run.sh → C++ Swing (高性能) | ||
| 197 | + | ||
| 198 | +3. i2i算法任务 | ||
| 199 | + ├─ i2i_swing.py → Python Swing (日期维度) | ||
| 200 | + ├─ i2i_session_w2v.py → Session W2V | ||
| 201 | + ├─ i2i_deepwalk.py → DeepWalk | ||
| 202 | + └─ i2i_content_similar.py → 内容相似度 | ||
| 203 | + | ||
| 204 | +4. 兴趣聚合 | ||
| 205 | + └─ interest_aggregation.py → 多维度聚合 | ||
| 206 | + | ||
| 207 | +5. 加载Redis | ||
| 208 | + └─ load_index_to_redis.py → 导入Redis | ||
| 209 | + | ||
| 210 | +6. 完成 | ||
| 211 | + └─ 输出结果文件列表 | ||
| 212 | +``` | ||
| 213 | + | ||
| 214 | +### run_all.py(简化流程) | ||
| 215 | + | ||
| 216 | +``` | ||
| 217 | +1. 前置任务 | ||
| 218 | + ├─ fetch_item_attributes.py | ||
| 219 | + └─ generate_session.py | ||
| 220 | + | ||
| 221 | +2. i2i算法任务 | ||
| 222 | + ├─ i2i_swing.py | ||
| 223 | + ├─ i2i_session_w2v.py | ||
| 224 | + ├─ i2i_deepwalk.py | ||
| 225 | + └─ i2i_content_similar.py | ||
| 226 | + | ||
| 227 | +3. 兴趣聚合 | ||
| 228 | + └─ interest_aggregation.py | ||
| 229 | +``` | ||
| 230 | + | ||
| 231 | +--- | ||
| 232 | + | ||
| 233 | +## 💡 关键改进 | ||
| 234 | + | ||
| 235 | +### 1. 代码简化 | ||
| 236 | + | ||
| 237 | +**删除的冗余代码**: | ||
| 238 | +- `run_all.py`中的`run_cpp_swing()`函数(45行) | ||
| 239 | +- 复杂的子进程调用和错误处理 | ||
| 240 | + | ||
| 241 | +**简化效果**: | ||
| 242 | +- run.sh: 直接调用,清晰明了 | ||
| 243 | +- run_all.py: 从270行简化到211行 | ||
| 244 | + | ||
| 245 | +### 2. 灵活性提升 | ||
| 246 | + | ||
| 247 | +**run.sh的优势**: | ||
| 248 | +```bash | ||
| 249 | +# 内存监控(自动) | ||
| 250 | +check_memory $pid "$task_name" & | ||
| 251 | + | ||
| 252 | +# 任务函数(统一) | ||
| 253 | +run_task "任务名" "python3 scripts/xxx.py" | ||
| 254 | + | ||
| 255 | +# 配置集中(顶部) | ||
| 256 | +LOOKBACK_DAYS=730 | ||
| 257 | +DEBUG_MODE="--debug" | ||
| 258 | +``` | ||
| 259 | + | ||
| 260 | +### 3. 错误处理 | ||
| 261 | + | ||
| 262 | +**之前**: | ||
| 263 | +- Python捕获异常,日志分散 | ||
| 264 | +- 失败后需要手动排查 | ||
| 265 | + | ||
| 266 | +**现在**: | ||
| 267 | +- Shell直接显示错误 | ||
| 268 | +- 内存监控自动处理OOM | ||
| 269 | +- 任务失败继续执行后续任务 | ||
| 270 | + | ||
| 271 | +--- | ||
| 272 | + | ||
| 273 | +## 🔧 常见操作 | ||
| 274 | + | ||
| 275 | +### 修改算法参数 | ||
| 276 | + | ||
| 277 | +```bash | ||
| 278 | +# 编辑 run.sh | ||
| 279 | +vim run.sh | ||
| 280 | + | ||
| 281 | +# 修改这些参数 | ||
| 282 | +LOOKBACK_DAYS=365 # 回看天数 | ||
| 283 | +TOP_N=100 # 推荐数量 | ||
| 284 | +DEBUG_MODE="" # 关闭debug | ||
| 285 | +``` | ||
| 286 | + | ||
| 287 | +### 只运行特定任务 | ||
| 288 | + | ||
| 289 | +```bash | ||
| 290 | +cd /home/tw/recommendation/offline_tasks | ||
| 291 | + | ||
| 292 | +# 只运行C++ Swing | ||
| 293 | +cd collaboration && bash run.sh && cd .. | ||
| 294 | + | ||
| 295 | +# 只运行Python Swing | ||
| 296 | +python3 scripts/i2i_swing.py --lookback_days 730 --debug | ||
| 297 | + | ||
| 298 | +# 只加载Redis | ||
| 299 | +python3 scripts/load_index_to_redis.py --redis-host localhost | ||
| 300 | +``` | ||
| 301 | + | ||
| 302 | +### 查看日志 | ||
| 303 | + | ||
| 304 | +```bash | ||
| 305 | +# 主日志 | ||
| 306 | +tail -f logs/run_all_$(date +%Y%m%d).log | ||
| 307 | + | ||
| 308 | +# 内存监控日志 | ||
| 309 | +tail -f logs/memory_monitor.log | ||
| 310 | + | ||
| 311 | +# Debug日志 | ||
| 312 | +ls logs/debug/ | ||
| 313 | +``` | ||
| 314 | + | ||
| 315 | +--- | ||
| 316 | + | ||
| 317 | +## 📝 迁移指南 | ||
| 318 | + | ||
| 319 | +如果你之前使用`python3 run_all.py`,现在改用`bash run.sh`: | ||
| 320 | + | ||
| 321 | +### 命令对应关系 | ||
| 322 | + | ||
| 323 | +| 之前 | 现在 | 说明 | | ||
| 324 | +|------|------|------| | ||
| 325 | +| `python3 run_all.py` | `bash run.sh` | 完整流程 | | ||
| 326 | +| `python3 run_all.py --debug` | `bash run.sh` | run.sh默认开启debug | | ||
| 327 | +| 无对应命令 | `bash run.sh` | 现在包含Redis加载 | | ||
| 328 | + | ||
| 329 | +### 定时任务更新 | ||
| 330 | + | ||
| 331 | +**旧的crontab**: | ||
| 332 | +```cron | ||
| 333 | +0 3 * * * cd /home/tw/recommendation/offline_tasks && python3 run_all.py | ||
| 334 | +``` | ||
| 335 | + | ||
| 336 | +**新的crontab**: | ||
| 337 | +```cron | ||
| 338 | +0 3 * * * cd /home/tw/recommendation/offline_tasks && bash run.sh >> logs/cron_$(date +\%Y\%m\%d).log 2>&1 | ||
| 339 | +``` | ||
| 340 | + | ||
| 341 | +--- | ||
| 342 | + | ||
| 343 | +## ⚠️ 注意事项 | ||
| 344 | + | ||
| 345 | +1. **路径依赖**: | ||
| 346 | + - 确保在`offline_tasks`目录下执行`bash run.sh` | ||
| 347 | + - 不要在其他目录执行 | ||
| 348 | + | ||
| 349 | +2. **内存监控**: | ||
| 350 | + - 默认阈值:警告25GB,终止35GB | ||
| 351 | + - 根据服务器配置调整`MEM_WARN_THRESHOLD`和`MEM_KILL_THRESHOLD` | ||
| 352 | + | ||
| 353 | +3. **并行执行**: | ||
| 354 | + - 不建议同时运行多个`run.sh`实例 | ||
| 355 | + - 脚本会自动清理旧进程 | ||
| 356 | + | ||
| 357 | +4. **失败处理**: | ||
| 358 | + - 单个任务失败不会终止整体流程 | ||
| 359 | + - 查看日志确认失败原因 | ||
| 360 | + | ||
| 361 | +--- | ||
| 362 | + | ||
| 363 | +## 🎯 总结 | ||
| 364 | + | ||
| 365 | +### 改进前后对比 | ||
| 366 | + | ||
| 367 | +| 方面 | 改进前 | 改进后 | | ||
| 368 | +|------|--------|--------| | ||
| 369 | +| **目录结构** | collaboration在外层 | 统一在offline_tasks内 | | ||
| 370 | +| **主执行脚本** | run_all.py (Python) | run.sh (Shell) | | ||
| 371 | +| **代码复杂度** | 270行,多层抽象 | 214行,直接调用 | | ||
| 372 | +| **配置方式** | 参数分散 | 集中在顶部 | | ||
| 373 | +| **内存监控** | 无 | 自动监控+自动终止 | | ||
| 374 | +| **错误处理** | Python异常捕获 | Shell直接显示 | | ||
| 375 | +| **包含任务** | 不含Redis加载 | 含完整流程 | | ||
| 376 | + | ||
| 377 | +### 核心改进 | ||
| 378 | + | ||
| 379 | +1. ✅ **结构简化**: collaboration目录移入,统一管理 | ||
| 380 | +2. ✅ **代码简化**: 去除冗余抽象,直接调用脚本 | ||
| 381 | +3. ✅ **功能增强**: 添加内存监控、统一任务管理 | ||
| 382 | +4. ✅ **易用性**: 配置集中、日志清晰、错误明确 | ||
| 383 | + | ||
| 384 | +--- | ||
| 385 | + | ||
| 386 | +## 📚 相关文档 | ||
| 387 | + | ||
| 388 | +- [快速开始](./快速开始.md) | ||
| 389 | +- [运行脚本指南](./运行脚本指南.md) | ||
| 390 | +- [故障排查指南](./故障排查指南.md) | ||
| 391 | +- [系统改进总结](./系统改进总结-20241017.md) | ||
| 392 | + | ||
| 393 | +--- | ||
| 394 | + | ||
| 395 | +**更新时间**: 2024-10-17 | ||
| 396 | +**状态**: ✅ 已完成并测试 | ||
| 397 | + |
offline_tasks/run.sh
| 1 | #!/bin/bash | 1 | #!/bin/bash |
| 2 | 2 | ||
| 3 | + | ||
| 3 | cd /home/tw/recommendation/offline_tasks | 4 | cd /home/tw/recommendation/offline_tasks |
| 4 | 5 | ||
| 6 | +# mkdir bak___before_rm_run_all_py | ||
| 7 | +# mv output logs nohup.out bak___before_rm_run_all_py/ | ||
| 8 | +# mkdir output | ||
| 9 | +# mkdir logs | ||
| 10 | + | ||
| 11 | + | ||
| 12 | +# ============================================================================ | ||
| 13 | +# 配置区域 | ||
| 14 | +# ============================================================================ | ||
| 15 | + | ||
| 16 | +# 算法参数 | ||
| 17 | +LOOKBACK_DAYS=400 | ||
| 18 | +TOP_N=50 | ||
| 19 | +DEBUG_MODE="--debug" # 留空则不开启debug | ||
| 20 | + | ||
| 21 | +# Redis配置 | ||
| 22 | +REDIS_HOST="localhost" | ||
| 23 | +REDIS_PORT=6379 | ||
| 24 | + | ||
| 25 | +# 内存监控阈值 | ||
| 26 | +MEM_WARN_THRESHOLD=25 # GB | ||
| 27 | +MEM_KILL_THRESHOLD=35 # GB | ||
| 28 | + | ||
| 29 | +# ============================================================================ | ||
| 30 | +# 工具函数 | ||
| 31 | +# ============================================================================ | ||
| 32 | + | ||
| 5 | # 内存监控函数 | 33 | # 内存监控函数 |
| 6 | check_memory() { | 34 | check_memory() { |
| 7 | local pid=$1 | 35 | local pid=$1 |
| 8 | - local threshold_warn=25 # 25GB警告阈值 | ||
| 9 | - local threshold_kill=35 # 30GB强制kill阈值 | 36 | + local task_name=$2 |
| 10 | 37 | ||
| 11 | while kill -0 $pid 2>/dev/null; do | 38 | while kill -0 $pid 2>/dev/null; do |
| 12 | - # 获取进程内存使用(MB) | ||
| 13 | local mem_mb=$(ps -p $pid -o rss= 2>/dev/null | awk '{print int($1/1024)}') | 39 | local mem_mb=$(ps -p $pid -o rss= 2>/dev/null | awk '{print int($1/1024)}') |
| 14 | 40 | ||
| 15 | if [ -n "$mem_mb" ]; then | 41 | if [ -n "$mem_mb" ]; then |
| 16 | local mem_gb=$(echo "scale=2; $mem_mb/1024" | bc) | 42 | local mem_gb=$(echo "scale=2; $mem_mb/1024" | bc) |
| 17 | local timestamp=$(date '+%Y-%m-%d %H:%M:%S') | 43 | local timestamp=$(date '+%Y-%m-%d %H:%M:%S') |
| 18 | 44 | ||
| 19 | - if [ $(echo "$mem_gb >= $threshold_kill" | bc) -eq 1 ]; then | ||
| 20 | - echo "[$timestamp] ❌ 内存超限!当前使用: ${mem_gb}GB (>= ${threshold_kill}GB), 强制终止进程 PID=$pid" | tee -a logs/memory_monitor.log | 45 | + if [ $(echo "$mem_gb >= $MEM_KILL_THRESHOLD" | bc) -eq 1 ]; then |
| 46 | + echo "[$timestamp] ❌ [$task_name] 内存超限!${mem_gb}GB, 强制终止" | tee -a logs/memory_monitor.log | ||
| 21 | kill -9 $pid | 47 | kill -9 $pid |
| 22 | break | 48 | break |
| 23 | - elif [ $(echo "$mem_gb >= $threshold_warn" | bc) -eq 1 ]; then | ||
| 24 | - echo "[$timestamp] ⚠️ 内存警告!当前使用: ${mem_gb}GB (>= ${threshold_warn}GB), PID=$pid" | tee -a logs/memory_monitor.log | 49 | + elif [ $(echo "$mem_gb >= $MEM_WARN_THRESHOLD" | bc) -eq 1 ]; then |
| 50 | + echo "[$timestamp] ⚠️ [$task_name] 内存警告: ${mem_gb}GB" | tee -a logs/memory_monitor.log | ||
| 25 | fi | 51 | fi |
| 26 | fi | 52 | fi |
| 27 | 53 | ||
| @@ -29,58 +55,166 @@ check_memory() { | @@ -29,58 +55,166 @@ check_memory() { | ||
| 29 | done | 55 | done |
| 30 | } | 56 | } |
| 31 | 57 | ||
| 58 | +# 运行任务函数 | ||
| 59 | +run_task() { | ||
| 60 | + local task_name=$1 | ||
| 61 | + local task_cmd=$2 | ||
| 62 | + | ||
| 63 | + echo "" | ||
| 64 | + echo "======================================================================" | ||
| 65 | + echo "[$task_name] 开始 - $(date '+%Y-%m-%d %H:%M:%S')" | ||
| 66 | + echo "======================================================================" | ||
| 67 | + | ||
| 68 | + eval $task_cmd & | ||
| 69 | + local pid=$! | ||
| 70 | + | ||
| 71 | + # 启动内存监控 | ||
| 72 | + check_memory $pid "$task_name" & | ||
| 73 | + local monitor_pid=$! | ||
| 74 | + | ||
| 75 | + # 等待任务完成 | ||
| 76 | + wait $pid | ||
| 77 | + local exit_code=$? | ||
| 78 | + | ||
| 79 | + # 停止内存监控 | ||
| 80 | + kill $monitor_pid 2>/dev/null | ||
| 81 | + | ||
| 82 | + if [ $exit_code -eq 0 ]; then | ||
| 83 | + echo "✓ [$task_name] 完成" | ||
| 84 | + return 0 | ||
| 85 | + else | ||
| 86 | + echo "✗ [$task_name] 失败,退出码: $exit_code" | ||
| 87 | + return $exit_code | ||
| 88 | + fi | ||
| 89 | +} | ||
| 90 | + | ||
| 91 | +# ============================================================================ | ||
| 92 | +# 环境准备 | ||
| 93 | +# ============================================================================ | ||
| 94 | + | ||
| 32 | # 清理旧进程 | 95 | # 清理旧进程 |
| 33 | -ps -ef|grep run_all.py | awk '{print $2}' | xargs kill -9 2>/dev/null | ||
| 34 | -ps -ef|grep recommendation | awk '{print $2}' | xargs kill -9 2>/dev/null | ||
| 35 | -rm output/* -rf 2>/dev/null | ||
| 36 | -rm logs/* -rf 2>/dev/null | ||
| 37 | -mkdir -p logs | 96 | +ps -ef | grep "python3.*scripts" | grep -v grep | awk '{print $2}' | xargs kill -9 2>/dev/null |
| 97 | + | ||
| 98 | +# 创建必要目录 | ||
| 99 | +mkdir -p logs output | ||
| 38 | 100 | ||
| 39 | echo "======================================================================" | 101 | echo "======================================================================" |
| 40 | -echo "开始运行离线任务 - $(date '+%Y-%m-%d %H:%M:%S')" | ||
| 41 | -echo "内存监控: 警告阈值=25GB, 强制终止阈值=30GB" | 102 | +echo "开始运行离线推荐任务 - $(date '+%Y-%m-%d %H:%M:%S')" |
| 103 | +echo "配置: lookback_days=$LOOKBACK_DAYS, top_n=$TOP_N" | ||
| 104 | +echo "内存监控: 警告=${MEM_WARN_THRESHOLD}GB, 终止=${MEM_KILL_THRESHOLD}GB" | ||
| 42 | echo "======================================================================" | 105 | echo "======================================================================" |
| 43 | 106 | ||
| 107 | +# ============================================================================ | ||
| 108 | +# 前置任务 | ||
| 109 | +# ============================================================================ | ||
| 44 | 110 | ||
| 111 | +# 前置任务1: 获取商品属性 | ||
| 112 | +run_task "前置任务1: 获取商品属性" \ | ||
| 113 | + "python3 scripts/fetch_item_attributes.py $DEBUG_MODE" | ||
| 114 | +if [ $? -ne 0 ]; then | ||
| 115 | + echo "⚠️ 商品属性获取失败,但继续执行" | ||
| 116 | +fi | ||
| 117 | + | ||
| 118 | +# 前置任务2: 生成Session文件 | ||
| 119 | +run_task "前置任务2: 生成Session文件" \ | ||
| 120 | + "python3 scripts/generate_session.py --lookback_days $LOOKBACK_DAYS --format both $DEBUG_MODE" | ||
| 121 | +if [ $? -ne 0 ]; then | ||
| 122 | + echo "❌ Session文件生成失败,退出" | ||
| 123 | + exit 1 | ||
| 124 | +fi | ||
| 125 | + | ||
| 126 | +# 前置任务3: C++ Swing算法 | ||
| 45 | echo "" | 127 | echo "" |
| 46 | -echo ">>> run_all.py" | ||
| 47 | -# python3 run_all.py --lookback_days 400 --top_n 50 --debug & | ||
| 48 | -python3 run_all.py --debug & | ||
| 49 | -PID_PROD=$! | ||
| 50 | -echo "生产任务 PID: $PID_PROD" | ||
| 51 | - | ||
| 52 | -# 启动内存监控 | ||
| 53 | -check_memory $PID_PROD & | ||
| 54 | -MONITOR_PID_2=$! | ||
| 55 | - | ||
| 56 | -# 等待生产任务完成 | ||
| 57 | -wait $PID_PROD | ||
| 58 | -PROD_EXIT_CODE=$? | ||
| 59 | -kill $MONITOR_PID_2 2>/dev/null | ||
| 60 | - | ||
| 61 | -if [ $PROD_EXIT_CODE -eq 0 ]; then | ||
| 62 | - echo "✓ 生产模式完成" | 128 | +echo "======================================================================" |
| 129 | +echo "[前置任务3: C++ Swing算法] 开始 - $(date '+%Y-%m-%d %H:%M:%S')" | ||
| 130 | +echo "======================================================================" | ||
| 131 | +cd collaboration | ||
| 132 | +bash run.sh | ||
| 133 | +SWING_EXIT=$? | ||
| 134 | +cd .. | ||
| 135 | + | ||
| 136 | +if [ $SWING_EXIT -eq 0 ]; then | ||
| 137 | + echo "✓ [前置任务3: C++ Swing算法] 完成" | ||
| 63 | else | 138 | else |
| 64 | - echo "✗ 生产模式失败,退出码: $PROD_EXIT_CODE" | ||
| 65 | - exit 1 | 139 | + echo "⚠️ [前置任务3: C++ Swing算法] 失败,但继续执行" |
| 140 | +fi | ||
| 141 | + | ||
| 142 | +# ============================================================================ | ||
| 143 | +# i2i相似度任务 | ||
| 144 | +# ============================================================================ | ||
| 145 | + | ||
| 146 | +# Task 1: Python Swing算法 | ||
| 147 | +run_task "Task 1: Python Swing算法" \ | ||
| 148 | + "python3 scripts/i2i_swing.py --lookback_days $LOOKBACK_DAYS --top_n $TOP_N --use_daily_session $DEBUG_MODE" | ||
| 149 | +if [ $? -ne 0 ]; then | ||
| 150 | + echo "⚠️ Python Swing失败,但继续执行" | ||
| 151 | +fi | ||
| 152 | + | ||
| 153 | +# Task 2: Session W2V | ||
| 154 | +run_task "Task 2: Session W2V" \ | ||
| 155 | + "python3 scripts/i2i_session_w2v.py --lookback_days $LOOKBACK_DAYS --top_n $TOP_N --save_model $DEBUG_MODE" | ||
| 156 | +if [ $? -ne 0 ]; then | ||
| 157 | + echo "⚠️ Session W2V失败,但继续执行" | ||
| 158 | +fi | ||
| 159 | + | ||
| 160 | +# Task 3: DeepWalk | ||
| 161 | +run_task "Task 3: DeepWalk" \ | ||
| 162 | + "python3 scripts/i2i_deepwalk.py --lookback_days $LOOKBACK_DAYS --top_n $TOP_N --save_model --save_graph $DEBUG_MODE" | ||
| 163 | +if [ $? -ne 0 ]; then | ||
| 164 | + echo "⚠️ DeepWalk失败,但继续执行" | ||
| 165 | +fi | ||
| 166 | + | ||
| 167 | +# Task 4: 内容相似度 | ||
| 168 | +run_task "Task 4: 内容相似度" \ | ||
| 169 | + "python3 scripts/i2i_content_similar.py" | ||
| 170 | +if [ $? -ne 0 ]; then | ||
| 171 | + echo "⚠️ 内容相似度失败,但继续执行" | ||
| 66 | fi | 172 | fi |
| 67 | 173 | ||
| 174 | +# ============================================================================ | ||
| 175 | +# 兴趣聚合任务 | ||
| 176 | +# ============================================================================ | ||
| 177 | + | ||
| 178 | +# Task 5: 兴趣聚合 | ||
| 179 | +run_task "Task 5: 兴趣聚合" \ | ||
| 180 | + "python3 scripts/interest_aggregation.py --lookback_days $LOOKBACK_DAYS --top_n 1000 $DEBUG_MODE" | ||
| 181 | +if [ $? -ne 0 ]; then | ||
| 182 | + echo "⚠️ 兴趣聚合失败,但继续执行" | ||
| 183 | +fi | ||
| 184 | + | ||
| 185 | +# ============================================================================ | ||
| 186 | +# 加载到Redis | ||
| 187 | +# ============================================================================ | ||
| 68 | 188 | ||
| 69 | echo "" | 189 | echo "" |
| 70 | -echo ">>> 步骤3: 加载到Redis" | ||
| 71 | -python3 scripts/load_index_to_redis.py --redis-host localhost | ||
| 72 | -LOAD_EXIT_CODE=$? | 190 | +echo "======================================================================" |
| 191 | +echo "[加载到Redis] 开始 - $(date '+%Y-%m-%d %H:%M:%S')" | ||
| 192 | +echo "======================================================================" | ||
| 193 | + | ||
| 194 | +python3 scripts/load_index_to_redis.py --redis-host $REDIS_HOST --redis-port $REDIS_PORT | ||
| 195 | +LOAD_EXIT=$? | ||
| 73 | 196 | ||
| 74 | -if [ $LOAD_EXIT_CODE -eq 0 ]; then | ||
| 75 | - echo "✓ Redis加载完成" | 197 | +if [ $LOAD_EXIT -eq 0 ]; then |
| 198 | + echo "✓ [加载到Redis] 完成" | ||
| 76 | else | 199 | else |
| 77 | - echo "✗ Redis加载失败,退出码: $LOAD_EXIT_CODE" | 200 | + echo "❌ [加载到Redis] 失败,退出码: $LOAD_EXIT" |
| 78 | exit 1 | 201 | exit 1 |
| 79 | fi | 202 | fi |
| 80 | 203 | ||
| 204 | +# ============================================================================ | ||
| 205 | +# 完成 | ||
| 206 | +# ============================================================================ | ||
| 207 | + | ||
| 81 | echo "" | 208 | echo "" |
| 82 | echo "======================================================================" | 209 | echo "======================================================================" |
| 83 | echo "所有任务完成 - $(date '+%Y-%m-%d %H:%M:%S')" | 210 | echo "所有任务完成 - $(date '+%Y-%m-%d %H:%M:%S')" |
| 84 | echo "======================================================================" | 211 | echo "======================================================================" |
| 85 | - | ||
| 86 | - | 212 | +echo "" |
| 213 | +echo "输出文件位置:" | ||
| 214 | +echo " - 商品属性: output/item_attributes_mappings.json" | ||
| 215 | +echo " - Session文件: output/session.txt.*" | ||
| 216 | +echo " - C++ Swing: collaboration/output/swing_similar.txt" | ||
| 217 | +echo " - Python算法: output/i2i_*.txt" | ||
| 218 | +echo " - 兴趣聚合: output/interest_aggregation_*.txt" | ||
| 219 | +echo " - 日志: logs/" | ||
| 220 | +echo "" |
offline_tasks/run_all.py
| @@ -79,52 +79,6 @@ def run_script(script_name, args=None): | @@ -79,52 +79,6 @@ def run_script(script_name, args=None): | ||
| 79 | return False | 79 | return False |
| 80 | 80 | ||
| 81 | 81 | ||
| 82 | -def run_cpp_swing(): | ||
| 83 | - """ | ||
| 84 | - 运行C++ Swing算法 | ||
| 85 | - | ||
| 86 | - Returns: | ||
| 87 | - bool: 是否成功 | ||
| 88 | - """ | ||
| 89 | - collaboration_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'collaboration') | ||
| 90 | - run_sh_path = os.path.join(collaboration_dir, 'run.sh') | ||
| 91 | - | ||
| 92 | - if not os.path.exists(run_sh_path): | ||
| 93 | - logger.error(f"C++ Swing script not found: {run_sh_path}") | ||
| 94 | - return False | ||
| 95 | - | ||
| 96 | - logger.info(f"Running C++ Swing: bash {run_sh_path}") | ||
| 97 | - | ||
| 98 | - try: | ||
| 99 | - result = subprocess.run( | ||
| 100 | - ['bash', run_sh_path], | ||
| 101 | - cwd=collaboration_dir, | ||
| 102 | - check=True, | ||
| 103 | - capture_output=True, | ||
| 104 | - text=True | ||
| 105 | - ) | ||
| 106 | - logger.info("C++ Swing algorithm completed successfully") | ||
| 107 | - # 输出部分日志 | ||
| 108 | - output_lines = result.stdout.split('\n') | ||
| 109 | - for line in output_lines[-20:]: # 输出最后20行 | ||
| 110 | - if line.strip(): | ||
| 111 | - logger.info(f" {line}") | ||
| 112 | - return True | ||
| 113 | - except subprocess.CalledProcessError as e: | ||
| 114 | - logger.error(f"C++ Swing failed with return code {e.returncode}") | ||
| 115 | - logger.error(f"Error output: {e.stderr}") | ||
| 116 | - # 输出部分stdout以便调试 | ||
| 117 | - if e.stdout: | ||
| 118 | - logger.error("Stdout output:") | ||
| 119 | - for line in e.stdout.split('\n')[-20:]: | ||
| 120 | - if line.strip(): | ||
| 121 | - logger.error(f" {line}") | ||
| 122 | - return False | ||
| 123 | - except Exception as e: | ||
| 124 | - logger.error(f"Unexpected error running C++ Swing: {e}") | ||
| 125 | - return False | ||
| 126 | - | ||
| 127 | - | ||
| 128 | def main(): | 82 | def main(): |
| 129 | parser = argparse.ArgumentParser(description='Run all offline recommendation tasks') | 83 | parser = argparse.ArgumentParser(description='Run all offline recommendation tasks') |
| 130 | parser.add_argument('--debug', action='store_true', | 84 | parser.add_argument('--debug', action='store_true', |
| @@ -170,22 +124,9 @@ def main(): | @@ -170,22 +124,9 @@ def main(): | ||
| 170 | else: | 124 | else: |
| 171 | logger.error("生成session文件失败") | 125 | logger.error("生成session文件失败") |
| 172 | 126 | ||
| 173 | - # 前置任务3: 运行C++ Swing算法 | ||
| 174 | - logger.info("\n" + "="*80) | ||
| 175 | - logger.info("前置任务3: 运行C++ Swing算法(基于session文件)") | ||
| 176 | - logger.info("="*80) | ||
| 177 | - total_count += 1 | ||
| 178 | - if run_cpp_swing(): | ||
| 179 | - success_count += 1 | ||
| 180 | - logger.info("✓ C++ Swing算法执行成功") | ||
| 181 | - logger.info(" 结果文件: collaboration/output/swing_similar.txt") | ||
| 182 | - logger.info(" 可读文件: collaboration/output/swing_similar_readable.txt") | ||
| 183 | - else: | ||
| 184 | - logger.error("C++ Swing算法执行失败,但不影响其他任务继续") | ||
| 185 | - | ||
| 186 | # i2i 行为相似任务 | 127 | # i2i 行为相似任务 |
| 187 | logger.info("\n" + "="*80) | 128 | logger.info("\n" + "="*80) |
| 188 | - logger.info("Task 1: Running Python Swing algorithm for i2i similarity") | 129 | + logger.info("Task 1: Running Swing algorithm for i2i similarity") |
| 189 | logger.info("="*80) | 130 | logger.info("="*80) |
| 190 | total_count += 1 | 131 | total_count += 1 |
| 191 | script_args = [ | 132 | script_args = [ |