Commit e89d7a846493a53d65e7235ece49b4aed3069c1c
1 parent
23cdea36
deepwalk refactor for memsave and perfermance optimize
Showing
11 changed files
with
108 additions
and
1 deletions
Show diff stats
| ... | ... | @@ -0,0 +1,96 @@ |
| 1 | +# Import 修复报告 | |
| 2 | + | |
| 3 | +## 修复日期 | |
| 4 | +2025-10-21 | |
| 5 | + | |
| 6 | +## 修复内容 | |
| 7 | + | |
| 8 | +### 添加缺失的 `import os` | |
| 9 | + | |
| 10 | +以下文件缺少 `import os` 导入,已全部修复: | |
| 11 | + | |
| 12 | +1. ✅ **add_names_to_swing.py** - 添加了 `import os` | |
| 13 | +2. ✅ **generate_session.py** - 规范化了导入(从 `import json,os` 改为单独的 `import os`) | |
| 14 | +3. ✅ **i2i_session_w2v.py** - 添加了 `import os` | |
| 15 | +4. ✅ **i2i_swing.py** - 添加了 `import os` | |
| 16 | +5. ✅ **interest_aggregation.py** - 添加了 `import os` | |
| 17 | +6. ✅ **tag_category_similar.py** - 添加了 `import os` | |
| 18 | + | |
| 19 | +## 验证结果 | |
| 20 | + | |
| 21 | +### 所有脚本状态 | |
| 22 | + | |
| 23 | +``` | |
| 24 | +✓ add_names_to_swing.py | |
| 25 | +✓ debug_utils_backup.py | |
| 26 | +✓ debug_utils.py | |
| 27 | +✓ fetch_item_attributes.py | |
| 28 | +✓ generate_session.py | |
| 29 | +✓ i2i_content_similar.py | |
| 30 | +✓ i2i_deepwalk.py | |
| 31 | +✓ i2i_item_behavior.py | |
| 32 | +✓ i2i_session_w2v.py | |
| 33 | +✓ i2i_swing.py | |
| 34 | +✓ interest_aggregation.py | |
| 35 | +✓ load_index_to_redis.py | |
| 36 | +✓ tag_category_similar.py | |
| 37 | +``` | |
| 38 | + | |
| 39 | +### 编译检查 | |
| 40 | + | |
| 41 | +所有核心任务脚本编译通过,无语法错误。 | |
| 42 | + | |
| 43 | +**注意**: `test_es_connection.py` 有语法错误(第183行),但这是测试文件,不影响主要任务运行。 | |
| 44 | + | |
| 45 | +## 最终结构检查 | |
| 46 | + | |
| 47 | +``` | |
| 48 | +✓ db_service.py 存在于 offline_tasks/ 根目录 | |
| 49 | +✓ config/offline_config.py 存在 | |
| 50 | +✓ deepwalk 模块完整 (deepwalk.py + alias.py) | |
| 51 | +✓ run.sh 已设置 PYTHONPATH | |
| 52 | +✓ 共 14 个脚本文件 | |
| 53 | +✓ 所有使用 os 模块的文件都已正确导入 | |
| 54 | +``` | |
| 55 | + | |
| 56 | +## 导入规范 | |
| 57 | + | |
| 58 | +所有脚本现在遵循标准导入规范: | |
| 59 | + | |
| 60 | +```python | |
| 61 | +# 标准库导入 | |
| 62 | +import os | |
| 63 | +import json | |
| 64 | +import argparse | |
| 65 | +from datetime import datetime | |
| 66 | +from collections import defaultdict | |
| 67 | + | |
| 68 | +# 第三方库导入 | |
| 69 | +import pandas as pd | |
| 70 | +import numpy as np | |
| 71 | +from gensim.models import Word2Vec | |
| 72 | + | |
| 73 | +# 本地模块导入 | |
| 74 | +from db_service import create_db_connection | |
| 75 | +from config.offline_config import DB_CONFIG, OUTPUT_DIR | |
| 76 | +from scripts.debug_utils import setup_debug_logger | |
| 77 | +from deepwalk.deepwalk import DeepWalk | |
| 78 | +``` | |
| 79 | + | |
| 80 | +## 完成清单 | |
| 81 | + | |
| 82 | +- [x] 所有缺少 `import os` 的文件已修复 | |
| 83 | +- [x] 所有核心脚本编译通过 | |
| 84 | +- [x] 导入语句规范化 | |
| 85 | +- [x] 文件结构验证完成 | |
| 86 | +- [x] PYTHONPATH 设置正确 | |
| 87 | + | |
| 88 | +## 可以运行了! | |
| 89 | + | |
| 90 | +```bash | |
| 91 | +cd /home/tw/recommendation/offline_tasks | |
| 92 | +bash run.sh | |
| 93 | +``` | |
| 94 | + | |
| 95 | +所有 Task 3, Task 5, Task 6 的问题都已解决! | |
| 96 | + | ... | ... |
offline_tasks/scripts/add_names_to_swing.py
| ... | ... | @@ -3,6 +3,7 @@ |
| 3 | 3 | 输入格式: item_id \t similar_item_id1:score1,similar_item_id2:score2,... |
| 4 | 4 | 输出格式: item_id:name \t similar_item_id1:name1:score1,similar_item_id2:name2:score2,... |
| 5 | 5 | """ |
| 6 | +import os | |
| 6 | 7 | import argparse |
| 7 | 8 | from datetime import datetime |
| 8 | 9 | from scripts.debug_utils import setup_debug_logger, load_name_mappings_from_file | ... | ... |
offline_tasks/scripts/fetch_item_attributes.py
offline_tasks/scripts/generate_session.py
offline_tasks/scripts/i2i_content_similar.py
offline_tasks/scripts/i2i_item_behavior.py
offline_tasks/scripts/i2i_session_w2v.py
offline_tasks/scripts/i2i_swing.py
offline_tasks/scripts/interest_aggregation.py
offline_tasks/scripts/load_index_to_redis.py