Commit 801fb682049255f0ba81df38d10ebc52dbfb017e
1 parent
a1c26d3d
add cpp swing for mem optimize
Showing
17 changed files
with
36 additions
and
6 deletions
Show diff stats
collaboration/.gitignore renamed to offline_tasks/collaboration/.gitignore
collaboration/Makefile renamed to offline_tasks/collaboration/Makefile
collaboration/README.md renamed to offline_tasks/collaboration/README.md
collaboration/Swing快速开始.md renamed to offline_tasks/collaboration/Swing快速开始.md
collaboration/bin/icf_simple renamed to offline_tasks/collaboration/bin/icf_simple
No preview for this file type
collaboration/bin/swing renamed to offline_tasks/collaboration/bin/swing
No preview for this file type
collaboration/bin/swing_symmetric renamed to offline_tasks/collaboration/bin/swing_symmetric
No preview for this file type
collaboration/eval.py renamed to offline_tasks/collaboration/eval.py
collaboration/include/BitMap.h renamed to offline_tasks/collaboration/include/BitMap.h
collaboration/include/utils.h renamed to offline_tasks/collaboration/include/utils.h
collaboration/run.sh renamed to offline_tasks/collaboration/run.sh
collaboration/src/icf_simple.cc renamed to offline_tasks/collaboration/src/icf_simple.cc
collaboration/src/swing.cc renamed to offline_tasks/collaboration/src/swing.cc
collaboration/src/swing_symmetric.cc renamed to offline_tasks/collaboration/src/swing_symmetric.cc
collaboration/src/ucf.py renamed to offline_tasks/collaboration/src/ucf.py
collaboration/utils/utils.cc renamed to offline_tasks/collaboration/utils/utils.cc
offline_tasks/doc/Redis数据规范.md
| ... | ... | @@ -294,15 +294,21 @@ def load_interest_index(file_path, list_type, redis_client, expire_seconds=25920 |
| 294 | 294 | ```bash |
| 295 | 295 | cd /home/tw/recommendation/offline_tasks |
| 296 | 296 | |
| 297 | -# 加载所有索引(使用今天的数据) | |
| 297 | +# 加载所有索引(使用今天的数据,包括C++ Swing) | |
| 298 | 298 | python3 scripts/load_index_to_redis.py --redis-host localhost --redis-port 6379 |
| 299 | 299 | |
| 300 | 300 | # 加载指定日期的索引 |
| 301 | 301 | python3 scripts/load_index_to_redis.py --date 20251016 --redis-host localhost |
| 302 | 302 | |
| 303 | -# 只加载i2i索引 | |
| 303 | +# 只加载i2i索引(包括C++ Swing) | |
| 304 | 304 | python3 scripts/load_index_to_redis.py --load-i2i --redis-host localhost |
| 305 | 305 | |
| 306 | +# 只加载C++ Swing索引 | |
| 307 | +python3 scripts/load_index_to_redis.py \ | |
| 308 | + --file collaboration/output/swing_similar.txt \ | |
| 309 | + --algorithm swing_cpp \ | |
| 310 | + --redis-host localhost | |
| 311 | + | |
| 306 | 312 | # 只加载兴趣聚合索引 |
| 307 | 313 | python3 scripts/load_index_to_redis.py --load-interest --redis-host localhost |
| 308 | 314 | ``` |
| ... | ... | @@ -315,7 +321,10 @@ redis-cli |
| 315 | 321 | # 检查key数量 |
| 316 | 322 | DBSIZE |
| 317 | 323 | |
| 318 | -# 查看某个商品的相似推荐 | |
| 324 | +# 查看某个商品的相似推荐(C++ Swing) | |
| 325 | +GET item:similar:swing_cpp:3600052 | |
| 326 | + | |
| 327 | +# 查看某个商品的相似推荐(Python Swing) | |
| 319 | 328 | GET item:similar:swing:12345 |
| 320 | 329 | |
| 321 | 330 | # 查看平台热门商品 |
| ... | ... | @@ -324,10 +333,17 @@ GET interest:hot:platform:pc |
| 324 | 333 | # 查看所有i2i相关的key |
| 325 | 334 | KEYS item:similar:* |
| 326 | 335 | |
| 336 | +# 查看C++ Swing的key | |
| 337 | +KEYS item:similar:swing_cpp:* | |
| 338 | + | |
| 339 | +# 查看Python Swing的key | |
| 340 | +KEYS item:similar:swing:* | |
| 341 | + | |
| 327 | 342 | # 查看所有interest相关的key |
| 328 | 343 | KEYS interest:* |
| 329 | 344 | |
| 330 | 345 | # 检查key的过期时间 |
| 346 | +TTL item:similar:swing_cpp:3600052 | |
| 331 | 347 | TTL item:similar:swing:12345 |
| 332 | 348 | ``` |
| 333 | 349 | |
| ... | ... | @@ -337,6 +353,7 @@ TTL item:similar:swing:12345 |
| 337 | 353 | |
| 338 | 354 | | 索引类型 | Key数量 | 单条Value大小 | 总内存 | |
| 339 | 355 | |---------|--------|-------------|--------| |
| 356 | +| i2i_swing_cpp | 50,000 | ~400B | ~20MB | | |
| 340 | 357 | | i2i_swing | 50,000 | ~500B | ~25MB | |
| 341 | 358 | | i2i_w2v | 50,000 | ~500B | ~25MB | |
| 342 | 359 | | i2i_deepwalk | 50,000 | ~500B | ~25MB | |
| ... | ... | @@ -346,7 +363,12 @@ TTL item:similar:swing:12345 |
| 346 | 363 | | interest_cart | 10,000 | ~1KB | ~10MB | |
| 347 | 364 | | interest_new | 5,000 | ~1KB | ~5MB | |
| 348 | 365 | | interest_global | 10,000 | ~1KB | ~10MB | |
| 349 | -| **总计** | **270,000** | - | **~160MB** | | |
| 366 | +| **总计** | **320,000** | - | **~180MB** | | |
| 367 | + | |
| 368 | +**说明**: | |
| 369 | +- C++ Swing数据更紧凑(无商品名),单条大小约400B | |
| 370 | +- 建议生产环境使用C++ Swing (`swing_cpp`),性能更优 | |
| 371 | +- Python Swing可作为对照组或特殊场景使用 | |
| 350 | 372 | |
| 351 | 373 | ### 过期策略 |
| 352 | 374 | |
| ... | ... | @@ -373,17 +395,25 @@ TTL item:similar:swing:12345 |
| 373 | 395 | ```python |
| 374 | 396 | # 检查加载成功率 |
| 375 | 397 | total_keys = redis_client.dbsize() |
| 376 | -expected_keys = 245000 | |
| 398 | +expected_keys = 320000 # 更新:包含C++ Swing | |
| 377 | 399 | success_rate = total_keys / expected_keys * 100 |
| 378 | 400 | |
| 379 | 401 | # 检查数据完整性 |
| 380 | 402 | sample_keys = [ |
| 381 | - 'item:similar:swing:12345', | |
| 403 | + 'item:similar:swing_cpp:3600052', # C++ Swing | |
| 404 | + 'item:similar:swing:12345', # Python Swing | |
| 405 | + 'item:similar:w2v:12345', | |
| 382 | 406 | 'interest:hot:platform:pc' |
| 383 | 407 | ] |
| 384 | 408 | for key in sample_keys: |
| 385 | 409 | if not redis_client.exists(key): |
| 386 | 410 | print(f"Missing key: {key}") |
| 411 | + | |
| 412 | +# 统计C++ Swing与Python Swing的key数量以对比覆盖率(注意:KEYS会阻塞Redis,生产环境建议改用SCAN) | |
| 413 | +cpp_swing_count = len(redis_client.keys('item:similar:swing_cpp:*')) | |
| 414 | +py_swing_count = len(redis_client.keys('item:similar:swing:*')) | |
| 415 | +print(f"C++ Swing keys: {cpp_swing_count}") | |
| 416 | +print(f"Python Swing keys: {py_swing_count}") | |
| 387 | 417 | ``` |
| 388 | 418 | |
| 389 | 419 | ### 性能指标 | ... | ... |