Commit 801fb682049255f0ba81df38d10ebc52dbfb017e

Authored by tangwang
1 parent a1c26d3d

add cpp swing for mem optimize

collaboration/.gitignore renamed to offline_tasks/collaboration/.gitignore
collaboration/Makefile renamed to offline_tasks/collaboration/Makefile
collaboration/README.md renamed to offline_tasks/collaboration/README.md
collaboration/Swing快速开始.md renamed to offline_tasks/collaboration/Swing快速开始.md
collaboration/bin/icf_simple renamed to offline_tasks/collaboration/bin/icf_simple
No preview for this file type
collaboration/bin/swing renamed to offline_tasks/collaboration/bin/swing
No preview for this file type
collaboration/bin/swing_symmetric renamed to offline_tasks/collaboration/bin/swing_symmetric
No preview for this file type
collaboration/eval.py renamed to offline_tasks/collaboration/eval.py
collaboration/include/BitMap.h renamed to offline_tasks/collaboration/include/BitMap.h
collaboration/include/utils.h renamed to offline_tasks/collaboration/include/utils.h
collaboration/run.sh renamed to offline_tasks/collaboration/run.sh
collaboration/src/icf_simple.cc renamed to offline_tasks/collaboration/src/icf_simple.cc
collaboration/src/swing.cc renamed to offline_tasks/collaboration/src/swing.cc
collaboration/src/swing_symmetric.cc renamed to offline_tasks/collaboration/src/swing_symmetric.cc
collaboration/src/ucf.py renamed to offline_tasks/collaboration/src/ucf.py
collaboration/utils/utils.cc renamed to offline_tasks/collaboration/utils/utils.cc
offline_tasks/doc/Redis数据规范.md
@@ -294,15 +294,21 @@ def load_interest_index(file_path, list_type, redis_client, expire_seconds=25920 @@ -294,15 +294,21 @@ def load_interest_index(file_path, list_type, redis_client, expire_seconds=25920
294 ```bash 294 ```bash
295 cd /home/tw/recommendation/offline_tasks 295 cd /home/tw/recommendation/offline_tasks
296 296
297 -# 加载所有索引(使用今天的数据) 297 +# 加载所有索引(使用今天的数据,包括C++ Swing)
298 python3 scripts/load_index_to_redis.py --redis-host localhost --redis-port 6379 298 python3 scripts/load_index_to_redis.py --redis-host localhost --redis-port 6379
299 299
300 # 加载指定日期的索引 300 # 加载指定日期的索引
301 python3 scripts/load_index_to_redis.py --date 20251016 --redis-host localhost 301 python3 scripts/load_index_to_redis.py --date 20251016 --redis-host localhost
302 302
303 -# 只加载i2i索引 303 +# 只加载i2i索引(包括C++ Swing)
304 python3 scripts/load_index_to_redis.py --load-i2i --redis-host localhost 304 python3 scripts/load_index_to_redis.py --load-i2i --redis-host localhost
305 305
  306 +# 只加载C++ Swing索引
  307 +python3 scripts/load_index_to_redis.py \
  308 + --file collaboration/output/swing_similar.txt \
  309 + --algorithm swing_cpp \
  310 + --redis-host localhost
  311 +
306 # 只加载兴趣聚合索引 312 # 只加载兴趣聚合索引
307 python3 scripts/load_index_to_redis.py --load-interest --redis-host localhost 313 python3 scripts/load_index_to_redis.py --load-interest --redis-host localhost
308 ``` 314 ```
@@ -315,7 +321,10 @@ redis-cli @@ -315,7 +321,10 @@ redis-cli
315 # 检查key数量 321 # 检查key数量
316 DBSIZE 322 DBSIZE
317 323
318 -# 查看某个商品的相似推荐 324 +# 查看某个商品的相似推荐(C++ Swing)
  325 +GET item:similar:swing_cpp:3600052
  326 +
  327 +# 查看某个商品的相似推荐(Python Swing)
319 GET item:similar:swing:12345 328 GET item:similar:swing:12345
320 329
321 # 查看平台热门商品 330 # 查看平台热门商品
@@ -324,10 +333,17 @@ GET interest:hot:platform:pc @@ -324,10 +333,17 @@ GET interest:hot:platform:pc
324 # 查看所有i2i相关的key 333 # 查看所有i2i相关的key
325 KEYS item:similar:* 334 KEYS item:similar:*
326 335
  336 +# 查看C++ Swing的key
  337 +KEYS item:similar:swing_cpp:*
  338 +
  339 +# 查看Python Swing的key
  340 +KEYS item:similar:swing:*
  341 +
327 # 查看所有interest相关的key 342 # 查看所有interest相关的key
328 KEYS interest:* 343 KEYS interest:*
329 344
330 # 检查key的过期时间 345 # 检查key的过期时间
  346 +TTL item:similar:swing_cpp:3600052
331 TTL item:similar:swing:12345 347 TTL item:similar:swing:12345
332 ``` 348 ```
333 349
@@ -337,6 +353,7 @@ TTL item:similar:swing:12345 @@ -337,6 +353,7 @@ TTL item:similar:swing:12345
337 353
338 | 索引类型 | Key数量 | 单条Value大小 | 总内存 | 354 | 索引类型 | Key数量 | 单条Value大小 | 总内存 |
339 |---------|--------|-------------|--------| 355 |---------|--------|-------------|--------|
  356 +| i2i_swing_cpp | 50,000 | ~400B | ~20MB |
340 | i2i_swing | 50,000 | ~500B | ~25MB | 357 | i2i_swing | 50,000 | ~500B | ~25MB |
341 | i2i_w2v | 50,000 | ~500B | ~25MB | 358 | i2i_w2v | 50,000 | ~500B | ~25MB |
342 | i2i_deepwalk | 50,000 | ~500B | ~25MB | 359 | i2i_deepwalk | 50,000 | ~500B | ~25MB |
@@ -346,7 +363,12 @@ TTL item:similar:swing:12345 @@ -346,7 +363,12 @@ TTL item:similar:swing:12345
346 | interest_cart | 10,000 | ~1KB | ~10MB | 363 | interest_cart | 10,000 | ~1KB | ~10MB |
347 | interest_new | 5,000 | ~1KB | ~5MB | 364 | interest_new | 5,000 | ~1KB | ~5MB |
348 | interest_global | 10,000 | ~1KB | ~10MB | 365 | interest_global | 10,000 | ~1KB | ~10MB |
349 -| **总计** | **270,000** | - | **~160MB** | 366 +| **总计** | **320,000** | - | **~180MB** |
  367 +
  368 +**说明**:
  369 +- C++ Swing数据更紧凑(无商品名),单条大小约400B
  370 +- 建议生产环境使用C++ Swing (`swing_cpp`),性能更优
  371 +- Python Swing可作为对照组或特殊场景使用
350 372
351 ### 过期策略 373 ### 过期策略
352 374
@@ -373,17 +395,25 @@ TTL item:similar:swing:12345 @@ -373,17 +395,25 @@ TTL item:similar:swing:12345
373 ```python 395 ```python
374 # 检查加载成功率 396 # 检查加载成功率
375 total_keys = redis_client.dbsize() 397 total_keys = redis_client.dbsize()
376 -expected_keys = 245000 398 +expected_keys = 320000 # 更新:包含C++ Swing
377 success_rate = total_keys / expected_keys * 100 399 success_rate = total_keys / expected_keys * 100
378 400
379 # 检查数据完整性 401 # 检查数据完整性
380 sample_keys = [ 402 sample_keys = [
381 - 'item:similar:swing:12345', 403 + 'item:similar:swing_cpp:3600052', # C++ Swing
  404 + 'item:similar:swing:12345', # Python Swing
  405 + 'item:similar:w2v:12345',
382 'interest:hot:platform:pc' 406 'interest:hot:platform:pc'
383 ] 407 ]
384 for key in sample_keys: 408 for key in sample_keys:
385 if not redis_client.exists(key): 409 if not redis_client.exists(key):
386 print(f"Missing key: {key}") 410 print(f"Missing key: {key}")
  411 +
  412 +# 检查C++ Swing vs Python Swing覆盖率
  413 +cpp_swing_count = len(redis_client.keys('item:similar:swing_cpp:*'))
  414 +py_swing_count = len(redis_client.keys('item:similar:swing:*'))
  415 +print(f"C++ Swing keys: {cpp_swing_count}")
  416 +print(f"Python Swing keys: {py_swing_count}")
387 ``` 417 ```
388 418
389 ### 性能指标 419 ### 性能指标