Compare View
Commits (4)
-
问题描述
----------
使用 facebook/nllb-200-distilled-600M(CTranslate2 后端)时,若 API 传入 ISO 639-1 或 FLORES 短标签(如 ca、da、nl、sv、no、tr 等),会触发「Unsupported NLLB source/target language」。模型与 tokenizer 实际支持这些语言;根因是 resolve_nllb_language_code 仅依赖 translation/languages.py 里十余条 NLLB_LANGUAGE_CODES 映射,大量合法短码未注册,校验误报为不支持。

修改内容
----------
1. 新增 translation/nllb_flores_short_map.py
   - NLLB_FLORES_SHORT_TO_CODE:与 HF 模型卡 language 列表对齐的短标签 -> NLLB 强制 BOS/src_lang 形式(<ISO639-3>_<ISO15924>,如 cat_Latn)。
   - NLLB_TOKENIZER_LANGUAGE_CODES:从 tokenizer.json 提取的 202 个语言 token 全集,供直接传入 deu_Latn 等形式时做规范化解析。
   - 额外约定:ISO 639-1「no」映射 nob_Latn(书面挪威语 Bokmål);nb/nn 分别对应 nob_Latn / nno_Latn;「ar」显式指向 arb_Arab(与 NLLB 一致)。
2. 调整 translation/languages.py
   - build_nllb_language_catalog:合并顺序为 FLORES 全表 -> NLLB_LANGUAGE_CODES (保留少量显式覆盖,如 zh->zho_Hans)-> 调用方 overrides。
   - resolve_nllb_language_code:在目录与别名之后,增加基于 NLLB_TOKENIZER_LANGUAGE_CODES 的大小写不敏感匹配(如 eng_latn -> eng_Latn),覆盖「已传完整 NLLB 码」的场景。
3. tests/test_translation_local_backends.py
   - 新增 test_nllb_resolves_flores_short_tags_and_iso_no,覆盖用户关心的短码及 deu_Latn 直通解析。

方案说明
----------
NLLB 接口语义以 Hugging Face NllbTokenizer 为准:语言标识为 FLORES-200 风格三字母语种码 + 下划线 + 四字母脚本子标签(ISO 15924)。业务侧常用 ISO 639-1 (de、sv)或模型卡短列表(ca、nl),需在服务内统一映射到 tokenizer 特殊 token。

本实现以模型卡 language 字段 + tokenizer 词表为单一事实来源生成静态表,避免运行时依赖额外库;同时保留原有 NLLB_LANGUAGE_CODES 作为薄覆盖层以兼容既有配置与测试。

Refs: https://huggingface.co/facebook/nllb-200-distilled-600M
Made-with: Cursor
Showing
28 changed files
Show diff stats
.gitignore
@ deleted
| ... | ... | @@ -1,17 +0,0 @@ |
| 1 | - | |
| 2 | -# Please enter the commit message for your changes. Lines starting | |
| 3 | -# with '#' will be ignored, and an empty message aborts the commit. | |
| 4 | -# | |
| 5 | -# On branch master | |
| 6 | -# Your branch is ahead of 'origin/master' by 3 commits. | |
| 7 | -# (use "git push" to publish your local commits) | |
| 8 | -# | |
| 9 | -# Changes to be committed: | |
| 10 | -# modified: README.md | |
| 11 | -# modified: docs/Usage-Guide.md | |
| 12 | -# modified: scripts/service_ctl.sh | |
| 13 | -# new file: status.sh | |
| 14 | -# | |
| 15 | -# Changes not staged for commit: | |
| 16 | -# modified: third-party/clip-as-service (untracked content) | |
| 17 | -# |
] deleted
| ... | ... | @@ -1,17 +0,0 @@ |
| 1 | -docs | |
| 2 | -# Please enter the commit message for your changes. Lines starting | |
| 3 | -# with '#' will be ignored, and an empty message aborts the commit. | |
| 4 | -# | |
| 5 | -# On branch master | |
| 6 | -# Your branch is ahead of 'origin/master' by 5 commits. | |
| 7 | -# (use "git push" to publish your local commits) | |
| 8 | -# | |
| 9 | -# Changes to be committed: | |
| 10 | -# modified: config/config.yaml | |
| 11 | -# modified: docs/TODO.txt | |
| 12 | -# modified: "docs/\346\220\234\347\264\242API\345\257\271\346\216\245\346\214\207\345\215\227-07-\345\276\256\346\234\215\345\212\241\346\216\245\345\217\243\357\274\210Embedding-Reranker-Translation\357\274\211.md" | |
| 13 | -# modified: "docs/\347\233\270\345\205\263\346\200\247\346\243\200\347\264\242\344\274\230\345\214\226\350\257\264\346\230\216.md" | |
| 14 | -# | |
| 15 | -# Changes not staged for commit: | |
| 16 | -# modified: third-party/clip-as-service (untracked content) | |
| 17 | -# |
| ... | ... | @@ -0,0 +1,417 @@ |
| 1 | +#!/bin/bash | |
| 2 | +start=$(date +%s%N) # 开始时间,纳秒级 | |
| 3 | + | |
| 4 | +time curl -X POST "http://localhost:6007/rerank" \ | |
| 5 | + -H "Content-Type: application/json" \ | |
| 6 | + -d '{ | |
| 7 | + "query": "健身女生T恤短袖", | |
| 8 | + "docs": [ "60 Jelly Bracelets 80 s Adult Size - MAQIHAN Neon Gummy Bracelets for Women 80s Jelly Bangles Glow Silicone Bands Jewelry Wristband Rainbow Jellies Bangle Girls Boys Colored Accessories Party Favor", | |
| 9 | +"MEROKEETY Women s 2025 Summer Square Neck Puff Sleeve Boho Midi Dress Swiss Dot Ruffle Flowy Tie Back Dress", | |
| 10 | +"FITORY Mens Sandals", | |
| 11 | +"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", | |
| 12 | +"Merrell Mens Hydro Moc", | |
| 13 | +"Lounge Sets for Women Summer Outfits Women 2 Piece Sets 2025 Sleeveless Matching Lounge Crop Top High Waisted Short", | |
| 14 | +"Men s Underwear", | |
| 15 | +"Executive Functioning Workbook for Teens: 101 Activities and Strategies for Enhancing Self-Discipline", | |
| 16 | +"LEVSOX Compression Socks Women and Men", | |
| 17 | +"MGparty 12 Pieces Christmas Headbands Christmas Parties Favors Decoration Supplies Xmas Gifts Photo Booth Xmas Tree Snowman Reindeer Antlers Santa Hat", | |
| 18 | +"10 Large Vacuum Storage Bags with Hand Pump", | |
| 19 | +"Disney Lilo and Stitch Boys Swim Set", | |
| 20 | +"Sterling Silver Hoop Earrings", | |
| 21 | +"23 Pcs Day of The Dead Altar Decorations Set", | |
| 22 | +"Travel Makeup Bag for Women Fashion Large Capacity Pouch Open Flat Cosmetic Portable Organizer Waterproof Large Opening Storage Toiletry Bags Vertical Free-Standing Brush Holder for Easy Access Blue", | |
| 23 | +"Iron Flame: Empyrean", | |
| 24 | +"Luxebell Luggage Straps Suitcase Belt TSA Approved Travel Accessories Gift 4-Pack 6.56ft (Green)", | |
| 25 | +"TONY & SANDY Christian Gifts for Women", | |
| 26 | +"Blue Birthday Party Supplies", | |
| 27 | +"Vionic Women s Coral Loafer Moccasin", | |
| 28 | +"LIQING 35L Large Picnic Basket 2 Layers of Internal Pockets Leak-Proof and Insulated ,Folding with Internal Support for enhansed Stability", | |
| 29 | +"40oz Softball Tumbler with Handle Softball Gifts Stuff for Women Girls Men Gift for Coach Lovers Fan Stainless Steel Cup", | |
| 30 | +"Crayola Colour & Erase Reusable Puzzle Set", | |
| 31 | +"Carry On Luggage with Front Compartment and Cup Holder", | |
| 32 | +"Interactive Cat Toy Rechargeable", | |
| 33 | +"Nike Air Rift", | |
| 34 | +"Portable Hookah Set for Travel - Premium Handheld Glass Aluminum Mini Hookah Real Metal Accessories", | |
| 35 | +"Clear Backpack for Boys", | |
| 36 | +"Women’s Knee High Boots Round Toe Chunky Heel Faux Leather Tall Riding Boots with Side Zipper", | |
| 37 | +"Golf Grip Trainer & Connection Band 2Set", | |
| 38 | +"Monster High Self Scare Day Cleo De Nile Doll Play Set", | |
| 39 | +"Fortnite eGift Card - Powered by the Epic Games Store", | |
| 40 | +"Mesh Beach Bags", | |
| 41 | +"Crowye Anime Cosplay Costume for Halloween Princess Costume Accessories Anime White Cosplay Wig Egypt Arm Cuff Bracelet Gold Earrings Greek Goddess Set for Halloween Dress up Princess", | |
| 42 | +"Premium Women s Leather Tote Handbag - Bag for Everyday Use", | |
| 43 | +"Ekouaer Maternity Nursing Gown and Robe Set Labor Delivery Nursing Nightgowns for Breastfeeding Pregnancy Clothes", | |
| 44 | +"Superband Mermaid Tails for Swimming for Women and Adults Without Monofin", | |
| 45 | +"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops", | |
| 46 | +"WDIRARA Girl s Bow Puff Sleeve A Line Midi Dress Cute Collared Ruffle Hem Swing Dresses", | |
| 47 | +"Funziez! Adult Onesie Halloween Costume Animal Dinosaur Shark Unisex Plush One Piece Cosplay Suit for Adults", | |
| 48 | +"Rockland Duffel Bag", | |
| 49 | +"Centipede Demon Baby Shoes Baby Boys Girls Walking Shoes Non Slip Booties Sock Shoe Infants Breathable Sneakers Lightweight Barefoot Slip On Sneakers", | |
| 50 | +"CYDREAM Long Sleeve Bodysuits for Women - Square Neck Shapewear Bodysuit Tops Going Out Body Suits Shirt Leotard", | |
| 51 | +"Men s Oversized Letter Graphic Tank Top Sleeveless Casual Summer Tops Y2K Streetwear", | |
| 52 | +"Flower Claw Clip 7 PCS Claw Clips", | |
| 53 | +"waist twister,waist twisting machine ab twister board with 300 lbs Weight Capacity", | |
| 54 | +"PAGE ONE Womens Winter Ribbed Beanie Crossed Cap Chunky Cable Knit Pompom Soft Warm Hat", | |
| 55 | +"5 Pack Cute Keychains for Girls", | |
| 56 | +"Dragon Ball Super - Complete Series - Blu-ray", | |
| 57 | +"VejiA Multifunctional Simple Shoe Cabinet Storage Shoe Rack Save Space Hallway Furniture", | |
| 58 | +"50Pcs Handbag Purse Feet Handbag Nailhead Brass Studs Screw-Back Feet Flat Head Stud Metal Studs Rivet Leather Craft DIY for DIY Purse Leather Craft", | |
| 59 | +"Wearable Blanket Hoodie with Letter A-Z - Oversized Blanket Hooded Personalized Birthday Christmas Gifts for Women Mom", | |
| 60 | +"On Women s Cloudnova Form 2 Sneakers", | |
| 61 | +"SANTINY 18 Skorts for Women with 4 Pockets High Waist Long Athletic Tennis Skirt Golf Skort Dressy Casual", | |
| 62 | +"Compatible with AirTag Case Keychain", | |
| 63 | +"Rod Holder Plugs", | |
| 64 | +"Protective Case Compatible with Have A Seat Figure-Clear PVC Portable Storage Box with Keychain", | |
| 65 | +"adidas Men s Swift Run 1.0 Running Shoes", | |
| 66 | +"M MOOHAM Cross Necklace for Women Teen Girls", | |
| 67 | +"Sportneer Adjustable Ankle Weights for Women and Men 7 lbs/Pair Adjustable Leg Weights with Secure Straps", | |
| 68 | +"PRETTYGARDEN Women s 2 Piece Outfits Sleeveless Suit Vest and Wide Leg Pants Business Casual Blazer Sets", | |
| 69 | +"Bouncer Seat for Babies 0-12 Months", | |
| 70 | +"Womens Crew Socks Cotton Long Gym Socks Lightweight Athletic Running Socks", | |
| 71 | +"Denior Magnetic Card Phone Wallet Holder for iPhone 17/16/15/14/13/12 Series", | |
| 72 | +"LIGHT DOT Women s Summer Dress Plisse Maxi Tube Bodycon Dress Back Tie Beach Resort Vacation", | |
| 73 | +"Vivresina UV Resin 400g (400.0", | |
| 74 | +"Wide Leg Pants High Waisted Pleated Trousers with 4 Colors", | |
| 75 | +"Osprey Daylite Shoulder Sling Bag – Compact Crossbody Backpack for Everyday Carry", | |
| 76 | +"Tote Bag for Women Large PVC Tote Bag Letters Print Plastic Handbag for Christmas Gift", | |
| 77 | +"Hello Kitty Giant Coloring & Activity Book 11x16", | |
| 78 | +"Skechers Mens Delson 3.0 - Roth 210606", | |
| 79 | +"3pcs Heart Badge Reel with Alligator Clip Cute Retractable Badge Holder Acrylic Nurse Badge Clip for Office Workers", | |
| 80 | +"Ortho Balance Hiking Shoes for Men Women", | |
| 81 | +"GOLDENMATE 1000VA/600W Lithium UPS Battery Backup and Surge Protector", | |
| 82 | +"Gelante Solid Color 100% Cotton Bucket Hat for Women and Men Packable Travel Summer Beach Hat", | |
| 83 | +"Sonic The Hedgehog 3 Movie Action Figures 2.5-Inch Movie Collector Toy Figure Multi-Pack Includes Sonic The Hedgehog Knuckles Shadow Buzz Bomber & Drone- Officially Licensed Toys", | |
| 84 | +"61 Pcs Nacho Libre Stickers Comedy Movie Graffiti Waterproof Vinyl for Adults for Birthday Party Supplies Decoration Favors for Water Bottles Laptop Suitcase Scrapbooking Choice", | |
| 85 | +"Neck Lift Tape", | |
| 86 | +"925 Sterling Silver Earrings for Womens Sparkly Colorful Full Diamond Simple Stylish Elegant Hypoallergenic Jewelry", | |
| 87 | +"Pink Ceramic Bow Vase for Flowers", | |
| 88 | +"Winter Coats For Men Winter Jackets Water Resistant Warm Thicken Parka Puffer Coat Long Down Jacket", | |
| 89 | +"Alarm Clocks for Bedrooms", | |
| 90 | +"KINURI Running Belt for Men & Women – Fits All Smartphones – Waterproof Waist Pack with Adjustable Strap – Ideal for Jogging", | |
| 91 | +"DREAM PAIRS Heels for Women Flip Flops Kitten Low Heels Open Square Toe Thong Heeled Sandals", | |
| 92 | +"Amazon Basics All Purpose Washable School Craft Liquid Glue for Making Slime", | |
| 93 | +"Inflatable Costume Adult Frog Full Body Deluxe Funny Air Blow Up Costume for Men Women Halloween", | |
| 94 | +"Mens Golf Pants Stretch Casual Dress Pants Elastic Drawstring Slacks for Men Lightweight Trousers with 5 Pockets", | |
| 95 | +"Lip Smacker Hello Kitty Lip Balm", | |
| 96 | +"Brown Sugar Keeper 3D – Terracotta Clay Bear Softener", | |
| 97 | +"MEETSUN Polarized Sunglasses for Women Men Trendy Classic Retro Designer Style", | |
| 98 | +"Corset Top Bustier Lingerie for Women Zipper Front Flower Sexy Burlesque Vintage", | |
| 99 | +"Pro Club Men s Heavyweight Mesh Basketball Shorts", | |
| 100 | +"Nike Tech Men s Full-Zip Windrunner Hoodie (HV0949-237", | |
| 101 | +"Ear Piercing Kit", | |
| 102 | +"Timberland Men s 6 Premium Boot", | |
| 103 | +"STAR WARS The Black Series Darth Maul", | |
| 104 | +"VZQI Halloween Cosplay Costumes Kamado Tanjir Kids Anime Kimono Halloween Green Cloak", | |
| 105 | +"Fringe Vest for Women Faux Suede Open Front Cardigan Sleeveless Tassels Fringed Vest Cardigan Hippie Jacket", | |
| 106 | +"Smart Health Ring 2.0 for Women Men", | |
| 107 | +"Fast Forward Kid s Licensed 15 Backpack With Lunch Box Combo Set (Hello Kitty)", | |
| 108 | +"Handmade Authentic Katana - 41-inch Full Tang Sharp Blade", | |
| 109 | +"Inateck Sling Bag X", | |
| 110 | +"EXLURA Women s Fashion Faux Wool Mini Skirt High Waisted Y2K Trendy Side Slit Tweed Plaid Skirts 2025 Fall Winter Outfits", | |
| 111 | +"LASLULU Womens Sexy Crossover Crop Top Long Sleeve Workout Tops Crewneck Athletic Yoga T-Shirts Fall Outfits", | |
| 112 | +"Wrangler Authentics Men s Classic Relaxed Fit Five Pocket Jean Short", | |
| 113 | +"ZeroBound Built in Bra Tank Tops for Women - High Neck Racerback Tank Tops", | |
| 114 | +"Nike Mens Air Max Alpha Trainer 6", | |
| 115 | +"MAZZERI Solid Gold Plated Sterling Silver Italian 1.3/1.6/2.2/2.8mm Diamond-Cut Braided Rope Chain Necklace for Men Women", | |
| 116 | +"Milumia Women s Polka Dots Twist Front Halter Top Dressy Casual Textured Peplum Going Out Tops", | |
| 117 | +"80s 90s Outfit for Women", | |
| 118 | +"EFAN Womens Sexy Sleeveless Double Lined Crop Tops Workout Cute Tight Racerback Tank Tops Summer Clothes Teen Girls 2025", | |
| 119 | +"Nike Mens Shorts Dri-Fit Flex Woven Shorts 7inch (US", | |
| 120 | +"top handle satchel Women", | |
| 121 | +"Kono Expandable Luggage 3 Piece Set Hardshell Lightweight 20in 24in 28in Carry On Suitcase with Spinner Wheels TSA Lock(Black & Brown)", | |
| 122 | +"Nations of The World | National Pride Flag Symbol Arms Tee Unisex T-Shirt for Men or Women", | |
| 123 | +"Jo & Bette Seamless Thongs for Women - High Waist Panties 6 Pack - Thong Underwear Pack Breathable No show Sports", | |
| 124 | +"eKids Disney Frozen 2 Bluetooth Headphones with Microphone", | |
| 125 | +"Arctix Kids Insulated Snow Bib Overalls", | |
| 126 | +"USA Flag Charlie Gift T-Shirt", | |
| 127 | +"CBKSUHBADE 15in×11in Anime One Piece Wanted Bounty Posters", | |
| 128 | +"Plus Size Underwear for Women XL-5XL Cotton High Waist Women Briefs Full Coverage Ladies Panties 4 Pack", | |
| 129 | +"Little Adventures Enchanted Rapunzel Dress-Up Costume for Adult Women", | |
| 130 | +"G Gradual Tennis Dress for Women Golf Outfits with Shorts and Pockets Sleeveless Active Exercise Athletic Dresses for Women", | |
| 131 | +"Pastoral Style Porch Goose Outfits", | |
| 132 | +"Vive Thigh High Compression Stockings for Women & Men - 15-20 mmHg Graduated Support Hose - Opaque Closed Toe Compression Tights - Stockings for Varicose Veins", | |
| 133 | +"Canada is Not for Sale Vintage Cotton Twill Cap", | |
| 134 | +"TomTiger Yoga Shorts for Women Tummy Control High Waist Biker Shorts Exercise Workout Butt Lifting Tights Women s Short Pants", | |
| 135 | +"4PCS GOD IS FIRST IM SECOND Bracelet: Faith Priority Bracelet - Engraved Cross Silicone Wristband for Daily Encouragement", | |
| 136 | +"Tahitian Black Pearl Pendant Necklace AAAA 18K White Gold Plated 925 Sterling Silver Black Pearl Jewelry Gift for Women Mother Wife Her for Anniversary Christmas Birthday", | |
| 137 | +"HOTOUCH Womens Short Sleeve Button Down Shirts Loose Fit V Neck Business Casual Blouses Summer Top with Pockets S-XXL", | |
| 138 | +"Men s Corduroy Short Sleeved Cargo Shirt Relaxed Fit Button Down Casual Wear Tops with Flap Pockets", | |
| 139 | +"Orange Blue Light Blocking Glasses for Better Sleep - 99.5% Premium Acetate Migraine Glasses for Women & Men", | |
| 140 | +"Disney Stitch Beach Towel for Kids Cotton Bath Towels with 2 Clothes Pins Travel Swimming Quick Dry Towel Beach Vacation Essentials", | |
| 141 | +"PGANDS Womens Crew Neck Solid/Color Block Sweatshirts Long Sleeve Casual Lightweight Pullover Tops", | |
| 142 | +"Premium Organic Whole Cloves 5.3 oz (150 grams)", | |
| 143 | +"habibee Bra for Women No Underwire Comfort Seamless Bras Push Up Wireless Bras Full Coverage Bralettes", | |
| 144 | +"Puma Mens Caven 2.0 Shoes", | |
| 145 | +"PRETTYGARDEN Women s Fall Button Down Shirts Dressy Casual Spring Long Puff Sleeve Eyelet Loose Fit Collared Blouse Top", | |
| 146 | +"TNNZEET 2 Pack Plus Size Biker Shorts for Women - 8 Black High Waisted Tummy Control Spandex Workout Shorts (XL-4XL)", | |
| 147 | +"Marvel Legends Series Captain America Shield", | |
| 148 | +"PAVOI 14K Gold AAA+ Handpicked White Freshwater Cultured Pearl Earrings Studs", | |
| 149 | +"Trendy Queen Long Skirts for Women Boho Maxi Skirt Winter Swing Tiered A-Line Elastic High Waist Dress with Pockets Fashion", | |
| 150 | +"Reebok Classic Leather Sneakers for Men", | |
| 151 | +"PRETTYGARDEN Women s Summer Bodycon Maxi Tube Dress Ribbed Strapless Side Slit Long Going Out Casual Elegant Party Dresses", | |
| 152 | +"Favorite Daughter Women s Classic Logo Baseball Cap", | |
| 153 | +"Reebok Men s Cotton Vital Fleece Sweatpant", | |
| 154 | +"COOFANDY Mens Hawaiian Shirt Short Sleeve Button Down Shirts Tropical Summer Beach Shirts Casual Floral Aloha Shirts", | |
| 155 | +"Columbia Mens Grander Marlin Iii Offshore Short", | |
| 156 | +"Satin One Shoulder Flower Girl Dress with Bow Wedding Princess Pageant Party Gown Puffy Formal First Communion", | |
| 157 | +"Nike Mens V5 RNR", | |
| 158 | +"Speed Cube 3x3", | |
| 159 | +"FOURSTEEDS Women s Cotton Zipper Front Multi-Pocket Twill Bermuda Women Cargo Shorts", | |
| 160 | +"Curly Hair Brush Defining", | |
| 161 | +"YQXCC Cooling Towels | 4 Pack 47x12 | Ice Cool for Neck | Microfiber Soft Breathable Chilly | for Yoga", | |
| 162 | +"Hot Wheels Toy Car Playset with Lights", | |
| 163 | +"Carhartt Men s Loose Fit Heavyweight Short-Sleeve Pocket Henley T-Shirt", | |
| 164 | +"Women s Mid-High Rise Ripped Denim Shorts Stretchy Distressed Jean Shorts with Pockets Folded Hem Casual Summer Jorts", | |
| 165 | +"Monster High Cleo De Nile Doll in Golden Blouse & Layered Skirt", | |
| 166 | +"Ariat Women’s Fatbaby Western Boot", | |
| 167 | +"UYYE Car Registration and Insurance Card Holder", | |
| 168 | +"365 by Whole Foods Market", | |
| 169 | +"Crystal Bracelet for Women Fashion 7 Inch Approximately Rainbow Sparkling Crystal Bracelet with Adjustable Elastic Cord", | |
| 170 | +"Samsung Galaxy Watch 7 (44mm) AI Smartwatch w/ 1.5 AMOLED", | |
| 171 | +"DOUKEN 4 Pair Sneaker Creases Protector", | |
| 172 | +"Elvis: The Legend music word search puzzle.: Great Country Music Word Scrambles about Elvis. Large print word puzzle for adults and rock music lovers. ... Great music gift for your friends or family.", | |
| 173 | +"Pinkfong Bebefinn Plush Toy - 12 (30cm) Stuffed Doll | Soft Cuddly Plush for Toddlers | Bebefinn Toy | Perfect Birthday", | |
| 174 | +"Thrusting Dildo Vibrator Sex Toys for Women", | |
| 175 | +"VANLOVEMAC Baseball Gifts for Boys 8-12 Baseball Stuff College Going Away Gifts Welcome Back to School Gifts Dorm Room Essentials for Guys Off to College", | |
| 176 | +"Hello Kitty and Friends - Cinnamoroll 12” Pink Monochrome Plush", | |
| 177 | +"BOBISUKA Pearl White Face Body Paint", | |
| 178 | +"OMKAGI 2 Piece Workout Sets for Women Halter Sports Bras Gym Sets Booty Leggings Outfits", | |
| 179 | +"Ivay Womens Scoop Neck Ribbed Knit Tank Top Sleeveless Cotton Wife Beater Camisole Shirts", | |
| 180 | +"SOLY HUX Women s Graphic Tee Shirts Novelty Funny Short Sleeve Summer Casual Tops", | |
| 181 | +"Wooden Taper Candle Holders: Wood Candlestick Holders Rustic Brown Farmhouse Fall Decor for Living Room Dinning Table Centerpiece Christmas Set of 2", | |
| 182 | +"PRETTYGARDEN Long Sleeve Shirts for Women 2025 Fall V Neck Waffle Basic Tee Dressy Casual Winter Blouses Knit Tunic Tops", | |
| 183 | +"Ray-Ban RB2140 Original Wayfarer Square Sunglasses", | |
| 184 | +"Lee Womens Ultra Lux Comfort with Flex-to-go Utility Skimmer Capri Pant", | |
| 185 | +"3D Pedometer for Walking", | |
| 186 | +"HiiFeuer Medieval Faux Leather Chest Armor", | |
| 187 | +"Pet Deadly Dog Costume", | |
| 188 | +"Western Chief Kids Freestyle Neoprene Outdoor Boot", | |
| 189 | +"SKECHERS Women s Ultra Flex 3.0-Brilliant Path Hands Free Slip-INS Sneaker", | |
| 190 | +"LUOBO Keychain Accessory Decor Keychain Decoration backpacks Bag Pendant", | |
| 191 | +"10inch Teddy Bear Stuffed Animal", | |
| 192 | +"Halloweentown University T-Shirt for Women Fall Pumpkin Shirts Funny Halloween Thanksgiving Gift Tops", | |
| 193 | +"Women s Sexy American Flag Crop Tank 4th of July Patriotic Sleeveless Tee Tops", | |
| 194 | +"Gillette Fusion5 ProGlide Men s Razor Blade Refills", | |
| 195 | +"Poppy Playtime - Mommy Long Legs Plush (14 Medium Plush", | |
| 196 | +"Women’s Heated Vest with 12V 20000mAh Battery – Cropped Stand Collar Lightweight Insulated Winter Vest.", | |
| 197 | +"toolant Winter Work Gloves for Men", | |
| 198 | +"192Pcs Halloween Favors Stationery Gift Set", | |
| 199 | +"20 Pcs Ultra Thin Tattoo Cover up Patch Waterproof Tattoo Cover up Tape Sweatproof Tattoos Covers Patches Cuttable Invisible Non-Woven Fabric Patches for Tattoos Scar Birthmark 4.72×3.35In(Light Skin)", | |
| 200 | +"Popcorns Maker", | |
| 201 | +"Paladone Kuromi GloBuddies Night Light", | |
| 202 | +"Creativity for Kids Sensory Minis Dinosaur Kit | Cloud Clay Sensory Toy for Toddlers | Squish", | |
| 203 | +"Mouse Ears Headband Fully Sewn Sturdy Headbands 2-Pcs, 4.6-Inch Sequin Big Ears 3D Silk Satin Bowknot Suitable for Women and Girls Theme Role Play Costume Accessories Party", | |
| 204 | +"Tanluhu Sweatbands Sport Headbands for Men & Women", | |
| 205 | +"Pilates Reformer Machine", | |
| 206 | +"Fossil Fenmore Analog Men Watch", | |
| 207 | +"Stray Kids Official Lightstick Ver 2", | |
| 208 | +"Zima Dental Pod PRO: New Ultrasonic Retainer Cleaner Machine. Market-Leading", | |
| 209 | +"2300pcs Polymer Clay Beads Bracelet Making Kit", | |
| 210 | +"AI ACCESSORY INNOVATIONS Bluey 4 Piece Backpack Set for Pre School Girls & Boys", | |
| 211 | +"MIRITY Women s High Waist Cotton Underwear - Soft Full Coverage Briefs with Double-Layer Waistedband", | |
| 212 | +"Plus Size Summer Dresses - Floral Beach Wedding Guest Semi Formal Tiered Flowy Long Sundress", | |
| 213 | +"AUTOMET Womens Tops Summer Sweater Long Tunic Dressy Casual Blouses Business Cute Trendy Short Sleeve Shirt 2025", | |
| 214 | +"Black Sabbath Sketch Band T-Shirt", | |
| 215 | +"Loomie Upgraded 6 Drawer White Dresser for Bedroom", | |
| 216 | +"Michael Kors Womens Zuma Trainer", | |
| 217 | +"Chunky Silver Bohemian Flower Bracelet For Wemen Men", | |
| 218 | +"Classic Black Western Felt Roll Up Brim Cowboy and Cowgirl Hat for Women and Men - Decoration with Western Belt Bukle", | |
| 219 | +"Jellycat Little Pig Bag Charm", | |
| 220 | +"LARNMERN Steel Toe Work Boots Men", | |
| 221 | +"3PCS Gold Hair Ties", | |
| 222 | +"Red Kap Men s Snap Front Cotton Coverall", | |
| 223 | +"Citizen Quartz Mens Watch", | |
| 224 | +"ATHMILE Long Sleeve Shirts for Women Tunic Fall Tops Loose Fit Dressy Crew Neck Basic Sweaters 2025", | |
| 225 | +"Narecte Summer Maxi Dresses for Women Back Strap Beach Dress Women s Casual Dress Long Flowy Dresses for Vacation", | |
| 226 | +"LIDHAY Cowboy Hat for Women and Men Western Cowgirl Hats Suede Cowboy Hat for Rodeo", | |
| 227 | +"BIC Classic Maxi Pocket Lighter", | |
| 228 | +"A + S Luxxe Diaper Bag Tote – Stylish", | |
| 229 | +"100pack Name Badge Holders Name Tag Holder Clear Plastic Badge Holder ID Holders for Lanyard (100Pcs Vertical)", | |
| 230 | +"MOOSEA Christmas Gifts for Women Wife - Love Knot Moissanite Necklace 1-3ct D Color VVS1 Clarity Moissanite 925 Sterling Silver Necklace Anniversary Birthday Gifts for Women Wife Mom Girlfriend", | |
| 231 | +"Solid Wood Retangle End Table with Drawer and Storage Shelf", | |
| 232 | +"Madden Girl womens Beella Heeled SandalHeeled Sandal", | |
| 233 | +"Ekouaer 2 Pack Womens Pajama Sets Short Sleeve Sleepwear Soft Crew Neck Pj Shorts Set Printed Loungewear Set S-XXL", | |
| 234 | +"NPQQUAN Original Classic Low Profile Baseball Cap Golf Dad Hat Adjustable Cotton Hats Men Women Unconstructed Plain Cap", | |
| 235 | +"YEOREO Women Workout Biker Shorts Impact 4.5 No Front Seam Hidden Scrunch Lifting Seamless Yoga Gym Shorts", | |
| 236 | +"Merino Wool Underwear Men by Thermowave - Sport & Everyday Men s Merino Wool Boxer Brief - 150 GSM Stretchy & Soft", | |
| 237 | +"COACH Women s Leah Platform Loafers", | |
| 238 | +"Doodle Me Happy Kids Thank You Cards - 25 Cards With Envelopes - Cute", | |
| 239 | +"Spring Summer Women Pleated Casual Denim V Neck Ruffle Sleeve Dress Light Blue XL", | |
| 240 | +"Disney Hooded Matching Family Cosplay T-Shirt Infant to Adult Sizes (12 Months - 2XL)", | |
| 241 | +"Leather CPR Cleaner & Conditioner 18oz - Cleans", | |
| 242 | +"Baseball Shirts Women Baseball Mom Tshirt Baseball Heart Graphic Tee Game Day Gifts Funny Short Sleeve Tops", | |
| 243 | +"4 Pack Cooling Towels", | |
| 244 | +"ZEEPORTE Mask Fin Snorkel Set", | |
| 245 | +"60 Pcs Bride Tribe Bachelorette Party Favors Bulk Friendship Bridesmaid Gifts 12 Set Friendship Bracelets Heart Sunglasses Satin Scrunchie for Engagement Bridal Shower Wedding Favor", | |
| 246 | +"AUSELILY Summer Dress Sundress Beach Cover up Swing Dresses", | |
| 247 | +"Loungefly Disney Minnie Mouse Crossbody Satchel Handbag", | |
| 248 | +"Tactical Gym Bag for Men,50L Large 3 in 1 Sports Duffle Bag with Shoes Compartment for Travel", | |
| 249 | +"YETI Rambler 42 oz Tumbler with Handle and Straw Lid", | |
| 250 | +"Samsonite Classic Leather Slim Backpack", | |
| 251 | +"Vive Thigh High Compression Stockings for Women & Men - 15-20 mmHg Graduated Support Hose - Opaque Closed Toe Compression Tights - Stockings for Varicose Veins", | |
| 252 | +"Canada is Not for Sale Vintage Cotton Twill Cap", | |
| 253 | +"TomTiger Yoga Shorts for Women Tummy Control High Waist Biker Shorts Exercise Workout Butt Lifting Tights Women s Short Pants", | |
| 254 | +"4PCS GOD IS FIRST IM SECOND Bracelet: Faith Priority Bracelet - Engraved Cross Silicone Wristband for Daily Encouragement", | |
| 255 | +"Tahitian Black Pearl Pendant Necklace AAAA 18K White Gold Plated 925 Sterling Silver Black Pearl Jewelry Gift for Women Mother Wife Her for Anniversary Christmas Birthday", | |
| 256 | +"HOTOUCH Womens Short Sleeve Button Down Shirts Loose Fit V Neck Business Casual Blouses Summer Top with Pockets S-XXL", | |
| 257 | +"Men s Corduroy Short Sleeved Cargo Shirt Relaxed Fit Button Down Casual Wear Tops with Flap Pockets", | |
| 258 | +"Orange Blue Light Blocking Glasses for Better Sleep - 99.5% Premium Acetate Migraine Glasses for Women & Men", | |
| 259 | +"Disney Stitch Beach Towel for Kids Cotton Bath Towels with 2 Clothes Pins Travel Swimming Quick Dry Towel Beach Vacation Essentials", | |
| 260 | +"PGANDS Womens Crew Neck Solid/Color Block Sweatshirts Long Sleeve Casual Lightweight Pullover Tops", | |
| 261 | +"Premium Organic Whole Cloves 5.3 oz (150 grams)", | |
| 262 | +"habibee Bra for Women No Underwire Comfort Seamless Bras Push Up Wireless Bras Full Coverage Bralettes", | |
| 263 | +"Puma Mens Caven 2.0 Shoes", | |
| 264 | +"PRETTYGARDEN Women s Fall Button Down Shirts Dressy Casual Spring Long Puff Sleeve Eyelet Loose Fit Collared Blouse Top", | |
| 265 | +"TNNZEET 2 Pack Plus Size Biker Shorts for Women - 8 Black High Waisted Tummy Control Spandex Workout Shorts (XL-4XL)", | |
| 266 | +"Marvel Legends Series Captain America Shield", | |
| 267 | +"PAVOI 14K Gold AAA+ Handpicked White Freshwater Cultured Pearl Earrings Studs", | |
| 268 | +"Trendy Queen Long Skirts for Women Boho Maxi Skirt Winter Swing Tiered A-Line Elastic High Waist Dress with Pockets Fashion", | |
| 269 | +"Reebok Classic Leather Sneakers for Men", | |
| 270 | +"PRETTYGARDEN Women s Summer Bodycon Maxi Tube Dress Ribbed Strapless Side Slit Long Going Out Casual Elegant Party Dresses", | |
| 271 | +"Favorite Daughter Women s Classic Logo Baseball Cap", | |
| 272 | +"Reebok Men s Cotton Vital Fleece Sweatpant", | |
| 273 | +"COOFANDY Mens Hawaiian Shirt Short Sleeve Button Down Shirts Tropical Summer Beach Shirts Casual Floral Aloha Shirts", | |
| 274 | +"Columbia Mens Grander Marlin Iii Offshore Short", | |
| 275 | +"Satin One Shoulder Flower Girl Dress with Bow Wedding Princess Pageant Party Gown Puffy Formal First Communion", | |
| 276 | +"Nike Mens V5 RNR", | |
| 277 | +"Speed Cube 3x3", | |
| 278 | +"FOURSTEEDS Women s Cotton Zipper Front Multi-Pocket Twill Bermuda Women Cargo Shorts", | |
| 279 | +"Curly Hair Brush Defining", | |
| 280 | +"YQXCC Cooling Towels | 4 Pack 47x12 | Ice Cool for Neck | Microfiber Soft Breathable Chilly | for Yoga", | |
| 281 | +"Hot Wheels Toy Car Playset with Lights", | |
| 282 | +"Carhartt Men s Loose Fit Heavyweight Short-Sleeve Pocket Henley T-Shirt", | |
| 283 | +"Women s Mid-High Rise Ripped Denim Shorts Stretchy Distressed Jean Shorts with Pockets Folded Hem Casual Summer Jorts", | |
| 284 | +"Monster High Cleo De Nile Doll in Golden Blouse & Layered Skirt", | |
| 285 | +"Ariat Women’s Fatbaby Western Boot", | |
| 286 | +"UYYE Car Registration and Insurance Card Holder", | |
| 287 | +"365 by Whole Foods Market", | |
| 288 | +"Crystal Bracelet for Women Fashion 7 Inch Approximately Rainbow Sparkling Crystal Bracelet with Adjustable Elastic Cord", | |
| 289 | +"Samsung Galaxy Watch 7 (44mm) AI Smartwatch w/ 1.5 AMOLED", | |
| 290 | +"DOUKEN 4 Pair Sneaker Creases Protector", | |
| 291 | +"Elvis: The Legend music word search puzzle.: Great Country Music Word Scrambles about Elvis. Large print word puzzle for adults and rock music lovers. ... Great music gift for your friends or family.", | |
| 292 | +"Pinkfong Bebefinn Plush Toy - 12 (30cm) Stuffed Doll | Soft Cuddly Plush for Toddlers | Bebefinn Toy | Perfect Birthday", | |
| 293 | +"Thrusting Dildo Vibrator Sex Toys for Women", | |
| 294 | +"VANLOVEMAC Baseball Gifts for Boys 8-12 Baseball Stuff College Going Away Gifts Welcome Back to School Gifts Dorm Room Essentials for Guys Off to College", | |
| 295 | +"Hello Kitty and Friends - Cinnamoroll 12” Pink Monochrome Plush", | |
| 296 | +"BOBISUKA Pearl White Face Body Paint", | |
| 297 | +"OMKAGI 2 Piece Workout Sets for Women Halter Sports Bras Gym Sets Booty Leggings Outfits", | |
| 298 | +"Ivay Womens Scoop Neck Ribbed Knit Tank Top Sleeveless Cotton Wife Beater Camisole Shirts", | |
| 299 | +"SOLY HUX Women s Graphic Tee Shirts Novelty Funny Short Sleeve Summer Casual Tops", | |
| 300 | +"Wooden Taper Candle Holders: Wood Candlestick Holders Rustic Brown Farmhouse Fall Decor for Living Room Dinning Table Centerpiece Christmas Set of 2", | |
| 301 | +"PRETTYGARDEN Long Sleeve Shirts for Women 2025 Fall V Neck Waffle Basic Tee Dressy Casual Winter Blouses Knit Tunic Tops", | |
| 302 | +"Ray-Ban RB2140 Original Wayfarer Square Sunglasses", | |
| 303 | +"Lee Womens Ultra Lux Comfort with Flex-to-go Utility Skimmer Capri Pant", | |
| 304 | +"3D Pedometer for Walking", | |
| 305 | +"HiiFeuer Medieval Faux Leather Chest Armor", | |
| 306 | +"Pet Deadly Dog Costume", | |
| 307 | +"Western Chief Kids Freestyle Neoprene Outdoor Boot", | |
| 308 | +"SKECHERS Women s Ultra Flex 3.0-Brilliant Path Hands Free Slip-INS Sneaker", | |
| 309 | +"LUOBO Keychain Accessory Decor Keychain Decoration backpacks Bag Pendant", | |
| 310 | +"10inch Teddy Bear Stuffed Animal", | |
| 311 | +"Halloweentown University T-Shirt for Women Fall Pumpkin Shirts Funny Halloween Thanksgiving Gift Tops", | |
| 312 | +"Women s Sexy American Flag Crop Tank 4th of July Patriotic Sleeveless Tee Tops", | |
| 313 | +"Gillette Fusion5 ProGlide Men s Razor Blade Refills", | |
| 314 | +"Poppy Playtime - Mommy Long Legs Plush (14 Medium Plush", | |
| 315 | +"Women’s Heated Vest with 12V 20000mAh Battery – Cropped Stand Collar Lightweight Insulated Winter Vest.", | |
| 316 | +"toolant Winter Work Gloves for Men", | |
| 317 | +"192Pcs Halloween Favors Stationery Gift Set", | |
| 318 | +"20 Pcs Ultra Thin Tattoo Cover up Patch Waterproof Tattoo Cover up Tape Sweatproof Tattoos Covers Patches Cuttable Invisible Non-Woven Fabric Patches for Tattoos Scar Birthmark 4.72×3.35In(Light Skin)", | |
| 319 | +"Popcorns Maker", | |
| 320 | +"Paladone Kuromi GloBuddies Night Light", | |
| 321 | +"Creativity for Kids Sensory Minis Dinosaur Kit | Cloud Clay Sensory Toy for Toddlers | Squish", | |
| 322 | +"Mouse Ears Headband Fully Sewn Sturdy Headbands 2-Pcs, 4.6-Inch Sequin Big Ears 3D Silk Satin Bowknot Suitable for Women and Girls Theme Role Play Costume Accessories Party", | |
| 323 | +"Tanluhu Sweatbands Sport Headbands for Men & Women", | |
| 324 | +"Pilates Reformer Machine", | |
| 325 | +"Fossil Fenmore Analog Men Watch", | |
| 326 | +"Stray Kids Official Lightstick Ver 2", | |
| 327 | +"Zima Dental Pod PRO: New Ultrasonic Retainer Cleaner Machine. Market-Leading", | |
| 328 | +"2300pcs Polymer Clay Beads Bracelet Making Kit", | |
| 329 | +"AI ACCESSORY INNOVATIONS Bluey 4 Piece Backpack Set for Pre School Girls & Boys", | |
| 330 | +"MIRITY Women s High Waist Cotton Underwear - Soft Full Coverage Briefs with Double-Layer Waistedband", | |
| 331 | +"Plus Size Summer Dresses - Floral Beach Wedding Guest Semi Formal Tiered Flowy Long Sundress", | |
| 332 | +"AUTOMET Womens Tops Summer Sweater Long Tunic Dressy Casual Blouses Business Cute Trendy Short Sleeve Shirt 2025", | |
| 333 | +"Black Sabbath Sketch Band T-Shirt", | |
| 334 | +"Loomie Upgraded 6 Drawer White Dresser for Bedroom", | |
| 335 | +"Michael Kors Womens Zuma Trainer", | |
| 336 | +"Chunky Silver Bohemian Flower Bracelet For Wemen Men", | |
| 337 | +"Classic Black Western Felt Roll Up Brim Cowboy and Cowgirl Hat for Women and Men - Decoration with Western Belt Bukle", | |
| 338 | +"Jellycat Little Pig Bag Charm", | |
| 339 | +"LARNMERN Steel Toe Work Boots Men", | |
| 340 | +"3PCS Gold Hair Ties", | |
| 341 | +"Red Kap Men s Snap Front Cotton Coverall", | |
| 342 | +"Citizen Quartz Mens Watch", | |
| 343 | +"ATHMILE Long Sleeve Shirts for Women Tunic Fall Tops Loose Fit Dressy Crew Neck Basic Sweaters 2025", | |
| 344 | +"Narecte Summer Maxi Dresses for Women Back Strap Beach Dress Women s Casual Dress Long Flowy Dresses for Vacation", | |
| 345 | +"LIDHAY Cowboy Hat for Women and Men Western Cowgirl Hats Suede Cowboy Hat for Rodeo", | |
| 346 | +"BIC Classic Maxi Pocket Lighter", | |
| 347 | +"A + S Luxxe Diaper Bag Tote – Stylish", | |
| 348 | +"100pack Name Badge Holders Name Tag Holder Clear Plastic Badge Holder ID Holders for Lanyard (100Pcs Vertical)", | |
| 349 | +"MOOSEA Christmas Gifts for Women Wife - Love Knot Moissanite Necklace 1-3ct D Color VVS1 Clarity Moissanite 925 Sterling Silver Necklace Anniversary Birthday Gifts for Women Wife Mom Girlfriend", | |
| 350 | +"Solid Wood Retangle End Table with Drawer and Storage Shelf", | |
| 351 | +"Madden Girl womens Beella Heeled SandalHeeled Sandal", | |
| 352 | +"Ekouaer 2 Pack Womens Pajama Sets Short Sleeve Sleepwear Soft Crew Neck Pj Shorts Set Printed Loungewear Set S-XXL", | |
| 353 | +"NPQQUAN Original Classic Low Profile Baseball Cap Golf Dad Hat Adjustable Cotton Hats Men Women Unconstructed Plain Cap", | |
| 354 | +"YEOREO Women Workout Biker Shorts Impact 4.5 No Front Seam Hidden Scrunch Lifting Seamless Yoga Gym Shorts", | |
| 355 | +"Merino Wool Underwear Men by Thermowave - Sport & Everyday Men s Merino Wool Boxer Brief - 150 GSM Stretchy & Soft", | |
| 356 | +"COACH Women s Leah Platform Loafers", | |
| 357 | +"Doodle Me Happy Kids Thank You Cards - 25 Cards With Envelopes - Cute", | |
| 358 | +"Spring Summer Women Pleated Casual Denim V Neck Ruffle Sleeve Dress Light Blue XL", | |
| 359 | +"Disney Hooded Matching Family Cosplay T-Shirt Infant to Adult Sizes (12 Months - 2XL)", | |
| 360 | +"Leather CPR Cleaner & Conditioner 18oz - Cleans", | |
| 361 | +"Baseball Shirts Women Baseball Mom Tshirt Baseball Heart Graphic Tee Game Day Gifts Funny Short Sleeve Tops", | |
| 362 | +"4 Pack Cooling Towels", | |
| 363 | +"ZEEPORTE Mask Fin Snorkel Set", | |
| 364 | +"60 Pcs Bride Tribe Bachelorette Party Favors Bulk Friendship Bridesmaid Gifts 12 Set Friendship Bracelets Heart Sunglasses Satin Scrunchie for Engagement Bridal Shower Wedding Favor", | |
| 365 | +"AUSELILY Summer Dress Sundress Beach Cover up Swing Dresses", | |
| 366 | +"Loungefly Disney Minnie Mouse Crossbody Satchel Handbag", | |
| 367 | +"Tactical Gym Bag for Men,50L Large 3 in 1 Sports Duffle Bag with Shoes Compartment for Travel", | |
| 368 | +"YETI Rambler 42 oz Tumbler with Handle and Straw Lid", | |
| 369 | +"Samsonite Classic Leather Slim Backpack", | |
| 370 | +"Fabletics Men s Only Short", | |
| 371 | +"3pcs Heart Badge Reel with Alligator Clip Cute Retractable Badge Holder Acrylic Nurse Badge Clip for Office Workers", | |
| 372 | +"Ortho Balance Hiking Shoes for Men Women", | |
| 373 | +"GOLDENMATE 1000VA/600W Lithium UPS Battery Backup and Surge Protector", | |
| 374 | +"Gelante Solid Color 100% Cotton Bucket Hat for Women and Men Packable Travel Summer Beach Hat", | |
| 375 | +"Sonic The Hedgehog 3 Movie Action Figures 2.5-Inch Movie Collector Toy Figure Multi-Pack Includes Sonic The Hedgehog Knuckles Shadow Buzz Bomber & Drone- Officially Licensed Toys", | |
| 376 | +"61 Pcs Nacho Libre Stickers Comedy Movie Graffiti Waterproof Vinyl for Adults for Birthday Party Supplies Decoration Favors for Water Bottles Laptop Suitcase Scrapbooking Choice", | |
| 377 | +"Neck Lift Tape", | |
| 378 | +"925 Sterling Silver Earrings for Womens Sparkly Colorful Full Diamond Simple Stylish Elegant Hypoallergenic Jewelry", | |
| 379 | +"Pink Ceramic Bow Vase for Flowers", | |
| 380 | +"Winter Coats For Men Winter Jackets Water Resistant Warm Thicken Parka Puffer Coat Long Down Jacket", | |
| 381 | +"Alarm Clocks for Bedrooms", | |
| 382 | +"KINURI Running Belt for Men & Women – Fits All Smartphones – Waterproof Waist Pack with Adjustable Strap – Ideal for Jogging", | |
| 383 | +"DREAM PAIRS Heels for Women Flip Flops Kitten Low Heels Open Square Toe Thong Heeled Sandals", | |
| 384 | +"Amazon Basics All Purpose Washable School Craft Liquid Glue for Making Slime", | |
| 385 | +"Inflatable Costume Adult Frog Full Body Deluxe Funny Air Blow Up Costume for Men Women Halloween", | |
| 386 | +"Mens Golf Pants Stretch Casual Dress Pants Elastic Drawstring Slacks for Men Lightweight Trousers with 5 Pockets", | |
| 387 | +"Lip Smacker Hello Kitty Lip Balm", | |
| 388 | +"Brown Sugar Keeper 3D – Terracotta Clay Bear Softener", | |
| 389 | +"MEETSUN Polarized Sunglasses for Women Men Trendy Classic Retro Designer Style", | |
| 390 | +"Corset Top Bustier Lingerie for Women Zipper Front Flower Sexy Burlesque Vintage", | |
| 391 | +"Pro Club Men s Heavyweight Mesh Basketball Shorts", | |
| 392 | +"Nike Tech Men s Full-Zip Windrunner Hoodie (HV0949-237", | |
| 393 | +"Ear Piercing Kit", | |
| 394 | +"Timberland Men s 6 Premium Boot", | |
| 395 | +"Nike Air Rift", | |
| 396 | +"Portable Hookah Set for Travel - Premium Handheld Glass Aluminum Mini Hookah Real Metal Accessories", | |
| 397 | +"Clear Backpack for Boys", | |
| 398 | +"Women’s Knee High Boots Round Toe Chunky Heel Faux Leather Tall Riding Boots with Side Zipper", | |
| 399 | +"Golf Grip Trainer & Connection Band 2Set", | |
| 400 | +"Monster High Self Scare Day Cleo De Nile Doll Play Set", | |
| 401 | +"Fortnite eGift Card - Powered by the Epic Games Store", | |
| 402 | +"Mesh Beach Bags", | |
| 403 | +"Crowye Anime Cosplay Costume for Halloween Princess Costume Accessories Anime White Cosplay Wig Egypt Arm Cuff Bracelet Gold Earrings Greek Goddess Set for Halloween Dress up Princess", | |
| 404 | +"Premium Women s Leather Tote Handbag - Bag for Everyday Use", | |
| 405 | +"Ekouaer Maternity Nursing Gown and Robe Set Labor Delivery Nursing Nightgowns for Breastfeeding Pregnancy Clothes", | |
| 406 | +"Superband Mermaid Tails for Swimming for Women and Adults Without Monofin", | |
| 407 | +"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops" | |
| 408 | + ], | |
| 409 | + "top_n":386, | |
| 410 | + "normalize": true | |
| 411 | + }' | |
| 412 | + | |
| 413 | +end=$(date +%s%N) # 结束时间,纳秒级 | |
| 414 | +duration=$(( (end - start) / 1000000 )) # 转换为毫秒 | |
| 415 | +echo "Command took $duration milliseconds." | |
| 416 | + | |
| 417 | + | ... | ... |
| ... | ... | @@ -0,0 +1,35 @@ |
| 1 | +#!/bin/bash | |
| 2 | +start=$(date +%s%N) # 开始时间,纳秒级 | |
| 3 | + | |
| 4 | +time curl -X POST "http://localhost:6007/rerank" \ | |
| 5 | + -H "Content-Type: application/json" \ | |
| 6 | + -d '{ | |
| 7 | + "query": "健身女生T恤短袖", | |
| 8 | + "docs": [ "60 Jelly Bracelets 80 s Adult Size - MAQIHAN Neon Gummy Bracelets for Women 80s Jelly Bangles Glow Silicone Bands Jewelry Wristband Rainbow Jellies Bangle Girls Boys Colored Accessories Party Favor", | |
| 9 | +"FITORY Mens Sandals", | |
| 10 | +"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", | |
| 11 | +"Merrell Mens Hydro Moc", | |
| 12 | +"FITORY Mens Sandals", | |
| 13 | +"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", | |
| 14 | +"Merrell Mens Hydro Moc", | |
| 15 | + | |
| 16 | +"FITORY Mens Sandals", | |
| 17 | +"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", | |
| 18 | +"Merrell Mens Hydro Moc", | |
| 19 | + | |
| 20 | + | |
| 21 | +"FITORY Mens Sandals", | |
| 22 | +"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", | |
| 23 | +"Merrell Mens Hydro Moc", | |
| 24 | +"Superband Mermaid Tails for Swimming for Women and Adults Without Monofin", | |
| 25 | +"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops" | |
| 26 | + ], | |
| 27 | + "top_n":386, | |
| 28 | + "normalize": true | |
| 29 | + }' | |
| 30 | + | |
| 31 | +end=$(date +%s%N) # 结束时间,纳秒级 | |
| 32 | +duration=$(( (end - start) / 1000000 )) # 转换为毫秒 | |
| 33 | +echo "Command took $duration milliseconds." | |
| 34 | + | |
| 35 | + | ... | ... |
config/config.yaml
| ... | ... | @@ -131,7 +131,7 @@ function_score: |
| 131 | 131 | # 重排配置(provider/URL 在 services.rerank) |
| 132 | 132 | rerank: |
| 133 | 133 | enabled: true |
| 134 | - rerank_window: 384 | |
| 134 | + rerank_window: 400 | |
| 135 | 135 | timeout_sec: 15.0 |
| 136 | 136 | weight_es: 0.4 |
| 137 | 137 | weight_ai: 0.6 |
| ... | ... | @@ -282,29 +282,32 @@ services: |
| 282 | 282 | device: null |
| 283 | 283 | use_fp16: true |
| 284 | 284 | batch_size: 64 |
| 285 | - max_length: 512 | |
| 285 | + max_length: 160 | |
| 286 | 286 | cache_dir: "./model_cache" |
| 287 | 287 | enable_warmup: true |
| 288 | 288 | qwen3_vllm: |
| 289 | 289 | model_name: "Qwen/Qwen3-Reranker-0.6B" |
| 290 | 290 | engine: "vllm" |
| 291 | - max_model_len: 256 | |
| 291 | + max_model_len: 160 | |
| 292 | 292 | tensor_parallel_size: 1 |
| 293 | 293 | gpu_memory_utilization: 0.36 |
| 294 | 294 | dtype: "float16" |
| 295 | 295 | enable_prefix_caching: true |
| 296 | 296 | enforce_eager: false |
| 297 | - infer_batch_size: 64 | |
| 297 | + infer_batch_size: 100 | |
| 298 | 298 | sort_by_doc_length: true |
| 299 | - length_sort_mode: "char" # char | token | |
| 300 | - instruction: "rank products by given query" | |
| 299 | + # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点 | |
| 300 | + instruction: "rank products by given query" | |
| 301 | + # instruction: "Given a query, score the product for relevance" | |
| 301 | 302 | qwen3_transformers: |
| 302 | 303 | model_name: "Qwen/Qwen3-Reranker-0.6B" |
| 303 | 304 | instruction: "rank products by given query" |
| 305 | + # instruction: "Score the product’s relevance to the given query" | |
| 304 | 306 | max_length: 8192 |
| 305 | 307 | batch_size: 64 |
| 306 | 308 | use_fp16: true |
| 307 | - attn_implementation: "flash_attention_2" | |
| 309 | + # sdpa:默认无需 flash-attn;若已安装 flash_attn 可改为 flash_attention_2 | |
| 310 | + attn_implementation: "sdpa" | |
| 308 | 311 | dashscope_rerank: |
| 309 | 312 | model_name: "qwen3-rerank" |
| 310 | 313 | # 按地域选择 endpoint: | ... | ... |
docs/DEVELOPER_GUIDE.md
| ... | ... | @@ -360,7 +360,7 @@ services: |
| 360 | 360 | ### 7.6 新增后端清单(以 Qwen3-Reranker 为例) |
| 361 | 361 | |
| 362 | 362 | 1. **实现协议**:在 `reranker/backends/qwen3_vllm.py` 中实现类,提供 `score_with_meta(query, docs, normalize) -> (scores, meta)`,输出与 docs 等长且顺序一致。 |
| 363 | -2. **配置**:在 `config/config.yaml` 的 `services.rerank.backends` 下增加 `qwen3_vllm` 块(model_name、engine、max_model_len、gpu_memory_utilization、`infer_batch_size`、`sort_by_doc_length`、`length_sort_mode` 等);支持环境变量 `RERANK_BACKEND=qwen3_vllm`。 | |
| 363 | +2. **配置**:在 `config/config.yaml` 的 `services.rerank.backends` 下增加 `qwen3_vllm` 块(model_name、engine、max_model_len、gpu_memory_utilization、`infer_batch_size`、`sort_by_doc_length`等);支持环境变量 `RERANK_BACKEND=qwen3_vllm`。 | |
| 364 | 364 | 3. **注册**:在 `reranker/backends/__init__.py` 的 `get_rerank_backend(name, config)` 中增加 `qwen3_vllm` 分支。 |
| 365 | 365 | 4. **服务启动**:`reranker/server.py` 启动时根据配置调用 `get_rerank_backend(backend_name, backend_cfg)` 得到实例。 |
| 366 | 366 | 5. **调用方**:无需修改;仅部署时启动使用新后端的 reranker 服务即可。 | ... | ... |
docs/TODO.txt
| 1 | 1 | |
| 2 | 2 | |
| 3 | +@reranker/backends/qwen3_vllm.py 单次 generate 前有进程内锁,同一进程里不会并行多路 vLLM 推理,这个锁有必要吗?是否会影响性能?是否能够打开,使得性能更好?比如这个场景,我一次请求 400 条,分成每64个一个batch,基于我现在的gpu配置,可以再提高并发度吗? | |
| 4 | +测试了,让每个批次都并发地进行,耗时没有变化 | |
| 5 | + | |
| 6 | +增加款式意图识别模块 | |
| 7 | + | |
| 8 | +意图类型: 颜色,尺寸(目前只需要支持这两种) | |
| 9 | + | |
| 10 | +意图召回层: | |
| 11 | +每种意图,有一个召回词集合 | |
| 12 | +对query(包括原始query、各种翻译query 都做匹配) | |
| 13 | + | |
| 14 | +意图识别层: | |
| 15 | +如果召回 判断有款式需求, | |
| 16 | + | |
| 17 | + | |
| 18 | +是否有: | |
| 19 | +颜色需求 | |
| 20 | +尺码需求 | |
| 21 | +如果有: 先做sku筛选,然后把最优的拼接到名称中,参与reranker。 | |
| 22 | + | |
| 23 | + | |
| 24 | +现在在reranker、分页之后、做填充的时候,已经有做sku的筛选。 | |
| 25 | +需要优化: | |
| 26 | +现在是,先做包含的判断,找到第一个 option_value被query包含的,则直接认为匹配。改为 | |
| 27 | +1. 第一轮:遍历完所有 option_value,仅当有且仅有一个被 query 包含时,才直接认为匹配。 | |
| 28 | +2. 第二轮:如果有多个,跳到3。如果没有,对每个词都走泛化词表进行匹配。 | |
| 29 | +3. 第三轮:如果有多个,那么对这多个,走embedding相关性取最高的。如果一个也没有,则对所有的走embedding相关性取最高的 | |
| 30 | +这个sku筛选也需要提取为一个独立的模块 | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | +2026-03-21 10:29:23,698 - elastic_transport.transport - INFO - POST http://localhost:9200/search_products_tenant_163/_search?include_named_queries_score=false [status:200 duration:0.009s] | |
| 35 | +2026-03-21 10:29:23,700 - request_context - INFO - 分页详情回填 | ids=20 | filled=20 | took=7ms | |
| 36 | +2026-03-21 10:29:23,700 - request_context - INFO - 重排分页切片 | from=20, size=20, 返回=20条 | |
| 37 | +2026-03-21 10:29:23,720 - embeddings.text_encoder - ERROR - TextEmbeddingEncoder service request failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 38 | +Traceback (most recent call last): | |
| 39 | + File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service | |
| 40 | + response.raise_for_status() | |
| 41 | + File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status | |
| 42 | + raise HTTPError(http_error_msg, response=self) | |
| 43 | +requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 44 | +2026-03-21 10:29:23,720 - search.searcher - WARNING - Failed to encode SKU option1 values for final-page sorting: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 45 | +Traceback (most recent call last): | |
| 46 | + File "/data/saas-search/search/searcher.py", line 448, in _apply_sku_sorting_for_page_hits | |
| 47 | + encoded_option_vectors = text_encoder.encode(option1_values_to_encode, priority=1) | |
| 48 | + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| 49 | + File "/data/saas-search/embeddings/text_encoder.py", line 112, in encode | |
| 50 | + response_data = self._call_service( | |
| 51 | + ^^^^^^^^^^^^^^^^^^^ | |
| 52 | + File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service | |
| 53 | + response.raise_for_status() | |
| 54 | + File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status | |
| 55 | + raise HTTPError(http_error_msg, response=self) | |
| 56 | +requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 57 | +2026-03-21 10:29:23,721 - request_context - WARNING - SKU option embedding failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 58 | + | |
| 59 | + | |
| 60 | + | |
| 3 | 61 | |
| 4 | 62 | 先阅读文本embedding相关的代码: |
| 5 | 63 | @embeddings/README.md @embeddings/server.py @docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md @embeddings/text_encoder.py |
| ... | ... | @@ -246,12 +304,20 @@ config/environments/<env>.yaml |
| 246 | 304 | |
| 247 | 305 | |
| 248 | 306 | |
| 307 | +属性的筛选: | |
| 308 | +训练一个bert/transformer多分类模型,分类: 颜色、尺寸、材质 等等。但是要注意一些属性的值不规范、非常多,要考虑 是不是做规范化,如何规范化。 | |
| 309 | + | |
| 310 | + | |
| 311 | + | |
| 249 | 312 | |
| 250 | 313 | 无结果重查 |
| 251 | 314 | 稀有语言,翻译可能超时(因为zh-en互译之外的翻译耗时更长) |
| 252 | 315 | |
| 253 | 316 | |
| 254 | 317 | |
| 318 | + | |
| 319 | + | |
| 320 | + | |
| 255 | 321 | 检索相关性优化: |
| 256 | 322 | 原始搜索词和翻译的词,都需要有对应的主干分析 |
| 257 | 323 | 这个主干可以根据词性简单提取名词即可 | ... | ... |
docs/suggestion索引构建.md
| ... | ... | @@ -96,14 +96,15 @@ |
| 96 | 96 | "lang": { "type": "keyword" }, |
| 97 | 97 | "text": { "type": "keyword" }, // 显示给用户的原始文案 |
| 98 | 98 | "text_norm": { "type": "keyword" }, // 归一化后文本:小写+空白规整 |
| 99 | - "sources": { "type": "keyword" }, // 来源集合:["title", "qanchor", "query_log"] | |
| 99 | + "sources": { "type": "keyword" }, // 来源集合:["title", "qanchor", "tag", "query_log"] | |
| 100 | 100 | "title_doc_count": { "type": "integer" }, |
| 101 | 101 | "qanchor_doc_count": { "type": "integer" }, |
| 102 | + "tag_doc_count": { "type": "integer" }, | |
| 102 | 103 | "query_count_7d": { "type": "integer" }, |
| 103 | 104 | "query_count_30d": { "type": "integer" }, |
| 104 | 105 | "rank_score": { "type": "float" }, // 排序打分 |
| 105 | 106 | "lang_confidence": { "type": "float" }, |
| 106 | - "lang_source": { "type": "keyword" }, // 语言来源:log_field / request_params / script / default | |
| 107 | + "lang_source": { "type": "keyword" }, // 语言来源:log_field / request_params / detector / fallback / default | |
| 107 | 108 | "lang_conflict": { "type": "boolean" }, // 是否存在多来源语言冲突 |
| 108 | 109 | "status": { "type": "byte" }, // 1 = 有效 |
| 109 | 110 | "updated_at": { "type": "date" }, |
| ... | ... | @@ -166,9 +167,9 @@ |
| 166 | 167 | |
| 167 | 168 | #### 4. 构建候选词 |
| 168 | 169 | |
| 169 | -##### 4.1 从商品索引收集 title / qanchors(Step 1) | |
| 170 | +##### 4.1 从商品索引收集 title / qanchors / tags(Step 1) | |
| 170 | 171 | |
| 171 | - - 遍历店铺的所有商品:获取每个商品的 `"spu_id"`, `"title"`, `"qanchors"` 3个字段(按`spu_id`升序) | |
| 172 | + - 遍历店铺的所有商品:获取每个商品的 `"spu_id"`, `"title"`, `"qanchors"`, `"tags"`(按 `spu_id`、`id.keyword` 升序,便于 `search_after` 稳定分页) | |
| 172 | 173 | |
| 173 | 174 | - 对每个商品文档: |
| 174 | 175 | |
| ... | ... | @@ -216,6 +217,11 @@ |
| 216 | 217 | - `text_norm = _normalize_text(q_text)`,再用 `_looks_noise` 过滤 |
| 217 | 218 | - 同样按 `(lang, text_norm)` 合并为 `SuggestionCandidate`,调用 `add_product("qanchor", spu_id=product_id)`。 |
| 218 | 219 | |
| 220 | + 4. **tags 处理**(与 `index_languages` 循环并列,每个商品只做一次): | |
| 221 | + - `tags` 可为字符串数组,或逗号等分隔的单个字符串;经 `_iter_product_tags` 展开为若干条。 | |
| 222 | + - 每条 tag **无语言字段**:使用 `query.query_parser.detect_text_language_for_suggestions`(与 `QueryParser` 相同的 `LanguageDetector`)判定语言,并约束在租户的 `index_languages` 内。 | |
| 223 | + - 通过 `_looks_noise` 后按 `(detected_lang, text_norm)` 合并,调用 `add_product("tag", spu_id=product_id)`。 | |
| 224 | + | |
| 219 | 225 | ##### 4.2 从查询日志收集用户 query(Step 2) |
| 220 | 226 | |
| 221 | 227 | 对应 `_iter_query_log_rows` 与 `_build_full_candidates` 的后半段。 |
| ... | ... | @@ -284,6 +290,7 @@ |
| 284 | 290 | 1.8 \cdot \log(1 + query\_count\_{30d}) + |
| 285 | 291 | 1.2 \cdot \log(1 + query\_count\_{7d}) + |
| 286 | 292 | 1.0 \cdot \log(1 + qanchor\_doc\_count) + |
| 293 | + 0.85 \cdot \log(1 + tag\_doc\_count) + | |
| 287 | 294 | 0.6 \cdot \log(1 + title\_doc\_count) |
| 288 | 295 | \] |
| 289 | 296 | |
| ... | ... | @@ -423,6 +430,7 @@ |
| 423 | 430 | - `sources = ["query_log"]` |
| 424 | 431 | - `title_doc_count = 0` |
| 425 | 432 | - `qanchor_doc_count = 0` |
| 433 | + - `tag_doc_count = 0` | |
| 426 | 434 | - `completion.<lang>.input = [text]` |
| 427 | 435 | - `completion.<lang>.weight = int(max(rank_score, 1.0) * 100)` |
| 428 | 436 | - `sat.<lang> = text` |
| ... | ... | @@ -446,7 +454,7 @@ |
| 446 | 454 | - 将 `"query_log"` 加入 `sources` |
| 447 | 455 | - `lang_conflict` 与 `params.lang_conflict` 取或 |
| 448 | 456 | - 若 `params.lang_confidence > ctx._source.lang_confidence` 则更新 `lang_confidence` 和 `lang_source` |
| 449 | - - 基于更新后的 `query_count_7d/30d` + `qanchor_doc_count` + `title_doc_count` 重新计算 `rank_score` | |
| 457 | + - 基于更新后的 `query_count_7d/30d` + `qanchor_doc_count` + `tag_doc_count` + `title_doc_count` 重新计算 `rank_score` | |
| 450 | 458 | - `status = 1` |
| 451 | 459 | - `updated_at = params.now_iso` |
| 452 | 460 | - 同步更新 `text / lang / text_norm` |
| ... | ... | @@ -502,3 +510,9 @@ |
| 502 | 510 | - 若是 `"zh_tw"` / `"pt_br"` → 保留全量 |
| 503 | 511 | - 其他 → 取 `_` 前缀(例如 `"en_US"` → `"en"`) |
| 504 | 512 | |
| 513 | +#### 4. 查询日志 / tag 的语言回退 `_resolve_query_language` 与 `detect_text_language_for_suggestions` | |
| 514 | + | |
| 515 | +- 日志语言优先级不变:`language` 字段 → `request_params.language` → **语言检测**。 | |
| 516 | +- 检测实现为 `query.query_parser.detect_text_language_for_suggestions`:内部使用与 `QueryParser` 相同的 `LanguageDetector`(`query/language_detector.py`),并将结果约束到租户 `index_languages`(含 `zh_tw` 等与检测码的 base 匹配)。 | |
| 517 | +- 在线联想:`SuggestionService` 在合并 completion 与 SAT 结果后,按 `ES_score × (1 / sqrt(词元数))` 排序(词元算法与 `simple_tokenize_query` 一致),再以 `rank_score` 作为次要排序键,以缓解长标题/长短语相对于短词根降权不足的问题。 | |
| 518 | + | ... | ... |
docs/工作总结-微服务性能优化与架构.md
| ... | ... | @@ -41,7 +41,7 @@ |
| 41 | 41 | - **精度**:`dtype: "float16"`,降低显存与计算量。 |
| 42 | 42 | - **Prefix Caching**:`enable_prefix_caching: true`,对重复前缀(如相同 query)做缓存,减少重复计算。 |
| 43 | 43 | - **CUDA 图**:`enforce_eager: false`(默认),利用 vLLM 的 CUDA graph 降低 kernel 启动开销。 |
| 44 | -- **按文档长度分批**:`sort_by_doc_length: true`,请求内先按文档长度排序再按 `infer_batch_size` 分批推理,减少 padding 浪费;`length_sort_mode: "char"`(更快,短文本推荐)或 `"token"`(更精确)。 | |
| 44 | +- **按文档长度分批**:`sort_by_doc_length: true`,请求内先按文档长度排序再按 `infer_batch_size` 分批推理,减少 padding 浪费。 | |
| 45 | 45 | - **参数搜索结论**:在 T4、1000-doc 口径下对 `infer_batch_size` 做了 24/32/48/64 对比;**单请求延迟(c=1)** 上 `infer_batch_size=64` 最优,故当前默认 `infer_batch_size: 64`;`max_model_len: 256` 满足 query+doc 短文本场景;`gpu_memory_utilization: 0.36` 与 T4 16GB 匹配。 |
| 46 | 46 | |
| 47 | 47 | **具体配置**(`config/config.yaml` → `services.rerank.backends.qwen3_vllm`): |
| ... | ... | @@ -56,7 +56,6 @@ enable_prefix_caching: true |
| 56 | 56 | enforce_eager: false |
| 57 | 57 | infer_batch_size: 64 |
| 58 | 58 | sort_by_doc_length: true |
| 59 | -length_sort_mode: "char" | |
| 60 | 59 | instruction: "Given a shopping query, rank product titles by relevance" |
| 61 | 60 | ``` |
| 62 | 61 | 环境变量覆盖:`RERANK_BACKEND`、`RERANKER_SERVICE_URL`、`RERANK_VLLM_INFER_BATCH_SIZE`、`RERANK_VLLM_SORT_BY_DOC_LENGTH` 等。启停:`./scripts/service_ctl.sh start reranker`,健康:`curl -sS http://127.0.0.1:6007/health`。 | ... | ... |
docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md
| ... | ... | @@ -161,7 +161,7 @@ curl "http://localhost:6008/ready" |
| 161 | 161 | |
| 162 | 162 | 说明:默认后端为 `qwen3_vllm`(`Qwen/Qwen3-Reranker-0.6B`),需要可用 GPU 显存。 |
| 163 | 163 | |
| 164 | -补充:`docs` 的请求大小与模型推理 `batch size` 解耦。即使一次传入 1000 条文档,服务端也会按 `services.rerank.backends.qwen3_vllm.infer_batch_size` 自动拆分;若 `sort_by_doc_length=true`,会先按文档长度排序后分批,减少 padding,再按原输入顺序返回分数。`length_sort_mode` 可选 `char`(更快)或 `token`(更精确)。 | |
| 164 | +补充:`docs` 的请求大小与模型推理 `batch size` 解耦。即使一次传入 1000 条文档,服务端也会按 `services.rerank.backends.qwen3_vllm.infer_batch_size` 自动拆分。 | |
| 165 | 165 | |
| 166 | 166 | #### 7.2.1 `POST /rerank` — 结果重排 |
| 167 | 167 | ... | ... |
docs/相关性检索优化说明.md
| ... | ... | @@ -267,3 +267,17 @@ python ./scripts/eval_search_quality.py |
| 267 | 267 | 3. 源语种不在索引语言,翻译全部失败(验证多目标 fallback) |
| 268 | 268 | 4. 自定义 `original_query_fallback_boost_when_translation_missing` 生效 |
| 269 | 269 | 5. 非 `zh/en` 语种字段动态拼接(如 `de/fr/es`) |
| 270 | + | |
| 271 | + | |
| 272 | + | |
| 273 | +## reranker方面: | |
| 274 | +BAAI/bge-reranker-v2-m3的一个严重badcase: | |
| 275 | +q=黑色中长半身裙 | |
| 276 | + | |
| 277 | +Rerank score: 0.0785 | |
| 278 | +title.zh: 2026款韩版高腰显瘦雪尼尔包臀裙灯芯绒开叉中长款咖啡色半身裙女 | |
| 279 | +title.en: 2026 Korean-style High-waisted Slimming Corduroy Skirt with Slit, Mid-Length Coffee-colored Skirt for Women | |
| 280 | + | |
| 281 | +Rerank score: 0.9643 | |
| 282 | +title.en: Black Half-high Collar Base Shirt Women's Autumn and Winter fleece-lined Contrast Color Pure Desire Design Sense Horn Sleeve Ruffled Inner Top | |
| 283 | +title.zh: 黑色高领半高领女士秋冬内搭加绒拼色纯欲设计荷叶边袖内衬上衣 | ... | ... |
mappings/search_suggestions.json
| ... | ... | @@ -27,6 +27,7 @@ |
| 27 | 27 | "sources": { "type": "keyword" }, |
| 28 | 28 | "title_doc_count": { "type": "integer" }, |
| 29 | 29 | "qanchor_doc_count": { "type": "integer" }, |
| 30 | + "tag_doc_count": { "type": "integer" }, | |
| 30 | 31 | "query_count_7d": { "type": "integer" }, |
| 31 | 32 | "query_count_30d": { "type": "integer" }, |
| 32 | 33 | "rank_score": { "type": "float" }, | ... | ... |
query/query_parser.py
| ... | ... | @@ -4,7 +4,7 @@ Query parser - main module for query processing. |
| 4 | 4 | Handles query rewriting, translation, and embedding generation. |
| 5 | 5 | """ |
| 6 | 6 | |
| 7 | -from typing import Dict, List, Optional, Any, Union | |
| 7 | +from typing import Dict, List, Optional, Any, Union, Tuple | |
| 8 | 8 | import numpy as np |
| 9 | 9 | import logging |
| 10 | 10 | import re |
| ... | ... | @@ -23,6 +23,20 @@ try: |
| 23 | 23 | except Exception: # pragma: no cover |
| 24 | 24 | hanlp = None |
| 25 | 25 | |
| 26 | + | |
| 27 | +def simple_tokenize_query(text: str) -> List[str]: | |
| 28 | + """ | |
| 29 | + Lightweight tokenizer for suggestion length / analysis (aligned with QueryParser fallback). | |
| 30 | + | |
| 31 | + - Consecutive CJK characters form one token | |
| 32 | + - Runs of ASCII letters, digits and underscores (optionally joined by internal hyphens) form one token | |
| 33 | + """ | |
| 34 | + if not text: | |
| 35 | + return [] | |
| 36 | + pattern = re.compile(r"[\u4e00-\u9fff]+|[A-Za-z0-9_]+(?:-[A-Za-z0-9_]+)*") | |
| 37 | + return pattern.findall(text) | |
| 38 | + | |
| 39 | + | |
| 26 | 40 | class ParsedQuery: |
| 27 | 41 | """Container for parsed query results.""" |
| 28 | 42 | |
| ... | ... | @@ -173,16 +187,7 @@ class QueryParser: |
| 173 | 187 | return config.query_config.default_translation_model |
| 174 | 188 | |
| 175 | 189 | def _simple_tokenize(self, text: str) -> List[str]: |
| 176 | - """ | |
| 177 | - Lightweight tokenizer fallback. | |
| 178 | - | |
| 179 | - - Groups consecutive CJK chars as a token | |
| 180 | - - Groups consecutive latin/digits/underscore/dash as a token | |
| 181 | - """ | |
| 182 | - if not text: | |
| 183 | - return [] | |
| 184 | - pattern = re.compile(r"[\u4e00-\u9fff]+|[A-Za-z0-9_]+(?:-[A-Za-z0-9_]+)*") | |
| 185 | - return pattern.findall(text) | |
| 190 | + return simple_tokenize_query(text) | |
| 186 | 191 | |
| 187 | 192 | def _extract_keywords(self, query: str) -> str: |
| 188 | 193 | """Extract keywords (nouns with length > 1) from query.""" |
| ... | ... | @@ -636,3 +641,63 @@ class QueryParser: |
| 636 | 641 | queries.append(translation) |
| 637 | 642 | |
| 638 | 643 | return queries |
| 644 | + | |
| 645 | + | |
| 646 | +def detect_text_language_for_suggestions( | |
| 647 | + text: str, | |
| 648 | + *, | |
| 649 | + index_languages: Optional[List[str]] = None, | |
| 650 | + primary_language: str = "en", | |
| 651 | +) -> Tuple[str, float, str]: | |
| 652 | + """ | |
| 653 | + Language detection for short strings (mixed-language tags, query-log fallback). | |
| 654 | + | |
| 655 | + Uses the same ``LanguageDetector`` as :class:`QueryParser`. Returns a language | |
| 656 | + code present in ``index_languages`` when possible, otherwise the tenant primary. | |
| 657 | + | |
| 658 | + Returns: | |
| 659 | + (lang, confidence, source) where source is ``detector``, ``fallback``, or ``default``. | |
| 660 | + """ | |
| 661 | + langs_list = [x for x in (index_languages or []) if x] | |
| 662 | + langs_set = set(langs_list) | |
| 663 | + | |
| 664 | + def _norm_lang(raw: Optional[str]) -> Optional[str]: | |
| 665 | + if not raw: | |
| 666 | + return None | |
| 667 | + token = str(raw).strip().lower().replace("-", "_") | |
| 668 | + if not token: | |
| 669 | + return None | |
| 670 | + if token in {"zh_tw", "pt_br"}: | |
| 671 | + return token | |
| 672 | + return token.split("_")[0] | |
| 673 | + | |
| 674 | + primary = _norm_lang(primary_language) or "en" | |
| 675 | + if primary not in langs_set and langs_list: | |
| 676 | + primary = _norm_lang(langs_list[0]) or langs_list[0] | |
| 677 | + | |
| 678 | + if not text or not str(text).strip(): | |
| 679 | + return primary, 0.0, "default" | |
| 680 | + | |
| 681 | + raw_code = LanguageDetector().detect(str(text).strip()) | |
| 682 | + if not raw_code or raw_code == "unknown": | |
| 683 | + return primary, 0.35, "default" | |
| 684 | + | |
| 685 | + def _index_lang_base(cand: str) -> str: | |
| 686 | + t = str(cand).strip().lower().replace("-", "_") | |
| 687 | + return t.split("_")[0] if t else "" | |
| 688 | + | |
| 689 | + def _resolve_index_lang(code: str) -> Optional[str]: | |
| 690 | + if code in langs_set: | |
| 691 | + return code | |
| 692 | + for cand in langs_list: | |
| 693 | + if _index_lang_base(cand) == code: | |
| 694 | + return cand | |
| 695 | + return None | |
| 696 | + | |
| 697 | + if langs_list: | |
| 698 | + resolved = _resolve_index_lang(raw_code) | |
| 699 | + if resolved is None: | |
| 700 | + return primary, 0.5, "fallback" | |
| 701 | + return resolved, 0.92, "detector" | |
| 702 | + | |
| 703 | + return raw_code, 0.92, "detector" | ... | ... |
reranker/DEPLOYMENT_AND_TUNING.md
| ... | ... | @@ -93,19 +93,14 @@ curl -sS http://127.0.0.1:6007/health |
| 93 | 93 | |
| 94 | 94 | - 调用方一次可传入 1000 docs(业务需求) |
| 95 | 95 | - 服务端按 `infer_batch_size` 自动拆批推理(模型效率需求) |
| 96 | - | |
| 97 | -### 4.2 先排序再分批,降低 padding 浪费 | |
| 98 | - | |
| 99 | 96 | - `sort_by_doc_length: true`:按长度排序后再分批 |
| 100 | -- `length_sort_mode: "char"`:短文本场景下开销更低,默认推荐 | |
| 101 | -- `length_sort_mode: "token"`:长度估计更精确,但有额外 tokenizer 开销 | |
| 102 | 97 | |
| 103 | -### 4.3 全局去重后回填 | |
| 98 | +### 4.2 全局去重后回填 | |
| 104 | 99 | |
| 105 | 100 | - 对 docs 进行全局去重(非“仅相邻去重”) |
| 106 | 101 | - 推理后按原请求顺序回填 scores,保证接口契约稳定 |
| 107 | 102 | |
| 108 | -### 4.4 启动稳定性修复 | |
| 103 | +### 4.3 启动稳定性修复 | |
| 109 | 104 | |
| 110 | 105 | - `service_ctl.sh` 对 reranker 使用独立启动路径 |
| 111 | 106 | - 增加“稳定健康检查”(连续健康探测)避免“刚 healthy 即退出”的假阳性 |
| ... | ... | @@ -159,7 +154,7 @@ curl -sS http://127.0.0.1:6007/health |
| 159 | 154 | |
| 160 | 155 | ## 7. 生产建议 |
| 161 | 156 | |
| 162 | -- 默认保持:`infer_batch_size: 64`、`sort_by_doc_length: true`、`length_sort_mode: "char"` | |
| 157 | +- 默认保持:`infer_batch_size: 64`、`sort_by_doc_length: true` | |
| 163 | 158 | - 满足以下条件时可考虑提高到 `96`:业务以吞吐优先、可接受更高单请求延迟、已通过同机同数据压测验证收益 |
| 164 | 159 | - 每次改动后都必须复跑 `benchmark_reranker_1000docs.sh` 并归档结果 |
| 165 | 160 | ... | ... |
reranker/README.md
| ... | ... | @@ -24,7 +24,7 @@ Reranker 服务提供统一的 `/rerank` API,支持可插拔后端(BGE、Qwe |
| 24 | 24 | - `reranker/config.py`:服务端口、MAX_DOCS、NORMALIZE 等(后端参数在 config.yaml) |
| 25 | 25 | |
| 26 | 26 | ## 依赖 |
| 27 | -- 通用:`torch`、`modelscope`、`fastapi`、`uvicorn`(见项目 `requirements.txt` / `requirements_ml.txt`) | |
| 27 | +- 通用:`torch`、`transformers`、`fastapi`、`uvicorn`(隔离环境见 `requirements_reranker_service.txt`;全量 ML 环境另见 `requirements_ml.txt`) | |
| 28 | 28 | - **Qwen3-vLLM 后端**:`vllm>=0.8.5`、`transformers>=4.51.0`(仅当使用 `backend: qwen3_vllm` 时需 vLLM) |
| 29 | 29 | - **Qwen3-Transformers 后端**:`transformers>=4.51.0`、`torch`(无需 vLLM,适合 CPU 或小显存) |
| 30 | 30 | ```bash |
| ... | ... | @@ -53,7 +53,6 @@ services: |
| 53 | 53 | max_model_len: 256 |
| 54 | 54 | infer_batch_size: 64 |
| 55 | 55 | sort_by_doc_length: true |
| 56 | - length_sort_mode: "char" # char | token | |
| 57 | 56 | enable_prefix_caching: true |
| 58 | 57 | enforce_eager: false |
| 59 | 58 | instruction: "Given a shopping query, rank product titles by relevance" |
| ... | ... | @@ -157,7 +156,7 @@ uvicorn reranker.server:app --host 0.0.0.0 --port 6007 --log-level info |
| 157 | 156 | ## Notes |
| 158 | 157 | - 无请求级缓存;输入按字符串去重后推理,再按原始顺序回填分数。 |
| 159 | 158 | - 空或 null 的 doc 跳过并计为 0。 |
| 160 | -- **Qwen3-vLLM 分批策略**:`docs` 请求体可为 1000+,服务端会按 `infer_batch_size` 拆分;当 `sort_by_doc_length=true` 时,会先按文档长度排序后分批,减少 padding 开销,最终再按输入顺序回填分数。`length_sort_mode` 支持 `char`(默认,更快)与 `token`(更精确)。 | |
| 159 | +- **Qwen3-vLLM 分批策略**:`docs` 请求体可为 1000+,服务端会按 `infer_batch_size` 拆分;当 `sort_by_doc_length=true` 时,会先按文档长度排序后分批,减少 padding 开销,最终再按输入顺序回填分数。 | |
| 161 | 160 | - 运行时可用环境变量临时覆盖批量参数:`RERANK_VLLM_INFER_BATCH_SIZE`、`RERANK_VLLM_SORT_BY_DOC_LENGTH`。 |
| 162 | 161 | - **Qwen3-vLLM**:参考 [Qwen3-Reranker-0.6B](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B),需 GPU 与较多显存;与 BGE 相比适合长文本、高吞吐场景(vLLM 前缀缓存)。 |
| 163 | -- **Qwen3-Transformers**:官方 Transformers Usage 方式,无需 vLLM;适合 CPU 或小显存,可选 `attn_implementation: "flash_attention_2"` 加速。 | |
| 162 | +- **Qwen3-Transformers**:官方 Transformers Usage 方式,无需 vLLM;适合 CPU 或小显存。默认 `attn_implementation: "sdpa"`;若已安装 `flash_attn` 可设 `flash_attention_2`(未安装时服务会自动回退到 sdpa)。 | ... | ... |
reranker/backends/qwen3_vllm.py
| ... | ... | @@ -76,7 +76,7 @@ class Qwen3VLLMRerankerBackend: |
| 76 | 76 | dtype = str(self._config.get("dtype", "float16")).strip().lower() |
| 77 | 77 | self._instruction = str( |
| 78 | 78 | self._config.get("instruction") |
| 79 | - or "Given a shopping query, rank product titles by relevance" | |
| 79 | + or "Given a query, score the product for relevance" | |
| 80 | 80 | ) |
| 81 | 81 | infer_batch_size = os.getenv("RERANK_VLLM_INFER_BATCH_SIZE") or self._config.get("infer_batch_size", 64) |
| 82 | 82 | sort_by_doc_length = os.getenv("RERANK_VLLM_SORT_BY_DOC_LENGTH") | ... | ... |
reranker/bge_reranker.py
| ... | ... | @@ -15,7 +15,7 @@ import time |
| 15 | 15 | from typing import Any, Dict, List, Optional, Tuple |
| 16 | 16 | |
| 17 | 17 | import torch |
| 18 | -from modelscope import AutoModelForSequenceClassification, AutoTokenizer | |
| 18 | +from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
| 19 | 19 | |
| 20 | 20 | logger = logging.getLogger("reranker.core") |
| 21 | 21 | |
| ... | ... | @@ -170,13 +170,14 @@ class BGEReranker: |
| 170 | 170 | output_scores[orig_idx] = float(unique_scores[unique_idx]) |
| 171 | 171 | |
| 172 | 172 | # Log per-doc scores (aligned to original docs order) |
| 173 | - try: | |
| 174 | - lines = [] | |
| 175 | - for i, d in enumerate(docs[:100]): | |
| 176 | - lines.append(f"{output_scores[i]},{'' if d is None else str(d)}") | |
| 177 | - logger.info("[BGE_RERANKER] query:%s Scores (score,doc):\n%s", query, "\n".join(lines)) | |
| 178 | - except Exception: | |
| 179 | - pass | |
| 173 | + if 0: | |
| 174 | + try: | |
| 175 | + lines = [] | |
| 176 | + for i, d in enumerate(docs[:100]): | |
| 177 | + lines.append(f"{output_scores[i]},{'' if d is None else str(d)}") | |
| 178 | + logger.info("[BGE_RERANKER] query:%s Scores (score,doc):\n%s", query, "\n".join(lines)) | |
| 179 | + except Exception: | |
| 180 | + pass | |
| 180 | 181 | |
| 181 | 182 | elapsed_ms = (time.time() - start_ts) * 1000.0 |
| 182 | 183 | dedup_ratio = 0.0 | ... | ... |
| ... | ... | @@ -0,0 +1,237 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +""" | |
| 3 | +Single-request rerank latency probe using real title lines (e.g. 1.8w export). | |
| 4 | + | |
| 5 | +Randomly samples N titles from a text file (one title per line), POSTs to the | |
| 6 | +rerank HTTP API, prints wall-clock latency. | |
| 7 | + | |
| 8 | +Supports multiple N values (comma-separated) and multiple repeats per N. | |
| 9 | + | |
| 10 | +Example: | |
| 11 | + source activate.sh | |
| 12 | + python scripts/benchmark_reranker_random_titles.py 386 | |
| 13 | + python scripts/benchmark_reranker_random_titles.py 40,80,100 | |
| 14 | + python scripts/benchmark_reranker_random_titles.py 40,80,100 --repeat 3 --seed 42 | |
| 15 | + RERANK_BASE=http://127.0.0.1:6007 python scripts/benchmark_reranker_random_titles.py 200 | |
| 16 | +""" | |
| 17 | + | |
| 18 | +from __future__ import annotations | |
| 19 | + | |
| 20 | +import argparse | |
| 21 | +import json | |
| 22 | +import os | |
| 23 | +import random | |
| 24 | +import statistics | |
| 25 | +import sys | |
| 26 | +import time | |
| 27 | +from pathlib import Path | |
| 28 | +from typing import List, Optional, Tuple | |
| 29 | + | |
| 30 | +import httpx | |
| 31 | + | |
| 32 | + | |
| 33 | +def _load_titles(path: Path) -> List[str]: | |
| 34 | + lines: List[str] = [] | |
| 35 | + with path.open(encoding="utf-8", errors="replace") as f: | |
| 36 | + for line in f: | |
| 37 | + s = line.strip() | |
| 38 | + if s: | |
| 39 | + lines.append(s) | |
| 40 | + return lines | |
| 41 | + | |
| 42 | + | |
| 43 | +def _parse_doc_counts(s: str) -> List[int]: | |
| 44 | + parts = [p.strip() for p in s.split(",") if p.strip()] | |
| 45 | + if not parts: | |
| 46 | + raise ValueError("empty doc-count list") | |
| 47 | + out: List[int] = [] | |
| 48 | + for p in parts: | |
| 49 | + v = int(p, 10) | |
| 50 | + if v <= 0: | |
| 51 | + raise ValueError(f"doc count must be positive, got {v}") | |
| 52 | + out.append(v) | |
| 53 | + return out | |
| 54 | + | |
| 55 | + | |
| 56 | +def _do_rerank( | |
| 57 | + client: httpx.Client, | |
| 58 | + url: str, | |
| 59 | + query: str, | |
| 60 | + docs: List[str], | |
| 61 | + *, | |
| 62 | + top_n: int, | |
| 63 | + normalize: bool, | |
| 64 | +) -> Tuple[bool, int, float, Optional[int], str]: | |
| 65 | + payload: dict = {"query": query, "docs": docs, "normalize": normalize} | |
| 66 | + if top_n > 0: | |
| 67 | + payload["top_n"] = top_n | |
| 68 | + body = json.dumps(payload, ensure_ascii=False) | |
| 69 | + headers = {"Content-Type": "application/json"} | |
| 70 | + t0 = time.perf_counter() | |
| 71 | + try: | |
| 72 | + resp = client.post(url, content=body.encode("utf-8"), headers=headers) | |
| 73 | + except httpx.HTTPError: | |
| 74 | + raise | |
| 75 | + elapsed_ms = (time.perf_counter() - t0) * 1000.0 | |
| 76 | + text = resp.text or "" | |
| 77 | + ok = resp.status_code == 200 | |
| 78 | + scores_len: Optional[int] = None | |
| 79 | + if ok: | |
| 80 | + try: | |
| 81 | + data = resp.json() | |
| 82 | + sc = data.get("scores") | |
| 83 | + if isinstance(sc, list): | |
| 84 | + scores_len = len(sc) | |
| 85 | + except json.JSONDecodeError: | |
| 86 | + scores_len = None | |
| 87 | + return ok, resp.status_code, elapsed_ms, scores_len, text | |
| 88 | + | |
| 89 | + | |
| 90 | +def main() -> int: | |
| 91 | + parser = argparse.ArgumentParser( | |
| 92 | + description="POST /rerank with N random titles from a file and print latency." | |
| 93 | + ) | |
| 94 | + parser.add_argument( | |
| 95 | + "n", | |
| 96 | + type=str, | |
| 97 | + metavar="N[,N,...]", | |
| 98 | + help="Doc counts: one integer or comma-separated list, e.g. 40,80,100.", | |
| 99 | + ) | |
| 100 | + parser.add_argument( | |
| 101 | + "--repeat", | |
| 102 | + type=int, | |
| 103 | + default=3, | |
| 104 | + help="Number of runs per doc count (default: 3).", | |
| 105 | + ) | |
| 106 | + parser.add_argument( | |
| 107 | + "--titles-file", | |
| 108 | + type=Path, | |
| 109 | + default=Path(os.environ.get("RERANK_TITLE_FILE", "/home/ubuntu/rerank_test/titles.1.8w")), | |
| 110 | + help="Path to newline-separated titles (default: %(default)s or env RERANK_TITLE_FILE).", | |
| 111 | + ) | |
| 112 | + parser.add_argument( | |
| 113 | + "--url", | |
| 114 | + type=str, | |
| 115 | + default=os.environ.get("RERANK_BASE", "http://127.0.0.1:6007").rstrip("/") + "/rerank", | |
| 116 | + help="Full rerank URL (default: $RERANK_BASE/rerank or http://127.0.0.1:6007/rerank).", | |
| 117 | + ) | |
| 118 | + parser.add_argument( | |
| 119 | + "--query", | |
| 120 | + type=str, | |
| 121 | + default="健身女生T恤短袖", | |
| 122 | + help="Rerank query string.", | |
| 123 | + ) | |
| 124 | + parser.add_argument( | |
| 125 | + "--seed", | |
| 126 | + type=int, | |
| 127 | + default=None, | |
| 128 | + help="RNG base seed; each (n, run) uses a derived seed when set (optional).", | |
| 129 | + ) | |
| 130 | + parser.add_argument( | |
| 131 | + "--top-n", | |
| 132 | + type=int, | |
| 133 | + default=0, | |
| 134 | + help="If > 0, include top_n in JSON body (omit field when 0).", | |
| 135 | + ) | |
| 136 | + parser.add_argument( | |
| 137 | + "--no-normalize", | |
| 138 | + action="store_true", | |
| 139 | + help="Send normalize=false (default: normalize=true).", | |
| 140 | + ) | |
| 141 | + parser.add_argument( | |
| 142 | + "--timeout", | |
| 143 | + type=float, | |
| 144 | + default=float(os.environ.get("RERANK_TIMEOUT_SEC", "240")), | |
| 145 | + help="HTTP timeout seconds.", | |
| 146 | + ) | |
| 147 | + parser.add_argument( | |
| 148 | + "--print-body-preview", | |
| 149 | + action="store_true", | |
| 150 | + help="Print first ~500 chars of response body on success (last run only).", | |
| 151 | + ) | |
| 152 | + args = parser.parse_args() | |
| 153 | + | |
| 154 | + try: | |
| 155 | + doc_counts = _parse_doc_counts(args.n) | |
| 156 | + except ValueError as exc: | |
| 157 | + print(f"error: invalid N list {args.n!r}: {exc}", file=sys.stderr) | |
| 158 | + return 2 | |
| 159 | + | |
| 160 | + repeat = int(args.repeat) | |
| 161 | + if repeat <= 0: | |
| 162 | + print("error: --repeat must be positive", file=sys.stderr) | |
| 163 | + return 2 | |
| 164 | + | |
| 165 | + if not args.titles_file.is_file(): | |
| 166 | + print(f"error: titles file not found: {args.titles_file}", file=sys.stderr) | |
| 167 | + return 2 | |
| 168 | + | |
| 169 | + titles = _load_titles(args.titles_file) | |
| 170 | + max_n = max(doc_counts) | |
| 171 | + if len(titles) < max_n: | |
| 172 | + print( | |
| 173 | + f"error: file has only {len(titles)} non-empty lines, need at least {max_n}", | |
| 174 | + file=sys.stderr, | |
| 175 | + ) | |
| 176 | + return 2 | |
| 177 | + | |
| 178 | + top_n = int(args.top_n) | |
| 179 | + normalize = not args.no_normalize | |
| 180 | + any_fail = False | |
| 181 | + summary: dict[int, List[float]] = {n: [] for n in doc_counts} | |
| 182 | + | |
| 183 | + with httpx.Client(timeout=args.timeout) as client: | |
| 184 | + for n in doc_counts: | |
| 185 | + for run_idx in range(repeat): | |
| 186 | + if args.seed is not None: | |
| 187 | + random.seed(args.seed + n * 10_000 + run_idx) | |
| 188 | + docs = random.sample(titles, n) | |
| 189 | + try: | |
| 190 | + ok, status, elapsed_ms, scores_len, text = _do_rerank( | |
| 191 | + client, | |
| 192 | + args.url, | |
| 193 | + args.query, | |
| 194 | + docs, | |
| 195 | + top_n=top_n, | |
| 196 | + normalize=normalize, | |
| 197 | + ) | |
| 198 | + except httpx.HTTPError as exc: | |
| 199 | + print( | |
| 200 | + f"n={n} run={run_idx + 1}/{repeat} error: request failed: {exc}", | |
| 201 | + file=sys.stderr, | |
| 202 | + ) | |
| 203 | + any_fail = True | |
| 204 | + continue | |
| 205 | + | |
| 206 | + if ok: | |
| 207 | + summary[n].append(elapsed_ms) | |
| 208 | + else: | |
| 209 | + any_fail = True | |
| 210 | + | |
| 211 | + print( | |
| 212 | + f"n={n} run={run_idx + 1}/{repeat} status={status} " | |
| 213 | + f"latency_ms={elapsed_ms:.2f} scores={scores_len if scores_len is not None else 'n/a'}" | |
| 214 | + ) | |
| 215 | + if args.print_body_preview and text and run_idx == repeat - 1 and n == doc_counts[-1]: | |
| 216 | + preview = text[:500] + ("…" if len(text) > 500 else "") | |
| 217 | + print(preview) | |
| 218 | + | |
| 219 | + for n in doc_counts: | |
| 220 | + lat = summary[n] | |
| 221 | + if not lat: | |
| 222 | + print(f"summary n={n} runs=0 (all failed)") | |
| 223 | + continue | |
| 224 | + avg = statistics.mean(lat) | |
| 225 | + lo, hi = min(lat), max(lat) | |
| 226 | + extra = "" | |
| 227 | + if len(lat) >= 2: | |
| 228 | + extra = f" stdev_ms={statistics.stdev(lat):.2f}" | |
| 229 | + print( | |
| 230 | + f"summary n={n} runs={len(lat)} min_ms={lo:.2f} max_ms={hi:.2f} avg_ms={avg:.2f}{extra}" | |
| 231 | + ) | |
| 232 | + | |
| 233 | + return 1 if any_fail else 0 | |
| 234 | + | |
| 235 | + | |
| 236 | +if __name__ == "__main__": | |
| 237 | + raise SystemExit(main()) | ... | ... |
scripts/rebuild_suggestions.sh
| ... | ... | @@ -2,15 +2,16 @@ |
| 2 | 2 | set -euo pipefail |
| 3 | 3 | |
| 4 | 4 | if [ $# -lt 1 ]; then |
| 5 | - echo "Usage: $0 <tenant_id> [sample_query] [sample_language]" | |
| 6 | - echo "Example: $0 162 shi en" | |
| 5 | + echo "Usage: $0 <tenant_id>" | |
| 6 | + echo "Example: $0 162" | |
| 7 | 7 | exit 1 |
| 8 | 8 | fi |
| 9 | 9 | |
| 10 | 10 | ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" |
| 11 | 11 | TENANT_ID="$1" |
| 12 | -SAMPLE_Q="${2:-shi}" | |
| 13 | -SAMPLE_LANG="${3:-en}" | |
| 12 | +# Fixed smoke-test queries and languages (no CLI args). | |
| 13 | +SAMPLE_QUERIES=(s sh dress tshirt) | |
| 14 | +SAMPLE_LANGS=(en zh) | |
| 14 | 15 | API_BASE="${API_BASE_URL:-http://localhost:6002}" |
| 15 | 16 | |
| 16 | 17 | cd "$ROOT_DIR" |
| ... | ... | @@ -76,5 +77,10 @@ curl -sS "${AUTH[@]}" "$ES_HOST/$ALIAS_NAME/_search?pretty" -H 'Content-Type: ap |
| 76 | 77 | echo |
| 77 | 78 | |
| 78 | 79 | echo "[4/4] API smoke test" |
| 79 | -curl -sS "$API_BASE/search/suggestions?q=${SAMPLE_Q}&size=10&language=${SAMPLE_LANG}" -H "X-Tenant-ID: ${TENANT_ID}" | |
| 80 | -echo | |
| 80 | +for lang in "${SAMPLE_LANGS[@]}"; do | |
| 81 | + for q in "${SAMPLE_QUERIES[@]}"; do | |
| 82 | + echo "--- GET /search/suggestions?q=${q}&language=${lang} ---" | |
| 83 | + curl -sS "$API_BASE/search/suggestions?q=${q}&size=10&language=${lang}" -H "X-Tenant-ID: ${TENANT_ID}" | |
| 84 | + echo | |
| 85 | + done | |
| 86 | +done | ... | ... |
suggestion/TROUBLESHOOTING.md
| ... | ... | @@ -131,6 +131,6 @@ curl -u "$ES_USERNAME:$ES_PASSWORD" "$ES_HOST/search_products_tenant_162/_search |
| 131 | 131 | 4. 重建后再测 API: |
| 132 | 132 | |
| 133 | 133 | ```bash |
| 134 | -./scripts/rebuild_suggestions.sh 162 F en | |
| 134 | +./scripts/rebuild_suggestions.sh 162 | |
| 135 | 135 | curl "http://localhost:6002/search/suggestions?q=F&size=40&language=en&tenant_id=162" |
| 136 | 136 | ``` | ... | ... |
suggestion/builder.py
| ... | ... | @@ -20,6 +20,7 @@ from sqlalchemy import text |
| 20 | 20 | |
| 21 | 21 | from config.loader import get_app_config |
| 22 | 22 | from config.tenant_config_loader import get_tenant_config_loader |
| 23 | +from query.query_parser import detect_text_language_for_suggestions | |
| 23 | 24 | from suggestion.mapping import build_suggestion_mapping |
| 24 | 25 | from utils.es_client import ESClient |
| 25 | 26 | |
| ... | ... | @@ -57,6 +58,7 @@ class SuggestionCandidate: |
| 57 | 58 | sources: set = field(default_factory=set) |
| 58 | 59 | title_spu_ids: set = field(default_factory=set) |
| 59 | 60 | qanchor_spu_ids: set = field(default_factory=set) |
| 61 | + tag_spu_ids: set = field(default_factory=set) | |
| 60 | 62 | query_count_7d: int = 0 |
| 61 | 63 | query_count_30d: int = 0 |
| 62 | 64 | lang_confidence: float = 1.0 |
| ... | ... | @@ -69,6 +71,8 @@ class SuggestionCandidate: |
| 69 | 71 | self.title_spu_ids.add(spu_id) |
| 70 | 72 | elif source == "qanchor": |
| 71 | 73 | self.qanchor_spu_ids.add(spu_id) |
| 74 | + elif source == "tag": | |
| 75 | + self.tag_spu_ids.add(spu_id) | |
| 72 | 76 | |
| 73 | 77 | def add_query_log(self, is_7d: bool) -> None: |
| 74 | 78 | self.sources.add("query_log") |
| ... | ... | @@ -150,6 +154,19 @@ class SuggestionIndexBuilder: |
| 150 | 154 | return out |
| 151 | 155 | |
| 152 | 156 | @staticmethod |
| 157 | + def _iter_product_tags(raw: Any) -> List[str]: | |
| 158 | + if raw is None: | |
| 159 | + return [] | |
| 160 | + if isinstance(raw, list): | |
| 161 | + return [str(x).strip() for x in raw if str(x).strip()] | |
| 162 | + s = str(raw).strip() | |
| 163 | + if not s: | |
| 164 | + return [] | |
| 165 | + parts = re.split(r"[,;|/\n\t]+", s) | |
| 166 | + out = [p.strip() for p in parts if p and p.strip()] | |
| 167 | + return out if out else [s] | |
| 168 | + | |
| 169 | + @staticmethod | |
| 153 | 170 | def _looks_noise(text_value: str) -> bool: |
| 154 | 171 | if not text_value: |
| 155 | 172 | return True |
| ... | ... | @@ -187,20 +204,6 @@ class SuggestionIndexBuilder: |
| 187 | 204 | return None |
| 188 | 205 | return None |
| 189 | 206 | |
| 190 | - @staticmethod | |
| 191 | - def _detect_script_language(query: str) -> Tuple[Optional[str], float, str]: | |
| 192 | - if re.search(r"[\u4e00-\u9fff]", query): | |
| 193 | - return "zh", 0.98, "script" | |
| 194 | - if re.search(r"[\u0600-\u06FF]", query): | |
| 195 | - return "ar", 0.98, "script" | |
| 196 | - if re.search(r"[\u0400-\u04FF]", query): | |
| 197 | - return "ru", 0.95, "script" | |
| 198 | - if re.search(r"[\u0370-\u03FF]", query): | |
| 199 | - return "el", 0.95, "script" | |
| 200 | - if re.search(r"[a-zA-Z]", query): | |
| 201 | - return "en", 0.55, "model" | |
| 202 | - return None, 0.0, "default" | |
| 203 | - | |
| 204 | 207 | def _resolve_query_language( |
| 205 | 208 | self, |
| 206 | 209 | query: str, |
| ... | ... | @@ -225,18 +228,29 @@ class SuggestionIndexBuilder: |
| 225 | 228 | if req_lang and (not langs_set or req_lang in langs_set): |
| 226 | 229 | return req_lang, 1.0, "request_params", conflict |
| 227 | 230 | |
| 228 | - detected_lang, conf, source = self._detect_script_language(query) | |
| 229 | - if detected_lang and (not langs_set or detected_lang in langs_set): | |
| 230 | - return detected_lang, conf, source, conflict | |
| 231 | + det_lang, conf, det_source = detect_text_language_for_suggestions( | |
| 232 | + query, | |
| 233 | + index_languages=index_languages, | |
| 234 | + primary_language=primary, | |
| 235 | + ) | |
| 236 | + if det_lang and (not langs_set or det_lang in langs_set): | |
| 237 | + return det_lang, conf, det_source, conflict | |
| 231 | 238 | |
| 232 | 239 | return primary, 0.3, "default", conflict |
| 233 | 240 | |
| 234 | 241 | @staticmethod |
| 235 | - def _compute_rank_score(query_count_30d: int, query_count_7d: int, qanchor_doc_count: int, title_doc_count: int) -> float: | |
| 242 | + def _compute_rank_score( | |
| 243 | + query_count_30d: int, | |
| 244 | + query_count_7d: int, | |
| 245 | + qanchor_doc_count: int, | |
| 246 | + title_doc_count: int, | |
| 247 | + tag_doc_count: int = 0, | |
| 248 | + ) -> float: | |
| 236 | 249 | return ( |
| 237 | 250 | 1.8 * math.log1p(max(query_count_30d, 0)) |
| 238 | 251 | + 1.2 * math.log1p(max(query_count_7d, 0)) |
| 239 | 252 | + 1.0 * math.log1p(max(qanchor_doc_count, 0)) |
| 253 | + + 0.85 * math.log1p(max(tag_doc_count, 0)) | |
| 240 | 254 | + 0.6 * math.log1p(max(title_doc_count, 0)) |
| 241 | 255 | ) |
| 242 | 256 | |
| ... | ... | @@ -247,6 +261,7 @@ class SuggestionIndexBuilder: |
| 247 | 261 | query_count_7d=c.query_count_7d, |
| 248 | 262 | qanchor_doc_count=len(c.qanchor_spu_ids), |
| 249 | 263 | title_doc_count=len(c.title_spu_ids), |
| 264 | + tag_doc_count=len(c.tag_spu_ids), | |
| 250 | 265 | ) |
| 251 | 266 | |
| 252 | 267 | def _iter_products(self, tenant_id: str, batch_size: int = 500) -> Iterator[Dict[str, Any]]: |
| ... | ... | @@ -259,9 +274,10 @@ class SuggestionIndexBuilder: |
| 259 | 274 | while True: |
| 260 | 275 | body: Dict[str, Any] = { |
| 261 | 276 | "size": batch_size, |
| 262 | - "_source": ["id", "spu_id", "title", "qanchors"], | |
| 277 | + "_source": ["id", "spu_id", "title", "qanchors", "tags"], | |
| 263 | 278 | "sort": [ |
| 264 | 279 | {"spu_id": {"order": "asc", "missing": "_last"}}, |
| 280 | + {"id.keyword": {"order": "asc", "missing": "_last"}}, | |
| 265 | 281 | ], |
| 266 | 282 | "query": {"match_all": {}}, |
| 267 | 283 | } |
| ... | ... | @@ -471,6 +487,22 @@ class SuggestionIndexBuilder: |
| 471 | 487 | key_to_candidate[key] = c |
| 472 | 488 | c.add_product("qanchor", spu_id=product_id) |
| 473 | 489 | |
| 490 | + for tag in self._iter_product_tags(src.get("tags")): | |
| 491 | + tag_lang, _, _ = detect_text_language_for_suggestions( | |
| 492 | + tag, | |
| 493 | + index_languages=index_languages, | |
| 494 | + primary_language=primary_language, | |
| 495 | + ) | |
| 496 | + text_norm = self._normalize_text(tag) | |
| 497 | + if self._looks_noise(text_norm): | |
| 498 | + continue | |
| 499 | + key = (tag_lang, text_norm) | |
| 500 | + c = key_to_candidate.get(key) | |
| 501 | + if c is None: | |
| 502 | + c = SuggestionCandidate(text=tag, text_norm=text_norm, lang=tag_lang) | |
| 503 | + key_to_candidate[key] = c | |
| 504 | + c.add_product("tag", spu_id=product_id) | |
| 505 | + | |
| 474 | 506 | # Step 2: query logs |
| 475 | 507 | now = datetime.now(timezone.utc) |
| 476 | 508 | since = now - timedelta(days=days) |
| ... | ... | @@ -521,6 +553,7 @@ class SuggestionIndexBuilder: |
| 521 | 553 | "sources": sorted(c.sources), |
| 522 | 554 | "title_doc_count": len(c.title_spu_ids), |
| 523 | 555 | "qanchor_doc_count": len(c.qanchor_spu_ids), |
| 556 | + "tag_doc_count": len(c.tag_spu_ids), | |
| 524 | 557 | "query_count_7d": c.query_count_7d, |
| 525 | 558 | "query_count_30d": c.query_count_30d, |
| 526 | 559 | "rank_score": float(rank_score), |
| ... | ... | @@ -672,6 +705,7 @@ class SuggestionIndexBuilder: |
| 672 | 705 | query_count_7d=delta.delta_7d, |
| 673 | 706 | qanchor_doc_count=0, |
| 674 | 707 | title_doc_count=0, |
| 708 | + tag_doc_count=0, | |
| 675 | 709 | ) |
| 676 | 710 | return { |
| 677 | 711 | "tenant_id": delta.tenant_id, |
| ... | ... | @@ -681,6 +715,7 @@ class SuggestionIndexBuilder: |
| 681 | 715 | "sources": ["query_log"], |
| 682 | 716 | "title_doc_count": 0, |
| 683 | 717 | "qanchor_doc_count": 0, |
| 718 | + "tag_doc_count": 0, | |
| 684 | 719 | "query_count_7d": delta.delta_7d, |
| 685 | 720 | "query_count_30d": delta.delta_30d, |
| 686 | 721 | "rank_score": float(rank_score), |
| ... | ... | @@ -710,6 +745,7 @@ class SuggestionIndexBuilder: |
| 710 | 745 | if (ctx._source.query_count_7d == null) { ctx._source.query_count_7d = 0; } |
| 711 | 746 | if (ctx._source.qanchor_doc_count == null) { ctx._source.qanchor_doc_count = 0; } |
| 712 | 747 | if (ctx._source.title_doc_count == null) { ctx._source.title_doc_count = 0; } |
| 748 | + if (ctx._source.tag_doc_count == null) { ctx._source.tag_doc_count = 0; } | |
| 713 | 749 | |
| 714 | 750 | ctx._source.query_count_30d += params.delta_30d; |
| 715 | 751 | ctx._source.query_count_7d += params.delta_7d; |
| ... | ... | @@ -729,10 +765,12 @@ class SuggestionIndexBuilder: |
| 729 | 765 | int q7 = ctx._source.query_count_7d; |
| 730 | 766 | int qa = ctx._source.qanchor_doc_count; |
| 731 | 767 | int td = ctx._source.title_doc_count; |
| 768 | + int tg = ctx._source.tag_doc_count; | |
| 732 | 769 | |
| 733 | 770 | double score = 1.8 * Math.log(1 + q30) |
| 734 | 771 | + 1.2 * Math.log(1 + q7) |
| 735 | 772 | + 1.0 * Math.log(1 + qa) |
| 773 | + + 0.85 * Math.log(1 + tg) | |
| 736 | 774 | + 0.6 * Math.log(1 + td); |
| 737 | 775 | ctx._source.rank_score = score; |
| 738 | 776 | ctx._source.status = 1; | ... | ... |
suggestion/mapping.py
| ... | ... | @@ -96,6 +96,7 @@ def build_suggestion_mapping(index_languages: List[str]) -> Dict[str, Any]: |
| 96 | 96 | "sources": {"type": "keyword"}, |
| 97 | 97 | "title_doc_count": {"type": "integer"}, |
| 98 | 98 | "qanchor_doc_count": {"type": "integer"}, |
| 99 | + "tag_doc_count": {"type": "integer"}, | |
| 99 | 100 | "query_count_7d": {"type": "integer"}, |
| 100 | 101 | "query_count_30d": {"type": "integer"}, |
| 101 | 102 | "rank_score": {"type": "float"}, | ... | ... |
suggestion/service.py
| ... | ... | @@ -7,12 +7,24 @@ import time |
| 7 | 7 | from typing import Any, Dict, List, Optional |
| 8 | 8 | |
| 9 | 9 | from config.tenant_config_loader import get_tenant_config_loader |
| 10 | +from query.query_parser import simple_tokenize_query | |
| 10 | 11 | from suggestion.builder import get_suggestion_alias_name |
| 11 | 12 | from utils.es_client import ESClient |
| 12 | 13 | |
| 13 | 14 | logger = logging.getLogger(__name__) |
| 14 | 15 | |
| 15 | 16 | |
| 17 | +def _suggestion_length_factor(text: str) -> float: | |
| 18 | + """Down-weight longer strings at query time: factor 1 / sqrt(token_len).""" | |
| 19 | + n = max(len(simple_tokenize_query(str(text or ""))), 1) | |
| 20 | + return 1.0 / (n ** 0.5) | |
| 21 | + | |
| 22 | + | |
| 23 | +def _score_with_token_length_penalty(item: Dict[str, Any]) -> float: | |
| 24 | + base = float(item.get("score") or 0.0) | |
| 25 | + return base * _suggestion_length_factor(str(item.get("text") or "")) | |
| 26 | + | |
| 27 | + | |
| 16 | 28 | class SuggestionService: |
| 17 | 29 | def __init__(self, es_client: ESClient): |
| 18 | 30 | self.es_client = es_client |
| ... | ... | @@ -150,6 +162,17 @@ class SuggestionService: |
| 150 | 162 | seen_text_norm.add(norm) |
| 151 | 163 | suggestions.append(dict(item)) |
| 152 | 164 | |
| 165 | + def _finalize_suggestion_list(items: List[Dict[str, Any]], limit: int) -> List[Dict[str, Any]]: | |
| 166 | + out = list(items) | |
| 167 | + out.sort( | |
| 168 | + key=lambda x: ( | |
| 169 | + _score_with_token_length_penalty(x), | |
| 170 | + float(x.get("rank_score") or 0.0), | |
| 171 | + ), | |
| 172 | + reverse=True, | |
| 173 | + ) | |
| 174 | + return out[:limit] | |
| 175 | + | |
| 153 | 176 | _append_items(completion_items) |
| 154 | 177 | |
| 155 | 178 | # Fast path: avoid a second ES query for short prefixes or when completion already full. |
| ... | ... | @@ -168,7 +191,7 @@ class SuggestionService: |
| 168 | 191 | "query": query, |
| 169 | 192 | "language": language, |
| 170 | 193 | "resolved_language": resolved_lang, |
| 171 | - "suggestions": suggestions[:size], | |
| 194 | + "suggestions": _finalize_suggestion_list(suggestions, size), | |
| 172 | 195 | "took_ms": took_ms, |
| 173 | 196 | } |
| 174 | 197 | |
| ... | ... | @@ -260,6 +283,6 @@ class SuggestionService: |
| 260 | 283 | "query": query, |
| 261 | 284 | "language": language, |
| 262 | 285 | "resolved_language": resolved_lang, |
| 263 | - "suggestions": suggestions[:size], | |
| 286 | + "suggestions": _finalize_suggestion_list(suggestions, size), | |
| 264 | 287 | "took_ms": took_ms, |
| 265 | 288 | } | ... | ... |
tests/test_suggestions.py
| ... | ... | @@ -388,6 +388,53 @@ def test_build_full_candidates_fallback_to_id_when_spu_id_missing(monkeypatch): |
| 388 | 388 | |
| 389 | 389 | |
| 390 | 390 | @pytest.mark.unit |
| 391 | +def test_build_full_candidates_tags_and_qanchor_phrases(monkeypatch): | |
| 392 | + fake_es = FakeESClient() | |
| 393 | + builder = SuggestionIndexBuilder(es_client=fake_es, db_engine=None) | |
| 394 | + | |
| 395 | + monkeypatch.setattr( | |
| 396 | + builder, | |
| 397 | + "_iter_products", | |
| 398 | + lambda tenant_id, batch_size=500: iter( | |
| 399 | + [ | |
| 400 | + { | |
| 401 | + "_id": "900", | |
| 402 | + "_source": { | |
| 403 | + "spu_id": "900", | |
| 404 | + "title": {"en": "Tee", "zh": "T恤"}, | |
| 405 | + "qanchors": { | |
| 406 | + "en": "slim fit, sporty casual", | |
| 407 | + "zh": "修身, 显瘦", | |
| 408 | + }, | |
| 409 | + "tags": ["Classic", "辣妹风", "ribbed neckline"], | |
| 410 | + }, | |
| 411 | + } | |
| 412 | + ] | |
| 413 | + ), | |
| 414 | + ) | |
| 415 | + monkeypatch.setattr(builder, "_iter_query_log_rows", lambda **kwargs: iter([])) | |
| 416 | + | |
| 417 | + key_to_candidate = builder._build_full_candidates( | |
| 418 | + tenant_id="162", | |
| 419 | + index_languages=["en", "zh"], | |
| 420 | + primary_language="en", | |
| 421 | + days=365, | |
| 422 | + batch_size=100, | |
| 423 | + min_query_len=1, | |
| 424 | + ) | |
| 425 | + | |
| 426 | + assert ("en", "slim fit") in key_to_candidate | |
| 427 | + assert ("en", "sporty casual") in key_to_candidate | |
| 428 | + assert ("zh", "修身") in key_to_candidate | |
| 429 | + assert ("zh", "显瘦") in key_to_candidate | |
| 430 | + assert ("en", "classic") in key_to_candidate | |
| 431 | + assert key_to_candidate[("en", "classic")].tag_spu_ids == {"900"} | |
| 432 | + assert ("zh", "辣妹风") in key_to_candidate | |
| 433 | + assert key_to_candidate[("zh", "辣妹风")].tag_spu_ids == {"900"} | |
| 434 | + assert ("en", "ribbed neckline") in key_to_candidate | |
| 435 | + | |
| 436 | + | |
| 437 | +@pytest.mark.unit | |
| 391 | 438 | def test_build_full_candidates_splits_long_title_for_suggest(monkeypatch): |
| 392 | 439 | fake_es = FakeESClient() |
| 393 | 440 | builder = SuggestionIndexBuilder(es_client=fake_es, db_engine=None) | ... | ... |
tests/test_translation_local_backends.py
| ... | ... | @@ -5,6 +5,7 @@ import torch |
| 5 | 5 | |
| 6 | 6 | from translation.backends.local_seq2seq import MarianMTTranslationBackend, NLLBTranslationBackend |
| 7 | 7 | from translation.backends.local_ctranslate2 import NLLBCTranslate2TranslationBackend |
| 8 | +from translation.languages import build_nllb_language_catalog, resolve_nllb_language_code | |
| 8 | 9 | from translation.service import TranslationService |
| 9 | 10 | from translation.text_splitter import compute_safe_input_token_limit, split_text_for_translation |
| 10 | 11 | |
| ... | ... | @@ -200,6 +201,22 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): |
| 200 | 201 | assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] |
| 201 | 202 | |
| 202 | 203 | |
| 204 | +def test_nllb_resolves_flores_short_tags_and_iso_no(): | |
| 205 | + cat = build_nllb_language_catalog(None) | |
| 206 | + assert resolve_nllb_language_code("ca", cat) == "cat_Latn" | |
| 207 | + assert resolve_nllb_language_code("da", cat) == "dan_Latn" | |
| 208 | + assert resolve_nllb_language_code("eu", cat) == "eus_Latn" | |
| 209 | + assert resolve_nllb_language_code("gl", cat) == "glg_Latn" | |
| 210 | + assert resolve_nllb_language_code("hu", cat) == "hun_Latn" | |
| 211 | + assert resolve_nllb_language_code("id", cat) == "ind_Latn" | |
| 212 | + assert resolve_nllb_language_code("nl", cat) == "nld_Latn" | |
| 213 | + assert resolve_nllb_language_code("no", cat) == "nob_Latn" | |
| 214 | + assert resolve_nllb_language_code("ro", cat) == "ron_Latn" | |
| 215 | + assert resolve_nllb_language_code("SV", cat) == "swe_Latn" | |
| 216 | + assert resolve_nllb_language_code("tr", cat) == "tur_Latn" | |
| 217 | + assert resolve_nllb_language_code("deu_Latn", cat) == "deu_Latn" | |
| 218 | + | |
| 219 | + | |
| 203 | 220 | def test_translation_service_preloads_enabled_backends(monkeypatch): |
| 204 | 221 | created = [] |
| 205 | 222 | ... | ... |
translation/languages.py
| ... | ... | @@ -2,8 +2,14 @@ |
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +from functools import lru_cache | |
| 5 | 6 | from typing import Dict, Mapping, Optional, Tuple |
| 6 | 7 | |
| 8 | +from translation.nllb_flores_short_map import ( | |
| 9 | + NLLB_FLORES_SHORT_TO_CODE, | |
| 10 | + NLLB_TOKENIZER_LANGUAGE_CODES, | |
| 11 | +) | |
| 12 | + | |
| 7 | 13 | |
| 8 | 14 | LANGUAGE_LABELS: Dict[str, str] = { |
| 9 | 15 | "zh": "Chinese", |
| ... | ... | @@ -48,6 +54,8 @@ DEEPL_LANGUAGE_CODES: Dict[str, str] = { |
| 48 | 54 | } |
| 49 | 55 | |
| 50 | 56 | |
| 57 | +# Sparse overrides on top of ``NLLB_FLORES_SHORT_TO_CODE`` (same keys win later in | |
| 58 | +# ``build_nllb_language_catalog``). Kept for backward compatibility and explicit defaults. | |
| 51 | 59 | NLLB_LANGUAGE_CODES: Dict[str, str] = { |
| 52 | 60 | "en": "eng_Latn", |
| 53 | 61 | "fi": "fin_Latn", |
| ... | ... | @@ -82,14 +90,24 @@ def normalize_language_key(language: Optional[str]) -> str: |
| 82 | 90 | return str(language or "").strip().lower().replace("-", "_") |
| 83 | 91 | |
| 84 | 92 | |
| 93 | +@lru_cache(maxsize=1) | |
| 94 | +def _nllb_tokenizer_code_by_normalized_key() -> Dict[str, str]: | |
| 95 | + """Map lowercased ``deu_latn``-style keys to canonical tokenizer strings (e.g. ``deu_Latn``).""" | |
| 96 | + return {normalize_language_key(code): code for code in NLLB_TOKENIZER_LANGUAGE_CODES} | |
| 97 | + | |
| 98 | + | |
| 85 | 99 | def build_nllb_language_catalog( |
| 86 | 100 | overrides: Optional[Mapping[str, str]] = None, |
| 87 | 101 | ) -> Dict[str, str]: |
| 88 | - catalog = { | |
| 89 | - normalize_language_key(key): str(value).strip() | |
| 90 | - for key, value in NLLB_LANGUAGE_CODES.items() | |
| 91 | - if str(key).strip() | |
| 92 | - } | |
| 102 | + catalog: Dict[str, str] = {} | |
| 103 | + for key, value in NLLB_FLORES_SHORT_TO_CODE.items(): | |
| 104 | + normalized_key = normalize_language_key(key) | |
| 105 | + if normalized_key: | |
| 106 | + catalog[normalized_key] = str(value).strip() | |
| 107 | + for key, value in NLLB_LANGUAGE_CODES.items(): | |
| 108 | + normalized_key = normalize_language_key(key) | |
| 109 | + if normalized_key: | |
| 110 | + catalog[normalized_key] = str(value).strip() | |
| 93 | 111 | for key, value in (overrides or {}).items(): |
| 94 | 112 | normalized_key = normalize_language_key(key) |
| 95 | 113 | if normalized_key: |
| ... | ... | @@ -116,6 +134,10 @@ def resolve_nllb_language_code( |
| 116 | 134 | if aliased is not None: |
| 117 | 135 | return aliased |
| 118 | 136 | |
| 137 | + tokenizer_hit = _nllb_tokenizer_code_by_normalized_key().get(normalized) | |
| 138 | + if tokenizer_hit is not None: | |
| 139 | + return tokenizer_hit | |
| 140 | + | |
| 119 | 141 | for code in catalog.values(): |
| 120 | 142 | if normalize_language_key(code) == normalized: |
| 121 | 143 | return code | ... | ... |
| ... | ... | @@ -0,0 +1,416 @@ |
| 1 | +"""FLORES short language tags and canonical NLLB tokenizer codes. | |
| 2 | + | |
| 3 | +``NLLB_FLORES_SHORT_TO_CODE`` maps model-card short tags (ISO 639-1 / FLORES ids) | |
| 4 | +to NLLB ``src_lang`` tokens: ``<iso639-3>_<Script>`` (ISO 15924 script). | |
| 5 | + | |
| 6 | +``NLLB_TOKENIZER_LANGUAGE_CODES`` lists every language token in the tokenizer. | |
| 7 | +""" | |
| 8 | +from __future__ import annotations | |
| 9 | + | |
| 10 | +from typing import Dict, FrozenSet | |
| 11 | + | |
| 12 | +NLLB_TOKENIZER_LANGUAGE_CODES: FrozenSet[str] = frozenset({ | |
| 13 | + "ace_Arab", | |
| 14 | + "ace_Latn", | |
| 15 | + "acm_Arab", | |
| 16 | + "acq_Arab", | |
| 17 | + "aeb_Arab", | |
| 18 | + "afr_Latn", | |
| 19 | + "ajp_Arab", | |
| 20 | + "aka_Latn", | |
| 21 | + "als_Latn", | |
| 22 | + "amh_Ethi", | |
| 23 | + "apc_Arab", | |
| 24 | + "arb_Arab", | |
| 25 | + "ars_Arab", | |
| 26 | + "ary_Arab", | |
| 27 | + "arz_Arab", | |
| 28 | + "asm_Beng", | |
| 29 | + "ast_Latn", | |
| 30 | + "awa_Deva", | |
| 31 | + "ayr_Latn", | |
| 32 | + "azb_Arab", | |
| 33 | + "azj_Latn", | |
| 34 | + "bak_Cyrl", | |
| 35 | + "bam_Latn", | |
| 36 | + "ban_Latn", | |
| 37 | + "bel_Cyrl", | |
| 38 | + "bem_Latn", | |
| 39 | + "ben_Beng", | |
| 40 | + "bho_Deva", | |
| 41 | + "bjn_Arab", | |
| 42 | + "bjn_Latn", | |
| 43 | + "bod_Tibt", | |
| 44 | + "bos_Latn", | |
| 45 | + "bug_Latn", | |
| 46 | + "bul_Cyrl", | |
| 47 | + "cat_Latn", | |
| 48 | + "ceb_Latn", | |
| 49 | + "ces_Latn", | |
| 50 | + "cjk_Latn", | |
| 51 | + "ckb_Arab", | |
| 52 | + "crh_Latn", | |
| 53 | + "cym_Latn", | |
| 54 | + "dan_Latn", | |
| 55 | + "deu_Latn", | |
| 56 | + "dik_Latn", | |
| 57 | + "dyu_Latn", | |
| 58 | + "dzo_Tibt", | |
| 59 | + "ell_Grek", | |
| 60 | + "eng_Latn", | |
| 61 | + "epo_Latn", | |
| 62 | + "est_Latn", | |
| 63 | + "eus_Latn", | |
| 64 | + "ewe_Latn", | |
| 65 | + "fao_Latn", | |
| 66 | + "fij_Latn", | |
| 67 | + "fin_Latn", | |
| 68 | + "fon_Latn", | |
| 69 | + "fra_Latn", | |
| 70 | + "fur_Latn", | |
| 71 | + "fuv_Latn", | |
| 72 | + "gaz_Latn", | |
| 73 | + "gla_Latn", | |
| 74 | + "gle_Latn", | |
| 75 | + "glg_Latn", | |
| 76 | + "grn_Latn", | |
| 77 | + "guj_Gujr", | |
| 78 | + "hat_Latn", | |
| 79 | + "hau_Latn", | |
| 80 | + "heb_Hebr", | |
| 81 | + "hin_Deva", | |
| 82 | + "hne_Deva", | |
| 83 | + "hrv_Latn", | |
| 84 | + "hun_Latn", | |
| 85 | + "hye_Armn", | |
| 86 | + "ibo_Latn", | |
| 87 | + "ilo_Latn", | |
| 88 | + "ind_Latn", | |
| 89 | + "isl_Latn", | |
| 90 | + "ita_Latn", | |
| 91 | + "jav_Latn", | |
| 92 | + "jpn_Jpan", | |
| 93 | + "kab_Latn", | |
| 94 | + "kac_Latn", | |
| 95 | + "kam_Latn", | |
| 96 | + "kan_Knda", | |
| 97 | + "kas_Arab", | |
| 98 | + "kas_Deva", | |
| 99 | + "kat_Geor", | |
| 100 | + "kaz_Cyrl", | |
| 101 | + "kbp_Latn", | |
| 102 | + "kea_Latn", | |
| 103 | + "khk_Cyrl", | |
| 104 | + "khm_Khmr", | |
| 105 | + "kik_Latn", | |
| 106 | + "kin_Latn", | |
| 107 | + "kir_Cyrl", | |
| 108 | + "kmb_Latn", | |
| 109 | + "kmr_Latn", | |
| 110 | + "knc_Arab", | |
| 111 | + "knc_Latn", | |
| 112 | + "kon_Latn", | |
| 113 | + "kor_Hang", | |
| 114 | + "lao_Laoo", | |
| 115 | + "lij_Latn", | |
| 116 | + "lim_Latn", | |
| 117 | + "lin_Latn", | |
| 118 | + "lit_Latn", | |
| 119 | + "lmo_Latn", | |
| 120 | + "ltg_Latn", | |
| 121 | + "ltz_Latn", | |
| 122 | + "lua_Latn", | |
| 123 | + "lug_Latn", | |
| 124 | + "luo_Latn", | |
| 125 | + "lus_Latn", | |
| 126 | + "lvs_Latn", | |
| 127 | + "mag_Deva", | |
| 128 | + "mai_Deva", | |
| 129 | + "mal_Mlym", | |
| 130 | + "mar_Deva", | |
| 131 | + "min_Latn", | |
| 132 | + "mkd_Cyrl", | |
| 133 | + "mlt_Latn", | |
| 134 | + "mni_Beng", | |
| 135 | + "mos_Latn", | |
| 136 | + "mri_Latn", | |
| 137 | + "mya_Mymr", | |
| 138 | + "nld_Latn", | |
| 139 | + "nno_Latn", | |
| 140 | + "nob_Latn", | |
| 141 | + "npi_Deva", | |
| 142 | + "nso_Latn", | |
| 143 | + "nus_Latn", | |
| 144 | + "nya_Latn", | |
| 145 | + "oci_Latn", | |
| 146 | + "ory_Orya", | |
| 147 | + "pag_Latn", | |
| 148 | + "pan_Guru", | |
| 149 | + "pap_Latn", | |
| 150 | + "pbt_Arab", | |
| 151 | + "pes_Arab", | |
| 152 | + "plt_Latn", | |
| 153 | + "pol_Latn", | |
| 154 | + "por_Latn", | |
| 155 | + "prs_Arab", | |
| 156 | + "quy_Latn", | |
| 157 | + "ron_Latn", | |
| 158 | + "run_Latn", | |
| 159 | + "rus_Cyrl", | |
| 160 | + "sag_Latn", | |
| 161 | + "san_Deva", | |
| 162 | + "sat_Beng", | |
| 163 | + "scn_Latn", | |
| 164 | + "shn_Mymr", | |
| 165 | + "sin_Sinh", | |
| 166 | + "slk_Latn", | |
| 167 | + "slv_Latn", | |
| 168 | + "smo_Latn", | |
| 169 | + "sna_Latn", | |
| 170 | + "snd_Arab", | |
| 171 | + "som_Latn", | |
| 172 | + "sot_Latn", | |
| 173 | + "spa_Latn", | |
| 174 | + "srd_Latn", | |
| 175 | + "srp_Cyrl", | |
| 176 | + "ssw_Latn", | |
| 177 | + "sun_Latn", | |
| 178 | + "swe_Latn", | |
| 179 | + "swh_Latn", | |
| 180 | + "szl_Latn", | |
| 181 | + "tam_Taml", | |
| 182 | + "taq_Latn", | |
| 183 | + "taq_Tfng", | |
| 184 | + "tat_Cyrl", | |
| 185 | + "tel_Telu", | |
| 186 | + "tgk_Cyrl", | |
| 187 | + "tgl_Latn", | |
| 188 | + "tha_Thai", | |
| 189 | + "tir_Ethi", | |
| 190 | + "tpi_Latn", | |
| 191 | + "tsn_Latn", | |
| 192 | + "tso_Latn", | |
| 193 | + "tuk_Latn", | |
| 194 | + "tum_Latn", | |
| 195 | + "tur_Latn", | |
| 196 | + "twi_Latn", | |
| 197 | + "tzm_Tfng", | |
| 198 | + "uig_Arab", | |
| 199 | + "ukr_Cyrl", | |
| 200 | + "umb_Latn", | |
| 201 | + "urd_Arab", | |
| 202 | + "uzn_Latn", | |
| 203 | + "vec_Latn", | |
| 204 | + "vie_Latn", | |
| 205 | + "war_Latn", | |
| 206 | + "wol_Latn", | |
| 207 | + "xho_Latn", | |
| 208 | + "ydd_Hebr", | |
| 209 | + "yor_Latn", | |
| 210 | + "yue_Hant", | |
| 211 | + "zho_Hans", | |
| 212 | + "zho_Hant", | |
| 213 | + "zsm_Latn", | |
| 214 | + "zul_Latn", | |
| 215 | +}) | |
| 216 | + | |
| 217 | +NLLB_FLORES_SHORT_TO_CODE: Dict[str, str] = { | |
| 218 | + "ace": "ace_Latn", | |
| 219 | + "acm": "acm_Arab", | |
| 220 | + "acq": "acq_Arab", | |
| 221 | + "aeb": "aeb_Arab", | |
| 222 | + "af": "afr_Latn", | |
| 223 | + "ajp": "ajp_Arab", | |
| 224 | + "ak": "aka_Latn", | |
| 225 | + "als": "als_Latn", | |
| 226 | + "am": "amh_Ethi", | |
| 227 | + "apc": "apc_Arab", | |
| 228 | + "ar": "arb_Arab", | |
| 229 | + "ars": "ars_Arab", | |
| 230 | + "ary": "ary_Arab", | |
| 231 | + "arz": "arz_Arab", | |
| 232 | + "as": "asm_Beng", | |
| 233 | + "ast": "ast_Latn", | |
| 234 | + "awa": "awa_Deva", | |
| 235 | + "ayr": "ayr_Latn", | |
| 236 | + "azb": "azb_Arab", | |
| 237 | + "azj": "azj_Latn", | |
| 238 | + "ba": "bak_Cyrl", | |
| 239 | + "ban": "ban_Latn", | |
| 240 | + "be": "bel_Cyrl", | |
| 241 | + "bem": "bem_Latn", | |
| 242 | + "bg": "bul_Cyrl", | |
| 243 | + "bho": "bho_Deva", | |
| 244 | + "bjn": "bjn_Latn", | |
| 245 | + "bm": "bam_Latn", | |
| 246 | + "bn": "ben_Beng", | |
| 247 | + "bo": "bod_Tibt", | |
| 248 | + "bs": "bos_Latn", | |
| 249 | + "bug": "bug_Latn", | |
| 250 | + "ca": "cat_Latn", | |
| 251 | + "ceb": "ceb_Latn", | |
| 252 | + "cjk": "cjk_Latn", | |
| 253 | + "ckb": "ckb_Arab", | |
| 254 | + "crh": "crh_Latn", | |
| 255 | + "cs": "ces_Latn", | |
| 256 | + "cy": "cym_Latn", | |
| 257 | + "da": "dan_Latn", | |
| 258 | + "de": "deu_Latn", | |
| 259 | + "dik": "dik_Latn", | |
| 260 | + "dyu": "dyu_Latn", | |
| 261 | + "dz": "dzo_Tibt", | |
| 262 | + "ee": "ewe_Latn", | |
| 263 | + "el": "ell_Grek", | |
| 264 | + "en": "eng_Latn", | |
| 265 | + "eo": "epo_Latn", | |
| 266 | + "es": "spa_Latn", | |
| 267 | + "et": "est_Latn", | |
| 268 | + "eu": "eus_Latn", | |
| 269 | + "fi": "fin_Latn", | |
| 270 | + "fj": "fij_Latn", | |
| 271 | + "fo": "fao_Latn", | |
| 272 | + "fon": "fon_Latn", | |
| 273 | + "fr": "fra_Latn", | |
| 274 | + "fur": "fur_Latn", | |
| 275 | + "fuv": "fuv_Latn", | |
| 276 | + "ga": "gle_Latn", | |
| 277 | + "gaz": "gaz_Latn", | |
| 278 | + "gd": "gla_Latn", | |
| 279 | + "gl": "glg_Latn", | |
| 280 | + "gn": "grn_Latn", | |
| 281 | + "gu": "guj_Gujr", | |
| 282 | + "ha": "hau_Latn", | |
| 283 | + "he": "heb_Hebr", | |
| 284 | + "hi": "hin_Deva", | |
| 285 | + "hne": "hne_Deva", | |
| 286 | + "hr": "hrv_Latn", | |
| 287 | + "ht": "hat_Latn", | |
| 288 | + "hu": "hun_Latn", | |
| 289 | + "hy": "hye_Armn", | |
| 290 | + "id": "ind_Latn", | |
| 291 | + "ig": "ibo_Latn", | |
| 292 | + "ilo": "ilo_Latn", | |
| 293 | + "is": "isl_Latn", | |
| 294 | + "it": "ita_Latn", | |
| 295 | + "ja": "jpn_Jpan", | |
| 296 | + "jv": "jav_Latn", | |
| 297 | + "ka": "kat_Geor", | |
| 298 | + "kab": "kab_Latn", | |
| 299 | + "kac": "kac_Latn", | |
| 300 | + "kam": "kam_Latn", | |
| 301 | + "kbp": "kbp_Latn", | |
| 302 | + "kea": "kea_Latn", | |
| 303 | + "kg": "kon_Latn", | |
| 304 | + "khk": "khk_Cyrl", | |
| 305 | + "ki": "kik_Latn", | |
| 306 | + "kk": "kaz_Cyrl", | |
| 307 | + "km": "khm_Khmr", | |
| 308 | + "kmb": "kmb_Latn", | |
| 309 | + "kmr": "kmr_Latn", | |
| 310 | + "kn": "kan_Knda", | |
| 311 | + "knc": "knc_Latn", | |
| 312 | + "ko": "kor_Hang", | |
| 313 | + "ks": "kas_Arab", | |
| 314 | + "ky": "kir_Cyrl", | |
| 315 | + "lb": "ltz_Latn", | |
| 316 | + "lg": "lug_Latn", | |
| 317 | + "li": "lim_Latn", | |
| 318 | + "lij": "lij_Latn", | |
| 319 | + "lmo": "lmo_Latn", | |
| 320 | + "ln": "lin_Latn", | |
| 321 | + "lo": "lao_Laoo", | |
| 322 | + "lt": "lit_Latn", | |
| 323 | + "ltg": "ltg_Latn", | |
| 324 | + "lua": "lua_Latn", | |
| 325 | + "luo": "luo_Latn", | |
| 326 | + "lus": "lus_Latn", | |
| 327 | + "lvs": "lvs_Latn", | |
| 328 | + "mag": "mag_Deva", | |
| 329 | + "mai": "mai_Deva", | |
| 330 | + "mar": "mar_Deva", | |
| 331 | + "mi": "mri_Latn", | |
| 332 | + "min": "min_Latn", | |
| 333 | + "mk": "mkd_Cyrl", | |
| 334 | + "ml": "mal_Mlym", | |
| 335 | + "mni": "mni_Beng", | |
| 336 | + "mos": "mos_Latn", | |
| 337 | + "mt": "mlt_Latn", | |
| 338 | + "my": "mya_Mymr", | |
| 339 | + "nb": "nob_Latn", | |
| 340 | + "nl": "nld_Latn", | |
| 341 | + "nn": "nno_Latn", | |
| 342 | + "no": "nob_Latn", | |
| 343 | + "npi": "npi_Deva", | |
| 344 | + "nso": "nso_Latn", | |
| 345 | + "nus": "nus_Latn", | |
| 346 | + "ny": "nya_Latn", | |
| 347 | + "oc": "oci_Latn", | |
| 348 | + "ory": "ory_Orya", | |
| 349 | + "pa": "pan_Guru", | |
| 350 | + "pag": "pag_Latn", | |
| 351 | + "pap": "pap_Latn", | |
| 352 | + "pbt": "pbt_Arab", | |
| 353 | + "pes": "pes_Arab", | |
| 354 | + "pl": "pol_Latn", | |
| 355 | + "plt": "plt_Latn", | |
| 356 | + "prs": "prs_Arab", | |
| 357 | + "pt": "por_Latn", | |
| 358 | + "quy": "quy_Latn", | |
| 359 | + "rn": "run_Latn", | |
| 360 | + "ro": "ron_Latn", | |
| 361 | + "ru": "rus_Cyrl", | |
| 362 | + "rw": "kin_Latn", | |
| 363 | + "sa": "san_Deva", | |
| 364 | + "sat": "sat_Beng", | |
| 365 | + "sc": "srd_Latn", | |
| 366 | + "scn": "scn_Latn", | |
| 367 | + "sd": "snd_Arab", | |
| 368 | + "sg": "sag_Latn", | |
| 369 | + "shn": "shn_Mymr", | |
| 370 | + "si": "sin_Sinh", | |
| 371 | + "sk": "slk_Latn", | |
| 372 | + "sl": "slv_Latn", | |
| 373 | + "sm": "smo_Latn", | |
| 374 | + "sn": "sna_Latn", | |
| 375 | + "so": "som_Latn", | |
| 376 | + "sr": "srp_Cyrl", | |
| 377 | + "ss": "ssw_Latn", | |
| 378 | + "st": "sot_Latn", | |
| 379 | + "su": "sun_Latn", | |
| 380 | + "sv": "swe_Latn", | |
| 381 | + "swh": "swh_Latn", | |
| 382 | + "szl": "szl_Latn", | |
| 383 | + "ta": "tam_Taml", | |
| 384 | + "taq": "taq_Latn", | |
| 385 | + "te": "tel_Telu", | |
| 386 | + "tg": "tgk_Cyrl", | |
| 387 | + "th": "tha_Thai", | |
| 388 | + "ti": "tir_Ethi", | |
| 389 | + "tk": "tuk_Latn", | |
| 390 | + "tl": "tgl_Latn", | |
| 391 | + "tn": "tsn_Latn", | |
| 392 | + "tpi": "tpi_Latn", | |
| 393 | + "tr": "tur_Latn", | |
| 394 | + "ts": "tso_Latn", | |
| 395 | + "tt": "tat_Cyrl", | |
| 396 | + "tum": "tum_Latn", | |
| 397 | + "tw": "twi_Latn", | |
| 398 | + "tzm": "tzm_Tfng", | |
| 399 | + "ug": "uig_Arab", | |
| 400 | + "uk": "ukr_Cyrl", | |
| 401 | + "umb": "umb_Latn", | |
| 402 | + "ur": "urd_Arab", | |
| 403 | + "uzn": "uzn_Latn", | |
| 404 | + "vec": "vec_Latn", | |
| 405 | + "vi": "vie_Latn", | |
| 406 | + "war": "war_Latn", | |
| 407 | + "wo": "wol_Latn", | |
| 408 | + "xh": "xho_Latn", | |
| 409 | + "ydd": "ydd_Hebr", | |
| 410 | + "yo": "yor_Latn", | |
| 411 | + "yue": "yue_Hant", | |
| 412 | + "zh": "zho_Hans", | |
| 413 | + "zsm": "zsm_Latn", | |
| 414 | + "zu": "zul_Latn", | |
| 415 | +} | |
| 416 | + | ... | ... |