From 00c8ddb9b138aaaf9e2e9d40bd555ead00226efb Mon Sep 17 00:00:00 2001 From: tangwang Date: Sat, 21 Mar 2026 19:41:23 +0800 Subject: [PATCH] suggest rank optimize --- b.sh | 417 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ bb.sh | 35 +++++++++++++++++++++++++++++++++++ config/config.yaml | 8 ++++---- docs/TODO.txt | 17 +++++++++++++++-- docs/suggestion索引构建.md | 24 +++++++++++++++++++----- mappings/search_suggestions.json | 1 + query/query_parser.py | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------- reranker/bge_reranker.py | 2 +- scripts/benchmark_reranker_random_titles.py | 237 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ suggestion/builder.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------- suggestion/mapping.py | 1 + suggestion/service.py | 27 +++++++++++++++++++++++++-- tests/test_suggestions.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 13 files changed, 935 insertions(+), 44 deletions(-) create mode 100644 b.sh create mode 100644 bb.sh create mode 100755 scripts/benchmark_reranker_random_titles.py diff --git a/b.sh b/b.sh new file mode 100644 index 0000000..f55842e --- /dev/null +++ b/b.sh @@ -0,0 +1,417 @@ +#!/bin/bash +start=$(date +%s%N) # 开始时间,纳秒级 + +time curl -X POST "http://localhost:6007/rerank" \ + -H "Content-Type: application/json" \ + -d '{ + "query": "健身女生T恤短袖", + "docs": [ "60 Jelly Bracelets 80 s Adult Size 
- MAQIHAN Neon Gummy Bracelets for Women 80s Jelly Bangles Glow Silicone Bands Jewelry Wristband Rainbow Jellies Bangle Girls Boys Colored Accessories Party Favor", +"MEROKEETY Women s 2025 Summer Square Neck Puff Sleeve Boho Midi Dress Swiss Dot Ruffle Flowy Tie Back Dress", +"FITORY Mens Sandals", +"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", +"Merrell Mens Hydro Moc", +"Lounge Sets for Women Summer Outfits Women 2 Piece Sets 2025 Sleeveless Matching Lounge Crop Top High Waisted Short", +"Men s Underwear", +"Executive Functioning Workbook for Teens: 101 Activities and Strategies for Enhancing Self-Discipline", +"LEVSOX Compression Socks Women and Men", +"MGparty 12 Pieces Christmas Headbands Christmas Parties Favors Decoration Supplies Xmas Gifts Photo Booth Xmas Tree Snowman Reindeer Antlers Santa Hat", +"10 Large Vacuum Storage Bags with Hand Pump", +"Disney Lilo and Stitch Boys Swim Set", +"Sterling Silver Hoop Earrings", +"23 Pcs Day of The Dead Altar Decorations Set", +"Travel Makeup Bag for Women Fashion Large Capacity Pouch Open Flat Cosmetic Portable Organizer Waterproof Large Opening Storage Toiletry Bags Vertical Free-Standing Brush Holder for Easy Access Blue", +"Iron Flame: Empyrean", +"Luxebell Luggage Straps Suitcase Belt TSA Approved Travel Accessories Gift 4-Pack 6.56ft (Green)", +"TONY & SANDY Christian Gifts for Women", +"Blue Birthday Party Supplies", +"Vionic Women s Coral Loafer Moccasin", +"LIQING 35L Large Picnic Basket 2 Layers of Internal Pockets Leak-Proof and Insulated ,Folding with Internal Support for enhansed Stability", +"40oz Softball Tumbler with Handle Softball Gifts Stuff for Women Girls Men Gift for Coach Lovers Fan Stainless Steel Cup", +"Crayola Colour & Erase Reusable Puzzle Set", +"Carry On Luggage with Front Compartment and Cup Holder", +"Interactive Cat Toy Rechargeable", +"Nike Air Rift", +"Portable Hookah Set for Travel - Premium Handheld Glass Aluminum Mini 
Hookah Real Metal Accessories", +"Clear Backpack for Boys", +"Women’s Knee High Boots Round Toe Chunky Heel Faux Leather Tall Riding Boots with Side Zipper", +"Golf Grip Trainer & Connection Band 2Set", +"Monster High Self Scare Day Cleo De Nile Doll Play Set", +"Fortnite eGift Card - Powered by the Epic Games Store", +"Mesh Beach Bags", +"Crowye Anime Cosplay Costume for Halloween Princess Costume Accessories Anime White Cosplay Wig Egypt Arm Cuff Bracelet Gold Earrings Greek Goddess Set for Halloween Dress up Princess", +"Premium Women s Leather Tote Handbag - Bag for Everyday Use", +"Ekouaer Maternity Nursing Gown and Robe Set Labor Delivery Nursing Nightgowns for Breastfeeding Pregnancy Clothes", +"Superband Mermaid Tails for Swimming for Women and Adults Without Monofin", +"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops", +"WDIRARA Girl s Bow Puff Sleeve A Line Midi Dress Cute Collared Ruffle Hem Swing Dresses", +"Funziez! 
Adult Onesie Halloween Costume Animal Dinosaur Shark Unisex Plush One Piece Cosplay Suit for Adults", +"Rockland Duffel Bag", +"Centipede Demon Baby Shoes Baby Boys Girls Walking Shoes Non Slip Booties Sock Shoe Infants Breathable Sneakers Lightweight Barefoot Slip On Sneakers", +"CYDREAM Long Sleeve Bodysuits for Women - Square Neck Shapewear Bodysuit Tops Going Out Body Suits Shirt Leotard", +"Men s Oversized Letter Graphic Tank Top Sleeveless Casual Summer Tops Y2K Streetwear", +"Flower Claw Clip 7 PCS Claw Clips", +"waist twister,waist twisting machine ab twister board with 300 lbs Weight Capacity", +"PAGE ONE Womens Winter Ribbed Beanie Crossed Cap Chunky Cable Knit Pompom Soft Warm Hat", +"5 Pack Cute Keychains for Girls", +"Dragon Ball Super - Complete Series - Blu-ray", +"VejiA Multifunctional Simple Shoe Cabinet Storage Shoe Rack Save Space Hallway Furniture", +"50Pcs Handbag Purse Feet Handbag Nailhead Brass Studs Screw-Back Feet Flat Head Stud Metal Studs Rivet Leather Craft DIY for DIY Purse Leather Craft", +"Wearable Blanket Hoodie with Letter A-Z - Oversized Blanket Hooded Personalized Birthday Christmas Gifts for Women Mom", +"On Women s Cloudnova Form 2 Sneakers", +"SANTINY 18 Skorts for Women with 4 Pockets High Waist Long Athletic Tennis Skirt Golf Skort Dressy Casual", +"Compatible with AirTag Case Keychain", +"Rod Holder Plugs", +"Protective Case Compatible with Have A Seat Figure-Clear PVC Portable Storage Box with Keychain", +"adidas Men s Swift Run 1.0 Running Shoes", +"M MOOHAM Cross Necklace for Women Teen Girls", +"Sportneer Adjustable Ankle Weights for Women and Men 7 lbs/Pair Adjustable Leg Weights with Secure Straps", +"PRETTYGARDEN Women s 2 Piece Outfits Sleeveless Suit Vest and Wide Leg Pants Business Casual Blazer Sets", +"Bouncer Seat for Babies 0-12 Months", +"Womens Crew Socks Cotton Long Gym Socks Lightweight Athletic Running Socks", +"Denior Magnetic Card Phone Wallet Holder for iPhone 17/16/15/14/13/12 Series", +"LIGHT DOT 
Women s Summer Dress Plisse Maxi Tube Bodycon Dress Back Tie Beach Resort Vacation", +"Vivresina UV Resin 400g (400.0", +"Wide Leg Pants High Waisted Pleated Trousers with 4 Colors", +"Osprey Daylite Shoulder Sling Bag – Compact Crossbody Backpack for Everyday Carry", +"Tote Bag for Women Large PVC Tote Bag Letters Print Plastic Handbag for Christmas Gift", +"Hello Kitty Giant Coloring & Activity Book 11x16", +"Skechers Mens Delson 3.0 - Roth 210606", +"3pcs Heart Badge Reel with Alligator Clip Cute Retractable Badge Holder Acrylic Nurse Badge Clip for Office Workers", +"Ortho Balance Hiking Shoes for Men Women", +"GOLDENMATE 1000VA/600W Lithium UPS Battery Backup and Surge Protector", +"Gelante Solid Color 100% Cotton Bucket Hat for Women and Men Packable Travel Summer Beach Hat", +"Sonic The Hedgehog 3 Movie Action Figures 2.5-Inch Movie Collector Toy Figure Multi-Pack Includes Sonic The Hedgehog Knuckles Shadow Buzz Bomber & Drone- Officially Licensed Toys", +"61 Pcs Nacho Libre Stickers Comedy Movie Graffiti Waterproof Vinyl for Adults for Birthday Party Supplies Decoration Favors for Water Bottles Laptop Suitcase Scrapbooking Choice", +"Neck Lift Tape", +"925 Sterling Silver Earrings for Womens Sparkly Colorful Full Diamond Simple Stylish Elegant Hypoallergenic Jewelry", +"Pink Ceramic Bow Vase for Flowers", +"Winter Coats For Men Winter Jackets Water Resistant Warm Thicken Parka Puffer Coat Long Down Jacket", +"Alarm Clocks for Bedrooms", +"KINURI Running Belt for Men & Women – Fits All Smartphones – Waterproof Waist Pack with Adjustable Strap – Ideal for Jogging", +"DREAM PAIRS Heels for Women Flip Flops Kitten Low Heels Open Square Toe Thong Heeled Sandals", +"Amazon Basics All Purpose Washable School Craft Liquid Glue for Making Slime", +"Inflatable Costume Adult Frog Full Body Deluxe Funny Air Blow Up Costume for Men Women Halloween", +"Mens Golf Pants Stretch Casual Dress Pants Elastic Drawstring Slacks for Men Lightweight Trousers with 5 Pockets", +"Lip 
Smacker Hello Kitty Lip Balm", +"Brown Sugar Keeper 3D – Terracotta Clay Bear Softener", +"MEETSUN Polarized Sunglasses for Women Men Trendy Classic Retro Designer Style", +"Corset Top Bustier Lingerie for Women Zipper Front Flower Sexy Burlesque Vintage", +"Pro Club Men s Heavyweight Mesh Basketball Shorts", +"Nike Tech Men s Full-Zip Windrunner Hoodie (HV0949-237", +"Ear Piercing Kit", +"Timberland Men s 6 Premium Boot", +"STAR WARS The Black Series Darth Maul", +"VZQI Halloween Cosplay Costumes Kamado Tanjir Kids Anime Kimono Halloween Green Cloak", +"Fringe Vest for Women Faux Suede Open Front Cardigan Sleeveless Tassels Fringed Vest Cardigan Hippie Jacket", +"Smart Health Ring 2.0 for Women Men", +"Fast Forward Kid s Licensed 15 Backpack With Lunch Box Combo Set (Hello Kitty)", +"Handmade Authentic Katana - 41-inch Full Tang Sharp Blade", +"Inateck Sling Bag X", +"EXLURA Women s Fashion Faux Wool Mini Skirt High Waisted Y2K Trendy Side Slit Tweed Plaid Skirts 2025 Fall Winter Outfits", +"LASLULU Womens Sexy Crossover Crop Top Long Sleeve Workout Tops Crewneck Athletic Yoga T-Shirts Fall Outfits", +"Wrangler Authentics Men s Classic Relaxed Fit Five Pocket Jean Short", +"ZeroBound Built in Bra Tank Tops for Women - High Neck Racerback Tank Tops", +"Nike Mens Air Max Alpha Trainer 6", +"MAZZERI Solid Gold Plated Sterling Silver Italian 1.3/1.6/2.2/2.8mm Diamond-Cut Braided Rope Chain Necklace for Men Women", +"Milumia Women s Polka Dots Twist Front Halter Top Dressy Casual Textured Peplum Going Out Tops", +"80s 90s Outfit for Women", +"EFAN Womens Sexy Sleeveless Double Lined Crop Tops Workout Cute Tight Racerback Tank Tops Summer Clothes Teen Girls 2025", +"Nike Mens Shorts Dri-Fit Flex Woven Shorts 7inch (US", +"top handle satchel Women", +"Kono Expandable Luggage 3 Piece Set Hardshell Lightweight 20in 24in 28in Carry On Suitcase with Spinner Wheels TSA Lock(Black & Brown)", +"Nations of The World | National Pride Flag Symbol Arms Tee Unisex T-Shirt for Men or 
Women", +"Jo & Bette Seamless Thongs for Women - High Waist Panties 6 Pack - Thong Underwear Pack Breathable No show Sports", +"eKids Disney Frozen 2 Bluetooth Headphones with Microphone", +"Arctix Kids Insulated Snow Bib Overalls", +"USA Flag Charlie Gift T-Shirt", +"CBKSUHBADE 15in×11in Anime One Piece Wanted Bounty Posters", +"Plus Size Underwear for Women XL-5XL Cotton High Waist Women Briefs Full Coverage Ladies Panties 4 Pack", +"Little Adventures Enchanted Rapunzel Dress-Up Costume for Adult Women", +"G Gradual Tennis Dress for Women Golf Outfits with Shorts and Pockets Sleeveless Active Exercise Athletic Dresses for Women", +"Pastoral Style Porch Goose Outfits", +"Vive Thigh High Compression Stockings for Women & Men - 15-20 mmHg Graduated Support Hose - Opaque Closed Toe Compression Tights - Stockings for Varicose Veins", +"Canada is Not for Sale Vintage Cotton Twill Cap", +"TomTiger Yoga Shorts for Women Tummy Control High Waist Biker Shorts Exercise Workout Butt Lifting Tights Women s Short Pants", +"4PCS GOD IS FIRST IM SECOND Bracelet: Faith Priority Bracelet - Engraved Cross Silicone Wristband for Daily Encouragement", +"Tahitian Black Pearl Pendant Necklace AAAA 18K White Gold Plated 925 Sterling Silver Black Pearl Jewelry Gift for Women Mother Wife Her for Anniversary Christmas Birthday", +"HOTOUCH Womens Short Sleeve Button Down Shirts Loose Fit V Neck Business Casual Blouses Summer Top with Pockets S-XXL", +"Men s Corduroy Short Sleeved Cargo Shirt Relaxed Fit Button Down Casual Wear Tops with Flap Pockets", +"Orange Blue Light Blocking Glasses for Better Sleep - 99.5% Premium Acetate Migraine Glasses for Women & Men", +"Disney Stitch Beach Towel for Kids Cotton Bath Towels with 2 Clothes Pins Travel Swimming Quick Dry Towel Beach Vacation Essentials", +"PGANDS Womens Crew Neck Solid/Color Block Sweatshirts Long Sleeve Casual Lightweight Pullover Tops", +"Premium Organic Whole Cloves 5.3 oz (150 grams)", +"habibee Bra for Women No Underwire 
Comfort Seamless Bras Push Up Wireless Bras Full Coverage Bralettes", +"Puma Mens Caven 2.0 Shoes", +"PRETTYGARDEN Women s Fall Button Down Shirts Dressy Casual Spring Long Puff Sleeve Eyelet Loose Fit Collared Blouse Top", +"TNNZEET 2 Pack Plus Size Biker Shorts for Women - 8 Black High Waisted Tummy Control Spandex Workout Shorts (XL-4XL)", +"Marvel Legends Series Captain America Shield", +"PAVOI 14K Gold AAA+ Handpicked White Freshwater Cultured Pearl Earrings Studs", +"Trendy Queen Long Skirts for Women Boho Maxi Skirt Winter Swing Tiered A-Line Elastic High Waist Dress with Pockets Fashion", +"Reebok Classic Leather Sneakers for Men", +"PRETTYGARDEN Women s Summer Bodycon Maxi Tube Dress Ribbed Strapless Side Slit Long Going Out Casual Elegant Party Dresses", +"Favorite Daughter Women s Classic Logo Baseball Cap", +"Reebok Men s Cotton Vital Fleece Sweatpant", +"COOFANDY Mens Hawaiian Shirt Short Sleeve Button Down Shirts Tropical Summer Beach Shirts Casual Floral Aloha Shirts", +"Columbia Mens Grander Marlin Iii Offshore Short", +"Satin One Shoulder Flower Girl Dress with Bow Wedding Princess Pageant Party Gown Puffy Formal First Communion", +"Nike Mens V5 RNR", +"Speed Cube 3x3", +"FOURSTEEDS Women s Cotton Zipper Front Multi-Pocket Twill Bermuda Women Cargo Shorts", +"Curly Hair Brush Defining", +"YQXCC Cooling Towels | 4 Pack 47x12 | Ice Cool for Neck | Microfiber Soft Breathable Chilly | for Yoga", +"Hot Wheels Toy Car Playset with Lights", +"Carhartt Men s Loose Fit Heavyweight Short-Sleeve Pocket Henley T-Shirt", +"Women s Mid-High Rise Ripped Denim Shorts Stretchy Distressed Jean Shorts with Pockets Folded Hem Casual Summer Jorts", +"Monster High Cleo De Nile Doll in Golden Blouse & Layered Skirt", +"Ariat Women’s Fatbaby Western Boot", +"UYYE Car Registration and Insurance Card Holder", +"365 by Whole Foods Market", +"Crystal Bracelet for Women Fashion 7 Inch Approximately Rainbow Sparkling Crystal Bracelet with Adjustable Elastic Cord", +"Samsung 
Galaxy Watch 7 (44mm) AI Smartwatch w/ 1.5 AMOLED", +"DOUKEN 4 Pair Sneaker Creases Protector", +"Elvis: The Legend music word search puzzle.: Great Country Music Word Scrambles about Elvis. Large print word puzzle for adults and rock music lovers. ... Great music gift for your friends or family.", +"Pinkfong Bebefinn Plush Toy - 12 (30cm) Stuffed Doll | Soft Cuddly Plush for Toddlers | Bebefinn Toy | Perfect Birthday", +"Thrusting Dildo Vibrator Sex Toys for Women", +"VANLOVEMAC Baseball Gifts for Boys 8-12 Baseball Stuff College Going Away Gifts Welcome Back to School Gifts Dorm Room Essentials for Guys Off to College", +"Hello Kitty and Friends - Cinnamoroll 12” Pink Monochrome Plush", +"BOBISUKA Pearl White Face Body Paint", +"OMKAGI 2 Piece Workout Sets for Women Halter Sports Bras Gym Sets Booty Leggings Outfits", +"Ivay Womens Scoop Neck Ribbed Knit Tank Top Sleeveless Cotton Wife Beater Camisole Shirts", +"SOLY HUX Women s Graphic Tee Shirts Novelty Funny Short Sleeve Summer Casual Tops", +"Wooden Taper Candle Holders: Wood Candlestick Holders Rustic Brown Farmhouse Fall Decor for Living Room Dinning Table Centerpiece Christmas Set of 2", +"PRETTYGARDEN Long Sleeve Shirts for Women 2025 Fall V Neck Waffle Basic Tee Dressy Casual Winter Blouses Knit Tunic Tops", +"Ray-Ban RB2140 Original Wayfarer Square Sunglasses", +"Lee Womens Ultra Lux Comfort with Flex-to-go Utility Skimmer Capri Pant", +"3D Pedometer for Walking", +"HiiFeuer Medieval Faux Leather Chest Armor", +"Pet Deadly Dog Costume", +"Western Chief Kids Freestyle Neoprene Outdoor Boot", +"SKECHERS Women s Ultra Flex 3.0-Brilliant Path Hands Free Slip-INS Sneaker", +"LUOBO Keychain Accessory Decor Keychain Decoration backpacks Bag Pendant", +"10inch Teddy Bear Stuffed Animal", +"Halloweentown University T-Shirt for Women Fall Pumpkin Shirts Funny Halloween Thanksgiving Gift Tops", +"Women s Sexy American Flag Crop Tank 4th of July Patriotic Sleeveless Tee Tops", +"Gillette Fusion5 ProGlide Men s 
Razor Blade Refills", +"Poppy Playtime - Mommy Long Legs Plush (14 Medium Plush", +"Women’s Heated Vest with 12V 20000mAh Battery – Cropped Stand Collar Lightweight Insulated Winter Vest.", +"toolant Winter Work Gloves for Men", +"192Pcs Halloween Favors Stationery Gift Set", +"20 Pcs Ultra Thin Tattoo Cover up Patch Waterproof Tattoo Cover up Tape Sweatproof Tattoos Covers Patches Cuttable Invisible Non-Woven Fabric Patches for Tattoos Scar Birthmark 4.72×3.35In(Light Skin)", +"Popcorns Maker", +"Paladone Kuromi GloBuddies Night Light", +"Creativity for Kids Sensory Minis Dinosaur Kit | Cloud Clay Sensory Toy for Toddlers | Squish", +"Mouse Ears Headband Fully Sewn Sturdy Headbands 2-Pcs, 4.6-Inch Sequin Big Ears 3D Silk Satin Bowknot Suitable for Women and Girls Theme Role Play Costume Accessories Party", +"Tanluhu Sweatbands Sport Headbands for Men & Women", +"Pilates Reformer Machine", +"Fossil Fenmore Analog Men Watch", +"Stray Kids Official Lightstick Ver 2", +"Zima Dental Pod PRO: New Ultrasonic Retainer Cleaner Machine. 
Market-Leading", +"2300pcs Polymer Clay Beads Bracelet Making Kit", +"AI ACCESSORY INNOVATIONS Bluey 4 Piece Backpack Set for Pre School Girls & Boys", +"MIRITY Women s High Waist Cotton Underwear - Soft Full Coverage Briefs with Double-Layer Waistedband", +"Plus Size Summer Dresses - Floral Beach Wedding Guest Semi Formal Tiered Flowy Long Sundress", +"AUTOMET Womens Tops Summer Sweater Long Tunic Dressy Casual Blouses Business Cute Trendy Short Sleeve Shirt 2025", +"Black Sabbath Sketch Band T-Shirt", +"Loomie Upgraded 6 Drawer White Dresser for Bedroom", +"Michael Kors Womens Zuma Trainer", +"Chunky Silver Bohemian Flower Bracelet For Wemen Men", +"Classic Black Western Felt Roll Up Brim Cowboy and Cowgirl Hat for Women and Men - Decoration with Western Belt Bukle", +"Jellycat Little Pig Bag Charm", +"LARNMERN Steel Toe Work Boots Men", +"3PCS Gold Hair Ties", +"Red Kap Men s Snap Front Cotton Coverall", +"Citizen Quartz Mens Watch", +"ATHMILE Long Sleeve Shirts for Women Tunic Fall Tops Loose Fit Dressy Crew Neck Basic Sweaters 2025", +"Narecte Summer Maxi Dresses for Women Back Strap Beach Dress Women s Casual Dress Long Flowy Dresses for Vacation", +"LIDHAY Cowboy Hat for Women and Men Western Cowgirl Hats Suede Cowboy Hat for Rodeo", +"BIC Classic Maxi Pocket Lighter", +"A + S Luxxe Diaper Bag Tote – Stylish", +"100pack Name Badge Holders Name Tag Holder Clear Plastic Badge Holder ID Holders for Lanyard (100Pcs Vertical)", +"MOOSEA Christmas Gifts for Women Wife - Love Knot Moissanite Necklace 1-3ct D Color VVS1 Clarity Moissanite 925 Sterling Silver Necklace Anniversary Birthday Gifts for Women Wife Mom Girlfriend", +"Solid Wood Retangle End Table with Drawer and Storage Shelf", +"Madden Girl womens Beella Heeled SandalHeeled Sandal", +"Ekouaer 2 Pack Womens Pajama Sets Short Sleeve Sleepwear Soft Crew Neck Pj Shorts Set Printed Loungewear Set S-XXL", +"NPQQUAN Original Classic Low Profile Baseball Cap Golf Dad Hat Adjustable Cotton Hats Men Women 
Unconstructed Plain Cap", +"YEOREO Women Workout Biker Shorts Impact 4.5 No Front Seam Hidden Scrunch Lifting Seamless Yoga Gym Shorts", +"Merino Wool Underwear Men by Thermowave - Sport & Everyday Men s Merino Wool Boxer Brief - 150 GSM Stretchy & Soft", +"COACH Women s Leah Platform Loafers", +"Doodle Me Happy Kids Thank You Cards - 25 Cards With Envelopes - Cute", +"Spring Summer Women Pleated Casual Denim V Neck Ruffle Sleeve Dress Light Blue XL", +"Disney Hooded Matching Family Cosplay T-Shirt Infant to Adult Sizes (12 Months - 2XL)", +"Leather CPR Cleaner & Conditioner 18oz - Cleans", +"Baseball Shirts Women Baseball Mom Tshirt Baseball Heart Graphic Tee Game Day Gifts Funny Short Sleeve Tops", +"4 Pack Cooling Towels", +"ZEEPORTE Mask Fin Snorkel Set", +"60 Pcs Bride Tribe Bachelorette Party Favors Bulk Friendship Bridesmaid Gifts 12 Set Friendship Bracelets Heart Sunglasses Satin Scrunchie for Engagement Bridal Shower Wedding Favor", +"AUSELILY Summer Dress Sundress Beach Cover up Swing Dresses", +"Loungefly Disney Minnie Mouse Crossbody Satchel Handbag", +"Tactical Gym Bag for Men,50L Large 3 in 1 Sports Duffle Bag with Shoes Compartment for Travel", +"YETI Rambler 42 oz Tumbler with Handle and Straw Lid", +"Samsonite Classic Leather Slim Backpack", +"Vive Thigh High Compression Stockings for Women & Men - 15-20 mmHg Graduated Support Hose - Opaque Closed Toe Compression Tights - Stockings for Varicose Veins", +"Canada is Not for Sale Vintage Cotton Twill Cap", +"TomTiger Yoga Shorts for Women Tummy Control High Waist Biker Shorts Exercise Workout Butt Lifting Tights Women s Short Pants", +"4PCS GOD IS FIRST IM SECOND Bracelet: Faith Priority Bracelet - Engraved Cross Silicone Wristband for Daily Encouragement", +"Tahitian Black Pearl Pendant Necklace AAAA 18K White Gold Plated 925 Sterling Silver Black Pearl Jewelry Gift for Women Mother Wife Her for Anniversary Christmas Birthday", +"HOTOUCH Womens Short Sleeve Button Down Shirts Loose Fit V Neck 
Business Casual Blouses Summer Top with Pockets S-XXL", +"Men s Corduroy Short Sleeved Cargo Shirt Relaxed Fit Button Down Casual Wear Tops with Flap Pockets", +"Orange Blue Light Blocking Glasses for Better Sleep - 99.5% Premium Acetate Migraine Glasses for Women & Men", +"Disney Stitch Beach Towel for Kids Cotton Bath Towels with 2 Clothes Pins Travel Swimming Quick Dry Towel Beach Vacation Essentials", +"PGANDS Womens Crew Neck Solid/Color Block Sweatshirts Long Sleeve Casual Lightweight Pullover Tops", +"Premium Organic Whole Cloves 5.3 oz (150 grams)", +"habibee Bra for Women No Underwire Comfort Seamless Bras Push Up Wireless Bras Full Coverage Bralettes", +"Puma Mens Caven 2.0 Shoes", +"PRETTYGARDEN Women s Fall Button Down Shirts Dressy Casual Spring Long Puff Sleeve Eyelet Loose Fit Collared Blouse Top", +"TNNZEET 2 Pack Plus Size Biker Shorts for Women - 8 Black High Waisted Tummy Control Spandex Workout Shorts (XL-4XL)", +"Marvel Legends Series Captain America Shield", +"PAVOI 14K Gold AAA+ Handpicked White Freshwater Cultured Pearl Earrings Studs", +"Trendy Queen Long Skirts for Women Boho Maxi Skirt Winter Swing Tiered A-Line Elastic High Waist Dress with Pockets Fashion", +"Reebok Classic Leather Sneakers for Men", +"PRETTYGARDEN Women s Summer Bodycon Maxi Tube Dress Ribbed Strapless Side Slit Long Going Out Casual Elegant Party Dresses", +"Favorite Daughter Women s Classic Logo Baseball Cap", +"Reebok Men s Cotton Vital Fleece Sweatpant", +"COOFANDY Mens Hawaiian Shirt Short Sleeve Button Down Shirts Tropical Summer Beach Shirts Casual Floral Aloha Shirts", +"Columbia Mens Grander Marlin Iii Offshore Short", +"Satin One Shoulder Flower Girl Dress with Bow Wedding Princess Pageant Party Gown Puffy Formal First Communion", +"Nike Mens V5 RNR", +"Speed Cube 3x3", +"FOURSTEEDS Women s Cotton Zipper Front Multi-Pocket Twill Bermuda Women Cargo Shorts", +"Curly Hair Brush Defining", +"YQXCC Cooling Towels | 4 Pack 47x12 | Ice Cool for Neck | Microfiber 
Soft Breathable Chilly | for Yoga", +"Hot Wheels Toy Car Playset with Lights", +"Carhartt Men s Loose Fit Heavyweight Short-Sleeve Pocket Henley T-Shirt", +"Women s Mid-High Rise Ripped Denim Shorts Stretchy Distressed Jean Shorts with Pockets Folded Hem Casual Summer Jorts", +"Monster High Cleo De Nile Doll in Golden Blouse & Layered Skirt", +"Ariat Women’s Fatbaby Western Boot", +"UYYE Car Registration and Insurance Card Holder", +"365 by Whole Foods Market", +"Crystal Bracelet for Women Fashion 7 Inch Approximately Rainbow Sparkling Crystal Bracelet with Adjustable Elastic Cord", +"Samsung Galaxy Watch 7 (44mm) AI Smartwatch w/ 1.5 AMOLED", +"DOUKEN 4 Pair Sneaker Creases Protector", +"Elvis: The Legend music word search puzzle.: Great Country Music Word Scrambles about Elvis. Large print word puzzle for adults and rock music lovers. ... Great music gift for your friends or family.", +"Pinkfong Bebefinn Plush Toy - 12 (30cm) Stuffed Doll | Soft Cuddly Plush for Toddlers | Bebefinn Toy | Perfect Birthday", +"Thrusting Dildo Vibrator Sex Toys for Women", +"VANLOVEMAC Baseball Gifts for Boys 8-12 Baseball Stuff College Going Away Gifts Welcome Back to School Gifts Dorm Room Essentials for Guys Off to College", +"Hello Kitty and Friends - Cinnamoroll 12” Pink Monochrome Plush", +"BOBISUKA Pearl White Face Body Paint", +"OMKAGI 2 Piece Workout Sets for Women Halter Sports Bras Gym Sets Booty Leggings Outfits", +"Ivay Womens Scoop Neck Ribbed Knit Tank Top Sleeveless Cotton Wife Beater Camisole Shirts", +"SOLY HUX Women s Graphic Tee Shirts Novelty Funny Short Sleeve Summer Casual Tops", +"Wooden Taper Candle Holders: Wood Candlestick Holders Rustic Brown Farmhouse Fall Decor for Living Room Dinning Table Centerpiece Christmas Set of 2", +"PRETTYGARDEN Long Sleeve Shirts for Women 2025 Fall V Neck Waffle Basic Tee Dressy Casual Winter Blouses Knit Tunic Tops", +"Ray-Ban RB2140 Original Wayfarer Square Sunglasses", +"Lee Womens Ultra Lux Comfort with Flex-to-go Utility 
Skimmer Capri Pant", +"3D Pedometer for Walking", +"HiiFeuer Medieval Faux Leather Chest Armor", +"Pet Deadly Dog Costume", +"Western Chief Kids Freestyle Neoprene Outdoor Boot", +"SKECHERS Women s Ultra Flex 3.0-Brilliant Path Hands Free Slip-INS Sneaker", +"LUOBO Keychain Accessory Decor Keychain Decoration backpacks Bag Pendant", +"10inch Teddy Bear Stuffed Animal", +"Halloweentown University T-Shirt for Women Fall Pumpkin Shirts Funny Halloween Thanksgiving Gift Tops", +"Women s Sexy American Flag Crop Tank 4th of July Patriotic Sleeveless Tee Tops", +"Gillette Fusion5 ProGlide Men s Razor Blade Refills", +"Poppy Playtime - Mommy Long Legs Plush (14 Medium Plush", +"Women’s Heated Vest with 12V 20000mAh Battery – Cropped Stand Collar Lightweight Insulated Winter Vest.", +"toolant Winter Work Gloves for Men", +"192Pcs Halloween Favors Stationery Gift Set", +"20 Pcs Ultra Thin Tattoo Cover up Patch Waterproof Tattoo Cover up Tape Sweatproof Tattoos Covers Patches Cuttable Invisible Non-Woven Fabric Patches for Tattoos Scar Birthmark 4.72×3.35In(Light Skin)", +"Popcorns Maker", +"Paladone Kuromi GloBuddies Night Light", +"Creativity for Kids Sensory Minis Dinosaur Kit | Cloud Clay Sensory Toy for Toddlers | Squish", +"Mouse Ears Headband Fully Sewn Sturdy Headbands 2-Pcs, 4.6-Inch Sequin Big Ears 3D Silk Satin Bowknot Suitable for Women and Girls Theme Role Play Costume Accessories Party", +"Tanluhu Sweatbands Sport Headbands for Men & Women", +"Pilates Reformer Machine", +"Fossil Fenmore Analog Men Watch", +"Stray Kids Official Lightstick Ver 2", +"Zima Dental Pod PRO: New Ultrasonic Retainer Cleaner Machine. 
Market-Leading", +"2300pcs Polymer Clay Beads Bracelet Making Kit", +"AI ACCESSORY INNOVATIONS Bluey 4 Piece Backpack Set for Pre School Girls & Boys", +"MIRITY Women s High Waist Cotton Underwear - Soft Full Coverage Briefs with Double-Layer Waistedband", +"Plus Size Summer Dresses - Floral Beach Wedding Guest Semi Formal Tiered Flowy Long Sundress", +"AUTOMET Womens Tops Summer Sweater Long Tunic Dressy Casual Blouses Business Cute Trendy Short Sleeve Shirt 2025", +"Black Sabbath Sketch Band T-Shirt", +"Loomie Upgraded 6 Drawer White Dresser for Bedroom", +"Michael Kors Womens Zuma Trainer", +"Chunky Silver Bohemian Flower Bracelet For Wemen Men", +"Classic Black Western Felt Roll Up Brim Cowboy and Cowgirl Hat for Women and Men - Decoration with Western Belt Bukle", +"Jellycat Little Pig Bag Charm", +"LARNMERN Steel Toe Work Boots Men", +"3PCS Gold Hair Ties", +"Red Kap Men s Snap Front Cotton Coverall", +"Citizen Quartz Mens Watch", +"ATHMILE Long Sleeve Shirts for Women Tunic Fall Tops Loose Fit Dressy Crew Neck Basic Sweaters 2025", +"Narecte Summer Maxi Dresses for Women Back Strap Beach Dress Women s Casual Dress Long Flowy Dresses for Vacation", +"LIDHAY Cowboy Hat for Women and Men Western Cowgirl Hats Suede Cowboy Hat for Rodeo", +"BIC Classic Maxi Pocket Lighter", +"A + S Luxxe Diaper Bag Tote – Stylish", +"100pack Name Badge Holders Name Tag Holder Clear Plastic Badge Holder ID Holders for Lanyard (100Pcs Vertical)", +"MOOSEA Christmas Gifts for Women Wife - Love Knot Moissanite Necklace 1-3ct D Color VVS1 Clarity Moissanite 925 Sterling Silver Necklace Anniversary Birthday Gifts for Women Wife Mom Girlfriend", +"Solid Wood Retangle End Table with Drawer and Storage Shelf", +"Madden Girl womens Beella Heeled SandalHeeled Sandal", +"Ekouaer 2 Pack Womens Pajama Sets Short Sleeve Sleepwear Soft Crew Neck Pj Shorts Set Printed Loungewear Set S-XXL", +"NPQQUAN Original Classic Low Profile Baseball Cap Golf Dad Hat Adjustable Cotton Hats Men Women 
Unconstructed Plain Cap", +"YEOREO Women Workout Biker Shorts Impact 4.5 No Front Seam Hidden Scrunch Lifting Seamless Yoga Gym Shorts", +"Merino Wool Underwear Men by Thermowave - Sport & Everyday Men s Merino Wool Boxer Brief - 150 GSM Stretchy & Soft", +"COACH Women s Leah Platform Loafers", +"Doodle Me Happy Kids Thank You Cards - 25 Cards With Envelopes - Cute", +"Spring Summer Women Pleated Casual Denim V Neck Ruffle Sleeve Dress Light Blue XL", +"Disney Hooded Matching Family Cosplay T-Shirt Infant to Adult Sizes (12 Months - 2XL)", +"Leather CPR Cleaner & Conditioner 18oz - Cleans", +"Baseball Shirts Women Baseball Mom Tshirt Baseball Heart Graphic Tee Game Day Gifts Funny Short Sleeve Tops", +"4 Pack Cooling Towels", +"ZEEPORTE Mask Fin Snorkel Set", +"60 Pcs Bride Tribe Bachelorette Party Favors Bulk Friendship Bridesmaid Gifts 12 Set Friendship Bracelets Heart Sunglasses Satin Scrunchie for Engagement Bridal Shower Wedding Favor", +"AUSELILY Summer Dress Sundress Beach Cover up Swing Dresses", +"Loungefly Disney Minnie Mouse Crossbody Satchel Handbag", +"Tactical Gym Bag for Men,50L Large 3 in 1 Sports Duffle Bag with Shoes Compartment for Travel", +"YETI Rambler 42 oz Tumbler with Handle and Straw Lid", +"Samsonite Classic Leather Slim Backpack", +"Fabletics Men s Only Short", +"3pcs Heart Badge Reel with Alligator Clip Cute Retractable Badge Holder Acrylic Nurse Badge Clip for Office Workers", +"Ortho Balance Hiking Shoes for Men Women", +"GOLDENMATE 1000VA/600W Lithium UPS Battery Backup and Surge Protector", +"Gelante Solid Color 100% Cotton Bucket Hat for Women and Men Packable Travel Summer Beach Hat", +"Sonic The Hedgehog 3 Movie Action Figures 2.5-Inch Movie Collector Toy Figure Multi-Pack Includes Sonic The Hedgehog Knuckles Shadow Buzz Bomber & Drone- Officially Licensed Toys", +"61 Pcs Nacho Libre Stickers Comedy Movie Graffiti Waterproof Vinyl for Adults for Birthday Party Supplies Decoration Favors for Water Bottles Laptop Suitcase 
Scrapbooking Choice", +"Neck Lift Tape", +"925 Sterling Silver Earrings for Womens Sparkly Colorful Full Diamond Simple Stylish Elegant Hypoallergenic Jewelry", +"Pink Ceramic Bow Vase for Flowers", +"Winter Coats For Men Winter Jackets Water Resistant Warm Thicken Parka Puffer Coat Long Down Jacket", +"Alarm Clocks for Bedrooms", +"KINURI Running Belt for Men & Women – Fits All Smartphones – Waterproof Waist Pack with Adjustable Strap – Ideal for Jogging", +"DREAM PAIRS Heels for Women Flip Flops Kitten Low Heels Open Square Toe Thong Heeled Sandals", +"Amazon Basics All Purpose Washable School Craft Liquid Glue for Making Slime", +"Inflatable Costume Adult Frog Full Body Deluxe Funny Air Blow Up Costume for Men Women Halloween", +"Mens Golf Pants Stretch Casual Dress Pants Elastic Drawstring Slacks for Men Lightweight Trousers with 5 Pockets", +"Lip Smacker Hello Kitty Lip Balm", +"Brown Sugar Keeper 3D – Terracotta Clay Bear Softener", +"MEETSUN Polarized Sunglasses for Women Men Trendy Classic Retro Designer Style", +"Corset Top Bustier Lingerie for Women Zipper Front Flower Sexy Burlesque Vintage", +"Pro Club Men s Heavyweight Mesh Basketball Shorts", +"Nike Tech Men s Full-Zip Windrunner Hoodie (HV0949-237", +"Ear Piercing Kit", +"Timberland Men s 6 Premium Boot", +"Nike Air Rift", +"Portable Hookah Set for Travel - Premium Handheld Glass Aluminum Mini Hookah Real Metal Accessories", +"Clear Backpack for Boys", +"Women’s Knee High Boots Round Toe Chunky Heel Faux Leather Tall Riding Boots with Side Zipper", +"Golf Grip Trainer & Connection Band 2Set", +"Monster High Self Scare Day Cleo De Nile Doll Play Set", +"Fortnite eGift Card - Powered by the Epic Games Store", +"Mesh Beach Bags", +"Crowye Anime Cosplay Costume for Halloween Princess Costume Accessories Anime White Cosplay Wig Egypt Arm Cuff Bracelet Gold Earrings Greek Goddess Set for Halloween Dress up Princess", +"Premium Women s Leather Tote Handbag - Bag for Everyday Use", +"Ekouaer Maternity 
Nursing Gown and Robe Set Labor Delivery Nursing Nightgowns for Breastfeeding Pregnancy Clothes", +"Superband Mermaid Tails for Swimming for Women and Adults Without Monofin", +"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops" + ], + "top_n":386, + "normalize": true + }' + +end=$(date +%s%N) # 结束时间,纳秒级 +duration=$(( (end - start) / 1000000 )) # 转换为毫秒 +echo "Command took $duration milliseconds." + + diff --git a/bb.sh b/bb.sh new file mode 100644 index 0000000..023590d --- /dev/null +++ b/bb.sh @@ -0,0 +1,35 @@ +#!/bin/bash +start=$(date +%s%N) # 开始时间,纳秒级 + +time curl -X POST "http://localhost:6007/rerank" \ + -H "Content-Type: application/json" \ + -d '{ + "query": "健身女生T恤短袖", + "docs": [ "60 Jelly Bracelets 80 s Adult Size - MAQIHAN Neon Gummy Bracelets for Women 80s Jelly Bangles Glow Silicone Bands Jewelry Wristband Rainbow Jellies Bangle Girls Boys Colored Accessories Party Favor", +"FITORY Mens Sandals", +"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", +"Merrell Mens Hydro Moc", +"FITORY Mens Sandals", +"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", +"Merrell Mens Hydro Moc", + +"FITORY Mens Sandals", +"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", +"Merrell Mens Hydro Moc", + + +"FITORY Mens Sandals", +"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", +"Merrell Mens Hydro Moc", +"Superband Mermaid Tails for Swimming for Women and Adults Without Monofin", +"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops" + ], + "top_n":386, + "normalize": true + }' + +end=$(date +%s%N) # 结束时间,纳秒级 +duration=$(( (end - start) / 1000000 )) # 转换为毫秒 +echo "Command took $duration milliseconds." 
+ + diff --git a/config/config.yaml b/config/config.yaml index 04a54a6..c1cacb7 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -275,26 +275,26 @@ services: max_docs: 1000 normalize: true # 服务内后端(reranker 进程启动时读取) - backend: "qwen3_vllm" # bge | qwen3_vllm | qwen3_transformers | dashscope_rerank + backend: "qwen3_transformers" # bge | qwen3_vllm | qwen3_transformers | dashscope_rerank backends: bge: model_name: "BAAI/bge-reranker-v2-m3" device: null use_fp16: true batch_size: 64 - max_length: 512 + max_length: 160 cache_dir: "./model_cache" enable_warmup: true qwen3_vllm: model_name: "Qwen/Qwen3-Reranker-0.6B" engine: "vllm" - max_model_len: 256 + max_model_len: 160 tensor_parallel_size: 1 gpu_memory_utilization: 0.36 dtype: "float16" enable_prefix_caching: true enforce_eager: false - infer_batch_size: 64 + infer_batch_size: 100 sort_by_doc_length: true length_sort_mode: "char" # char | token instruction: "rank products by given query" diff --git a/docs/TODO.txt b/docs/TODO.txt index f31c378..873a169 100644 --- a/docs/TODO.txt +++ b/docs/TODO.txt @@ -1,10 +1,23 @@ -增加意图识别模块 +@reranker/backends/qwen3_vllm.py 单次 generate 前有进程内锁,同一进程里不会并行多路 vLLM 推理,这个锁有必要吗?是否会影响性能?是否能够打开,使得性能更好?比如这个场景,我一次请求 400 条,分成每64个一个batch,基于我现在的gpu配置,可以再提高并发度吗? 
+测试了,让每个批次都并发地进行,耗时没有变化 + +增加款式意图识别模块 + +意图类型: 颜色,尺寸(目前只需要支持这两种) + +意图召回层: +每种意图,有一个召回词集合 +对query(包括原始query、各种翻译query 都做匹配) + +意图识别层: +如果召回 判断有款式需求, + + 是否有: 颜色需求 尺码需求 - 如果有: 先做sku筛选,然后把最优的拼接到名称中,参与reranker。 diff --git a/docs/suggestion索引构建.md b/docs/suggestion索引构建.md index 5b56890..0f3b4d5 100644 --- a/docs/suggestion索引构建.md +++ b/docs/suggestion索引构建.md @@ -96,14 +96,15 @@ "lang": { "type": "keyword" }, "text": { "type": "keyword" }, // 显示给用户的原始文案 "text_norm": { "type": "keyword" }, // 归一化后文本:小写+空白规整 - "sources": { "type": "keyword" }, // 来源集合:["title", "qanchor", "query_log"] + "sources": { "type": "keyword" }, // 来源集合:["title", "qanchor", "tag", "query_log"] "title_doc_count": { "type": "integer" }, "qanchor_doc_count": { "type": "integer" }, + "tag_doc_count": { "type": "integer" }, "query_count_7d": { "type": "integer" }, "query_count_30d": { "type": "integer" }, "rank_score": { "type": "float" }, // 排序打分 "lang_confidence": { "type": "float" }, - "lang_source": { "type": "keyword" }, // 语言来源:log_field / request_params / script / default + "lang_source": { "type": "keyword" }, // 语言来源:log_field / request_params / detector / fallback / default "lang_conflict": { "type": "boolean" }, // 是否存在多来源语言冲突 "status": { "type": "byte" }, // 1 = 有效 "updated_at": { "type": "date" }, @@ -166,9 +167,9 @@ #### 4. 构建候选词 -##### 4.1 从商品索引收集 title / qanchors(Step 1) +##### 4.1 从商品索引收集 title / qanchors / tags(Step 1) - - 遍历店铺的所有商品:获取每个商品的 `"spu_id"`, `"title"`, `"qanchors"` 3个字段(按`spu_id`升序) + - 遍历店铺的所有商品:获取每个商品的 `"spu_id"`, `"title"`, `"qanchors"`, `"tags"`(按 `spu_id`、`id.keyword` 升序,便于 `search_after` 稳定分页) - 对每个商品文档: @@ -216,6 +217,11 @@ - `text_norm = _normalize_text(q_text)`,再用 `_looks_noise` 过滤 - 同样按 `(lang, text_norm)` 合并为 `SuggestionCandidate`,调用 `add_product("qanchor", spu_id=product_id)`。 + 4. 
**tags 处理**(与 `index_languages` 循环并列,每个商品只做一次): + - `tags` 可为字符串数组,或逗号等分隔的单个字符串;经 `_iter_product_tags` 展开为若干条。 + - 每条 tag **无语言字段**:使用 `query.query_parser.detect_text_language_for_suggestions`(与 `QueryParser` 相同的 `LanguageDetector`)判定语言,并约束在租户的 `index_languages` 内。 + - 通过 `_looks_noise` 后按 `(detected_lang, text_norm)` 合并,调用 `add_product("tag", spu_id=product_id)`。 + ##### 4.2 从查询日志收集用户 query(Step 2) 对应 `_iter_query_log_rows` 与 `_build_full_candidates` 的后半段。 @@ -284,6 +290,7 @@ 1.8 \cdot \log(1 + query\_count\_{30d}) + 1.2 \cdot \log(1 + query\_count\_{7d}) + 1.0 \cdot \log(1 + qanchor\_doc\_count) + + 0.85 \cdot \log(1 + tag\_doc\_count) + 0.6 \cdot \log(1 + title\_doc\_count) \] @@ -423,6 +430,7 @@ - `sources = ["query_log"]` - `title_doc_count = 0` - `qanchor_doc_count = 0` + - `tag_doc_count = 0` - `completion..input = [text]` - `completion..weight = int(max(rank_score, 1.0) * 100)` - `sat. = text` @@ -446,7 +454,7 @@ - 将 `"query_log"` 加入 `sources` - `lang_conflict` 与 `params.lang_conflict` 取或 - 若 `params.lang_confidence > ctx._source.lang_confidence` 则更新 `lang_confidence` 和 `lang_source` - - 基于更新后的 `query_count_7d/30d` + `qanchor_doc_count` + `title_doc_count` 重新计算 `rank_score` + - 基于更新后的 `query_count_7d/30d` + `qanchor_doc_count` + `tag_doc_count` + `title_doc_count` 重新计算 `rank_score` - `status = 1` - `updated_at = params.now_iso` - 同步更新 `text / lang / text_norm` @@ -502,3 +510,9 @@ - 若是 `"zh_tw"` / `"pt_br"` → 保留全量 - 其他 → 取 `_` 前缀(例如 `"en_US"` → `"en"`) +#### 4. 
查询日志 / tag 的语言回退 `_resolve_query_language` 与 `detect_text_language_for_suggestions` + +- 日志语言优先级不变:`language` 字段 → `request_params.language` → **语言检测**。 +- 检测实现为 `query.query_parser.detect_text_language_for_suggestions`:内部使用与 `QueryParser` 相同的 `LanguageDetector`(`query/language_detector.py`),并将结果约束到租户 `index_languages`(含 `zh_tw` 等与检测码的 base 匹配)。 +- 在线联想:`SuggestionService` 在合并 completion 与 SAT 结果后,按 `ES_score × (1 / sqrt(词元数))` 排序(词元算法与 `simple_tokenize_query` 一致),再以 `rank_score` 作次要键,减轻长标题/长短语相对短词根的压制不足问题。 + diff --git a/mappings/search_suggestions.json b/mappings/search_suggestions.json index 7d52daf..f414066 100644 --- a/mappings/search_suggestions.json +++ b/mappings/search_suggestions.json @@ -27,6 +27,7 @@ "sources": { "type": "keyword" }, "title_doc_count": { "type": "integer" }, "qanchor_doc_count": { "type": "integer" }, + "tag_doc_count": { "type": "integer" }, "query_count_7d": { "type": "integer" }, "query_count_30d": { "type": "integer" }, "rank_score": { "type": "float" }, diff --git a/query/query_parser.py b/query/query_parser.py index e09202c..dfcdeda 100644 --- a/query/query_parser.py +++ b/query/query_parser.py @@ -4,7 +4,7 @@ Query parser - main module for query processing. Handles query rewriting, translation, and embedding generation. """ -from typing import Dict, List, Optional, Any, Union +from typing import Dict, List, Optional, Any, Union, Tuple import numpy as np import logging import re @@ -23,6 +23,20 @@ try: except Exception: # pragma: no cover hanlp = None + +def simple_tokenize_query(text: str) -> List[str]: + """ + Lightweight tokenizer for suggestion length / analysis (aligned with QueryParser fallback). 
+ + - Consecutive CJK characters form one token + - Latin / digit runs (with internal hyphens) form tokens + """ + if not text: + return [] + pattern = re.compile(r"[\u4e00-\u9fff]+|[A-Za-z0-9_]+(?:-[A-Za-z0-9_]+)*") + return pattern.findall(text) + + class ParsedQuery: """Container for parsed query results.""" @@ -173,16 +187,7 @@ class QueryParser: return config.query_config.default_translation_model def _simple_tokenize(self, text: str) -> List[str]: - """ - Lightweight tokenizer fallback. - - - Groups consecutive CJK chars as a token - - Groups consecutive latin/digits/underscore/dash as a token - """ - if not text: - return [] - pattern = re.compile(r"[\u4e00-\u9fff]+|[A-Za-z0-9_]+(?:-[A-Za-z0-9_]+)*") - return pattern.findall(text) + return simple_tokenize_query(text) def _extract_keywords(self, query: str) -> str: """Extract keywords (nouns with length > 1) from query.""" @@ -636,3 +641,63 @@ class QueryParser: queries.append(translation) return queries + + +def detect_text_language_for_suggestions( + text: str, + *, + index_languages: Optional[List[str]] = None, + primary_language: str = "en", +) -> Tuple[str, float, str]: + """ + Language detection for short strings (mixed-language tags, query-log fallback). + + Uses the same ``LanguageDetector`` as :class:`QueryParser`. Returns a language + code present in ``index_languages`` when possible, otherwise the tenant primary. + + Returns: + (lang, confidence, source) where source is ``detector``, ``fallback``, or ``default``. 
+ """ + langs_list = [x for x in (index_languages or []) if x] + langs_set = set(langs_list) + + def _norm_lang(raw: Optional[str]) -> Optional[str]: + if not raw: + return None + token = str(raw).strip().lower().replace("-", "_") + if not token: + return None + if token in {"zh_tw", "pt_br"}: + return token + return token.split("_")[0] + + primary = _norm_lang(primary_language) or "en" + if primary not in langs_set and langs_list: + primary = _norm_lang(langs_list[0]) or langs_list[0] + + if not text or not str(text).strip(): + return primary, 0.0, "default" + + raw_code = LanguageDetector().detect(str(text).strip()) + if not raw_code or raw_code == "unknown": + return primary, 0.35, "default" + + def _index_lang_base(cand: str) -> str: + t = str(cand).strip().lower().replace("-", "_") + return t.split("_")[0] if t else "" + + def _resolve_index_lang(code: str) -> Optional[str]: + if code in langs_set: + return code + for cand in langs_list: + if _index_lang_base(cand) == code: + return cand + return None + + if langs_list: + resolved = _resolve_index_lang(raw_code) + if resolved is None: + return primary, 0.5, "fallback" + return resolved, 0.92, "detector" + + return raw_code, 0.92, "detector" diff --git a/reranker/bge_reranker.py b/reranker/bge_reranker.py index 26a3590..221818a 100644 --- a/reranker/bge_reranker.py +++ b/reranker/bge_reranker.py @@ -15,7 +15,7 @@ import time from typing import Any, Dict, List, Optional, Tuple import torch -from modelscope import AutoModelForSequenceClassification, AutoTokenizer +from transformers import AutoModelForSequenceClassification, AutoTokenizer logger = logging.getLogger("reranker.core") diff --git a/scripts/benchmark_reranker_random_titles.py b/scripts/benchmark_reranker_random_titles.py new file mode 100755 index 0000000..ef8319c --- /dev/null +++ b/scripts/benchmark_reranker_random_titles.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python3 +""" +Single-request rerank latency probe using real title lines (e.g. 1.8w export). 
+ +Randomly samples N titles from a text file (one title per line), POSTs to the +rerank HTTP API, prints wall-clock latency. + +Supports multiple N values (comma-separated) and multiple repeats per N. + +Example: + source activate.sh + python scripts/benchmark_reranker_random_titles.py 386 + python scripts/benchmark_reranker_random_titles.py 40,80,100 + python scripts/benchmark_reranker_random_titles.py 40,80,100 --repeat 3 --seed 42 + RERANK_BASE=http://127.0.0.1:6007 python scripts/benchmark_reranker_random_titles.py 200 +""" + +from __future__ import annotations + +import argparse +import json +import os +import random +import statistics +import sys +import time +from pathlib import Path +from typing import List, Optional, Tuple + +import httpx + + +def _load_titles(path: Path) -> List[str]: + lines: List[str] = [] + with path.open(encoding="utf-8", errors="replace") as f: + for line in f: + s = line.strip() + if s: + lines.append(s) + return lines + + +def _parse_doc_counts(s: str) -> List[int]: + parts = [p.strip() for p in s.split(",") if p.strip()] + if not parts: + raise ValueError("empty doc-count list") + out: List[int] = [] + for p in parts: + v = int(p, 10) + if v <= 0: + raise ValueError(f"doc count must be positive, got {v}") + out.append(v) + return out + + +def _do_rerank( + client: httpx.Client, + url: str, + query: str, + docs: List[str], + *, + top_n: int, + normalize: bool, +) -> Tuple[bool, int, float, Optional[int], str]: + payload: dict = {"query": query, "docs": docs, "normalize": normalize} + if top_n > 0: + payload["top_n"] = top_n + body = json.dumps(payload, ensure_ascii=False) + headers = {"Content-Type": "application/json"} + t0 = time.perf_counter() + try: + resp = client.post(url, content=body.encode("utf-8"), headers=headers) + except httpx.HTTPError: + raise + elapsed_ms = (time.perf_counter() - t0) * 1000.0 + text = resp.text or "" + ok = resp.status_code == 200 + scores_len: Optional[int] = None + if ok: + try: + data = 
resp.json() + sc = data.get("scores") + if isinstance(sc, list): + scores_len = len(sc) + except json.JSONDecodeError: + scores_len = None + return ok, resp.status_code, elapsed_ms, scores_len, text + + +def main() -> int: + parser = argparse.ArgumentParser( + description="POST /rerank with N random titles from a file and print latency." + ) + parser.add_argument( + "n", + type=str, + metavar="N[,N,...]", + help="Doc counts: one integer or comma-separated list, e.g. 40,80,100.", + ) + parser.add_argument( + "--repeat", + type=int, + default=3, + help="Number of runs per doc count (default: 3).", + ) + parser.add_argument( + "--titles-file", + type=Path, + default=Path(os.environ.get("RERANK_TITLE_FILE", "/home/ubuntu/rerank_test/titles.1.8w")), + help="Path to newline-separated titles (default: %(default)s or env RERANK_TITLE_FILE).", + ) + parser.add_argument( + "--url", + type=str, + default=os.environ.get("RERANK_BASE", "http://127.0.0.1:6007").rstrip("/") + "/rerank", + help="Full rerank URL (default: $RERANK_BASE/rerank or http://127.0.0.1:6007/rerank).", + ) + parser.add_argument( + "--query", + type=str, + default="健身女生T恤短袖", + help="Rerank query string.", + ) + parser.add_argument( + "--seed", + type=int, + default=None, + help="RNG base seed; each (n, run) uses a derived seed when set (optional).", + ) + parser.add_argument( + "--top-n", + type=int, + default=0, + help="If > 0, include top_n in JSON body (omit field when 0).", + ) + parser.add_argument( + "--no-normalize", + action="store_true", + help="Send normalize=false (default: normalize=true).", + ) + parser.add_argument( + "--timeout", + type=float, + default=float(os.environ.get("RERANK_TIMEOUT_SEC", "240")), + help="HTTP timeout seconds.", + ) + parser.add_argument( + "--print-body-preview", + action="store_true", + help="Print first ~500 chars of response body on success (last run only).", + ) + args = parser.parse_args() + + try: + doc_counts = _parse_doc_counts(args.n) + except ValueError as 
exc: + print(f"error: invalid N list {args.n!r}: {exc}", file=sys.stderr) + return 2 + + repeat = int(args.repeat) + if repeat <= 0: + print("error: --repeat must be positive", file=sys.stderr) + return 2 + + if not args.titles_file.is_file(): + print(f"error: titles file not found: {args.titles_file}", file=sys.stderr) + return 2 + + titles = _load_titles(args.titles_file) + max_n = max(doc_counts) + if len(titles) < max_n: + print( + f"error: file has only {len(titles)} non-empty lines, need at least {max_n}", + file=sys.stderr, + ) + return 2 + + top_n = int(args.top_n) + normalize = not args.no_normalize + any_fail = False + summary: dict[int, List[float]] = {n: [] for n in doc_counts} + + with httpx.Client(timeout=args.timeout) as client: + for n in doc_counts: + for run_idx in range(repeat): + if args.seed is not None: + random.seed(args.seed + n * 10_000 + run_idx) + docs = random.sample(titles, n) + try: + ok, status, elapsed_ms, scores_len, text = _do_rerank( + client, + args.url, + args.query, + docs, + top_n=top_n, + normalize=normalize, + ) + except httpx.HTTPError as exc: + print( + f"n={n} run={run_idx + 1}/{repeat} error: request failed: {exc}", + file=sys.stderr, + ) + any_fail = True + continue + + if ok: + summary[n].append(elapsed_ms) + else: + any_fail = True + + print( + f"n={n} run={run_idx + 1}/{repeat} status={status} " + f"latency_ms={elapsed_ms:.2f} scores={scores_len if scores_len is not None else 'n/a'}" + ) + if args.print_body_preview and text and run_idx == repeat - 1 and n == doc_counts[-1]: + preview = text[:500] + ("…" if len(text) > 500 else "") + print(preview) + + for n in doc_counts: + lat = summary[n] + if not lat: + print(f"summary n={n} runs=0 (all failed)") + continue + avg = statistics.mean(lat) + lo, hi = min(lat), max(lat) + extra = "" + if len(lat) >= 2: + extra = f" stdev_ms={statistics.stdev(lat):.2f}" + print( + f"summary n={n} runs={len(lat)} min_ms={lo:.2f} max_ms={hi:.2f} avg_ms={avg:.2f}{extra}" + ) + + return 1 
if any_fail else 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/suggestion/builder.py b/suggestion/builder.py index c8e2c47..3001742 100644 --- a/suggestion/builder.py +++ b/suggestion/builder.py @@ -20,6 +20,7 @@ from sqlalchemy import text from config.loader import get_app_config from config.tenant_config_loader import get_tenant_config_loader +from query.query_parser import detect_text_language_for_suggestions from suggestion.mapping import build_suggestion_mapping from utils.es_client import ESClient @@ -57,6 +58,7 @@ class SuggestionCandidate: sources: set = field(default_factory=set) title_spu_ids: set = field(default_factory=set) qanchor_spu_ids: set = field(default_factory=set) + tag_spu_ids: set = field(default_factory=set) query_count_7d: int = 0 query_count_30d: int = 0 lang_confidence: float = 1.0 @@ -69,6 +71,8 @@ class SuggestionCandidate: self.title_spu_ids.add(spu_id) elif source == "qanchor": self.qanchor_spu_ids.add(spu_id) + elif source == "tag": + self.tag_spu_ids.add(spu_id) def add_query_log(self, is_7d: bool) -> None: self.sources.add("query_log") @@ -150,6 +154,19 @@ class SuggestionIndexBuilder: return out @staticmethod + def _iter_product_tags(raw: Any) -> List[str]: + if raw is None: + return [] + if isinstance(raw, list): + return [str(x).strip() for x in raw if str(x).strip()] + s = str(raw).strip() + if not s: + return [] + parts = re.split(r"[,;|/\n\t]+", s) + out = [p.strip() for p in parts if p and p.strip()] + return out if out else [s] + + @staticmethod def _looks_noise(text_value: str) -> bool: if not text_value: return True @@ -187,20 +204,6 @@ class SuggestionIndexBuilder: return None return None - @staticmethod - def _detect_script_language(query: str) -> Tuple[Optional[str], float, str]: - if re.search(r"[\u4e00-\u9fff]", query): - return "zh", 0.98, "script" - if re.search(r"[\u0600-\u06FF]", query): - return "ar", 0.98, "script" - if re.search(r"[\u0400-\u04FF]", query): - return "ru", 0.95, 
"script" - if re.search(r"[\u0370-\u03FF]", query): - return "el", 0.95, "script" - if re.search(r"[a-zA-Z]", query): - return "en", 0.55, "model" - return None, 0.0, "default" - def _resolve_query_language( self, query: str, @@ -225,18 +228,29 @@ class SuggestionIndexBuilder: if req_lang and (not langs_set or req_lang in langs_set): return req_lang, 1.0, "request_params", conflict - detected_lang, conf, source = self._detect_script_language(query) - if detected_lang and (not langs_set or detected_lang in langs_set): - return detected_lang, conf, source, conflict + det_lang, conf, det_source = detect_text_language_for_suggestions( + query, + index_languages=index_languages, + primary_language=primary, + ) + if det_lang and (not langs_set or det_lang in langs_set): + return det_lang, conf, det_source, conflict return primary, 0.3, "default", conflict @staticmethod - def _compute_rank_score(query_count_30d: int, query_count_7d: int, qanchor_doc_count: int, title_doc_count: int) -> float: + def _compute_rank_score( + query_count_30d: int, + query_count_7d: int, + qanchor_doc_count: int, + title_doc_count: int, + tag_doc_count: int = 0, + ) -> float: return ( 1.8 * math.log1p(max(query_count_30d, 0)) + 1.2 * math.log1p(max(query_count_7d, 0)) + 1.0 * math.log1p(max(qanchor_doc_count, 0)) + + 0.85 * math.log1p(max(tag_doc_count, 0)) + 0.6 * math.log1p(max(title_doc_count, 0)) ) @@ -247,6 +261,7 @@ class SuggestionIndexBuilder: query_count_7d=c.query_count_7d, qanchor_doc_count=len(c.qanchor_spu_ids), title_doc_count=len(c.title_spu_ids), + tag_doc_count=len(c.tag_spu_ids), ) def _iter_products(self, tenant_id: str, batch_size: int = 500) -> Iterator[Dict[str, Any]]: @@ -259,9 +274,10 @@ class SuggestionIndexBuilder: while True: body: Dict[str, Any] = { "size": batch_size, - "_source": ["id", "spu_id", "title", "qanchors"], + "_source": ["id", "spu_id", "title", "qanchors", "tags"], "sort": [ {"spu_id": {"order": "asc", "missing": "_last"}}, + {"id.keyword": {"order": 
"asc", "missing": "_last"}}, ], "query": {"match_all": {}}, } @@ -471,6 +487,22 @@ class SuggestionIndexBuilder: key_to_candidate[key] = c c.add_product("qanchor", spu_id=product_id) + for tag in self._iter_product_tags(src.get("tags")): + tag_lang, _, _ = detect_text_language_for_suggestions( + tag, + index_languages=index_languages, + primary_language=primary_language, + ) + text_norm = self._normalize_text(tag) + if self._looks_noise(text_norm): + continue + key = (tag_lang, text_norm) + c = key_to_candidate.get(key) + if c is None: + c = SuggestionCandidate(text=tag, text_norm=text_norm, lang=tag_lang) + key_to_candidate[key] = c + c.add_product("tag", spu_id=product_id) + # Step 2: query logs now = datetime.now(timezone.utc) since = now - timedelta(days=days) @@ -521,6 +553,7 @@ class SuggestionIndexBuilder: "sources": sorted(c.sources), "title_doc_count": len(c.title_spu_ids), "qanchor_doc_count": len(c.qanchor_spu_ids), + "tag_doc_count": len(c.tag_spu_ids), "query_count_7d": c.query_count_7d, "query_count_30d": c.query_count_30d, "rank_score": float(rank_score), @@ -672,6 +705,7 @@ class SuggestionIndexBuilder: query_count_7d=delta.delta_7d, qanchor_doc_count=0, title_doc_count=0, + tag_doc_count=0, ) return { "tenant_id": delta.tenant_id, @@ -681,6 +715,7 @@ class SuggestionIndexBuilder: "sources": ["query_log"], "title_doc_count": 0, "qanchor_doc_count": 0, + "tag_doc_count": 0, "query_count_7d": delta.delta_7d, "query_count_30d": delta.delta_30d, "rank_score": float(rank_score), @@ -710,6 +745,7 @@ class SuggestionIndexBuilder: if (ctx._source.query_count_7d == null) { ctx._source.query_count_7d = 0; } if (ctx._source.qanchor_doc_count == null) { ctx._source.qanchor_doc_count = 0; } if (ctx._source.title_doc_count == null) { ctx._source.title_doc_count = 0; } + if (ctx._source.tag_doc_count == null) { ctx._source.tag_doc_count = 0; } ctx._source.query_count_30d += params.delta_30d; ctx._source.query_count_7d += params.delta_7d; @@ -729,10 +765,12 @@ 
class SuggestionIndexBuilder: int q7 = ctx._source.query_count_7d; int qa = ctx._source.qanchor_doc_count; int td = ctx._source.title_doc_count; + int tg = ctx._source.tag_doc_count; double score = 1.8 * Math.log(1 + q30) + 1.2 * Math.log(1 + q7) + 1.0 * Math.log(1 + qa) + + 0.85 * Math.log(1 + tg) + 0.6 * Math.log(1 + td); ctx._source.rank_score = score; ctx._source.status = 1; diff --git a/suggestion/mapping.py b/suggestion/mapping.py index 72280e9..baab28d 100644 --- a/suggestion/mapping.py +++ b/suggestion/mapping.py @@ -96,6 +96,7 @@ def build_suggestion_mapping(index_languages: List[str]) -> Dict[str, Any]: "sources": {"type": "keyword"}, "title_doc_count": {"type": "integer"}, "qanchor_doc_count": {"type": "integer"}, + "tag_doc_count": {"type": "integer"}, "query_count_7d": {"type": "integer"}, "query_count_30d": {"type": "integer"}, "rank_score": {"type": "float"}, diff --git a/suggestion/service.py b/suggestion/service.py index 8211426..15671d3 100644 --- a/suggestion/service.py +++ b/suggestion/service.py @@ -7,12 +7,24 @@ import time from typing import Any, Dict, List, Optional from config.tenant_config_loader import get_tenant_config_loader +from query.query_parser import simple_tokenize_query from suggestion.builder import get_suggestion_alias_name from utils.es_client import ESClient logger = logging.getLogger(__name__) +def _suggestion_length_factor(text: str) -> float: + """Down-weight longer strings at query time: factor 1 / sqrt(token_len).""" + n = max(len(simple_tokenize_query(str(text or ""))), 1) + return 1.0 / (n ** 0.5) + + +def _score_with_token_length_penalty(item: Dict[str, Any]) -> float: + base = float(item.get("score") or 0.0) + return base * _suggestion_length_factor(str(item.get("text") or "")) + + class SuggestionService: def __init__(self, es_client: ESClient): self.es_client = es_client @@ -150,6 +162,17 @@ class SuggestionService: seen_text_norm.add(norm) suggestions.append(dict(item)) + def _finalize_suggestion_list(items: 
List[Dict[str, Any]], limit: int) -> List[Dict[str, Any]]: + out = list(items) + out.sort( + key=lambda x: ( + _score_with_token_length_penalty(x), + float(x.get("rank_score") or 0.0), + ), + reverse=True, + ) + return out[:limit] + _append_items(completion_items) # Fast path: avoid a second ES query for short prefixes or when completion already full. @@ -168,7 +191,7 @@ class SuggestionService: "query": query, "language": language, "resolved_language": resolved_lang, - "suggestions": suggestions[:size], + "suggestions": _finalize_suggestion_list(suggestions, size), "took_ms": took_ms, } @@ -260,6 +283,6 @@ class SuggestionService: "query": query, "language": language, "resolved_language": resolved_lang, - "suggestions": suggestions[:size], + "suggestions": _finalize_suggestion_list(suggestions, size), "took_ms": took_ms, } diff --git a/tests/test_suggestions.py b/tests/test_suggestions.py index af1d1c0..8b97b96 100644 --- a/tests/test_suggestions.py +++ b/tests/test_suggestions.py @@ -388,6 +388,53 @@ def test_build_full_candidates_fallback_to_id_when_spu_id_missing(monkeypatch): @pytest.mark.unit +def test_build_full_candidates_tags_and_qanchor_phrases(monkeypatch): + fake_es = FakeESClient() + builder = SuggestionIndexBuilder(es_client=fake_es, db_engine=None) + + monkeypatch.setattr( + builder, + "_iter_products", + lambda tenant_id, batch_size=500: iter( + [ + { + "_id": "900", + "_source": { + "spu_id": "900", + "title": {"en": "Tee", "zh": "T恤"}, + "qanchors": { + "en": "slim fit, sporty casual", + "zh": "修身, 显瘦", + }, + "tags": ["Classic", "辣妹风", "ribbed neckline"], + }, + } + ] + ), + ) + monkeypatch.setattr(builder, "_iter_query_log_rows", lambda **kwargs: iter([])) + + key_to_candidate = builder._build_full_candidates( + tenant_id="162", + index_languages=["en", "zh"], + primary_language="en", + days=365, + batch_size=100, + min_query_len=1, + ) + + assert ("en", "slim fit") in key_to_candidate + assert ("en", "sporty casual") in key_to_candidate + 
assert ("zh", "修身") in key_to_candidate + assert ("zh", "显瘦") in key_to_candidate + assert ("en", "classic") in key_to_candidate + assert key_to_candidate[("en", "classic")].tag_spu_ids == {"900"} + assert ("zh", "辣妹风") in key_to_candidate + assert key_to_candidate[("zh", "辣妹风")].tag_spu_ids == {"900"} + assert ("en", "ribbed neckline") in key_to_candidate + + +@pytest.mark.unit def test_build_full_candidates_splits_long_title_for_suggest(monkeypatch): fake_es = FakeESClient() builder = SuggestionIndexBuilder(es_client=fake_es, db_engine=None) -- libgit2 0.21.2