prompts.py
5.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""LLM prompt templates for relevance judging (keep wording changes here)."""
from __future__ import annotations
import json
from typing import Any, Dict, Sequence
def classify_batch_simple_prompt(query: str, numbered_doc_lines: Sequence[str]) -> str:
    """Build the single-pass relevance-labelling prompt for a batch of products.

    Args:
        query: The user's search query; interpolated verbatim into the prompt.
        numbered_doc_lines: One pre-formatted text line per product. The lines
            are joined with newlines and their count is used to tell the model
            how many output lines to produce.

    Returns:
        The complete prompt text, asking for one of Exact / Partial /
        Irrelevant per product, in product order.
    """
    lines = "\n".join(numbered_doc_lines)
    n = len(numbered_doc_lines)
    # The level/definition separator is an em dash. It is spelled as the
    # ASCII escape \u2014 so the prompt text cannot be corrupted by the source
    # file being re-saved or served in a non-UTF-8 encoding (the separator had
    # previously degraded into the unrelated Thai letter U+0E42).
    return (
        "You are an e-commerce search result relevance evaluation assistant. "
        "Based on the user query and each product's information, output the relevance level for each product.\n\n"
        "## Relevance Level Criteria\n"
        "Exact \u2014 Fully matches the user's search intent.\n"
        "Partial \u2014 Primary intent satisfied (same category or similar use, basically aligns with search intent), "
        "but secondary attributes such as color, style, size, fit, length, or material deviate from or cannot be confirmed.\n"
        "Irrelevant \u2014 Category or use case mismatched, primary intent not satisfied.\n\n"
        "Additional judging guidance:\n"
        "- If the query clearly names a product type, product type matching has the highest priority. "
        "Dress vs skirt vs jumpsuit, jeans vs pants, T-shirt vs blouse, cardigan vs sweater, boots vs shoes, "
        "bra vs top, backpack vs bag are not interchangeable.\n"
        "- When the query clearly specifies a concrete product type, a different product type should usually be Irrelevant, not Partial.\n"
        "- If an attribute looks missing or uncertain, prefer Partial instead of Exact.\n"
        "- Do not guess missing attributes.\n"
        "- Graphic, slogan, holiday, memorial, or message tees are not Exact for a plain tee query unless that graphic/theme is requested.\n"
        "- Be conservative with Exact.\n\n"
        f"Query: {query}\n\n"
        "Products:\n"
        f"{lines}\n\n"
        "## Output Format\n"
        f"Strictly output {n} lines, each line containing exactly one of Exact / Partial / Irrelevant. "
        "They must correspond sequentially to the products above. Do not output any other information.\n"
    )
def extract_query_profile_prompt(query: str, parser_hints: Dict[str, Any]) -> str:
    """Build the prompt asking the LLM for a structured intent profile of a query.

    Args:
        query: The original user query; interpolated verbatim.
        parser_hints: Parser-derived hints. They are serialized with
            ``json.dumps(..., ensure_ascii=False)`` so non-ASCII text is
            embedded as-is rather than as ``\\uXXXX`` escapes.

    Returns:
        The prompt text: instructions, the JSON schema to return, the
        guidelines, then the query and the serialized hints.
    """
    serialized_hints = json.dumps(parser_hints, ensure_ascii=False)

    # Role statement and the "query beats hints" policy.
    instructions = (
        "You are building a structured intent profile for e-commerce relevance judging.\n"
        "Use the original user query as the source of truth. Parser hints may help, but if a hint conflicts with the original query, trust the original query.\n"
        "Be conservative: only mark an attribute as required if the user explicitly asked for it.\n\n"
    )

    # Shape of the JSON object the model is asked to return.
    schema = (
        "Return JSON with this schema:\n"
        "{\n"
        ' "normalized_query_en": string,\n'
        ' "primary_category": string,\n'
        ' "allowed_categories": [string],\n'
        ' "required_attributes": [\n'
        ' {"name": string, "required_terms": [string], "conflicting_terms": [string], "match_mode": "explicit"}\n'
        " ],\n"
        ' "notes": [string]\n'
        "}\n\n"
    )

    # Rules for filling in categories and required attributes.
    guidelines = (
        "Guidelines:\n"
        "- Exact later will require explicit evidence for all required attributes.\n"
        "- allowed_categories should contain only near-synonyms of the same product type, not substitutes. For example dress can allow midi dress/cocktail dress, but not skirt, top, jumpsuit, or outfit unless the query explicitly asks for them.\n"
        "- If the query asks for dress/skirt/jeans/t-shirt, near but different product types are not Exact.\n"
        "- If the query includes color, fit, silhouette, or length, include them as required_attributes.\n"
        "- For fit words, include conflicting terms when obvious, e.g. fitted conflicts with oversized/loose; oversized conflicts with fitted/tight.\n"
        "- For color, include conflicting colors only when clear from the query.\n\n"
    )

    # Per-call inputs go last.
    inputs = (
        f"Original query: {query}\n"
        f"Parser hints JSON: {serialized_hints}\n"
    )

    return "".join((instructions, schema, guidelines, inputs))
def classify_batch_complex_prompt(
    query: str,
    query_profile: Dict[str, Any],
    numbered_doc_lines: Sequence[str],
) -> str:
    """Build the profile-aware relevance-judging prompt for a batch of products.

    Args:
        query: The original user query; interpolated verbatim.
        query_profile: Structured query profile. It is serialized with
            ``json.dumps(..., ensure_ascii=False)`` and embedded in the prompt.
        numbered_doc_lines: One pre-formatted text line per product; joined
            with newlines under the "Products:" heading.

    Returns:
        The prompt text, ending with the JSON schema the model must return.
    """
    product_block = "\n".join(numbered_doc_lines)
    serialized_profile = json.dumps(query_profile, ensure_ascii=False)

    # Segments in final order: role, rules, per-call inputs, output contract.
    segments = (
        "You are an e-commerce search relevance judge.\n",
        "Judge each product against the structured query profile below.\n\n",
        "Relevance rules:\n",
        "- Exact: product type matches the target intent, and every explicit required attribute is positively supported by the title/options/tags/category. If an attribute is missing or only guessed, it is NOT Exact.\n",
        "- Partial: main product type/use case matches, but some required attribute is missing, weaker, uncertain, or only approximately matched.\n",
        "- Irrelevant: product type/use case mismatched, or an explicit required attribute clearly conflicts.\n",
        "- Be conservative with Exact.\n",
        "- Graphic/holiday/message tees are not Exact for a plain color/style tee query unless that graphic/theme was requested.\n",
        "- Jumpsuit/romper/set is not Exact for dress/skirt/jeans queries.\n\n",
        f"Original query: {query}\n",
        f"Structured query profile JSON: {serialized_profile}\n\n",
        "Products:\n",
        f"{product_block}\n\n",
        "Return JSON only, with schema:\n",
        '{"labels":[{"index":1,"label":"Exact","reason":"short phrase"}]}\n',
    )
    return "".join(segments)