# test_es_query_builder.py (5.9 KB)
from types import SimpleNamespace

import numpy as np

from search.es_query_builder import ESQueryBuilder


def _builder() -> ESQueryBuilder:
    """Return an ``ESQueryBuilder`` with the settings shared by the tests below.

    The builder matches on boosted English ``title``/``brief`` fields, uses
    ``title_embedding`` as the text embedding field and defaults to English.
    """
    settings = {
        "match_fields": ["title.en^3.0", "brief.en^1.0"],
        "text_embedding_field": "title_embedding",
        "default_language": "en",
    }
    return ESQueryBuilder(**settings)


def test_knn_prefilter_includes_range_filters():
    """With only a range filter supplied, the KNN clause's filter is that range."""
    body = _builder().build_query(
        query_text="bags",
        query_vector=np.array([0.1, 0.2, 0.3]),
        range_filters={"min_price": {"gte": 50, "lt": 100}},
        enable_knn=True,
    )

    expected_filter = {"range": {"min_price": {"gte": 50, "lt": 100}}}

    assert "knn" in body
    assert body["knn"]["filter"] == expected_filter


def test_knn_prefilter_uses_only_conjunctive_filters_when_disjunctive_present():
    """Check how filters are split when one facet is marked disjunctive.

    ``category_name`` is configured as a disjunctive facet. The test expects
    the KNN filter to hold only the ``vendor`` term and the price range, and
    the ``category_name`` terms to appear in ``post_filter``.
    """
    expected_knn_filter = {
        "bool": {
            "filter": [
                {"term": {"vendor": "Nike"}},
                {"range": {"min_price": {"gte": 50, "lt": 100}}},
            ]
        }
    }
    expected_post_filter = {"terms": {"category_name": ["A", "B"]}}

    disjunctive_facet = SimpleNamespace(field="category_name", disjunctive=True)
    body = _builder().build_query(
        query_text="bags",
        query_vector=np.array([0.1, 0.2, 0.3]),
        filters={"category_name": ["A", "B"], "vendor": "Nike"},
        range_filters={"min_price": {"gte": 50, "lt": 100}},
        facet_configs=[disjunctive_facet],
        enable_knn=True,
    )

    assert "knn" in body
    assert "filter" in body["knn"]
    assert body["knn"]["filter"] == expected_knn_filter
    assert body["post_filter"] == expected_post_filter


def test_knn_prefilter_not_added_without_filters():
    """Without any filters, the KNN clause has no ``filter`` key and keeps its name."""
    body = _builder().build_query(
        query_text="bags",
        query_vector=np.array([0.1, 0.2, 0.3]),
        enable_knn=True,
    )

    assert "knn" in body
    knn_clause = body["knn"]
    assert "filter" not in knn_clause
    assert knn_clause["_name"] == "knn_query"


def test_text_query_contains_only_base_translation_and_fallback_named_queries():
    """The ``should`` clauses carry exactly the expected ``_name`` values, in order.

    The parsed query has English and Chinese texts, reports that the source
    language is not among the index languages, and lists ``fr`` as an extra
    index language.
    """
    parsed_attrs = {
        "query_text_by_lang": {"en": "dress", "zh": "连衣裙"},
        "search_langs": ["en", "zh"],
        "detected_language": "en",
        "source_in_index_languages": False,
        "index_languages": ["en", "zh", "fr"],
    }
    parsed_query = SimpleNamespace(**parsed_attrs)

    body = _builder().build_query(
        query_text="dress",
        parsed_query=parsed_query,
        enable_knn=False,
    )

    clause_names = []
    for should_clause in body["query"]["bool"]["should"]:
        clause_names.append(should_clause["multi_match"]["_name"])

    assert clause_names == [
        "base_query",
        "base_query_trans_zh",
        "fallback_original_query_fr",
    ]


def test_mixed_script_merges_en_fields_into_zh_clause():
    """A zh query containing both scripts also searches the ``.en`` fields.

    The English variants of ``title`` and ``brief`` are expected next to the
    Chinese ones, each with a ``^0.8`` boost.
    """
    builder = ESQueryBuilder(
        match_fields=["title.en^3.0"],
        multilingual_fields=["title", "brief"],
        shared_fields=[],
        text_embedding_field="title_embedding",
        default_language="en",
    )
    parsed_attrs = {
        "query_text_by_lang": {"zh": "法式 dress"},
        "search_langs": ["zh"],
        "detected_language": "zh",
        "source_in_index_languages": True,
        "index_languages": ["zh", "en"],
        "contains_chinese": True,
        "contains_english": True,
    }
    body = builder.build_query(
        query_text="法式 dress",
        parsed_query=SimpleNamespace(**parsed_attrs),
        enable_knn=False,
    )

    boosted_fields = body["query"]["multi_match"]["fields"]
    # Strip any "^boost" suffix to compare bare field names.
    field_names = {entry.partition("^")[0] for entry in boosted_fields}

    assert {"title.zh", "title.en"} <= field_names
    assert {"brief.zh", "brief.en"} <= field_names
    # Merged supplemental language fields use boost * 0.8 (implicit 1.0 -> ^0.8)
    assert "title.en^0.8" in boosted_fields
    assert "brief.en^0.8" in boosted_fields


def test_mixed_script_merges_zh_fields_into_en_clause():
    """An en query containing both scripts also searches ``title.zh`` at ``^0.8``."""
    builder = ESQueryBuilder(
        match_fields=["title.en^3.0"],
        multilingual_fields=["title"],
        shared_fields=[],
        text_embedding_field="title_embedding",
        default_language="en",
    )
    parsed_attrs = {
        "query_text_by_lang": {"en": "red 连衣裙"},
        "search_langs": ["en"],
        "detected_language": "en",
        "source_in_index_languages": True,
        "index_languages": ["zh", "en"],
        "contains_chinese": True,
        "contains_english": True,
    }
    body = builder.build_query(
        query_text="red 连衣裙",
        parsed_query=SimpleNamespace(**parsed_attrs),
        enable_knn=False,
    )

    boosted_fields = body["query"]["multi_match"]["fields"]
    # Strip any "^boost" suffix to compare bare field names.
    field_names = {entry.partition("^")[0] for entry in boosted_fields}

    assert {"title.en", "title.zh"} <= field_names
    assert "title.zh^0.8" in boosted_fields


def test_mixed_script_merged_fields_scale_configured_boosts():
    """Configured boosts are kept for the primary field and scaled for the merged one.

    With ``field_boosts`` of 5.0 for ``title.zh`` and 10.0 for ``title.en``,
    a zh query containing both scripts is expected to yield ``title.zh^5.0``
    and ``title.en^8.0``.
    """
    builder = ESQueryBuilder(
        match_fields=["title.en^3.0"],
        multilingual_fields=["title"],
        shared_fields=[],
        field_boosts={"title.zh": 5.0, "title.en": 10.0},
        text_embedding_field="title_embedding",
        default_language="en",
    )
    parsed_attrs = {
        "query_text_by_lang": {"zh": "法式 dress"},
        "search_langs": ["zh"],
        "detected_language": "zh",
        "source_in_index_languages": True,
        "index_languages": ["zh", "en"],
        "contains_chinese": True,
        "contains_english": True,
    }
    body = builder.build_query(
        query_text="法式 dress",
        parsed_query=SimpleNamespace(**parsed_attrs),
        enable_knn=False,
    )

    boosted_fields = body["query"]["multi_match"]["fields"]
    assert "title.zh^5.0" in boosted_fields
    assert "title.en^8.0" in boosted_fields  # 10.0 * 0.8


def test_mixed_script_does_not_merge_en_when_not_in_index_languages():
    """English fields are not merged in when ``en`` is not an index language.

    Only ``zh`` is listed in ``index_languages``, so the query fields are
    expected to include ``title.zh`` and exclude ``title.en``.
    """
    builder = ESQueryBuilder(
        match_fields=["title.zh^3.0"],
        multilingual_fields=["title"],
        shared_fields=[],
        text_embedding_field="title_embedding",
        default_language="zh",
    )
    parsed_attrs = {
        "query_text_by_lang": {"zh": "法式 dress"},
        "search_langs": ["zh"],
        "detected_language": "zh",
        "source_in_index_languages": True,
        "index_languages": ["zh"],
        "contains_chinese": True,
        "contains_english": True,
    }
    body = builder.build_query(
        query_text="法式 dress",
        parsed_query=SimpleNamespace(**parsed_attrs),
        enable_knn=False,
    )

    boosted_fields = body["query"]["multi_match"]["fields"]
    # Strip any "^boost" suffix to compare bare field names.
    field_names = {entry.partition("^")[0] for entry in boosted_fields}

    assert "title.zh" in field_names
    assert "title.en" not in field_names