# config.py 1.62 KB
"""Reranker service compatibility config derived from unified app config."""

from __future__ import annotations

import os

from config.loader import get_app_config


class RerankerConfig:
    """Flat, attribute-style view of the rerank service settings.

    Everything is resolved once, at construction time, from
    ``get_app_config().services.rerank``. Some values can be overridden
    through environment variables; an unset or empty variable defers to the
    app config:

    * ``RERANK_INSTANCE`` -- instance name (falls back to the service default)
    * ``RERANKER_HOST`` / ``RERANKER_PORT`` -- host and port
    * ``RERANK_BACKEND`` -- backend name
    * ``RERANKER_RUNTIME_DIR`` -- runtime directory

    Raises:
        ValueError: if a port or other integer setting is not a valid integer.
        TypeError: if no port is available from either the environment or
            the instance config.
    """

    # String spellings that must be read as "off". Plain ``bool("false")`` is
    # True, which silently enables a flag the operator tried to disable.
    _FALSE_STRINGS = frozenset({"", "0", "false", "no", "off", "n", "f"})

    @staticmethod
    def _as_bool(value: object) -> bool:
        """Coerce a config value to ``bool``, honouring textual false values.

        Non-string values keep ordinary truthiness (so ``None`` is False).
        Strings are compared case-insensitively, ignoring surrounding
        whitespace, against ``_FALSE_STRINGS``; any other string is True.
        """
        if isinstance(value, str):
            return value.strip().lower() not in RerankerConfig._FALSE_STRINGS
        return bool(value)

    @staticmethod
    def _as_int(value: object, default: int) -> int:
        """Coerce *value* to ``int``, treating ``None`` as "use *default*".

        Only ``None`` triggers the fallback, so an explicit ``0`` is kept.
        """
        return int(default if value is None else value)

    def __init__(self) -> None:
        app_config = get_app_config()
        service = app_config.services.rerank

        # A blank or whitespace-only RERANK_INSTANCE falls back to the default.
        instance_name = (
            str(os.getenv("RERANK_INSTANCE") or service.default_instance).strip()
            or service.default_instance
        )
        instance = service.get_instance(instance_name)
        # NOTE(review): backend settings are looked up by instance name, so a
        # RERANK_BACKEND override changes only ``self.BACKEND`` below and not
        # the settings read from ``backend`` -- confirm this is intended.
        backend = service.get_backend_config(instance_name)
        request = service.request

        # --- instance / process settings (environment wins over app config) ---
        self.INSTANCE = instance_name
        self.HOST = str(os.getenv("RERANKER_HOST") or instance.host)
        self.PORT = int(os.getenv("RERANKER_PORT") or instance.port)
        self.BACKEND = str(os.getenv("RERANK_BACKEND") or instance.backend)
        self.RUNTIME_DIR = str(
            os.getenv("RERANKER_RUNTIME_DIR")
            or instance.runtime_dir
            or f"./.runtime/reranker/{instance_name}"
        )

        # --- backend (model) settings ---
        self.MODEL_NAME = str(backend.get("model_name") or "Qwen/Qwen3-Reranker-0.6B")
        self.DEVICE = backend.get("device")
        self.USE_FP16 = self._as_bool(backend.get("use_fp16", True))
        # "batch_size" takes precedence; "infer_batch_size" is the fallback
        # key. A missing or null value moves on to the next candidate.
        batch_size = backend.get("batch_size")
        if batch_size is None:
            batch_size = backend.get("infer_batch_size")
        self.BATCH_SIZE = self._as_int(batch_size, 64)
        self.MAX_LENGTH = self._as_int(backend.get("max_length"), 512)
        self.CACHE_DIR = str(backend.get("cache_dir") or "./model_cache")
        self.ENABLE_WARMUP = self._as_bool(backend.get("enable_warmup", True))

        # --- request-level settings ---
        self.MAX_DOCS = self._as_int(request.get("max_docs"), 1000)
        self.NORMALIZE = self._as_bool(request.get("normalize", True))


# Module-level instance, built when this module is imported. Construction
# reads the unified app config and the process environment, so both must be
# available by the time of the first import.
CONFIG = RerankerConfig()