# config.py 549 Bytes
"""Reranker service configuration (simple Python config)."""

import os


class RerankerConfig(object):
    """Settings for the reranker service, held as plain class attributes.

    HOST and PORT are taken from the RERANKER_HOST / RERANKER_PORT
    environment variables, falling back to the defaults below. The lookup
    happens once, when this class body is executed. Every other value is
    a fixed constant.
    """

    # ---- Server ----
    HOST = os.environ.get("RERANKER_HOST", "0.0.0.0")
    # The environment supplies a string, so the value is converted to int.
    PORT = int(os.environ.get("RERANKER_PORT", "6007"))

    # ---- Model ----
    MODEL_NAME = "Qwen/Qwen3-Reranker-0.6B"
    # None -> auto (cuda if available)
    DEVICE = None
    USE_FP16 = True
    BATCH_SIZE = 64
    MAX_LENGTH = 512
    CACHE_DIR = "./model_cache"
    ENABLE_WARMUP = True

    # ---- Request limits ----
    MAX_DOCS = 1000

    # ---- Output ----
    NORMALIZE = True


# Module-level instance of the settings. All values are class attributes,
# so this instance holds no state of its own.
CONFIG = RerankerConfig()