# Pick any model served by FastChat
GEN_AI_MODEL_VERSION=vicuna-7b-v1.5
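# The value should match a model name your FastChat server is actually serving; if
# unsure, the OpenAI-compatible server can list the served models, e.g. (run from a shell):
#   curl http://<your-FastChat-server>/v1/models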
# Hint: To point Docker containers to http://localhost, use http://host.docker.internal
# Don't forget to include the /v1 below
GEN_AI_API_ENDPOINT=http://<your-FastChat-server>/v1
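# For example, with FastChat's OpenAI-compatible server running on the Docker host
# (assuming its default port of 8000), this would look like:
# GEN_AI_API_ENDPOINT=http://host.docker.internal:8000/v1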
GEN_AI_LLM_PROVIDER_TYPE=openai # Since it's an OpenAI-compatible API
# Let's also make some changes to accommodate the weaker locally hosted LLM
QA_TIMEOUT=120 # Set a longer timeout since running models on CPU can be slow
# Always run search, never skip
DISABLE_LLM_CHOOSE_SEARCH=True
# Don't use LLM for reranking, the prompts aren't properly tuned for these models
DISABLE_LLM_CHUNK_FILTER=True
# Don't try to rephrase the user query, the prompts aren't properly tuned for these models
DISABLE_LLM_QUERY_REPHRASE=True
# Don't use LLM to automatically discover time/source filters
DISABLE_LLM_FILTER_EXTRACTION=True
# Use only 1 section from the documents and do not require quotes
QA_PROMPT_OVERRIDE=weak