Refer to Model Configs for how to set the environment variables for your particular deployment.

Note: While we support self-hosted LLMs, you will get significantly better responses with a more powerful model like GPT-4.
GEN_AI_MODEL_PROVIDER=ollama_chat

# Model of your choice
GEN_AI_MODEL_VERSION=llama2

# Wherever Ollama is running
# Hint: To point Docker containers to http://localhost:11434, use host.docker.internal instead of localhost
GEN_AI_API_ENDPOINT=http://host.docker.internal:11434

# Let's also make some changes to accommodate the weaker locally hosted LLM
QA_TIMEOUT=120  # Set a longer timeout, running models on CPU can be slow

# Always run search, never skip
DISABLE_LLM_CHOOSE_SEARCH=True

# Don't use LLM for reranking, the prompts aren't properly tuned for these models
DISABLE_LLM_CHUNK_FILTER=True

# Don't try to rephrase the user query, the prompts aren't properly tuned for these models
DISABLE_LLM_QUERY_REPHRASE=True

# Don't use LLM to automatically discover time/source filters
DISABLE_LLM_FILTER_EXTRACTION=True

# Uncomment this one if you find that the model is struggling (slow or distracted by too many docs)
# Use only 1 section from the documents and do not require quotes
# QA_PROMPT_OVERRIDE=weak
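Before restarting your deployment, it can help to confirm that the Ollama endpoint configured above is reachable and that the model has actually been pulled. The sketch below is a standalone check against Ollama's HTTP API (the `/api/tags` and `/api/generate` routes on port 11434); it is not part of the product, and it assumes the `requests` Python package is installed. Adjust the endpoint and model name to match your values of GEN_AI_API_ENDPOINT and GEN_AI_MODEL_VERSION.

```python
# Quick smoke test for the Ollama endpoint configured above (illustrative helper,
# not part of the deployment). Assumes the `requests` package is installed.
import requests

ENDPOINT = "http://localhost:11434"  # from inside a container, use http://host.docker.internal:11434
MODEL = "llama2"                     # should match GEN_AI_MODEL_VERSION

# /api/tags lists the models Ollama has pulled locally
tags = requests.get(f"{ENDPOINT}/api/tags", timeout=10).json()
print("Pulled models:", [m["name"] for m in tags.get("models", [])])

# /api/generate runs a single non-streaming completion as a sanity check
resp = requests.post(
    f"{ENDPOINT}/api/generate",
    json={"model": MODEL, "prompt": "Reply with the word OK.", "stream": False},
    timeout=120,  # CPU-only inference can be slow, mirroring QA_TIMEOUT above
)
print(resp.json().get("response", "").strip())
```

If the listed models come back empty, pull the model first (for example, `ollama pull llama2`). Note that on Linux hosts, `host.docker.internal` may not resolve inside containers by default; depending on your Docker version you may need to map it to the host gateway or point GEN_AI_API_ENDPOINT at the host's IP instead.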