Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions documentation/docs/get-started/setup.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Restart your Khoj server after the first run to ensure all settings are applied
- Set `KHOJ_ADMIN_PASSWORD`, `KHOJ_DJANGO_SECRET_KEY` (and optionally the `KHOJ_ADMIN_EMAIL`) to something secure. This allows you to customize Khoj later via the admin panel.
- Set `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `GEMINI_API_KEY` to your API key if you want to use OpenAI, Anthropic or Gemini commercial chat models respectively.
- Uncomment `OPENAI_BASE_URL` to use [Ollama](/advanced/ollama?type=first-run&server=docker#setup) running on your host machine. Or set it to the URL of your OpenAI compatible API like vLLM or [LMStudio](/advanced/lmstudio).
- (Optional) Set `KHOJ_LLM_TIMEOUT_READ` to configure the read timeout for LLM API calls (default: `60` seconds). Useful for slower connections or larger models that take longer to respond.
- (Optional) Set `KHOJ_LLM_TIMEOUT_CONNECT` to configure the connection timeout for LLM API calls (default: `30` seconds).
3. Start Khoj by running the following command in the same directory as your docker-compose.yml file.
```shell
cd ~/.khoj
Expand All @@ -71,6 +73,8 @@ Restart your Khoj server after the first run to ensure all settings are applied
- Set `KHOJ_ADMIN_PASSWORD`, `KHOJ_DJANGO_SECRET_KEY` (and optionally the `KHOJ_ADMIN_EMAIL`) to something secure. This allows you to customize Khoj later via the admin panel.
- Set `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `GEMINI_API_KEY` to your API key if you want to use OpenAI, Anthropic or Gemini commercial chat models respectively.
- Uncomment `OPENAI_BASE_URL` to use [Ollama](/advanced/ollama) running on your host machine. Or set it to the URL of your OpenAI compatible API like vLLM or [LMStudio](/advanced/lmstudio).
- (Optional) Set `KHOJ_LLM_TIMEOUT_READ` to configure the read timeout for LLM API calls (default: `60` seconds). Useful for slower connections or larger models that take longer to respond.
- (Optional) Set `KHOJ_LLM_TIMEOUT_CONNECT` to configure the connection timeout for LLM API calls (default: `30` seconds).
3. Start Khoj by running the following command in the same directory as your docker-compose.yml file.
```shell
# Windows users should use their WSL2 terminal to run these commands
Expand All @@ -93,6 +97,8 @@ Restart your Khoj server after the first run to ensure all settings are applied
- Set `KHOJ_ADMIN_PASSWORD`, `KHOJ_DJANGO_SECRET_KEY` (and optionally the `KHOJ_ADMIN_EMAIL`) to something secure. This allows you to customize Khoj later via the admin panel.
- Set `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `GEMINI_API_KEY` to your API key if you want to use OpenAI, Anthropic or Gemini commercial chat models respectively.
- Uncomment `OPENAI_BASE_URL` to use [Ollama](/advanced/ollama) running on your host machine. Or set it to the URL of your OpenAI compatible API like vLLM or [LMStudio](/advanced/lmstudio).
- (Optional) Set `KHOJ_LLM_TIMEOUT_READ` to configure the read timeout for LLM API calls (default: `60` seconds). Useful for slower connections or larger models that take longer to respond.
- (Optional) Set `KHOJ_LLM_TIMEOUT_CONNECT` to configure the connection timeout for LLM API calls (default: `30` seconds).
3. Start Khoj by running the following command in the same directory as your docker-compose.yml file.
```shell
cd ~/.khoj
Expand Down
32 changes: 21 additions & 11 deletions src/khoj/processor/conversation/openai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,22 @@
MAX_COMPLETION_TOKENS = 16000


def get_llm_timeout() -> httpx.Timeout:
    """
    Get the httpx.Timeout configuration for LLM API calls.

    Supports environment variables:
    - KHOJ_LLM_TIMEOUT_READ: Read timeout in seconds (default: 60)
    - KHOJ_LLM_TIMEOUT_CONNECT: Connection timeout in seconds (default: 30)

    A malformed (non-numeric) environment value falls back to the default
    instead of raising, so a config typo cannot break every LLM call at
    request time.

    Returns:
        httpx.Timeout where the connect timeout is passed as the default for
        all timeout classes (connect/write/pool) and read= overrides the read
        timeout, mirroring the previous inline `httpx.Timeout(30, read=...)`.
    """

    def _env_float(name: str, default: float) -> float:
        # Only a parseable numeric value overrides the default.
        try:
            return float(os.getenv(name, default))
        except (TypeError, ValueError):
            return default

    connect_timeout = _env_float("KHOJ_LLM_TIMEOUT_CONNECT", 30.0)
    read_timeout = _env_float("KHOJ_LLM_TIMEOUT_READ", 60.0)
    return httpx.Timeout(connect_timeout, read=read_timeout)


def _extract_text_for_instructions(content: Union[str, List, Dict, None]) -> str:
"""Extract plain text from a message content suitable for Responses API instructions."""
if content is None:
Expand Down Expand Up @@ -158,7 +174,6 @@ def completion_with_backoff(
elif is_groq_api(api_base_url):
model_kwargs["service_tier"] = "auto"

read_timeout = 300 if is_local_api(api_base_url) else 60
if os.getenv("KHOJ_LLM_SEED"):
model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

Expand All @@ -171,7 +186,7 @@ def completion_with_backoff(
with client.beta.chat.completions.stream(
messages=formatted_messages, # type: ignore
model=model_name,
timeout=httpx.Timeout(30, read=read_timeout),
timeout=get_llm_timeout(),
**model_kwargs,
) as chat:
for chunk in stream_processor(chat):
Expand Down Expand Up @@ -215,7 +230,7 @@ def completion_with_backoff(
chunk = client.beta.chat.completions.parse(
messages=formatted_messages, # type: ignore
model=model_name,
timeout=httpx.Timeout(30, read=read_timeout),
timeout=get_llm_timeout(),
**model_kwargs,
)
aggregated_response = chunk.choices[0].message.content
Expand Down Expand Up @@ -360,7 +375,6 @@ async def chat_completion_with_backoff(
elif is_groq_api(api_base_url):
model_kwargs["service_tier"] = "auto"

read_timeout = 300 if is_local_api(api_base_url) else 60
if os.getenv("KHOJ_LLM_SEED"):
model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

Expand All @@ -373,7 +387,7 @@ async def chat_completion_with_backoff(
model=model_name,
stream=stream,
temperature=temperature,
timeout=httpx.Timeout(30, read=read_timeout),
timeout=get_llm_timeout(),
**model_kwargs,
)
if not stream:
Expand Down Expand Up @@ -494,15 +508,13 @@ def responses_completion_with_backoff(
model_kwargs.pop("top_p", None)
model_kwargs.pop("stop", None)

read_timeout = 300 if is_local_api(api_base_url) else 60

# Stream and aggregate
model_response: OpenAIResponse = client.responses.create(
input=formatted_messages,
instructions=instructions,
model=model_name,
temperature=temperature,
timeout=httpx.Timeout(30, read=read_timeout), # type: ignore
timeout=get_llm_timeout(), # type: ignore
store=False,
**model_kwargs,
)
Expand Down Expand Up @@ -607,8 +619,6 @@ async def responses_chat_completion_with_backoff(
model_kwargs.pop("top_p", None)
model_kwargs.pop("stop", None)

read_timeout = 300 if is_local_api(api_base_url) else 60

aggregated_text = ""
last_final: Optional[OpenAIResponse] = None
# Tool call assembly buffers
Expand All @@ -621,7 +631,7 @@ async def responses_chat_completion_with_backoff(
instructions=instructions,
model=model_name,
temperature=temperature,
timeout=httpx.Timeout(30, read=read_timeout),
timeout=get_llm_timeout(),
**model_kwargs,
) as stream: # type: ignore
async for event in stream: # type: ignore
Expand Down
12 changes: 9 additions & 3 deletions src/khoj/routers/api_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1708,17 +1708,23 @@ async def delayed_flush():
await asyncio.sleep(BUFFER_FLUSH_INTERVAL)
# Check if there's still content to flush
chunks = "".join([chunk async for chunk in flush_message_buffer()])
await websocket.send_text(chunks)
await websocket.send_text(ChatEvent.END_EVENT.value)
try:
await websocket.send_text(chunks)
await websocket.send_text(ChatEvent.END_EVENT.value)
except RuntimeError:
pass # WebSocket already closed

# Flush buffer if no new messages arrive within debounce interval
message_buffer.timeout = asyncio.create_task(delayed_flush())
except asyncio.CancelledError:
logger.debug(f"Chat request cancelled for user {websocket.scope['user'].object.id}")
raise
except Exception as e:
await websocket.send_text(json.dumps({"error": "Internal server error"}))
logger.error(f"Error processing chat request: {e}", exc_info=True)
try:
await websocket.send_text(json.dumps({"error": "Internal server error"}))
except RuntimeError:
pass # WebSocket already closed
raise


Expand Down