From 5dad71474a5757bcc3769379e9de7ca35803033d Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 5 Mar 2026 14:14:19 +0530
Subject: [PATCH 01/96] feat: add kubernetes app role selection

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 appinfo/info.xml                     | 14 ++++++++++++++
 context_chat_backend/controller.py   | 15 ++++++++-------
 context_chat_backend/task_fetcher.py |  4 ++++
 context_chat_backend/types.py        |  8 ++++++++
 context_chat_backend/utils.py        | 13 ++++++++++++-
 5 files changed, 46 insertions(+), 8 deletions(-)
 create mode 100644 context_chat_backend/task_fetcher.py
diff --git a/appinfo/info.xml b/appinfo/info.xml
index 9760cd2..30194ba 100644
--- a/appinfo/info.xml
+++ b/appinfo/info.xml
@@ -82,5 +82,19 @@ Setup background job workers as described here: https://docs.nextcloud.com/serve
 				<description>Password to be used for authenticating requests to the OpenAI-compatible endpoint set in CC_EM_BASE_URL.</description>
 			</variable>
 		</environment-variables>
+		<k8s-service-roles>
+			<role>
+				<name>rp</name>
+				<display-name>Request Processing Mode</display-name>
+				<env>APP_ROLE=rp</env>
+				<expose>true</expose>
+			</role>
+			<role>
+				<name>indexing</name>
+				<display-name>Indexing Mode</display-name>
+				<env>APP_ROLE=indexing</env>
+				<expose>false</expose>
+			</role>
+		</k8s-service-roles>
 	</external-app>
 </info>
diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index c26b930..0b6b53d 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -75,6 +75,7 @@
 def enabled_handler(enabled: bool, _: NextcloudApp | AsyncNextcloudApp) -> str:
 	if enabled:
 		app_enabled.set()
+		# todo: start bg threads to fetch docs, updates and requests to process
 	else:
 		app_enabled.clear()
 
@@ -213,6 +214,13 @@ def _():
 	return JSONResponse(content={'enabled': app_enabled.is_set()}, status_code=200)
 
 
+@app.post('/countIndexedDocuments')
+@enabled_guard(app)
+def _():
+	counts = exec_in_proc(target=count_documents_by_provider, args=(vectordb_loader,))
+	return JSONResponse(counts)
+
+
 @app.post('/updateAccessDeclarative')
 @enabled_guard(app)
 def _(
@@ -328,13 +336,6 @@ def _(userId: str = Body(embed=True)):
 	return JSONResponse('User deleted')
 
 
-@app.post('/countIndexedDocuments')
-@enabled_guard(app)
-def _():
-	counts = exec_in_proc(target=count_documents_by_provider, args=(vectordb_loader,))
-	return JSONResponse(counts)
-
-
 @app.put('/loadSources')
 @enabled_guard(app)
 def _(sources: list[UploadFile]):
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
new file mode 100644
index 0000000..5e2f317
--- /dev/null
+++ b/context_chat_backend/task_fetcher.py
@@ -0,0 +1,4 @@
+#
+# SPDX-FileCopyrightText: 2026 Nextcloud GmbH and Nextcloud contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+#
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 500a97d..7868086 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -2,6 +2,8 @@
 # SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
+from enum import Enum
+
 from pydantic import BaseModel
 
 __all__ = [
@@ -71,3 +73,9 @@ class FatalEmbeddingException(EmbeddingException):
 
 	Either malformed request, authentication error, or other non-retryable error.
 	"""
+
+
+class AppRole(str, Enum):
+	NORMAL = 'normal'
+	INDEXING = 'indexing'
+	RP = 'rp'
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index f6d6e67..224f466 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -4,6 +4,7 @@
 #
 import logging
 import multiprocessing as mp
+import os
 import re
 import traceback
 from collections.abc import Callable
@@ -14,7 +15,7 @@
 
 from fastapi.responses import JSONResponse as FastAPIJSONResponse
 
-from .types import TConfig, TEmbeddingAuthApiKey, TEmbeddingAuthBasic, TEmbeddingConfig
+from .types import AppRole, TConfig, TEmbeddingAuthApiKey, TEmbeddingAuthBasic, TEmbeddingConfig
 
 T = TypeVar('T')
 _logger = logging.getLogger('ccb.utils')
@@ -144,3 +145,13 @@ def redact_config(config: TConfig | TEmbeddingConfig) -> TConfig | TEmbeddingCon
 			em_conf.auth.password = '***REDACTED***'  # noqa: S105
 
 	return config_copy
+
+
+def get_app_role() -> AppRole:
+	role = os.getenv('APP_ROLE', '').lower()
+	if role == '':
+		return AppRole.NORMAL
+	if role not in ['indexing', 'rp']:
+		_logger.warning(f'Invalid app role: {role}, defaulting to all roles')
+		return AppRole.NORMAL
+	return AppRole(role)

From 089d27a41643c165d0474258c840ba6e048279a9 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 5 Mar 2026 16:42:41 +0530
Subject: [PATCH 02/96] feat: add thread start and stop logic

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/controller.py   | 17 ++++--
 context_chat_backend/task_fetcher.py | 82 ++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 0b6b53d..fadc5f8 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -42,6 +42,7 @@
 from .models.types import LlmException
 from nc_py_api.ex_app import AppAPIAuthMiddleware
 from .utils import JSONResponse, exec_in_proc, is_valid_provider_id, is_valid_source_id, value_of
+from .task_fetcher import start_bg_threads, stop_bg_threads
 from .vectordb.service import (
 	count_documents_by_provider,
 	decl_update_access,
@@ -73,11 +74,16 @@
 app_enabled = Event()
 
 def enabled_handler(enabled: bool, _: NextcloudApp | AsyncNextcloudApp) -> str:
-	if enabled:
-		app_enabled.set()
-		# todo: start bg threads to fetch docs, updates and requests to process
-	else:
-		app_enabled.clear()
+	try:
+		if enabled:
+			app_enabled.set()
+			start_bg_threads()
+		else:
+			app_enabled.clear()
+			stop_bg_threads()
+	except Exception as e:
+		logger.exception('Error in enabled handler:', exc_info=e)
+		return f'Error in enabled handler: {e}'
 
 	logger.info(f'App {("disabled", "enabled")[enabled]}')
 	return ''
@@ -95,6 +101,7 @@ async def lifespan(app: FastAPI):
 	yield
 	vectordb_loader.offload()
 	llm_loader.offload()
+	stop_bg_threads()
 
 
 app_config = get_config(os.environ['CC_CONFIG_PATH'])
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 5e2f317..9660b44 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -2,3 +2,85 @@
 # SPDX-FileCopyrightText: 2026 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
+
+from enum import Enum
+from threading import Thread
+
+from .types import AppRole
+from .utils import get_app_role
+
+APP_ROLE = get_app_role()
+THREADS = {}
+THREADS_STOP_EVENTS = {}
+
+
+class ThreadType(Enum):
+	FILES_INDEXING = 'files_indexing'
+	UPDATES_PROCESSING = 'updates_processing'
+	REQUEST_PROCESSING = 'request_processing'
+
+
+def files_indexing_thread():
+	...
+
+
+def updates_processing_thread():
+	...
+
+
+def request_processing_thread():
+	...
+
+
+def start_bg_threads():
+	match APP_ROLE:
+		case AppRole.INDEXING | AppRole.NORMAL:
+			THREADS[ThreadType.FILES_INDEXING] = Thread(
+				target=files_indexing_thread,
+				name='FilesIndexingThread',
+				daemon=True,
+			)
+			THREADS[ThreadType.UPDATES_PROCESSING] = Thread(
+				target=updates_processing_thread,
+				name='UpdatesProcessingThread',
+				daemon=True,
+			)
+			THREADS[ThreadType.FILES_INDEXING].start()
+			THREADS[ThreadType.UPDATES_PROCESSING].start()
+		case AppRole.RP | AppRole.NORMAL:
+			THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
+				target=request_processing_thread,
+				name='RequestProcessingThread',
+				daemon=True,
+			)
+			THREADS[ThreadType.REQUEST_PROCESSING].start()
+
+
+def stop_bg_threads():
+	match APP_ROLE:
+		case AppRole.INDEXING | AppRole.NORMAL:
+			if (
+				ThreadType.FILES_INDEXING not in THREADS
+				or ThreadType.UPDATES_PROCESSING not in THREADS
+				or ThreadType.FILES_INDEXING not in THREADS_STOP_EVENTS
+				or ThreadType.UPDATES_PROCESSING not in THREADS_STOP_EVENTS
+			):
+				return
+			THREADS_STOP_EVENTS[ThreadType.FILES_INDEXING].set()
+			THREADS_STOP_EVENTS[ThreadType.UPDATES_PROCESSING].set()
+			THREADS[ThreadType.FILES_INDEXING].join()
+			THREADS[ThreadType.UPDATES_PROCESSING].join()
+			THREADS.pop(ThreadType.FILES_INDEXING)
+			THREADS.pop(ThreadType.UPDATES_PROCESSING)
+			THREADS_STOP_EVENTS.pop(ThreadType.FILES_INDEXING)
+			THREADS_STOP_EVENTS.pop(ThreadType.UPDATES_PROCESSING)
+		case AppRole.RP | AppRole.NORMAL:
+			if (
+				ThreadType.REQUEST_PROCESSING not in THREADS
+				or ThreadType.REQUEST_PROCESSING not in THREADS_STOP_EVENTS
+			):
+				return
+			THREADS_STOP_EVENTS[ThreadType.REQUEST_PROCESSING].set()
+			THREADS[ThreadType.REQUEST_PROCESSING].join()
+			THREADS.pop(ThreadType.REQUEST_PROCESSING)
+			THREADS_STOP_EVENTS.pop(ThreadType.REQUEST_PROCESSING)

From 64ffdaf2b83dae9f450a86024cad9f3a41849c30 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Mon, 9 Mar 2026 19:22:45 +0530
Subject: [PATCH 03/96] wip: migrate the indexing process

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .../chain/ingest/doc_loader.py                |  53 +--
 context_chat_backend/chain/ingest/injest.py   | 201 ++++++-----
 context_chat_backend/controller.py            | 165 +++++-----
 .../{chain/ingest => }/mimetype_list.py       |   0
 context_chat_backend/task_fetcher.py          | 311 ++++++++++++++++--
 context_chat_backend/types.py                 | 121 ++++++-
 context_chat_backend/vectordb/base.py         |   9 +-
 context_chat_backend/vectordb/pgvector.py     |  61 ++--
 8 files changed, 659 insertions(+), 262 deletions(-)
 rename context_chat_backend/{chain/ingest => }/mimetype_list.py (100%)

diff --git a/context_chat_backend/chain/ingest/doc_loader.py b/context_chat_backend/chain/ingest/doc_loader.py
index efb81b6..d26f74b 100644
--- a/context_chat_backend/chain/ingest/doc_loader.py
+++ b/context_chat_backend/chain/ingest/doc_loader.py
@@ -7,11 +7,10 @@
 import re
 import tempfile
 from collections.abc import Callable
-from typing import BinaryIO
+from io import BytesIO
 
 import docx2txt
 from epub2txt import epub2txt
-from fastapi import UploadFile
 from langchain_unstructured import UnstructuredLoader
 from odfdo import Document
 from pandas import read_csv, read_excel
@@ -19,9 +18,11 @@
 from pypdf.errors import FileNotDecryptedError as PdfFileNotDecryptedError
 from striprtf import striprtf
 
+from ...types import SourceItem
+
 logger = logging.getLogger('ccb.doc_loader')
 
-def _temp_file_wrapper(file: BinaryIO, loader: Callable, sep: str = '\n') -> str:
+def _temp_file_wrapper(file: BytesIO, loader: Callable, sep: str = '\n') -> str:
 	raw_bytes = file.read()
 	with tempfile.NamedTemporaryFile(mode='wb') as tmp:
 		tmp.write(raw_bytes)
@@ -35,46 +36,46 @@ def _temp_file_wrapper(file: BinaryIO, loader: Callable, sep: str = '\n') -> str
 
 # -- LOADERS -- #
 
-def _load_pdf(file: BinaryIO) -> str:
+def _load_pdf(file: BytesIO) -> str:
 	pdf_reader = PdfReader(file)
 	return '\n\n'.join([page.extract_text().strip() for page in pdf_reader.pages])
 
 
-def _load_csv(file: BinaryIO) -> str:
+def _load_csv(file: BytesIO) -> str:
 	return read_csv(file).to_string(header=False, na_rep='')
 
 
-def _load_epub(file: BinaryIO) -> str:
+def _load_epub(file: BytesIO) -> str:
 	return _temp_file_wrapper(file, epub2txt).strip()
 
 
-def _load_docx(file: BinaryIO) -> str:
+def _load_docx(file: BytesIO) -> str:
 	return docx2txt.process(file).strip()
 
 
-def _load_odt(file: BinaryIO) -> str:
+def _load_odt(file: BytesIO) -> str:
 	return _temp_file_wrapper(file, lambda fp: Document(fp).get_formatted_text()).strip()
 
 
-def _load_ppt_x(file: BinaryIO) -> str:
+def _load_ppt_x(file: BytesIO) -> str:
 	return _temp_file_wrapper(file, lambda fp: UnstructuredLoader(fp).load()).strip()
 
 
-def _load_rtf(file: BinaryIO) -> str:
+def _load_rtf(file: BytesIO) -> str:
 	return striprtf.rtf_to_text(file.read().decode('utf-8', 'ignore')).strip()
 
 
-def _load_xml(file: BinaryIO) -> str:
+def _load_xml(file: BytesIO) -> str:
 	data = file.read().decode('utf-8', 'ignore')
 	data = re.sub(r'</.+>', '', data)
 	return data.strip()
 
 
-def _load_xlsx(file: BinaryIO) -> str:
+def _load_xlsx(file: BytesIO) -> str:
 	return read_excel(file, na_filter=False).to_string(header=False, na_rep='')
 
 
-def _load_email(file: BinaryIO, ext: str = 'eml') -> str | None:
+def _load_email(file: BytesIO, ext: str = 'eml') -> str | None:
 	# NOTE: msg format is not tested
 	if ext not in ['eml', 'msg']:
 		return None
@@ -115,30 +116,34 @@ def attachment_partitioner(
 }
 
 
-def decode_source(source: UploadFile) -> str | None:
+def decode_source(source: SourceItem) -> str | None:
+	io_obj: BytesIO | None = None
 	try:
 		# .pot files are powerpoint templates but also plain text files,
 		# so we skip them to prevent decoding errors
-		if source.headers['title'].endswith('.pot'):
+		if source.title.endswith('.pot'):
 			return None
 
-		mimetype = source.headers['type']
+		mimetype = source.type
 		if mimetype is None:
 			return None
 
+		if isinstance(source.content, str):
+			io_obj = BytesIO(source.content.encode('utf-8', 'ignore'))
+		else:
+			io_obj = source.content
+
 		if _loader_map.get(mimetype):
-			result = _loader_map[mimetype](source.file)
-			source.file.close()
+			result = _loader_map[mimetype](io_obj)
 			return result.encode('utf-8', 'ignore').decode('utf-8', 'ignore')
 
-		result = source.file.read().decode('utf-8', 'ignore')
-		source.file.close()
-		return result
+		return io_obj.read().decode('utf-8', 'ignore')
 	except PdfFileNotDecryptedError:
-		logger.warning(f'PDF file ({source.filename}) is encrypted and cannot be read')
+		logger.warning(f'PDF file ({source.reference}) is encrypted and cannot be read')
 		return None
 	except Exception:
-		logger.exception(f'Error decoding source file ({source.filename})', stack_info=True)
+		logger.exception(f'Error decoding source file ({source.reference})', stack_info=True)
 		return None
 	finally:
-		source.file.close()  # Ensure file is closed after processing
+		if io_obj is not None:
+			io_obj.close()
diff --git a/context_chat_backend/chain/ingest/injest.py b/context_chat_backend/chain/ingest/injest.py
index 5871ebb..0eb70e0 100644
--- a/context_chat_backend/chain/ingest/injest.py
+++ b/context_chat_backend/chain/ingest/injest.py
@@ -5,29 +5,23 @@
 import logging
 import re
 
-from fastapi.datastructures import UploadFile
 from langchain.schema import Document
 
 from ...dyn_loader import VectorDBLoader
-from ...types import TConfig
-from ...utils import is_valid_source_id, to_int
+from ...types import IndexingError, SourceItem, TConfig
 from ...vectordb.base import BaseVectorDB
 from ...vectordb.types import DbException, SafeDbException, UpdateAccessOp
 from ..types import InDocument
 from .doc_loader import decode_source
 from .doc_splitter import get_splitter_for
-from .mimetype_list import SUPPORTED_MIMETYPES
 
 logger = logging.getLogger('ccb.injest')
 
-def _allowed_file(file: UploadFile) -> bool:
-	return file.headers['type'] in SUPPORTED_MIMETYPES
-
 
 def _filter_sources(
 	vectordb: BaseVectorDB,
-	sources: list[UploadFile]
-) -> tuple[list[UploadFile], list[UploadFile]]:
+	sources: dict[int, SourceItem]
+) -> tuple[dict[int, SourceItem], dict[int, SourceItem]]:
 	'''
 	Returns
 	-------
@@ -37,30 +31,42 @@ def _filter_sources(
 	'''
 
 	try:
-		existing_sources, new_sources = vectordb.check_sources(sources)
+		existing_source_ids, to_embed_source_ids = vectordb.check_sources(sources)
 	except Exception as e:
-		raise DbException('Error: Vectordb sources_to_embed error') from e
+		raise DbException('Error: Vectordb error while checking existing sources in indexing') from e
+
+	existing_sources = {}
+	to_embed_sources = {}
 
-	return ([
-		source for source in sources
-		if source.filename in existing_sources
-	], [
-		source for source in sources
-		if source.filename in new_sources
-	])
+	for db_id, source in sources.items():
+		if source.reference in existing_source_ids:
+			existing_sources[db_id] = source
+		elif source.reference in to_embed_source_ids:
+			to_embed_sources[db_id] = source
 
+	return existing_sources, to_embed_sources
 
-def _sources_to_indocuments(config: TConfig, sources: list[UploadFile]) -> list[InDocument]:
-	indocuments = []
 
-	for source in sources:
-		logger.debug('processing source', extra={ 'source_id': source.filename })
+def _sources_to_indocuments(
+	config: TConfig,
+	sources: dict[int, SourceItem]
+) -> tuple[dict[int, InDocument], dict[int, IndexingError]]:
+	indocuments = {}
+	errored_docs = {}
 
+	for db_id, source in sources.items():
+		logger.debug('processing source', extra={ 'source_id': source.reference })
+
+		# todo: maybe fetch the content of the files here
 		# transform the source to have text data
 		content = decode_source(source)
 
 		if content is None or (content := content.strip()) == '':
-			logger.debug('decoded empty source', extra={ 'source_id': source.filename })
+			logger.debug('decoded empty source', extra={ 'source_id': source.reference })
+			errored_docs[db_id] = IndexingError(
+				error='Decoded content is empty',
+				retryable=False,
+			)
 			continue
 
 		# replace more than two newlines with two newlines (also blank spaces, more than 4)
@@ -71,94 +77,123 @@ def _sources_to_indocuments(config: TConfig, sources: list[UploadFile]) -> list[
 		content = content.replace('\0', '')
 
 		if content is None or content == '':
-			logger.debug('decoded empty source after cleanup', extra={ 'source_id': source.filename })
+			logger.debug('decoded empty source after cleanup', extra={ 'source_id': source.reference })
+			errored_docs[db_id] = IndexingError(
+				error='Decoded content is empty',
+				retryable=False,
+			)
 			continue
 
-		logger.debug('decoded non empty source', extra={ 'source_id': source.filename })
+		logger.debug('decoded non empty source', extra={ 'source_id': source.reference })
 
 		metadata = {
-			'source': source.filename,
-			'title': _decode_latin_1(source.headers['title']),
-			'type': source.headers['type'],
+			'source': source.reference,
+			'title': _decode_latin_1(source.title),
+			'type': source.type,
 		}
 		doc = Document(page_content=content, metadata=metadata)
 
-		splitter = get_splitter_for(config.embedding_chunk_size, source.headers['type'])
+		splitter = get_splitter_for(config.embedding_chunk_size, source.type)
 		split_docs = splitter.split_documents([doc])
 		logger.debug('split document into chunks', extra={
-			'source_id': source.filename,
+			'source_id': source.reference,
 			'len(split_docs)': len(split_docs),
 		})
 
-		indocuments.append(InDocument(
+		indocuments[db_id] = InDocument(
 			documents=split_docs,
-			userIds=list(map(_decode_latin_1, source.headers['userIds'].split(','))),
-			source_id=source.filename,  # pyright: ignore[reportArgumentType]
-			provider=source.headers['provider'],
-			modified=to_int(source.headers['modified']),
-		))
+			userIds=list(map(_decode_latin_1, source.userIds)),
+			source_id=source.reference,
+			provider=source.provider,
+			modified=source.modified,  # pyright: ignore[reportArgumentType]
+		)
+
+	return indocuments, errored_docs
+
+
+def _increase_access_for_existing_sources(
+	vectordb: BaseVectorDB,
+	existing_sources: dict[int, SourceItem]
+) -> dict[int, IndexingError | None]:
+	'''
+	update userIds for existing sources
+	allow the userIds as additional users, not as the only users
+	'''
+	if len(existing_sources) == 0:
+		return {}
 
-	return indocuments
+	results = {}
+	logger.debug('Increasing access for existing sources', extra={
+		'source_ids': [source.reference for source in existing_sources.values()]
+	})
+	for db_id, source in existing_sources.items():
+		try:
+			vectordb.update_access(
+				UpdateAccessOp.allow,
+				list(map(_decode_latin_1, source.userIds)),
+				source.reference,
+			)
+			results[db_id] = None
+		except SafeDbException as e:
+			logger.error(f'Failed to update access for source ({source.reference}): {e.args[0]}')
+			results[db_id] = IndexingError(
+				error=str(e),
+				retryable=False,
+			)
+			continue
+		except Exception as e:
+			logger.error(f'Unexpected error while updating access for source ({source.reference}): {e}')
+			results[db_id] = IndexingError(
+				error='Unexpected error while updating access',
+				retryable=True,
+			)
+			continue
+	return results
 
 
 def _process_sources(
 	vectordb: BaseVectorDB,
 	config: TConfig,
-	sources: list[UploadFile],
-) -> tuple[list[str],list[str]]:
+	sources: dict[int, SourceItem]
+) -> dict[int, IndexingError | None]:
 	'''
 	Processes the sources and adds them to the vectordb.
 	Returns the list of source ids that were successfully added and those that need to be retried.
 	'''
-	existing_sources, filtered_sources = _filter_sources(vectordb, sources)
+	existing_sources, to_embed_sources = _filter_sources(vectordb, sources)
 	logger.debug('db filter source results', extra={
 		'len(existing_sources)': len(existing_sources),
 		'existing_sources': existing_sources,
-		'len(filtered_sources)': len(filtered_sources),
-		'filtered_sources': filtered_sources,
+		'len(to_embed_sources)': len(to_embed_sources),
+		'to_embed_sources': to_embed_sources,
 	})
-	loaded_source_ids = [source.filename for source in existing_sources]
 
-	# update userIds for existing sources
-	# allow the userIds as additional users, not as the only users
-	if len(existing_sources) > 0:
-		logger.debug('Increasing access for existing sources', extra={
-			'source_ids': [source.filename for source in existing_sources]
-		})
-		for source in existing_sources:
-			try:
-				vectordb.update_access(
-					UpdateAccessOp.allow,
-					list(map(_decode_latin_1, source.headers['userIds'].split(','))),
-					source.filename,  # pyright: ignore[reportArgumentType]
-				)
-			except SafeDbException as e:
-				logger.error(f'Failed to update access for source ({source.filename}): {e.args[0]}')
-				continue
-
-	if len(filtered_sources) == 0:
+	source_proc_results = _increase_access_for_existing_sources(vectordb, existing_sources)
+
+	if len(to_embed_sources) == 0:
 		# no new sources to embed
 		logger.debug('Filtered all sources, nothing to embed')
-		return loaded_source_ids, []  # pyright: ignore[reportReturnType]
+		return source_proc_results
 
 	logger.debug('Filtered sources:', extra={
-		'source_ids': [source.filename for source in filtered_sources]
+		'source_ids': [source.reference for source in to_embed_sources.values()]
 	})
 	# invalid/empty sources are filtered out here and not counted in loaded/retryable
-	indocuments = _sources_to_indocuments(config, filtered_sources)
+	indocuments, errored_docs = _sources_to_indocuments(config, to_embed_sources)
 
-	logger.debug('Converted all sources to documents')
+	source_proc_results.update(errored_docs)
+	logger.debug('Converted sources to documents')
 
 	if len(indocuments) == 0:
 		# filtered document(s) were invalid/empty, not an error
 		logger.debug('All documents were found empty after being processed')
-		return loaded_source_ids, []  # pyright: ignore[reportReturnType]
+		return source_proc_results
 
-	added_source_ids, retry_source_ids = vectordb.add_indocuments(indocuments)
-	loaded_source_ids.extend(added_source_ids)
+	doc_add_results = vectordb.add_indocuments(indocuments)
+	source_proc_results.update(doc_add_results)
 	logger.debug('Added documents to vectordb')
 
-	return loaded_source_ids, retry_source_ids  # pyright: ignore[reportReturnType]
+	return source_proc_results
 
 
 def _decode_latin_1(s: str) -> str:
@@ -172,31 +207,15 @@ def _decode_latin_1(s: str) -> str:
 def embed_sources(
 	vectordb_loader: VectorDBLoader,
 	config: TConfig,
-	sources: list[UploadFile],
-) -> tuple[list[str],list[str]]:
-	# either not a file or a file that is allowed
-	sources_filtered = [
-		source for source in sources
-		if is_valid_source_id(source.filename)  # pyright: ignore[reportArgumentType]
-		or _allowed_file(source)
-	]
-
+	sources: dict[int, SourceItem]
+) -> dict[int, IndexingError | None]:
 	logger.debug('Embedding sources:', extra={
 		'source_ids': [
-			f'{source.filename} ({_decode_latin_1(source.headers["title"])})'
-			for source in sources_filtered
-		],
-		'invalid_source_ids': [
-			source.filename for source in sources
-			if not is_valid_source_id(source.filename)  # pyright: ignore[reportArgumentType]
-		],
-		'not_allowed_file_ids': [
-			source.filename for source in sources
-			if not _allowed_file(source)
+			f'{source.reference} ({_decode_latin_1(source.title)})'
+			for source in sources.values()
 		],
-		'len(source_ids)': len(sources_filtered),
-		'len(total_source_ids)': len(sources),
+		'len(source_ids)': len(sources),
 	})
 
 	vectordb = vectordb_loader.load()
-	return _process_sources(vectordb, config, sources_filtered)
+	return _process_sources(vectordb, config, sources)
diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index fadc5f8..3e70ee1 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -27,7 +27,7 @@
 from time import sleep
 from typing import Annotated, Any
 
-from fastapi import Body, FastAPI, Request, UploadFile
+from fastapi import Body, FastAPI, Request
 from langchain.llms.base import LLM
 from nc_py_api import AsyncNextcloudApp, NextcloudApp
 from nc_py_api.ex_app import persistent_storage, set_handlers
@@ -35,14 +35,13 @@
 from starlette.responses import FileResponse
 
 from .chain.context import do_doc_search
-from .chain.ingest.injest import embed_sources
 from .chain.one_shot import process_context_query, process_query
 from .config_parser import get_config
 from .dyn_loader import LLMModelLoader, VectorDBLoader
 from .models.types import LlmException
 from nc_py_api.ex_app import AppAPIAuthMiddleware
 from .utils import JSONResponse, exec_in_proc, is_valid_provider_id, is_valid_source_id, value_of
-from .task_fetcher import start_bg_threads, stop_bg_threads
+from .task_fetcher import start_bg_threads, wait_for_bg_threads
 from .vectordb.service import (
 	count_documents_by_provider,
 	decl_update_access,
@@ -57,6 +56,7 @@
 repair_run()
 ensure_config_file()
 logger = logging.getLogger('ccb.controller')
+app_config = get_config(os.environ['CC_CONFIG_PATH'])
 __download_models_from_hf = os.environ.get('CC_DOWNLOAD_MODELS_FROM_HF', 'true').lower() in ('1', 'true', 'yes')
 
 models_to_fetch = {
@@ -77,10 +77,10 @@ def enabled_handler(enabled: bool, _: NextcloudApp | AsyncNextcloudApp) -> str:
 	try:
 		if enabled:
 			app_enabled.set()
-			start_bg_threads()
+			start_bg_threads(app_config, app_enabled)
 		else:
 			app_enabled.clear()
-			stop_bg_threads()
+			wait_for_bg_threads()
 	except Exception as e:
 		logger.exception('Error in enabled handler:', exc_info=e)
 		return f'Error in enabled handler: {e}'
@@ -101,10 +101,9 @@ async def lifespan(app: FastAPI):
 	yield
 	vectordb_loader.offload()
 	llm_loader.offload()
-	stop_bg_threads()
+	wait_for_bg_threads()
 
 
-app_config = get_config(os.environ['CC_CONFIG_PATH'])
 app = FastAPI(debug=app_config.debug, lifespan=lifespan)  # pyright: ignore[reportArgumentType]
 
 app.extra['CONFIG'] = app_config
@@ -343,86 +342,78 @@ def _(userId: str = Body(embed=True)):
 	return JSONResponse('User deleted')
 
 
-@app.put('/loadSources')
-@enabled_guard(app)
-def _(sources: list[UploadFile]):
-	global _indexing
-
-	if len(sources) == 0:
-		return JSONResponse('No sources provided', 400)
-
-	filtered_sources = []
-
-	for source in sources:
-		if not value_of(source.filename):
-			logger.warning('Skipping source with invalid source_id', extra={
-				'source_id': source.filename,
-				'title': source.headers.get('title'),
-			})
-			continue
-
-		with index_lock:
-			if source.filename in _indexing:
-				# this request will be retried by the client
-				return JSONResponse(
-					f'This source ({source.filename}) is already being processed in another request, try again later',
-					503,
-					headers={'cc-retry': 'true'},
-				)
-
-		if not (
-			value_of(source.headers.get('userIds'))
-			and source.headers.get('title', None) is not None
-			and value_of(source.headers.get('type'))
-			and value_of(source.headers.get('modified'))
-			and source.headers['modified'].isdigit()
-			and value_of(source.headers.get('provider'))
-		):
-			logger.warning('Skipping source with invalid/missing headers', extra={
-				'source_id': source.filename,
-				'title': source.headers.get('title'),
-				'headers': source.headers,
-			})
-			continue
-
-		filtered_sources.append(source)
-
-	# wait for 10 minutes before failing the request
-	semres = doc_parse_semaphore.acquire(block=True, timeout=10*60)
-	if not semres:
-		return JSONResponse(
-			'Document parser worker limit reached, try again in some time or consider increasing the limit',
-			503,
-			headers={'cc-retry': 'true'}
-		)
-
-	with index_lock:
-		for source in filtered_sources:
-			_indexing[source.filename] = source.size
-
-	try:
-		loaded_sources, not_added_sources = exec_in_proc(
-			target=embed_sources,
-			args=(vectordb_loader, app.extra['CONFIG'], filtered_sources)
-		)
-	except (DbException, EmbeddingException):
-		raise
-	except Exception as e:
-		raise DbException('Error: failed to load sources') from e
-	finally:
-		with index_lock:
-			for source in filtered_sources:
-				_indexing.pop(source.filename, None)
-		doc_parse_semaphore.release()
-
-	if len(loaded_sources) != len(filtered_sources):
-		logger.debug('Some sources were not loaded', extra={
-			'Count of loaded sources': f'{len(loaded_sources)}/{len(filtered_sources)}',
-			'source_ids': loaded_sources,
-		})
-
-	# loaded sources include the existing sources that may only have their access updated
-	return JSONResponse({'loaded_sources': loaded_sources, 'sources_to_retry': not_added_sources})
+# @app.put('/loadSources')
+# @enabled_guard(app)
+# def _(sources: list[UploadFile]):
+# 	global _indexing
+
+# 	if len(sources) == 0:
+# 		return JSONResponse('No sources provided', 400)
+
+# 	for source in sources:
+# 		if not value_of(source.filename):
+# 			return JSONResponse(f'Invalid source filename for: {source.headers.get("title")}', 400)
+
+# 		with index_lock:
+# 			if source.filename in _indexing:
+# 				# this request will be retried by the client
+# 				return JSONResponse(
+# 					f'This source ({source.filename}) is already being processed in another request, try again later',
+# 					503,
+# 					headers={'cc-retry': 'true'},
+# 				)
+
+# 		if not (
+# 			value_of(source.headers.get('userIds'))
+# 			and source.headers.get('title', None) is not None
+# 			and value_of(source.headers.get('type'))
+# 			and value_of(source.headers.get('modified'))
+# 			and source.headers['modified'].isdigit()
+# 			and value_of(source.headers.get('provider'))
+# 		):
+# 			logger.error('Invalid/missing headers received', extra={
+# 				'source_id': source.filename,
+# 				'title': source.headers.get('title'),
+# 				'headers': source.headers,
+# 			})
+# 			return JSONResponse(f'Invaild/missing headers for: {source.filename}', 400)
+
+# 	# wait for 10 minutes before failing the request
+# 	semres = doc_parse_semaphore.acquire(block=True, timeout=10*60)
+# 	if not semres:
+# 		return JSONResponse(
+# 			'Document parser worker limit reached, try again in some time or consider increasing the limit',
+# 			503,
+# 			headers={'cc-retry': 'true'}
+# 		)
+
+# 	with index_lock:
+# 		for source in sources:
+# 			_indexing[source.filename] = source.size
+
+# 	try:
+# 		loaded_sources, not_added_sources = exec_in_proc(
+# 			target=embed_sources,
+# 			args=(vectordb_loader, app.extra['CONFIG'], sources)
+# 		)
+# 	except (DbException, EmbeddingException):
+# 		raise
+# 	except Exception as e:
+# 		raise DbException('Error: failed to load sources') from e
+# 	finally:
+# 		with index_lock:
+# 			for source in sources:
+# 				_indexing.pop(source.filename, None)
+# 		doc_parse_semaphore.release()
+
+# 	if len(loaded_sources) != len(sources):
+# 		logger.debug('Some sources were not loaded', extra={
+# 			'Count of loaded sources': f'{len(loaded_sources)}/{len(sources)}',
+# 			'source_ids': loaded_sources,
+# 		})
+
+# 	# loaded sources include the existing sources that may only have their access updated
+# 	return JSONResponse({'loaded_sources': loaded_sources, 'sources_to_retry': not_added_sources})
 
 
 class Query(BaseModel):
diff --git a/context_chat_backend/chain/ingest/mimetype_list.py b/context_chat_backend/mimetype_list.py
similarity index 100%
rename from context_chat_backend/chain/ingest/mimetype_list.py
rename to context_chat_backend/mimetype_list.py
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 9660b44..a548bcf 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -3,15 +3,41 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
 
+import asyncio
+import logging
+from contextlib import suppress
 from enum import Enum
-from threading import Thread
+from io import BytesIO
+from threading import Event, Thread
+from time import sleep
 
-from .types import AppRole
-from .utils import get_app_role
+import niquests
+from nc_py_api import AsyncNextcloudApp, NextcloudApp
+from pydantic import ValidationError
+
+from .chain.ingest.injest import embed_sources
+from .dyn_loader import VectorDBLoader
+from .types import (
+	AppRole,
+	EmbeddingException,
+	FilesQueueItem,
+	IndexingError,
+	IndexingException,
+	LoaderException,
+	ReceivedFileItem,
+	SourceItem,
+	TConfig,
+)
+from .utils import exec_in_proc, get_app_role
+from .vectordb.types import DbException
 
 APP_ROLE = get_app_role()
 THREADS = {}
-THREADS_STOP_EVENTS = {}
+LOGGER = logging.getLogger('ccb.task_fetcher')
+FILES_INDEXING_BATCH_SIZE = 64  # todo: config?
+# max concurrent fetches to avoid overloading the NC server or hitting rate limits
+CONCURRENT_FILE_FETCHES = 10  # todo: config?
+MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB, todo: config?
 
 
 class ThreadType(Enum):
@@ -20,67 +46,294 @@ class ThreadType(Enum):
 	REQUEST_PROCESSING = 'request_processing'
 
 
-def files_indexing_thread():
-	...
+async def __fetch_file_content(
+	semaphore: asyncio.Semaphore,
+	file_id: int,
+	user_id: str,
+	_rlimit = 3,
+) -> BytesIO:
+	'''
+	Raises
+	------
+	IndexingException
+	'''
+
+	async with semaphore:
+		nc = AsyncNextcloudApp()
+		try:
+			# a file pointer for storing the stream in memory until it is consumed
+			fp = BytesIO()
+			await nc._session.download2fp(
+				url_path=f'/apps/context_chat/files/{file_id}',
+				fp=fp,
+				dav=False,
+				params={ 'userId': user_id },
+			)
+			return fp
+		except niquests.exceptions.RequestException as e:
+			# todo: raise IndexingException with retryable=True for rate limit errors,
+			# todo: and handle it in the caller to not delete the source from the queue and retry later through
+			# todo: the normal lock expiry mechanism
+			if e.response is None:
+				raise
+
+			if e.response.status_code == niquests.codes.too_many_requests:  # pyright: ignore[reportAttributeAccessIssue]
+				# todo: implement rate limits in php CC?
+				wait_for = int(e.response.headers.get('Retry-After', '30'))
+				if _rlimit <= 0:
+					raise IndexingException(
+						f'Rate limited when fetching content for file id {file_id}, user id {user_id},'
+						' max retries exceeded',
+						retryable=True,
+					) from e
+				LOGGER.warning(
+					f'Rate limited when fetching content for file id {file_id}, user id {user_id},'
+					f' waiting {wait_for} before retrying',
+					exc_info=e,
+				)
+				await asyncio.sleep(wait_for)
+				return await __fetch_file_content(semaphore, file_id, user_id, _rlimit - 1)
+
+			raise
+		except IndexingException:
+			raise
+		except Exception as e:
+			LOGGER.error(f'Error fetching content for file id {file_id}, user id {user_id}: {e}', exc_info=e)
+			raise IndexingException(f'Error fetching content for file id {file_id}, user id {user_id}: {e}') from e
+
+
+async def __fetch_files_content(
+	files: dict[int, ReceivedFileItem]
+) -> dict[int, SourceItem | IndexingError]:
+	source_items = {}
+	semaphore = asyncio.Semaphore(CONCURRENT_FILE_FETCHES)
+	tasks = []
+
+	for file_id, file_item in files.items():
+		if file_item.size > MAX_FILE_SIZE:
+			LOGGER.info(
+				f'Skipping file id {file_id}, source id {file_item.reference} due to size'
+				f' {(file_item.size/(1024*1024)):.2f} MiB exceeding the limit {(MAX_FILE_SIZE/(1024*1024)):.2f} MiB',
+			)
+			source_items[file_id] = IndexingError(
+				error=(
+					f'File size {(file_item.size/(1024*1024)):.2f} MiB'
+					f' exceeds the limit {(MAX_FILE_SIZE/(1024*1024)):.2f} MiB'
+				),
+				retryable=False,
+			)
+			continue
+		# todo: perform the existing file check before fetching the content to avoid unnecessary fetches
+		# any user id from the list should have read access to the file
+		tasks.append(asyncio.ensure_future(__fetch_file_content(semaphore, file_id, file_item.userIds[0])))
 
+	results = await asyncio.gather(*tasks, return_exceptions=True)
+	for (file_id, file_item), result in zip(files.items(), results, strict=True):
+		if isinstance(result, IndexingException):
+			LOGGER.error(
+				f'Error fetching content for file id {file_id}, reference {file_item.reference}: {result}',
+				exc_info=result,
+			)
+			source_items[file_id] = IndexingError(
+				error=str(result),
+				retryable=result.retryable,
+			)
+		elif isinstance(result, str) or isinstance(result, BytesIO):
+			source_items[file_id] = SourceItem(
+				**file_item.model_dump(),
+				content=result,
+			)
+		elif isinstance(result, BaseException):
+			LOGGER.error(
+				f'Unexpected error fetching content for file id {file_id}, reference {file_item.reference}: {result}',
+				exc_info=result,
+			)
+			source_items[file_id] = IndexingError(
+				error=f'Unexpected error: {result}',
+				retryable=True,
+			)
+		else:
+			LOGGER.error(
+				f'Unknown error fetching content for file id {file_id}, reference {file_item.reference}: {result}',
+				exc_info=True,
+			)
+			source_items[file_id] = IndexingError(
+				error='Unknown error',
+				retryable=True,
+			)
+	return source_items
+
+
+def files_indexing_thread(app_config: TConfig, app_enabled: Event) -> None:
+	try:
+		vectordb_loader = VectorDBLoader(app_config)
+	except LoaderException as e:
+		LOGGER.error('Error initializing vector DB loader, files indexing thread will not start:', exc_info=e)
+		return
+
+	def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingError | None]:
+		try:
+			return exec_in_proc(
+				target=embed_sources,
+				args=(vectordb_loader, app_config, source_items),
+			)
+		except (DbException, EmbeddingException):
+			raise
+		except Exception as e:
+			raise DbException('Error: failed to load sources') from e
 
-def updates_processing_thread():
+
+	while True:
+		if not app_enabled.is_set():
+			LOGGER.info('Files indexing thread is stopping as the app is disabled')
+			return
+
+		try:
+			nc = NextcloudApp()
+			# todo: add the 'size' param to the return of this call.
+			q_items_res = nc.ocs(
+				'GET',
+				'/apps/context_chat/queues/documents',
+				params={ 'n': FILES_INDEXING_BATCH_SIZE }
+			)
+
+			try:
+				q_items = FilesQueueItem.model_validate(q_items_res)
+			except ValidationError as e:
+				raise Exception(f'Error validating queue items response: {e}\nResponse content: {q_items_res}') from e
+
+			# populate files content and convert to source items
+			fetched_files = {}
+			source_files = {}
+			# unified error structure for files and content providers
+			source_errors = {}
+
+			if q_items.files:
+				fetched_files = asyncio.run(__fetch_files_content(q_items.files))
+
+			for file_id, result in fetched_files.items():
+				if isinstance(result, SourceItem):
+					source_files[file_id] = result
+				else:
+					source_errors[file_id] = result
+
+			files_result = _load_sources(source_files)
+			providers_result = _load_sources(q_items.content_providers)
+
+			if (
+				any(isinstance(res, IndexingError) for res in files_result.values())
+				or any(isinstance(res, IndexingError) for res in providers_result.values())
+			):
+				LOGGER.error('Some sources failed to index', extra={
+					'file_errors': {
+						file_id: error
+						for file_id, error in files_result.items()
+						if isinstance(error, IndexingError)
+					},
+					'provider_errors': {
+						provider_id: error
+						for provider_id, error in providers_result.items()
+						if isinstance(error, IndexingError)
+					},
+				})
+		except (
+			niquests.exceptions.ConnectionError,
+			niquests.exceptions.Timeout,
+		) as e:
+			LOGGER.info('Temporary error fetching documents to index, will retry:', exc_info=e)
+			sleep(5)
+			continue
+		except Exception as e:
+			LOGGER.exception('Error fetching documents to index:', exc_info=e)
+			sleep(5)
+			continue
+
+		# delete the entries from the PHP side queue where indexing succeeded or the error is not retryable
+		to_delete_file_ids = [
+			file_id for file_id, result in files_result.items()
+			if result is None or (isinstance(result, IndexingError) and not result.retryable)
+		]
+		to_delete_provider_ids = [
+			provider_id for provider_id, result in providers_result.items()
+			if result is None or (isinstance(result, IndexingError) and not result.retryable)
+		]
+
+		try:
+			nc.ocs(
+				'DELETE',
+				'/apps/context_chat/queues/documents/',
+				json={
+					'files': to_delete_file_ids,
+					'content_providers': to_delete_provider_ids,
+				},
+			)
+		except (
+			niquests.exceptions.ConnectionError,
+			niquests.exceptions.Timeout,
+		) as e:
+			LOGGER.info('Temporary error reporting indexing results, will retry:', exc_info=e)
+			sleep(5)
+			with suppress(Exception):
+				nc = NextcloudApp()
+				nc.ocs(
+					'DELETE',
+					'/apps/context_chat/queues/documents/',
+					json={
+						'files': to_delete_file_ids,
+						'content_providers': to_delete_provider_ids,
+					},
+				)
+			continue
+		except Exception as e:
+			LOGGER.exception('Error reporting indexing results:', exc_info=e)
+			sleep(5)
+			continue
+
+
+
+def updates_processing_thread(app_config: TConfig):
 	...
 
 
-def request_processing_thread():
+def request_processing_thread(app_config: TConfig):
 	...
 
 
-def start_bg_threads():
+def start_bg_threads(app_config: TConfig, app_enabled: Event):
 	match APP_ROLE:
 		case AppRole.INDEXING | AppRole.NORMAL:
 			THREADS[ThreadType.FILES_INDEXING] = Thread(
 				target=files_indexing_thread,
+				args=(app_config, Event),
 				name='FilesIndexingThread',
-				daemon=True,
 			)
 			THREADS[ThreadType.UPDATES_PROCESSING] = Thread(
 				target=updates_processing_thread,
+				args=(app_config, Event),
 				name='UpdatesProcessingThread',
-				daemon=True,
 			)
 			THREADS[ThreadType.FILES_INDEXING].start()
 			THREADS[ThreadType.UPDATES_PROCESSING].start()
 		case AppRole.RP | AppRole.NORMAL:
 			THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
 				target=request_processing_thread,
+				args=(app_config, Event),
 				name='RequestProcessingThread',
-				daemon=True,
 			)
 			THREADS[ThreadType.REQUEST_PROCESSING].start()
 
 
-def stop_bg_threads():
+def wait_for_bg_threads():
 	match APP_ROLE:
 		case AppRole.INDEXING | AppRole.NORMAL:
-			if (
-				ThreadType.FILES_INDEXING not in THREADS
-				or ThreadType.UPDATES_PROCESSING not in THREADS
-				or ThreadType.FILES_INDEXING not in THREADS_STOP_EVENTS
-				or ThreadType.UPDATES_PROCESSING not in THREADS_STOP_EVENTS
-			):
+			if (ThreadType.FILES_INDEXING not in THREADS or ThreadType.UPDATES_PROCESSING not in THREADS):
 				return
-			THREADS_STOP_EVENTS[ThreadType.FILES_INDEXING].set()
-			THREADS_STOP_EVENTS[ThreadType.UPDATES_PROCESSING].set()
 			THREADS[ThreadType.FILES_INDEXING].join()
 			THREADS[ThreadType.UPDATES_PROCESSING].join()
 			THREADS.pop(ThreadType.FILES_INDEXING)
 			THREADS.pop(ThreadType.UPDATES_PROCESSING)
-			THREADS_STOP_EVENTS.pop(ThreadType.FILES_INDEXING)
-			THREADS_STOP_EVENTS.pop(ThreadType.UPDATES_PROCESSING)
 		case AppRole.RP | AppRole.NORMAL:
-			if (
-				ThreadType.REQUEST_PROCESSING not in THREADS
-				or ThreadType.REQUEST_PROCESSING not in THREADS_STOP_EVENTS
-			):
+			if (ThreadType.REQUEST_PROCESSING not in THREADS):
 				return
-			THREADS_STOP_EVENTS[ThreadType.REQUEST_PROCESSING].set()
 			THREADS[ThreadType.REQUEST_PROCESSING].join()
 			THREADS.pop(ThreadType.REQUEST_PROCESSING)
-			THREADS_STOP_EVENTS.pop(ThreadType.REQUEST_PROCESSING)
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 7868086..97d48ce 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -3,8 +3,13 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
 from enum import Enum
+from io import BytesIO
+from typing import Self
 
-from pydantic import BaseModel
+from pydantic import BaseModel, field_validator
+
+from .mimetype_list import SUPPORTED_MIMETYPES
+from .utils import is_valid_provider_id, is_valid_source_id
 
 __all__ = [
 	'DEFAULT_EM_MODEL_ALIAS',
@@ -17,6 +22,7 @@
 ]
 
 DEFAULT_EM_MODEL_ALIAS = 'em_model'
+FILES_PROVIDER_ID = 'files__default'
 
 
 class TEmbeddingAuthApiKey(BaseModel):
@@ -79,3 +85,116 @@ class AppRole(str, Enum):
 	NORMAL = 'normal'
 	INDEXING = 'indexing'
 	RP = 'rp'
+
+
+class CommonSourceItem(BaseModel):
+	userIds: list[str]
+	reference: str  # source_id of the form "appId__providerId: itemId"
+	title: str
+	modified: int | str  # todo: int/string?
+	type: str
+	provider: str
+	size: int
+
+	@field_validator('modified', mode='before')
+	@classmethod
+	def validate_modified(cls, v):
+		if isinstance(v, int):
+			return v
+		if isinstance(v, str):
+			try:
+				return int(v)
+			except ValueError as e:
+				raise ValueError(f'Invalid modified value: {v}') from e
+		raise ValueError(f'Invalid modified type: {type(v)}')
+
+	@field_validator('reference', 'title', 'type', 'provider')
+	@classmethod
+	def validate_strings_non_empty(cls, v):
+		if not isinstance(v, str) or v.strip() == '':
+			raise ValueError('Must be a non-empty string')
+		return v.strip()
+
+	@field_validator('userIds', mode='after')
+	def validate_user_ids(self) -> Self:
+		if (
+			not isinstance(self.userIds, list)
+			or not all(
+				isinstance(uid, str)
+				and uid.strip() != ''
+				for uid in self.userIds
+			)
+			or len(self.userIds) == 0
+		):
+			raise ValueError('userIds must be a non-empty list of non-empty strings')
+		self.userIds = [uid.strip() for uid in self.userIds]
+		return self
+
+	@field_validator('reference', mode='after')
+	def validate_reference_format(self) -> Self:
+		# validate reference format: "appId__providerId: itemId"
+		if not is_valid_source_id(self.reference):
+			raise ValueError('Invalid reference format, must be "appId__providerId: itemId"')
+		return self
+
+	@field_validator('provider', mode='after')
+	def validate_provider_format(self) -> Self:
+		# validate provider format: "appId__providerId"
+		if not is_valid_provider_id(self.provider):
+			raise ValueError('Invalid provider format, must be "appId__providerId"')
+		return self
+
+	@field_validator('type', mode='after')
+	def validate_type(self) -> Self:
+		if self.reference.startswith(FILES_PROVIDER_ID) and self.type not in SUPPORTED_MIMETYPES:
+			raise ValueError(f'Unsupported file type: {self.type} for reference {self.reference}')
+		return self
+
+	@field_validator('size', mode='after')
+	def validate_size(self) -> Self:
+		if not isinstance(self.size, int) or self.size < 0:
+			raise ValueError(f'Invalid size value: {self.size}, must be a non-negative integer')
+		return self
+
+
+class ReceivedFileItem(CommonSourceItem):
+	content: None
+
+
+class SourceItem(CommonSourceItem):
+	'''
+	Used for the unified queue of items to process, after fetching the content for files
+	and for directly fetched content providers.
+	'''
+	content: str | BytesIO
+
+	@field_validator('content')
+	@classmethod
+	def validate_content(cls, v):
+		if isinstance(v, str):
+			if v.strip() == '':
+				raise ValueError('Content must be a non-empty string')
+			return v.strip()
+		if isinstance(v, BytesIO):
+			if v.getbuffer().nbytes == 0:
+				raise ValueError('Content must be a non-empty BytesIO')
+			return v
+		raise ValueError('Content must be either a non-empty string or a non-empty BytesIO')
+
+
+class FilesQueueItem(BaseModel):
+	files: dict[int, ReceivedFileItem]  # [db id]: FileItem
+	content_providers: dict[int, SourceItem]  # [db id]: SourceItem
+
+
+class IndexingException(Exception):
+	retryable: bool = False
+
+	def __init__(self, message: str, retryable: bool = False):
+		super().__init__(message)
+		self.retryable = retryable
+
+
+class IndexingError(BaseModel):
+	error: str
+	retryable: bool = False
diff --git a/context_chat_backend/vectordb/base.py b/context_chat_backend/vectordb/base.py
index 0bf1020..ebd5407 100644
--- a/context_chat_backend/vectordb/base.py
+++ b/context_chat_backend/vectordb/base.py
@@ -5,12 +5,12 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-from fastapi import UploadFile
 from langchain.schema import Document
 from langchain.schema.embeddings import Embeddings
 from langchain.schema.vectorstore import VectorStore
 
 from ..chain.types import InDocument, ScopeType
+from ..types import IndexingError, SourceItem
 from ..utils import timed
 from .types import UpdateAccessOp
 
@@ -62,7 +62,7 @@ def get_instance(self) -> VectorStore:
 		'''
 
 	@abstractmethod
-	def add_indocuments(self, indocuments: list[InDocument]) -> tuple[list[str],list[str]]:
+	def add_indocuments(self, indocuments: dict[int, InDocument]) -> dict[int, IndexingError | None]:
 		'''
 		Adds the given indocuments to the vectordb and updates the docs + access tables.
 
@@ -79,10 +79,7 @@ def add_indocuments(self, indocuments: list[InDocument]) -> tuple[list[str],list
 
 	@timed
 	@abstractmethod
-	def check_sources(
-		self,
-		sources: list[UploadFile],
-	) -> tuple[list[str], list[str]]:
+	def check_sources(self, sources: dict[int, SourceItem]) -> tuple[list[str], list[str]]:
 		'''
 		Checks the sources in the vectordb if they are already embedded
 			and are up to date.
diff --git a/context_chat_backend/vectordb/pgvector.py b/context_chat_backend/vectordb/pgvector.py
index 2b7fc06..f5879fe 100644
--- a/context_chat_backend/vectordb/pgvector.py
+++ b/context_chat_backend/vectordb/pgvector.py
@@ -11,14 +11,13 @@
 import sqlalchemy.dialects.postgresql as postgresql_dialects
 import sqlalchemy.orm as orm
 from dotenv import load_dotenv
-from fastapi import UploadFile
 from langchain.schema import Document
 from langchain.vectorstores import VectorStore
 from langchain_core.embeddings import Embeddings
 from langchain_postgres.vectorstores import Base, PGVector
 
 from ..chain.types import InDocument, ScopeType
-from ..types import EmbeddingException, RetryableEmbeddingException
+from ..types import EmbeddingException, IndexingError, RetryableEmbeddingException, SourceItem
 from ..utils import timed
 from .base import BaseVectorDB
 from .types import DbException, SafeDbException, UpdateAccessOp
@@ -130,17 +129,16 @@ def get_users(self) -> list[str]:
 			except Exception as e:
 				raise DbException('Error: getting a list of all users from access list') from e
 
-	def add_indocuments(self, indocuments: list[InDocument]) -> tuple[list[str], list[str]]:
+	def add_indocuments(self, indocuments: dict[int, InDocument]) -> dict[int, IndexingError | None]:
 		"""
 		Raises
 			EmbeddingException: if the embedding request definitively fails
 		"""
-		added_sources = []
-		retry_sources = []
+		results = {}
 		batch_size = PG_BATCH_SIZE // 5
 
 		with self.session_maker() as session:
-			for indoc in indocuments:
+			for php_db_id, indoc in indocuments.items():
 				try:
 					# query paramerters limitation in postgres is 65535 (https://www.postgresql.org/docs/current/limits.html)
 					# so we chunk the documents into (5 values * 10k) chunks
@@ -170,7 +168,7 @@ def add_indocuments(self, indocuments: list[InDocument]) -> tuple[list[str], lis
 						)
 
 					self.decl_update_access(indoc.userIds, indoc.source_id, session)
-					added_sources.append(indoc.source_id)
+					results[php_db_id] = None
 					session.commit()
 				except SafeDbException as e:
 					# for when the source_id is not found. This here can be an error in the DB
@@ -178,51 +176,67 @@ def add_indocuments(self, indocuments: list[InDocument]) -> tuple[list[str], lis
 					logger.exception('Error adding documents to vectordb', exc_info=e, extra={
 						'source_id': indoc.source_id,
 					})
-					retry_sources.append(indoc.source_id)
+					results[php_db_id] = IndexingError(
+						error=str(e),
+						retryable=True,
+					)
 					continue
 				except RetryableEmbeddingException as e:
 					# temporary error, continue with the next document
 					logger.exception('Error adding documents to vectordb, should be retried later.', exc_info=e, extra={
 						'source_id': indoc.source_id,
 					})
-					retry_sources.append(indoc.source_id)
+					results[php_db_id] = IndexingError(
+						error=str(e),
+						retryable=True,
+					)
 					continue
 				except EmbeddingException as e:
 					logger.exception('Error adding documents to vectordb', exc_info=e, extra={
 						'source_id': indoc.source_id,
 					})
-					raise
+					results[php_db_id] = IndexingError(
+						error=str(e),
+						retryable=False,
+					)
+					continue
 				except Exception as e:
 					logger.exception('Error adding documents to vectordb', exc_info=e, extra={
 						'source_id': indoc.source_id,
 					})
-					retry_sources.append(indoc.source_id)
+					results[php_db_id] = IndexingError(
+						error='An unexpected error occurred while adding documents to the database.',
+						retryable=True,
+					)
 					continue
 
-		return added_sources, retry_sources
+		return results
 
 	@timed
-	def check_sources(self, sources: list[UploadFile]) -> tuple[list[str], list[str]]:
+	def check_sources(self, sources: dict[int, SourceItem]) -> tuple[list[str], list[str]]:
+		'''
+		returns a tuple of (existing_source_ids, to_embed_source_ids)
+		'''
 		with self.session_maker() as session:
 			try:
 				stmt = (
 					sa.select(DocumentsStore.source_id)
-					.filter(DocumentsStore.source_id.in_([source.filename for source in sources]))
+					.filter(DocumentsStore.source_id.in_([source.reference for source in sources.values()]))
 					.with_for_update()
 				)
 
 				results = session.execute(stmt).fetchall()
 				existing_sources = {r.source_id for r in results}
-				to_embed = [source.filename for source in sources if source.filename not in existing_sources]
+				to_embed = [source.reference for source in sources.values() if source.reference not in existing_sources]
 
 				to_delete = []
 
-				for source in sources:
+				for source in sources.values():
 					stmt = (
 						sa.select(DocumentsStore.source_id)
-						.filter(DocumentsStore.source_id == source.filename)
+						.filter(DocumentsStore.source_id == source.reference)
 						.filter(DocumentsStore.modified < sa.cast(
-							datetime.fromtimestamp(int(source.headers['modified'])),
+							datetime.fromtimestamp(int(source.modified)),
 							sa.DateTime,
 						))
 					)
@@ -239,14 +253,13 @@ def check_sources(self, sources: list[UploadFile]) -> tuple[list[str], list[str]
 				session.rollback()
 				raise DbException('Error: checking sources in vectordb') from e
 
-			still_existing_sources = [
-				source
-				for source in existing_sources
-				if source not in to_delete
+			still_existing_source_ids = [
+				source_id
+				for source_id in existing_sources
+				if source_id not in to_delete
 			]
 
-			# the pyright issue stems from source.filename, which has already been validated
-			return list(still_existing_sources), to_embed  # pyright: ignore[reportReturnType]
+			return list(still_existing_source_ids), to_embed
 
 	def decl_update_access(self, user_ids: list[str], source_id: str, session_: orm.Session | None = None):
 		session = session_ or self.session_maker()

From 03a3f433caccdf7121c3171538828c8f6fefa5af Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Mon, 9 Mar 2026 19:42:21 +0530
Subject: [PATCH 04/96] wip: parallelize file parsing and processing based on
 cpu count

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/task_fetcher.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index a548bcf..853a68c 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -5,6 +5,7 @@
 
 import asyncio
 import logging
+import os
 from contextlib import suppress
 from enum import Enum
 from io import BytesIO
@@ -35,6 +36,8 @@
 THREADS = {}
 LOGGER = logging.getLogger('ccb.task_fetcher')
 FILES_INDEXING_BATCH_SIZE = 64  # todo: config?
+# divides the batch into these many chunks
+PARALLEL_FILE_PARSING = max(1, (os.cpu_count() or 2) - 1)  # todo: config?
 # max concurrent fetches to avoid overloading the NC server or hitting rate limits
 CONCURRENT_FILE_FETCHES = 10  # todo: config?
 MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB, todo: config?
@@ -217,8 +220,18 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 				else:
 					source_errors[file_id] = result
 
-			files_result = _load_sources(source_files)
-			providers_result = _load_sources(q_items.content_providers)
+			files_result = {}
+			providers_result = {}
+			chunk_size = FILES_INDEXING_BATCH_SIZE // PARALLEL_FILE_PARSING
+
+			# chunk file parsing for better file operation parallelism
+			for i in range(0, len(source_files), chunk_size):
+				chunk = dict(list(source_files.items())[i:i+chunk_size])
+				files_result.update(_load_sources(chunk))
+
+			for i in range(0, len(q_items.content_providers), chunk_size):
+				chunk = dict(list(q_items.content_providers.items())[i:i+chunk_size])
+				providers_result.update(_load_sources(chunk))
 
 			if (
 				any(isinstance(res, IndexingError) for res in files_result.values())

From 0dc404bf48cff0e358b723bcb12775956d0c2eac Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 10 Mar 2026 17:36:03 +0530
Subject: [PATCH 05/96] ci: use the kubernetes branch of context_chat

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 10e2d61..fb06baf 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -113,6 +113,8 @@ jobs:
           repository: nextcloud/context_chat
           path: apps/context_chat
           persist-credentials: false
+          # todo: remove later
+          ref: feat/reverse-content-flow
 
       - name: Checkout backend
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

From c7339828818ff49e8a2c44aa7896b4b2fdf495fb Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 10 Mar 2026 17:43:27 +0530
Subject: [PATCH 06/96] fix typo: pass app_enabled instead of the Event class

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/task_fetcher.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 853a68c..cfa9293 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -304,11 +304,11 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 
 
 
-def updates_processing_thread(app_config: TConfig):
+def updates_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 	...
 
 
-def request_processing_thread(app_config: TConfig):
+def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 	...
 
 
@@ -317,12 +317,12 @@ def start_bg_threads(app_config: TConfig, app_enabled: Event):
 		case AppRole.INDEXING | AppRole.NORMAL:
 			THREADS[ThreadType.FILES_INDEXING] = Thread(
 				target=files_indexing_thread,
-				args=(app_config, Event),
+				args=(app_config, app_enabled),
 				name='FilesIndexingThread',
 			)
 			THREADS[ThreadType.UPDATES_PROCESSING] = Thread(
 				target=updates_processing_thread,
-				args=(app_config, Event),
+				args=(app_config, app_enabled),
 				name='UpdatesProcessingThread',
 			)
 			THREADS[ThreadType.FILES_INDEXING].start()
@@ -330,7 +330,7 @@ def start_bg_threads(app_config: TConfig, app_enabled: Event):
 		case AppRole.RP | AppRole.NORMAL:
 			THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
 				target=request_processing_thread,
-				args=(app_config, Event),
+				args=(app_config, app_enabled),
 				name='RequestProcessingThread',
 			)
 			THREADS[ThreadType.REQUEST_PROCESSING].start()

From dda312f21f74955d70e6f5f74840a31b26bb3f9d Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 11 Mar 2026 11:58:50 +0530
Subject: [PATCH 07/96] migrate the update process to be thread based

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/chain/ingest/injest.py |   2 +-
 context_chat_backend/controller.py          | 203 ++++++++++----------
 context_chat_backend/task_fetcher.py        | 183 +++++++++++++++++-
 context_chat_backend/types.py               | 183 +++++++++++++++++-
 context_chat_backend/vectordb/pgvector.py   |  27 ++-
 context_chat_backend/vectordb/service.py    |  54 +++++-
 context_chat_backend/vectordb/types.py      |   4 +-
 7 files changed, 531 insertions(+), 125 deletions(-)

diff --git a/context_chat_backend/chain/ingest/injest.py b/context_chat_backend/chain/ingest/injest.py
index 0eb70e0..7369f45 100644
--- a/context_chat_backend/chain/ingest/injest.py
+++ b/context_chat_backend/chain/ingest/injest.py
@@ -129,7 +129,7 @@ def _increase_access_for_existing_sources(
 	for db_id, source in existing_sources.items():
 		try:
 			vectordb.update_access(
-				UpdateAccessOp.allow,
+				UpdateAccessOp.ALLOW,
 				list(map(_decode_latin_1, source.userIds)),
 				source.reference,
 			)
diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 3e70ee1..580416f 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -6,7 +6,7 @@
 # isort: off
 from .chain.types import ContextException, LLMOutput, ScopeType, SearchResult
 from .types import LoaderException, EmbeddingException
-from .vectordb.types import DbException, SafeDbException, UpdateAccessOp
+from .vectordb.types import DbException, SafeDbException
 from .setup_functions import ensure_config_file, repair_run, setup_env_vars
 
 # setup env vars before importing other modules
@@ -25,9 +25,9 @@
 from functools import wraps
 from threading import Event, Thread
 from time import sleep
-from typing import Annotated, Any
+from typing import Any
 
-from fastapi import Body, FastAPI, Request
+from fastapi import FastAPI, Request
 from langchain.llms.base import LLM
 from nc_py_api import AsyncNextcloudApp, NextcloudApp
 from nc_py_api.ex_app import persistent_storage, set_handlers
@@ -40,16 +40,9 @@
 from .dyn_loader import LLMModelLoader, VectorDBLoader
 from .models.types import LlmException
 from nc_py_api.ex_app import AppAPIAuthMiddleware
-from .utils import JSONResponse, exec_in_proc, is_valid_provider_id, is_valid_source_id, value_of
+from .utils import JSONResponse, exec_in_proc, value_of
 from .task_fetcher import start_bg_threads, wait_for_bg_threads
-from .vectordb.service import (
-	count_documents_by_provider,
-	decl_update_access,
-	delete_by_provider,
-	delete_by_source,
-	delete_user,
-	update_access,
-)
+from .vectordb.service import count_documents_by_provider
 
 # setup
 
@@ -227,119 +220,131 @@ def _():
 	return JSONResponse(counts)
 
 
-@app.post('/updateAccessDeclarative')
-@enabled_guard(app)
-def _(
-	userIds: Annotated[list[str], Body()],
-	sourceId: Annotated[str, Body()],
-):
-	logger.debug('Update access declarative request:', extra={
-		'user_ids': userIds,
-		'source_id': sourceId,
-	})
+@app.get('/downloadLogs')
+def download_logs() -> FileResponse:
+	with tempfile.NamedTemporaryFile('wb', delete=False) as tmp:
+		with zipfile.ZipFile(tmp, mode='w', compression=zipfile.ZIP_DEFLATED) as zip_file:
+			files = os.listdir(os.path.join(persistent_storage(), 'logs'))
+			for file in files:
+				file_path = os.path.join(persistent_storage(), 'logs', file)
+				if os.path.isfile(file_path): # Might be a folder (just skip it then)
+					zip_file.write(file_path)
+		return FileResponse(tmp.name, media_type='application/zip', filename='docker_logs.zip')
 
-	if len(userIds) == 0:
-		return JSONResponse('Empty list of user ids', 400)
 
-	if not is_valid_source_id(sourceId):
-		return JSONResponse('Invalid source id', 400)
+# @app.post('/updateAccessDeclarative')
+# @enabled_guard(app)
+# def _(
+# 	userIds: Annotated[list[str], Body()],
+# 	sourceId: Annotated[str, Body()],
+# ):
+# 	logger.debug('Update access declarative request:', extra={
+# 		'user_ids': userIds,
+# 		'source_id': sourceId,
+# 	})
 
-	exec_in_proc(target=decl_update_access, args=(vectordb_loader, userIds, sourceId))
+# 	if len(userIds) == 0:
+# 		return JSONResponse('Empty list of user ids', 400)
 
-	return JSONResponse('Access updated')
+# 	if not is_valid_source_id(sourceId):
+# 		return JSONResponse('Invalid source id', 400)
 
+# 	exec_in_proc(target=decl_update_access, args=(vectordb_loader, userIds, sourceId))
 
-@app.post('/updateAccess')
-@enabled_guard(app)
-def _(
-	op: Annotated[UpdateAccessOp, Body()],
-	userIds: Annotated[list[str], Body()],
-	sourceId: Annotated[str, Body()],
-):
-	logger.debug('Update access request', extra={
-		'op': op,
-		'user_ids': userIds,
-		'source_id': sourceId,
-	})
+# 	return JSONResponse('Access updated')
 
-	if len(userIds) == 0:
-		return JSONResponse('Empty list of user ids', 400)
 
-	if not is_valid_source_id(sourceId):
-		return JSONResponse('Invalid source id', 400)
+# @app.post('/updateAccess')
+# @enabled_guard(app)
+# def _(
+# 	op: Annotated[UpdateAccessOp, Body()],
+# 	userIds: Annotated[list[str], Body()],
+# 	sourceId: Annotated[str, Body()],
+# ):
+# 	logger.debug('Update access request', extra={
+# 		'op': op,
+# 		'user_ids': userIds,
+# 		'source_id': sourceId,
+# 	})
 
-	exec_in_proc(target=update_access, args=(vectordb_loader, op, userIds, sourceId))
+# 	if len(userIds) == 0:
+# 		return JSONResponse('Empty list of user ids', 400)
 
-	return JSONResponse('Access updated')
+# 	if not is_valid_source_id(sourceId):
+# 		return JSONResponse('Invalid source id', 400)
 
+# 	exec_in_proc(target=update_access, args=(vectordb_loader, op, userIds, sourceId))
 
-@app.post('/updateAccessProvider')
-@enabled_guard(app)
-def _(
-	op: Annotated[UpdateAccessOp, Body()],
-	userIds: Annotated[list[str], Body()],
-	providerId: Annotated[str, Body()],
-):
-	logger.debug('Update access by provider request', extra={
-		'op': op,
-		'user_ids': userIds,
-		'provider_id': providerId,
-	})
+# 	return JSONResponse('Access updated')
 
-	if len(userIds) == 0:
-		return JSONResponse('Empty list of user ids', 400)
 
-	if not is_valid_provider_id(providerId):
-		return JSONResponse('Invalid provider id', 400)
+# @app.post('/updateAccessProvider')
+# @enabled_guard(app)
+# def _(
+# 	op: Annotated[UpdateAccessOp, Body()],
+# 	userIds: Annotated[list[str], Body()],
+# 	providerId: Annotated[str, Body()],
+# ):
+# 	logger.debug('Update access by provider request', extra={
+# 		'op': op,
+# 		'user_ids': userIds,
+# 		'provider_id': providerId,
+# 	})
 
-	exec_in_proc(target=update_access, args=(vectordb_loader, op, userIds, providerId))
+# 	if len(userIds) == 0:
+# 		return JSONResponse('Empty list of user ids', 400)
 
-	return JSONResponse('Access updated')
+# 	if not is_valid_provider_id(providerId):
+# 		return JSONResponse('Invalid provider id', 400)
 
+# 	exec_in_proc(target=update_access_provider, args=(vectordb_loader, op, userIds, providerId))
 
-@app.post('/deleteSources')
-@enabled_guard(app)
-def _(sourceIds: Annotated[list[str], Body(embed=True)]):
-	logger.debug('Delete sources request', extra={
-		'source_ids': sourceIds,
-	})
+# 	return JSONResponse('Access updated')
 
-	sourceIds = [source.strip() for source in sourceIds if source.strip() != '']
 
-	if len(sourceIds) == 0:
-		return JSONResponse('No sources provided', 400)
+# @app.post('/deleteSources')
+# @enabled_guard(app)
+# def _(sourceIds: Annotated[list[str], Body(embed=True)]):
+# 	logger.debug('Delete sources request', extra={
+# 		'source_ids': sourceIds,
+# 	})
 
-	res = exec_in_proc(target=delete_by_source, args=(vectordb_loader, sourceIds))
-	if res is False:
-		return JSONResponse('Error: VectorDB delete failed, check vectordb logs for more info.', 400)
+# 	sourceIds = [source.strip() for source in sourceIds if source.strip() != '']
 
-	return JSONResponse('All valid sources deleted')
+# 	if len(sourceIds) == 0:
+# 		return JSONResponse('No sources provided', 400)
 
+# 	res = exec_in_proc(target=delete_by_source, args=(vectordb_loader, sourceIds))
+# 	if res is False:
+# 		return JSONResponse('Error: VectorDB delete failed, check vectordb logs for more info.', 400)
 
-@app.post('/deleteProvider')
-@enabled_guard(app)
-def _(providerKey: str = Body(embed=True)):
-	logger.debug('Delete sources by provider for all users request', extra={ 'provider_key': providerKey })
+# 	return JSONResponse('All valid sources deleted')
 
-	if value_of(providerKey) is None:
-		return JSONResponse('Invalid provider key provided', 400)
 
-	exec_in_proc(target=delete_by_provider, args=(vectordb_loader, providerKey))
+# @app.post('/deleteProvider')
+# @enabled_guard(app)
+# def _(providerKey: str = Body(embed=True)):
+# 	logger.debug('Delete sources by provider for all users request', extra={ 'provider_key': providerKey })
 
-	return JSONResponse('All valid sources deleted')
+# 	if value_of(providerKey) is None:
+# 		return JSONResponse('Invalid provider key provided', 400)
 
+# 	exec_in_proc(target=delete_by_provider, args=(vectordb_loader, providerKey))
 
-@app.post('/deleteUser')
-@enabled_guard(app)
-def _(userId: str = Body(embed=True)):
-	logger.debug('Remove access list for user, and orphaned sources', extra={ 'user_id': userId })
+# 	return JSONResponse('All valid sources deleted')
 
-	if value_of(userId) is None:
-		return JSONResponse('Invalid userId provided', 400)
 
-	exec_in_proc(target=delete_user, args=(vectordb_loader, userId))
+# @app.post('/deleteUser')
+# @enabled_guard(app)
+# def _(userId: str = Body(embed=True)):
+# 	logger.debug('Remove access list for user, and orphaned sources', extra={ 'user_id': userId })
+
+# 	if value_of(userId) is None:
+# 		return JSONResponse('Invalid userId provided', 400)
 
-	return JSONResponse('User deleted')
+# 	exec_in_proc(target=delete_user, args=(vectordb_loader, userId))
+
+# 	return JSONResponse('User deleted')
 
 
 # @app.put('/loadSources')
@@ -503,15 +508,3 @@ def _(query: Query) -> list[SearchResult]:
 		query.scopeType,
 		query.scopeList,
 	))
-
-
-@app.get('/downloadLogs')
-def download_logs() -> FileResponse:
-	with tempfile.NamedTemporaryFile('wb', delete=False) as tmp:
-		with zipfile.ZipFile(tmp, mode='w', compression=zipfile.ZIP_DEFLATED) as zip_file:
-			files = os.listdir(os.path.join(persistent_storage(), 'logs'))
-			for file in files:
-				file_path = os.path.join(persistent_storage(), 'logs', file)
-				if os.path.isfile(file_path): # Might be a folder (just skip it then)
-					zip_file.write(file_path)
-		return FileResponse(tmp.name, media_type='application/zip', filename='docker_logs.zip')
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index cfa9293..84b974b 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -19,9 +19,11 @@
 from .chain.ingest.injest import embed_sources
 from .dyn_loader import VectorDBLoader
 from .types import (
+	ActionsQueueItems,
+	ActionType,
 	AppRole,
 	EmbeddingException,
-	FilesQueueItem,
+	FilesQueueItems,
 	IndexingError,
 	IndexingException,
 	LoaderException,
@@ -30,7 +32,15 @@
 	TConfig,
 )
 from .utils import exec_in_proc, get_app_role
-from .vectordb.types import DbException
+from .vectordb.service import (
+	decl_update_access,
+	delete_by_provider,
+	delete_by_source,
+	delete_user,
+	update_access,
+	update_access_provider,
+)
+from .vectordb.types import DbException, SafeDbException
 
 APP_ROLE = get_app_role()
 THREADS = {}
@@ -41,6 +51,8 @@
 # max concurrent fetches to avoid overloading the NC server or hitting rate limits
 CONCURRENT_FILE_FETCHES = 10  # todo: config?
 MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB, todo: config?
+ACTIONS_BATCH_SIZE = 512  # todo: config?
+POLLING_COOLDOWN = 30
 
 
 class ThreadType(Enum):
@@ -201,10 +213,15 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 			)
 
 			try:
-				q_items = FilesQueueItem.model_validate(q_items_res)
+				q_items: FilesQueueItems = FilesQueueItems.model_validate(q_items_res)
 			except ValidationError as e:
 				raise Exception(f'Error validating queue items response: {e}\nResponse content: {q_items_res}') from e
 
+			if not q_items.files and not q_items.content_providers:
+				LOGGER.debug('No documents to index')
+				sleep(POLLING_COOLDOWN)
+				continue
+
 			# populate files content and convert to source items
 			fetched_files = {}
 			source_files = {}
@@ -305,7 +322,174 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 
 
 def updates_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
-	...
+	'''
+	Poll the actions queue of the context_chat app and apply the queued actions to the vector DB.
+
+	Actions that were applied (or that failed with a SafeDbException) are reported back to the
+	server with a DELETE request, the ones that errored are only logged and not reported.
+	The thread returns if the vector DB loader cannot be initialised or once the app is disabled.
+	'''
+	try:
+		vectordb_loader = VectorDBLoader(app_config)
+	except LoaderException as e:
+		LOGGER.error('Error initializing vector DB loader, updates processing thread will not start:', exc_info=e)
+		return
+
+	while True:
+		if not app_enabled.is_set():
+			LOGGER.info('Updates processing thread is stopping as the app is disabled')
+			return
+
+		try:
+			nc = NextcloudApp()
+			q_items_res = nc.ocs(
+				'GET',
+				'/apps/context_chat/queues/actions',
+				params={ 'n': ACTIONS_BATCH_SIZE }
+			)
+
+			try:
+				q_items: ActionsQueueItems = ActionsQueueItems.model_validate(q_items_res)
+			except ValidationError as e:
+				raise Exception(f'Error validating queue items response: {e}\nResponse content: {q_items_res}') from e
+		except (
+			niquests.exceptions.ConnectionError,
+			niquests.exceptions.Timeout,
+		) as e:
+			LOGGER.info('Temporary error fetching updates to process, will retry:', exc_info=e)
+			sleep(5)
+			continue
+		except Exception as e:
+			LOGGER.exception('Error fetching updates to process:', exc_info=e)
+			sleep(5)
+			continue
+
+		if not q_items.actions:
+			LOGGER.debug('No updates to process')
+			sleep(POLLING_COOLDOWN)
+			continue
+
+		processed_event_ids = []
+		errored_events = {}
+		for i, (db_id, action_item) in enumerate(q_items.actions.items()):
+			try:
+				match action_item.type:
+					case ActionType.DELETE_SOURCE_IDS:
+						exec_in_proc(target=delete_by_source, args=(vectordb_loader, action_item.payload.sourceIds))
+
+					case ActionType.DELETE_PROVIDER_ID:
+						exec_in_proc(target=delete_by_provider, args=(vectordb_loader, action_item.payload.providerId))
+
+					case ActionType.DELETE_USER_ID:
+						exec_in_proc(target=delete_user, args=(vectordb_loader, action_item.payload.userId))
+
+					case ActionType.UPDATE_ACCESS_SOURCE_ID:
+						exec_in_proc(
+							target=update_access,
+							args=(
+								vectordb_loader,
+								action_item.payload.op,
+								action_item.payload.userIds,
+								action_item.payload.sourceId,
+							),
+						)
+
+					case ActionType.UPDATE_ACCESS_PROVIDER_ID:
+						exec_in_proc(
+							target=update_access_provider,
+							args=(
+								vectordb_loader,
+								action_item.payload.op,
+								action_item.payload.userIds,
+								action_item.payload.providerId,
+							),
+						)
+
+					case ActionType.UPDATE_ACCESS_DECL_SOURCE_ID:
+						exec_in_proc(
+							target=decl_update_access,
+							args=(
+								vectordb_loader,
+								action_item.payload.userIds,
+								action_item.payload.sourceId,
+							),
+						)
+
+					case _:
+						LOGGER.warning(
+							f'Unknown action type {action_item.type} for action id {db_id},'
+							' skipping and marking as processed',
+							extra={ 'action_item': action_item },
+						)
+						# no "continue" here so that the id is added to the processed list below
+
+				processed_event_ids.append(db_id)
+			except SafeDbException as e:
+				LOGGER.debug(
+					f'Safe DB error thrown while processing action id {db_id}, type {action_item.type},'
+					" it's safe to ignore and mark as processed.",
+					exc_info=e,
+					extra={ 'action_item': action_item },
+				)
+				processed_event_ids.append(db_id)
+				continue
+
+			except (LoaderException, DbException) as e:
+				LOGGER.error(
+					f'Error processing action id {db_id}, type {action_item.type}: {e}',
+					exc_info=e,
+					extra={ 'action_item': action_item },
+				)
+				errored_events[db_id] = str(e)
+				continue
+
+			except Exception as e:
+				LOGGER.error(
+					f'Unexpected error processing action id {db_id}, type {action_item.type}: {e}',
+					exc_info=e,
+					extra={ 'action_item': action_item },
+				)
+				errored_events[db_id] = f'Unexpected error: {e}'
+				continue
+
+			if (i + 1) % 20 == 0:
+				LOGGER.debug(f'Processed {i + 1} updates, sleeping for a bit to allow other operations to proceed')
+				sleep(2)
+
+		LOGGER.info(f'Processed {len(processed_event_ids)} updates with {len(errored_events)} errors', extra={
+			'errored_events': errored_events,
+		})
+
+		if len(processed_event_ids) == 0:
+			LOGGER.debug('No updates processed, skipping reporting to the server')
+			# every action in the batch errored, back off instead of polling the server again right away
+			sleep(5)
+			continue
+
+		try:
+			nc.ocs(
+				'DELETE',
+				'/apps/context_chat/queues/actions/',
+				json={ 'actions': processed_event_ids },
+			)
+		except (
+			niquests.exceptions.ConnectionError,
+			niquests.exceptions.Timeout,
+		) as e:
+			LOGGER.info('Temporary error reporting processed updates, will retry:', exc_info=e)
+			sleep(5)
+			with suppress(Exception):
+				nc = NextcloudApp()
+				nc.ocs(
+					'DELETE',
+					'/apps/context_chat/queues/actions/',
+					json={ 'actions': processed_event_ids },
+				)
+			continue
+		except Exception as e:
+			LOGGER.exception('Error reporting processed updates:', exc_info=e)
+			sleep(5)
+			continue
 
 
 def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 97d48ce..849c2e3 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -4,12 +4,13 @@
 #
 from enum import Enum
 from io import BytesIO
-from typing import Self
+from typing import Annotated, Literal, Self
 
-from pydantic import BaseModel, field_validator
+from pydantic import BaseModel, Discriminator, field_validator
 
 from .mimetype_list import SUPPORTED_MIMETYPES
 from .utils import is_valid_provider_id, is_valid_source_id
+from .vectordb.types import UpdateAccessOp
 
 __all__ = [
 	'DEFAULT_EM_MODEL_ALIAS',
@@ -182,7 +183,7 @@ def validate_content(cls, v):
 		raise ValueError('Content must be either a non-empty string or a non-empty BytesIO')
 
 
-class FilesQueueItem(BaseModel):
+class FilesQueueItems(BaseModel):
 	files: dict[int, ReceivedFileItem]  # [db id]: FileItem
 	content_providers: dict[int, SourceItem]  # [db id]: SourceItem
 
@@ -198,3 +199,179 @@ def __init__(self, message: str, retryable: bool = False):
 class IndexingError(BaseModel):
 	error: str
 	retryable: bool = False
+
+
+# PHP equivalent for reference:
+
+# class ActionType {
+# 	// { sourceIds: array<string> }
+# 	public const DELETE_SOURCE_IDS = 'delete_source_ids';
+# 	// { providerId: string }
+# 	public const DELETE_PROVIDER_ID = 'delete_provider_id';
+# 	// { userId: string }
+# 	public const DELETE_USER_ID = 'delete_user_id';
+# 	// { op: string, userIds: array<string>, sourceId: string }
+# 	public const UPDATE_ACCESS_SOURCE_ID = 'update_access_source_id';
+# 	// { op: string, userIds: array<string>, providerId: string }
+# 	public const UPDATE_ACCESS_PROVIDER_ID = 'update_access_provider_id';
+# 	// { userIds: array<string>, sourceId: string }
+# 	public const UPDATE_ACCESS_DECL_SOURCE_ID = 'update_access_decl_source_id';
+# }
+
+
+def _validate_source_ids(source_ids: list[str]) -> list[str]:
+	if (
+		not isinstance(source_ids, list)
+		or not all(isinstance(sid, str) and sid.strip() != '' for sid in source_ids)
+		or len(source_ids) == 0
+	):
+		raise ValueError('sourceIds must be a non-empty list of non-empty strings')
+	return [sid.strip() for sid in source_ids]
+
+
+def _validate_provider_id(provider_id: str) -> str:
+	if not isinstance(provider_id, str) or not is_valid_provider_id(provider_id):
+		raise ValueError('providerId must be a valid provider ID string')
+	return provider_id
+
+
+def _validate_user_ids(user_ids: list[str]) -> list[str]:
+	if (
+		not isinstance(user_ids, list)
+		or not all(isinstance(uid, str) and uid.strip() != '' for uid in user_ids)
+		or len(user_ids) == 0
+	):
+		raise ValueError('userIds must be a non-empty list of non-empty strings')
+	return [uid.strip() for uid in user_ids]
+
+
+class ActionPayloadDeleteSourceIds(BaseModel):
+	sourceIds: list[str]
+
+	@field_validator('sourceIds', mode='after')
+	def validate_source_ids(self) -> Self:
+		self.sourceIds = _validate_source_ids(self.sourceIds)
+		return self
+
+
+class ActionPayloadDeleteProviderId(BaseModel):
+	providerId: str
+
+	@field_validator('providerId')
+	def validate_provider_id(self) -> Self:
+		self.providerId = _validate_provider_id(self.providerId)
+		return self
+
+
+class ActionPayloadDeleteUserId(BaseModel):
+	userId: str
+
+	@field_validator('userId')
+	def validate_user_id(self) -> Self:
+		self.userId = _validate_user_ids([self.userId])[0]
+		return self
+
+
+class ActionPayloadUpdateAccessSourceId(BaseModel):
+	op: UpdateAccessOp
+	userIds: list[str]
+	sourceId: str
+
+	@field_validator('userIds', mode='after')
+	def validate_user_ids(self) -> Self:
+		self.userIds = _validate_user_ids(self.userIds)
+		return self
+
+	@field_validator('sourceId')
+	def validate_source_id(self) -> Self:
+		self.sourceId = _validate_source_ids([self.sourceId])[0]
+		return self
+
+
+class ActionPayloadUpdateAccessProviderId(BaseModel):
+	op: UpdateAccessOp
+	userIds: list[str]
+	providerId: str
+
+	@field_validator('userIds', mode='after')
+	def validate_user_ids(self) -> Self:
+		self.userIds = _validate_user_ids(self.userIds)
+		return self
+
+	@field_validator('providerId')
+	def validate_provider_id(self) -> Self:
+		self.providerId = _validate_provider_id(self.providerId)
+		return self
+
+
+class ActionPayloadUpdateAccessDeclSourceId(BaseModel):
+	userIds: list[str]
+	sourceId: str
+
+	@field_validator('userIds', mode='after')
+	def validate_user_ids(self) -> Self:
+		self.userIds = _validate_user_ids(self.userIds)
+		return self
+
+	@field_validator('sourceId')
+	def validate_source_id(self) -> Self:
+		self.sourceId = _validate_source_ids([self.sourceId])[0]
+		return self
+
+
+class ActionType(str, Enum):
+	DELETE_SOURCE_IDS = 'delete_source_ids'
+	DELETE_PROVIDER_ID = 'delete_provider_id'
+	DELETE_USER_ID = 'delete_user_id'
+	UPDATE_ACCESS_SOURCE_ID = 'update_access_source_id'
+	UPDATE_ACCESS_PROVIDER_ID = 'update_access_provider_id'
+	UPDATE_ACCESS_DECL_SOURCE_ID = 'update_access_decl_source_id'
+
+
+class CommonActionsQueueItem(BaseModel):
+	id: int
+
+
+class ActionsQueueItemDeleteSourceIds(CommonActionsQueueItem):
+	type: Literal[ActionType.DELETE_SOURCE_IDS]
+	payload: ActionPayloadDeleteSourceIds
+
+
+class ActionsQueueItemDeleteProviderId(CommonActionsQueueItem):
+	type: Literal[ActionType.DELETE_PROVIDER_ID]
+	payload: ActionPayloadDeleteProviderId
+
+
+class ActionsQueueItemDeleteUserId(CommonActionsQueueItem):
+	type: Literal[ActionType.DELETE_USER_ID]
+	payload: ActionPayloadDeleteUserId
+
+
+class ActionsQueueItemUpdateAccessSourceId(CommonActionsQueueItem):
+	type: Literal[ActionType.UPDATE_ACCESS_SOURCE_ID]
+	payload: ActionPayloadUpdateAccessSourceId
+
+
+class ActionsQueueItemUpdateAccessProviderId(CommonActionsQueueItem):
+	type: Literal[ActionType.UPDATE_ACCESS_PROVIDER_ID]
+	payload: ActionPayloadUpdateAccessProviderId
+
+
+class ActionsQueueItemUpdateAccessDeclSourceId(CommonActionsQueueItem):
+	type: Literal[ActionType.UPDATE_ACCESS_DECL_SOURCE_ID]
+	payload: ActionPayloadUpdateAccessDeclSourceId
+
+
+ActionsQueueItem = Annotated[
+	ActionsQueueItemDeleteSourceIds
+	| ActionsQueueItemDeleteProviderId
+	| ActionsQueueItemDeleteUserId
+	| ActionsQueueItemUpdateAccessSourceId
+	| ActionsQueueItemUpdateAccessProviderId
+	| ActionsQueueItemUpdateAccessDeclSourceId,
+	Discriminator('type'),
+]
+
+
+class ActionsQueueItems(BaseModel):
+	actions: dict[int, ActionsQueueItem]
diff --git a/context_chat_backend/vectordb/pgvector.py b/context_chat_backend/vectordb/pgvector.py
index f5879fe..8bcc6f4 100644
--- a/context_chat_backend/vectordb/pgvector.py
+++ b/context_chat_backend/vectordb/pgvector.py
@@ -338,7 +338,7 @@ def update_access(
 				)
 
 			match op:
-				case UpdateAccessOp.allow:
+				case UpdateAccessOp.ALLOW:
 					for i in range(0, len(user_ids), PG_BATCH_SIZE):
 						batched_uids = user_ids[i:i+PG_BATCH_SIZE]
 						stmt = (
@@ -355,7 +355,7 @@ def update_access(
 						session.execute(stmt)
 					session.commit()
 
-				case UpdateAccessOp.deny:
+				case UpdateAccessOp.DENY:
 					for i in range(0, len(user_ids), PG_BATCH_SIZE):
 						batched_uids = user_ids[i:i+PG_BATCH_SIZE]
 						stmt = (
@@ -448,15 +448,17 @@ def delete_source_ids(self, source_ids: list[str], session_: orm.Session | None
 			# entry from "AccessListStore" is deleted automatically due to the foreign key constraint
       # batch the deletion to avoid hitting the query parameter limit
 			chunks_to_delete = []
+			deleted_source_ids = []
 			for i in range(0, len(source_ids), PG_BATCH_SIZE):
 				batched_ids = source_ids[i:i+PG_BATCH_SIZE]
 				stmt_doc = (
 					sa.delete(DocumentsStore)
 					.filter(DocumentsStore.source_id.in_(batched_ids))
-					.returning(DocumentsStore.chunks)
+					.returning(DocumentsStore.chunks, DocumentsStore.source_id)
 				)
 				doc_result = session.execute(stmt_doc)
 				chunks_to_delete.extend(str(c) for res in doc_result for c in res.chunks)
+				deleted_source_ids.extend(str(res.source_id) for res in doc_result)
 
 			for i in range(0, len(chunks_to_delete), PG_BATCH_SIZE):
 				batched_chunks = chunks_to_delete[i:i+PG_BATCH_SIZE]
@@ -476,6 +478,14 @@ def delete_source_ids(self, source_ids: list[str], session_: orm.Session | None
 			if session_ is None:
 				session.close()
 
+		undeleted_source_ids = set(source_ids) - set(deleted_source_ids)
+		if len(undeleted_source_ids) > 0:
+			logger.info(
+				f'Source ids {undeleted_source_ids} were not deleted from documents store.'
+				' This can be due to the source ids not existing in the documents store due to'
+				' already being deleted or not having been added yet.'
+			)
+
 	def delete_provider(self, provider_key: str):
 		with self.session_maker() as session:
 			try:
@@ -519,7 +529,16 @@ def delete_user(self, user_id: str):
 				session.rollback()
 				raise DbException('Error: deleting user from access list') from e
 
-			self._cleanup_if_orphaned(list(source_ids), session)
+			try:
+				self._cleanup_if_orphaned(list(source_ids), session)
+			except Exception as e:
+				session.rollback()
+				logger.error(
+					'Error cleaning up orphaned source ids after deleting user, manual cleanup might be required',
+					exc_info=e,
+					extra={ 'source_ids': list(source_ids) },
+				)
+				raise DbException('Error: cleaning up orphaned source ids after deleting user') from e
 
 	def count_documents_by_provider(self) -> dict[str, int]:
 		try:
diff --git a/context_chat_backend/vectordb/service.py b/context_chat_backend/vectordb/service.py
index 620a0b3..06a8e19 100644
--- a/context_chat_backend/vectordb/service.py
+++ b/context_chat_backend/vectordb/service.py
@@ -6,27 +6,42 @@
 
 from ..dyn_loader import VectorDBLoader
 from .base import BaseVectorDB
-from .types import DbException, UpdateAccessOp
+from .types import UpdateAccessOp
 
 logger = logging.getLogger('ccb.vectordb')
 
-# todo: return source ids that were successfully deleted
+
 def delete_by_source(vectordb_loader: VectorDBLoader, source_ids: list[str]):
+	'''
+	Raises
+	------
+	DbException
+	LoaderException
+	'''
 	db: BaseVectorDB = vectordb_loader.load()
 	logger.debug('deleting sources by id', extra={ 'source_ids': source_ids })
-	try:
-		db.delete_source_ids(source_ids)
-	except Exception as e:
-		raise DbException('Error: Vectordb delete_source_ids error') from e
+	db.delete_source_ids(source_ids)
 
 
 def delete_by_provider(vectordb_loader: VectorDBLoader, provider_key: str):
+	'''
+	Raises
+	------
+	DbException
+	LoaderException
+	'''
 	db: BaseVectorDB = vectordb_loader.load()
 	logger.debug(f'deleting sources by provider: {provider_key}')
 	db.delete_provider(provider_key)
 
 
 def delete_user(vectordb_loader: VectorDBLoader, user_id: str):
+	'''
+	Raises
+	------
+	DbException
+	LoaderException
+	'''
 	db: BaseVectorDB = vectordb_loader.load()
 	logger.debug(f'deleting user from db: {user_id}')
 	db.delete_user(user_id)
@@ -38,6 +53,13 @@ def update_access(
 	user_ids: list[str],
 	source_id: str,
 ):
+	'''
+	Raises
+	------
+	DbException
+	LoaderException
+	SafeDbException
+	'''
 	db: BaseVectorDB = vectordb_loader.load()
 	logger.debug('updating access', extra={ 'op': op, 'user_ids': user_ids, 'source_id': source_id })
 	db.update_access(op, user_ids, source_id)
@@ -49,6 +71,13 @@ def update_access_provider(
 	user_ids: list[str],
 	provider_id: str,
 ):
+	'''
+	Raises
+	------
+	DbException
+	LoaderException
+	SafeDbException
+	'''
 	db: BaseVectorDB = vectordb_loader.load()
 	logger.debug('updating access by provider', extra={ 'op': op, 'user_ids': user_ids, 'provider_id': provider_id })
 	db.update_access_provider(op, user_ids, provider_id)
@@ -59,11 +88,24 @@ def decl_update_access(
 	user_ids: list[str],
 	source_id: str,
 ):
+	'''
+	Raises
+	------
+	DbException
+	LoaderException
+	SafeDbException
+	'''
 	db: BaseVectorDB = vectordb_loader.load()
 	logger.debug('decl update access', extra={ 'user_ids': user_ids, 'source_id': source_id })
 	db.decl_update_access(user_ids, source_id)
 
 def count_documents_by_provider(vectordb_loader: VectorDBLoader):
+	'''
+	Raises
+	------
+	DbException
+	LoaderException
+	'''
 	db: BaseVectorDB = vectordb_loader.load()
 	logger.debug('counting documents by provider')
 	return db.count_documents_by_provider()
diff --git a/context_chat_backend/vectordb/types.py b/context_chat_backend/vectordb/types.py
index df5c6dd..3081179 100644
--- a/context_chat_backend/vectordb/types.py
+++ b/context_chat_backend/vectordb/types.py
@@ -14,5 +14,5 @@ class SafeDbException(Exception):
 
 
 class UpdateAccessOp(Enum):
-	allow = 'allow'
-	deny = 'deny'
+	ALLOW = 'allow'
+	DENY = 'deny'

From b09a93cafda6726b706f11c8e7815b4a91acfc43 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 11 Mar 2026 14:33:39 +0530
Subject: [PATCH 08/96] fix pydantic types

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/types.py | 180 ++++++++++++----------------------
 context_chat_backend/utils.py |  10 --
 2 files changed, 64 insertions(+), 126 deletions(-)

diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 849c2e3..8577c93 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -2,14 +2,14 @@
 # SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
+import re
 from enum import Enum
 from io import BytesIO
 from typing import Annotated, Literal, Self
 
-from pydantic import BaseModel, Discriminator, field_validator
+from pydantic import AfterValidator, BaseModel, Discriminator, field_validator, model_validator
 
 from .mimetype_list import SUPPORTED_MIMETYPES
-from .utils import is_valid_provider_id, is_valid_source_id
 from .vectordb.types import UpdateAccessOp
 
 __all__ = [
@@ -26,6 +26,49 @@
 FILES_PROVIDER_ID = 'files__default'
 
 
+def is_valid_source_id(source_id: str) -> bool:
+	# note the ":" in the item id part
+	return re.match(r'^[a-zA-Z0-9_-]+__[a-zA-Z0-9_-]+: [a-zA-Z0-9:-]+$', source_id) is not None
+
+
+def is_valid_provider_id(provider_id: str) -> bool:
+	return re.match(r'^[a-zA-Z0-9_-]+__[a-zA-Z0-9_-]+$', provider_id) is not None
+
+
+def _validate_source_ids(source_ids: list[str]) -> list[str]:
+	if (
+		not isinstance(source_ids, list)
+		or not all(isinstance(sid, str) and sid.strip() != '' for sid in source_ids)
+		or len(source_ids) == 0
+	):
+		raise ValueError('sourceIds must be a non-empty list of non-empty strings')
+	return [sid.strip() for sid in source_ids]
+
+
+def _validate_source_id(source_id: str) -> str:
+	return _validate_source_ids([source_id])[0]
+
+
+def _validate_provider_id(provider_id: str) -> str:
+	if not isinstance(provider_id, str) or not is_valid_provider_id(provider_id):
+		raise ValueError('providerId must be a valid provider ID string')
+	return provider_id
+
+
+def _validate_user_ids(user_ids: list[str]) -> list[str]:
+	if (
+		not isinstance(user_ids, list)
+		or not all(isinstance(uid, str) and uid.strip() != '' for uid in user_ids)
+		or len(user_ids) == 0
+	):
+		raise ValueError('userIds must be a non-empty list of non-empty strings')
+	return [uid.strip() for uid in user_ids]
+
+
+def _validate_user_id(user_id: str) -> str:
+	return _validate_user_ids([user_id])[0]
+
+
 class TEmbeddingAuthApiKey(BaseModel):
 	apikey: str
 
@@ -89,12 +132,13 @@ class AppRole(str, Enum):
 
 
 class CommonSourceItem(BaseModel):
-	userIds: list[str]
-	reference: str  # source_id of the form "appId__providerId: itemId"
+	userIds: Annotated[list[str], AfterValidator(_validate_user_ids)]
+	# source_id of the form "appId__providerId: itemId"
+	reference: Annotated[str, AfterValidator(_validate_source_id)]
 	title: str
 	modified: int | str  # todo: int/string?
 	type: str
-	provider: str
+	provider: Annotated[str, AfterValidator(_validate_provider_id)]
 	size: int
 
 	@field_validator('modified', mode='before')
@@ -116,42 +160,13 @@ def validate_strings_non_empty(cls, v):
 			raise ValueError('Must be a non-empty string')
 		return v.strip()
 
-	@field_validator('userIds', mode='after')
-	def validate_user_ids(self) -> Self:
-		if (
-			not isinstance(self.userIds, list)
-			or not all(
-				isinstance(uid, str)
-				and uid.strip() != ''
-				for uid in self.userIds
-			)
-			or len(self.userIds) == 0
-		):
-			raise ValueError('userIds must be a non-empty list of non-empty strings')
-		self.userIds = [uid.strip() for uid in self.userIds]
-		return self
-
-	@field_validator('reference', mode='after')
-	def validate_reference_format(self) -> Self:
-		# validate reference format: "appId__providerId: itemId"
-		if not is_valid_source_id(self.reference):
-			raise ValueError('Invalid reference format, must be "appId__providerId: itemId"')
-		return self
-
-	@field_validator('provider', mode='after')
-	def validate_provider_format(self) -> Self:
-		# validate provider format: "appId__providerId"
-		if not is_valid_provider_id(self.provider):
-			raise ValueError('Invalid provider format, must be "appId__providerId"')
-		return self
-
-	@field_validator('type', mode='after')
+	@model_validator(mode='after')
 	def validate_type(self) -> Self:
 		if self.reference.startswith(FILES_PROVIDER_ID) and self.type not in SUPPORTED_MIMETYPES:
 			raise ValueError(f'Unsupported file type: {self.type} for reference {self.reference}')
 		return self
 
-	@field_validator('size', mode='after')
+	@model_validator(mode='after')
 	def validate_size(self) -> Self:
 		if not isinstance(self.size, int) or self.size < 0:
 			raise ValueError(f'Invalid size value: {self.size}, must be a non-negative integer')
@@ -182,6 +197,10 @@ def validate_content(cls, v):
 			return v
 		raise ValueError('Content must be either a non-empty string or a non-empty BytesIO')
 
+	class Config:
+		# to allow BytesIO in content field
+		arbitrary_types_allowed = True
+
 
 class FilesQueueItems(BaseModel):
 	files: dict[int, ReceivedFileItem]  # [db id]: FileItem
@@ -219,104 +238,33 @@ class IndexingError(BaseModel):
 # }
 
 
-def _validate_source_ids(source_ids: list[str]) -> list[str]:
-	if (
-		not isinstance(source_ids, list)
-		or not all(isinstance(sid, str) and sid.strip() != '' for sid in source_ids)
-		or len(source_ids) == 0
-	):
-		raise ValueError('sourceIds must be a non-empty list of non-empty strings')
-	return [sid.strip() for sid in source_ids]
-
-
-def _validate_provider_id(provider_id: str) -> str:
-	if not isinstance(provider_id, str) or not is_valid_provider_id(provider_id):
-		raise ValueError('providerId must be a valid provider ID string')
-	return provider_id
-
-
-def _validate_user_ids(user_ids: list[str]) -> list[str]:
-	if (
-		not isinstance(user_ids, list)
-		or not all(isinstance(uid, str) and uid.strip() != '' for uid in user_ids)
-		or len(user_ids) == 0
-	):
-		raise ValueError('userIds must be a non-empty list of non-empty strings')
-	return [uid.strip() for uid in user_ids]
-
-
 class ActionPayloadDeleteSourceIds(BaseModel):
-	sourceIds: list[str]
-
-	@field_validator('sourceIds', mode='after')
-	def validate_source_ids(self) -> Self:
-		self.sourceIds = _validate_source_ids(self.sourceIds)
-		return self
+	sourceIds: Annotated[list[str], AfterValidator(_validate_source_ids)]
 
 
 class ActionPayloadDeleteProviderId(BaseModel):
-	providerId: str
-
-	@field_validator('providerId')
-	def validate_provider_id(self) -> Self:
-		self.providerId = _validate_provider_id(self.providerId)
-		return self
+	providerId: Annotated[str, AfterValidator(_validate_provider_id)]
 
 
 class ActionPayloadDeleteUserId(BaseModel):
-	userId: str
-
-	@field_validator('userId')
-	def validate_user_id(self) -> Self:
-		self.userId = _validate_user_ids([self.userId])[0]
-		return self
+	userId: Annotated[str, AfterValidator(_validate_user_id)]
 
 
 class ActionPayloadUpdateAccessSourceId(BaseModel):
 	op: UpdateAccessOp
-	userIds: list[str]
-	sourceId: str
-
-	@field_validator('userIds', mode='after')
-	def validate_user_ids(self) -> Self:
-		self.userIds = _validate_user_ids(self.userIds)
-		return self
-
-	@field_validator('sourceId')
-	def validate_source_id(self) -> Self:
-		self.sourceId = _validate_source_ids([self.sourceId])[0]
-		return self
+	userIds: Annotated[list[str], AfterValidator(_validate_user_ids)]
+	sourceId: Annotated[str, AfterValidator(_validate_source_id)]
 
 
 class ActionPayloadUpdateAccessProviderId(BaseModel):
 	op: UpdateAccessOp
-	userIds: list[str]
-	providerId: str
-
-	@field_validator('userIds', mode='after')
-	def validate_user_ids(self) -> Self:
-		self.userIds = _validate_user_ids(self.userIds)
-		return self
-
-	@field_validator('providerId')
-	def validate_provider_id(self) -> Self:
-		self.providerId = _validate_provider_id(self.providerId)
-		return self
+	userIds: Annotated[list[str], AfterValidator(_validate_user_ids)]
+	providerId: Annotated[str, AfterValidator(_validate_provider_id)]
 
 
 class ActionPayloadUpdateAccessDeclSourceId(BaseModel):
-	userIds: list[str]
-	sourceId: str
-
-	@field_validator('userIds', mode='after')
-	def validate_user_ids(self) -> Self:
-		self.userIds = _validate_user_ids(self.userIds)
-		return self
-
-	@field_validator('sourceId')
-	def validate_source_id(self) -> Self:
-		self.sourceId = _validate_source_ids([self.sourceId])[0]
-		return self
+	userIds: Annotated[list[str], AfterValidator(_validate_user_ids)]
+	sourceId: Annotated[str, AfterValidator(_validate_source_id)]
 
 
 class ActionType(str, Enum):
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index 224f466..c7e588b 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -5,7 +5,6 @@
 import logging
 import multiprocessing as mp
 import os
-import re
 import traceback
 from collections.abc import Callable
 from functools import partial, wraps
@@ -102,15 +101,6 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 	return result['value']
 
 
-def is_valid_source_id(source_id: str) -> bool:
-	# note the ":" in the item id part
-	return re.match(r'^[a-zA-Z0-9_-]+__[a-zA-Z0-9_-]+: [a-zA-Z0-9:-]+$', source_id) is not None
-
-
-def is_valid_provider_id(provider_id: str) -> bool:
-	return re.match(r'^[a-zA-Z0-9_-]+__[a-zA-Z0-9_-]+$', provider_id) is not None
-
-
 def timed(func: Callable):
 	'''
 	Decorator to time a function

From 11b436c8ce43778dbf6beda8a7e3978626e7aee5 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 11 Mar 2026 14:34:55 +0530
Subject: [PATCH 09/96] fix: use a dedicated event to allow app halt without
 app being disabled

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/controller.py   |  1 +
 context_chat_backend/task_fetcher.py | 28 ++++++++++++++++++++++++----
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 580416f..55206ca 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -88,6 +88,7 @@ async def lifespan(app: FastAPI):
 	nc = NextcloudApp()
 	if nc.enabled_state:
 		app_enabled.set()
+		start_bg_threads(app_config, app_enabled)
 	logger.info(f'App enable state at startup: {app_enabled.is_set()}')
 	t = Thread(target=background_thread_task, args=())
 	t.start()
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 84b974b..e93eac3 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -44,6 +44,7 @@
 
 APP_ROLE = get_app_role()
 THREADS = {}
+THREAD_STOP_EVENT = Event()
 LOGGER = logging.getLogger('ccb.task_fetcher')
 FILES_INDEXING_BATCH_SIZE = 64  # todo: config?
 # divides the batch into these many chunks
@@ -199,8 +200,8 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 
 
 	while True:
-		if not app_enabled.is_set():
-			LOGGER.info('Files indexing thread is stopping as the app is disabled')
+		if THREAD_STOP_EVENT.is_set():
+			LOGGER.info('Files indexing thread is stopping due to stop event being set')
 			return
 
 		try:
@@ -329,8 +330,8 @@ def updates_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 		return
 
 	while True:
-		if not app_enabled.is_set():
-			LOGGER.info('Files indexing thread is stopping as the app is disabled')
+		if THREAD_STOP_EVENT.is_set():
+			LOGGER.info('Updates processing thread is stopping due to stop event being set')
 			return
 
 		try:
@@ -490,6 +491,14 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 def start_bg_threads(app_config: TConfig, app_enabled: Event):
 	match APP_ROLE:
 		case AppRole.INDEXING | AppRole.NORMAL:
+			if (
+				ThreadType.FILES_INDEXING in THREADS
+				or ThreadType.UPDATES_PROCESSING in THREADS
+			):
+				LOGGER.info('Background threads already running, skipping start')
+				return
+
+			THREAD_STOP_EVENT.clear()
 			THREADS[ThreadType.FILES_INDEXING] = Thread(
 				target=files_indexing_thread,
 				args=(app_config, app_enabled),
@@ -502,7 +511,13 @@ def start_bg_threads(app_config: TConfig, app_enabled: Event):
 			)
 			THREADS[ThreadType.FILES_INDEXING].start()
 			THREADS[ThreadType.UPDATES_PROCESSING].start()
+
 		case AppRole.RP | AppRole.NORMAL:
+			if ThreadType.REQUEST_PROCESSING in THREADS:
+				LOGGER.info('Background threads already running, skipping start')
+				return
+
+			THREAD_STOP_EVENT.clear()
 			THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
 				target=request_processing_thread,
 				args=(app_config, app_enabled),
@@ -516,12 +531,17 @@ def wait_for_bg_threads():
 		case AppRole.INDEXING | AppRole.NORMAL:
 			if (ThreadType.FILES_INDEXING not in THREADS or ThreadType.UPDATES_PROCESSING not in THREADS):
 				return
+
+			THREAD_STOP_EVENT.set()
 			THREADS[ThreadType.FILES_INDEXING].join()
 			THREADS[ThreadType.UPDATES_PROCESSING].join()
 			THREADS.pop(ThreadType.FILES_INDEXING)
 			THREADS.pop(ThreadType.UPDATES_PROCESSING)
+
 		case AppRole.RP | AppRole.NORMAL:
 			if (ThreadType.REQUEST_PROCESSING not in THREADS):
 				return
+
+			THREAD_STOP_EVENT.set()
 			THREADS[ThreadType.REQUEST_PROCESSING].join()
 			THREADS.pop(ThreadType.REQUEST_PROCESSING)

From c88e15364d53764257f7fddaca76505cf27c80d9 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 11 Mar 2026 17:54:48 +0530
Subject: [PATCH 10/96] fix fetch url and pydantic types

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/task_fetcher.py | 14 +++++++-------
 context_chat_backend/types.py        | 17 +++++++++--------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index e93eac3..5784d12 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -80,7 +80,7 @@ async def __fetch_file_content(
 			# a file pointer for storing the stream in memory until it is consumed
 			fp = BytesIO()
 			await nc._session.download2fp(
-				url_path=f'/apps/context_chat/files/{file_id}',
+				url_path=f'/ocs/v2.php/apps/context_chat/files/{file_id}',
 				fp=fp,
 				dav=False,
 				params={ 'userId': user_id },
@@ -209,7 +209,7 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 			# todo: add the 'size' param to the return of this call.
 			q_items_res = nc.ocs(
 				'GET',
-				'/apps/context_chat/queues/documents',
+				'/ocs/v2.php/apps/context_chat/queues/documents',
 				params={ 'n': FILES_INDEXING_BATCH_SIZE }
 			)
 
@@ -292,7 +292,7 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 		try:
 			nc.ocs(
 				'DELETE',
-				'/apps/context_chat/queues/documents/',
+				'/ocs/v2.php/apps/context_chat/queues/documents/',
 				json={
 					'files': to_delete_file_ids,
 					'content_providers': to_delete_provider_ids,
@@ -308,7 +308,7 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 				nc = NextcloudApp()
 				nc.ocs(
 					'DELETE',
-					'/apps/context_chat/queues/documents/',
+					'/ocs/v2.php/apps/context_chat/queues/documents/',
 					json={
 						'files': to_delete_file_ids,
 						'content_providers': to_delete_provider_ids,
@@ -338,7 +338,7 @@ def updates_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 			nc = NextcloudApp()
 			q_items_res = nc.ocs(
 				'GET',
-				'/apps/context_chat/queues/actions',
+				'/ocs/v2.php/apps/context_chat/queues/actions',
 				params={ 'n': ACTIONS_BATCH_SIZE }
 			)
 
@@ -461,7 +461,7 @@ def updates_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 		try:
 			nc.ocs(
 				'DELETE',
-				'/apps/context_chat/queues/actions/',
+				'/ocs/v2.php/apps/context_chat/queues/actions/',
 				json={ 'actions': processed_event_ids },
 			)
 		except (
@@ -474,7 +474,7 @@ def updates_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 				nc = NextcloudApp()
 				nc.ocs(
 					'DELETE',
-					'/apps/context_chat/queues/actions/',
+					'/ocs/v2.php/apps/context_chat/queues/actions/',
 					json={ 'ids': processed_event_ids },
 				)
 			continue
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 8577c93..972756f 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -136,10 +136,10 @@ class CommonSourceItem(BaseModel):
 	# source_id of the form "appId__providerId: itemId"
 	reference: Annotated[str, AfterValidator(_validate_source_id)]
 	title: str
-	modified: int | str  # todo: int/string?
+	modified: int
 	type: str
 	provider: Annotated[str, AfterValidator(_validate_provider_id)]
-	size: int
+	size: float
 
 	@field_validator('modified', mode='before')
 	@classmethod
@@ -160,18 +160,19 @@ def validate_strings_non_empty(cls, v):
 			raise ValueError('Must be a non-empty string')
 		return v.strip()
 
+	@field_validator('size')
+	@classmethod
+	def validate_size(cls, v):
+		if not (isinstance(v, int | float) and v >= 0):  # rejects non-numbers, negatives and NaN
+			raise ValueError(f'Invalid size value: {v}, must be a non-negative number')
+		return float(v)  # ints are normalised to float
+
 	@model_validator(mode='after')
 	def validate_type(self) -> Self:
 		if self.reference.startswith(FILES_PROVIDER_ID) and self.type not in SUPPORTED_MIMETYPES:
 			raise ValueError(f'Unsupported file type: {self.type} for reference {self.reference}')
 		return self
 
-	@model_validator(mode='after')
-	def validate_size(self) -> Self:
-		if not isinstance(self.size, int) or self.size < 0:
-			raise ValueError(f'Invalid size value: {self.size}, must be a non-negative integer')
-		return self
-
 
 class ReceivedFileItem(CommonSourceItem):
 	content: None

From cd5241e199a2ae2316d4f8f3841aa27bb7c12842 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 11 Mar 2026 18:52:35 +0530
Subject: [PATCH 11/96] fix: use the correct file id

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/controller.py   |  9 ++--
 context_chat_backend/task_fetcher.py | 79 +++++++++++++++++-----------
 context_chat_backend/types.py        | 22 +++++++-
 3 files changed, 75 insertions(+), 35 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 55206ca..797ba20 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -24,7 +24,6 @@
 from contextlib import asynccontextmanager
 from functools import wraps
 from threading import Event, Thread
-from time import sleep
 from typing import Any
 
 from fastapi import FastAPI, Request
@@ -130,9 +129,11 @@ async def lifespan(app: FastAPI):
 # logger background thread
 
 def background_thread_task():
-	while(True):
-		logger.info(f'Currently indexing {len(_indexing)} documents (filename, size): ', extra={'_indexing': _indexing})
-		sleep(10)
+	# todo
+	# while(True):
+	# 	logger.info(f'Currently indexing {len(_indexing)} documents (filename, size): ', extra={'_indexing': _indexing})
+	# 	sleep(10)
+	...
 
 # exception handlers
 
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 5784d12..0442cd5 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -125,15 +125,29 @@ async def __fetch_files_content(
 	semaphore = asyncio.Semaphore(CONCURRENT_FILE_FETCHES)
 	tasks = []
 
-	for file_id, file_item in files.items():
-		if file_item.size > MAX_FILE_SIZE:
+	for db_id, file in files.items():
+		try:
+			# to detect any validation errors but it should not happen since file.reference is validated
+			file.file_id  # noqa: B018
+		except ValueError as e:
+			LOGGER.error(
+				f'Invalid file reference format for db id {db_id}, file reference {file.reference}: {e}',
+				exc_info=e,
+			)
+			source_items[db_id] = IndexingError(
+				error=f'Invalid file reference format: {file.reference}',
+				retryable=False,
+			)
+			continue
+
+		if file.size > MAX_FILE_SIZE:
 			LOGGER.info(
-				f'Skipping file id {file_id}, source id {file_item.reference} due to size'
-				f' {(file_item.size/(1024*1024)):.2f} MiB exceeding the limit {(MAX_FILE_SIZE/(1024*1024)):.2f} MiB',
+				f'Skipping db id {db_id}, file id {file.file_id}, source id {file.reference} due to size'
+				f' {(file.size/(1024*1024)):.2f} MiB exceeding the limit {(MAX_FILE_SIZE/(1024*1024)):.2f} MiB',
 			)
-			source_items[file_id] = IndexingError(
+			source_items[db_id] = IndexingError(
 				error=(
-					f'File size {(file_item.size/(1024*1024)):.2f} MiB'
+					f'File size {(file.size/(1024*1024)):.2f} MiB'
 					f' exceeds the limit {(MAX_FILE_SIZE/(1024*1024)):.2f} MiB'
 				),
 				retryable=False,
@@ -141,39 +155,44 @@ async def __fetch_files_content(
 			continue
 		# todo: perform the existing file check before fetching the content to avoid unnecessary fetches
 		# any user id from the list should have read access to the file
-		tasks.append(asyncio.ensure_future(__fetch_file_content(semaphore, file_id, file_item.userIds[0])))
+		tasks.append(asyncio.ensure_future(__fetch_file_content(semaphore, file.file_id, file.userIds[0])))
 
 	results = await asyncio.gather(*tasks, return_exceptions=True)
-	for (file_id, file_item), result in zip(files.items(), results, strict=True):
+	for (db_id, file), result in zip([i for i in files.items() if i[0] not in source_items], results, strict=True):
 		if isinstance(result, IndexingException):
 			LOGGER.error(
-				f'Error fetching content for file id {file_id}, reference {file_item.reference}: {result}',
+				f'Error fetching content for db id {db_id}, file id {file.file_id}, reference {file.reference}'
+				f': {result}',
 				exc_info=result,
 			)
-			source_items[file_id] = IndexingError(
+			source_items[db_id] = IndexingError(
 				error=str(result),
 				retryable=result.retryable,
 			)
 		elif isinstance(result, str) or isinstance(result, BytesIO):
-			source_items[file_id] = SourceItem(
-				**file_item.model_dump(),
-				content=result,
+			source_items[db_id] = SourceItem(
+				**{
+					**file.model_dump(),
+					'content': result,
+				}
 			)
 		elif isinstance(result, BaseException):
 			LOGGER.error(
-				f'Unexpected error fetching content for file id {file_id}, reference {file_item.reference}: {result}',
+				f'Unexpected error fetching content for db id {db_id}, file id {file.file_id},'
+				f' reference {file.reference}: {result}',
 				exc_info=result,
 			)
-			source_items[file_id] = IndexingError(
+			source_items[db_id] = IndexingError(
 				error=f'Unexpected error: {result}',
 				retryable=True,
 			)
 		else:
 			LOGGER.error(
-				f'Unknown error fetching content for file id {file_id}, reference {file_item.reference}: {result}',
+				f'Unknown error fetching content for db id {db_id}, file id {file.file_id}, reference {file.reference}'
+				f': {result}',
 				exc_info=True,
 			)
-			source_items[file_id] = IndexingError(
+			source_items[db_id] = IndexingError(
 				error='Unknown error',
 				retryable=True,
 			)
@@ -232,11 +251,11 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 			if q_items.files:
 				fetched_files = asyncio.run(__fetch_files_content(q_items.files))
 
-			for file_id, result in fetched_files.items():
+			for db_id, result in fetched_files.items():
 				if isinstance(result, SourceItem):
-					source_files[file_id] = result
+					source_files[db_id] = result
 				else:
-					source_errors[file_id] = result
+					source_errors[db_id] = result
 
 			files_result = {}
 			providers_result = {}
@@ -257,8 +276,8 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 			):
 				LOGGER.error('Some sources failed to index', extra={
 					'file_errors': {
-						file_id: error
-						for file_id, error in files_result.items()
+						db_id: error
+						for db_id, error in files_result.items()
 						if isinstance(error, IndexingError)
 					},
 					'provider_errors': {
@@ -280,12 +299,12 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 			continue
 
 		# delete the entries from the PHP side queue where indexing succeeded or the error is not retryable
-		to_delete_file_ids = [
-			file_id for file_id, result in files_result.items()
+		to_delete_files_db_ids = [
+			db_id for db_id, result in files_result.items()
 			if result is None or (isinstance(result, IndexingError) and not result.retryable)
 		]
-		to_delete_provider_ids = [
-			provider_id for provider_id, result in providers_result.items()
+		to_delete_provider_db_ids = [
+			db_id for db_id, result in providers_result.items()
 			if result is None or (isinstance(result, IndexingError) and not result.retryable)
 		]
 
@@ -294,8 +313,8 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 				'DELETE',
 				'/ocs/v2.php/apps/context_chat/queues/documents/',
 				json={
-					'files': to_delete_file_ids,
-					'content_providers': to_delete_provider_ids,
+					'files': to_delete_files_db_ids,
+					'content_providers': to_delete_provider_db_ids,
 				},
 			)
 		except (
@@ -310,8 +329,8 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 					'DELETE',
 					'/ocs/v2.php/apps/context_chat/queues/documents/',
 					json={
-						'files': to_delete_file_ids,
-						'content_providers': to_delete_provider_ids,
+						'files': to_delete_files_db_ids,
+						'content_providers': to_delete_provider_db_ids,
 					},
 				)
 			continue
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 972756f..9f23e14 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -7,7 +7,7 @@
 from io import BytesIO
 from typing import Annotated, Literal, Self
 
-from pydantic import AfterValidator, BaseModel, Discriminator, field_validator, model_validator
+from pydantic import AfterValidator, BaseModel, Discriminator, computed_field, field_validator, model_validator
 
 from .mimetype_list import SUPPORTED_MIMETYPES
 from .vectordb.types import UpdateAccessOp
@@ -69,6 +69,21 @@ def _validate_user_id(user_id: str) -> str:
 	return _validate_user_ids([user_id])[0]
 
 
+def _get_file_id_from_source_ref(source_ref: str) -> int:
+	'''
+	Extract the integer file id from a source reference of the form "FILES_PROVIDER_ID: <file_id>".
+	Raises ValueError if the prefix is missing or the remainder is not an integer.
+	'''
+	prefix = f'{FILES_PROVIDER_ID}: '
+	if not source_ref.startswith(prefix):
+		raise ValueError(f'Source reference does not start with expected prefix: {source_ref}')
+
+	try:
+		return int(source_ref[len(prefix):])
+	except ValueError as e:
+		raise ValueError(f'Invalid source reference format for extracting file_id: {source_ref}') from e
+
+
 class TEmbeddingAuthApiKey(BaseModel):
 	apikey: str
 
@@ -177,6 +192,11 @@ def validate_type(self) -> Self:
 class ReceivedFileItem(CommonSourceItem):
 	content: None
 
+	@computed_field
+	@property
+	def file_id(self) -> int:
+		return _get_file_id_from_source_ref(self.reference)
+
 
 class SourceItem(CommonSourceItem):
 	'''

From 4958d1d980b0d0741762ffc9c3eac3ff91e5c2b0 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 11 Mar 2026 19:24:51 +0530
Subject: [PATCH 12/96] fix: wip: improve embeddings exception handling

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/network_em.py        | 13 +++++++++----
 context_chat_backend/task_fetcher.py      |  1 +
 context_chat_backend/vectordb/pgvector.py | 17 ++++++-----------
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/context_chat_backend/network_em.py b/context_chat_backend/network_em.py
index 18bb11f..d39ea56 100644
--- a/context_chat_backend/network_em.py
+++ b/context_chat_backend/network_em.py
@@ -79,6 +79,7 @@ def _get_embedding(self, input_: str | list[str], try_: int = 3) -> list[float]
 				raise FatalEmbeddingException(response.text)
 			if response.status_code // 100 != 2:
 				raise EmbeddingException(response.text)
+		# todo: rework exception handling and their downstream interpretation
 		except FatalEmbeddingException as e:
 			logger.error('Fatal error while getting embeddings: %s', str(e), exc_info=e)
 			raise e
@@ -108,10 +109,14 @@ def _get_embedding(self, input_: str | list[str], try_: int = 3) -> list[float]
 			logger.error('Unexpected error while getting embeddings', exc_info=e)
 			raise EmbeddingException('Error: unexpected error while getting embeddings') from e
 
-		# converts TypedDict to a pydantic model
-		resp = CreateEmbeddingResponse(**response.json())
-		if isinstance(input_, str):
-			return resp['data'][0]['embedding']
+		try:
+			# converts TypedDict to a pydantic model
+			resp = CreateEmbeddingResponse(**response.json())
+			if isinstance(input_, str):
+				return resp['data'][0]['embedding']
+		except Exception as e:
+			logger.error('Error parsing embedding response', exc_info=e)
+			raise EmbeddingException('Error: failed to parse embedding response') from e
 
 		# only one embedding in d['embedding'] since truncate is True
 		return [d['embedding'] for d in resp['data']]  # pyright: ignore[reportReturnType]
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 0442cd5..51f98e7 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -261,6 +261,7 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 			providers_result = {}
 			chunk_size = FILES_INDEXING_BATCH_SIZE // PARALLEL_FILE_PARSING
 
+			# todo: do it in asyncio, it's not truly parallel yet
 			# chunk file parsing for better file operation parallelism
 			for i in range(0, len(source_files), chunk_size):
 				chunk = dict(list(source_files.items())[i:i+chunk_size])
diff --git a/context_chat_backend/vectordb/pgvector.py b/context_chat_backend/vectordb/pgvector.py
index 8bcc6f4..bfca0bb 100644
--- a/context_chat_backend/vectordb/pgvector.py
+++ b/context_chat_backend/vectordb/pgvector.py
@@ -17,7 +17,7 @@
 from langchain_postgres.vectorstores import Base, PGVector
 
 from ..chain.types import InDocument, ScopeType
-from ..types import EmbeddingException, IndexingError, RetryableEmbeddingException, SourceItem
+from ..types import EmbeddingException, FatalEmbeddingException, IndexingError, RetryableEmbeddingException, SourceItem
 from ..utils import timed
 from .base import BaseVectorDB
 from .types import DbException, SafeDbException, UpdateAccessOp
@@ -181,7 +181,11 @@ def add_indocuments(self, indocuments: dict[int, InDocument]) -> dict[int, Index
 						retryable=True,
 					)
 					continue
-				except RetryableEmbeddingException as e:
+				except FatalEmbeddingException as e:
+					raise EmbeddingException(
+						f'Fatal error while embedding documents for source {indoc.source_id}: {e}'
+					) from e
+				except (RetryableEmbeddingException, EmbeddingException) as e:
 					# temporary error, continue with the next document
 					logger.exception('Error adding documents to vectordb, should be retried later.', exc_info=e, extra={
 						'source_id': indoc.source_id,
@@ -191,15 +195,6 @@ def add_indocuments(self, indocuments: dict[int, InDocument]) -> dict[int, Index
 						retryable=True,
 					)
 					continue
-				except EmbeddingException as e:
-					logger.exception('Error adding documents to vectordb', exc_info=e, extra={
-						'source_id': indoc.source_id,
-					})
-					results[php_db_id] = IndexingError(
-						error=str(e),
-						retryable=False,
-					)
-					continue
 				except Exception as e:
 					logger.exception('Error adding documents to vectordb', exc_info=e, extra={
 						'source_id': indoc.source_id,

From a04912120965d8ff9a285eac559794b716a595ce Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 11 Mar 2026 19:44:06 +0530
Subject: [PATCH 13/96] fix(ci): update to the latest changes

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 104 ++++++++++++++++++-------
 1 file changed, 76 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index fb06baf..9563bcd 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -199,26 +199,87 @@ jobs:
           ls -la context_chat_backend/persistent_storage/*
           sleep 30 # Wait for the em server to get ready
 
-      - name: Scan files, baseline
-        run: |
-          ./occ files:scan admin
-          ./occ context_chat:scan admin -m text/plain
-
-      - name: Check python memory usage
+      - name: Initial memory usage check
         run: |
           ps -p $(cat pid.txt) -o pid,cmd,%mem,rss --sort=-%mem
           ps -p $(cat pid.txt) -o %mem --no-headers > initial_mem.txt
 
-      - name: Scan files
-        run: |
-          ./occ files:scan admin
-          ./occ context_chat:scan admin -m text/markdown &
-          ./occ context_chat:scan admin -m text/x-rst
-
-      - name: Check python memory usage
+      - name: Periodically check context_chat stats for 15 minutes to allow the backend to index the files
         run: |
-          ps -p $(cat pid.txt) -o pid,cmd,%mem,rss --sort=-%mem
-          ps -p $(cat pid.txt) -o %mem --no-headers > after_scan_mem.txt
+          success=0
+          for i in {1..90}; do
+            echo "Checking stats, attempt $i..."
+
+            mkfifo error_pipe
+            stats=$(timeout 5 ./occ context_chat:stats 2>error_pipe)
+            echo "Stats output:"
+            echo "$stats"
+            echo "---"
+
+            # Check for critical errors in output
+            if echo "$stats" | grep -q "Error during request"; then
+              echo "Backend connection error detected, retrying..."
+              rm -f error_pipe
+              sleep 10
+              continue
+            fi
+
+            # Extract Total eligible files
+            total_files=$(echo "$stats" | grep -oP 'Total eligible files:\s*\K\d+' || echo "")
+
+            # Extract Indexed documents count (files__default)
+            indexed_count=$(echo "$stats" | grep -oP "'files__default'\s*=>\s*\K\d+" || echo "")
+
+            # Validate parsed values
+            if [ -z "$total_files" ] || [ -z "$indexed_count" ]; then
+              echo "Error: Could not parse stats output properly"
+              if echo "$stats" | grep -q "Indexed documents:"; then
+                echo "  Indexed documents section found but could not extract count"
+              fi
+              rm -f error_pipe
+              sleep 10
+              continue
+            fi
+
+            echo "Total eligible files: $total_files"
+            echo "Indexed documents (files__default): $indexed_count"
+
+            # Calculate absolute difference
+            diff=$((total_files - indexed_count))
+            if [ $diff -lt 0 ]; then
+              diff=$((-diff))
+            fi
+
+            # Calculate 2% threshold using bc for floating point support
+            threshold=$(echo "scale=4; $total_files * 0.02" | bc)
+
+            # Check if difference is within tolerance
+            if (( $(echo "$diff <= $threshold" | bc -l) )); then
+              echo "Indexing within 2% tolerance (diff=$diff, threshold=$threshold)"
+              rm -f error_pipe
+              success=1
+              break
+            else
+              pct=$(echo "scale=2; ($diff / $total_files) * 100" | bc)
+              echo "Outside 2% tolerance: diff=$diff (${pct}%), threshold=$threshold"
+            fi
+
+            # Check if backend is still alive
+            ccb_alive=$(ps -p $(cat pid.txt) -o cmd= | grep -c "main.py" || echo "0")
+            if [ "$ccb_alive" -eq 0 ]; then
+              echo "Error: Context Chat Backend process is not running. Exiting."
+              rm -f error_pipe
+              exit 1
+            fi
+
+            rm -f error_pipe
+            sleep 10
+          done
+
+          if [ $success -ne 1 ]; then
+            echo "Max attempts reached"
+            exit 1
+          fi
 
       - name: Run the prompts
         run: |
@@ -252,19 +313,6 @@ jobs:
             echo "Memory usage during scan is stable. No memory leak detected."
           fi
 
-      - name: Compare memory usage and detect leak
-        run: |
-          initial_mem=$(cat after_scan_mem.txt | tr -d ' ')
-          final_mem=$(cat after_prompt_mem.txt | tr -d ' ')
-          echo "Initial Memory Usage: $initial_mem%"
-          echo "Memory Usage after prompt: $final_mem%"
-
-          if (( $(echo "$final_mem > $initial_mem" | bc -l) )); then
-            echo "Memory usage has increased during prompt. Possible memory leak detected!"
-          else
-            echo "Memory usage during prompt is stable. No memory leak detected."
-          fi
-
       - name: Show server logs
         if: always()
         run: |

From 795380c7c62ce5f60f80aa16ffa1e7568133f03e Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 12 Mar 2026 16:10:58 +0530
Subject: [PATCH 14/96] fix(ci): use file to store stderr

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 9563bcd..de0f465 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -210,16 +210,21 @@ jobs:
           for i in {1..90}; do
             echo "Checking stats, attempt $i..."
 
-            mkfifo error_pipe
-            stats=$(timeout 5 ./occ context_chat:stats 2>error_pipe)
+            stats_err=$(mktemp)
+            stats=$(timeout 5 ./occ context_chat:stats 2>"$stats_err")
+            stats_exit=$?
             echo "Stats output:"
             echo "$stats"
+            if [ -s "$stats_err" ]; then
+              echo "Stderr:"
+              cat "$stats_err"
+            fi
             echo "---"
+            rm -f "$stats_err"
 
             # Check for critical errors in output
-            if echo "$stats" | grep -q "Error during request"; then
-              echo "Backend connection error detected, retrying..."
-              rm -f error_pipe
+            if [ $stats_exit -ne 0 ] || echo "$stats" | grep -q "Error during request"; then
+              echo "Backend connection error detected (exit=$stats_exit), retrying..."
               sleep 10
               continue
             fi
@@ -236,7 +241,6 @@ jobs:
               if echo "$stats" | grep -q "Indexed documents:"; then
                 echo "  Indexed documents section found but could not extract count"
               fi
-              rm -f error_pipe
               sleep 10
               continue
             fi
@@ -256,7 +260,6 @@ jobs:
             # Check if difference is within tolerance
             if (( $(echo "$diff <= $threshold" | bc -l) )); then
               echo "Indexing within 2% tolerance (diff=$diff, threshold=$threshold)"
-              rm -f error_pipe
               success=1
               break
             else
@@ -268,11 +271,9 @@ jobs:
             ccb_alive=$(ps -p $(cat pid.txt) -o cmd= | grep -c "main.py" || echo "0")
             if [ "$ccb_alive" -eq 0 ]; then
               echo "Error: Context Chat Backend process is not running. Exiting."
-              rm -f error_pipe
               exit 1
             fi
 
-            rm -f error_pipe
             sleep 10
           done
 

From 7bc0ed7c3c535f930f03cc38c4dd884b5370696c Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 12 Mar 2026 17:17:38 +0530
Subject: [PATCH 15/96] fix(ci): add cron jobs

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index de0f465..0d8e422 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -204,9 +204,18 @@ jobs:
           ps -p $(cat pid.txt) -o pid,cmd,%mem,rss --sort=-%mem
           ps -p $(cat pid.txt) -o %mem --no-headers > initial_mem.txt
 
+      - name: Run cron jobs
+        run: |
+          # every 10 seconds indefinitely
+          while true; do
+            php cron.php
+            sleep 10
+          done &
+
       - name: Periodically check context_chat stats for 15 minutes to allow the backend to index the files
         run: |
           success=0
+          echo "::group::Checking stats periodically for 15 minutes to allow the backend to index the files"
           for i in {1..90}; do
             echo "Checking stats, attempt $i..."
 
@@ -277,6 +286,10 @@ jobs:
             sleep 10
           done
 
+          echo "::endgroup::"
+
+          ./occ context_chat:stats
+
           if [ $success -ne 1 ]; then
             echo "Max attempts reached"
             exit 1

From d94c687e057a7049e6b0f1f32b580f326692acd3 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 12 Mar 2026 17:35:47 +0530
Subject: [PATCH 16/96] fix(ci): do a occ files scan before cron jobs

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 0d8e422..58f9f50 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -169,6 +169,10 @@ jobs:
           cd ..
           rm -rf documentation
 
+      - name: Run files scan
+        run: |
+          ./occ files:scan --all
+
       - name: Setup python 3.11
         uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5
         with:

From dadc8fa7d193f40ddacffecf6266d8a2b37a6817 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Mon, 16 Mar 2026 20:09:30 +0530
Subject: [PATCH 17/96] feat: record indexing errors in content decode function

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .../chain/ingest/doc_loader.py                | 44 +++++++++----------
 context_chat_backend/chain/ingest/injest.py   | 20 ++++++---
 2 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/context_chat_backend/chain/ingest/doc_loader.py b/context_chat_backend/chain/ingest/doc_loader.py
index d26f74b..832c833 100644
--- a/context_chat_backend/chain/ingest/doc_loader.py
+++ b/context_chat_backend/chain/ingest/doc_loader.py
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
 
-import logging
 import re
 import tempfile
 from collections.abc import Callable
@@ -18,9 +17,8 @@
 from pypdf.errors import FileNotDecryptedError as PdfFileNotDecryptedError
 from striprtf import striprtf
 
-from ...types import SourceItem
+from ...types import IndexingException, SourceItem
 
-logger = logging.getLogger('ccb.doc_loader')
 
 def _temp_file_wrapper(file: BytesIO, loader: Callable, sep: str = '\n') -> str:
 	raw_bytes = file.read()
@@ -75,10 +73,10 @@ def _load_xlsx(file: BytesIO) -> str:
 	return read_excel(file, na_filter=False).to_string(header=False, na_rep='')
 
 
-def _load_email(file: BytesIO, ext: str = 'eml') -> str | None:
+def _load_email(file: BytesIO, ext: str = 'eml') -> str:
 	# NOTE: msg format is not tested
 	if ext not in ['eml', 'msg']:
-		return None
+		raise IndexingException(f'Unsupported email format: {ext}')
 
 	# TODO: implement attachment partitioner using unstructured.partition.partition_{email,msg}
 	# since langchain does not pass through the attachment_partitioner kwarg
@@ -116,34 +114,36 @@ def attachment_partitioner(
 }
 
 
-def decode_source(source: SourceItem) -> str | None:
+def decode_source(source: SourceItem) -> str:
+	'''
+	Raises
+	------
+	IndexingException
+	'''
+
 	io_obj: BytesIO | None = None
 	try:
 		# .pot files are powerpoint templates but also plain text files,
 		# so we skip them to prevent decoding errors
 		if source.title.endswith('.pot'):
-			return None
-
-		mimetype = source.type
-		if mimetype is None:
-			return None
+			raise IndexingException('PowerPoint template files (.pot) are not supported')
 
 		if isinstance(source.content, str):
 			io_obj = BytesIO(source.content.encode('utf-8', 'ignore'))
 		else:
 			io_obj = source.content
 
-		if _loader_map.get(mimetype):
-			result = _loader_map[mimetype](io_obj)
-			return result.encode('utf-8', 'ignore').decode('utf-8', 'ignore')
-
-		return io_obj.read().decode('utf-8', 'ignore')
-	except PdfFileNotDecryptedError:
-		logger.warning(f'PDF file ({source.reference}) is encrypted and cannot be read')
-		return None
-	except Exception:
-		logger.exception(f'Error decoding source file ({source.reference})', stack_info=True)
-		return None
+		if _loader_map.get(source.type):
+			result = _loader_map[source.type](io_obj)
+			return result.encode('utf-8', 'ignore').decode('utf-8', 'ignore').strip()
+
+		return io_obj.read().decode('utf-8', 'ignore').strip()
+	except IndexingException:
+		raise
+	except PdfFileNotDecryptedError as e:
+		raise IndexingException('PDF file is encrypted and cannot be read') from e
+	except Exception as e:
+		raise IndexingException(f'Error decoding source file: {e}') from e
 	finally:
 		if io_obj is not None:
 			io_obj.close()
diff --git a/context_chat_backend/chain/ingest/injest.py b/context_chat_backend/chain/ingest/injest.py
index 7369f45..d9ea543 100644
--- a/context_chat_backend/chain/ingest/injest.py
+++ b/context_chat_backend/chain/ingest/injest.py
@@ -8,7 +8,7 @@
 from langchain.schema import Document
 
 from ...dyn_loader import VectorDBLoader
-from ...types import IndexingError, SourceItem, TConfig
+from ...types import IndexingError, IndexingException, SourceItem, TConfig
 from ...vectordb.base import BaseVectorDB
 from ...vectordb.types import DbException, SafeDbException, UpdateAccessOp
 from ..types import InDocument
@@ -59,9 +59,17 @@ def _sources_to_indocuments(
 
 		# todo: maybe fetch the content of the files here
 		# transform the source to have text data
-		content = decode_source(source)
+		try:
+			content = decode_source(source)
+		except IndexingException as e:
+			logger.error(f'Error decoding source ({source.reference}): {e}', exc_info=e)
+			errored_docs[db_id] = IndexingError(
+				error=str(e),
+				retryable=False,
+			)
+			continue
 
-		if content is None or (content := content.strip()) == '':
+		if content == '':
 			logger.debug('decoded empty source', extra={ 'source_id': source.reference })
 			errored_docs[db_id] = IndexingError(
 				error='Decoded content is empty',
@@ -74,12 +82,12 @@ def _sources_to_indocuments(
 		# NOTE: do not use this with all docs when programming files are added
 		content = re.sub(r'(\s){5,}', r'\g<1>', content)
 		# filter out null bytes
-		content = content.replace('\0', '')
+		content = content.replace('\0', '').strip()
 
-		if content is None or content == '':
+		if content == '':
 			logger.debug('decoded empty source after cleanup', extra={ 'source_id': source.reference })
 			errored_docs[db_id] = IndexingError(
-				error='Decoded content is empty',
+				error='Cleaned up content is empty',
 				retryable=False,
 			)
 			continue

From f9d86dcf1ddac21e61edcc3698b79e0a69475a24 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 17 Mar 2026 20:27:10 +0530
Subject: [PATCH 18/96] chore: move file fetch inside injest

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/chain/ingest/injest.py | 197 ++++++++++++++++++--
 context_chat_backend/task_fetcher.py        | 173 +----------------
 context_chat_backend/types.py               |   7 +-
 context_chat_backend/vectordb/base.py       |  11 +-
 context_chat_backend/vectordb/pgvector.py   |  14 +-
 5 files changed, 208 insertions(+), 194 deletions(-)

diff --git a/context_chat_backend/chain/ingest/injest.py b/context_chat_backend/chain/ingest/injest.py
index d9ea543..18a37b4 100644
--- a/context_chat_backend/chain/ingest/injest.py
+++ b/context_chat_backend/chain/ingest/injest.py
@@ -2,13 +2,18 @@
 # SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
+import asyncio
 import logging
 import re
+from collections.abc import Mapping
+from io import BytesIO
 
+import niquests
 from langchain.schema import Document
+from nc_py_api import AsyncNextcloudApp
 
 from ...dyn_loader import VectorDBLoader
-from ...types import IndexingError, IndexingException, SourceItem, TConfig
+from ...types import IndexingError, IndexingException, ReceivedFileItem, SourceItem, TConfig
 from ...vectordb.base import BaseVectorDB
 from ...vectordb.types import DbException, SafeDbException, UpdateAccessOp
 from ..types import InDocument
@@ -17,15 +22,165 @@
 
 logger = logging.getLogger('ccb.injest')
 
+# max concurrent fetches to avoid overloading the NC server or hitting rate limits
+CONCURRENT_FILE_FETCHES = 10  # todo: config?
+MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB, all loaded in RAM at once, todo: config?
+
+
+async def __fetch_file_content(
+	semaphore: asyncio.Semaphore,
+	file_id: int,
+	user_id: str,
+	_rlimit = 3,
+) -> BytesIO:
+	'''
+	Downloads the file content in memory, waits and retries up to _rlimit times when rate limited (HTTP 429).
+
+	Raises
+	------
+	IndexingException
+	'''
+
+	while True:
+		nc = AsyncNextcloudApp()
+		try:
+			async with semaphore:
+				# a file pointer for storing the stream in memory until it is consumed
+				fp = BytesIO()
+				await nc._session.download2fp(
+					url_path=f'/ocs/v2.php/apps/context_chat/files/{file_id}',
+					fp=fp,
+					dav=False,
+					params={ 'userId': user_id },
+				)
+				return fp
+		except niquests.exceptions.RequestException as e:
+			if e.response is None:
+				raise
+			if e.response.status_code != niquests.codes.too_many_requests:  # pyright: ignore[reportAttributeAccessIssue]
+				raise
+			if _rlimit <= 0:
+				raise IndexingException(
+					f'Rate limited when fetching content for file id {file_id}, user id {user_id},'
+					' max retries exceeded',
+					retryable=True,
+				) from e
+			# todo: implement rate limits in php CC?
+			wait_for = int(e.response.headers.get('Retry-After', '30'))
+			logger.warning(
+				f'Rate limited when fetching content for file id {file_id}, user id {user_id},'
+				f' waiting {wait_for} before retrying',
+				exc_info=e,
+			)
+		except Exception as e:
+			logger.error(f'Error fetching content for file id {file_id}, user id {user_id}: {e}', exc_info=e)
+			raise IndexingException(f'Error fetching content for file id {file_id}, user id {user_id}: {e}') from e
+		# the semaphore is released here, holding it while waiting and retrying can deadlock all the fetches
+		_rlimit -= 1
+		await asyncio.sleep(wait_for)
+
+
+async def __fetch_files_content(
+	sources: Mapping[int, SourceItem | ReceivedFileItem]
+) -> tuple[Mapping[int, SourceItem], Mapping[int, IndexingError]]:
+	'''
+	Fetches the content of the file entries concurrently and converts them to SourceItem objects,
+	SourceItem entries already carry their content and are returned as they are.
+
+	Returns
+	-------
+	tuple[Mapping[int, SourceItem], Mapping[int, IndexingError]]
+		Sources with their content populated and errors of the skipped/failed files, both keyed by db id.
+	'''
+	source_items = {}
+	error_items = {}
+	semaphore = asyncio.Semaphore(CONCURRENT_FILE_FETCHES)
+	# files with a scheduled fetch, in the same order as "tasks", to map the gathered results back
+	scheduled = []
+	tasks = []
+
+	for db_id, file in sources.items():
+		if isinstance(file, SourceItem):
+			source_items[db_id] = file
+			continue
+
+		try:
+			# to detect any validation errors but it should not happen since file.reference is validated
+			file.file_id  # noqa: B018
+		except ValueError as e:
+			logger.error(
+				f'Invalid file reference format for db id {db_id}, file reference {file.reference}: {e}',
+				exc_info=e,
+			)
+			error_items[db_id] = IndexingError(
+				error=f'Invalid file reference format: {file.reference}',
+				retryable=False,
+			)
+			continue
+
+		if file.size > MAX_FILE_SIZE:
+			logger.info(
+				f'Skipping db id {db_id}, file id {file.file_id}, source id {file.reference} due to size'
+				f' {(file.size/(1024*1024)):.2f} MiB exceeding the limit {(MAX_FILE_SIZE/(1024*1024)):.2f} MiB',
+			)
+			error_items[db_id] = IndexingError(
+				error=(
+					f'File size {(file.size/(1024*1024)):.2f} MiB'
+					f' exceeds the limit {(MAX_FILE_SIZE/(1024*1024)):.2f} MiB'
+				),
+				retryable=False,
+			)
+			continue
+		# any user id from the list should have read access to the file
+		scheduled.append((db_id, file))
+		tasks.append(asyncio.ensure_future(__fetch_file_content(semaphore, file.file_id, file.userIds[0])))
+
+	results = await asyncio.gather(*tasks, return_exceptions=True)
+	for (db_id, file), result in zip(scheduled, results, strict=True):
+		if isinstance(result, IndexingException):
+			logger.error(
+				f'Error fetching content for db id {db_id}, file id {file.file_id}, reference {file.reference}'
+				f': {result}',
+				exc_info=result,
+			)
+			error_items[db_id] = IndexingError(
+				error=str(result),
+				retryable=result.retryable,
+			)
+		elif isinstance(result, (str, BytesIO)):
+			source_items[db_id] = SourceItem(**{**file.model_dump(), 'content': result})
+		elif isinstance(result, BaseException):
+			logger.error(
+				f'Unexpected error fetching content for db id {db_id}, file id {file.file_id},'
+				f' reference {file.reference}: {result}',
+				exc_info=result,
+			)
+			error_items[db_id] = IndexingError(
+				error=f'Unexpected error: {result}',
+				retryable=True,
+			)
+		else:
+			logger.error(
+				f'Unknown error fetching content for db id {db_id}, file id {file.file_id}, reference {file.reference}'
+				f': {result}',
+				exc_info=True,
+			)
+			error_items[db_id] = IndexingError(
+				error='Unknown error',
+				retryable=True,
+			)
+
+	return source_items, error_items
+
 
 def _filter_sources(
 	vectordb: BaseVectorDB,
-	sources: dict[int, SourceItem]
-) -> tuple[dict[int, SourceItem], dict[int, SourceItem]]:
+	sources: Mapping[int, SourceItem | ReceivedFileItem]
+) -> tuple[Mapping[int, SourceItem | ReceivedFileItem], Mapping[int, SourceItem | ReceivedFileItem]]:
 	'''
 	Returns
 	-------
-	tuple[list[str], list[UploadFile]]
+	tuple[Mapping[int, SourceItem | ReceivedFileItem], Mapping[int, SourceItem | ReceivedFileItem]]:
 		First value is a list of sources that already exist in the vectordb.
 		Second value is a list of sources that are new and should be embedded.
 	'''
@@ -49,15 +204,14 @@ def _filter_sources(
 
 def _sources_to_indocuments(
 	config: TConfig,
-	sources: dict[int, SourceItem]
-) -> tuple[dict[int, InDocument], dict[int, IndexingError]]:
+	sources: Mapping[int, SourceItem]
+) -> tuple[Mapping[int, InDocument], Mapping[int, IndexingError]]:
 	indocuments = {}
 	errored_docs = {}
 
 	for db_id, source in sources.items():
 		logger.debug('processing source', extra={ 'source_id': source.reference })
 
-		# todo: maybe fetch the content of the files here
 		# transform the source to have text data
 		try:
 			content = decode_source(source)
@@ -121,8 +275,8 @@ def _sources_to_indocuments(
 
 def _increase_access_for_existing_sources(
 	vectordb: BaseVectorDB,
-	existing_sources: dict[int, SourceItem]
-) -> dict[int, IndexingError | None]:
+	existing_sources: Mapping[int, SourceItem | ReceivedFileItem]
+) -> Mapping[int, IndexingError | None]:
 	'''
 	update userIds for existing sources
 	allow the userIds as additional users, not as the only users
@@ -162,8 +316,8 @@ def _increase_access_for_existing_sources(
 def _process_sources(
 	vectordb: BaseVectorDB,
 	config: TConfig,
-	sources: dict[int, SourceItem]
-) -> dict[int, IndexingError | None]:
+	sources: Mapping[int, SourceItem | ReceivedFileItem]
+) -> Mapping[int, IndexingError | None]:
 	'''
 	Processes the sources and adds them to the vectordb.
 	Returns the list of source ids that were successfully added and those that need to be retried.
@@ -178,18 +332,21 @@ def _process_sources(
 
 	source_proc_results = _increase_access_for_existing_sources(vectordb, existing_sources)
 
-	if len(to_embed_sources) == 0:
+	populated_to_embed_sources, errored_sources = asyncio.run(__fetch_files_content(to_embed_sources))
+	source_proc_results.update(errored_sources)  # pyright: ignore[reportAttributeAccessIssue]
+
+	if len(populated_to_embed_sources) == 0:
 		# no new sources to embed
 		logger.debug('Filtered all sources, nothing to embed')
 		return source_proc_results
 
 	logger.debug('Filtered sources:', extra={
-		'source_ids': [source.reference for source in to_embed_sources.values()]
+		'source_ids': [source.reference for source in populated_to_embed_sources.values()]
 	})
 	# invalid/empty sources are filtered out here and not counted in loaded/retryable
-	indocuments, errored_docs = _sources_to_indocuments(config, to_embed_sources)
+	indocuments, errored_docs = _sources_to_indocuments(config, populated_to_embed_sources)
 
-	source_proc_results.update(errored_docs)
+	source_proc_results.update(errored_docs)  # pyright: ignore[reportAttributeAccessIssue]
 	logger.debug('Converted sources to documents')
 
 	if len(indocuments) == 0:
@@ -197,8 +354,12 @@ def _process_sources(
 		logger.debug('All documents were found empty after being processed')
 		return source_proc_results
 
+	logger.debug('Adding documents to vectordb', extra={
+		'source_ids': [indoc.source_id for indoc in indocuments.values()]
+	})
+
 	doc_add_results = vectordb.add_indocuments(indocuments)
-	source_proc_results.update(doc_add_results)
+	source_proc_results.update(doc_add_results)  # pyright: ignore[reportAttributeAccessIssue]
 	logger.debug('Added documents to vectordb')
 
 	return source_proc_results
@@ -215,8 +376,8 @@ def _decode_latin_1(s: str) -> str:
 def embed_sources(
 	vectordb_loader: VectorDBLoader,
 	config: TConfig,
-	sources: dict[int, SourceItem]
-) -> dict[int, IndexingError | None]:
+	sources: Mapping[int, SourceItem | ReceivedFileItem]
+) -> Mapping[int, IndexingError | None]:
 	logger.debug('Embedding sources:', extra={
 		'source_ids': [
 			f'{source.reference} ({_decode_latin_1(source.title)})'
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 51f98e7..28aff6a 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -3,17 +3,16 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
 
-import asyncio
 import logging
 import os
+from collections.abc import Mapping
 from contextlib import suppress
 from enum import Enum
-from io import BytesIO
 from threading import Event, Thread
 from time import sleep
 
 import niquests
-from nc_py_api import AsyncNextcloudApp, NextcloudApp
+from nc_py_api import NextcloudApp
 from pydantic import ValidationError
 
 from .chain.ingest.injest import embed_sources
@@ -25,7 +24,6 @@
 	EmbeddingException,
 	FilesQueueItems,
 	IndexingError,
-	IndexingException,
 	LoaderException,
 	ReceivedFileItem,
 	SourceItem,
@@ -46,12 +44,10 @@
 THREADS = {}
 THREAD_STOP_EVENT = Event()
 LOGGER = logging.getLogger('ccb.task_fetcher')
-FILES_INDEXING_BATCH_SIZE = 64  # todo: config?
+FILES_INDEXING_BATCH_SIZE = 16  # theoretical max RAM usage: 16 * 100 MiB, todo: config?
+MIN_FILES_PER_CPU = 4
 # divides the batch into these many chunks
 PARALLEL_FILE_PARSING = max(1, (os.cpu_count() or 2) - 1)  # todo: config?
-# max concurrent fetches to avoid overloading the NC server or hitting rate limits
-CONCURRENT_FILE_FETCHES = 10  # todo: config?
-MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB, todo: config?
 ACTIONS_BATCH_SIZE = 512  # todo: config?
 POLLING_COOLDOWN = 30
 
@@ -62,143 +58,6 @@ class ThreadType(Enum):
 	REQUEST_PROCESSING = 'request_processing'
 
 
-async def __fetch_file_content(
-	semaphore: asyncio.Semaphore,
-	file_id: int,
-	user_id: str,
-	_rlimit = 3,
-) -> BytesIO:
-	'''
-	Raises
-	------
-	IndexingException
-	'''
-
-	async with semaphore:
-		nc = AsyncNextcloudApp()
-		try:
-			# a file pointer for storing the stream in memory until it is consumed
-			fp = BytesIO()
-			await nc._session.download2fp(
-				url_path=f'/ocs/v2.php/apps/context_chat/files/{file_id}',
-				fp=fp,
-				dav=False,
-				params={ 'userId': user_id },
-			)
-			return fp
-		except niquests.exceptions.RequestException as e:
-			# todo: raise IndexingException with retryable=True for rate limit errors,
-			# todo: and handle it in the caller to not delete the source from the queue and retry later through
-			# todo: the normal lock expiry mechanism
-			if e.response is None:
-				raise
-
-			if e.response.status_code == niquests.codes.too_many_requests:  # pyright: ignore[reportAttributeAccessIssue]
-				# todo: implement rate limits in php CC?
-				wait_for = int(e.response.headers.get('Retry-After', '30'))
-				if _rlimit <= 0:
-					raise IndexingException(
-						f'Rate limited when fetching content for file id {file_id}, user id {user_id},'
-						' max retries exceeded',
-						retryable=True,
-					) from e
-				LOGGER.warning(
-					f'Rate limited when fetching content for file id {file_id}, user id {user_id},'
-					f' waiting {wait_for} before retrying',
-					exc_info=e,
-				)
-				await asyncio.sleep(wait_for)
-				return await __fetch_file_content(semaphore, file_id, user_id, _rlimit - 1)
-
-			raise
-		except IndexingException:
-			raise
-		except Exception as e:
-			LOGGER.error(f'Error fetching content for file id {file_id}, user id {user_id}: {e}', exc_info=e)
-			raise IndexingException(f'Error fetching content for file id {file_id}, user id {user_id}: {e}') from e
-
-
-async def __fetch_files_content(
-	files: dict[int, ReceivedFileItem]
-) -> dict[int, SourceItem | IndexingError]:
-	source_items = {}
-	semaphore = asyncio.Semaphore(CONCURRENT_FILE_FETCHES)
-	tasks = []
-
-	for db_id, file in files.items():
-		try:
-			# to detect any validation errors but it should not happen since file.reference is validated
-			file.file_id  # noqa: B018
-		except ValueError as e:
-			LOGGER.error(
-				f'Invalid file reference format for db id {db_id}, file reference {file.reference}: {e}',
-				exc_info=e,
-			)
-			source_items[db_id] = IndexingError(
-				error=f'Invalid file reference format: {file.reference}',
-				retryable=False,
-			)
-			continue
-
-		if file.size > MAX_FILE_SIZE:
-			LOGGER.info(
-				f'Skipping db id {db_id}, file id {file.file_id}, source id {file.reference} due to size'
-				f' {(file.size/(1024*1024)):.2f} MiB exceeding the limit {(MAX_FILE_SIZE/(1024*1024)):.2f} MiB',
-			)
-			source_items[db_id] = IndexingError(
-				error=(
-					f'File size {(file.size/(1024*1024)):.2f} MiB'
-					f' exceeds the limit {(MAX_FILE_SIZE/(1024*1024)):.2f} MiB'
-				),
-				retryable=False,
-			)
-			continue
-		# todo: perform the existing file check before fetching the content to avoid unnecessary fetches
-		# any user id from the list should have read access to the file
-		tasks.append(asyncio.ensure_future(__fetch_file_content(semaphore, file.file_id, file.userIds[0])))
-
-	results = await asyncio.gather(*tasks, return_exceptions=True)
-	for (db_id, file), result in zip(files.items(), results, strict=True):
-		if isinstance(result, IndexingException):
-			LOGGER.error(
-				f'Error fetching content for db id {db_id}, file id {file.file_id}, reference {file.reference}'
-				f': {result}',
-				exc_info=result,
-			)
-			source_items[db_id] = IndexingError(
-				error=str(result),
-				retryable=result.retryable,
-			)
-		elif isinstance(result, str) or isinstance(result, BytesIO):
-			source_items[db_id] = SourceItem(
-				**{
-					**file.model_dump(),
-					'content': result,
-				}
-			)
-		elif isinstance(result, BaseException):
-			LOGGER.error(
-				f'Unexpected error fetching content for db id {db_id}, file id {file.file_id},'
-				f' reference {file.reference}: {result}',
-				exc_info=result,
-			)
-			source_items[db_id] = IndexingError(
-				error=f'Unexpected error: {result}',
-				retryable=True,
-			)
-		else:
-			LOGGER.error(
-				f'Unknown error fetching content for db id {db_id}, file id {file.file_id}, reference {file.reference}'
-				f': {result}',
-				exc_info=True,
-			)
-			source_items[db_id] = IndexingError(
-				error='Unknown error',
-				retryable=True,
-			)
-	return source_items
-
-
 def files_indexing_thread(app_config: TConfig, app_enabled: Event) -> None:
 	try:
 		vectordb_loader = VectorDBLoader(app_config)
@@ -206,7 +65,7 @@ def files_indexing_thread(app_config: TConfig, app_enabled: Event) -> None:
 		LOGGER.error('Error initializing vector DB loader, files indexing thread will not start:', exc_info=e)
 		return
 
-	def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingError | None]:
+	def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) -> Mapping[int, IndexingError | None]:
 		try:
 			return exec_in_proc(
 				target=embed_sources,
@@ -225,7 +84,6 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 
 		try:
 			nc = NextcloudApp()
-			# todo: add the 'size' param to the return of this call.
 			q_items_res = nc.ocs(
 				'GET',
 				'/ocs/v2.php/apps/context_chat/queues/documents',
@@ -242,29 +100,14 @@ def _load_sources(source_items: dict[int, SourceItem]) -> dict[int, IndexingErro
 				sleep(POLLING_COOLDOWN)
 				continue
 
-			# populate files content and convert to source items
-			fetched_files = {}
-			source_files = {}
-			# unified error structure for files and content providers
-			source_errors = {}
-
-			if q_items.files:
-				fetched_files = asyncio.run(__fetch_files_content(q_items.files))
-
-			for db_id, result in fetched_files.items():
-				if isinstance(result, SourceItem):
-					source_files[db_id] = result
-				else:
-					source_errors[db_id] = result
-
 			files_result = {}
 			providers_result = {}
-			chunk_size = FILES_INDEXING_BATCH_SIZE // PARALLEL_FILE_PARSING
+			chunk_size = max(MIN_FILES_PER_CPU, FILES_INDEXING_BATCH_SIZE // PARALLEL_FILE_PARSING)
 
 			# todo: do it in asyncio, it's not truly parallel yet
 			# chunk file parsing for better file operation parallelism
-			for i in range(0, len(source_files), chunk_size):
-				chunk = dict(list(source_files.items())[i:i+chunk_size])
+			for i in range(0, len(q_items.files), chunk_size):
+				chunk = dict(list(q_items.files.items())[i:i+chunk_size])
 				files_result.update(_load_sources(chunk))
 
 			for i in range(0, len(q_items.content_providers), chunk_size):
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 9f23e14..59d2568 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
 import re
+from collections.abc import Mapping
 from enum import Enum
 from io import BytesIO
 from typing import Annotated, Literal, Self
@@ -224,8 +225,8 @@ class Config:
 
 
 class FilesQueueItems(BaseModel):
-	files: dict[int, ReceivedFileItem]  # [db id]: FileItem
-	content_providers: dict[int, SourceItem]  # [db id]: SourceItem
+	files: Mapping[int, ReceivedFileItem]  # [db id]: FileItem
+	content_providers: Mapping[int, SourceItem]  # [db id]: SourceItem
 
 
 class IndexingException(Exception):
@@ -343,4 +344,4 @@ class ActionsQueueItemUpdateAccessDeclSourceId(CommonActionsQueueItem):
 
 
 class ActionsQueueItems(BaseModel):
-	actions: dict[int, ActionsQueueItem]
+	actions: Mapping[int, ActionsQueueItem]
diff --git a/context_chat_backend/vectordb/base.py b/context_chat_backend/vectordb/base.py
index ebd5407..2b4aa35 100644
--- a/context_chat_backend/vectordb/base.py
+++ b/context_chat_backend/vectordb/base.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
 from abc import ABC, abstractmethod
+from collections.abc import Mapping
 from typing import Any
 
 from langchain.schema import Document
@@ -10,7 +11,7 @@
 from langchain.schema.vectorstore import VectorStore
 
 from ..chain.types import InDocument, ScopeType
-from ..types import IndexingError, SourceItem
+from ..types import IndexingError, ReceivedFileItem, SourceItem
 from ..utils import timed
 from .types import UpdateAccessOp
 
@@ -62,7 +63,7 @@ def get_instance(self) -> VectorStore:
 		'''
 
 	@abstractmethod
-	def add_indocuments(self, indocuments: dict[int, InDocument]) -> dict[int, IndexingError | None]:
+	def add_indocuments(self, indocuments: Mapping[int, InDocument]) -> Mapping[int, IndexingError | None]:
 		'''
 		Adds the given indocuments to the vectordb and updates the docs + access tables.
 
@@ -79,7 +80,7 @@ def add_indocuments(self, indocuments: dict[int, InDocument]) -> dict[int, Index
 
 	@timed
 	@abstractmethod
-	def check_sources(self, sources: dict[int, SourceItem]) -> tuple[list[str], list[str]]:
+	def check_sources(self, sources: Mapping[int, SourceItem | ReceivedFileItem]) -> tuple[list[str], list[str]]:
 		'''
 		Checks the sources in the vectordb if they are already embedded
 			and are up to date.
@@ -88,8 +89,8 @@ def check_sources(self, sources: dict[int, SourceItem]) -> tuple[list[str], list
 
 		Args
 		----
-		sources: list[UploadFile]
-			List of source ids to check.
+		sources: Mapping[int, SourceItem | ReceivedFileItem]
+			Dict of sources to check.
 
 		Returns
 		-------
diff --git a/context_chat_backend/vectordb/pgvector.py b/context_chat_backend/vectordb/pgvector.py
index bfca0bb..86f636b 100644
--- a/context_chat_backend/vectordb/pgvector.py
+++ b/context_chat_backend/vectordb/pgvector.py
@@ -4,6 +4,7 @@
 #
 import logging
 import os
+from collections.abc import Mapping
 from datetime import datetime
 
 import psycopg
@@ -17,7 +18,14 @@
 from langchain_postgres.vectorstores import Base, PGVector
 
 from ..chain.types import InDocument, ScopeType
-from ..types import EmbeddingException, FatalEmbeddingException, IndexingError, RetryableEmbeddingException, SourceItem
+from ..types import (
+	EmbeddingException,
+	FatalEmbeddingException,
+	IndexingError,
+	ReceivedFileItem,
+	RetryableEmbeddingException,
+	SourceItem,
+)
 from ..utils import timed
 from .base import BaseVectorDB
 from .types import DbException, SafeDbException, UpdateAccessOp
@@ -129,7 +137,7 @@ def get_users(self) -> list[str]:
 			except Exception as e:
 				raise DbException('Error: getting a list of all users from access list') from e
 
-	def add_indocuments(self, indocuments: dict[int, InDocument]) -> dict[int, IndexingError | None]:
+	def add_indocuments(self, indocuments: Mapping[int, InDocument]) -> Mapping[int, IndexingError | None]:
 		"""
 		Raises
 			EmbeddingException: if the embedding request definitively fails
@@ -208,7 +216,7 @@ def add_indocuments(self, indocuments: dict[int, InDocument]) -> dict[int, Index
 		return results
 
 	@timed
-	def check_sources(self, sources: dict[int, SourceItem]) -> tuple[list[str], list[str]]:
+	def check_sources(self, sources: Mapping[int, SourceItem | ReceivedFileItem]) -> tuple[list[str], list[str]]:
 		'''
 		returns a tuple of (existing_source_ids, to_embed_source_ids)
 		'''

From 1ade19186593193a5005d2aadc97a83b25f601b8 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 18 Mar 2026 16:49:09 +0530
Subject: [PATCH 19/96] fix: truly parallel file parsing and indexing

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/task_fetcher.py | 48 ++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 28aff6a..f07f501 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -4,8 +4,10 @@
 #
 
 import logging
+import math
 import os
 from collections.abc import Mapping
+from concurrent.futures import ThreadPoolExecutor
 from contextlib import suppress
 from enum import Enum
 from threading import Event, Thread
@@ -47,7 +49,7 @@
 FILES_INDEXING_BATCH_SIZE = 16  # theoretical max RAM usage: 16 * 100 MiB, todo: config?
 MIN_FILES_PER_CPU = 4
 # divides the batch into these many chunks
-PARALLEL_FILE_PARSING = max(1, (os.cpu_count() or 2) - 1)  # todo: config?
+PARALLEL_FILE_PARSING_COUNT = max(1, (os.cpu_count() or 2) - 1)  # todo: config?
 ACTIONS_BATCH_SIZE = 512  # todo: config?
 POLLING_COOLDOWN = 30
 
@@ -71,10 +73,14 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 				target=embed_sources,
 				args=(vectordb_loader, app_config, source_items),
 			)
-		except (DbException, EmbeddingException):
-			raise
 		except Exception as e:
-			raise DbException('Error: failed to load sources') from e
+			err_name = {DbException: "DB", EmbeddingException: "Embedding"}.get(type(e), "Unknown")
+			source_ids = (s.reference for s in source_items.values())
+			err = IndexingError(
+				error=f'{err_name} Error occurred, the sources {source_ids} will be retried: {e}',
+				retryable=True,
+			)
+			return dict.fromkeys(source_items, err)
 
 
 	while True:
@@ -102,17 +108,33 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 
 			files_result = {}
 			providers_result = {}
-			chunk_size = max(MIN_FILES_PER_CPU, FILES_INDEXING_BATCH_SIZE // PARALLEL_FILE_PARSING)
 
-			# todo: do it in asyncio, it's not truly parallel yet
 			# chunk file parsing for better file operation parallelism
-			for i in range(0, len(q_items.files), chunk_size):
-				chunk = dict(list(q_items.files.items())[i:i+chunk_size])
-				files_result.update(_load_sources(chunk))
-
-			for i in range(0, len(q_items.content_providers), chunk_size):
-				chunk = dict(list(q_items.content_providers.items())[i:i+chunk_size])
-				providers_result.update(_load_sources(chunk))
+			file_chunk_size = max(MIN_FILES_PER_CPU, math.ceil(len(q_items.files) / PARALLEL_FILE_PARSING_COUNT))
+			file_chunks = [
+				dict(list(q_items.files.items())[i:i+file_chunk_size])
+				for i in range(0, len(q_items.files), file_chunk_size)
+			]
+			provider_chunk_size = max(
+				MIN_FILES_PER_CPU,
+				math.ceil(len(q_items.content_providers) / PARALLEL_FILE_PARSING_COUNT),
+			)
+			provider_chunks = [
+				dict(list(q_items.content_providers.items())[i:i+provider_chunk_size])
+				for i in range(0, len(q_items.content_providers), provider_chunk_size)
+			]
+
+			with ThreadPoolExecutor(
+				max_workers=PARALLEL_FILE_PARSING_COUNT,
+				thread_name_prefix='IndexingPool',
+			) as executor:
+				file_futures = [executor.submit(_load_sources, chunk) for chunk in file_chunks]
+				provider_futures = [executor.submit(_load_sources, chunk) for chunk in provider_chunks]
+
+				for future in file_futures:
+					files_result.update(future.result())
+				for future in provider_futures:
+					providers_result.update(future.result())
 
 			if (
 				any(isinstance(res, IndexingError) for res in files_result.values())

From 12fd1ca00fc6d3fab6e91b8bb4dbc6c11488ca74 Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Tue, 24 Mar 2026 10:36:04 +0100
Subject: [PATCH 20/96] initial pass at request processing

---
 context_chat_backend/controller.py   |   4 +-
 context_chat_backend/task_fetcher.py | 362 +++++++++++++++++++++++++--
 2 files changed, 350 insertions(+), 16 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 797ba20..3ebdc8a 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -40,7 +40,7 @@
 from .models.types import LlmException
 from nc_py_api.ex_app import AppAPIAuthMiddleware
 from .utils import JSONResponse, exec_in_proc, value_of
-from .task_fetcher import start_bg_threads, wait_for_bg_threads
+from .task_fetcher import start_bg_threads, trigger_handler, wait_for_bg_threads
 from .vectordb.service import count_documents_by_provider
 
 # setup
@@ -83,7 +83,7 @@ def enabled_handler(enabled: bool, _: NextcloudApp | AsyncNextcloudApp) -> str:
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-	set_handlers(app, enabled_handler, models_to_fetch=models_to_fetch)
+	set_handlers(app, enabled_handler, models_to_fetch=models_to_fetch, trigger_handler=trigger_handler)
 	nc = NextcloudApp()
 	if nc.enabled_state:
 		app_enabled.set()
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index f07f501..a502802 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -12,26 +12,25 @@
 from enum import Enum
 from threading import Event, Thread
 from time import sleep
+from typing import Any
 
 import niquests
-from nc_py_api import NextcloudApp
+from langchain.llms.base import LLM
+from langchain.schema import Document
+from nc_py_api import NextcloudApp, NextcloudException
+from niquests import JSONDecodeError, RequestException
 from pydantic import ValidationError
 
+from .chain.context import get_context_chunks, get_context_docs
 from .chain.ingest.injest import embed_sources
+from .chain.query_proc import get_pruned_query
+from .chain.types import ContextException, LLMOutput, ScopeType
+from .controller import llm_loader
 from .dyn_loader import VectorDBLoader
-from .types import (
-	ActionsQueueItems,
-	ActionType,
-	AppRole,
-	EmbeddingException,
-	FilesQueueItems,
-	IndexingError,
-	LoaderException,
-	ReceivedFileItem,
-	SourceItem,
-	TConfig,
-)
+from .types import ActionType, ActionsQueueItems, AppRole, EmbeddingException, FilesQueueItems, IndexingError, \
+	LoaderException, ReceivedFileItem, SourceItem, TConfig
 from .utils import exec_in_proc, get_app_role
+from .vectordb.base import BaseVectorDB
 from .vectordb.service import (
 	decl_update_access,
 	delete_by_provider,
@@ -52,6 +51,10 @@
 PARALLEL_FILE_PARSING_COUNT = max(1, (os.cpu_count() or 2) - 1)  # todo: config?
 ACTIONS_BATCH_SIZE = 512  # todo: config?
 POLLING_COOLDOWN = 30
+TRIGGER = Event()
+CHECK_INTERVAL = 5
+CHECK_INTERVAL_WITH_TRIGGER = 5 * 60
+CHECK_INTERVAL_ON_ERROR = 15
 
 
 class ThreadType(Enum):
@@ -370,7 +373,78 @@ def updates_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 
 
 def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
-	...
+	logger.info('Starting task fetcher loop')
+
+	try:
+		vectordb_loader = VectorDBLoader(app_config)
+	except LoaderException as e:
+		LOGGER.error('Error initializing vector DB loader, files indexing thread will not start:', exc_info=e)
+		return
+
+	nc = NextcloudApp()
+	llm: LLM = llm_loader.load()
+
+	while True:
+		if THREAD_STOP_EVENT.is_set():
+			LOGGER.info('Updates processing thread is stopping due to stop event being set')
+			return
+
+		try:
+			# Fetch pending task
+			try:
+				response = nc.providers.task_processing.next_task(list(provider_ids), list(task_type_ids))
+				if not response:
+					wait_for_tasks()
+					continue
+			except (NextcloudException, RequestException, JSONDecodeError) as e:
+				LOGGER.error(f"Network error fetching the next task {e}", exc_info=e)
+				wait_for_tasks(CHECK_INTERVAL_ON_ERROR)
+				continue
+
+			# Process task
+			task = response["task"]
+			provider = response["provider"]
+
+			try:
+				logger.debug(f'Processing task {task["id"]}')
+				result = process_task(task, vectordb_loader, llm, app_config)
+
+				# Return result to Nextcloud
+				success = return_result_to_nextcloud(task_id, result)
+
+				if success:
+					LOGGER.info(f'Task {task["id"]} completed successfully')
+				else:
+					LOGGER.error(f'Failed to return result for task {task["id"]}')
+
+			except ContextException as e:
+				LOGGER.warning(f'Context error for task {task["id"]}: {e}')
+			# TODO: Return error to Nextcloud
+			except ValueError as e:
+				LOGGER.warning(f'Validation error for task {task["id"]}: {e}')
+			# TODO: Return error to Nextcloud
+			except Exception as e:
+				LOGGER.exception(f'Unexpected error processing task {task["id"]}', exc_info=e)
+			# TODO: Return error to Nextcloud
+
+		except Exception as e:
+			logger.exception('Error in task fetcher loop', exc_info=e)
+	# TODO: Add appropriate error handling and backoff
+
+def trigger_handler(providerId: str):
+	global TRIGGER
+	print('TRIGGER called')
+	TRIGGER.set()
+
+def wait_for_tasks(interval = None):
+	global TRIGGER
+	global CHECK_INTERVAL
+	global CHECK_INTERVAL_WITH_TRIGGER
+	actual_interval = CHECK_INTERVAL if interval is None else interval
+	if TRIGGER.wait(timeout=actual_interval):
+		CHECK_INTERVAL = CHECK_INTERVAL_WITH_TRIGGER
+	TRIGGER.clear()
+
 
 
 def start_bg_threads(app_config: TConfig, app_enabled: Event):
@@ -430,3 +504,263 @@ def wait_for_bg_threads():
 			THREAD_STOP_EVENT.set()
 			THREADS[ThreadType.REQUEST_PROCESSING].join()
 			THREADS.pop(ThreadType.REQUEST_PROCESSING)
+
+
+# Default LLM template for context-based queries
+_LLM_TEMPLATE = '''Answer based only on this context and do not add any imaginative details. Make sure to use the same language as the question in your answer.
+{context}
+
+{question}
+'''
+
+def query_vector_database(
+	user_id: str,
+	query: str,
+	vectordb: BaseVectorDB,
+	ctx_limit: int,
+	scope_type: ScopeType | None = None,
+	scope_list: list[str] | None = None,
+) -> list[Document]:
+	"""
+	Query the vector database to retrieve relevant documents.
+
+	Args:
+		user_id: User ID for scoping the search
+		query: The search query text
+		vectordb: Vector database instance
+		ctx_limit: Maximum number of documents to return
+		scope_type: Optional scope type (PROVIDER or SOURCE)
+		scope_list: Optional list of scope identifiers
+
+	Returns:
+		List of relevant Document objects
+
+	Raises:
+		ContextException: If scope type is provided without scope list
+	"""
+	context_docs = get_context_docs(user_id, query, vectordb, ctx_limit, scope_type, scope_list)
+	logger.debug('Retrieved context documents', extra={
+		'user_id': user_id,
+		'num_docs': len(context_docs),
+		'ctx_limit': ctx_limit,
+	})
+	return context_docs
+
+
+def prepare_context_chunks(context_docs: list[Document]) -> list[str]:
+	"""
+	Extract and format text chunks from documents for LLM context.
+
+	Args:
+		context_docs: List of Document objects from vector DB
+
+	Returns:
+		List of formatted text chunks including titles and content
+	"""
+	return get_context_chunks(context_docs)
+
+
+def generate_llm_response(
+	llm: LLM,
+	app_config: TConfig,
+	user_id: str,
+	query: str,
+	template: str,
+	context_chunks: list[str],
+	end_separator: str = '',
+) -> str:
+	"""
+	Generate LLM response using the pruned query and context.
+
+	Args:
+		llm: Language model instance
+		app_config: Application configuration
+		user_id: User ID for the request
+		query: The original query text
+		template: Template for formatting the prompt
+		context_chunks: Context chunks to include in the prompt
+		end_separator: Optional separator to stop generation
+
+	Returns:
+		Generated LLM output text
+
+	Raises:
+		ValueError: If context length is too small to fit the query
+	"""
+	pruned_query_text = get_pruned_query(llm, app_config, query, template, context_chunks)
+
+	stop = [end_separator] if end_separator else None
+	output = llm.invoke(
+		pruned_query_text,
+		stop=stop,
+		userid=user_id,
+	).strip()
+
+	logger.debug('Generated LLM response', extra={
+		'user_id': user_id,
+		'output_length': len(output),
+	})
+	return output
+
+
+def extract_unique_sources(context_docs: list[Document]) -> list[str]:
+	"""
+	Extract unique source IDs from context documents.
+
+	Args:
+		context_docs: List of Document objects
+
+	Returns:
+		List of unique source IDs
+	"""
+	unique_sources: list[str] = list({
+		source for d in context_docs if (source := d.metadata.get('source'))
+	})
+	return unique_sources
+
+def execute_context_query(
+	user_id: str,
+	vectordb_loader: VectorDBLoader,
+	llm: LLM,
+	app_config: TConfig,
+	query: str,
+	ctx_limit: int = 20,
+	scope_type: ScopeType | None = None,
+	scope_list: list[str] | None = None,
+	template: str | None = None,
+	end_separator: str = '',
+) -> LLMOutput:
+	"""
+	Execute a RAG query with context retrieval from vector database.
+
+	This is the main function for processing queries that require context
+	from the vector database. It orchestrates the entire RAG pipeline:
+	1. Query vector database for relevant documents
+	2. Extract and format context chunks
+	3. Generate LLM response with context
+	4. Return output with source references
+
+	Args:
+		user_id: User ID for the request
+		vectordb_loader: Vector database loader instance
+		llm: Language model instance
+		app_config: Application configuration
+		query: The query text
+		ctx_limit: Maximum number of context documents (default: 20)
+		scope_type: Optional scope type for filtering
+		scope_list: Optional list of scope identifiers
+		template: Optional custom prompt template
+		end_separator: Optional separator to stop generation
+
+	Returns:
+		LLMOutput with generated text and source references
+
+	Raises:
+		ContextException: If no documents are retrieved
+		ValueError: If context length is too small to fit the query
+	"""
+	logger.info('Executing context query', extra={
+		'user_id': user_id,
+		'query_length': len(query),
+		'ctx_limit': ctx_limit,
+	})
+
+	# Step 1: Load vector database and retrieve relevant documents
+	db = vectordb_loader.load()
+	context_docs = query_vector_database(user_id, query, db, ctx_limit, scope_type, scope_list)
+
+	if len(context_docs) == 0:
+		raise ContextException('No documents retrieved, please index a few documents first')
+
+	# Step 2: Prepare context chunks for LLM
+	context_chunks = prepare_context_chunks(context_docs)
+	logger.debug('Prepared context chunks', extra={
+		'num_docs': len(context_docs),
+		'num_chunks': len(context_chunks),
+	})
+
+	# Step 3: Generate LLM response
+	output = generate_llm_response(
+		llm,
+		app_config,
+		user_id,
+		query,
+		template or _LLM_TEMPLATE,
+		context_chunks,
+		end_separator,
+	)
+
+	# Step 4: Extract unique sources for citation
+	unique_sources = extract_unique_sources(context_docs)
+
+	logger.info('Context query completed', extra={
+		'user_id': user_id,
+		'num_sources': len(unique_sources),
+	})
+
+	return LLMOutput(output=output, sources=unique_sources)
+
+# ============================================================================
+# Task Queue Processing
+# ============================================================================
+
+
+def return_result_to_nextcloud(task_id: str, result: LLMOutput) -> bool:
+	"""
+	Return query result back to Nextcloud.
+
+	STUB: This function should be implemented to send results back
+	to Nextcloud's task queue or API endpoint.
+
+	Args:
+		task_id: Unique task identifier
+		result: The LLMOutput result to return
+
+	Returns:
+		True if successful, False otherwise
+	"""
+	logger.debug('Returning result to Nextcloud (STUB)', extra={
+		'task_id': task_id,
+		'output_length': len(result['output']),
+		'num_sources': len(result['sources']),
+	})
+	# TODO: Implement actual Nextcloud result submission
+	return True
+
+
+def process_task(
+	task: dict[str, Any],
+	vectordb_loader: VectorDBLoader,
+	llm: LLM,
+	app_config: TConfig,
+) -> LLMOutput:
+	"""
+	Process a single query task.
+
+	Args:
+		task: Task dictionary from fetch_query_tasks_from_nextcloud
+		vectordb_loader: Vector database loader instance
+		llm: Language model instance
+		app_config: Application configuration
+
+	Returns:
+		LLMOutput with generated text and sources
+
+	Raises:
+		Various exceptions from query execution
+	"""
+	user_id = task['user_id']
+	query = task['query']
+
+	return execute_context_query(
+		user_id=user_id,
+		vectordb_loader=vectordb_loader,
+		llm=llm,
+		app_config=app_config,
+		query=query,
+		ctx_limit=task.get('ctx_limit', 20),
+		scope_type=task.get('scope_type'),
+		scope_list=task.get('scope_list'),
+		template=task.get('template'), # TODO: Somehow get the real template, tasks don't have it
+		end_separator=task.get('end_separator', ''), # TODO: same here
+	)
\ No newline at end of file

From 8aa2471080c10ea7b0a97a9d2dac4023e005464c Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 25 Mar 2026 10:42:40 +0100
Subject: [PATCH 21/96] implement request processing

---
 context_chat_backend/chain/one_shot.py |   1 +
 context_chat_backend/chain/types.py    |  12 +
 context_chat_backend/controller.py     |  19 +-
 context_chat_backend/task_fetcher.py   | 292 +++++++++++++++----------
 4 files changed, 201 insertions(+), 123 deletions(-)

diff --git a/context_chat_backend/chain/one_shot.py b/context_chat_backend/chain/one_shot.py
index 1c0521b..d0f5bbe 100644
--- a/context_chat_backend/chain/one_shot.py
+++ b/context_chat_backend/chain/one_shot.py
@@ -20,6 +20,7 @@
 
 logger = logging.getLogger('ccb.chain')
 
+# todo: remove this maybe
 def process_query(
 	user_id: str,
 	llm: LLM,
diff --git a/context_chat_backend/chain/types.py b/context_chat_backend/chain/types.py
index b006ad1..c527756 100644
--- a/context_chat_backend/chain/types.py
+++ b/context_chat_backend/chain/types.py
@@ -42,3 +42,15 @@ class LLMOutput(TypedDict):
 class SearchResult(TypedDict):
 	source_id: str
 	title: str
+
+class EnrichedSource(BaseModel):
+	id: str
+	label: str
+	icon: str
+	url: str
+
+class EnrichedSourceList(BaseModel):
+	sources: list[EnrichedSource]
+
+class ScopeList(BaseModel):
+	source_ids: list[str]
\ No newline at end of file
diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 3ebdc8a..1e0d277 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
+from nc_py_api.ex_app.providers.task_processing import TaskProcessingProvider
 
 # isort: off
 from .chain.types import ContextException, LLMOutput, ScopeType, SearchResult
@@ -65,9 +66,23 @@
 } if __download_models_from_hf else {}
 app_enabled = Event()
 
-def enabled_handler(enabled: bool, _: NextcloudApp | AsyncNextcloudApp) -> str:
+def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 	try:
 		if enabled:
+			provider = TaskProcessingProvider(
+				id="context_chat-context_chat_search",
+				name="Context Chat",
+				task_type="context_chat:context_chat_search",
+				expected_runtime=30,
+			)
+			nc.providers.task_processing.register(provider)
+			provider = TaskProcessingProvider(
+				id="context_chat-context_chat",
+				name="Context Chat",
+				task_type="context_chat:context_chat",
+				expected_runtime=30,
+			)
+			nc.providers.task_processing.register(provider)
 			app_enabled.set()
 			start_bg_threads(app_config, app_enabled)
 		else:
@@ -383,7 +398,7 @@ def download_logs() -> FileResponse:
 # 				'title': source.headers.get('title'),
 # 				'headers': source.headers,
 # 			})
-# 			return JSONResponse(f'Invaild/missing headers for: {source.filename}', 400)
+# 			return JSONResponse(f'Invaild/missing headers for:provider_ids {source.filename}', 400)
 
 # 	# wait for 10 minutes before failing the request
 # 	semres = doc_parse_semaphore.acquire(block=True, timeout=10*60)
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index a502802..7951f06 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: 2026 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
-
+import json
 import logging
 import math
 import os
@@ -21,11 +21,13 @@
 from niquests import JSONDecodeError, RequestException
 from pydantic import ValidationError
 
-from .chain.context import get_context_chunks, get_context_docs
+from .chain.context import do_doc_search, get_context_chunks, get_context_docs
 from .chain.ingest.injest import embed_sources
+from .chain.one_shot import process_context_query
 from .chain.query_proc import get_pruned_query
-from .chain.types import ContextException, LLMOutput, ScopeType
-from .controller import llm_loader
+from .chain.types import ContextException, EnrichedSource, EnrichedSourceList, LLMOutput, ScopeList, ScopeType, \
+	SearchResult
+from .controller import Query, execute_query, llm_loader
 from .dyn_loader import VectorDBLoader
 from .types import ActionType, ActionsQueueItems, AppRole, EmbeddingException, FilesQueueItems, IndexingError, \
 	LoaderException, ReceivedFileItem, SourceItem, TConfig
@@ -55,6 +57,7 @@
 CHECK_INTERVAL = 5
 CHECK_INTERVAL_WITH_TRIGGER = 5 * 60
 CHECK_INTERVAL_ON_ERROR = 15
+CONTEXT_LIMIT=20
 
 
 class ThreadType(Enum):
@@ -372,8 +375,25 @@ def updates_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 			continue
 
 
+def resolve_scope_list(source_ids: list[str], userId: str) -> list[str]:
+	"""
+
+	Parameters
+	----------
+	source_ids
+
+	Returns
+	-------
+	source_ids with only files, no folders (or source_ids in case of non-file provider)
+	"""
+	nc = NextcloudApp()
+	data = nc.ocs('POST', f'/ocs/v2.php/apps/context_chat/resolve_scope_list', json={'source_ids': source_ids, 'userId': userId})
+	sources = ScopeList.model_validate(data).source_ids
+	return sources
+
+
 def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
-	logger.info('Starting task fetcher loop')
+	LOGGER.info('Starting task fetcher loop')
 
 	try:
 		vectordb_loader = VectorDBLoader(app_config)
@@ -392,7 +412,7 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 		try:
 			# Fetch pending task
 			try:
-				response = nc.providers.task_processing.next_task(list(provider_ids), list(task_type_ids))
+				response = nc.providers.task_processing.next_task(['context_chat-context_chat', 'context_chat-context_chat_search'], ['context_chat:context_chat', 'context_chat:context_chat_search'])
 				if not response:
 					wait_for_tasks()
 					continue
@@ -403,14 +423,26 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 
 			# Process task
 			task = response["task"]
-			provider = response["provider"]
+			userId = task['userId']
 
 			try:
-				logger.debug(f'Processing task {task["id"]}')
-				result = process_task(task, vectordb_loader, llm, app_config)
-
-				# Return result to Nextcloud
-				success = return_result_to_nextcloud(task_id, result)
+				LOGGER.debug(f'Processing task {task["id"]}')
+
+				if task['input'].get('scopeType') == 'source':
+					# Resolve scope list to only files, no folders
+					task['input']['scopeList'] = resolve_scope_list(task['input'].get('scopeList'), userId)
+
+				if task['type'] == 'context_chat:context_chat':
+					result: LLMOutput = process_normal_task(task, vectordb_loader, llm, app_config)
+					# Return result to Nextcloud
+					success = return_normal_result_to_nextcloud(task['id'], userId, result)
+				elif task['type'] == 'context_chat:context_chat_search':
+					result: list[SearchResult] = process_search_task(task, vectordb_loader)
+					# Return result to Nextcloud
+					success = return_search_result_to_nextcloud(task['id'], userId, result)
+				else:
+					LOGGER.error(f'Unknown task type {task["type"]}')
+					success = return_error_to_nextcloud(task['id'], Exception(f'Unknown task type {task["type"]}'))
 
 				if success:
 					LOGGER.info(f'Task {task["id"]} completed successfully')
@@ -419,17 +451,17 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 
 			except ContextException as e:
 				LOGGER.warning(f'Context error for task {task["id"]}: {e}')
-			# TODO: Return error to Nextcloud
+				return_error_to_nextcloud(task['id'], e)
 			except ValueError as e:
 				LOGGER.warning(f'Validation error for task {task["id"]}: {e}')
-			# TODO: Return error to Nextcloud
+				return_error_to_nextcloud(task['id'], e)
 			except Exception as e:
 				LOGGER.exception(f'Unexpected error processing task {task["id"]}', exc_info=e)
-			# TODO: Return error to Nextcloud
+				return_error_to_nextcloud(task['id'], e)
 
 		except Exception as e:
-			logger.exception('Error in task fetcher loop', exc_info=e)
-	# TODO: Add appropriate error handling and backoff
+			LOGGER.exception('Error in task fetcher loop', exc_info=e)
+			wait_for_tasks(CHECK_INTERVAL_ON_ERROR)
 
 def trigger_handler(providerId: str):
 	global TRIGGER
@@ -506,13 +538,6 @@ def wait_for_bg_threads():
 			THREADS.pop(ThreadType.REQUEST_PROCESSING)
 
 
-# Default LLM template for context-based queries
-_LLM_TEMPLATE = '''Answer based only on this context and do not add any imaginative details. Make sure to use the same language as the question in your answer.
-{context}
-
-{question}
-'''
-
 def query_vector_database(
 	user_id: str,
 	query: str,
@@ -539,7 +564,7 @@ def query_vector_database(
 		ContextException: If scope type is provided without scope list
 	"""
 	context_docs = get_context_docs(user_id, query, vectordb, ctx_limit, scope_type, scope_list)
-	logger.debug('Retrieved context documents', extra={
+	LOGGER.debug('Retrieved context documents', extra={
 		'user_id': user_id,
 		'num_docs': len(context_docs),
 		'ctx_limit': ctx_limit,
@@ -596,7 +621,7 @@ def generate_llm_response(
 		userid=user_id,
 	).strip()
 
-	logger.debug('Generated LLM response', extra={
+	LOGGER.debug('Generated LLM response', extra={
 		'user_id': user_id,
 		'output_length': len(output),
 	})
@@ -618,117 +643,112 @@ def extract_unique_sources(context_docs: list[Document]) -> list[str]:
 	})
 	return unique_sources
 
-def execute_context_query(
-	user_id: str,
-	vectordb_loader: VectorDBLoader,
-	llm: LLM,
-	app_config: TConfig,
-	query: str,
-	ctx_limit: int = 20,
-	scope_type: ScopeType | None = None,
-	scope_list: list[str] | None = None,
-	template: str | None = None,
-	end_separator: str = '',
-) -> LLMOutput:
+def return_normal_result_to_nextcloud(task_id: int, userId: str, result: LLMOutput) -> bool:
 	"""
-	Execute a RAG query with context retrieval from vector database.
-
-	This is the main function for processing queries that require context
-	from the vector database. It orchestrates the entire RAG pipeline:
-	1. Query vector database for relevant documents
-	2. Extract and format context chunks
-	3. Generate LLM response with context
-	4. Return output with source references
+	Return query result back to Nextcloud.
 
 	Args:
-		user_id: User ID for the request
-		vectordb_loader: Vector database loader instance
-		llm: Language model instance
-		app_config: Application configuration
-		query: The query text
-		ctx_limit: Maximum number of context documents (default: 20)
-		scope_type: Optional scope type for filtering
-		scope_list: Optional list of scope identifiers
-		template: Optional custom prompt template
-		end_separator: Optional separator to stop generation
+		task_id: Unique task identifier
+		result: The LLMOutput result to return
 
 	Returns:
-		LLMOutput with generated text and source references
-
-	Raises:
-		ContextException: If no documents are retrieved
-		ValueError: If context length is too small to fit the query
+		True if successful, False otherwise
 	"""
-	logger.info('Executing context query', extra={
-		'user_id': user_id,
-		'query_length': len(query),
-		'ctx_limit': ctx_limit,
+	LOGGER.debug('Returning result to Nextcloud', extra={
+		'task_id': task_id,
+		'output_length': len(result['output']),
+		'num_sources': len(result['sources']),
 	})
 
-	# Step 1: Load vector database and retrieve relevant documents
-	db = vectordb_loader.load()
-	context_docs = query_vector_database(user_id, query, db, ctx_limit, scope_type, scope_list)
+	nc = NextcloudApp()
 
-	if len(context_docs) == 0:
-		raise ContextException('No documents retrieved, please index a few documents first')
+	try:
+		nc.providers.task_processing.report_result(task_id, {
+			'output': result['output'],
+			'sources': enrich_sources(result['sources'], userId),
+		})
+	except (NextcloudException, RequestException, JSONDecodeError) as e:
+		LOGGER.error(f"Network error reporting task result {e}", exc_info=e)
+		return False
 
-	# Step 2: Prepare context chunks for LLM
-	context_chunks = prepare_context_chunks(context_docs)
-	logger.debug('Prepared context chunks', extra={
-		'num_docs': len(context_docs),
-		'num_chunks': len(context_chunks),
-	})
+	return True
 
-	# Step 3: Generate LLM response
-	output = generate_llm_response(
-		llm,
-		app_config,
-		user_id,
-		query,
-		template or _LLM_TEMPLATE,
-		context_chunks,
-		end_separator,
-	)
+def enrich_sources(results: list[str], userId: str) -> list[EnrichedSource]:
+	nc = NextcloudApp()
+	# todo: refactor to include title here
+	data = nc.ocs('POST', f'/ocs/v2.php/apps/context_chat/enrich_sources', json={'sources': [{'source_id': id} for id in results], 'userId': userId})
+	sources = EnrichedSourceList.model_validate(data).sources
+	return sources
 
-	# Step 4: Extract unique sources for citation
-	unique_sources = extract_unique_sources(context_docs)
+def enrich_search_sources(results: list[SearchResult], userId: str) -> list[EnrichedSource]:
+	nc = NextcloudApp()
+	data = nc.ocs('POST', f'/ocs/v2.php/apps/context_chat/enrich_sources', json={'sources': results, 'userId': userId})
+	sources = EnrichedSourceList.model_validate(data).sources
+	return sources
 
-	logger.info('Context query completed', extra={
-		'user_id': user_id,
-		'num_sources': len(unique_sources),
+
+def return_search_result_to_nextcloud(task_id: int, userId: str, result: list[SearchResult]) -> bool:
+	"""
+	Return search result back to Nextcloud.
+
+	Args:
+		task_id: Unique task identifier
+		result: The list of search results to return
+
+	Returns:
+		True if successful, False otherwise
+	"""
+	LOGGER.debug('Returning search result to Nextcloud', extra={
+		'task_id': task_id,
+		'num_sources': len(result),
 	})
 
-	return LLMOutput(output=output, sources=unique_sources)
+	nc = NextcloudApp()
 
-# ============================================================================
-# Task Queue Processing
-# ============================================================================
+	try:
+		sources = [json.dumps(source) for source in enrich_search_sources(result, userId)]
 
+		nc.providers.task_processing.report_result(task_id, {
+			'sources': sources,
+		})
+	except (NextcloudException, RequestException, JSONDecodeError) as e:
+		LOGGER.error(f"Network error reporting search task result {e}", exc_info=e)
+		return False
 
-def return_result_to_nextcloud(task_id: str, result: LLMOutput) -> bool:
-	"""
-	Return query result back to Nextcloud.
+	return True
 
-	STUB: This function should be implemented to send results back
-	to Nextcloud's task queue or API endpoint.
+def return_error_to_nextcloud(task_id: int, e: Exception) -> bool:
+	"""
+	Return error result back to Nextcloud.
 
 	Args:
 		task_id: Unique task identifier
-		result: The LLMOutput result to return
+		e: error object
 
 	Returns:
 		True if successful, False otherwise
 	"""
-	logger.debug('Returning result to Nextcloud (STUB)', extra={
-		'task_id': task_id,
-		'output_length': len(result['output']),
-		'num_sources': len(result['sources']),
-	})
-	# TODO: Implement actual Nextcloud result submission
+	LOGGER.debug('Returning error to Nextcloud', exc_info=e)
+
+	nc = NextcloudApp()
+
+	if isinstance(e, ValueError):
+		message = "Validation error: " + str(e)
+	elif isinstance(e, ContextException):
+		message = "Context error" + str(e)
+	else:
+		message = "Unexpected error" + str(e)
+
+	try:
+		nc.providers.task_processing.report_result(task_id, None, message)
+	except (NextcloudException, RequestException, JSONDecodeError) as e:
+		LOGGER.error(f"Network error reporting task result {e}", exc_info=e)
+		return False
+
 	return True
 
 
-def process_task(
+def process_normal_task(
 	task: dict[str, Any],
 	vectordb_loader: VectorDBLoader,
 	llm: LLM,
@@ -750,17 +770,47 @@ def process_task(
 		Various exceptions from query execution
 	"""
 	user_id = task['user_id']
-	query = task['query']
-
-	return execute_context_query(
-		user_id=user_id,
-		vectordb_loader=vectordb_loader,
-		llm=llm,
-		app_config=app_config,
-		query=query,
-		ctx_limit=task.get('ctx_limit', 20),
-		scope_type=task.get('scope_type'),
-		scope_list=task.get('scope_list'),
-		template=task.get('template'), # TODO: Somehow get the real template, tasks don't have it
-		end_separator=task.get('end_separator', ''), # TODO: same here
+	task_input = task['input']
+
+	return exec_in_proc(target=process_context_query,
+		args=(
+			user_id,
+			vectordb_loader,
+			llm,
+			app_config,
+			task_input.get('prompt'),
+			CONTEXT_LIMIT,
+			task_input.get('scopeType'),
+			task_input.get('scopeList'),
+		)
+	)
+
+def process_search_task(
+	task: dict[str, Any],
+	vectordb_loader: VectorDBLoader,
+) -> list[SearchResult]:
+	"""
+	Process a single search task.
+
+	Args:
+		task: Task dictionary from fetch_query_tasks_from_nextcloud
+		vectordb_loader: Vector database loader instance
+
+	Returns:
+		list of Search results
+
+	Raises:
+		Various exceptions from query execution
+	"""
+	user_id = task['user_id']
+	task_input = task['input']
+	return exec_in_proc(target=do_doc_search,
+		args=(
+			user_id,
+			task_input.get('prompt'),
+			vectordb_loader,
+			CONTEXT_LIMIT,
+			task_input.get('scopeType'),
+			task_input.get('scopeList'),
+		)
 	)
\ No newline at end of file

From 2093936913c08e55c5aca01b559314df731b4bb4 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 26 Mar 2026 22:43:48 +0530
Subject: [PATCH 22/96] request processing fixes

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/chain/one_shot.py |   7 +-
 context_chat_backend/chain/types.py    |  14 +-
 context_chat_backend/controller.py     | 179 ++++++++++++-------------
 context_chat_backend/dyn_loader.py     |  16 +--
 context_chat_backend/task_fetcher.py   | 164 +++++++++++-----------
 5 files changed, 185 insertions(+), 195 deletions(-)

diff --git a/context_chat_backend/chain/one_shot.py b/context_chat_backend/chain/one_shot.py
index d0f5bbe..c79f272 100644
--- a/context_chat_backend/chain/one_shot.py
+++ b/context_chat_backend/chain/one_shot.py
@@ -10,7 +10,7 @@
 from ..types import TConfig
 from .context import get_context_chunks, get_context_docs
 from .query_proc import get_pruned_query
-from .types import ContextException, LLMOutput, ScopeType
+from .types import ContextException, LLMOutput, ScopeType, SearchResult
 
 _LLM_TEMPLATE = '''Answer based only on this context and do not add any imaginative details. Make sure to use the same language as the question in your answer.
 {context}
@@ -79,6 +79,9 @@ def process_context_query(
 		stop=[end_separator],
 		userid=user_id,
 	).strip()
-	unique_sources: list[str] = list({source for d in context_docs if (source := d.metadata.get('source'))})
+	unique_sources = [SearchResult(
+		source_id=source,
+		title=d.metadata.get('title', ''),
+	) for d in context_docs if (source := d.metadata.get('source'))]
 
 	return LLMOutput(output=output, sources=unique_sources)
diff --git a/context_chat_backend/chain/types.py b/context_chat_backend/chain/types.py
index c527756..3afdf29 100644
--- a/context_chat_backend/chain/types.py
+++ b/context_chat_backend/chain/types.py
@@ -33,16 +33,16 @@ class ContextException(Exception):
 	...
 
 
-class LLMOutput(TypedDict):
-	output: str
-	sources: list[str]
-	# todo: add "titles" field
-
-
 class SearchResult(TypedDict):
 	source_id: str
 	title: str
 
+
+class LLMOutput(TypedDict):
+	output: str
+	sources: list[SearchResult]
+
+
 class EnrichedSource(BaseModel):
 	id: str
 	label: str
@@ -53,4 +53,4 @@ class EnrichedSourceList(BaseModel):
 	sources: list[EnrichedSource]
 
 class ScopeList(BaseModel):
-	source_ids: list[str]
\ No newline at end of file
+	source_ids: list[str]
diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 1e0d277..33e3cad 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -5,7 +5,7 @@
 from nc_py_api.ex_app.providers.task_processing import TaskProcessingProvider
 
 # isort: off
-from .chain.types import ContextException, LLMOutput, ScopeType, SearchResult
+from .chain.types import ContextException
 from .types import LoaderException, EmbeddingException
 from .vectordb.types import DbException, SafeDbException
 from .setup_functions import ensure_config_file, repair_run, setup_env_vars
@@ -25,22 +25,17 @@
 from contextlib import asynccontextmanager
 from functools import wraps
 from threading import Event, Thread
-from typing import Any
 
 from fastapi import FastAPI, Request
-from langchain.llms.base import LLM
 from nc_py_api import AsyncNextcloudApp, NextcloudApp
 from nc_py_api.ex_app import persistent_storage, set_handlers
-from pydantic import BaseModel, ValidationInfo, field_validator
 from starlette.responses import FileResponse
 
-from .chain.context import do_doc_search
-from .chain.one_shot import process_context_query, process_query
 from .config_parser import get_config
-from .dyn_loader import LLMModelLoader, VectorDBLoader
+from .dyn_loader import VectorDBLoader
 from .models.types import LlmException
 from nc_py_api.ex_app import AppAPIAuthMiddleware
-from .utils import JSONResponse, exec_in_proc, value_of
+from .utils import JSONResponse, exec_in_proc
 from .task_fetcher import start_bg_threads, trigger_handler, wait_for_bg_threads
 from .vectordb.service import count_documents_by_provider
 
@@ -108,7 +103,6 @@ async def lifespan(app: FastAPI):
 	t.start()
 	yield
 	vectordb_loader.offload()
-	llm_loader.offload()
 	wait_for_bg_threads()
 
 
@@ -120,7 +114,6 @@ async def lifespan(app: FastAPI):
 # loaders
 
 vectordb_loader = VectorDBLoader(app_config)
-llm_loader = LLMModelLoader(app, app_config)
 
 
 # locks and semaphores
@@ -438,90 +431,90 @@ def download_logs() -> FileResponse:
 # 	return JSONResponse({'loaded_sources': loaded_sources, 'sources_to_retry': not_added_sources})
 
 
-class Query(BaseModel):
-	userId: str
-	query: str
-	useContext: bool = True
-	scopeType: ScopeType | None = None
-	scopeList: list[str] | None = None
-	ctxLimit: int = 20
-
-	@field_validator('userId', 'query', 'ctxLimit')
-	@classmethod
-	def check_empty_values(cls, value: Any, info: ValidationInfo):
-		if value_of(value) is None:
-			raise ValueError('Empty value for field', info.field_name)
-
-		return value
-
-	@field_validator('ctxLimit')
-	@classmethod
-	def at_least_one_context(cls, value: int):
-		if value < 1:
-			raise ValueError('Invalid context chunk limit')
-
-		return value
-
-
-def execute_query(query: Query, in_proc: bool = True) -> LLMOutput:
-	llm: LLM = llm_loader.load()
-	template = app.extra.get('LLM_TEMPLATE')
-	no_ctx_template = app.extra['LLM_NO_CTX_TEMPLATE']
-	# todo: array
-	end_separator = app.extra.get('LLM_END_SEPARATOR', '')
-
-	if query.useContext:
-		target = process_context_query
-		args=(
-			query.userId,
-			vectordb_loader,
-			llm,
-			app_config,
-			query.query,
-			query.ctxLimit,
-			query.scopeType,
-			query.scopeList,
-			template,
-			end_separator,
-		)
-	else:
-		target=process_query
-		args=(
-			query.userId,
-			llm,
-			app_config,
-			query.query,
-			no_ctx_template,
-			end_separator,
-		)
-
-	if in_proc:
-		return exec_in_proc(target=target, args=args)
-
-	return target(*args)  # pyright: ignore
-
-
-@app.post('/query')
-@enabled_guard(app)
-def _(query: Query) -> LLMOutput:
-	logger.debug('received query request', extra={ 'query': query.dict() })
+# class Query(BaseModel):
+# 	userId: str
+# 	query: str
+# 	useContext: bool = True
+# 	scopeType: ScopeType | None = None
+# 	scopeList: list[str] | None = None
+# 	ctxLimit: int = 20
+
+# 	@field_validator('userId', 'query', 'ctxLimit')
+# 	@classmethod
+# 	def check_empty_values(cls, value: Any, info: ValidationInfo):
+# 		if value_of(value) is None:
+# 			raise ValueError('Empty value for field', info.field_name)
+
+# 		return value
+
+# 	@field_validator('ctxLimit')
+# 	@classmethod
+# 	def at_least_one_context(cls, value: int):
+# 		if value < 1:
+# 			raise ValueError('Invalid context chunk limit')
+
+# 		return value
+
+
+# def execute_query(query: Query, in_proc: bool = True) -> LLMOutput:
+# 	llm: LLM = llm_loader.load()
+# 	template = app.extra.get('LLM_TEMPLATE')
+# 	no_ctx_template = app.extra['LLM_NO_CTX_TEMPLATE']
+# 	# todo: array
+# 	end_separator = app.extra.get('LLM_END_SEPARATOR', '')
+
+# 	if query.useContext:
+# 		target = process_context_query
+# 		args=(
+# 			query.userId,
+# 			vectordb_loader,
+# 			llm,
+# 			app_config,
+# 			query.query,
+# 			query.ctxLimit,
+# 			query.scopeType,
+# 			query.scopeList,
+# 			template,
+# 			end_separator,
+# 		)
+# 	else:
+# 		target=process_query
+# 		args=(
+# 			query.userId,
+# 			llm,
+# 			app_config,
+# 			query.query,
+# 			no_ctx_template,
+# 			end_separator,
+# 		)
 
-	if app_config.llm[0] == 'nc_texttotext':
-		return execute_query(query)
+# 	if in_proc:
+# 		return exec_in_proc(target=target, args=args)
 
-	with llm_lock:
-		return execute_query(query, in_proc=False)
+# 	return target(*args)  # pyright: ignore
 
 
-@app.post('/docSearch')
-@enabled_guard(app)
-def _(query: Query) -> list[SearchResult]:
-	# useContext from Query is not used here
-	return exec_in_proc(target=do_doc_search, args=(
-		query.userId,
-		query.query,
-		vectordb_loader,
-		query.ctxLimit,
-		query.scopeType,
-		query.scopeList,
-	))
+# @app.post('/query')
+# @enabled_guard(app)
+# def _(query: Query) -> LLMOutput:
+# 	logger.debug('received query request', extra={ 'query': query.dict() })
+
+# 	if app_config.llm[0] == 'nc_texttotext':
+# 		return execute_query(query)
+
+# 	with llm_lock:
+# 		return execute_query(query, in_proc=False)
+
+
+# @app.post('/docSearch')
+# @enabled_guard(app)
+# def _(query: Query) -> list[SearchResult]:
+# 	# useContext from Query is not used here
+# 	return exec_in_proc(target=do_doc_search, args=(
+# 		query.userId,
+# 		query.query,
+# 		vectordb_loader,
+# 		query.ctxLimit,
+# 		query.scopeType,
+# 		query.scopeList,
+# 	))
diff --git a/context_chat_backend/dyn_loader.py b/context_chat_backend/dyn_loader.py
index d67310f..47b1957 100644
--- a/context_chat_backend/dyn_loader.py
+++ b/context_chat_backend/dyn_loader.py
@@ -7,11 +7,9 @@
 import gc
 import logging
 from abc import ABC, abstractmethod
-from time import time
 from typing import Any
 
 import torch
-from fastapi import FastAPI
 from langchain.llms.base import LLM
 
 from .models.loader import init_model
@@ -54,19 +52,11 @@ def offload(self) -> None:
 
 
 class LLMModelLoader(Loader):
-	def __init__(self, app: FastAPI, config: TConfig) -> None:
+	def __init__(self, config: TConfig) -> None:
 		self.config = config
-		self.app = app
 
 	def load(self) -> LLM:
-		if self.app.extra.get('LLM_MODEL') is not None:
-			self.app.extra['LLM_LAST_ACCESSED'] = time()
-			return self.app.extra['LLM_MODEL']
-
 		llm_name, llm_config = self.config.llm
-		self.app.extra['LLM_TEMPLATE'] = llm_config.pop('template', '')
-		self.app.extra['LLM_NO_CTX_TEMPLATE'] = llm_config.pop('no_ctx_template', '')
-		self.app.extra['LLM_END_SEPARATOR'] = llm_config.pop('end_separator', '')
 
 		try:
 			model = init_model('llm', (llm_name, llm_config))
@@ -75,13 +65,9 @@ def load(self) -> LLM:
 		if not isinstance(model, LLM):
 			raise LoaderException(f'Error: {model} does not implement "llm" type or has returned an invalid object')
 
-		self.app.extra['LLM_MODEL'] = model
-		self.app.extra['LLM_LAST_ACCESSED'] = time()
 		return model
 
 	def offload(self) -> None:
-		if self.app.extra.get('LLM_MODEL') is not None:
-			del self.app.extra['LLM_MODEL']
 		clear_cache()
 
 
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 7951f06..634b51c 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: 2026 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
-import json
 import logging
 import math
 import os
@@ -25,12 +24,20 @@
 from .chain.ingest.injest import embed_sources
 from .chain.one_shot import process_context_query
 from .chain.query_proc import get_pruned_query
-from .chain.types import ContextException, EnrichedSource, EnrichedSourceList, LLMOutput, ScopeList, ScopeType, \
-	SearchResult
-from .controller import Query, execute_query, llm_loader
-from .dyn_loader import VectorDBLoader
-from .types import ActionType, ActionsQueueItems, AppRole, EmbeddingException, FilesQueueItems, IndexingError, \
-	LoaderException, ReceivedFileItem, SourceItem, TConfig
+from .chain.types import ContextException, EnrichedSourceList, LLMOutput, ScopeList, ScopeType, SearchResult
+from .dyn_loader import LLMModelLoader, VectorDBLoader
+from .types import (
+	ActionsQueueItems,
+	ActionType,
+	AppRole,
+	EmbeddingException,
+	FilesQueueItems,
+	IndexingError,
+	LoaderException,
+	ReceivedFileItem,
+	SourceItem,
+	TConfig,
+)
 from .utils import exec_in_proc, get_app_role
 from .vectordb.base import BaseVectorDB
 from .vectordb.service import (
@@ -387,9 +394,11 @@ def resolve_scope_list(source_ids: list[str], userId: str) -> list[str]:
 	source_ids with only files, no folders (or source_ids in case of non-file provider)
 	"""
 	nc = NextcloudApp()
-	data = nc.ocs('POST', f'/ocs/v2.php/apps/context_chat/resolve_scope_list', json={'source_ids': source_ids, 'userId': userId})
-	sources = ScopeList.model_validate(data).source_ids
-	return sources
+	data = nc.ocs('POST', '/ocs/v2.php/apps/context_chat/resolve_scope_list', json={
+		'source_ids': source_ids,
+		'userId': userId,
+	})
+	return ScopeList.model_validate(data).source_ids
 
 
 def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
@@ -397,6 +406,7 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 
 	try:
 		vectordb_loader = VectorDBLoader(app_config)
+		llm_loader = LLMModelLoader(app_config)
 	except LoaderException as e:
 		LOGGER.error('Error initializing vector DB loader, files indexing thread will not start:', exc_info=e)
 		return
@@ -412,7 +422,10 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 		try:
 			# Fetch pending task
 			try:
-				response = nc.providers.task_processing.next_task(['context_chat-context_chat', 'context_chat-context_chat_search'], ['context_chat:context_chat', 'context_chat:context_chat_search'])
+				response = nc.providers.task_processing.next_task(
+					['context_chat-context_chat', 'context_chat-context_chat_search'],
+					['context_chat:context_chat', 'context_chat:context_chat_search'],
+				)
 				if not response:
 					wait_for_tasks()
 					continue
@@ -437,9 +450,9 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 					# Return result to Nextcloud
 					success = return_normal_result_to_nextcloud(task['id'], userId, result)
 				elif task['type'] == 'context_chat:context_chat_search':
-					result: list[SearchResult] = process_search_task(task, vectordb_loader)
+					search_result: list[SearchResult] = process_search_task(task, vectordb_loader)
 					# Return result to Nextcloud
-					success = return_search_result_to_nextcloud(task['id'], userId, result)
+					success = return_search_result_to_nextcloud(task['id'], userId, search_result)
 				else:
 					LOGGER.error(f'Unknown task type {task["type"]}')
 					success = return_error_to_nextcloud(task['id'], Exception(f'Unknown task type {task["type"]}'))
@@ -480,62 +493,60 @@ def wait_for_tasks(interval = None):
 
 
 def start_bg_threads(app_config: TConfig, app_enabled: Event):
-	match APP_ROLE:
-		case AppRole.INDEXING | AppRole.NORMAL:
-			if (
-				ThreadType.FILES_INDEXING in THREADS
-				or ThreadType.UPDATES_PROCESSING in THREADS
-			):
-				LOGGER.info('Background threads already running, skipping start')
-				return
-
-			THREAD_STOP_EVENT.clear()
-			THREADS[ThreadType.FILES_INDEXING] = Thread(
-				target=files_indexing_thread,
-				args=(app_config, app_enabled),
-				name='FilesIndexingThread',
-			)
-			THREADS[ThreadType.UPDATES_PROCESSING] = Thread(
-				target=updates_processing_thread,
-				args=(app_config, app_enabled),
-				name='UpdatesProcessingThread',
-			)
-			THREADS[ThreadType.FILES_INDEXING].start()
-			THREADS[ThreadType.UPDATES_PROCESSING].start()
-
-		case AppRole.RP | AppRole.NORMAL:
-			if ThreadType.REQUEST_PROCESSING in THREADS:
-				LOGGER.info('Background threads already running, skipping start')
-				return
-
-			THREAD_STOP_EVENT.clear()
-			THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
-				target=request_processing_thread,
-				args=(app_config, app_enabled),
-				name='RequestProcessingThread',
-			)
-			THREADS[ThreadType.REQUEST_PROCESSING].start()
+	if APP_ROLE == AppRole.INDEXING or APP_ROLE == AppRole.NORMAL:
+		if (
+			ThreadType.FILES_INDEXING in THREADS
+			or ThreadType.UPDATES_PROCESSING in THREADS
+		):
+			LOGGER.info('Background threads already running, skipping start')
+			return
+
+		THREAD_STOP_EVENT.clear()
+		THREADS[ThreadType.FILES_INDEXING] = Thread(
+			target=files_indexing_thread,
+			args=(app_config, app_enabled),
+			name='FilesIndexingThread',
+		)
+		THREADS[ThreadType.UPDATES_PROCESSING] = Thread(
+			target=updates_processing_thread,
+			args=(app_config, app_enabled),
+			name='UpdatesProcessingThread',
+		)
+		THREADS[ThreadType.FILES_INDEXING].start()
+		THREADS[ThreadType.UPDATES_PROCESSING].start()
+
+	if APP_ROLE == AppRole.RP or APP_ROLE == AppRole.NORMAL:
+		if ThreadType.REQUEST_PROCESSING in THREADS:
+			LOGGER.info('Background threads already running, skipping start')
+			return
+
+		THREAD_STOP_EVENT.clear()
+		THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
+			target=request_processing_thread,
+			args=(app_config, app_enabled),
+			name='RequestProcessingThread',
+		)
+		THREADS[ThreadType.REQUEST_PROCESSING].start()
 
 
 def wait_for_bg_threads():
-	match APP_ROLE:
-		case AppRole.INDEXING | AppRole.NORMAL:
-			if (ThreadType.FILES_INDEXING not in THREADS or ThreadType.UPDATES_PROCESSING not in THREADS):
-				return
+	if APP_ROLE == AppRole.INDEXING or APP_ROLE == AppRole.NORMAL:
+		if (ThreadType.FILES_INDEXING not in THREADS or ThreadType.UPDATES_PROCESSING not in THREADS):
+			return
 
-			THREAD_STOP_EVENT.set()
-			THREADS[ThreadType.FILES_INDEXING].join()
-			THREADS[ThreadType.UPDATES_PROCESSING].join()
-			THREADS.pop(ThreadType.FILES_INDEXING)
-			THREADS.pop(ThreadType.UPDATES_PROCESSING)
+		THREAD_STOP_EVENT.set()
+		THREADS[ThreadType.FILES_INDEXING].join()
+		THREADS[ThreadType.UPDATES_PROCESSING].join()
+		THREADS.pop(ThreadType.FILES_INDEXING)
+		THREADS.pop(ThreadType.UPDATES_PROCESSING)
 
-		case AppRole.RP | AppRole.NORMAL:
-			if (ThreadType.REQUEST_PROCESSING not in THREADS):
-				return
+	if APP_ROLE == AppRole.RP or APP_ROLE == AppRole.NORMAL:
+		if (ThreadType.REQUEST_PROCESSING not in THREADS):
+			return
 
-			THREAD_STOP_EVENT.set()
-			THREADS[ThreadType.REQUEST_PROCESSING].join()
-			THREADS.pop(ThreadType.REQUEST_PROCESSING)
+		THREAD_STOP_EVENT.set()
+		THREADS[ThreadType.REQUEST_PROCESSING].join()
+		THREADS.pop(ThreadType.REQUEST_PROCESSING)
 
 
 def query_vector_database(
@@ -673,18 +684,12 @@ def return_normal_result_to_nextcloud(task_id: int, userId: str, result: LLMOutp
 
 	return True
 
-def enrich_sources(results: list[str], userId: str) -> list[EnrichedSource]:
-	nc = NextcloudApp()
-	# todo: refactor to include title here
-	data = nc.ocs('POST', f'/ocs/v2.php/apps/context_chat/enrich_sources', json={'sources': [{'source_id': id} for id in results], 'userId': userId})
-	sources = EnrichedSourceList.model_validate(data).sources
-	return sources
 
-def enrich_search_sources(results: list[SearchResult], userId: str) -> list[EnrichedSource]:
+def enrich_sources(results: list[SearchResult], userId: str) -> list[str]:
 	nc = NextcloudApp()
-	data = nc.ocs('POST', f'/ocs/v2.php/apps/context_chat/enrich_sources', json={'sources': results, 'userId': userId})
+	data = nc.ocs('POST', '/ocs/v2.php/apps/context_chat/enrich_sources', json={'sources': results, 'userId': userId})
 	sources = EnrichedSourceList.model_validate(data).sources
-	return sources
+	return [s.model_dump_json() for s in sources]
 
 
 def return_search_result_to_nextcloud(task_id: int, userId: str, result: list[SearchResult]) -> bool:
@@ -706,10 +711,8 @@ def return_search_result_to_nextcloud(task_id: int, userId: str, result: list[Se
 	nc = NextcloudApp()
 
 	try:
-		sources = [json.dumps(source) for source in enrich_search_sources(result, userId)]
-
 		nc.providers.task_processing.report_result(task_id, {
-			'sources': sources,
+			'sources': enrich_sources(result, userId),
 		})
 	except (NextcloudException, RequestException, JSONDecodeError) as e:
 		LOGGER.error(f"Network error reporting search task result {e}", exc_info=e)
@@ -769,8 +772,10 @@ def process_normal_task(
 	Raises:
 		Various exceptions from query execution
 	"""
-	user_id = task['user_id']
+	user_id = task['userId']
 	task_input = task['input']
+	if task_input.get('scopeType') == 'none':
+		task_input['scopeType'] = None
 
 	return exec_in_proc(target=process_context_query,
 		args=(
@@ -802,8 +807,11 @@ def process_search_task(
 	Raises:
 		Various exceptions from query execution
 	"""
-	user_id = task['user_id']
+	user_id = task['userId']
 	task_input = task['input']
+	if task_input.get('scopeType') == 'none':
+		task_input['scopeType'] = None
+
 	return exec_in_proc(target=do_doc_search,
 		args=(
 			user_id,
@@ -813,4 +821,4 @@ def process_search_task(
 			task_input.get('scopeType'),
 			task_input.get('scopeList'),
 		)
-	)
\ No newline at end of file
+	)

From 36b5f0211ee2da2123d220a312521afe204a559b Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 26 Mar 2026 23:01:56 +0530
Subject: [PATCH 23/96] chore: drop commented code

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/controller.py | 292 +----------------------------
 1 file changed, 1 insertion(+), 291 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 33e3cad..49d1d73 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -24,7 +24,6 @@
 from collections.abc import Callable
 from contextlib import asynccontextmanager
 from functools import wraps
-from threading import Event, Thread
 
 from fastapi import FastAPI, Request
 from nc_py_api import AsyncNextcloudApp, NextcloudApp
@@ -59,7 +58,7 @@
 		'revision': '607a30d783dfa663caf39e06633721c8d4cfcd7e',
 	}
 } if __download_models_from_hf else {}
-app_enabled = Event()
+app_enabled = threading.Event()
 
 def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 	try:
@@ -99,8 +98,6 @@ async def lifespan(app: FastAPI):
 		app_enabled.set()
 		start_bg_threads(app_config, app_enabled)
 	logger.info(f'App enable state at startup: {app_enabled.is_set()}')
-	t = Thread(target=background_thread_task, args=())
-	t.start()
 	yield
 	vectordb_loader.offload()
 	wait_for_bg_threads()
@@ -134,15 +131,6 @@ async def lifespan(app: FastAPI):
 if not app_config.disable_aaa:
 	app.add_middleware(AppAPIAuthMiddleware)
 
-# logger background thread
-
-def background_thread_task():
-	# todo
-	# while(True):
-	# 	logger.info(f'Currently indexing {len(_indexing)} documents (filename, size): ', extra={'_indexing': _indexing})
-	# 	sleep(10)
-	...
-
 # exception handlers
 
 @app.exception_handler(DbException)
@@ -240,281 +228,3 @@ def download_logs() -> FileResponse:
 				if os.path.isfile(file_path): # Might be a folder (just skip it then)
 					zip_file.write(file_path)
 		return FileResponse(tmp.name, media_type='application/zip', filename='docker_logs.zip')
-
-
-# @app.post('/updateAccessDeclarative')
-# @enabled_guard(app)
-# def _(
-# 	userIds: Annotated[list[str], Body()],
-# 	sourceId: Annotated[str, Body()],
-# ):
-# 	logger.debug('Update access declarative request:', extra={
-# 		'user_ids': userIds,
-# 		'source_id': sourceId,
-# 	})
-
-# 	if len(userIds) == 0:
-# 		return JSONResponse('Empty list of user ids', 400)
-
-# 	if not is_valid_source_id(sourceId):
-# 		return JSONResponse('Invalid source id', 400)
-
-# 	exec_in_proc(target=decl_update_access, args=(vectordb_loader, userIds, sourceId))
-
-# 	return JSONResponse('Access updated')
-
-
-# @app.post('/updateAccess')
-# @enabled_guard(app)
-# def _(
-# 	op: Annotated[UpdateAccessOp, Body()],
-# 	userIds: Annotated[list[str], Body()],
-# 	sourceId: Annotated[str, Body()],
-# ):
-# 	logger.debug('Update access request', extra={
-# 		'op': op,
-# 		'user_ids': userIds,
-# 		'source_id': sourceId,
-# 	})
-
-# 	if len(userIds) == 0:
-# 		return JSONResponse('Empty list of user ids', 400)
-
-# 	if not is_valid_source_id(sourceId):
-# 		return JSONResponse('Invalid source id', 400)
-
-# 	exec_in_proc(target=update_access, args=(vectordb_loader, op, userIds, sourceId))
-
-# 	return JSONResponse('Access updated')
-
-
-# @app.post('/updateAccessProvider')
-# @enabled_guard(app)
-# def _(
-# 	op: Annotated[UpdateAccessOp, Body()],
-# 	userIds: Annotated[list[str], Body()],
-# 	providerId: Annotated[str, Body()],
-# ):
-# 	logger.debug('Update access by provider request', extra={
-# 		'op': op,
-# 		'user_ids': userIds,
-# 		'provider_id': providerId,
-# 	})
-
-# 	if len(userIds) == 0:
-# 		return JSONResponse('Empty list of user ids', 400)
-
-# 	if not is_valid_provider_id(providerId):
-# 		return JSONResponse('Invalid provider id', 400)
-
-# 	exec_in_proc(target=update_access_provider, args=(vectordb_loader, op, userIds, providerId))
-
-# 	return JSONResponse('Access updated')
-
-
-# @app.post('/deleteSources')
-# @enabled_guard(app)
-# def _(sourceIds: Annotated[list[str], Body(embed=True)]):
-# 	logger.debug('Delete sources request', extra={
-# 		'source_ids': sourceIds,
-# 	})
-
-# 	sourceIds = [source.strip() for source in sourceIds if source.strip() != '']
-
-# 	if len(sourceIds) == 0:
-# 		return JSONResponse('No sources provided', 400)
-
-# 	res = exec_in_proc(target=delete_by_source, args=(vectordb_loader, sourceIds))
-# 	if res is False:
-# 		return JSONResponse('Error: VectorDB delete failed, check vectordb logs for more info.', 400)
-
-# 	return JSONResponse('All valid sources deleted')
-
-
-# @app.post('/deleteProvider')
-# @enabled_guard(app)
-# def _(providerKey: str = Body(embed=True)):
-# 	logger.debug('Delete sources by provider for all users request', extra={ 'provider_key': providerKey })
-
-# 	if value_of(providerKey) is None:
-# 		return JSONResponse('Invalid provider key provided', 400)
-
-# 	exec_in_proc(target=delete_by_provider, args=(vectordb_loader, providerKey))
-
-# 	return JSONResponse('All valid sources deleted')
-
-
-# @app.post('/deleteUser')
-# @enabled_guard(app)
-# def _(userId: str = Body(embed=True)):
-# 	logger.debug('Remove access list for user, and orphaned sources', extra={ 'user_id': userId })
-
-# 	if value_of(userId) is None:
-# 		return JSONResponse('Invalid userId provided', 400)
-
-# 	exec_in_proc(target=delete_user, args=(vectordb_loader, userId))
-
-# 	return JSONResponse('User deleted')
-
-
-# @app.put('/loadSources')
-# @enabled_guard(app)
-# def _(sources: list[UploadFile]):
-# 	global _indexing
-
-# 	if len(sources) == 0:
-# 		return JSONResponse('No sources provided', 400)
-
-# 	for source in sources:
-# 		if not value_of(source.filename):
-# 			return JSONResponse(f'Invalid source filename for: {source.headers.get("title")}', 400)
-
-# 		with index_lock:
-# 			if source.filename in _indexing:
-# 				# this request will be retried by the client
-# 				return JSONResponse(
-# 					f'This source ({source.filename}) is already being processed in another request, try again later',
-# 					503,
-# 					headers={'cc-retry': 'true'},
-# 				)
-
-# 		if not (
-# 			value_of(source.headers.get('userIds'))
-# 			and source.headers.get('title', None) is not None
-# 			and value_of(source.headers.get('type'))
-# 			and value_of(source.headers.get('modified'))
-# 			and source.headers['modified'].isdigit()
-# 			and value_of(source.headers.get('provider'))
-# 		):
-# 			logger.error('Invalid/missing headers received', extra={
-# 				'source_id': source.filename,
-# 				'title': source.headers.get('title'),
-# 				'headers': source.headers,
-# 			})
-# 			return JSONResponse(f'Invaild/missing headers for:provider_ids {source.filename}', 400)
-
-# 	# wait for 10 minutes before failing the request
-# 	semres = doc_parse_semaphore.acquire(block=True, timeout=10*60)
-# 	if not semres:
-# 		return JSONResponse(
-# 			'Document parser worker limit reached, try again in some time or consider increasing the limit',
-# 			503,
-# 			headers={'cc-retry': 'true'}
-# 		)
-
-# 	with index_lock:
-# 		for source in sources:
-# 			_indexing[source.filename] = source.size
-
-# 	try:
-# 		loaded_sources, not_added_sources = exec_in_proc(
-# 			target=embed_sources,
-# 			args=(vectordb_loader, app.extra['CONFIG'], sources)
-# 		)
-# 	except (DbException, EmbeddingException):
-# 		raise
-# 	except Exception as e:
-# 		raise DbException('Error: failed to load sources') from e
-# 	finally:
-# 		with index_lock:
-# 			for source in sources:
-# 				_indexing.pop(source.filename, None)
-# 		doc_parse_semaphore.release()
-
-# 	if len(loaded_sources) != len(sources):
-# 		logger.debug('Some sources were not loaded', extra={
-# 			'Count of loaded sources': f'{len(loaded_sources)}/{len(sources)}',
-# 			'source_ids': loaded_sources,
-# 		})
-
-# 	# loaded sources include the existing sources that may only have their access updated
-# 	return JSONResponse({'loaded_sources': loaded_sources, 'sources_to_retry': not_added_sources})
-
-
-# class Query(BaseModel):
-# 	userId: str
-# 	query: str
-# 	useContext: bool = True
-# 	scopeType: ScopeType | None = None
-# 	scopeList: list[str] | None = None
-# 	ctxLimit: int = 20
-
-# 	@field_validator('userId', 'query', 'ctxLimit')
-# 	@classmethod
-# 	def check_empty_values(cls, value: Any, info: ValidationInfo):
-# 		if value_of(value) is None:
-# 			raise ValueError('Empty value for field', info.field_name)
-
-# 		return value
-
-# 	@field_validator('ctxLimit')
-# 	@classmethod
-# 	def at_least_one_context(cls, value: int):
-# 		if value < 1:
-# 			raise ValueError('Invalid context chunk limit')
-
-# 		return value
-
-
-# def execute_query(query: Query, in_proc: bool = True) -> LLMOutput:
-# 	llm: LLM = llm_loader.load()
-# 	template = app.extra.get('LLM_TEMPLATE')
-# 	no_ctx_template = app.extra['LLM_NO_CTX_TEMPLATE']
-# 	# todo: array
-# 	end_separator = app.extra.get('LLM_END_SEPARATOR', '')
-
-# 	if query.useContext:
-# 		target = process_context_query
-# 		args=(
-# 			query.userId,
-# 			vectordb_loader,
-# 			llm,
-# 			app_config,
-# 			query.query,
-# 			query.ctxLimit,
-# 			query.scopeType,
-# 			query.scopeList,
-# 			template,
-# 			end_separator,
-# 		)
-# 	else:
-# 		target=process_query
-# 		args=(
-# 			query.userId,
-# 			llm,
-# 			app_config,
-# 			query.query,
-# 			no_ctx_template,
-# 			end_separator,
-# 		)
-
-# 	if in_proc:
-# 		return exec_in_proc(target=target, args=args)
-
-# 	return target(*args)  # pyright: ignore
-
-
-# @app.post('/query')
-# @enabled_guard(app)
-# def _(query: Query) -> LLMOutput:
-# 	logger.debug('received query request', extra={ 'query': query.dict() })
-
-# 	if app_config.llm[0] == 'nc_texttotext':
-# 		return execute_query(query)
-
-# 	with llm_lock:
-# 		return execute_query(query, in_proc=False)
-
-
-# @app.post('/docSearch')
-# @enabled_guard(app)
-# def _(query: Query) -> list[SearchResult]:
-# 	# useContext from Query is not used here
-# 	return exec_in_proc(target=do_doc_search, args=(
-# 		query.userId,
-# 		query.query,
-# 		vectordb_loader,
-# 		query.ctxLimit,
-# 		query.scopeType,
-# 		query.scopeList,
-# 	))

From 85d29f1640eb2ff5daa89016ecbae8ee9d484d27 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 27 Mar 2026 01:06:34 +0530
Subject: [PATCH 24/96] fix(ci): parse json output from the stats command

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 31 +++++++-------------------
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 58f9f50..589f885 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -224,7 +224,7 @@ jobs:
             echo "Checking stats, attempt $i..."
 
             stats_err=$(mktemp)
-            stats=$(timeout 5 ./occ context_chat:stats 2>"$stats_err")
+            stats=$(timeout 5 ./occ context_chat:stats --json 2>"$stats_err")
             stats_exit=$?
             echo "Stats output:"
             echo "$stats"
@@ -243,41 +243,25 @@ jobs:
             fi
 
             # Extract Total eligible files
-            total_files=$(echo "$stats" | grep -oP 'Total eligible files:\s*\K\d+' || echo "")
+            total_files=$(echo "$stats" | jq '.eligible_files_count' || echo "")
 
             # Extract Indexed documents count (files__default)
-            indexed_count=$(echo "$stats" | grep -oP "'files__default'\s*=>\s*\K\d+" || echo "")
-
-            # Validate parsed values
-            if [ -z "$total_files" ] || [ -z "$indexed_count" ]; then
-              echo "Error: Could not parse stats output properly"
-              if echo "$stats" | grep -q "Indexed documents:"; then
-                echo "  Indexed documents section found but could not extract count"
-              fi
-              sleep 10
-              continue
-            fi
+            indexed_count=$(echo "$stats" | jq '.queued_documents_counts.files__default' || echo "")
 
             echo "Total eligible files: $total_files"
             echo "Indexed documents (files__default): $indexed_count"
 
-            # Calculate absolute difference
             diff=$((total_files - indexed_count))
-            if [ $diff -lt 0 ]; then
-              diff=$((-diff))
-            fi
-
-            # Calculate 2% threshold using bc for floating point support
-            threshold=$(echo "scale=4; $total_files * 0.02" | bc)
+            threshold=$((total_files * 2 / 100))
 
             # Check if difference is within tolerance
-            if (( $(echo "$diff <= $threshold" | bc -l) )); then
+            if [ $diff -le $threshold ]; then
               echo "Indexing within 2% tolerance (diff=$diff, threshold=$threshold)"
               success=1
               break
             else
-              pct=$(echo "scale=2; ($diff / $total_files) * 100" | bc)
-              echo "Outside 2% tolerance: diff=$diff (${pct}%), threshold=$threshold"
+              progress=$((diff * 100 / total_files))
+              echo "Outside 2% tolerance: diff=$diff (${progress}%), threshold=$threshold"
             fi
 
             # Check if backend is still alive
@@ -293,6 +277,7 @@ jobs:
           echo "::endgroup::"
 
           ./occ context_chat:stats
+          ./occ context_chat:stats --json
 
           if [ $success -ne 1 ]; then
             echo "Max attempts reached"

From 4c6d01b9e913de0a931345aeab7169b3029a5c9a Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 27 Mar 2026 02:57:22 +0530
Subject: [PATCH 25/96] fix: seek to 0 to read the full buffer

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/chain/ingest/injest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/context_chat_backend/chain/ingest/injest.py b/context_chat_backend/chain/ingest/injest.py
index 18a37b4..0196f5d 100644
--- a/context_chat_backend/chain/ingest/injest.py
+++ b/context_chat_backend/chain/ingest/injest.py
@@ -50,6 +50,7 @@ async def __fetch_file_content(
 				dav=False,
 				params={ 'userId': user_id },
 			)
+			fp.seek(0)
 			return fp
 		except niquests.exceptions.RequestException as e:
 			if e.response is None:

From 51774ff771944c5dffd46b3f33ed2c4a0d7f5bb6 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 27 Mar 2026 02:59:46 +0530
Subject: [PATCH 26/96] fix(ci): 3% tolerance

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 589f885..73418e9 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -252,16 +252,16 @@ jobs:
             echo "Indexed documents (files__default): $indexed_count"
 
             diff=$((total_files - indexed_count))
-            threshold=$((total_files * 2 / 100))
+            threshold=$((total_files * 3 / 100))
 
             # Check if difference is within tolerance
             if [ $diff -le $threshold ]; then
-              echo "Indexing within 2% tolerance (diff=$diff, threshold=$threshold)"
+              echo "Indexing within 3% tolerance (diff=$diff, threshold=$threshold)"
               success=1
               break
             else
               progress=$((diff * 100 / total_files))
-              echo "Outside 2% tolerance: diff=$diff (${progress}%), threshold=$threshold"
+              echo "Outside 3% tolerance: diff=$diff (${progress}%), threshold=$threshold"
             fi
 
             # Check if backend is still alive

From c81b6758600eae2f049deb7ec578ef5c7eeca41b Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 27 Mar 2026 04:38:36 +0530
Subject: [PATCH 27/96] fix(ci): wait longer for EM server

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 73418e9..5c50548 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -201,7 +201,7 @@ jobs:
           timeout 10 ./occ app_api:daemon:register --net host manual_install "Manual Install" manual-install http localhost http://localhost:8080
           timeout 120 ./occ app_api:app:register context_chat_backend manual_install --json-info "{\"appid\":\"context_chat_backend\",\"name\":\"Context Chat Backend\",\"daemon_config_name\":\"manual_install\",\"version\":\"${{ fromJson(steps.appinfo.outputs.result).version }}\",\"secret\":\"12345\",\"port\":10034,\"scopes\":[],\"system_app\":0}" --force-scopes --wait-finish
           ls -la context_chat_backend/persistent_storage/*
-          sleep 30 # Wait for the em server to get ready
+          sleep 60 # Wait for the em server to get ready
 
       - name: Initial memory usage check
         run: |
@@ -242,13 +242,13 @@ jobs:
               continue
             fi
 
-            # Extract Total eligible files
-            total_files=$(echo "$stats" | jq '.eligible_files_count' || echo "")
+            # Extract total queued files
+            total_files=$(echo "$stats" | jq '.queued_documents_counts.files__default' || echo "")
 
-            # Extract Indexed documents count (files__default)
-            indexed_count=$(echo "$stats" | jq '.queued_documents_counts.files__default' || echo "")
+            # Extract indexed documents count (files__default)
+            indexed_count=$(echo "$stats" | jq '.vectordb_document_counts.files__default' || echo "")
 
-            echo "Total eligible files: $total_files"
+            echo "Total queued files: $total_files"
             echo "Indexed documents (files__default): $indexed_count"
 
             diff=$((total_files - indexed_count))

From 6817f897e4ae14fdfeab0ad7b40a9a2de78cfe4b Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Mon, 30 Mar 2026 15:57:44 +0530
Subject: [PATCH 28/96] fix: don't process files or requests until the EM
 server is healthy

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml |  1 -
 context_chat_backend/network_em.py     | 14 +++++++++++---
 context_chat_backend/task_fetcher.py   | 14 ++++++++++++++
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 5c50548..8e6ca7d 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -201,7 +201,6 @@ jobs:
           timeout 10 ./occ app_api:daemon:register --net host manual_install "Manual Install" manual-install http localhost http://localhost:8080
           timeout 120 ./occ app_api:app:register context_chat_backend manual_install --json-info "{\"appid\":\"context_chat_backend\",\"name\":\"Context Chat Backend\",\"daemon_config_name\":\"manual_install\",\"version\":\"${{ fromJson(steps.appinfo.outputs.result).version }}\",\"secret\":\"12345\",\"port\":10034,\"scopes\":[],\"system_app\":0}" --force-scopes --wait-finish
           ls -la context_chat_backend/persistent_storage/*
-          sleep 60 # Wait for the em server to get ready
 
       - name: Initial memory usage check
         run: |
diff --git a/context_chat_backend/network_em.py b/context_chat_backend/network_em.py
index d39ea56..43ced6c 100644
--- a/context_chat_backend/network_em.py
+++ b/context_chat_backend/network_em.py
@@ -8,7 +8,6 @@
 
 import niquests
 from langchain_core.embeddings import Embeddings
-from pydantic import BaseModel
 
 from .types import (
 	EmbeddingException,
@@ -41,8 +40,17 @@ class CreateEmbeddingResponse(TypedDict):
 	usage: EmbeddingUsage
 
 
-class NetworkEmbeddings(Embeddings, BaseModel):
-	app_config: TConfig
+class NetworkEmbeddings(Embeddings):
+	def __init__(self, app_config: TConfig):
+		self.app_config = app_config
+
+	def check_connection(self) -> bool:
+		try:
+			self.embed_query('hello')
+			return True
+		except EmbeddingException as e:
+			logger.warning('Embedding server connection failed', exc_info=e)
+			return False
 
 	def _get_embedding(self, input_: str | list[str], try_: int = 3) -> list[float] | list[list[float]]:
 		emconf = self.app_config.embedding
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 634b51c..92d2719 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -26,6 +26,7 @@
 from .chain.query_proc import get_pruned_query
 from .chain.types import ContextException, EnrichedSourceList, LLMOutput, ScopeList, ScopeType, SearchResult
 from .dyn_loader import LLMModelLoader, VectorDBLoader
+from .network_em import NetworkEmbeddings
 from .types import (
 	ActionsQueueItems,
 	ActionType,
@@ -102,6 +103,10 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 			return
 
 		try:
+			if not __check_em_server(app_config):
+				sleep(POLLING_COOLDOWN)
+				continue
+
 			nc = NextcloudApp()
 			q_items_res = nc.ocs(
 				'GET',
@@ -415,6 +420,10 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 	llm: LLM = llm_loader.load()
 
 	while True:
+		if not __check_em_server(app_config):
+			sleep(POLLING_COOLDOWN)
+			continue
+
 		if THREAD_STOP_EVENT.is_set():
 			LOGGER.info('Updates processing thread is stopping due to stop event being set')
 			return
@@ -822,3 +831,8 @@ def process_search_task(
 			task_input.get('scopeList'),
 		)
 	)
+
+
+def __check_em_server(app_config: TConfig) -> bool:
+	embedding_model = NetworkEmbeddings(app_config=app_config)
+	return embedding_model.check_connection()

From 104a37a8a1b28878b98da5ce7b0eb520ebe73716 Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 1 Apr 2026 12:38:38 +0200
Subject: [PATCH 29/96] tests: Increase testing time to allow backend to ingest
 more sources

---
 .github/workflows/integration-test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 8e6ca7d..b937a14 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -218,8 +218,8 @@ jobs:
       - name: Periodically check context_chat stats for 15 minutes to allow the backend to index the files
         run: |
           success=0
-          echo "::group::Checking stats periodically for 15 minutes to allow the backend to index the files"
-          for i in {1..90}; do
+          echo "::group::Checking stats periodically for 30 minutes to allow the backend to index the files"
+          for i in {1..180}; do
             echo "Checking stats, attempt $i..."
 
             stats_err=$(mktemp)

From b3b461a2b3a88f2fd815be11c132a7174772aa3c Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 1 Apr 2026 13:17:13 +0200
Subject: [PATCH 30/96] fix: More log statements

---
 .../chain/ingest/doc_loader.py                | 20 +++++++++--
 context_chat_backend/chain/ingest/injest.py   | 35 +++++++++++++++++++
 context_chat_backend/task_fetcher.py          | 29 +++++++++++++--
 context_chat_backend/utils.py                 | 12 +++++++
 context_chat_backend/vectordb/pgvector.py     | 20 ++++++++++-
 5 files changed, 110 insertions(+), 6 deletions(-)

diff --git a/context_chat_backend/chain/ingest/doc_loader.py b/context_chat_backend/chain/ingest/doc_loader.py
index 832c833..04c611d 100644
--- a/context_chat_backend/chain/ingest/doc_loader.py
+++ b/context_chat_backend/chain/ingest/doc_loader.py
@@ -7,6 +7,8 @@
 import tempfile
 from collections.abc import Callable
 from io import BytesIO
+import logging
+from time import perf_counter_ns
 
 import docx2txt
 from epub2txt import epub2txt
@@ -19,6 +21,8 @@
 
 from ...types import IndexingException, SourceItem
 
+logger = logging.getLogger('ccb.doc_loader')
+
 
 def _temp_file_wrapper(file: BytesIO, loader: Callable, sep: str = '\n') -> str:
 	raw_bytes = file.read()
@@ -133,10 +137,22 @@ def decode_source(source: SourceItem) -> str:
 		else:
 			io_obj = source.content
 
-		if _loader_map.get(source.type):
-			result = _loader_map[source.type](io_obj)
+		loader_fn = _loader_map.get(source.type)
+		if loader_fn:
+			logger.debug(
+				'Decoding source %r with loader %s (mime: %s) — may be slow or block',
+				source.title, loader_fn.__name__, source.type,
+			)
+			t0 = perf_counter_ns()
+			result = loader_fn(io_obj)
+			elapsed_ms = (perf_counter_ns() - t0) / 1e6
+			logger.debug(
+				'Loader %s for %r finished in %.2f ms (%d chars)',
+				loader_fn.__name__, source.title, elapsed_ms, len(result),
+			)
 			return result.encode('utf-8', 'ignore').decode('utf-8', 'ignore').strip()
 
+		logger.debug('No specific loader for mime type %s, reading as plain text for %r', source.type, source.title)
 		return io_obj.read().decode('utf-8', 'ignore').strip()
 	except IndexingException:
 		raise
diff --git a/context_chat_backend/chain/ingest/injest.py b/context_chat_backend/chain/ingest/injest.py
index 0196f5d..7ede94a 100644
--- a/context_chat_backend/chain/ingest/injest.py
+++ b/context_chat_backend/chain/ingest/injest.py
@@ -7,6 +7,7 @@
 import re
 from collections.abc import Mapping
 from io import BytesIO
+from time import perf_counter_ns
 
 import niquests
 from langchain.schema import Document
@@ -42,6 +43,8 @@ async def __fetch_file_content(
 	async with semaphore:
 		nc = AsyncNextcloudApp()
 		try:
+			logger.debug('Downloading file id %d for user %s', file_id, user_id)
+			t0 = perf_counter_ns()
 			# a file pointer for storing the stream in memory until it is consumed
 			fp = BytesIO()
 			await nc._session.download2fp(
@@ -51,6 +54,8 @@ async def __fetch_file_content(
 				params={ 'userId': user_id },
 			)
 			fp.seek(0)
+			elapsed_ms = (perf_counter_ns() - t0) / 1e6
+			logger.debug('Downloaded file id %d for user %s in %.2f ms (%d bytes)', file_id, user_id, elapsed_ms, fp.getbuffer().nbytes)
 			return fp
 		except niquests.exceptions.RequestException as e:
 			if e.response is None:
@@ -89,6 +94,9 @@ async def __fetch_files_content(
 	semaphore = asyncio.Semaphore(CONCURRENT_FILE_FETCHES)
 	tasks = []
 
+	file_count = sum(1 for s in sources.values() if isinstance(s, ReceivedFileItem))
+	logger.debug('Fetching content for %d file(s) (max %d concurrent)', file_count, CONCURRENT_FILE_FETCHES)
+
 	for db_id, file in sources.items():
 		if isinstance(file, SourceItem):
 			continue
@@ -123,7 +131,11 @@ async def __fetch_files_content(
 		# any user id from the list should have read access to the file
 		tasks.append(asyncio.ensure_future(__fetch_file_content(semaphore, file.file_id, file.userIds[0])))
 
+	logger.debug('Gathering %d file download task(s) — this blocks until all downloads complete or fail', len(tasks))
+	t0 = perf_counter_ns()
 	results = await asyncio.gather(*tasks, return_exceptions=True)
+	elapsed_ms = (perf_counter_ns() - t0) / 1e6
+	logger.debug('All %d file download task(s) completed in %.2f ms', len(tasks), elapsed_ms)
 	for (db_id, file), result in zip(sources.items(), results, strict=True):
 		if isinstance(file, SourceItem):
 			continue
@@ -215,7 +227,14 @@ def _sources_to_indocuments(
 
 		# transform the source to have text data
 		try:
+			logger.debug(
+				'Decoding source %s (type: %s, title: %r) — may be slow for complex file types',
+				source.reference, source.type, source.title,
+			)
+			t0 = perf_counter_ns()
 			content = decode_source(source)
+			elapsed_ms = (perf_counter_ns() - t0) / 1e6
+			logger.debug('Decoded source %s in %.2f ms (%d chars)', source.reference, elapsed_ms, len(content))
 		except IndexingException as e:
 			logger.error(f'Error decoding source ({source.reference}): {e}', exc_info=e)
 			errored_docs[db_id] = IndexingError(
@@ -333,7 +352,17 @@ def _process_sources(
 
 	source_proc_results = _increase_access_for_existing_sources(vectordb, existing_sources)
 
+	logger.debug(
+		'Fetching file contents for %d source(s) — this blocks on network I/O to Nextcloud',
+		len(to_embed_sources),
+	)
+	t0 = perf_counter_ns()
 	populated_to_embed_sources, errored_sources = asyncio.run(__fetch_files_content(to_embed_sources))
+	elapsed_ms = (perf_counter_ns() - t0) / 1e6
+	logger.debug(
+		'File content fetch complete in %.2f ms: %d fetched, %d errored',
+		elapsed_ms, len(populated_to_embed_sources), len(errored_sources),
+	)
 	source_proc_results.update(errored_sources)  # pyright: ignore[reportAttributeAccessIssue]
 
 	if len(populated_to_embed_sources) == 0:
@@ -359,7 +388,13 @@ def _process_sources(
 		'source_ids': [indoc.source_id for indoc in indocuments.values()]
 	})
 
+	t0 = perf_counter_ns()
 	doc_add_results = vectordb.add_indocuments(indocuments)
+	elapsed_ms = (perf_counter_ns() - t0) / 1e6
+	logger.info(
+		'vectordb.add_indocuments completed in %.2f ms for %d document(s)',
+		elapsed_ms, len(indocuments),
+	)
 	source_proc_results.update(doc_add_results)  # pyright: ignore[reportAttributeAccessIssue]
 	logger.debug('Added documents to vectordb')
 
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 92d2719..32673c8 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -82,11 +82,22 @@ def files_indexing_thread(app_config: TConfig, app_enabled: Event) -> None:
 		return
 
 	def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) -> Mapping[int, IndexingError | None]:
+		source_refs = [s.reference for s in source_items.values()]
+		LOGGER.info('Starting embed_sources subprocess for %d source(s): %s', len(source_items), source_refs)
 		try:
-			return exec_in_proc(
+			result = exec_in_proc(
 				target=embed_sources,
 				args=(vectordb_loader, app_config, source_items),
 			)
+			errors = {k: v for k, v in result.items() if isinstance(v, IndexingError)}
+			LOGGER.info(
+				'embed_sources subprocess finished for %d source(s): %d succeeded, %d errored',
+				len(source_items),
+				len(result) - len(errors),
+				len(errors),
+				extra={'errors': errors} if errors else {},
+			)
+			return result
 		except Exception as e:
 			err_name = {DbException: "DB", EmbeddingException: "Embedding"}.get(type(e), "Unknown")
 			source_ids = (s.reference for s in source_items.values())
@@ -94,6 +105,10 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 				error=f'{err_name} Error occurred, the sources {source_ids} will be retried: {e}',
 				retryable=True,
 			)
+			LOGGER.error(
+				'embed_sources subprocess raised a %s error for sources %s, marking all as retryable',
+				err_name, source_refs, exc_info=e,
+			)
 			return dict.fromkeys(source_items, err)
 
 
@@ -146,13 +161,21 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 				max_workers=PARALLEL_FILE_PARSING_COUNT,
 				thread_name_prefix='IndexingPool',
 			) as executor:
+				LOGGER.info(
+					'Dispatching %d file chunk(s) and %d provider chunk(s) to %d IndexingPool worker(s)',
+					len(file_chunks), len(provider_chunks), PARALLEL_FILE_PARSING_COUNT,
+				)
 				file_futures = [executor.submit(_load_sources, chunk) for chunk in file_chunks]
 				provider_futures = [executor.submit(_load_sources, chunk) for chunk in provider_chunks]
 
-				for future in file_futures:
+				for i, future in enumerate(file_futures):
+					LOGGER.debug('Waiting for file chunk %d/%d future to complete', i + 1, len(file_futures))
 					files_result.update(future.result())
-				for future in provider_futures:
+					LOGGER.debug('File chunk %d/%d future completed', i + 1, len(file_futures))
+				for i, future in enumerate(provider_futures):
+					LOGGER.debug('Waiting for provider chunk %d/%d future to complete', i + 1, len(provider_futures))
 					providers_result.update(future.result())
+					LOGGER.debug('Provider chunk %d/%d future completed', i + 1, len(provider_futures))
 
 			if (
 				any(isinstance(res, IndexingError) for res in files_result.values())
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index c7e588b..d28fc58 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -90,8 +90,20 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 		kwargs=kwargs,
 		daemon=daemon,
 	)
+	target_name = getattr(target, '__name__', str(target))
+	_logger.debug('Starting subprocess for %s', target_name)
+	start = perf_counter_ns()
 	p.start()
+	_logger.debug('Subprocess PID %d started for %s, waiting for it to finish (no timeout)', p.pid, target_name)
 	p.join()
+	elapsed_ms = (perf_counter_ns() - start) / 1e6
+	_logger.debug('Subprocess PID %d for %s finished in %.2f ms (exit code: %s)', p.pid, target_name, elapsed_ms, p.exitcode)
+	if p.exitcode != 0:
+		_logger.warning(
+			'Subprocess PID %d for %s exited with non-zero exit code %d after %.2f ms'
+			' — possible OOM kill or unhandled signal',
+			p.pid, target_name, p.exitcode, elapsed_ms,
+		)
 
 	result = pconn.recv()
 	if result['error'] is not None:
diff --git a/context_chat_backend/vectordb/pgvector.py b/context_chat_backend/vectordb/pgvector.py
index 86f636b..33dfb03 100644
--- a/context_chat_backend/vectordb/pgvector.py
+++ b/context_chat_backend/vectordb/pgvector.py
@@ -6,6 +6,7 @@
 import os
 from collections.abc import Mapping
 from datetime import datetime
+from time import perf_counter_ns
 
 import psycopg
 import sqlalchemy as sa
@@ -152,8 +153,25 @@ def add_indocuments(self, indocuments: Mapping[int, InDocument]) -> Mapping[int,
 					# so we chunk the documents into (5 values * 10k) chunks
 					# change the chunk size when there are more inserted values per document
 					chunk_ids = []
-					for i in range(0, len(indoc.documents), batch_size):
+					total_chunks = len(indoc.documents)
+					num_batches = max(1, -(-total_chunks // batch_size))  # ceiling division
+					logger.debug(
+						'Embedding source %s: %d chunk(s) in %d batch(es) — blocks on embedding model',
+						indoc.source_id, total_chunks, num_batches,
+					)
+					for i in range(0, total_chunks, batch_size):
+						batch_num = i // batch_size + 1
+						logger.debug(
+							'Sending embedding batch %d/%d (%d chunk(s)) for source %s',
+							batch_num, num_batches, len(indoc.documents[i:i+batch_size]), indoc.source_id,
+						)
+						t0 = perf_counter_ns()
 						chunk_ids.extend(self.client.add_documents(indoc.documents[i:i+batch_size]))
+						elapsed_ms = (perf_counter_ns() - t0) / 1e6
+						logger.debug(
+							'Embedding batch %d/%d for source %s completed in %.2f ms',
+							batch_num, num_batches, indoc.source_id, elapsed_ms,
+						)
 
 					doc = DocumentsStore(
 						source_id=indoc.source_id,

From a4a88dae5f231732e448cefb9c0ea3e0da03aee5 Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 1 Apr 2026 13:18:24 +0200
Subject: [PATCH 31/96] tests: Set wait time back to 90

---
 .github/workflows/integration-test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index b937a14..8e6ca7d 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -218,8 +218,8 @@ jobs:
       - name: Periodically check context_chat stats for 15 minutes to allow the backend to index the files
         run: |
           success=0
-          echo "::group::Checking stats periodically for 30 minutes to allow the backend to index the files"
-          for i in {1..180}; do
+          echo "::group::Checking stats periodically for 15 minutes to allow the backend to index the files"
+          for i in {1..90}; do
             echo "Checking stats, attempt $i..."
 
             stats_err=$(mktemp)

From 0c52747375355e6e0338fd68599338f8bd644dc4 Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 1 Apr 2026 14:04:57 +0200
Subject: [PATCH 32/96] fix: Reduce worker count on github actions

to prevent oom
---
 context_chat_backend/task_fetcher.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 32673c8..91d1991 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -59,6 +59,10 @@
 MIN_FILES_PER_CPU = 4
 # divides the batch into these many chunks
 PARALLEL_FILE_PARSING_COUNT = max(1, (os.cpu_count() or 2) - 1)  # todo: config?
+if os.getenv('GITHUB_ACTIONS'):
+	# Keep CI memory usage predictable and avoid OOM-killed workers.
+	PARALLEL_FILE_PARSING_COUNT = max(1, min(PARALLEL_FILE_PARSING_COUNT, 2))
+LOGGER.info(f'Using {PARALLEL_FILE_PARSING_COUNT} parallel file parsing workers')
 ACTIONS_BATCH_SIZE = 512  # todo: config?
 POLLING_COOLDOWN = 30
 TRIGGER = Event()

From e676c329ca5a0c147ef0bfadbf5c372f4e25dd99 Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 1 Apr 2026 14:14:58 +0200
Subject: [PATCH 33/96] fix(exec_in_proc): Raise RuntimeError if exitcode is
 non-zero

---
 context_chat_backend/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index d28fc58..024e71c 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -104,6 +104,7 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 			' — possible OOM kill or unhandled signal',
 			p.pid, target_name, p.exitcode, elapsed_ms,
 		)
+		raise RuntimeError(f'Subprocess PID {p.pid} for {target_name} exited with non-zero exit code {p.exitcode}')
 
 	result = pconn.recv()
 	if result['error'] is not None:

From b027ff3234a50cf8eb5a1447bafbef8f147212b5 Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 1 Apr 2026 14:46:42 +0200
Subject: [PATCH 34/96] fix(indexing): Reduce memory pressure on gh actions

---
 context_chat_backend/task_fetcher.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 91d1991..2a7e84f 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -56,7 +56,11 @@
 THREAD_STOP_EVENT = Event()
 LOGGER = logging.getLogger('ccb.task_fetcher')
 FILES_INDEXING_BATCH_SIZE = 16  # theoretical max RAM usage: 16 * 100 MiB, todo: config?
+if os.getenv('GITHUB_ACTIONS'):
+	FILES_INDEXING_BATCH_SIZE = 4
 MIN_FILES_PER_CPU = 4
+if os.getenv('GITHUB_ACTIONS'):
+	MIN_FILES_PER_CPU = 2
 # divides the batch into these many chunks
 PARALLEL_FILE_PARSING_COUNT = max(1, (os.cpu_count() or 2) - 1)  # todo: config?
 if os.getenv('GITHUB_ACTIONS'):

From 19b773fac97d3cf76fb581224df76d63e3c9a34d Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 1 Apr 2026 15:19:06 +0200
Subject: [PATCH 35/96] fix(indexing): Fall back to batch_size=1 if
 embed_sources is killed

and do not retry afterward if one of these single-item batches gets killed
---
 context_chat_backend/task_fetcher.py | 51 +++++++++++++++++++++++++---
 context_chat_backend/utils.py        | 13 ++++++-
 2 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 2a7e84f..edeabc1 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -39,7 +39,7 @@
 	SourceItem,
 	TConfig,
 )
-from .utils import exec_in_proc, get_app_role
+from .utils import SubprocessKilledError, exec_in_proc, get_app_role
 from .vectordb.base import BaseVectorDB
 from .vectordb.service import (
 	decl_update_access,
@@ -89,6 +89,29 @@ def files_indexing_thread(app_config: TConfig, app_enabled: Event) -> None:
 		LOGGER.error('Error initializing vector DB loader, files indexing thread will not start:', exc_info=e)
 		return
 
+	def _embed_one(db_id: int, item: SourceItem | ReceivedFileItem) -> tuple[int, IndexingError | None]:
+		"""Run embed_sources for a single item in its own subprocess. Returns (db_id, error_or_None)."""
+		try:
+			result = exec_in_proc(
+				target=embed_sources,
+				args=(vectordb_loader, app_config, {db_id: item}),
+			)
+			return db_id, result.get(db_id)
+		except SubprocessKilledError as e:
+			LOGGER.error(
+				'embed_sources subprocess killed for individual source %s — marking as non-retryable'
+				' to prevent infinite OOM retry loop',
+				item.reference, exc_info=e,
+			)
+			return db_id, IndexingError(error=f'Subprocess killed (OOM?): {e}', retryable=False)
+		except Exception as e:
+			err_name = {DbException: 'DB', EmbeddingException: 'Embedding'}.get(type(e), 'Unknown')
+			LOGGER.error(
+				'embed_sources raised a %s error for individual source %s, marking as retryable',
+				err_name, item.reference, exc_info=e,
+			)
+			return db_id, IndexingError(error=str(e), retryable=True)
+
 	def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) -> Mapping[int, IndexingError | None]:
 		source_refs = [s.reference for s in source_items.values()]
 		LOGGER.info('Starting embed_sources subprocess for %d source(s): %s', len(source_items), source_refs)
@@ -106,11 +129,31 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 				extra={'errors': errors} if errors else {},
 			)
 			return result
+		except SubprocessKilledError as e:
+			LOGGER.error(
+				'embed_sources subprocess was killed (likely OOM) for %d source(s): %s',
+				len(source_items), source_refs, exc_info=e,
+			)
+			if len(source_items) == 1:
+				# Single-item subprocess was killed — mark non-retryable to break infinite OOM loop.
+				LOGGER.error(
+					'Single-item subprocess killed for %s — marking as non-retryable',
+					source_refs,
+				)
+				return {db_id: IndexingError(error=f'Subprocess killed (OOM?): {e}', retryable=False)
+					for db_id in source_items}
+
+			# Multi-item batch: fall back to one subprocess per source to pinpoint the problematic file.
+			LOGGER.warning(
+				'Falling back to individual processing for %d sources to isolate any OOM-causing file(s)',
+				len(source_items),
+			)
+			return dict(_embed_one(db_id, item) for db_id, item in source_items.items())
+
 		except Exception as e:
-			err_name = {DbException: "DB", EmbeddingException: "Embedding"}.get(type(e), "Unknown")
-			source_ids = (s.reference for s in source_items.values())
+			err_name = {DbException: 'DB', EmbeddingException: 'Embedding'}.get(type(e), 'Unknown')
 			err = IndexingError(
-				error=f'{err_name} Error occurred, the sources {source_ids} will be retried: {e}',
+				error=f'{err_name} Error: {e}',
 				retryable=True,
 			)
 			LOGGER.error(
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index 024e71c..4b9fad5 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -69,6 +69,17 @@ def JSONResponse(
 	return FastAPIJSONResponse(content, status_code, **kwargs)
 
 
+class SubprocessKilledError(RuntimeError):
+	"""Raised when a subprocess exits with a non-zero exit code (likely OOM kill or unhandled signal)."""
+
+	def __init__(self, pid: int, target_name: str, exitcode: int):
+		super().__init__(
+			f'Subprocess PID {pid} for {target_name} exited with non-zero exit code {exitcode}'
+			' — possible OOM kill or unhandled signal'
+		)
+		self.exitcode = exitcode
+
+
 def exception_wrap(fun: Callable | None, *args, resconn: Connection, **kwargs):
 	try:
 		if fun is None:
@@ -104,7 +115,7 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 			' — possible OOM kill or unhandled signal',
 			p.pid, target_name, p.exitcode, elapsed_ms,
 		)
-		raise RuntimeError(f'Subprocess PID {p.pid} for {target_name} exited with non-zero exit code {p.exitcode}')
+		raise SubprocessKilledError(p.pid, target_name, p.exitcode)
 
 	result = pconn.recv()
 	if result['error'] is not None:

From bde0bc54e2dde254b37fe426418abbca295a27a0 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 2 Apr 2026 14:18:47 +0530
Subject: [PATCH 36/96] fix: log stdout and stderr from subprocesses

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/utils.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index 4b9fad5..068ffa8 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -2,9 +2,11 @@
 # SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
+import io
 import logging
 import multiprocessing as mp
 import os
+import sys
 import traceback
 from collections.abc import Callable
 from functools import partial, wraps
@@ -80,7 +82,12 @@ def __init__(self, pid: int, target_name: str, exitcode: int):
 		self.exitcode = exitcode
 
 
-def exception_wrap(fun: Callable | None, *args, resconn: Connection, **kwargs):
+def exception_wrap(fun: Callable | None, *args, resconn: Connection, stdconn: Connection, **kwargs):
+	stdout_capture = io.StringIO()
+	stderr_capture = io.StringIO()
+	sys.stdout = stdout_capture
+	sys.stderr = stderr_capture
+
 	try:
 		if fun is None:
 			return resconn.send({ 'value': None, 'error': None })
@@ -88,11 +95,15 @@ def exception_wrap(fun: Callable | None, *args, resconn: Connection, **kwargs):
 	except Exception as e:
 		tb = traceback.format_exc()
 		resconn.send({ 'value': None, 'error': e, 'traceback': tb })
+	finally:
+		stdconn.send({'stdout': stdout_capture.getvalue(), 'stderr': stderr_capture.getvalue()})
 
 
 def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None):  # noqa: B006
 	pconn, cconn = mp.Pipe()
+	std_pconn, std_cconn = mp.Pipe()
 	kwargs['resconn'] = cconn
+	kwargs['stdconn'] = std_cconn
 	p = mp.Process(
 		group=group,
 		target=partial(exception_wrap, target),
@@ -108,20 +119,28 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 	_logger.debug('Subprocess PID %d started for %s, waiting for it to finish (no timeout)', p.pid, target_name)
 	p.join()
 	elapsed_ms = (perf_counter_ns() - start) / 1e6
-	_logger.debug('Subprocess PID %d for %s finished in %.2f ms (exit code: %s)', p.pid, target_name, elapsed_ms, p.exitcode)
+	_logger.debug(
+		'Subprocess PID %d for %s finished in %.2f ms (exit code: %s)',
+		p.pid, target_name, elapsed_ms, p.exitcode,
+	)
 	if p.exitcode != 0:
 		_logger.warning(
 			'Subprocess PID %d for %s exited with non-zero exit code %d after %.2f ms'
 			' — possible OOM kill or unhandled signal',
 			p.pid, target_name, p.exitcode, elapsed_ms,
 		)
-		raise SubprocessKilledError(p.pid, target_name, p.exitcode)
+		raise SubprocessKilledError(p.pid or 0, target_name, p.exitcode or -1)
 
 	result = pconn.recv()
 	if result['error'] is not None:
 		_logger.error('original traceback: %s', result['traceback'])
 		raise result['error']
 
+	stdobj = std_pconn.recv()
+	_logger.info(f'std info for {target_name}', extra={
+		'stdout': stdobj['stdout'],
+		'stderr': stdobj['stderr'],
+	})
 	return result['value']
 
 

From 4de591f79b29746c220cd0a268b9254a18fc424c Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 2 Apr 2026 14:57:16 +0530
Subject: [PATCH 37/96] fix: don't raise before std* is captured

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/utils.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index 068ffa8..3122a41 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -123,6 +123,17 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 		'Subprocess PID %d for %s finished in %.2f ms (exit code: %s)',
 		p.pid, target_name, elapsed_ms, p.exitcode,
 	)
+	stdobj = std_pconn.recv()
+	_logger.info(f'std info for {target_name}', extra={
+		'stdout': stdobj['stdout'],
+		'stderr': stdobj['stderr'],
+	})
+
+	result = pconn.recv()
+	if result['error'] is not None:
+		_logger.error('original traceback: %s', result['traceback'])
+		raise result['error']
+
 	if p.exitcode != 0:
 		_logger.warning(
 			'Subprocess PID %d for %s exited with non-zero exit code %d after %.2f ms'
@@ -131,16 +142,6 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 		)
 		raise SubprocessKilledError(p.pid or 0, target_name, p.exitcode or -1)
 
-	result = pconn.recv()
-	if result['error'] is not None:
-		_logger.error('original traceback: %s', result['traceback'])
-		raise result['error']
-
-	stdobj = std_pconn.recv()
-	_logger.info(f'std info for {target_name}', extra={
-		'stdout': stdobj['stdout'],
-		'stderr': stdobj['stderr'],
-	})
 	return result['value']
 
 

From 4deda845f40dd3e3419253ec647d156a4c76e218 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 2 Apr 2026 15:01:10 +0530
Subject: [PATCH 38/96] feat: log cpu count and memory info of the system

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 main.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/main.py b/main.py
index c4ffa1f..8d838d8 100755
--- a/main.py
+++ b/main.py
@@ -4,8 +4,9 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
 import logging
-from os import getenv
+from os import cpu_count, getenv
 
+import psutil
 import uvicorn
 from nc_py_api.ex_app import run_app
 
@@ -48,6 +49,7 @@ def _setup_log_levels(debug: bool):
 	app_config: TConfig = app.extra['CONFIG']
 	_setup_log_levels(app_config.debug)
 
+	print(f'CPU count: {cpu_count()}, Memory: {psutil.virtual_memory()}')
 	print('App config:\n' + redact_config(app_config).model_dump_json(indent=2), flush=True)
 
 	uv_log_config = uvicorn.config.LOGGING_CONFIG  # pyright: ignore[reportAttributeAccessIssue]

From ad0eac70712600964f45e2401bed411945e148a7 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 2 Apr 2026 17:41:39 +0530
Subject: [PATCH 39/96] fix: catch BaseException in subprocess

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index 3122a41..02545d9 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -92,7 +92,7 @@ def exception_wrap(fun: Callable | None, *args, resconn: Connection, stdconn: Co
 		if fun is None:
 			return resconn.send({ 'value': None, 'error': None })
 		resconn.send({ 'value': fun(*args, **kwargs), 'error': None })
-	except Exception as e:
+	except BaseException as e:
 		tb = traceback.format_exc()
 		resconn.send({ 'value': None, 'error': e, 'traceback': tb })
 	finally:

From 36bcfb721364912bcca24c37bc30e357cebfe275 Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Thu, 2 Apr 2026 14:19:49 +0200
Subject: [PATCH 40/96] fix(utils): Improve exec_in_proc to handle more failure
 modes

---
 context_chat_backend/utils.py | 170 +++++++++++++++++++++++++++++-----
 1 file changed, 149 insertions(+), 21 deletions(-)

diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index 02545d9..e994a3f 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -9,6 +9,7 @@
 import sys
 import traceback
 from collections.abc import Callable
+from contextlib import suppress
 from functools import partial, wraps
 from multiprocessing.connection import Connection
 from time import perf_counter_ns
@@ -72,31 +73,95 @@ def JSONResponse(
 
 
 class SubprocessKilledError(RuntimeError):
-	"""Raised when a subprocess exits with a non-zero exit code (likely OOM kill or unhandled signal)."""
+	"""Raised when a subprocess is terminated by a signal (for example SIGKILL)."""
 
 	def __init__(self, pid: int, target_name: str, exitcode: int):
 		super().__init__(
-			f'Subprocess PID {pid} for {target_name} exited with non-zero exit code {exitcode}'
-			' — possible OOM kill or unhandled signal'
+			f'Subprocess PID {pid} for {target_name} exited with signal {abs(exitcode)} '
+			f'(raw exit code: {exitcode})'
 		)
 		self.exitcode = exitcode
 
 
+class SubprocessExecutionError(RuntimeError):
+	"""Raised when a subprocess exits non-zero without a recoverable Python exception payload."""
+
+	def __init__(self, pid: int, target_name: str, exitcode: int, details: str = ''):
+		msg = f'Subprocess PID {pid} for {target_name} exited with non-zero exit code {exitcode}'
+		if details:
+			msg = f'{msg}: {details}'
+		super().__init__(msg)
+		self.exitcode = exitcode
+
+
+_MAX_STD_CAPTURE_CHARS = 64 * 1024
+
+
+def _truncate_capture(text: str) -> tuple[str, bool]:
+	if len(text) <= _MAX_STD_CAPTURE_CHARS:
+		return text, False
+
+	head = _MAX_STD_CAPTURE_CHARS // 2
+	tail = _MAX_STD_CAPTURE_CHARS - head
+	omitted = len(text) - _MAX_STD_CAPTURE_CHARS
+	truncated = (
+		f'[truncated {omitted} chars]\n'
+		f'{text[:head]}\n'
+		'[...snip...]\n'
+		f'{text[-tail:]}'
+	)
+	return truncated, True
+
+
 def exception_wrap(fun: Callable | None, *args, resconn: Connection, stdconn: Connection, **kwargs):
 	stdout_capture = io.StringIO()
 	stderr_capture = io.StringIO()
+	orig_stdout = sys.stdout
+	orig_stderr = sys.stderr
 	sys.stdout = stdout_capture
 	sys.stderr = stderr_capture
 
 	try:
 		if fun is None:
-			return resconn.send({ 'value': None, 'error': None })
-		resconn.send({ 'value': fun(*args, **kwargs), 'error': None })
+			resconn.send({ 'value': None, 'error': None })
+		else:
+			resconn.send({ 'value': fun(*args, **kwargs), 'error': None })
 	except BaseException as e:
 		tb = traceback.format_exc()
-		resconn.send({ 'value': None, 'error': e, 'traceback': tb })
+		payload = {
+			'value': None,
+			'error': e,
+			'traceback': tb,
+			'error_type': type(e).__name__,
+			'error_module': type(e).__module__,
+			'error_message': str(e),
+		}
+		try:
+			resconn.send(payload)
+		except Exception as send_err:
+			# Fallback for unpicklable exceptions.
+			with suppress(Exception):
+				resconn.send({
+					'value': None,
+					'error': None,
+					'traceback': tb,
+					'error_type': type(e).__name__,
+					'error_module': type(e).__module__,
+					'error_message': str(e),
+					'send_error': str(send_err),
+				})
 	finally:
-		stdconn.send({'stdout': stdout_capture.getvalue(), 'stderr': stderr_capture.getvalue()})
+		sys.stdout = orig_stdout
+		sys.stderr = orig_stderr
+		stdout_text, stdout_truncated = _truncate_capture(stdout_capture.getvalue())
+		stderr_text, stderr_truncated = _truncate_capture(stderr_capture.getvalue())
+		with suppress(Exception):
+			stdconn.send({
+				'stdout': stdout_text,
+				'stderr': stderr_text,
+				'stdout_truncated': stdout_truncated,
+				'stderr_truncated': stderr_truncated,
+			})
 
 
 def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None):  # noqa: B006
@@ -117,30 +182,93 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 	start = perf_counter_ns()
 	p.start()
 	_logger.debug('Subprocess PID %d started for %s, waiting for it to finish (no timeout)', p.pid, target_name)
+
+	result = None
+	stdobj = {
+		'stdout': '',
+		'stderr': '',
+		'stdout_truncated': False,
+		'stderr_truncated': False,
+	}
+	got_result = False
+	got_std = False
+
+	# Drain result/std pipes while child is still alive to avoid deadlock on full pipe buffers.
+	while p.is_alive() and (not got_result or not got_std):
+		if not got_result and pconn.poll(0.1):
+			with suppress(EOFError, OSError, BrokenPipeError):
+				result = pconn.recv()
+				got_result = True
+		if not got_std and std_pconn.poll():
+			with suppress(EOFError, OSError, BrokenPipeError):
+				stdobj = std_pconn.recv()
+				got_std = True
+
 	p.join()
 	elapsed_ms = (perf_counter_ns() - start) / 1e6
 	_logger.debug(
 		'Subprocess PID %d for %s finished in %.2f ms (exit code: %s)',
 		p.pid, target_name, elapsed_ms, p.exitcode,
 	)
-	stdobj = std_pconn.recv()
-	_logger.info(f'std info for {target_name}', extra={
-		'stdout': stdobj['stdout'],
-		'stderr': stdobj['stderr'],
-	})
-
-	result = pconn.recv()
-	if result['error'] is not None:
-		_logger.error('original traceback: %s', result['traceback'])
+
+	if not got_std:
+		with suppress(EOFError, OSError, BrokenPipeError):
+			if std_pconn.poll():
+				stdobj = std_pconn.recv()
+				got_std = True
+	if stdobj['stdout'] or stdobj['stderr']:
+		extra = {
+			'stdout': stdobj['stdout'],
+			'stderr': stdobj['stderr'],
+		}
+		if stdobj.get('stdout_truncated') or stdobj.get('stderr_truncated'):
+			extra['stdio_truncated'] = {
+				'stdout': bool(stdobj.get('stdout_truncated')),
+				'stderr': bool(stdobj.get('stderr_truncated')),
+			}
+		_logger.info('std info for %s', target_name, extra=extra)
+
+	if not got_result:
+		with suppress(EOFError, OSError, BrokenPipeError):
+			if pconn.poll():
+				result = pconn.recv()
+				got_result = True
+
+	if result is not None and result.get('error') is not None:
+		_logger.error('original traceback: %s', result.get('traceback', ''))
 		raise result['error']
 
-	if p.exitcode != 0:
+	if result is not None and result.get('error_type'):
+		details = (
+			f"{result.get('error_module', '')}.{result.get('error_type', '')}: "
+			f"{result.get('error_message', '')}"
+		)
+		if result.get('traceback'):
+			_logger.error('remote traceback: %s', result['traceback'])
+		raise SubprocessExecutionError(p.pid or 0, target_name, p.exitcode or 1, details)
+
+	if p.exitcode and p.exitcode < 0:
 		_logger.warning(
-			'Subprocess PID %d for %s exited with non-zero exit code %d after %.2f ms'
-			' — possible OOM kill or unhandled signal',
-			p.pid, target_name, p.exitcode, elapsed_ms,
+			'Subprocess PID %d for %s exited due to signal %d after %.2f ms',
+			p.pid, target_name, abs(p.exitcode), elapsed_ms,
+		)
+		raise SubprocessKilledError(p.pid or 0, target_name, p.exitcode)
+
+	if p.exitcode not in (None, 0):
+		raise SubprocessExecutionError(
+			p.pid or 0,
+			target_name,
+			p.exitcode,
+			'No structured exception payload received from child process',
+		)
+
+	if result is None:
+		raise SubprocessExecutionError(
+			p.pid or 0,
+			target_name,
+			0,
+			'Subprocess exited successfully but returned no result payload',
 		)
-		raise SubprocessKilledError(p.pid or 0, target_name, p.exitcode or -1)
 
 	return result['value']
 

From 47eaf72daec83faec6d9a4a4ce9e23b231cfba31 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 11:08:34 +0530
Subject: [PATCH 41/96] one more stab at a fix

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/utils.py | 37 ++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index e994a3f..b4e93c7 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -2,6 +2,8 @@
 # SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
+import atexit
+import faulthandler
 import io
 import logging
 import multiprocessing as mp
@@ -114,6 +116,28 @@ def _truncate_capture(text: str) -> tuple[str, bool]:
 
 
 def exception_wrap(fun: Callable | None, *args, resconn: Connection, stdconn: Connection, **kwargs):
+	# --- diagnostic probes: write directly to the real stderr FD so they survive
+	# Python's stdout/stderr redirection below and even os._exit() won't hide them
+	# from the parent process's stderr stream.
+	_diag_fd = os.dup(2)  # dup before we capture sys.stderr
+
+	def _raw_diag(msg: str) -> None:
+		with suppress(Exception):
+			os.write(_diag_fd, (msg + '\n').encode())
+
+	# Enable faulthandler on the real FD so crash tracebacks (SIGSEGV etc.) appear.
+	with suppress(Exception):
+		faulthandler.enable(file=os.fdopen(os.dup(_diag_fd), 'w', closefd=True), all_threads=True)
+
+	# Atexit probe: if this message NEVER appears, it means os._exit() (C-level)
+	# was called with Python's cleanup phase entirely skipped.
+	_fun_name = getattr(fun, '__name__', str(fun))
+	atexit.register(
+		_raw_diag,
+		f'[exception_wrap/atexit] pid={os.getpid()} target={_fun_name}'
+		': Python atexit reached (normal Python exit)',
+	)
+
 	stdout_capture = io.StringIO()
 	stderr_capture = io.StringIO()
 	orig_stdout = sys.stdout
@@ -124,10 +148,18 @@ def exception_wrap(fun: Callable | None, *args, resconn: Connection, stdconn: Co
 	try:
 		if fun is None:
 			resconn.send({ 'value': None, 'error': None })
+			_raw_diag(f'[exception_wrap/probe] pid={os.getpid()} target={_fun_name}: result sent (fun=None)')
 		else:
-			resconn.send({ 'value': fun(*args, **kwargs), 'error': None })
+			result_value = fun(*args, **kwargs)
+			_raw_diag(f'[exception_wrap/probe] pid={os.getpid()} target={_fun_name}: fun() returned, sending result')
+			resconn.send({ 'value': result_value, 'error': None })
+			_raw_diag(f'[exception_wrap/probe] pid={os.getpid()} target={_fun_name}: result pipe send complete')
 	except BaseException as e:
 		tb = traceback.format_exc()
+		_raw_diag(
+			f'[exception_wrap/probe] pid={os.getpid()} target={_fun_name}'
+			f': caught {type(e).__name__}: {e}'
+		)
 		payload = {
 			'value': None,
 			'error': e,
@@ -162,6 +194,9 @@ def exception_wrap(fun: Callable | None, *args, resconn: Connection, stdconn: Co
 				'stdout_truncated': stdout_truncated,
 				'stderr_truncated': stderr_truncated,
 			})
+		_raw_diag(f'[exception_wrap/probe] pid={os.getpid()} target={_fun_name}: finally block complete')
+		with suppress(Exception):
+			os.close(_diag_fd)
 
 
 def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None):  # noqa: B006

From 309ab2bf19a54fb89c01f61550b07a9daf9d45d1 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 11:43:38 +0530
Subject: [PATCH 42/96] do not throw away the valid result even with exitcode 1

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/utils.py | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index b4e93c7..fe4ee96 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -282,6 +282,23 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 			_logger.error('remote traceback: %s', result['traceback'])
 		raise SubprocessExecutionError(p.pid or 0, target_name, p.exitcode or 1, details)
 
+	# If we received a valid result payload, return it even if the exit
+	# code is non-zero.  The non-zero code typically comes from
+	# multiprocessing/C-extension cleanup (e.g. util._exit_function or
+	# a native atexit handler) that runs *after* exception_wrap has
+	# already sent the result over the pipe.
+	if result is not None and 'value' in result:
+		if p.exitcode not in (None, 0):
+			_logger.warning(
+				'Subprocess PID %d for %s exited with code %s after %.2f ms'
+				' but returned a valid result — accepting the result.'
+				' The non-zero exit likely originates from process'
+				' cleanup (multiprocessing finalizers, C-extension'
+				' atexit, etc.).',
+				p.pid, target_name, p.exitcode, elapsed_ms,
+			)
+		return result['value']
+
 	if p.exitcode and p.exitcode < 0:
 		_logger.warning(
 			'Subprocess PID %d for %s exited due to signal %d after %.2f ms',
@@ -297,15 +314,12 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 			'No structured exception payload received from child process',
 		)
 
-	if result is None:
-		raise SubprocessExecutionError(
-			p.pid or 0,
-			target_name,
-			0,
-			'Subprocess exited successfully but returned no result payload',
-		)
-
-	return result['value']
+	raise SubprocessExecutionError(
+		p.pid or 0,
+		target_name,
+		0,
+		'Subprocess exited successfully but returned no result payload',
+	)
 
 
 def timed(func: Callable):

From e1763acdcdfa590cee3c74f6ba1acadf1d9c6f9c Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 12:19:09 +0530
Subject: [PATCH 43/96] fix: use forkserver as process start method

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/controller.py |  4 ----
 main.py                            | 13 +++++++++++++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 49d1d73..3a8e15a 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -16,7 +16,6 @@
 # ruff: noqa: E402
 
 import logging
-import multiprocessing as mp
 import os
 import tempfile
 import threading
@@ -122,9 +121,6 @@ async def lifespan(app: FastAPI):
 index_lock = threading.Lock()
 _indexing = {}
 
-# limit the number of concurrent document parsing
-doc_parse_semaphore = mp.Semaphore(app_config.doc_parser_worker_limit)
-
 
 # middlewares
 
diff --git a/main.py b/main.py
index 8d838d8..4e88ee9 100755
--- a/main.py
+++ b/main.py
@@ -3,6 +3,7 @@
 # SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
+
 import logging
 from os import cpu_count, getenv
 
@@ -44,6 +45,18 @@ def _setup_log_levels(debug: bool):
 
 
 if __name__ == '__main__':
+	import multiprocessing as mp
+
+	# do forks from a clean process that doesn't have any threads or locks
+	mp.set_start_method('forkserver')
+	mp.set_forkserver_preload([
+		'langchain',
+		'sqlalchemy',
+		'numpy',
+		'context_chat_backend.chain.ingest.injest',
+		'context_chat_backend.vectordb.pgvector',
+	])
+
 	logging_config = get_logging_config(LOGGER_CONFIG_NAME)
 	setup_logging(logging_config)
 	app_config: TConfig = app.extra['CONFIG']

From 330165205127524780038280854dacc19f552e9c Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 13:16:49 +0530
Subject: [PATCH 44/96] fix(ci): consider eligible files as the total files
 count

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 8e6ca7d..8ec8eab 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -241,17 +241,17 @@ jobs:
               continue
             fi
 
-            # Extract total queued files
-            total_files=$(echo "$stats" | jq '.queued_documents_counts.files__default' || echo "")
+            # Extract total eligible files
+            total_eligible_files=$(echo "$stats" | jq '.eligible_files_count' || echo "")
 
             # Extract indexed documents count (files__default)
             indexed_count=$(echo "$stats" | jq '.vectordb_document_counts.files__default' || echo "")
 
-            echo "Total queued files: $total_files"
+            echo "Total eligible files: $total_eligible_files"
             echo "Indexed documents (files__default): $indexed_count"
 
-            diff=$((total_files - indexed_count))
-            threshold=$((total_files * 3 / 100))
+            diff=$((total_eligible_files - indexed_count))
+            threshold=$((total_eligible_files * 3 / 100))
 
             # Check if difference is within tolerance
             if [ $diff -le $threshold ]; then
@@ -259,7 +259,7 @@ jobs:
               success=1
               break
             else
-              progress=$((diff * 100 / total_files))
+              progress=$((diff * 100 / total_eligible_files))
               echo "Outside 3% tolerance: diff=$diff (${progress}%), threshold=$threshold"
             fi
 

From 32aa37474547c3f3e7993cf638171ef309c1e1df Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 15:13:14 +0530
Subject: [PATCH 45/96] fix: use logging config in forkserver and other fixes

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/utils.py | 12 ++++++++----
 main.py                       | 17 +++++++++--------
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index fe4ee96..5f12d0c 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -86,10 +86,10 @@ def __init__(self, pid: int, target_name: str, exitcode: int):
 
 
 class SubprocessExecutionError(RuntimeError):
-	"""Raised when a subprocess exits non-zero without a recoverable Python exception payload."""
+	"""Raised when a subprocess exits without a recoverable Python exception payload."""
 
 	def __init__(self, pid: int, target_name: str, exitcode: int, details: str = ''):
-		msg = f'Subprocess PID {pid} for {target_name} exited with non-zero exit code {exitcode}'
+		msg = f'Subprocess PID {pid} for {target_name} exited with exit code {exitcode}'
 		if details:
 			msg = f'{msg}: {details}'
 		super().__init__(msg)
@@ -199,7 +199,11 @@ def _raw_diag(msg: str) -> None:
 			os.close(_diag_fd)
 
 
-def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None):  # noqa: B006
+def exec_in_proc(group=None, target=None, name=None, args=(), kwargs=None, *, daemon=None):
+	if not kwargs:
+		kwargs = {}
+
+	# parent, child
 	pconn, cconn = mp.Pipe()
 	std_pconn, std_cconn = mp.Pipe()
 	kwargs['resconn'] = cconn
@@ -318,7 +322,7 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
 		p.pid or 0,
 		target_name,
 		0,
-		'Subprocess exited successfully but returned no result payload',
+		f'Subprocess exited successfully but returned no result payload: {result}',
 	)
 
 
diff --git a/main.py b/main.py
index 4e88ee9..c261451 100755
--- a/main.py
+++ b/main.py
@@ -47,21 +47,22 @@ def _setup_log_levels(debug: bool):
 if __name__ == '__main__':
 	import multiprocessing as mp
 
+	logging_config = get_logging_config(LOGGER_CONFIG_NAME)
+	setup_logging(logging_config)
+	app_config: TConfig = app.extra['CONFIG']
+	_setup_log_levels(app_config.debug)
+
 	# do forks from a clean process that doesn't have any threads or locks
 	mp.set_start_method('forkserver')
 	mp.set_forkserver_preload([
-		'langchain',
-		'sqlalchemy',
-		'numpy',
 		'context_chat_backend.chain.ingest.injest',
 		'context_chat_backend.vectordb.pgvector',
+		'langchain',
+		'logging',
+		'numpy',
+		'sqlalchemy',
 	])
 
-	logging_config = get_logging_config(LOGGER_CONFIG_NAME)
-	setup_logging(logging_config)
-	app_config: TConfig = app.extra['CONFIG']
-	_setup_log_levels(app_config.debug)
-
 	print(f'CPU count: {cpu_count()}, Memory: {psutil.virtual_memory()}')
 	print('App config:\n' + redact_config(app_config).model_dump_json(indent=2), flush=True)
 

From 33ee38ab24d9567f2a0152b7d55870a28ca2bbe1 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 15:23:40 +0530
Subject: [PATCH 46/96] fix: remove extra diagnostics

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .../chain/ingest/doc_loader.py                |  20 +--
 context_chat_backend/chain/ingest/injest.py   |  15 +-
 context_chat_backend/task_fetcher.py          |  60 ++-----
 context_chat_backend/utils.py                 | 146 +++++-------------
 context_chat_backend/vectordb/pgvector.py     |   2 +-
 5 files changed, 62 insertions(+), 181 deletions(-)

diff --git a/context_chat_backend/chain/ingest/doc_loader.py b/context_chat_backend/chain/ingest/doc_loader.py
index 04c611d..832c833 100644
--- a/context_chat_backend/chain/ingest/doc_loader.py
+++ b/context_chat_backend/chain/ingest/doc_loader.py
@@ -7,8 +7,6 @@
 import tempfile
 from collections.abc import Callable
 from io import BytesIO
-import logging
-from time import perf_counter_ns
 
 import docx2txt
 from epub2txt import epub2txt
@@ -21,8 +19,6 @@
 
 from ...types import IndexingException, SourceItem
 
-logger = logging.getLogger('ccb.doc_loader')
-
 
 def _temp_file_wrapper(file: BytesIO, loader: Callable, sep: str = '\n') -> str:
 	raw_bytes = file.read()
@@ -137,22 +133,10 @@ def decode_source(source: SourceItem) -> str:
 		else:
 			io_obj = source.content
 
-		loader_fn = _loader_map.get(source.type)
-		if loader_fn:
-			logger.debug(
-				'Decoding source %r with loader %s (mime: %s) — may be slow or block',
-				source.title, loader_fn.__name__, source.type,
-			)
-			t0 = perf_counter_ns()
-			result = loader_fn(io_obj)
-			elapsed_ms = (perf_counter_ns() - t0) / 1e6
-			logger.debug(
-				'Loader %s for %r finished in %.2f ms (%d chars)',
-				loader_fn.__name__, source.title, elapsed_ms, len(result),
-			)
+		if _loader_map.get(source.type):
+			result = _loader_map[source.type](io_obj)
 			return result.encode('utf-8', 'ignore').decode('utf-8', 'ignore').strip()
 
-		logger.debug('No specific loader for mime type %s, reading as plain text for %r', source.type, source.title)
 		return io_obj.read().decode('utf-8', 'ignore').strip()
 	except IndexingException:
 		raise
diff --git a/context_chat_backend/chain/ingest/injest.py b/context_chat_backend/chain/ingest/injest.py
index 7ede94a..8e32108 100644
--- a/context_chat_backend/chain/ingest/injest.py
+++ b/context_chat_backend/chain/ingest/injest.py
@@ -43,8 +43,6 @@ async def __fetch_file_content(
 	async with semaphore:
 		nc = AsyncNextcloudApp()
 		try:
-			logger.debug('Downloading file id %d for user %s', file_id, user_id)
-			t0 = perf_counter_ns()
 			# a file pointer for storing the stream in memory until it is consumed
 			fp = BytesIO()
 			await nc._session.download2fp(
@@ -54,8 +52,6 @@ async def __fetch_file_content(
 				params={ 'userId': user_id },
 			)
 			fp.seek(0)
-			elapsed_ms = (perf_counter_ns() - t0) / 1e6
-			logger.debug('Downloaded file id %d for user %s in %.2f ms (%d bytes)', file_id, user_id, elapsed_ms, fp.getbuffer().nbytes)
 			return fp
 		except niquests.exceptions.RequestException as e:
 			if e.response is None:
@@ -131,11 +127,7 @@ async def __fetch_files_content(
 		# any user id from the list should have read access to the file
 		tasks.append(asyncio.ensure_future(__fetch_file_content(semaphore, file.file_id, file.userIds[0])))
 
-	logger.debug('Gathering %d file download task(s) — this blocks until all downloads complete or fail', len(tasks))
-	t0 = perf_counter_ns()
 	results = await asyncio.gather(*tasks, return_exceptions=True)
-	elapsed_ms = (perf_counter_ns() - t0) / 1e6
-	logger.debug('All %d file download task(s) completed in %.2f ms', len(tasks), elapsed_ms)
 	for (db_id, file), result in zip(sources.items(), results, strict=True):
 		if isinstance(file, SourceItem):
 			continue
@@ -227,10 +219,7 @@ def _sources_to_indocuments(
 
 		# transform the source to have text data
 		try:
-			logger.debug(
-				'Decoding source %s (type: %s, title: %r) — may be slow for complex file types',
-				source.reference, source.type, source.title,
-			)
+			logger.debug('Decoding source %s (type: %s)', source.reference, source.type)
 			t0 = perf_counter_ns()
 			content = decode_source(source)
 			elapsed_ms = (perf_counter_ns() - t0) / 1e6
@@ -353,7 +342,7 @@ def _process_sources(
 	source_proc_results = _increase_access_for_existing_sources(vectordb, existing_sources)
 
 	logger.debug(
-		'Fetching file contents for %d source(s) — this blocks on network I/O to Nextcloud',
+		'Fetching file contents for %d source(s) from Nextcloud',
 		len(to_embed_sources),
 	)
 	t0 = perf_counter_ns()
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index edeabc1..c75cec0 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -31,7 +31,6 @@
 	ActionsQueueItems,
 	ActionType,
 	AppRole,
-	EmbeddingException,
 	FilesQueueItems,
 	IndexingError,
 	LoaderException,
@@ -89,29 +88,6 @@ def files_indexing_thread(app_config: TConfig, app_enabled: Event) -> None:
 		LOGGER.error('Error initializing vector DB loader, files indexing thread will not start:', exc_info=e)
 		return
 
-	def _embed_one(db_id: int, item: SourceItem | ReceivedFileItem) -> tuple[int, IndexingError | None]:
-		"""Run embed_sources for a single item in its own subprocess. Returns (db_id, error_or_None)."""
-		try:
-			result = exec_in_proc(
-				target=embed_sources,
-				args=(vectordb_loader, app_config, {db_id: item}),
-			)
-			return db_id, result.get(db_id)
-		except SubprocessKilledError as e:
-			LOGGER.error(
-				'embed_sources subprocess killed for individual source %s — marking as non-retryable'
-				' to prevent infinite OOM retry loop',
-				item.reference, exc_info=e,
-			)
-			return db_id, IndexingError(error=f'Subprocess killed (OOM?): {e}', retryable=False)
-		except Exception as e:
-			err_name = {DbException: 'DB', EmbeddingException: 'Embedding'}.get(type(e), 'Unknown')
-			LOGGER.error(
-				'embed_sources raised a %s error for individual source %s, marking as retryable',
-				err_name, item.reference, exc_info=e,
-			)
-			return db_id, IndexingError(error=str(e), retryable=True)
-
 	def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) -> Mapping[int, IndexingError | None]:
 		source_refs = [s.reference for s in source_items.values()]
 		LOGGER.info('Starting embed_sources subprocess for %d source(s): %s', len(source_items), source_refs)
@@ -122,43 +98,39 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 			)
 			errors = {k: v for k, v in result.items() if isinstance(v, IndexingError)}
 			LOGGER.info(
-				'embed_sources subprocess finished for %d source(s): %d succeeded, %d errored',
-				len(source_items),
-				len(result) - len(errors),
-				len(errors),
-				extra={'errors': errors} if errors else {},
+				'embed_sources finished for %d source(s): %d succeeded, %d errored',
+				len(source_items), len(result) - len(errors), len(errors),
+				extra={'errors': errors},
 			)
 			return result
 		except SubprocessKilledError as e:
 			LOGGER.error(
-				'embed_sources subprocess was killed (likely OOM) for %d source(s): %s',
-				len(source_items), source_refs, exc_info=e,
+				'embed_sources subprocess was killed for %d source(s) with exitcode %s: %s',
+				len(source_items), e.exitcode, source_refs, exc_info=e,
 			)
 			if len(source_items) == 1:
-				# Single-item subprocess was killed — mark non-retryable to break infinite OOM loop.
-				LOGGER.error(
-					'Single-item subprocess killed for %s — marking as non-retryable',
-					source_refs,
+				return dict.fromkeys(
+					source_items,
+					IndexingError(error=f'Subprocess killed with exitcode {e.exitcode}: {e}', retryable=False),
 				)
-				return {db_id: IndexingError(error=f'Subprocess killed (OOM?): {e}', retryable=False)
-					for db_id in source_items}
 
-			# Multi-item batch: fall back to one subprocess per source to pinpoint the problematic file.
+			# Fall back to one-by-one to isolate the problematic file.
 			LOGGER.warning(
-				'Falling back to individual processing for %d sources to isolate any OOM-causing file(s)',
+				'Falling back to individual processing for %d sources',
 				len(source_items),
 			)
-			return dict(_embed_one(db_id, item) for db_id, item in source_items.items())
-
+			fallback: dict[int, IndexingError | None] = {}
+			for db_id, item in source_items.items():
+				fallback.update(_load_sources({db_id: item}))
+			return fallback
 		except Exception as e:
-			err_name = {DbException: 'DB', EmbeddingException: 'Embedding'}.get(type(e), 'Unknown')
 			err = IndexingError(
-				error=f'{err_name} Error: {e}',
+				error=f'{e.__class__.__name__}: {e}',
 				retryable=True,
 			)
 			LOGGER.error(
 				'embed_sources subprocess raised a %s error for sources %s, marking all as retryable',
-				err_name, source_refs, exc_info=e,
+				e.__class__.__name__, source_refs, exc_info=e,
 			)
 			return dict.fromkeys(source_items, err)
 
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index 5f12d0c..4552e32 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
-import atexit
 import faulthandler
 import io
 import logging
@@ -23,6 +22,7 @@
 
 T = TypeVar('T')
 _logger = logging.getLogger('ccb.utils')
+_MAX_STD_CAPTURE_CHARS = 64 * 1024
 
 
 def not_none(value: T | None) -> TypeGuard[T]:
@@ -77,7 +77,7 @@ def JSONResponse(
 class SubprocessKilledError(RuntimeError):
 	"""Raised when a subprocess is terminated by a signal (for example SIGKILL)."""
 
-	def __init__(self, pid: int, target_name: str, exitcode: int):
+	def __init__(self, pid: int | None, target_name: str, exitcode: int):
 		super().__init__(
 			f'Subprocess PID {pid} for {target_name} exited with signal {abs(exitcode)} '
 			f'(raw exit code: {exitcode})'
@@ -88,7 +88,7 @@ def __init__(self, pid: int, target_name: str, exitcode: int):
 class SubprocessExecutionError(RuntimeError):
 	"""Raised when a subprocess exits without a recoverable Python exception payload."""
 
-	def __init__(self, pid: int, target_name: str, exitcode: int, details: str = ''):
+	def __init__(self, pid: int | None, target_name: str, exitcode: int, details: str = ''):
 		msg = f'Subprocess PID {pid} for {target_name} exited with exit code {exitcode}'
 		if details:
 			msg = f'{msg}: {details}'
@@ -96,47 +96,29 @@ def __init__(self, pid: int, target_name: str, exitcode: int, details: str = '')
 		self.exitcode = exitcode
 
 
-_MAX_STD_CAPTURE_CHARS = 64 * 1024
-
-
-def _truncate_capture(text: str) -> tuple[str, bool]:
+def _truncate_capture(text: str) -> str:
 	if len(text) <= _MAX_STD_CAPTURE_CHARS:
-		return text, False
+		return text
 
 	head = _MAX_STD_CAPTURE_CHARS // 2
 	tail = _MAX_STD_CAPTURE_CHARS - head
 	omitted = len(text) - _MAX_STD_CAPTURE_CHARS
-	truncated = (
+	return (
 		f'[truncated {omitted} chars]\n'
 		f'{text[:head]}\n'
 		'[...snip...]\n'
 		f'{text[-tail:]}'
 	)
-	return truncated, True
 
 
 def exception_wrap(fun: Callable | None, *args, resconn: Connection, stdconn: Connection, **kwargs):
-	# --- diagnostic probes: write directly to the real stderr FD so they survive
-	# Python's stdout/stderr redirection below and even os._exit() won't hide them
-	# from the parent process's stderr stream.
-	_diag_fd = os.dup(2)  # dup before we capture sys.stderr
-
-	def _raw_diag(msg: str) -> None:
-		with suppress(Exception):
-			os.write(_diag_fd, (msg + '\n').encode())
-
-	# Enable faulthandler on the real FD so crash tracebacks (SIGSEGV etc.) appear.
+	# Preserve real stderr FD for faulthandler before we redirect sys.stderr.
+	_faulthandler_fd = os.dup(2)
 	with suppress(Exception):
-		faulthandler.enable(file=os.fdopen(os.dup(_diag_fd), 'w', closefd=True), all_threads=True)
-
-	# Atexit probe: if this message NEVER appears, it means os._exit() (C-level)
-	# was called with Python's cleanup phase entirely skipped.
-	_fun_name = getattr(fun, '__name__', str(fun))
-	atexit.register(
-		_raw_diag,
-		f'[exception_wrap/atexit] pid={os.getpid()} target={_fun_name}'
-		': Python atexit reached (normal Python exit)',
-	)
+		faulthandler.enable(
+			file=os.fdopen(_faulthandler_fd, 'w', closefd=False),
+			all_threads=True,
+		)
 
 	stdout_capture = io.StringIO()
 	stderr_capture = io.StringIO()
@@ -148,55 +130,31 @@ def _raw_diag(msg: str) -> None:
 	try:
 		if fun is None:
 			resconn.send({ 'value': None, 'error': None })
-			_raw_diag(f'[exception_wrap/probe] pid={os.getpid()} target={_fun_name}: result sent (fun=None)')
 		else:
-			result_value = fun(*args, **kwargs)
-			_raw_diag(f'[exception_wrap/probe] pid={os.getpid()} target={_fun_name}: fun() returned, sending result')
-			resconn.send({ 'value': result_value, 'error': None })
-			_raw_diag(f'[exception_wrap/probe] pid={os.getpid()} target={_fun_name}: result pipe send complete')
+			resconn.send({ 'value': fun(*args, **kwargs), 'error': None })
 	except BaseException as e:
 		tb = traceback.format_exc()
-		_raw_diag(
-			f'[exception_wrap/probe] pid={os.getpid()} target={_fun_name}'
-			f': caught {type(e).__name__}: {e}'
-		)
 		payload = {
 			'value': None,
 			'error': e,
 			'traceback': tb,
-			'error_type': type(e).__name__,
-			'error_module': type(e).__module__,
-			'error_message': str(e),
 		}
 		try:
 			resconn.send(payload)
 		except Exception as send_err:
-			# Fallback for unpicklable exceptions.
-			with suppress(Exception):
-				resconn.send({
-					'value': None,
-					'error': None,
-					'traceback': tb,
-					'error_type': type(e).__name__,
-					'error_module': type(e).__module__,
-					'error_message': str(e),
-					'send_error': str(send_err),
-				})
+			stderr_capture.write(f'Original error: {e}, pipe send error: {send_err}')
 	finally:
 		sys.stdout = orig_stdout
 		sys.stderr = orig_stderr
-		stdout_text, stdout_truncated = _truncate_capture(stdout_capture.getvalue())
-		stderr_text, stderr_truncated = _truncate_capture(stderr_capture.getvalue())
+		stdout_text = _truncate_capture(stdout_capture.getvalue())
+		stderr_text = _truncate_capture(stderr_capture.getvalue())
 		with suppress(Exception):
 			stdconn.send({
 				'stdout': stdout_text,
 				'stderr': stderr_text,
-				'stdout_truncated': stdout_truncated,
-				'stderr_truncated': stderr_truncated,
 			})
-		_raw_diag(f'[exception_wrap/probe] pid={os.getpid()} target={_fun_name}: finally block complete')
 		with suppress(Exception):
-			os.close(_diag_fd)
+			os.close(_faulthandler_fd)
 
 
 def exec_in_proc(group=None, target=None, name=None, args=(), kwargs=None, *, daemon=None):
@@ -217,22 +175,17 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs=None, *, da
 		daemon=daemon,
 	)
 	target_name = getattr(target, '__name__', str(target))
-	_logger.debug('Starting subprocess for %s', target_name)
 	start = perf_counter_ns()
 	p.start()
-	_logger.debug('Subprocess PID %d started for %s, waiting for it to finish (no timeout)', p.pid, target_name)
+	_logger.debug('Subprocess PID %d started for %s', p.pid, target_name)
 
 	result = None
-	stdobj = {
-		'stdout': '',
-		'stderr': '',
-		'stdout_truncated': False,
-		'stderr_truncated': False,
-	}
+	stdobj = { 'stdout': '', 'stderr': '' }
 	got_result = False
 	got_std = False
 
 	# Drain result/std pipes while child is still alive to avoid deadlock on full pipe buffers.
+	# Pipe's buffer size is 64 KiB
 	while p.is_alive() and (not got_result or not got_std):
 		if not got_result and pconn.poll(0.1):
 			with suppress(EOFError, OSError, BrokenPipeError):
@@ -254,72 +207,55 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs=None, *, da
 		with suppress(EOFError, OSError, BrokenPipeError):
 			if std_pconn.poll():
 				stdobj = std_pconn.recv()
-				got_std = True
-	if stdobj['stdout'] or stdobj['stderr']:
-		extra = {
-			'stdout': stdobj['stdout'],
-			'stderr': stdobj['stderr'],
-		}
-		if stdobj.get('stdout_truncated') or stdobj.get('stderr_truncated'):
-			extra['stdio_truncated'] = {
-				'stdout': bool(stdobj.get('stdout_truncated')),
-				'stderr': bool(stdobj.get('stderr_truncated')),
-			}
-		_logger.info('std info for %s', target_name, extra=extra)
+				# no need to update got_std here
+	if stdobj.get('stdout') or stdobj.get('stderr'):
+		_logger.info('std info for %s', target_name, extra={
+			'stdout': stdobj.get('stdout', ''),
+			'stderr': stdobj.get('stderr', ''),
+		})
 
 	if not got_result:
 		with suppress(EOFError, OSError, BrokenPipeError):
 			if pconn.poll():
 				result = pconn.recv()
-				got_result = True
+				# no need to update got_result here
 
 	if result is not None and result.get('error') is not None:
-		_logger.error('original traceback: %s', result.get('traceback', ''))
+		_logger.error(
+			'original traceback of %s (PID %d, exitcode: %s): %s',
+			target_name,
+			p.pid,
+			p.exitcode,
+			result.get('traceback', ''),
+		)
 		raise result['error']
 
-	if result is not None and result.get('error_type'):
-		details = (
-			f"{result.get('error_module', '')}.{result.get('error_type', '')}: "
-			f"{result.get('error_message', '')}"
-		)
-		if result.get('traceback'):
-			_logger.error('remote traceback: %s', result['traceback'])
-		raise SubprocessExecutionError(p.pid or 0, target_name, p.exitcode or 1, details)
-
-	# If we received a valid result payload, return it even if the exit
-	# code is non-zero.  The non-zero code typically comes from
-	# multiprocessing/C-extension cleanup (e.g. util._exit_function or
-	# a native atexit handler) that runs *after* exception_wrap has
-	# already sent the result over the pipe.
 	if result is not None and 'value' in result:
 		if p.exitcode not in (None, 0):
 			_logger.warning(
 				'Subprocess PID %d for %s exited with code %s after %.2f ms'
-				' but returned a valid result — accepting the result.'
-				' The non-zero exit likely originates from process'
-				' cleanup (multiprocessing finalizers, C-extension'
-				' atexit, etc.).',
+				' but returned a valid result',
 				p.pid, target_name, p.exitcode, elapsed_ms,
 			)
 		return result['value']
 
 	if p.exitcode and p.exitcode < 0:
 		_logger.warning(
-			'Subprocess PID %d for %s exited due to signal %d after %.2f ms',
-			p.pid, target_name, abs(p.exitcode), elapsed_ms,
+			'Subprocess PID %d for %s exited due to signal %d, exitcode %d after %.2f ms',
+			p.pid, target_name, abs(p.exitcode), p.exitcode, elapsed_ms,
 		)
-		raise SubprocessKilledError(p.pid or 0, target_name, p.exitcode)
+		raise SubprocessKilledError(p.pid, target_name, p.exitcode)
 
 	if p.exitcode not in (None, 0):
 		raise SubprocessExecutionError(
-			p.pid or 0,
+			p.pid,
 			target_name,
 			p.exitcode,
-			'No structured exception payload received from child process',
+			f'No structured exception payload received from child process: {result}',
 		)
 
 	raise SubprocessExecutionError(
-		p.pid or 0,
+		p.pid,
 		target_name,
 		0,
 		f'Subprocess exited successfully but returned no result payload: {result}',
diff --git a/context_chat_backend/vectordb/pgvector.py b/context_chat_backend/vectordb/pgvector.py
index 33dfb03..41d7f0d 100644
--- a/context_chat_backend/vectordb/pgvector.py
+++ b/context_chat_backend/vectordb/pgvector.py
@@ -156,7 +156,7 @@ def add_indocuments(self, indocuments: Mapping[int, InDocument]) -> Mapping[int,
 					total_chunks = len(indoc.documents)
 					num_batches = max(1, -(-total_chunks // batch_size))  # ceiling division
 					logger.debug(
-						'Embedding source %s: %d chunk(s) in %d batch(es) — blocks on embedding model',
+						'Embedding source %s: %d chunk(s) in %d batch(es)',
 						indoc.source_id, total_chunks, num_batches,
 					)
 					for i in range(0, total_chunks, batch_size):

From d9ebdac85772930b556f02ea501d3c73160d567b Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 17:54:44 +0530
Subject: [PATCH 47/96] fix: use zip on the subset of filtered sources

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/chain/ingest/injest.py | 23 ++++++++++-----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/context_chat_backend/chain/ingest/injest.py b/context_chat_backend/chain/ingest/injest.py
index 8e32108..190eebd 100644
--- a/context_chat_backend/chain/ingest/injest.py
+++ b/context_chat_backend/chain/ingest/injest.py
@@ -89,6 +89,7 @@ async def __fetch_files_content(
 	error_items = {}
 	semaphore = asyncio.Semaphore(CONCURRENT_FILE_FETCHES)
 	tasks = []
+	task_sources = {}
 
 	file_count = sum(1 for s in sources.values() if isinstance(s, ReceivedFileItem))
 	logger.debug('Fetching content for %d file(s) (max %d concurrent)', file_count, CONCURRENT_FILE_FETCHES)
@@ -126,13 +127,18 @@ async def __fetch_files_content(
 			continue
 		# any user id from the list should have read access to the file
 		tasks.append(asyncio.ensure_future(__fetch_file_content(semaphore, file.file_id, file.userIds[0])))
+		task_sources[db_id] = file
 
 	results = await asyncio.gather(*tasks, return_exceptions=True)
-	for (db_id, file), result in zip(sources.items(), results, strict=True):
-		if isinstance(file, SourceItem):
-			continue
-
-		if isinstance(result, IndexingException):
+	for (db_id, file), result in zip(task_sources.items(), results, strict=True):
+		if isinstance(result, str) or isinstance(result, BytesIO):
+			source_items[db_id] = SourceItem(
+				**{
+					**file.model_dump(),
+					'content': result,
+				}
+			)
+		elif isinstance(result, IndexingException):
 			logger.error(
 				f'Error fetching content for db id {db_id}, file id {file.file_id}, reference {file.reference}'
 				f': {result}',
@@ -142,13 +148,6 @@ async def __fetch_files_content(
 				error=str(result),
 				retryable=result.retryable,
 			)
-		elif isinstance(result, str) or isinstance(result, BytesIO):
-			source_items[db_id] = SourceItem(
-				**{
-					**file.model_dump(),
-					'content': result,
-				}
-			)
 		elif isinstance(result, BaseException):
 			logger.error(
 				f'Unexpected error fetching content for db id {db_id}, file id {file.file_id},'

From ea77480df7060a21cb556d7dfe13f8d5da21337f Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 18:41:30 +0530
Subject: [PATCH 48/96] fix(em): use tcp socket connection check

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/network_em.py   | 29 ++++++++++++++++++++++++----
 context_chat_backend/task_fetcher.py | 17 +++++++---------
 2 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/context_chat_backend/network_em.py b/context_chat_backend/network_em.py
index 43ced6c..ba1edc9 100644
--- a/context_chat_backend/network_em.py
+++ b/context_chat_backend/network_em.py
@@ -3,8 +3,10 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
 import logging
+import socket
 from time import sleep
 from typing import Literal, TypedDict
+from urllib.parse import urlparse
 
 import niquests
 from langchain_core.embeddings import Embeddings
@@ -19,6 +21,7 @@
 )
 
 logger = logging.getLogger('ccb.nextwork_em')
+TCP_CONNECT_TIMEOUT = 2.0  # seconds
 
 # Copied from llama_cpp/llama_types.py
 
@@ -44,12 +47,30 @@ class NetworkEmbeddings(Embeddings):
 	def __init__(self, app_config: TConfig):
 		self.app_config = app_config
 
-	def check_connection(self) -> bool:
+	def _get_host_and_port(self) -> tuple[str, int]:
+		parsed = urlparse(self.app_config.embedding.base_url)
+		host = parsed.hostname
+
+		if not host:
+			raise ValueError("Invalid URL: Missing hostname")
+
+		if parsed.port:
+			port = parsed.port
+		else:
+			port = 443 if parsed.scheme == "https" else 80
+
+		return host, port
+
+	def check_connection(self, check_origin: str) -> bool:
 		try:
-			self.embed_query('hello')
+			host, port = self._get_host_and_port()
+			sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+			sock.settimeout(TCP_CONNECT_TIMEOUT)
+			sock.connect((host, port))
+			sock.close()
 			return True
-		except EmbeddingException as e:
-			logger.warning('Embedding server connection failed', exc_info=e)
+		except (ValueError, TimeoutError, ConnectionRefusedError, socket.gaierror) as e:
+			logger.warning(f'[{check_origin}] Embedding server is not reachable, retrying after some time: {e}')
 			return False
 
 	def _get_embedding(self, input_: str | list[str], try_: int = 3) -> list[float] | list[list[float]]:
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index c75cec0..c931e7d 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -83,6 +83,7 @@ class ThreadType(Enum):
 
 def files_indexing_thread(app_config: TConfig, app_enabled: Event) -> None:
 	try:
+		network_em = NetworkEmbeddings(app_config)
 		vectordb_loader = VectorDBLoader(app_config)
 	except LoaderException as e:
 		LOGGER.error('Error initializing vector DB loader, files indexing thread will not start:', exc_info=e)
@@ -141,7 +142,7 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 			return
 
 		try:
-			if not __check_em_server(app_config):
+			if not network_em.check_connection(ThreadType.FILES_INDEXING.value):
 				sleep(POLLING_COOLDOWN)
 				continue
 
@@ -456,6 +457,7 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 	LOGGER.info('Starting task fetcher loop')
 
 	try:
+		network_em = NetworkEmbeddings(app_config)
 		vectordb_loader = VectorDBLoader(app_config)
 		llm_loader = LLMModelLoader(app_config)
 	except LoaderException as e:
@@ -466,14 +468,14 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 	llm: LLM = llm_loader.load()
 
 	while True:
-		if not __check_em_server(app_config):
-			sleep(POLLING_COOLDOWN)
-			continue
-
 		if THREAD_STOP_EVENT.is_set():
 			LOGGER.info('Updates processing thread is stopping due to stop event being set')
 			return
 
+		if not network_em.check_connection(ThreadType.REQUEST_PROCESSING.value):
+			sleep(POLLING_COOLDOWN)
+			continue
+
 		try:
 			# Fetch pending task
 			try:
@@ -877,8 +879,3 @@ def process_search_task(
 			task_input.get('scopeList'),
 		)
 	)
-
-
-def __check_em_server(app_config: TConfig) -> bool:
-	embedding_model = NetworkEmbeddings(app_config=app_config)
-	return embedding_model.check_connection()

From 1ce237a36addb872e3affc790faeae5583e80b28 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 18:42:59 +0530
Subject: [PATCH 49/96] fix(ci): remove github CI restrictions

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/task_fetcher.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index c931e7d..004104f 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -55,16 +55,9 @@
 THREAD_STOP_EVENT = Event()
 LOGGER = logging.getLogger('ccb.task_fetcher')
 FILES_INDEXING_BATCH_SIZE = 16  # theoretical max RAM usage: 16 * 100 MiB, todo: config?
-if os.getenv('GITHUB_ACTIONS'):
-	FILES_INDEXING_BATCH_SIZE = 4
 MIN_FILES_PER_CPU = 4
-if os.getenv('GITHUB_ACTIONS'):
-	MIN_FILES_PER_CPU = 2
 # divides the batch into these many chunks
 PARALLEL_FILE_PARSING_COUNT = max(1, (os.cpu_count() or 2) - 1)  # todo: config?
-if os.getenv('GITHUB_ACTIONS'):
-	# Keep CI memory usage predictable and avoid OOM-killed workers.
-	PARALLEL_FILE_PARSING_COUNT = max(1, min(PARALLEL_FILE_PARSING_COUNT, 2))
 LOGGER.info(f'Using {PARALLEL_FILE_PARSING_COUNT} parallel file parsing workers')
 ACTIONS_BATCH_SIZE = 512  # todo: config?
 POLLING_COOLDOWN = 30

From d82e01b6555e4a362ba58fda1414cba83dc00023 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 18:54:20 +0530
Subject: [PATCH 50/96] fix: remove unused code and some de-duplication

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/task_fetcher.py | 286 +++++++--------------------
 1 file changed, 75 insertions(+), 211 deletions(-)

diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 004104f..1e45646 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -15,16 +15,14 @@
 
 import niquests
 from langchain.llms.base import LLM
-from langchain.schema import Document
 from nc_py_api import NextcloudApp, NextcloudException
 from niquests import JSONDecodeError, RequestException
 from pydantic import ValidationError
 
-from .chain.context import do_doc_search, get_context_chunks, get_context_docs
+from .chain.context import do_doc_search
 from .chain.ingest.injest import embed_sources
 from .chain.one_shot import process_context_query
-from .chain.query_proc import get_pruned_query
-from .chain.types import ContextException, EnrichedSourceList, LLMOutput, ScopeList, ScopeType, SearchResult
+from .chain.types import ContextException, EnrichedSourceList, LLMOutput, ScopeList, SearchResult
 from .dyn_loader import LLMModelLoader, VectorDBLoader
 from .network_em import NetworkEmbeddings
 from .types import (
@@ -39,7 +37,6 @@
 	TConfig,
 )
 from .utils import SubprocessKilledError, exec_in_proc, get_app_role
-from .vectordb.base import BaseVectorDB
 from .vectordb.service import (
 	decl_update_access,
 	delete_by_provider,
@@ -498,11 +495,16 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 				if task['type'] == 'context_chat:context_chat':
 					result: LLMOutput = process_normal_task(task, vectordb_loader, llm, app_config)
 					# Return result to Nextcloud
-					success = return_normal_result_to_nextcloud(task['id'], userId, result)
+					success = return_result_to_nextcloud(task['id'], userId, {
+						'output': result['output'],
+						'sources': enrich_sources(result['sources'], userId),
+					})
 				elif task['type'] == 'context_chat:context_chat_search':
 					search_result: list[SearchResult] = process_search_task(task, vectordb_loader)
 					# Return result to Nextcloud
-					success = return_search_result_to_nextcloud(task['id'], userId, search_result)
+					success = return_result_to_nextcloud(task['id'], userId, {
+						'sources': enrich_sources(search_result, userId),
+					})
 				else:
 					LOGGER.error(f'Unknown task type {task["type"]}')
 					success = return_error_to_nextcloud(task['id'], Exception(f'Unknown task type {task["type"]}'))
@@ -541,200 +543,6 @@ def wait_for_tasks(interval = None):
 	TRIGGER.clear()
 
 
-
-def start_bg_threads(app_config: TConfig, app_enabled: Event):
-	if APP_ROLE == AppRole.INDEXING or APP_ROLE == AppRole.NORMAL:
-		if (
-			ThreadType.FILES_INDEXING in THREADS
-			or ThreadType.UPDATES_PROCESSING in THREADS
-		):
-			LOGGER.info('Background threads already running, skipping start')
-			return
-
-		THREAD_STOP_EVENT.clear()
-		THREADS[ThreadType.FILES_INDEXING] = Thread(
-			target=files_indexing_thread,
-			args=(app_config, app_enabled),
-			name='FilesIndexingThread',
-		)
-		THREADS[ThreadType.UPDATES_PROCESSING] = Thread(
-			target=updates_processing_thread,
-			args=(app_config, app_enabled),
-			name='UpdatesProcessingThread',
-		)
-		THREADS[ThreadType.FILES_INDEXING].start()
-		THREADS[ThreadType.UPDATES_PROCESSING].start()
-
-	if APP_ROLE == AppRole.RP or APP_ROLE == AppRole.NORMAL:
-		if ThreadType.REQUEST_PROCESSING in THREADS:
-			LOGGER.info('Background threads already running, skipping start')
-			return
-
-		THREAD_STOP_EVENT.clear()
-		THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
-			target=request_processing_thread,
-			args=(app_config, app_enabled),
-			name='RequestProcessingThread',
-		)
-		THREADS[ThreadType.REQUEST_PROCESSING].start()
-
-
-def wait_for_bg_threads():
-	if APP_ROLE == AppRole.INDEXING or APP_ROLE == AppRole.NORMAL:
-		if (ThreadType.FILES_INDEXING not in THREADS or ThreadType.UPDATES_PROCESSING not in THREADS):
-			return
-
-		THREAD_STOP_EVENT.set()
-		THREADS[ThreadType.FILES_INDEXING].join()
-		THREADS[ThreadType.UPDATES_PROCESSING].join()
-		THREADS.pop(ThreadType.FILES_INDEXING)
-		THREADS.pop(ThreadType.UPDATES_PROCESSING)
-
-	if APP_ROLE == AppRole.RP or APP_ROLE == AppRole.NORMAL:
-		if (ThreadType.REQUEST_PROCESSING not in THREADS):
-			return
-
-		THREAD_STOP_EVENT.set()
-		THREADS[ThreadType.REQUEST_PROCESSING].join()
-		THREADS.pop(ThreadType.REQUEST_PROCESSING)
-
-
-def query_vector_database(
-	user_id: str,
-	query: str,
-	vectordb: BaseVectorDB,
-	ctx_limit: int,
-	scope_type: ScopeType | None = None,
-	scope_list: list[str] | None = None,
-) -> list[Document]:
-	"""
-	Query the vector database to retrieve relevant documents.
-
-	Args:
-		user_id: User ID for scoping the search
-		query: The search query text
-		vectordb: Vector database instance
-		ctx_limit: Maximum number of documents to return
-		scope_type: Optional scope type (PROVIDER or SOURCE)
-		scope_list: Optional list of scope identifiers
-
-	Returns:
-		List of relevant Document objects
-
-	Raises:
-		ContextException: If scope type is provided without scope list
-	"""
-	context_docs = get_context_docs(user_id, query, vectordb, ctx_limit, scope_type, scope_list)
-	LOGGER.debug('Retrieved context documents', extra={
-		'user_id': user_id,
-		'num_docs': len(context_docs),
-		'ctx_limit': ctx_limit,
-	})
-	return context_docs
-
-
-def prepare_context_chunks(context_docs: list[Document]) -> list[str]:
-	"""
-	Extract and format text chunks from documents for LLM context.
-
-	Args:
-		context_docs: List of Document objects from vector DB
-
-	Returns:
-		List of formatted text chunks including titles and content
-	"""
-	return get_context_chunks(context_docs)
-
-
-def generate_llm_response(
-	llm: LLM,
-	app_config: TConfig,
-	user_id: str,
-	query: str,
-	template: str,
-	context_chunks: list[str],
-	end_separator: str = '',
-) -> str:
-	"""
-	Generate LLM response using the pruned query and context.
-
-	Args:
-		llm: Language model instance
-		app_config: Application configuration
-		user_id: User ID for the request
-		query: The original query text
-		template: Template for formatting the prompt
-		context_chunks: Context chunks to include in the prompt
-		end_separator: Optional separator to stop generation
-
-	Returns:
-		Generated LLM output text
-
-	Raises:
-		ValueError: If context length is too small to fit the query
-	"""
-	pruned_query_text = get_pruned_query(llm, app_config, query, template, context_chunks)
-
-	stop = [end_separator] if end_separator else None
-	output = llm.invoke(
-		pruned_query_text,
-		stop=stop,
-		userid=user_id,
-	).strip()
-
-	LOGGER.debug('Generated LLM response', extra={
-		'user_id': user_id,
-		'output_length': len(output),
-	})
-	return output
-
-
-def extract_unique_sources(context_docs: list[Document]) -> list[str]:
-	"""
-	Extract unique source IDs from context documents.
-
-	Args:
-		context_docs: List of Document objects
-
-	Returns:
-		List of unique source IDs
-	"""
-	unique_sources: list[str] = list({
-		source for d in context_docs if (source := d.metadata.get('source'))
-	})
-	return unique_sources
-
-def return_normal_result_to_nextcloud(task_id: int, userId: str, result: LLMOutput) -> bool:
-	"""
-	Return query result back to Nextcloud.
-
-	Args:
-		task_id: Unique task identifier
-		result: The LLMOutput result to return
-
-	Returns:
-		True if successful, False otherwise
-	"""
-	LOGGER.debug('Returning result to Nextcloud', extra={
-		'task_id': task_id,
-		'output_length': len(result['output']),
-		'num_sources': len(result['sources']),
-	})
-
-	nc = NextcloudApp()
-
-	try:
-		nc.providers.task_processing.report_result(task_id, {
-			'output': result['output'],
-			'sources': enrich_sources(result['sources'], userId),
-		})
-	except (NextcloudException, RequestException, JSONDecodeError) as e:
-		LOGGER.error(f"Network error reporting task result {e}", exc_info=e)
-		return False
-
-	return True
-
-
 def enrich_sources(results: list[SearchResult], userId: str) -> list[str]:
 	nc = NextcloudApp()
 	data = nc.ocs('POST', '/ocs/v2.php/apps/context_chat/enrich_sources', json={'sources': results, 'userId': userId})
@@ -742,34 +550,32 @@ def enrich_sources(results: list[SearchResult], userId: str) -> list[str]:
 	return [s.model_dump_json() for s in sources]
 
 
-def return_search_result_to_nextcloud(task_id: int, userId: str, result: list[SearchResult]) -> bool:
+def return_result_to_nextcloud(task_id: int, userId: str, result: dict[str, Any]) -> bool:
 	"""
-	Return search result back to Nextcloud.
+	Return query result back to Nextcloud.
 
 	Args:
-		task_id: Unique task identifier
-		result: The list of search results to return
+		result: Payload reported to Nextcloud as the result of task `task_id`
 
 	Returns:
 		True if successful, False otherwise
 	"""
-	LOGGER.debug('Returning search result to Nextcloud', extra={
+	LOGGER.debug('Returning result to Nextcloud', extra={
 		'task_id': task_id,
-		'num_sources': len(result),
+		'result': result,
 	})
 
 	nc = NextcloudApp()
 
 	try:
-		nc.providers.task_processing.report_result(task_id, {
-			'sources': enrich_sources(result, userId),
-		})
+		nc.providers.task_processing.report_result(task_id, result)
 	except (NextcloudException, RequestException, JSONDecodeError) as e:
-		LOGGER.error(f"Network error reporting search task result {e}", exc_info=e)
+		LOGGER.error(f"Network error reporting task result {e}", exc_info=e)
 		return False
 
 	return True
 
+
 def return_error_to_nextcloud(task_id: int, e: Exception) -> bool:
 	"""
 	Return error result back to Nextcloud.
@@ -827,6 +633,7 @@ def process_normal_task(
 	if task_input.get('scopeType') == 'none':
 		task_input['scopeType'] = None
 
+	# todo: document no template support
 	return exec_in_proc(target=process_context_query,
 		args=(
 			user_id,
@@ -872,3 +679,60 @@ def process_search_task(
 			task_input.get('scopeList'),
 		)
 	)
+
+
+def start_bg_threads(app_config: TConfig, app_enabled: Event):
+	if APP_ROLE == AppRole.INDEXING or APP_ROLE == AppRole.NORMAL:
+		if (
+			ThreadType.FILES_INDEXING in THREADS
+			or ThreadType.UPDATES_PROCESSING in THREADS
+		):
+			LOGGER.info('Background threads already running, skipping start')
+			return
+
+		THREAD_STOP_EVENT.clear()
+		THREADS[ThreadType.FILES_INDEXING] = Thread(
+			target=files_indexing_thread,
+			args=(app_config, app_enabled),
+			name='FilesIndexingThread',
+		)
+		THREADS[ThreadType.UPDATES_PROCESSING] = Thread(
+			target=updates_processing_thread,
+			args=(app_config, app_enabled),
+			name='UpdatesProcessingThread',
+		)
+		THREADS[ThreadType.FILES_INDEXING].start()
+		THREADS[ThreadType.UPDATES_PROCESSING].start()
+
+	if APP_ROLE == AppRole.RP or APP_ROLE == AppRole.NORMAL:
+		if ThreadType.REQUEST_PROCESSING in THREADS:
+			LOGGER.info('Background threads already running, skipping start')
+			return
+
+		THREAD_STOP_EVENT.clear()
+		THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
+			target=request_processing_thread,
+			args=(app_config, app_enabled),
+			name='RequestProcessingThread',
+		)
+		THREADS[ThreadType.REQUEST_PROCESSING].start()
+
+
+def wait_for_bg_threads():
+	if APP_ROLE == AppRole.INDEXING or APP_ROLE == AppRole.NORMAL:
+		if (ThreadType.FILES_INDEXING not in THREADS or ThreadType.UPDATES_PROCESSING not in THREADS):
+			return
+
+		THREAD_STOP_EVENT.set()
+		THREADS[ThreadType.FILES_INDEXING].join()
+		THREADS[ThreadType.UPDATES_PROCESSING].join()
+		THREADS.pop(ThreadType.FILES_INDEXING)
+		THREADS.pop(ThreadType.UPDATES_PROCESSING)
+
+	if APP_ROLE == AppRole.RP or APP_ROLE == AppRole.NORMAL:
+		if (ThreadType.REQUEST_PROCESSING not in THREADS):
+			return
+
+		THREAD_STOP_EVENT.set()
+		THREADS[ThreadType.REQUEST_PROCESSING].join()
+		THREADS.pop(ThreadType.REQUEST_PROCESSING)

From 286db22e8cb664f600ddfa3b759ce8e83963ff2b Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 3 Apr 2026 19:32:28 +0530
Subject: [PATCH 51/96] fix(mp): run repairs and config file check only in
 MainProcess

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/controller.py | 8 ++++++--
 main.py                            | 3 +--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 3a8e15a..9c3812e 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -16,6 +16,7 @@
 # ruff: noqa: E402
 
 import logging
+import multiprocessing as mp
 import os
 import tempfile
 import threading
@@ -39,8 +40,11 @@
 
 # setup
 
-repair_run()
-ensure_config_file()
+# only run once
+if mp.current_process().name == 'MainProcess':
+	repair_run()
+	ensure_config_file()
+
 logger = logging.getLogger('ccb.controller')
 app_config = get_config(os.environ['CC_CONFIG_PATH'])
 __download_models_from_hf = os.environ.get('CC_DOWNLOAD_MODELS_FROM_HF', 'true').lower() in ('1', 'true', 'yes')
diff --git a/main.py b/main.py
index c261451..076b7db 100755
--- a/main.py
+++ b/main.py
@@ -5,6 +5,7 @@
 #
 
 import logging
+import multiprocessing as mp
 from os import cpu_count, getenv
 
 import psutil
@@ -45,8 +46,6 @@ def _setup_log_levels(debug: bool):
 
 
 if __name__ == '__main__':
-	import multiprocessing as mp
-
 	logging_config = get_logging_config(LOGGER_CONFIG_NAME)
 	setup_logging(logging_config)
 	app_config: TConfig = app.extra['CONFIG']

From 726eb64f5624eb9a2262aa6c6b17641e04b33973 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 7 Apr 2026 16:43:07 +0530
Subject: [PATCH 52/96] fix: attach source_ids as keys in json logs

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/task_fetcher.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 1e45646..be74b31 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -81,7 +81,9 @@ def files_indexing_thread(app_config: TConfig, app_enabled: Event) -> None:
 
 	def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) -> Mapping[int, IndexingError | None]:
 		source_refs = [s.reference for s in source_items.values()]
-		LOGGER.info('Starting embed_sources subprocess for %d source(s): %s', len(source_items), source_refs)
+		LOGGER.info('Starting embed_sources subprocess for %d source(s)', len(source_items), extra={
+			'source_ids': source_refs,
+		})
 		try:
 			result = exec_in_proc(
 				target=embed_sources,
@@ -96,8 +98,10 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 			return result
 		except SubprocessKilledError as e:
 			LOGGER.error(
-				'embed_sources subprocess was killed for %d source(s) with exitcode %s: %s',
-				len(source_items), e.exitcode, source_refs, exc_info=e,
+				'embed_sources subprocess was killed for %d source(s) with exitcode %s',
+				len(source_items), e.exitcode, exc_info=e, extra={
+					'source_ids': source_refs,
+				},
 			)
 			if len(source_items) == 1:
 				return dict.fromkeys(
@@ -120,8 +124,10 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 				retryable=True,
 			)
 			LOGGER.error(
-				'embed_sources subprocess raised a %s error for sources %s, marking all as retryable',
-				e.__class__.__name__, source_refs, exc_info=e,
+				'embed_sources subprocess raised a %s error for %d sources, marking all as retryable',
+				e.__class__.__name__, len(source_refs), exc_info=e, extra={
+					'source_ids': source_refs,
+				}
 			)
 			return dict.fromkeys(source_items, err)
 

From 073f9d0e4a2f7fd52c1ef0df3410ea390c70c683 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 7 Apr 2026 16:43:26 +0530
Subject: [PATCH 53/96] fix(ci): upload db dump artifacts

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 8ec8eab..9c66483 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -89,7 +89,7 @@ jobs:
           POSTGRES_USER: root
           POSTGRES_PASSWORD: rootpassword
           POSTGRES_DB: nextcloud
-        options: --health-cmd pg_isready --health-interval 5s --health-timeout 2s --health-retries 5
+        options: --health-cmd pg_isready --health-interval 5s --health-timeout 2s --health-retries 5 --name postgres --hostname postgres
 
     steps:
       - name: Checkout server
@@ -214,6 +214,13 @@ jobs:
             php cron.php
             sleep 10
           done &
+          sleep 30
+          # list all the bg jobs
+          ./occ background-job:list
+
+      - name: Initial dump of DB with context_chat_queue populated
+        run: |
+          docker exec postgres pg_dump nextcloud > /tmp/0_pgdump_nextcloud
 
       - name: Periodically check context_chat stats for 15 minutes to allow the backend to index the files
         run: |
@@ -315,6 +322,10 @@ jobs:
             echo "Memory usage during scan is stable. No memory leak detected."
           fi
 
+      - name: Final dump of DB with vectordb populated
+        run: |
+          docker exec postgres pg_dump nextcloud > /tmp/1_pgdump_nextcloud
+
       - name: Show server logs
         if: always()
         run: |
@@ -350,6 +361,14 @@ jobs:
         run: |
           tail -v -n +1 context_chat_backend/persistent_storage/logs/em_server.log* || echo "No logs in logs directory"
 
+      - name: Upload database dumps
+        uses: actions/upload-artifact@v4
+        with:
+          name: database-dumps
+          path: |
+            /tmp/0_pgdump_nextcloud
+            /tmp/1_pgdump_nextcloud
+
   summary:
     permissions:
       contents: none

From 13ea740d94841069b1c72398440dab9a2a30cd31 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 7 Apr 2026 18:01:47 +0530
Subject: [PATCH 54/96] fix: retry PGVector object creation if table already
 exists

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/vectordb/pgvector.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/context_chat_backend/vectordb/pgvector.py b/context_chat_backend/vectordb/pgvector.py
index 41d7f0d..d7b718d 100644
--- a/context_chat_backend/vectordb/pgvector.py
+++ b/context_chat_backend/vectordb/pgvector.py
@@ -120,7 +120,15 @@ def __init__(self, embedding: Embeddings | None = None, **kwargs):
 			kwargs['connection'] = os.environ['CCB_DB_URL']
 
 		# setup langchain db + our access list table
-		self.client = PGVector(embedding, collection_name=COLLECTION_NAME, **kwargs)
+		try:
+			self.client = PGVector(embedding, collection_name=COLLECTION_NAME, **kwargs)
+		except sa.exc.IntegrityError as ie:  # pyright: ignore[reportAttributeAccessIssue]
+			if not isinstance(ie.orig, psycopg.errors.UniqueViolation):
+				raise
+
+			# tried to create the tables but they were already created in another process
+			# init the client again so it detects that they already exist, and continue from there
+			self.client = PGVector(embedding, collection_name=COLLECTION_NAME, **kwargs)
 
 	def get_instance(self) -> VectorStore:
 		return self.client

From dcb04e7209558ea9185f902637474e301d70f1b9 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 7 Apr 2026 20:11:24 +0530
Subject: [PATCH 55/96] fix: unique db dump artifact id

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 9c66483..384e352 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -364,7 +364,7 @@ jobs:
       - name: Upload database dumps
         uses: actions/upload-artifact@v4
         with:
-          name: database-dumps
+          name: database-dumps-${{ matrix.server-versions }}-php@${{ matrix.php-versions }}
           path: |
             /tmp/0_pgdump_nextcloud
             /tmp/1_pgdump_nextcloud

From dc1d57b15161ff13ffa56208bc4a21bb4e13b10b Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 7 Apr 2026 20:12:51 +0530
Subject: [PATCH 56/96] fix(ci): log stats before exit

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 384e352..d30073a 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -282,9 +282,6 @@ jobs:
 
           echo "::endgroup::"
 
-          ./occ context_chat:stats
-          ./occ context_chat:stats --json
-
           if [ $success -ne 1 ]; then
             echo "Max attempts reached"
             exit 1
@@ -369,6 +366,11 @@ jobs:
             /tmp/0_pgdump_nextcloud
             /tmp/1_pgdump_nextcloud
 
+      - name: Final stats log
+        run: |
+          ./occ context_chat:stats
+          ./occ context_chat:stats --json
+
   summary:
     permissions:
       contents: none

From eae1cd4e7c4958fcb7046a9638a6a1c5f6c7df91 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 9 Apr 2026 16:44:24 +0530
Subject: [PATCH 57/96] fix: mark unembeddable files as such

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/network_em.py        | 20 +++++++++++++++++---
 context_chat_backend/task_fetcher.py      |  4 ++++
 context_chat_backend/types.py             | 10 +++++++++-
 context_chat_backend/vectordb/pgvector.py | 16 +++++++++++++++-
 4 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/context_chat_backend/network_em.py b/context_chat_backend/network_em.py
index ba1edc9..8b85169 100644
--- a/context_chat_backend/network_em.py
+++ b/context_chat_backend/network_em.py
@@ -12,6 +12,7 @@
 from langchain_core.embeddings import Embeddings
 
 from .types import (
+	DocErrorEmbeddingException,
 	EmbeddingException,
 	FatalEmbeddingException,
 	RetryableEmbeddingException,
@@ -105,14 +106,27 @@ def _get_embedding(self, input_: str | list[str], try_: int = 3) -> list[float]
 			if response.status_code is None:
 				raise EmbeddingException('Error: no response from embedding service')
 			if response.status_code // 100 == 4:
-				raise FatalEmbeddingException(response.text)
+				raise FatalEmbeddingException(
+					response.text or f'Error: embedding request returned non-2xx status code {response.status_code}',
+				)
 			if response.status_code // 100 != 2:
-				raise EmbeddingException(response.text)
-		# todo: rework exception handling and their downstream interpretation
+				raise EmbeddingException(
+					response.text or f'Error: embedding request returned non-2xx status code {response.status_code}',
+					response,
+				)
 		except FatalEmbeddingException as e:
 			logger.error('Fatal error while getting embeddings: %s', str(e), exc_info=e)
 			raise e
 		except EmbeddingException as e:
+			try:
+				if e.response:
+					err_msg = e.response.json().get('error', {}).get('message', '')
+					if err_msg == 'llama_decode returned -1':
+						# the document could not be processed
+						raise DocErrorEmbeddingException(f'Failed to embed the document: {err_msg}') from e
+			except niquests.exceptions.JSONDecodeError:
+				...
+
 			if try_ > 0:
 				logger.debug('Retrying embedding request in 5 secs', extra={'try': try_})
 				sleep(5)
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index be74b31..09be98a 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -29,6 +29,7 @@
 	ActionsQueueItems,
 	ActionType,
 	AppRole,
+	EmbeddingException,
 	FilesQueueItems,
 	IndexingError,
 	LoaderException,
@@ -520,6 +521,9 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 				else:
 					LOGGER.error(f'Failed to return result for task {task["id"]}')
 
+			except EmbeddingException as e:
+				LOGGER.warning(f'Embedding server error for task {task["id"]}: {e}')
+				return_error_to_nextcloud(task['id'], e)
 			except ContextException as e:
 				LOGGER.warning(f'Context error for task {task["id"]}: {e}')
 				return_error_to_nextcloud(task['id'], e)
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 59d2568..410dc3f 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -8,6 +8,7 @@
 from io import BytesIO
 from typing import Annotated, Literal, Self
 
+import niquests
 from pydantic import AfterValidator, BaseModel, Discriminator, computed_field, field_validator, model_validator
 
 from .mimetype_list import SUPPORTED_MIMETYPES
@@ -123,7 +124,9 @@ class LoaderException(Exception):
 
 
 class EmbeddingException(Exception):
-	...
+	def __init__(self, msg: str, response: niquests.Response | None = None):
+		super().__init__(msg)
+		self.response = response
 
 class RetryableEmbeddingException(EmbeddingException):
 	"""
@@ -140,6 +143,11 @@ class FatalEmbeddingException(EmbeddingException):
 	Either malformed request, authentication error, or other non-retryable error.
 	"""
 
+class DocErrorEmbeddingException(EmbeddingException):
+	"""
+	Exception that indicates a fatal error for the document; this document should not be retried.
+	"""
+
 
 class AppRole(str, Enum):
 	NORMAL = 'normal'
diff --git a/context_chat_backend/vectordb/pgvector.py b/context_chat_backend/vectordb/pgvector.py
index d7b718d..9d88024 100644
--- a/context_chat_backend/vectordb/pgvector.py
+++ b/context_chat_backend/vectordb/pgvector.py
@@ -20,6 +20,7 @@
 
 from ..chain.types import InDocument, ScopeType
 from ..types import (
+	DocErrorEmbeddingException,
 	EmbeddingException,
 	FatalEmbeddingException,
 	IndexingError,
@@ -215,13 +216,24 @@ def add_indocuments(self, indocuments: Mapping[int, InDocument]) -> Mapping[int,
 						retryable=True,
 					)
 					continue
+				except DocErrorEmbeddingException as e:
+					logger.warning(
+						'Error adding documents to vectordb, server failed to index it, it will not be retried',
+						exc_info=e,
+						extra={ 'source_id': indoc.source_id },
+					)
+					results[php_db_id] = IndexingError(
+						error=str(e),
+						retryable=False,
+					)
+					continue
 				except FatalEmbeddingException as e:
 					raise EmbeddingException(
 						f'Fatal error while embedding documents for source {indoc.source_id}: {e}'
 					) from e
 				except (RetryableEmbeddingException, EmbeddingException) as e:
 					# temporary error, continue with the next document
-					logger.exception('Error adding documents to vectordb, should be retried later.', exc_info=e, extra={
+					logger.warning('Error adding documents to vectordb, should be retried later.', exc_info=e, extra={
 						'source_id': indoc.source_id,
 					})
 					results[php_db_id] = IndexingError(
@@ -615,6 +627,8 @@ def doc_search(
 
 				# get embeddings
 				return self._similarity_search(session, query, chunk_ids, k)
+		except EmbeddingException:
+			raise
 		except Exception as e:
 			raise DbException('Error: performing doc search in vectordb') from e
 

From 7b10b27afe5a3e6bfebab3c16895f4fe64808a4d Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 9 Apr 2026 16:51:34 +0530
Subject: [PATCH 58/96] chore: migrate default values in the type definition

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/config_parser.py | 22 ++++++++--------------
 context_chat_backend/types.py         | 17 +++++++++--------
 2 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/context_chat_backend/config_parser.py b/context_chat_backend/config_parser.py
index dafef75..0a62019 100644
--- a/context_chat_backend/config_parser.py
+++ b/context_chat_backend/config_parser.py
@@ -103,17 +103,11 @@ def get_config(file_path: str) -> TConfig:
 		except Exception as e:
 			raise AssertionError('Error: could not create embedding config from config file') from e
 
-	return TConfig(
-		debug=config.get('debug', False),
-		uvicorn_log_level=config.get('uvicorn_log_level', 'info'),
-		disable_aaa=config.get('disable_aaa', False),
-		verify_ssl=config.get('verify_ssl', config.get('httpx_verify_ssl', True)),
-		use_colors=config.get('use_colors', True),
-		uvicorn_workers=config.get('uvicorn_workers', 1),
-		embedding_chunk_size=config.get('embedding_chunk_size', 1000),
-		doc_parser_worker_limit=config.get('doc_parser_worker_limit', 10),
-
-		vectordb=vectordb,
-		embedding=embedding_config,
-		llm=llm,
-	)
+	config['verify_ssl']  = config.get('verify_ssl', config.get('httpx_verify_ssl', True))
+	config.pop('httpx_verify_ssl', None)
+
+	config['llm'] = llm
+	config['vectordb'] = vectordb
+	config['embedding'] = embedding_config
+
+	return TConfig(**config)
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 410dc3f..345eb6e 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -105,14 +105,15 @@ class TEmbeddingConfig(BaseModel):
 
 
 class TConfig(BaseModel):
-	debug: bool
-	uvicorn_log_level: str
-	disable_aaa: bool
-	verify_ssl: bool
-	use_colors: bool
-	uvicorn_workers: int
-	embedding_chunk_size: int
-	doc_parser_worker_limit: int
+	debug: bool = False
+	uvicorn_log_level: str = 'info'
+	disable_aaa: bool = False
+	verify_ssl: bool = True
+	use_colors: bool = True
+	uvicorn_workers: int = 1
+	embedding_chunk_size: int = 2000
+	# todo: unused now
+	doc_parser_worker_limit: int = 10
 
 	vectordb: tuple[str, dict]
 	embedding: TEmbeddingConfig

From 8b4d26046e87317392e756963529f3d16758bd34 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 9 Apr 2026 18:33:32 +0530
Subject: [PATCH 59/96] chore(config): add config entries for tunables

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 config.cpu.yaml                             |  5 +-
 config.gpu.yaml                             |  5 +-
 context_chat_backend/chain/ingest/injest.py | 15 ++---
 context_chat_backend/task_fetcher.py        | 66 +++++++++++----------
 context_chat_backend/types.py               |  6 +-
 5 files changed, 55 insertions(+), 42 deletions(-)

diff --git a/config.cpu.yaml b/config.cpu.yaml
index 1512ea0..304cb7d 100644
--- a/config.cpu.yaml
+++ b/config.cpu.yaml
@@ -7,7 +7,10 @@ verify_ssl: true
 use_colors: true
 uvicorn_workers: 1
 embedding_chunk_size: 2000
-doc_parser_worker_limit: 10
+doc_indexing_batch_size: 32  # theoretical max RAM usage: 32 * 100 MiB
+actions_batch_size: 512
+file_parsing_cpu_count: -1  # divides the batch into these many chunks, -1 = auto
+concurrent_file_fetches: 10  # maximum number of files to fetch concurrently to not overload the NC server
 
 
 vectordb:
diff --git a/config.gpu.yaml b/config.gpu.yaml
index fc3acaf..16dcb01 100644
--- a/config.gpu.yaml
+++ b/config.gpu.yaml
@@ -7,7 +7,10 @@ verify_ssl: true
 use_colors: true
 uvicorn_workers: 1
 embedding_chunk_size: 2000
-doc_parser_worker_limit: 10
+doc_indexing_batch_size: 32  # theoretical max RAM usage: 32 * 100 MiB
+actions_batch_size: 512
+file_parsing_cpu_count: -1  # divides the batch into these many chunks, -1 = auto
+concurrent_file_fetches: 10  # maximum number of files to fetch concurrently to not overload the NC server
 
 
 vectordb:
diff --git a/context_chat_backend/chain/ingest/injest.py b/context_chat_backend/chain/ingest/injest.py
index 190eebd..ad2777e 100644
--- a/context_chat_backend/chain/ingest/injest.py
+++ b/context_chat_backend/chain/ingest/injest.py
@@ -23,9 +23,7 @@
 
 logger = logging.getLogger('ccb.injest')
 
-# max concurrent fetches to avoid overloading the NC server or hitting rate limits
-CONCURRENT_FILE_FETCHES = 10  # todo: config?
-MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB, all loaded in RAM at once, todo: config?
+MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB, all loaded in RAM at once
 
 
 async def __fetch_file_content(
@@ -83,16 +81,17 @@ async def __fetch_file_content(
 
 
 async def __fetch_files_content(
-	sources: Mapping[int, SourceItem | ReceivedFileItem]
+	sources: Mapping[int, SourceItem | ReceivedFileItem],
+	concurrent_file_fetches: int,
 ) -> tuple[Mapping[int, SourceItem], Mapping[int, IndexingError]]:
 	source_items = {}
 	error_items = {}
-	semaphore = asyncio.Semaphore(CONCURRENT_FILE_FETCHES)
 	tasks = []
 	task_sources = {}
+	semaphore = asyncio.Semaphore(concurrent_file_fetches)
 
 	file_count = sum(1 for s in sources.values() if isinstance(s, ReceivedFileItem))
-	logger.debug('Fetching content for %d file(s) (max %d concurrent)', file_count, CONCURRENT_FILE_FETCHES)
+	logger.debug('Fetching content for %d file(s) (max %d concurrent)', file_count, concurrent_file_fetches)
 
 	for db_id, file in sources.items():
 		if isinstance(file, SourceItem):
@@ -345,7 +344,9 @@ def _process_sources(
 		len(to_embed_sources),
 	)
 	t0 = perf_counter_ns()
-	populated_to_embed_sources, errored_sources = asyncio.run(__fetch_files_content(to_embed_sources))
+	populated_to_embed_sources, errored_sources = asyncio.run(
+		__fetch_files_content(to_embed_sources, config.concurrent_file_fetches)
+	)
 	elapsed_ms = (perf_counter_ns() - t0) / 1e6
 	logger.debug(
 		'File content fetch complete in %.2f ms: %d fetched, %d errored',
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 09be98a..38f0df8 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -52,17 +52,14 @@
 THREADS = {}
 THREAD_STOP_EVENT = Event()
 LOGGER = logging.getLogger('ccb.task_fetcher')
-FILES_INDEXING_BATCH_SIZE = 16  # theoretical max RAM usage: 16 * 100 MiB, todo: config?
 MIN_FILES_PER_CPU = 4
-# divides the batch into these many chunks
-PARALLEL_FILE_PARSING_COUNT = max(1, (os.cpu_count() or 2) - 1)  # todo: config?
-LOGGER.info(f'Using {PARALLEL_FILE_PARSING_COUNT} parallel file parsing workers')
-ACTIONS_BATCH_SIZE = 512  # todo: config?
 POLLING_COOLDOWN = 30
-TRIGGER = Event()
-CHECK_INTERVAL = 5
-CHECK_INTERVAL_WITH_TRIGGER = 5 * 60
-CHECK_INTERVAL_ON_ERROR = 15
+
+# task processing or request processing
+TP_TRIGGER = Event()
+TP_CHECK_INTERVAL = 5
+TP_CHECK_INTERVAL_WITH_TRIGGER = 5 * 60
+TP_CHECK_INTERVAL_ON_ERROR = 15
 CONTEXT_LIMIT=20
 
 
@@ -133,6 +130,13 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 			return dict.fromkeys(source_items, err)
 
 
+	# divides the batch into these many chunks
+	file_parsing_cpu_count = (
+		app_config.file_parsing_cpu_count,  # when set to a positive value
+		max(1, (os.cpu_count() or 2) - 1),  # when set to auto (-1)
+	)[app_config.file_parsing_cpu_count == -1]
+	LOGGER.info(f'Using {file_parsing_cpu_count} parallel file parsing workers')
+
 	while True:
 		if THREAD_STOP_EVENT.is_set():
 			LOGGER.info('Files indexing thread is stopping due to stop event being set')
@@ -147,7 +151,7 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 			q_items_res = nc.ocs(
 				'GET',
 				'/ocs/v2.php/apps/context_chat/queues/documents',
-				params={ 'n': FILES_INDEXING_BATCH_SIZE }
+				params={ 'n': app_config.doc_indexing_batch_size }
 			)
 
 			try:
@@ -164,14 +168,14 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 			providers_result = {}
 
 			# chunk file parsing for better file operation parallelism
-			file_chunk_size = max(MIN_FILES_PER_CPU, math.ceil(len(q_items.files) / PARALLEL_FILE_PARSING_COUNT))
+			file_chunk_size = max(MIN_FILES_PER_CPU, math.ceil(len(q_items.files) / file_parsing_cpu_count))
 			file_chunks = [
 				dict(list(q_items.files.items())[i:i+file_chunk_size])
 				for i in range(0, len(q_items.files), file_chunk_size)
 			]
 			provider_chunk_size = max(
 				MIN_FILES_PER_CPU,
-				math.ceil(len(q_items.content_providers) / PARALLEL_FILE_PARSING_COUNT),
+				math.ceil(len(q_items.content_providers) / file_parsing_cpu_count),
 			)
 			provider_chunks = [
 				dict(list(q_items.content_providers.items())[i:i+provider_chunk_size])
@@ -179,12 +183,12 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 			]
 
 			with ThreadPoolExecutor(
-				max_workers=PARALLEL_FILE_PARSING_COUNT,
+				max_workers=file_parsing_cpu_count,
 				thread_name_prefix='IndexingPool',
 			) as executor:
 				LOGGER.info(
-					'Dispatching %d file chunk(s) and %d provider chunk(s) to %d IndexingPool worker(s)',
-					len(file_chunks), len(provider_chunks), PARALLEL_FILE_PARSING_COUNT,
+					'Dispatching %d file chunk(s) and %d provider chunk(s)',
+					len(file_chunks), len(provider_chunks),
 				)
 				file_futures = [executor.submit(_load_sources, chunk) for chunk in file_chunks]
 				provider_futures = [executor.submit(_load_sources, chunk) for chunk in provider_chunks]
@@ -286,7 +290,7 @@ def updates_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 			q_items_res = nc.ocs(
 				'GET',
 				'/ocs/v2.php/apps/context_chat/queues/actions',
-				params={ 'n': ACTIONS_BATCH_SIZE }
+				params={ 'n': app_config.actions_batch_size }
 			)
 
 			try:
@@ -451,7 +455,7 @@ def resolve_scope_list(source_ids: list[str], userId: str) -> list[str]:
 
 
 def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
-	LOGGER.info('Starting task fetcher loop')
+	LOGGER.info('Starting request processing thread')
 
 	try:
 		network_em = NetworkEmbeddings(app_config)
@@ -466,7 +470,7 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 
 	while True:
 		if THREAD_STOP_EVENT.is_set():
-			LOGGER.info('Updates processing thread is stopping due to stop event being set')
+			LOGGER.info('Request processing thread is stopping due to stop event being set')
 			return
 
 		if not network_em.check_connection(ThreadType.REQUEST_PROCESSING.value):
@@ -485,7 +489,7 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 					continue
 			except (NextcloudException, RequestException, JSONDecodeError) as e:
 				LOGGER.error(f"Network error fetching the next task {e}", exc_info=e)
-				wait_for_tasks(CHECK_INTERVAL_ON_ERROR)
+				wait_for_tasks(TP_CHECK_INTERVAL_ON_ERROR)
 				continue
 
 			# Process task
@@ -536,21 +540,21 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 
 		except Exception as e:
 			LOGGER.exception('Error in task fetcher loop', exc_info=e)
-			wait_for_tasks(CHECK_INTERVAL_ON_ERROR)
+			wait_for_tasks(TP_CHECK_INTERVAL_ON_ERROR)
 
-def trigger_handler(providerId: str):
-	global TRIGGER
-	print('TRIGGER called')
-	TRIGGER.set()
+def trigger_handler(provider_id: str):
+	global TP_TRIGGER
+	LOGGER.debug('Task processing trigger received', extra={'provider_id': provider_id})
+	TP_TRIGGER.set()
 
 def wait_for_tasks(interval = None):
-	global TRIGGER
-	global CHECK_INTERVAL
-	global CHECK_INTERVAL_WITH_TRIGGER
-	actual_interval = CHECK_INTERVAL if interval is None else interval
-	if TRIGGER.wait(timeout=actual_interval):
-		CHECK_INTERVAL = CHECK_INTERVAL_WITH_TRIGGER
-	TRIGGER.clear()
+	global TP_TRIGGER
+	global TP_CHECK_INTERVAL
+	global TP_CHECK_INTERVAL_WITH_TRIGGER
+	actual_interval = TP_CHECK_INTERVAL if interval is None else interval
+	if TP_TRIGGER.wait(timeout=actual_interval):
+		TP_CHECK_INTERVAL = TP_CHECK_INTERVAL_WITH_TRIGGER
+	TP_TRIGGER.clear()
 
 
 def enrich_sources(results: list[SearchResult], userId: str) -> list[str]:
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 345eb6e..2694998 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -112,8 +112,10 @@ class TConfig(BaseModel):
 	use_colors: bool = True
 	uvicorn_workers: int = 1
 	embedding_chunk_size: int = 2000
-	# todo: unused now
-	doc_parser_worker_limit: int = 10
+	doc_indexing_batch_size: int = 32
+	actions_batch_size: int = 512
+	file_parsing_cpu_count: int = -1
+	concurrent_file_fetches: int = 10
 
 	vectordb: tuple[str, dict]
 	embedding: TEmbeddingConfig

From e4be682f4261a5428517cf9a8f20ee2432f745b2 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 9 Apr 2026 18:35:11 +0530
Subject: [PATCH 60/96] fix: ignore SIGTERM and SIGINT for subprocesses

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/utils.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index 4552e32..c793978 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -7,6 +7,7 @@
 import logging
 import multiprocessing as mp
 import os
+import signal
 import sys
 import traceback
 from collections.abc import Callable
@@ -112,6 +113,12 @@ def _truncate_capture(text: str) -> str:
 
 
 def exception_wrap(fun: Callable | None, *args, resconn: Connection, stdconn: Connection, **kwargs):
+	# ignore SIGINT and SIGTERM in child processes so that these signals don't immediately stop them,
+	# the handling is done in the fastapi lifespan handler to do a graceful shutdown
+	# SIGKILL cannot be ignored, so it still stops these processes
+	signal.signal(signal.SIGINT, signal.SIG_IGN)
+	signal.signal(signal.SIGTERM, signal.SIG_IGN)
+
 	# Preserve real stderr FD for faulthandler before we redirect sys.stderr.
 	_faulthandler_fd = os.dup(2)
 	with suppress(Exception):
@@ -128,10 +135,11 @@ def exception_wrap(fun: Callable | None, *args, resconn: Connection, stdconn: Co
 	sys.stderr = stderr_capture
 
 	try:
-		if fun is None:
-			resconn.send({ 'value': None, 'error': None })
-		else:
-			resconn.send({ 'value': fun(*args, **kwargs), 'error': None })
+		value = None if fun is None else fun(*args, **kwargs)
+		try:
+			resconn.send({ 'value': value, 'error': None })
+		except (BrokenPipeError, OSError, EOFError):
+			...  # parent closed the pipe during shutdown, exit cleanly
 	except BaseException as e:
 		tb = traceback.format_exc()
 		payload = {

From d7c9e4f4837329848060eb91b8c89a3caacc0a76 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 9 Apr 2026 19:07:31 +0530
Subject: [PATCH 61/96] fix: cleanup request processing, get template from
 config

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 config.cpu.yaml                        |  5 ++--
 config.gpu.yaml                        |  5 ++--
 context_chat_backend/chain/one_shot.py | 41 ++++++++------------------
 context_chat_backend/task_fetcher.py   |  2 +-
 4 files changed, 20 insertions(+), 33 deletions(-)

diff --git a/config.cpu.yaml b/config.cpu.yaml
index 304cb7d..6ceac91 100644
--- a/config.cpu.yaml
+++ b/config.cpu.yaml
@@ -46,6 +46,9 @@ embedding:
 
 llm:
   nc_texttotext:
+    # template:
+    # n_ctx:
+    # max_tokens:
 
   llama:
     # all options: https://python.langchain.com/api_reference/community/llms/langchain_community.llms.llamacpp.LlamaCpp.html
@@ -55,14 +58,12 @@ llm:
     max_tokens: 4096
     template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant, good at finding relevant context from documents to answer questions provided by the user. <|im_end|>\n<|im_start|> user\nUse the following documents as context to answer the question at the end. REMEMBER to excersice source critisicm as the documents are returned by a search provider that can return unrelated documents.\n\nSTART OF CONTEXT: \n{context} \n\nEND OF CONTEXT!\n\nIf you don't know the answer or are unsure, just say that you don't know, don't try to make up an answer. Don't mention the context in your answer but rather just answer the question directly. Detect the language of the question and make sure to use the same language that was used in the question to answer the question. Don't mention which language was used, but just answer the question directly in the same langauge. \nQuestion: {question} Let's think this step-by-step. \n<|im_end|>\n<|im_start|> assistant\n"
     no_ctx_template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant.<|im_end|>\n<|im_start|> user\n{question}<|im_end|>\n<|im_start|> assistant\n"
-    end_separator: "<|im_end|>"
 
   ctransformer:
     # all options: https://python.langchain.com/api_reference/community/llms/langchain_community.llms.ctransformers.CTransformers.html
     model: dolphin-2.2.1-mistral-7b.Q5_K_M.gguf
     template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant, good at finding relevant context from documents to answer questions provided by the user. <|im_end|>\n<|im_start|> user\nUse the following documents as context to answer the question at the end. REMEMBER to excersice source critisicm as the documents are returned by a search provider that can return unrelated documents.\n\nSTART OF CONTEXT: \n{context} \n\nEND OF CONTEXT!\n\nIf you don't know the answer or are unsure, just say that you don't know, don't try to make up an answer. Don't mention the context in your answer but rather just answer the question directly.  Detect the language of the question and make sure to use the same language that was used in the question to answer the question. Don't mention which language was used, but just answer the question directly in the same langauge. \nQuestion: {question} Let's think this step-by-step. \n<|im_end|>\n<|im_start|> assistant\n"
     no_ctx_template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant.<|im_end|>\n<|im_start|> user\n{question}<|im_end|>\n<|im_start|> assistant\n"
-    end_separator: "<|im_end|>"
     config:
       context_length: 8192
       max_new_tokens: 4096
diff --git a/config.gpu.yaml b/config.gpu.yaml
index 16dcb01..a12fd1b 100644
--- a/config.gpu.yaml
+++ b/config.gpu.yaml
@@ -47,6 +47,9 @@ embedding:
 
 llm:
   nc_texttotext:
+    # template:
+    # n_ctx:
+    # max_tokens:
 
   llama:
     # all options: https://python.langchain.com/api_reference/community/llms/langchain_community.llms.llamacpp.LlamaCpp.html
@@ -56,7 +59,6 @@ llm:
     max_tokens: 4096
     template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant, good at finding relevant context from documents to answer questions provided by the user. <|im_end|>\n<|im_start|> user\nUse the following documents as context to answer the question at the end. REMEMBER to excersice source critisicm as the documents are returned by a search provider that can return unrelated documents.\n\nSTART OF CONTEXT: \n{context} \n\nEND OF CONTEXT!\n\nIf you don't know the answer or are unsure, just say that you don't know, don't try to make up an answer. Don't mention the context in your answer but rather just answer the question directly.  Detect the language of the question and make sure to use the same language that was used in the question to answer the question. Don't mention which language was used, but just answer the question directly in the same langauge. \nQuestion: {question} Let's think this step-by-step. \n<|im_end|>\n<|im_start|> assistant\n"
     no_ctx_template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant.<|im_end|>\n<|im_start|> user\n{question}<|im_end|>\n<|im_start|> assistant\n"
-    end_separator: "<|im_end|>"
     n_gpu_layers: -1
     model_kwargs:
       device: cuda
@@ -66,7 +68,6 @@ llm:
     model: dolphin-2.2.1-mistral-7b.Q5_K_M.gguf
     template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant, good at finding relevant context from documents to answer questions provided by the user. <|im_end|>\n<|im_start|> user\nUse the following documents as context to answer the question at the end. REMEMBER to excersice source critisicm as the documents are returned by a search provider that can return unrelated documents.\n\nSTART OF CONTEXT: \n{context} \n\nEND OF CONTEXT!\n\nIf you don't know the answer or are unsure, just say that you don't know, don't try to make up an answer. Don't mention the context in your answer but rather just answer the question directly.  Detect the language of the question and make sure to use the same language that was used in the question to answer the question. Don't mention which language was used, but just answer the question directly in the same langauge. \nQuestion: {question} Let's think this step-by-step. \n<|im_end|>\n<|im_start|> assistant\n"
     no_ctx_template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant.<|im_end|>\n<|im_start|> user\n{question}<|im_end|>\n<|im_start|> assistant\n"
-    end_separator: "<|im_end|>"
     config:
       context_length: 8192
       max_new_tokens: 4096
diff --git a/context_chat_backend/chain/one_shot.py b/context_chat_backend/chain/one_shot.py
index c79f272..c387621 100644
--- a/context_chat_backend/chain/one_shot.py
+++ b/context_chat_backend/chain/one_shot.py
@@ -12,38 +12,25 @@
 from .query_proc import get_pruned_query
 from .types import ContextException, LLMOutput, ScopeType, SearchResult
 
-_LLM_TEMPLATE = '''Answer based only on this context and do not add any imaginative details. Make sure to use the same language as the question in your answer.
+_LLM_TEMPLATE = '''You're an AI assistant named Nextcloud Assistant, good at finding relevant context from documents to answer questions provided by the user.
+Use the following documents as context to answer the question at the end. REMEMBER to excersice source critisicm as the documents are returned by a search provider that can return unrelated documents.
+
+START OF CONTEXT:
 {context}
 
-{question}
-''' # noqa: E501
+END OF CONTEXT!
 
-logger = logging.getLogger('ccb.chain')
+If you don't know the answer or are unsure, just say that you don't know, don't try to make up an answer.
+Don't mention the context in your answer but rather just answer the question directly.
+Detect the language of the question and make sure to use the same language that was used in the question to answer the question.
+Don't mention which language was used, but just answer the question directly in the same langauge.
 
-# todo: remove this maybe
-def process_query(
-	user_id: str,
-	llm: LLM,
-	app_config: TConfig,
-	query: str,
-	no_ctx_template: str | None = None,
-	end_separator: str = '',
-):
-	"""
-	Raises
-	------
-	ValueError
-		If the context length is too small to fit the query
-	"""
-	stop = [end_separator] if end_separator else None
-	output = llm.invoke(
-		(query, get_pruned_query(llm, app_config, query, no_ctx_template, []))[no_ctx_template is not None],  # pyright: ignore[reportArgumentType]
-		stop=stop,
-		userid=user_id,
-	).strip()
+Question: {question}
 
-	return LLMOutput(output=output, sources=[])
+Let's think this step-by-step.
+''' # noqa: E501
 
+logger = logging.getLogger('ccb.chain')
 
 def process_context_query(
 	user_id: str,
@@ -55,7 +42,6 @@ def process_context_query(
 	scope_type: ScopeType | None = None,
 	scope_list: list[str] | None = None,
 	template: str | None = None,
-	end_separator: str = '',
 ):
 	"""
 	Raises
@@ -76,7 +62,6 @@ def process_context_query(
 
 	output = llm.invoke(
 		get_pruned_query(llm, app_config, query, template or _LLM_TEMPLATE, context_chunks),
-		stop=[end_separator],
 		userid=user_id,
 	).strip()
 	unique_sources = [SearchResult(
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 38f0df8..d8fee7c 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -647,7 +647,6 @@ def process_normal_task(
 	if task_input.get('scopeType') == 'none':
 		task_input['scopeType'] = None
 
-	# todo: document no template support
 	return exec_in_proc(target=process_context_query,
 		args=(
 			user_id,
@@ -658,6 +657,7 @@ def process_normal_task(
 			CONTEXT_LIMIT,
 			task_input.get('scopeType'),
 			task_input.get('scopeList'),
+			app_config.llm[1].get('template'),
 		)
 	)
 

From da680e3ddfd09ed20d0e4fa283995e636cc0180c Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Fri, 10 Apr 2026 12:08:40 +0530
Subject: [PATCH 62/96] fix: explicit check for non-None response

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/network_em.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/context_chat_backend/network_em.py b/context_chat_backend/network_em.py
index 8b85169..5ba8faf 100644
--- a/context_chat_backend/network_em.py
+++ b/context_chat_backend/network_em.py
@@ -119,7 +119,7 @@ def _get_embedding(self, input_: str | list[str], try_: int = 3) -> list[float]
 			raise e
 		except EmbeddingException as e:
 			try:
-				if e.response:
+				if e.response is not None:
 					err_msg = e.response.json().get('error', {}).get('message', '')
 					if err_msg == 'llama_decode returned -1':
 						# the document coult not be processed

From ecf07c4f3cf0d693074ee9ab236c1b6c584bf022 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 15 Apr 2026 12:05:02 +0530
Subject: [PATCH 63/96] fix: add default value of limit in search task type

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/controller.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 9c3812e..13fbb7c 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -67,16 +67,19 @@ def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 	try:
 		if enabled:
 			provider = TaskProcessingProvider(
-				id="context_chat-context_chat_search",
-				name="Context Chat",
-				task_type="context_chat:context_chat_search",
+				id='context_chat-context_chat_search',
+				name='Context Chat',
+				task_type='context_chat:context_chat_search',
 				expected_runtime=30,
+				input_shape_defaults={
+					'limit': 10,
+				},
 			)
 			nc.providers.task_processing.register(provider)
 			provider = TaskProcessingProvider(
-				id="context_chat-context_chat",
-				name="Context Chat",
-				task_type="context_chat:context_chat",
+				id='context_chat-context_chat',
+				name='Context Chat',
+				task_type='context_chat:context_chat',
 				expected_runtime=30,
 			)
 			nc.providers.task_processing.register(provider)

From 531e58105951f0d43f5ab432665e08850b4f0fb6 Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 15 Apr 2026 10:24:05 +0200
Subject: [PATCH 64/96] fix(bg_threads): Poll app enabled state every 30s in
 all threads

needed because enabled handler
---
 context_chat_backend/controller.py   | 14 ++++----
 context_chat_backend/task_fetcher.py | 51 ++++++++++++++++++++++------
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 13fbb7c..007e945 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -35,7 +35,7 @@
 from .models.types import LlmException
 from nc_py_api.ex_app import AppAPIAuthMiddleware
 from .utils import JSONResponse, exec_in_proc
-from .task_fetcher import start_bg_threads, trigger_handler, wait_for_bg_threads
+from .task_fetcher import THREAD_STOP_EVENT, start_bg_threads, trigger_handler, wait_for_bg_threads
 from .vectordb.service import count_documents_by_provider
 
 # setup
@@ -84,7 +84,11 @@ def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 			)
 			nc.providers.task_processing.register(provider)
 			app_enabled.set()
-			start_bg_threads(app_config, app_enabled)
+			if THREAD_STOP_EVENT.is_set():
+				# If the threads were previously stopped, we start them again
+				# otherwise the lifecycle handler has already started them
+				start_bg_threads(app_config)
+				THREAD_STOP_EVENT.clear()
 		else:
 			app_enabled.clear()
 			wait_for_bg_threads()
@@ -99,11 +103,9 @@ def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 @asynccontextmanager
 async def lifespan(app: FastAPI):
 	set_handlers(app, enabled_handler, models_to_fetch=models_to_fetch, trigger_handler=trigger_handler)
+	start_bg_threads(app_config)
 	nc = NextcloudApp()
-	if nc.enabled_state:
-		app_enabled.set()
-		start_bg_threads(app_config, app_enabled)
-	logger.info(f'App enable state at startup: {app_enabled.is_set()}')
+	logger.info(f'App enable state at startup: {nc.enabled_state}')
 	yield
 	vectordb_loader.offload()
 	wait_for_bg_threads()
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index d8fee7c..b40ea2a 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -10,7 +10,7 @@
 from contextlib import suppress
 from enum import Enum
 from threading import Event, Thread
-from time import sleep
+from time import sleep, time
 from typing import Any
 
 import niquests
@@ -69,7 +69,7 @@ class ThreadType(Enum):
 	REQUEST_PROCESSING = 'request_processing'
 
 
-def files_indexing_thread(app_config: TConfig, app_enabled: Event) -> None:
+def files_indexing_thread(app_config: TConfig) -> None:
 	try:
 		network_em = NetworkEmbeddings(app_config)
 		vectordb_loader = VectorDBLoader(app_config)
@@ -137,17 +137,28 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 	)[app_config.file_parsing_cpu_count == -1]
 	LOGGER.info(f'Using {file_parsing_cpu_count} parallel file parsing workers')
 
+	nc = NextcloudApp()
+	last_enabled_check = time()
+	enabled_state = nc.enabled_state
 	while True:
 		if THREAD_STOP_EVENT.is_set():
 			LOGGER.info('Files indexing thread is stopping due to stop event being set')
 			return
 
+		if time() - last_enabled_check > 30:  # check enabled state every 30 seconds
+			enabled_state = nc.enabled_state
+			last_enabled_check = time()
+
+		if not enabled_state:
+			LOGGER.info('App is disabled, files indexing thread will sleep until next enabled state check')
+			sleep(POLLING_COOLDOWN)
+			continue
+
 		try:
 			if not network_em.check_connection(ThreadType.FILES_INDEXING.value):
 				sleep(POLLING_COOLDOWN)
 				continue
 
-			nc = NextcloudApp()
 			q_items_res = nc.ocs(
 				'GET',
 				'/ocs/v2.php/apps/context_chat/queues/documents',
@@ -273,20 +284,30 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 
 
 
-def updates_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
+def updates_processing_thread(app_config: TConfig) -> None:
 	try:
 		vectordb_loader = VectorDBLoader(app_config)
 	except LoaderException as e:
 		LOGGER.error('Error initializing vector DB loader, files indexing thread will not start:', exc_info=e)
 		return
 
+	nc = NextcloudApp()
+	enabled_state = nc.enabled_state
+	last_enabled_check = time()
 	while True:
 		if THREAD_STOP_EVENT.is_set():
 			LOGGER.info('Updates processing thread is stopping due to stop event being set')
 			return
 
+		if time() - last_enabled_check > 30:  # check enabled state every 30 seconds
+			enabled_state = nc.enabled_state
+			last_enabled_check = time()
+
+		if not enabled_state:
+			sleep(POLLING_COOLDOWN)
+			continue
+
 		try:
-			nc = NextcloudApp()
 			q_items_res = nc.ocs(
 				'GET',
 				'/ocs/v2.php/apps/context_chat/queues/actions',
@@ -454,7 +475,7 @@ def resolve_scope_list(source_ids: list[str], userId: str) -> list[str]:
 	return ScopeList.model_validate(data).source_ids
 
 
-def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
+def request_processing_thread(app_config: TConfig) -> None:
 	LOGGER.info('Starting request processing thread')
 
 	try:
@@ -466,6 +487,8 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 		return
 
 	nc = NextcloudApp()
+	enabled_state = nc.enabled_state
+	last_enabled_check = time()
 	llm: LLM = llm_loader.load()
 
 	while True:
@@ -477,6 +500,14 @@ def request_processing_thread(app_config: TConfig, app_enabled: Event) -> None:
 			sleep(POLLING_COOLDOWN)
 			continue
 
+		if time() - last_enabled_check > 30:  # check enabled state every 30 seconds
+			enabled_state = nc.enabled_state
+			last_enabled_check = time()
+
+		if not enabled_state:
+			sleep(POLLING_COOLDOWN)
+			continue
+
 		try:
 			# Fetch pending task
 			try:
@@ -695,7 +726,7 @@ def process_search_task(
 	)
 
 
-def start_bg_threads(app_config: TConfig, app_enabled: Event):
+def start_bg_threads(app_config: TConfig):
 	if APP_ROLE == AppRole.INDEXING or APP_ROLE == AppRole.NORMAL:
 		if (
 			ThreadType.FILES_INDEXING in THREADS
@@ -707,12 +738,12 @@ def start_bg_threads(app_config: TConfig, app_enabled: Event):
 		THREAD_STOP_EVENT.clear()
 		THREADS[ThreadType.FILES_INDEXING] = Thread(
 			target=files_indexing_thread,
-			args=(app_config, app_enabled),
+			args=(app_config,),
 			name='FilesIndexingThread',
 		)
 		THREADS[ThreadType.UPDATES_PROCESSING] = Thread(
 			target=updates_processing_thread,
-			args=(app_config, app_enabled),
+			args=(app_config,),
 			name='UpdatesProcessingThread',
 		)
 		THREADS[ThreadType.FILES_INDEXING].start()
@@ -726,7 +757,7 @@ def start_bg_threads(app_config: TConfig, app_enabled: Event):
 		THREAD_STOP_EVENT.clear()
 		THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
 			target=request_processing_thread,
-			args=(app_config, app_enabled),
+			args=(app_config,),
 			name='RequestProcessingThread',
 		)
 		THREADS[ThreadType.REQUEST_PROCESSING].start()

From 7337d1710dae3b422f5a5ab3d981f7ea0b602856 Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 15 Apr 2026 11:41:28 +0200
Subject: [PATCH 65/96] fix(app_enabled): centralize app_enabled check

to reduce requests to nextcloud server

Signed-off-by: Marcel Klehr <mklehr@gmx.net>
---
 context_chat_backend/controller.py   | 26 +++++++++++++-----
 context_chat_backend/task_fetcher.py | 40 ++++++++--------------------
 context_chat_backend/utils.py        |  4 +--
 3 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 007e945..a719f11 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -2,6 +2,8 @@
 # SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
+import time
+
 from nc_py_api.ex_app.providers.task_processing import TaskProcessingProvider
 
 # isort: off
@@ -61,7 +63,20 @@
 		'revision': '607a30d783dfa663caf39e06633721c8d4cfcd7e',
 	}
 } if __download_models_from_hf else {}
+
+
 app_enabled = threading.Event()
+last_enabled_check: int|None = None
+def get_enabled_state() -> bool:
+	global last_enabled_check
+	if last_enabled_check is None or time.time() - last_enabled_check > 30:
+		nc = NextcloudApp()
+		if nc.enabled_state:
+			app_enabled.set()
+		else:
+			app_enabled.clear()
+		last_enabled_check = time.time()
+	return app_enabled.is_set()
 
 def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 	try:
@@ -87,7 +102,7 @@ def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 			if THREAD_STOP_EVENT.is_set():
 				# If the threads were previously stopped, we start them again
 				# otherwise the lifecycle handler has already started them
-				start_bg_threads(app_config)
+				start_bg_threads(app_config, get_enabled_state)
 				THREAD_STOP_EVENT.clear()
 		else:
 			app_enabled.clear()
@@ -103,9 +118,8 @@ def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 @asynccontextmanager
 async def lifespan(app: FastAPI):
 	set_handlers(app, enabled_handler, models_to_fetch=models_to_fetch, trigger_handler=trigger_handler)
-	start_bg_threads(app_config)
-	nc = NextcloudApp()
-	logger.info(f'App enable state at startup: {nc.enabled_state}')
+	start_bg_threads(app_config, get_enabled_state)
+	logger.info(f'App enable state at startup: {get_enabled_state()}')
 	yield
 	vectordb_loader.offload()
 	wait_for_bg_threads()
@@ -192,7 +206,7 @@ def decorator(func: Callable):
 		@wraps(func)
 		def wrapper(*args, **kwargs):
 			disable_aaa = app.extra['CONFIG'].disable_aaa
-			if not disable_aaa and not app_enabled.is_set():
+			if not disable_aaa and not get_enabled_state():
 				return JSONResponse('Context Chat is disabled, enable it from AppAPI to use it.', 503)
 
 			return func(*args, **kwargs)
@@ -213,7 +227,7 @@ def _(request: Request):
 
 @app.get('/enabled')
 def _():
-	return JSONResponse(content={'enabled': app_enabled.is_set()}, status_code=200)
+	return JSONResponse(content={'enabled': get_enabled_state()}, status_code=200)
 
 
 @app.post('/countIndexedDocuments')
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index b40ea2a..6cce556 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -10,7 +10,7 @@
 from contextlib import suppress
 from enum import Enum
 from threading import Event, Thread
-from time import sleep, time
+from time import sleep
 from typing import Any
 
 import niquests
@@ -69,7 +69,7 @@ class ThreadType(Enum):
 	REQUEST_PROCESSING = 'request_processing'
 
 
-def files_indexing_thread(app_config: TConfig) -> None:
+def files_indexing_thread(app_config: TConfig, get_enabled_state) -> None:
 	try:
 		network_em = NetworkEmbeddings(app_config)
 		vectordb_loader = VectorDBLoader(app_config)
@@ -138,18 +138,12 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 	LOGGER.info(f'Using {file_parsing_cpu_count} parallel file parsing workers')
 
 	nc = NextcloudApp()
-	last_enabled_check = time()
-	enabled_state = nc.enabled_state
 	while True:
 		if THREAD_STOP_EVENT.is_set():
 			LOGGER.info('Files indexing thread is stopping due to stop event being set')
 			return
 
-		if time() - last_enabled_check > 30:  # check enabled state every 30 seconds
-			enabled_state = nc.enabled_state
-			last_enabled_check = time()
-
-		if not enabled_state:
+		if not get_enabled_state():
 			LOGGER.info('App is disabled, files indexing thread will sleep until next enabled state check')
 			sleep(POLLING_COOLDOWN)
 			continue
@@ -284,7 +278,7 @@ def _load_sources(source_items: Mapping[int, SourceItem | ReceivedFileItem]) ->
 
 
 
-def updates_processing_thread(app_config: TConfig) -> None:
+def updates_processing_thread(app_config: TConfig, get_enabled_state) -> None:
 	try:
 		vectordb_loader = VectorDBLoader(app_config)
 	except LoaderException as e:
@@ -292,18 +286,12 @@ def updates_processing_thread(app_config: TConfig) -> None:
 		return
 
 	nc = NextcloudApp()
-	enabled_state = nc.enabled_state
-	last_enabled_check = time()
 	while True:
 		if THREAD_STOP_EVENT.is_set():
 			LOGGER.info('Updates processing thread is stopping due to stop event being set')
 			return
 
-		if time() - last_enabled_check > 30:  # check enabled state every 30 seconds
-			enabled_state = nc.enabled_state
-			last_enabled_check = time()
-
-		if not enabled_state:
+		if not get_enabled_state():
 			sleep(POLLING_COOLDOWN)
 			continue
 
@@ -475,7 +463,7 @@ def resolve_scope_list(source_ids: list[str], userId: str) -> list[str]:
 	return ScopeList.model_validate(data).source_ids
 
 
-def request_processing_thread(app_config: TConfig) -> None:
+def request_processing_thread(app_config: TConfig, get_enabled_state) -> None:
 	LOGGER.info('Starting request processing thread')
 
 	try:
@@ -487,8 +475,6 @@ def request_processing_thread(app_config: TConfig) -> None:
 		return
 
 	nc = NextcloudApp()
-	enabled_state = nc.enabled_state
-	last_enabled_check = time()
 	llm: LLM = llm_loader.load()
 
 	while True:
@@ -500,11 +486,7 @@ def request_processing_thread(app_config: TConfig) -> None:
 			sleep(POLLING_COOLDOWN)
 			continue
 
-		if time() - last_enabled_check > 30:  # check enabled state every 30 seconds
-			enabled_state = nc.enabled_state
-			last_enabled_check = time()
-
-		if not enabled_state:
+		if not get_enabled_state():
 			sleep(POLLING_COOLDOWN)
 			continue
 
@@ -726,7 +708,7 @@ def process_search_task(
 	)
 
 
-def start_bg_threads(app_config: TConfig):
+def start_bg_threads(app_config: TConfig, get_enabled_state):
 	if APP_ROLE == AppRole.INDEXING or APP_ROLE == AppRole.NORMAL:
 		if (
 			ThreadType.FILES_INDEXING in THREADS
@@ -738,12 +720,12 @@ def start_bg_threads(app_config: TConfig):
 		THREAD_STOP_EVENT.clear()
 		THREADS[ThreadType.FILES_INDEXING] = Thread(
 			target=files_indexing_thread,
-			args=(app_config,),
+			args=(app_config,get_enabled_state),
 			name='FilesIndexingThread',
 		)
 		THREADS[ThreadType.UPDATES_PROCESSING] = Thread(
 			target=updates_processing_thread,
-			args=(app_config,),
+			args=(app_config,get_enabled_state),
 			name='UpdatesProcessingThread',
 		)
 		THREADS[ThreadType.FILES_INDEXING].start()
@@ -757,7 +739,7 @@ def start_bg_threads(app_config: TConfig):
 		THREAD_STOP_EVENT.clear()
 		THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
 			target=request_processing_thread,
-			args=(app_config,),
+			args=(app_config,get_enabled_state),
 			name='RequestProcessingThread',
 		)
 		THREADS[ThreadType.REQUEST_PROCESSING].start()
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index c793978..2e82353 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -9,6 +9,7 @@
 import os
 import signal
 import sys
+import time
 import traceback
 from collections.abc import Callable
 from contextlib import suppress
@@ -18,7 +19,6 @@
 from typing import Any, TypeGuard, TypeVar
 
 from fastapi.responses import JSONResponse as FastAPIJSONResponse
-
 from .types import AppRole, TConfig, TEmbeddingAuthApiKey, TEmbeddingAuthBasic, TEmbeddingConfig
 
 T = TypeVar('T')
@@ -313,4 +313,4 @@ def get_app_role() -> AppRole:
 	if role not in ['indexing', 'rp']:
 		_logger.warning(f'Invalid app role: {role}, defaulting to all roles')
 		return AppRole.NORMAL
-	return AppRole(role)
+	return AppRole(role)
\ No newline at end of file

From 7c1cf456ca53aa15e69617b4e3f319769edbf3b7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 15 Apr 2026 10:53:56 +0000
Subject: [PATCH 66/96] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 context_chat_backend/utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index 2e82353..ca7a486 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -9,7 +9,6 @@
 import os
 import signal
 import sys
-import time
 import traceback
 from collections.abc import Callable
 from contextlib import suppress
@@ -313,4 +312,4 @@ def get_app_role() -> AppRole:
 	if role not in ['indexing', 'rp']:
 		_logger.warning(f'Invalid app role: {role}, defaulting to all roles')
 		return AppRole.NORMAL
-	return AppRole(role)
\ No newline at end of file
+	return AppRole(role)

From 5e9eb76e2b9c63a31ecabf5a1bb96e107433bd2f Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Mon, 20 Apr 2026 15:30:04 +0530
Subject: [PATCH 67/96] pyright and ruff fixes

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/controller.py | 2 +-
 context_chat_backend/utils.py      | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index a719f11..bcf48e0 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -66,7 +66,7 @@
 
 
 app_enabled = threading.Event()
-last_enabled_check: int|None = None
+last_enabled_check: float | None = None
 def get_enabled_state() -> bool:
 	global last_enabled_check
 	if last_enabled_check is None or time.time() - last_enabled_check > 30:
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index ca7a486..c793978 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -18,6 +18,7 @@
 from typing import Any, TypeGuard, TypeVar
 
 from fastapi.responses import JSONResponse as FastAPIJSONResponse
+
 from .types import AppRole, TConfig, TEmbeddingAuthApiKey, TEmbeddingAuthBasic, TEmbeddingConfig
 
 T = TypeVar('T')

From 04d7fe1c56366e1dbaad83a2095e868a97ee6492 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Mon, 20 Apr 2026 21:53:52 +0530
Subject: [PATCH 68/96] fix(k8s): do not register task proc trigger endpoint

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/controller.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index bcf48e0..f65f19b 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -8,7 +8,7 @@
 
 # isort: off
 from .chain.types import ContextException
-from .types import LoaderException, EmbeddingException
+from .types import AppRole, LoaderException, EmbeddingException
 from .vectordb.types import DbException, SafeDbException
 from .setup_functions import ensure_config_file, repair_run, setup_env_vars
 
@@ -36,7 +36,7 @@
 from .dyn_loader import VectorDBLoader
 from .models.types import LlmException
 from nc_py_api.ex_app import AppAPIAuthMiddleware
-from .utils import JSONResponse, exec_in_proc
+from .utils import JSONResponse, exec_in_proc, get_app_role
 from .task_fetcher import THREAD_STOP_EVENT, start_bg_threads, trigger_handler, wait_for_bg_threads
 from .vectordb.service import count_documents_by_provider
 
@@ -117,7 +117,13 @@ def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-	set_handlers(app, enabled_handler, models_to_fetch=models_to_fetch, trigger_handler=trigger_handler)
+	app_role = get_app_role()
+	if app_role == AppRole.NORMAL:
+		set_handlers(app, enabled_handler, models_to_fetch=models_to_fetch, trigger_handler=trigger_handler)
+	else:
+		# k8s' rp role pulls tasks
+		set_handlers(app, enabled_handler, models_to_fetch=models_to_fetch)
+
 	start_bg_threads(app_config, get_enabled_state)
 	logger.info(f'App enable state at startup: {get_enabled_state()}')
 	yield

From 2fbf9fc47de33015e0e3a21a2b3d0f07869d366b Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Mon, 20 Apr 2026 21:54:40 +0530
Subject: [PATCH 69/96] fix(k8s): do not start internal pgsql in k8s env

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 dockerfile_scripts/pgsql/setup.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dockerfile_scripts/pgsql/setup.sh b/dockerfile_scripts/pgsql/setup.sh
index cee4295..7578ed8 100755
--- a/dockerfile_scripts/pgsql/setup.sh
+++ b/dockerfile_scripts/pgsql/setup.sh
@@ -18,7 +18,7 @@ fi
 # Check if EXTERNAL_DB is set
 if [ -n "${EXTERNAL_DB}" ]; then
     if [[ "$EXTERNAL_DB" != "postgresql+psycopg://"* ]]; then
-        echo "EXTERNAL_DB must be a PostgreSQL URL and start with 'postgresql+psycopg://'"
+        printf "%s\n" "EXTERNAL_DB must be a PostgreSQL URL and start with 'postgresql+psycopg://'" >&2
         exit 1
     fi
 
@@ -31,6 +31,11 @@ if [ -n "${EXTERNAL_DB}" ]; then
     exit 0
 fi
 
+if [[ -n "${APP_ROLE}" && "$APP_ROLE" != "normal" && "$APP_ROLE" != "" ]]; then
+    printf "%s\n" "Refusing to start the internal postgresql server in Kubernetes environment, use an external database through the EXTERNAL_DB env var." >&2
+    exit 1
+fi
+
 # Ensure the directory exists and has the correct permissions
 mkdir -p "$DATA_DIR"
 chmod +rx "${APP_PERSISTENT_STORAGE:-persistent_storage}"

From cf98ab2ec33cc1f28fd008ba67196a7c86e8a0be Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 21 Apr 2026 15:38:13 +0530
Subject: [PATCH 70/96] fix(k8s): log exclusively to stderr for k8s env

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/controller.py   | 11 +++++--
 context_chat_backend/logger.py       |  1 +
 context_chat_backend/task_fetcher.py | 14 +++++----
 context_chat_backend/utils.py        |  5 ++++
 logger_config.k8s.yaml               | 43 ++++++++++++++++++++++++++++
 main.py                              | 21 +++++++++-----
 main_em.py                           | 21 +++++++++-----
 7 files changed, 94 insertions(+), 22 deletions(-)
 create mode 100644 logger_config.k8s.yaml

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index f65f19b..02db402 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -36,7 +36,7 @@
 from .dyn_loader import VectorDBLoader
 from .models.types import LlmException
 from nc_py_api.ex_app import AppAPIAuthMiddleware
-from .utils import JSONResponse, exec_in_proc, get_app_role
+from .utils import JSONResponse, exec_in_proc, get_app_role, is_k8s_env
 from .task_fetcher import THREAD_STOP_EVENT, start_bg_threads, trigger_handler, wait_for_bg_threads
 from .vectordb.service import count_documents_by_provider
 
@@ -134,6 +134,7 @@ async def lifespan(app: FastAPI):
 app = FastAPI(debug=app_config.debug, lifespan=lifespan)  # pyright: ignore[reportArgumentType]
 
 app.extra['CONFIG'] = app_config
+k8s_env = is_k8s_env()
 
 
 # loaders
@@ -244,7 +245,13 @@ def _():
 
 
 @app.get('/downloadLogs')
-def download_logs() -> FileResponse:
+def download_logs():
+	if k8s_env:
+		return JSONResponse(
+			'Download of logs is not supported in Kubernetes environment. Use the standard logging infrastructure.',
+			status_code=400,
+		)
+
 	with tempfile.NamedTemporaryFile('wb', delete=False) as tmp:
 		with zipfile.ZipFile(tmp, mode='w', compression=zipfile.ZIP_DEFLATED) as zip_file:
 			files = os.listdir(os.path.join(persistent_storage(), 'logs'))
diff --git a/context_chat_backend/logger.py b/context_chat_backend/logger.py
index 79e99af..25fb161 100644
--- a/context_chat_backend/logger.py
+++ b/context_chat_backend/logger.py
@@ -51,6 +51,7 @@ def __init__(
 		self,
 		*,
 		fmt_keys: dict[str, str] | None = None,
+		use_colors: bool = False,
 	):
 		super().__init__()
 		self.fmt_keys = fmt_keys if fmt_keys is not None else {}
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 6cce556..dd6f296 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -282,7 +282,7 @@ def updates_processing_thread(app_config: TConfig, get_enabled_state) -> None:
 	try:
 		vectordb_loader = VectorDBLoader(app_config)
 	except LoaderException as e:
-		LOGGER.error('Error initializing vector DB loader, files indexing thread will not start:', exc_info=e)
+		LOGGER.error('Error initializing vector DB loader, updates processing thread will not start:', exc_info=e)
 		return
 
 	nc = NextcloudApp()
@@ -292,6 +292,7 @@ def updates_processing_thread(app_config: TConfig, get_enabled_state) -> None:
 			return
 
 		if not get_enabled_state():
+			LOGGER.info('App is disabled, updates processing thread will sleep until next enabled state check')
 			sleep(POLLING_COOLDOWN)
 			continue
 
@@ -471,7 +472,7 @@ def request_processing_thread(app_config: TConfig, get_enabled_state) -> None:
 		vectordb_loader = VectorDBLoader(app_config)
 		llm_loader = LLMModelLoader(app_config)
 	except LoaderException as e:
-		LOGGER.error('Error initializing vector DB loader, files indexing thread will not start:', exc_info=e)
+		LOGGER.error('Error initializing vector DB loader, request processing thread will not start:', exc_info=e)
 		return
 
 	nc = NextcloudApp()
@@ -482,15 +483,16 @@ def request_processing_thread(app_config: TConfig, get_enabled_state) -> None:
 			LOGGER.info('Request processing thread is stopping due to stop event being set')
 			return
 
-		if not network_em.check_connection(ThreadType.REQUEST_PROCESSING.value):
-			sleep(POLLING_COOLDOWN)
-			continue
-
 		if not get_enabled_state():
+			LOGGER.info('App is disabled, request processing thread will sleep until next enabled state check')
 			sleep(POLLING_COOLDOWN)
 			continue
 
 		try:
+			if not network_em.check_connection(ThreadType.REQUEST_PROCESSING.value):
+				sleep(POLLING_COOLDOWN)
+				continue
+
 			# Fetch pending task
 			try:
 				response = nc.providers.task_processing.next_task(
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index c793978..d1e6210 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -314,3 +314,8 @@ def get_app_role() -> AppRole:
 		_logger.warning(f'Invalid app role: {role}, defaulting to all roles')
 		return AppRole.NORMAL
 	return AppRole(role)
+
+
+def is_k8s_env():
+	role = get_app_role()
+	return role == AppRole.NORMAL
diff --git a/logger_config.k8s.yaml b/logger_config.k8s.yaml
new file mode 100644
index 0000000..6d5c729
--- /dev/null
+++ b/logger_config.k8s.yaml
@@ -0,0 +1,43 @@
+#
+# SPDX-FileCopyrightText: 2022 MCODING, LLC
+# SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+#
+
+version: 1
+disable_existing_loggers: false
+
+formatters:
+  json:
+    (): context_chat_backend.logger.JSONFormatter
+    fmt_keys:
+      timestamp: timestamp
+      level: levelname
+      logger: name
+      message: message
+      filename: filename
+      function: funcName
+      line: lineno
+      thread_name: threadName
+      pid: process
+
+
+handlers:
+  stderr:
+    class: logging.StreamHandler
+    level: DEBUG
+    formatter: json
+    stream: ext://sys.stderr
+
+
+loggers:
+  root:
+    level: WARNING
+    handlers:
+    - stderr
+
+  ccb:
+    level: WARNING
+    handlers:
+    - stderr
+    propagate: false
diff --git a/main.py b/main.py
index 076b7db..8a2beda 100755
--- a/main.py
+++ b/main.py
@@ -15,9 +15,10 @@
 from context_chat_backend.types import TConfig  # isort: skip
 from context_chat_backend.controller import app  # isort: skip
 from context_chat_backend.logger import get_logging_config, setup_logging  # isort: skip
-from context_chat_backend.utils import redact_config  # isort: skip
+from context_chat_backend.utils import is_k8s_env, redact_config  # isort: skip
 
 LOGGER_CONFIG_NAME = 'logger_config.yaml'
+LOGGER_K8S_CONFIG_NAME = 'logger_config.k8s.yaml'
 
 def _setup_log_levels(debug: bool):
 	'''
@@ -46,7 +47,8 @@ def _setup_log_levels(debug: bool):
 
 
 if __name__ == '__main__':
-	logging_config = get_logging_config(LOGGER_CONFIG_NAME)
+	k8s_env = is_k8s_env()
+	logging_config = get_logging_config(LOGGER_K8S_CONFIG_NAME if k8s_env else LOGGER_CONFIG_NAME)
 	setup_logging(logging_config)
 	app_config: TConfig = app.extra['CONFIG']
 	_setup_log_levels(app_config.debug)
@@ -66,11 +68,16 @@ def _setup_log_levels(debug: bool):
 	print('App config:\n' + redact_config(app_config).model_dump_json(indent=2), flush=True)
 
 	uv_log_config = uvicorn.config.LOGGING_CONFIG  # pyright: ignore[reportAttributeAccessIssue]
-	uv_log_config['formatters']['json'] = logging_config['formatters']['json']
-	uv_log_config['handlers']['file_json'] = logging_config['handlers']['file_json']
+	use_colors = False if k8s_env else (app_config.use_colors and getenv('CI', 'false') == 'false')
 
-	uv_log_config['loggers']['uvicorn']['handlers'].append('file_json')
-	uv_log_config['loggers']['uvicorn.access']['handlers'].append('file_json')
+	if k8s_env:
+		uv_log_config['formatters']['default'] = logging_config['formatters']['json']
+		uv_log_config['formatters']['access'] = logging_config['formatters']['json']
+	else:
+		uv_log_config['formatters']['json'] = logging_config['formatters']['json']
+		uv_log_config['handlers']['file_json'] = logging_config['handlers']['file_json']
+		uv_log_config['loggers']['uvicorn']['handlers'].append('file_json')
+		uv_log_config['loggers']['uvicorn.access']['handlers'].append('file_json')
 
 	run_app(
 		uvicorn_app=app,
@@ -78,7 +85,7 @@ def _setup_log_levels(debug: bool):
 		interface='asgi3',
 		log_config=uv_log_config,
 		log_level=app_config.uvicorn_log_level,
-		use_colors=bool(app_config.use_colors and getenv('CI', 'false') == 'false'),
+		use_colors=use_colors,
 		# limit_concurrency=10,
 		# backlog=20,
 		timeout_keep_alive=120,
diff --git a/main_em.py b/main_em.py
index b7d5a93..17d9f9a 100755
--- a/main_em.py
+++ b/main_em.py
@@ -16,10 +16,11 @@
 from context_chat_backend.config_parser import get_config  # isort: skip
 from context_chat_backend.logger import get_logging_config, setup_logging  # isort: skip
 from context_chat_backend.setup_functions import ensure_config_file, setup_env_vars  # isort: skip
-from context_chat_backend.utils import redact_config	# isort: skip
+from context_chat_backend.utils import is_k8s_env, redact_config	# isort: skip
 
 
 LOGGER_CONFIG_NAME = 'logger_config_em.yaml'
+LOGGER_K8S_CONFIG_NAME = 'logger_config.k8s.yaml'
 STARTUP_CHECK_SEC = 10
 MAX_TRIES = 180  # 180*10 secs = 30 minutes max
 
@@ -108,7 +109,8 @@ def _wait_main_app_enabled() -> None:
 	# in local embedding server config
 	print('Embedder config:\n' + redact_config(em_conf).model_dump_json(indent=2), flush=True)
 
-	logging_config = get_logging_config(LOGGER_CONFIG_NAME)
+	k8s_env = is_k8s_env()
+	logging_config = get_logging_config(LOGGER_K8S_CONFIG_NAME if k8s_env else LOGGER_CONFIG_NAME)
 	setup_logging(logging_config)
 	logger = logging.getLogger('emserver')
 	if app_config.debug:
@@ -158,11 +160,16 @@ def _wait_main_app_enabled() -> None:
 	)
 
 	uv_log_config = uvicorn.config.LOGGING_CONFIG  # pyright: ignore[reportAttributeAccessIssue]
-	uv_log_config['formatters']['json'] = logging_config['formatters']['json']
-	uv_log_config['handlers']['file_json'] = logging_config['handlers']['file_json']
+	use_colors = False if k8s_env else (app_config.use_colors and os.getenv('CI', 'false') == 'false')
 
-	uv_log_config['loggers']['uvicorn']['handlers'].append('file_json')
-	uv_log_config['loggers']['uvicorn.access']['handlers'].append('file_json')
+	if k8s_env:
+		uv_log_config['formatters']['default'] = logging_config['formatters']['json']
+		uv_log_config['formatters']['access'] = logging_config['formatters']['json']
+	else:
+		uv_log_config['formatters']['json'] = logging_config['formatters']['json']
+		uv_log_config['handlers']['file_json'] = logging_config['handlers']['file_json']
+		uv_log_config['loggers']['uvicorn']['handlers'].append('file_json')
+		uv_log_config['loggers']['uvicorn.access']['handlers'].append('file_json')
 
 	uvicorn.run(
 		# todo: use string import of the app
@@ -173,6 +180,6 @@ def _wait_main_app_enabled() -> None:
 		interface='asgi3',
 		log_config=uv_log_config,
 		log_level=app_config.uvicorn_log_level,
-		use_colors=bool(app_config.use_colors and os.getenv('CI', 'false') == 'false'),
+		use_colors=use_colors,
 		workers=em_conf.workers,
 	)

From 6e7f20b8cb19e0d49172764f0aa95c86e2fde8af Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 21 Apr 2026 14:22:50 +0530
Subject: [PATCH 71/96] chore: separate out updates processing into its own app role

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 appinfo/info.xml                     |  6 +++++
 context_chat_backend/task_fetcher.py | 34 +++++++++++++++++-----------
 context_chat_backend/types.py        |  1 +
 context_chat_backend/utils.py        |  2 +-
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/appinfo/info.xml b/appinfo/info.xml
index 30194ba..879524d 100644
--- a/appinfo/info.xml
+++ b/appinfo/info.xml
@@ -89,6 +89,12 @@ Setup background job workers as described here: https://docs.nextcloud.com/serve
 				<env>APP_ROLE=rp</env>
 				<expose>true</expose>
 			</role>
+			<role>
+				<name>up</name>
+				<display-name>Metadata Updates Processing Mode</display-name>
+				<env>APP_ROLE=up</env>
+				<expose>false</expose>
+			</role>
 			<role>
 				<name>indexing</name>
 				<display-name>Indexing Mode</display-name>
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index dd6f296..81307d1 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -711,34 +711,37 @@ def process_search_task(
 
 
 def start_bg_threads(app_config: TConfig, get_enabled_state):
+	THREAD_STOP_EVENT.clear()
+
 	if APP_ROLE == AppRole.INDEXING or APP_ROLE == AppRole.NORMAL:
-		if (
-			ThreadType.FILES_INDEXING in THREADS
-			or ThreadType.UPDATES_PROCESSING in THREADS
-		):
-			LOGGER.info('Background threads already running, skipping start')
+		if ThreadType.FILES_INDEXING in THREADS:
+			LOGGER.info('Indexing background threads are already up, skipping start')
 			return
 
-		THREAD_STOP_EVENT.clear()
 		THREADS[ThreadType.FILES_INDEXING] = Thread(
 			target=files_indexing_thread,
 			args=(app_config,get_enabled_state),
 			name='FilesIndexingThread',
 		)
+		THREADS[ThreadType.FILES_INDEXING].start()
+
+	if APP_ROLE == AppRole.UP or APP_ROLE == AppRole.NORMAL:
+		if ThreadType.UPDATES_PROCESSING in THREADS:
+			LOGGER.info('Updates processing background threads are already up, skipping start')
+			return
+
 		THREADS[ThreadType.UPDATES_PROCESSING] = Thread(
 			target=updates_processing_thread,
 			args=(app_config,get_enabled_state),
 			name='UpdatesProcessingThread',
 		)
-		THREADS[ThreadType.FILES_INDEXING].start()
 		THREADS[ThreadType.UPDATES_PROCESSING].start()
 
 	if APP_ROLE == AppRole.RP or APP_ROLE == AppRole.NORMAL:
 		if ThreadType.REQUEST_PROCESSING in THREADS:
-			LOGGER.info('Background threads already running, skipping start')
+			LOGGER.info('Request processing background threads are already up, skipping start')
 			return
 
-		THREAD_STOP_EVENT.clear()
 		THREADS[ThreadType.REQUEST_PROCESSING] = Thread(
 			target=request_processing_thread,
 			args=(app_config,get_enabled_state),
@@ -748,20 +751,25 @@ def start_bg_threads(app_config: TConfig, get_enabled_state):
 
 
 def wait_for_bg_threads():
+	THREAD_STOP_EVENT.set()
+
 	if APP_ROLE == AppRole.INDEXING or APP_ROLE == AppRole.NORMAL:
-		if (ThreadType.FILES_INDEXING not in THREADS or ThreadType.UPDATES_PROCESSING not in THREADS):
+		if ThreadType.FILES_INDEXING not in THREADS:
 			return
 
-		THREAD_STOP_EVENT.set()
 		THREADS[ThreadType.FILES_INDEXING].join()
-		THREADS[ThreadType.UPDATES_PROCESSING].join()
 		THREADS.pop(ThreadType.FILES_INDEXING)
+
+	if APP_ROLE == AppRole.UP or APP_ROLE == AppRole.NORMAL:
+		if ThreadType.UPDATES_PROCESSING not in THREADS:
+			return
+
+		THREADS[ThreadType.UPDATES_PROCESSING].join()
 		THREADS.pop(ThreadType.UPDATES_PROCESSING)
 
 	if APP_ROLE == AppRole.RP or APP_ROLE == AppRole.NORMAL:
 		if (ThreadType.REQUEST_PROCESSING not in THREADS):
 			return
 
-		THREAD_STOP_EVENT.set()
 		THREADS[ThreadType.REQUEST_PROCESSING].join()
 		THREADS.pop(ThreadType.REQUEST_PROCESSING)
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 2694998..12574a9 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -156,6 +156,7 @@ class AppRole(str, Enum):
 	NORMAL = 'normal'
 	INDEXING = 'indexing'
 	RP = 'rp'
+	UP = 'up'
 
 
 class CommonSourceItem(BaseModel):
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index d1e6210..f27a38d 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -310,7 +310,7 @@ def get_app_role() -> AppRole:
 	role = os.getenv('APP_ROLE', '').lower()
 	if role == '':
 		return AppRole.NORMAL
-	if role not in ['indexing', 'rp']:
+	if role not in ['indexing', 'rp', 'up']:
 		_logger.warning(f'Invalid app role: {role}, defaulting to all roles')
 		return AppRole.NORMAL
 	return AppRole(role)

From 309de32d2f588880bc0a512b0f034c4336228563 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 21 Apr 2026 18:23:59 +0530
Subject: [PATCH 72/96] fix(k8s): app role fixes

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 Dockerfile                         |  3 +--
 context_chat_backend/controller.py |  7 ++++---
 context_chat_backend/utils.py      |  2 +-
 main_em.py                         | 11 ++++++++---
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 3430a5e..79c0d47 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -47,8 +47,7 @@ COPY context_chat_backend context_chat_backend
 COPY main.py .
 COPY main_em.py .
 COPY config.?pu.yaml .
-COPY logger_config.yaml .
-COPY logger_config_em.yaml .
+COPY logger_config*.yaml .
 COPY hwdetect.sh .
 COPY harp_connect.sh .
 COPY supervisord.conf /etc/supervisor/supervisord.conf
diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 02db402..4c2729b 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -43,7 +43,9 @@
 # setup
 
 # only run once
-if mp.current_process().name == 'MainProcess':
+APP_ROLE = get_app_role()
+if mp.current_process().name == 'MainProcess' and APP_ROLE in (AppRole.NORMAL, AppRole.RP):
+	# normal docker containers and RP role in k8s
 	repair_run()
 	ensure_config_file()
 
@@ -117,8 +119,7 @@ def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-	app_role = get_app_role()
-	if app_role == AppRole.NORMAL:
+	if APP_ROLE == AppRole.NORMAL:
 		set_handlers(app, enabled_handler, models_to_fetch=models_to_fetch, trigger_handler=trigger_handler)
 	else:
 		# k8s' rp role pulls tasks
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index f27a38d..507b53b 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -318,4 +318,4 @@ def get_app_role() -> AppRole:
 
 def is_k8s_env():
 	role = get_app_role()
-	return role == AppRole.NORMAL
+	return role != AppRole.NORMAL
diff --git a/main_em.py b/main_em.py
index 17d9f9a..4cadbd4 100755
--- a/main_em.py
+++ b/main_em.py
@@ -12,11 +12,11 @@
 import niquests
 import uvicorn
 
-from context_chat_backend.types import DEFAULT_EM_MODEL_ALIAS  # isort: skip
+from context_chat_backend.types import DEFAULT_EM_MODEL_ALIAS, AppRole  # isort: skip
 from context_chat_backend.config_parser import get_config  # isort: skip
 from context_chat_backend.logger import get_logging_config, setup_logging  # isort: skip
 from context_chat_backend.setup_functions import ensure_config_file, setup_env_vars  # isort: skip
-from context_chat_backend.utils import is_k8s_env, redact_config	# isort: skip
+from context_chat_backend.utils import get_app_role, is_k8s_env, redact_config	# isort: skip
 
 
 LOGGER_CONFIG_NAME = 'logger_config_em.yaml'
@@ -89,9 +89,14 @@ def _wait_main_app_enabled() -> None:
 
 
 if __name__ == '__main__':
+	app_role = get_app_role()
+	if app_role == AppRole.UP:
+		print('Internal embedding server is not required for the Updates Processing role, stopping this process.')
+		exit(0)
+
 	# intial buffer
 	print(
-		f"Waiting for {STARTUP_CHECK_SEC} seconds before starting embedding server to allow main app to start",
+		f'Waiting for {STARTUP_CHECK_SEC} seconds before starting embedding server to allow main app to start',
 		flush=True,
 	)
 	sleep(STARTUP_CHECK_SEC)

From 7b5020e1a408b3f80f5f49d21141b6bd3bf0c864 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 21 Apr 2026 18:25:23 +0530
Subject: [PATCH 73/96] fix: scoped context search fixes

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/chain/one_shot.py    | 2 ++
 context_chat_backend/vectordb/pgvector.py | 8 +++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/context_chat_backend/chain/one_shot.py b/context_chat_backend/chain/one_shot.py
index c387621..3b4224c 100644
--- a/context_chat_backend/chain/one_shot.py
+++ b/context_chat_backend/chain/one_shot.py
@@ -52,6 +52,8 @@ def process_context_query(
 	db = vectordb_loader.load()
 	context_docs = get_context_docs(user_id, query, db, ctx_limit, scope_type, scope_list)
 	if len(context_docs) == 0:
+		if scope_type is not None:
+			raise ContextException('No documents retrieved, please choose a wider scope of documents to search from')
 		raise ContextException('No documents retrieved, please index a few documents first')
 
 	context_chunks = get_context_chunks(context_docs)
diff --git a/context_chat_backend/vectordb/pgvector.py b/context_chat_backend/vectordb/pgvector.py
index 9d88024..e833356 100644
--- a/context_chat_backend/vectordb/pgvector.py
+++ b/context_chat_backend/vectordb/pgvector.py
@@ -610,10 +610,9 @@ def doc_search(
 		try:
 			with self.session_maker() as session:
 				doc_filters = [AccessListStore.uid == user_id]
-				match scope_type:
-					case ScopeType.PROVIDER:
+				if scope_type == ScopeType.PROVIDER.value:
 						doc_filters.append(DocumentsStore.provider.in_(scope_list))  # pyright: ignore[reportArgumentType]
-					case ScopeType.SOURCE:
+				elif scope_type == ScopeType.SOURCE.value:
 						doc_filters.append(DocumentsStore.source_id.in_(scope_list))  # pyright: ignore[reportArgumentType]
 
 				# get chunks associated with the user
@@ -625,6 +624,9 @@ def doc_search(
 				result = session.execute(stmt).fetchall()
 				chunk_ids = [str(c) for res in result for c in res.chunks]
 
+				if len(chunk_ids) == 0:
+					return []
+
 				# get embeddings
 				return self._similarity_search(session, query, chunk_ids, k)
 		except EmbeddingException:

From bdd842fb41b0535b3f433aa1d82745e8bcfadd76 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 22 Apr 2026 14:13:29 +0530
Subject: [PATCH 74/96] chore: better naming of app roles

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 appinfo/info.xml                     | 4 ++--
 context_chat_backend/controller.py   | 2 +-
 context_chat_backend/task_fetcher.py | 8 ++++----
 context_chat_backend/types.py        | 4 ++--
 context_chat_backend/utils.py        | 5 +++--
 main_em.py                           | 2 +-
 6 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/appinfo/info.xml b/appinfo/info.xml
index 879524d..c65a8e8 100644
--- a/appinfo/info.xml
+++ b/appinfo/info.xml
@@ -86,13 +86,13 @@ Setup background job workers as described here: https://docs.nextcloud.com/serve
 			<role>
 				<name>rp</name>
 				<display-name>Request Processing Mode</display-name>
-				<env>APP_ROLE=rp</env>
+				<env>APP_ROLE=requestproc</env>
 				<expose>true</expose>
 			</role>
 			<role>
 				<name>up</name>
 				<display-name>Metadata Updates Processing Mode</display-name>
-				<env>APP_ROLE=up</env>
+				<env>APP_ROLE=updatesproc</env>
 				<expose>false</expose>
 			</role>
 			<role>
diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 4c2729b..278f892 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -44,7 +44,7 @@
 
 # only run once
 APP_ROLE = get_app_role()
-if mp.current_process().name == 'MainProcess' and APP_ROLE in (AppRole.NORMAL, AppRole.RP):
+if mp.current_process().name == 'MainProcess' and APP_ROLE in (AppRole.NORMAL, AppRole.REQUEST_PROC):
 	# normal docker containers and RP role in k8s
 	repair_run()
 	ensure_config_file()
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index 81307d1..a41ae06 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -725,7 +725,7 @@ def start_bg_threads(app_config: TConfig, get_enabled_state):
 		)
 		THREADS[ThreadType.FILES_INDEXING].start()
 
-	if APP_ROLE == AppRole.UP or APP_ROLE == AppRole.NORMAL:
+	if APP_ROLE == AppRole.UPDATES_PROC or APP_ROLE == AppRole.NORMAL:
 		if ThreadType.UPDATES_PROCESSING in THREADS:
 			LOGGER.info('Updates processing background threads are already up, skipping start')
 			return
@@ -737,7 +737,7 @@ def start_bg_threads(app_config: TConfig, get_enabled_state):
 		)
 		THREADS[ThreadType.UPDATES_PROCESSING].start()
 
-	if APP_ROLE == AppRole.RP or APP_ROLE == AppRole.NORMAL:
+	if APP_ROLE == AppRole.REQUEST_PROC or APP_ROLE == AppRole.NORMAL:
 		if ThreadType.REQUEST_PROCESSING in THREADS:
 			LOGGER.info('Request processing background threads are already up, skipping start')
 			return
@@ -760,14 +760,14 @@ def wait_for_bg_threads():
 		THREADS[ThreadType.FILES_INDEXING].join()
 		THREADS.pop(ThreadType.FILES_INDEXING)
 
-	if APP_ROLE == AppRole.UP or APP_ROLE == AppRole.NORMAL:
+	if APP_ROLE == AppRole.UPDATES_PROC or APP_ROLE == AppRole.NORMAL:
 		if ThreadType.UPDATES_PROCESSING not in THREADS:
 			return
 
 		THREADS[ThreadType.UPDATES_PROCESSING].join()
 		THREADS.pop(ThreadType.UPDATES_PROCESSING)
 
-	if APP_ROLE == AppRole.RP or APP_ROLE == AppRole.NORMAL:
+	if APP_ROLE == AppRole.REQUEST_PROC or APP_ROLE == AppRole.NORMAL:
 		if (ThreadType.REQUEST_PROCESSING not in THREADS):
 			return
 
diff --git a/context_chat_backend/types.py b/context_chat_backend/types.py
index 12574a9..700d7dd 100644
--- a/context_chat_backend/types.py
+++ b/context_chat_backend/types.py
@@ -155,8 +155,8 @@ class DocErrorEmbeddingException(EmbeddingException):
 class AppRole(str, Enum):
 	NORMAL = 'normal'
 	INDEXING = 'indexing'
-	RP = 'rp'
-	UP = 'up'
+	REQUEST_PROC = 'requestproc'
+	UPDATES_PROC = 'updatesproc'
 
 
 class CommonSourceItem(BaseModel):
diff --git a/context_chat_backend/utils.py b/context_chat_backend/utils.py
index 507b53b..d572714 100644
--- a/context_chat_backend/utils.py
+++ b/context_chat_backend/utils.py
@@ -310,10 +310,11 @@ def get_app_role() -> AppRole:
 	role = os.getenv('APP_ROLE', '').lower()
 	if role == '':
 		return AppRole.NORMAL
-	if role not in ['indexing', 'rp', 'up']:
+	try:
+		return AppRole(role)
+	except ValueError:
 		_logger.warning(f'Invalid app role: {role}, defaulting to all roles')
 		return AppRole.NORMAL
-	return AppRole(role)
 
 
 def is_k8s_env():
diff --git a/main_em.py b/main_em.py
index 4cadbd4..addcfd6 100755
--- a/main_em.py
+++ b/main_em.py
@@ -90,7 +90,7 @@ def _wait_main_app_enabled() -> None:
 
 if __name__ == '__main__':
 	app_role = get_app_role()
-	if app_role == AppRole.UP:
+	if app_role == AppRole.UPDATES_PROC:
 		print('Internal embedding server is not required for the Updates Processing role, stopping this process.')
 		exit(0)
 

From f691743ec5d1156be4cd3e3d6516e1ac4b4580c8 Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Wed, 22 Apr 2026 11:26:19 +0200
Subject: [PATCH 75/96] fix(app_enabled): Add lock for app enabled check

Signed-off-by: Marcel Klehr <mklehr@gmx.net>
---
 context_chat_backend/controller.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index 278f892..3dadf18 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -69,16 +69,18 @@
 
 app_enabled = threading.Event()
 last_enabled_check: float | None = None
+enabled_check_lock: threading.Lock = threading.Lock()
 def get_enabled_state() -> bool:
 	global last_enabled_check
-	if last_enabled_check is None or time.time() - last_enabled_check > 30:
-		nc = NextcloudApp()
-		if nc.enabled_state:
-			app_enabled.set()
-		else:
-			app_enabled.clear()
-		last_enabled_check = time.time()
-	return app_enabled.is_set()
+	with enabled_check_lock:
+		if last_enabled_check is None or time.time() - last_enabled_check > 30:
+			nc = NextcloudApp()
+			if nc.enabled_state:
+				app_enabled.set()
+			else:
+				app_enabled.clear()
+			last_enabled_check = time.time()
+		return app_enabled.is_set()
 
 def enabled_handler(enabled: bool, nc: NextcloudApp | AsyncNextcloudApp) -> str:
 	try:

From 17aa8105c49b54e7d4a30024073167134d1b3e7a Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 23 Apr 2026 13:39:02 +0530
Subject: [PATCH 76/96] feat: build llama cpp python and add cpu/cuda/vulkan
 builds

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 Dockerfile | 270 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 251 insertions(+), 19 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 79c0d47..63eec9a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,24 +1,191 @@
 # SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
 # SPDX-License-Identifier: AGPL-3.0-or-later
 #
-FROM docker.io/nvidia/cuda:12.2.2-runtime-ubuntu22.04
+ARG CPU_IMAGE=ubuntu:22.04
+ARG CUDA_DEVEL_IMAGE=nvidia/cuda:12.4.1-devel-ubuntu22.04
+ARG CUDA_RUNTIME_IMAGE=nvidia/cuda:12.4.1-runtime-ubuntu22.04
+ARG LLAMA_CPP_PYTHON_VERSION=0.3.20
+
+# ============================================================
+# CPU / ARM builder
+# Builds llama_cpp_python for any x86_64 (AVX+, Sandy Bridge 2011+)
+# and for arm64 (NEON always available).
+# ubuntu:22.04 is a multi-arch image so this stage covers both.
+#
+# GGML_NATIVE=OFF: no -march=native; the host build machine's SIMD
+# capabilities are not baked in.  AVX/AVX2/FMA/F16C default to ON in
+# llama.cpp cmake and are used when the CPU supports them at runtime
+# (the ggml_cpu_has_*() guards).  On arm64 those x86 flags are never
+# emitted by cmake, so NEON/SVE detection remains intact.
+# ============================================================
+FROM ${CPU_IMAGE} AS llama-builder-cpu
+ARG LLAMA_CPP_PYTHON_VERSION
+
+ENV DEBIAN_FRONTEND=noninteractive
+WORKDIR /build
+ADD dockerfile_scripts/install_py11.sh dockerfile_scripts/install_py11.sh
+RUN ./dockerfile_scripts/install_py11.sh
+# install_py11.sh leaves apt lists in place – install build tools in one layer
+RUN apt-get install -y --no-install-recommends \
+        python3.11-dev \
+        cmake build-essential ninja-build git \
+        libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN python3.11 -m pip install --no-cache-dir --upgrade pip setuptools wheel
+
+ENV CMAKE_ARGS="-DGGML_NATIVE=OFF"
+
+RUN python3.11 -m pip wheel \
+    --no-cache-dir \
+    --no-binary llama-cpp-python \
+    --wheel-dir=/wheels \
+    "llama-cpp-python==${LLAMA_CPP_PYTHON_VERSION}"
+
+# ============================================================
+# CUDA (NVIDIA) builder
+# Builds llama_cpp_python with CUDA support.
+# sm_90 is the maximum compute capability supported by CUDA 12.4
+# (Hopper / H100).  Blackwell sm_100 requires CUDA 12.8+.
+# ============================================================
+FROM ${CUDA_DEVEL_IMAGE} AS llama-builder-cuda
+ARG LLAMA_CPP_PYTHON_VERSION
+
+ENV DEBIAN_FRONTEND=noninteractive
+WORKDIR /build
+ADD dockerfile_scripts/install_py11.sh dockerfile_scripts/install_py11.sh
+RUN ./dockerfile_scripts/install_py11.sh
+# gcc-12 is required: Ubuntu 22.04 ships gcc-11 by default which CUDA 12.4
+# treats as "unsupported"; we pin gcc-12 to match the official CI workflow.
+RUN apt-get install -y --no-install-recommends \
+        python3.11-dev \
+        cmake build-essential ninja-build git \
+        gcc-12 g++-12 \
+        libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV CC=/usr/bin/gcc-12
+ENV CXX=/usr/bin/g++-12
+ENV CUDAHOSTCXX=/usr/bin/g++-12
+
+RUN python3.11 -m pip install --no-cache-dir --upgrade pip setuptools wheel
+
+# Architecture list aligned with the official llama-cpp-python CUDA CI workflow:
+#   https://github.com/abetlen/llama-cpp-python/blob/main/.github/workflows/build-wheels-cuda.yaml
+ENV CMAKE_ARGS="-DGGML_CUDA=ON -DGGML_CUDA_FORCE_MMQ=ON -DGGML_NATIVE=OFF \
+    -DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;90-virtual \
+    -DCMAKE_CUDA_FLAGS=--allow-unsupported-compiler \
+    -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++-12"
+
+RUN python3.11 -m pip wheel \
+    --no-cache-dir \
+    --no-binary llama-cpp-python \
+    --wheel-dir=/wheels \
+    "llama-cpp-python==${LLAMA_CPP_PYTHON_VERSION}"
+
+# ============================================================
+# Vulkan (AMD / Intel / any Vulkan-capable GPU) builder
+# Builds llama_cpp_python with Vulkan compute backend.
+# Works on RDNA1/2/3, GCN, Intel Arc, and more.
+# ============================================================
+FROM ubuntu:22.04 AS llama-builder-vulkan
+ARG LLAMA_CPP_PYTHON_VERSION
+
+ENV DEBIAN_FRONTEND=noninteractive
+WORKDIR /build
+ADD dockerfile_scripts/install_py11.sh dockerfile_scripts/install_py11.sh
+RUN ./dockerfile_scripts/install_py11.sh
+# Vulkan headers + glslang (shader compiler) are build-time only
+RUN apt-get install -y --no-install-recommends \
+        python3.11-dev \
+        cmake build-essential ninja-build git \
+        libgomp1 \
+        libvulkan-dev glslang-tools \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN python3.11 -m pip install --no-cache-dir --upgrade pip setuptools wheel
+
+ENV CMAKE_ARGS="-DGGML_VULKAN=ON -DGGML_NATIVE=OFF"
+
+RUN python3.11 -m pip wheel \
+    --no-cache-dir \
+    --no-binary llama-cpp-python \
+    --wheel-dir=/wheels \
+    "llama-cpp-python==${LLAMA_CPP_PYTHON_VERSION}"
+
+# ============================================================
+# CPU / ARM runtime
+# ============================================================
+FROM ${CPU_IMAGE} AS runtime-cpu
+
+ARG CCB_DB_NAME=ccb
+ARG CCB_DB_USER=ccbuser
+ARG CCB_DB_PASS=ccbpass
+
+ENV CCB_DB_NAME=${CCB_DB_NAME}
+ENV CCB_DB_USER=${CCB_DB_USER}
+ENV CCB_DB_PASS=${CCB_DB_PASS}
+ENV DEBIAN_FRONTEND=noninteractive
+ENV AA_DOCKER_ENV=1
+
+WORKDIR /app
+
+ADD dockerfile_scripts/install_deps.sh dockerfile_scripts/install_deps.sh
+RUN ./dockerfile_scripts/install_deps.sh
+ADD dockerfile_scripts/install_py11.sh dockerfile_scripts/install_py11.sh
+RUN ./dockerfile_scripts/install_py11.sh
+ADD dockerfile_scripts/pgsql dockerfile_scripts/pgsql
+RUN ./dockerfile_scripts/pgsql/install.sh
+ADD dockerfile_scripts/install_frpc.sh dockerfile_scripts/install_frpc.sh
+RUN ./dockerfile_scripts/install_frpc.sh
+RUN apt-get autoclean
+ADD dockerfile_scripts/entrypoint.sh dockerfile_scripts/entrypoint.sh
+
+ENV DEBIAN_FRONTEND=dialog
+
+# Install llama_cpp_python from the CPU builder wheel
+COPY --from=llama-builder-cpu /wheels /wheels
+RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && python3 -m pip install --no-cache-dir --no-index --find-links=/wheels llama-cpp-python \
+    && python3 -m pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu \
+    && rm -rf /wheels \
+    && pip cache purge
+
+COPY requirements.txt .
+RUN sed -i '/^llama_cpp_python/d' requirements.txt \
+    && python3 -m pip install --no-cache-dir -r requirements.txt \
+    && python3 -m pip cache purge
+
+COPY context_chat_backend context_chat_backend
+COPY main.py .
+COPY main_em.py .
+COPY config.?pu.yaml .
+COPY logger_config*.yaml .
+COPY hwdetect.sh .
+COPY harp_connect.sh .
+COPY supervisord.conf /etc/supervisor/supervisord.conf
+
+ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/supervisord.conf"]
+
+# ============================================================
+# CUDA (NVIDIA GPU) runtime
+# ============================================================
+FROM ${CUDA_RUNTIME_IMAGE} AS runtime-cuda
 
 ARG CCB_DB_NAME=ccb
 ARG CCB_DB_USER=ccbuser
 ARG CCB_DB_PASS=ccbpass
 
-ENV CCB_DB_NAME ${CCB_DB_NAME}
-ENV CCB_DB_USER ${CCB_DB_USER}
-ENV CCB_DB_PASS ${CCB_DB_PASS}
-ENV DEBIAN_FRONTEND noninteractive
-ENV NVIDIA_VISIBLE_DEVICES all
-ENV NVIDIA_DRIVER_CAPABILITIES compute
-ENV AA_DOCKER_ENV 1
+ENV CCB_DB_NAME=${CCB_DB_NAME}
+ENV CCB_DB_USER=${CCB_DB_USER}
+ENV CCB_DB_PASS=${CCB_DB_PASS}
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute
+ENV AA_DOCKER_ENV=1
 
-# Set working directory
 WORKDIR /app
 
-# Install dependencies
 ADD dockerfile_scripts/install_deps.sh dockerfile_scripts/install_deps.sh
 RUN ./dockerfile_scripts/install_deps.sh
 ADD dockerfile_scripts/install_py11.sh dockerfile_scripts/install_py11.sh
@@ -30,19 +197,82 @@ RUN ./dockerfile_scripts/install_frpc.sh
 RUN apt-get autoclean
 ADD dockerfile_scripts/entrypoint.sh dockerfile_scripts/entrypoint.sh
 
-# Restore interactivity
-ENV DEBIAN_FRONTEND dialog
+ENV DEBIAN_FRONTEND=dialog
+
+# Install llama_cpp_python from the CUDA builder wheel
+COPY --from=llama-builder-cuda /wheels /wheels
+RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && python3 -m pip install --no-cache-dir --no-index --find-links=/wheels llama-cpp-python \
+    && rm -rf /wheels \
+    && pip cache purge
 
-# Copy requirements files
 COPY requirements.txt .
+RUN sed -i '/^llama_cpp_python/d' requirements.txt \
+    && python3 -m pip install --no-cache-dir -r requirements.txt \
+    && python3 -m pip cache purge
 
-# Install requirements
-RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel
-RUN python3 -m pip install --no-cache-dir https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.13-cu122/llama_cpp_python-0.3.13-cp311-cp311-linux_x86_64.whl
-RUN sed -i '/llama_cpp_python/d' requirements.txt
-RUN python3 -m pip install --no-cache-dir -r requirements.txt && python3 -m pip cache purge
+COPY context_chat_backend context_chat_backend
+COPY main.py .
+COPY main_em.py .
+COPY config.?pu.yaml .
+COPY logger_config*.yaml .
+COPY hwdetect.sh .
+COPY harp_connect.sh .
+COPY supervisord.conf /etc/supervisor/supervisord.conf
+
+ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/supervisord.conf"]
+
+# ============================================================
+# Vulkan (AMD / Intel / any Vulkan-capable GPU) runtime
+# Run with: --device /dev/dri (and optionally --device /dev/kfd for AMD)
+# The RADV Mesa driver (mesa-vulkan-drivers) is included and covers
+# GCN, RDNA1/2/3 and newer AMD GPUs out of the box.
+# ============================================================
+FROM ubuntu:22.04 AS runtime-vulkan
+
+ARG CCB_DB_NAME=ccb
+ARG CCB_DB_USER=ccbuser
+ARG CCB_DB_PASS=ccbpass
+
+ENV CCB_DB_NAME=${CCB_DB_NAME}
+ENV CCB_DB_USER=${CCB_DB_USER}
+ENV CCB_DB_PASS=${CCB_DB_PASS}
+ENV DEBIAN_FRONTEND=noninteractive
+ENV AA_DOCKER_ENV=1
+
+WORKDIR /app
+
+ADD dockerfile_scripts/install_deps.sh dockerfile_scripts/install_deps.sh
+RUN ./dockerfile_scripts/install_deps.sh
+ADD dockerfile_scripts/install_py11.sh dockerfile_scripts/install_py11.sh
+RUN ./dockerfile_scripts/install_py11.sh
+ADD dockerfile_scripts/pgsql dockerfile_scripts/pgsql
+RUN ./dockerfile_scripts/pgsql/install.sh
+ADD dockerfile_scripts/install_frpc.sh dockerfile_scripts/install_frpc.sh
+RUN ./dockerfile_scripts/install_frpc.sh
+RUN apt-get autoclean
+ADD dockerfile_scripts/entrypoint.sh dockerfile_scripts/entrypoint.sh
+
+# Install Vulkan runtime + AMD RADV open-source driver
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        libvulkan1 mesa-vulkan-drivers \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV DEBIAN_FRONTEND=dialog
+
+# Install llama_cpp_python from the Vulkan builder wheel
+COPY --from=llama-builder-vulkan /wheels /wheels
+RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && python3 -m pip install --no-cache-dir --no-index --find-links=/wheels llama-cpp-python \
+    && rm -rf /wheels \
+    && pip cache purge
+
+COPY requirements.txt .
+RUN sed -i '/^llama_cpp_python/d' requirements.txt \
+    && python3 -m pip install --no-cache-dir -r requirements.txt \
+    && python3 -m pip cache purge
 
-# Copy application files
 COPY context_chat_backend context_chat_backend
 COPY main.py .
 COPY main_em.py .
@@ -53,3 +283,5 @@ COPY harp_connect.sh .
 COPY supervisord.conf /etc/supervisor/supervisord.conf
 
 ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/supervisord.conf"]
+
+FROM runtime-cpu AS final

From 10092cb40bbd54cd40db77c6d37f27c99241dda2 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 23 Apr 2026 14:20:23 +0530
Subject: [PATCH 77/96] feat(ci): add kubernetes integration test

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 421 +++++++++++++++++++++
 1 file changed, 421 insertions(+)
 create mode 100644 .github/workflows/integration-test-k8s.yml

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
new file mode 100644
index 0000000..12e3a11
--- /dev/null
+++ b/.github/workflows/integration-test-k8s.yml
@@ -0,0 +1,421 @@
+# SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+name: Integration test k8s
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+      - stable*
+
+permissions:
+  contents: read
+
+concurrency:
+  group: integration-test-k8s-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+
+jobs:
+  changes:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+
+    outputs:
+      src: ${{ steps.changes.outputs.src}}
+
+    steps:
+      - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
+        id: changes
+        continue-on-error: true
+        with:
+          filters: |
+            src:
+              - 'main.py'
+              - 'main_em.py'
+              - 'config.cpu.yaml'
+              - 'config.gpu.yaml'
+              - 'context_chat_backend/**'
+              - 'appinfo/**'
+              - 'example.env'
+              - 'hwdetect.sh'
+              - 'persistent_storage/**'
+              - 'project.toml'
+              - 'requirements.txt'
+              - 'logger_config.k8s.yaml'
+              - 'supervisord.conf'
+              - '.github/workflows/integration-test-k8s.yml'
+
+  integration:
+    runs-on: ubuntu-24.04
+
+    needs: changes
+    if: needs.changes.outputs.src != 'false'
+
+    strategy:
+      # do not stop on another job's failure
+      fail-fast: false
+      matrix:
+        php-versions: [ '8.2' ]
+        databases: [ 'pgsql' ]
+        server-versions: [ 'stable32', 'stable33', 'master' ]
+
+    name: Integration test k8s on ${{ matrix.server-versions }} php@${{ matrix.php-versions }}
+
+    env:
+      MYSQL_PORT: 4444
+      PGSQL_PORT: 4445
+      # use the same db for ccb and nextcloud
+      CCB_DB_URL: postgresql+psycopg://root:rootpassword@localhost:4445/nextcloud
+      HP_SHARED_KEY: test_shared_key_12345
+
+    services:
+      mysql:
+        image: mariadb:10.5
+        ports:
+          - 4444:3306/tcp
+        env:
+          MYSQL_ROOT_PASSWORD: rootpassword
+        options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 5
+      postgres:
+        image: pgvector/pgvector:pg17
+        ports:
+          - 4445:5432/tcp
+        env:
+          POSTGRES_USER: root
+          POSTGRES_PASSWORD: rootpassword
+          POSTGRES_DB: nextcloud
+        options: --health-cmd pg_isready --health-interval 5s --health-timeout 2s --health-retries 5 --name postgres --hostname postgres
+
+    steps:
+      - name: Checkout server
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        with:
+          repository: nextcloud/server
+          ref: ${{ matrix.server-versions }}
+          submodules: 'recursive'
+          persist-credentials: false
+
+      - name: Set up php ${{ matrix.php-versions }}
+        uses: shivammathur/setup-php@9e72090525849c5e82e596468b86eb55e9cc5401 # v2
+        with:
+          php-version: ${{ matrix.php-versions }}
+          tools: phpunit
+          extensions: mbstring, iconv, fileinfo, intl, sqlite, pdo_mysql, pdo_sqlite, pgsql, pdo_pgsql, gd, zip
+
+      - name: Checkout context_chat php app
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        with:
+          repository: nextcloud/context_chat
+          path: apps/context_chat
+          persist-credentials: false
+
+      - name: Checkout backend
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        with:
+          path: context_chat_backend/
+          persist-credentials: false
+
+      - name: Get app version
+        id: appinfo
+        uses: skjnldsv/xpath-action@7e6a7c379d0e9abc8acaef43df403ab4fc4f770c # master
+        with:
+          filename: context_chat_backend/appinfo/info.xml
+          expression: "/info/version/text()"
+
+      - name: Set up Nextcloud MYSQL
+        if: ${{ matrix.databases != 'pgsql'}}
+        run: |
+          sleep 25
+          mkdir data
+          ./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud --database-host=127.0.0.1 --database-port=$MYSQL_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass password
+          composer run serve &
+
+      - name: Set up Nextcloud PGSQL
+        if: ${{ matrix.databases == 'pgsql'}}
+        run: |
+          sleep 25
+          mkdir data
+          ./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud --database-host=127.0.0.1 --database-port=$PGSQL_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass password
+          composer run serve &
+
+      - name: Enable context_chat, app_api and testing
+        run: ./occ app:enable -vvv -f context_chat app_api testing
+
+      - name: Checkout documentation
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        with:
+          repository: nextcloud/documentation
+          path: data/admin/files/documentation
+          persist-credentials: false
+
+      - name: Prepare docs
+        run: |
+          cd data/admin/files
+          mv documentation/admin_manual .
+          cp -R documentation/developer_manual .
+          cd developer_manual
+          find . -type f -name "*.rst" -exec bash -c 'mv "$0" "${0%.rst}.md"' {} \;
+          cd ..
+          cp -R documentation/developer_manual ./developer_manual2
+          cd developer_manual2
+          find . -type f -name "*.rst" -exec bash -c 'mv "$0" "${0%.rst}.txt"' {} \;
+          cd ..
+          rm -rf documentation
+
+      - name: Run files scan
+        run: |
+          ./occ files:scan --all
+
+      - name: Install k3s
+        run: |
+          curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="--disable traefik --disable servicelb" sh -
+          sudo chmod 644 /etc/rancher/k3s/k3s.yaml
+          echo "KUBECONFIG=/etc/rancher/k3s/k3s.yaml" >> $GITHUB_ENV
+
+      - name: Wait for k3s and create namespace
+        run: |
+          kubectl wait --for=condition=Ready node --all --timeout=120s
+          kubectl create namespace nextcloud-exapps
+          NODE_IP=$(kubectl get node -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}')
+          echo "NODE_IP=${NODE_IP}" >> $GITHUB_ENV
+          echo "k3s node IP: $NODE_IP"
+
+      - name: Configure Nextcloud for k3s networking
+        run: |
+          ./occ config:system:set overwrite.cli.url --value "http://${{ env.NODE_IP }}" --type=string
+          ./occ config:system:set trusted_domains 1 --value "${{ env.NODE_IP }}"
+
+      - name: Create K8s service account for HaRP
+        run: |
+          kubectl -n nextcloud-exapps create serviceaccount harp-sa
+          kubectl create clusterrolebinding harp-admin \
+            --clusterrole=cluster-admin \
+            --serviceaccount=nextcloud-exapps:harp-sa
+          K3S_TOKEN=$(kubectl -n nextcloud-exapps create token harp-sa --duration=2h)
+          echo "K3S_TOKEN=${K3S_TOKEN}" >> $GITHUB_ENV
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
+        with:
+          cache-image: false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3
+        with:
+          cache-binary: false
+
+      - name: Build the context_chat_backend cpu image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6
+        with:
+          context: context_chat_backend
+          push: false
+          platforms: linux/amd64
+          tags: ccb-cpu:latest
+          target: runtime-cpu
+          load: true
+
+      - name: Pre-load CCB ExApp image into k3s
+        run: docker save ccb-cpu:latest | sudo k3s ctr images import -
+
+      - name: Start HaRP with K8s backend
+        run: |
+          docker run --net host --name appapi-harp \
+            -e HP_SHARED_KEY="${{ env.HP_SHARED_KEY }}" \
+            -e NC_INSTANCE_URL="http://${{ env.NODE_IP }}" \
+            -e HP_LOG_LEVEL="debug" \
+            -e HP_K8S_ENABLED="true" \
+            -e HP_K8S_API_SERVER="https://127.0.0.1:6443" \
+            -e HP_K8S_BEARER_TOKEN="${{ env.K3S_TOKEN }}" \
+            -e HP_K8S_NAMESPACE="nextcloud-exapps" \
+            -e HP_K8S_VERIFY_SSL="false" \
+            --restart unless-stopped \
+            -d ghcr.io/nextcloud/nextcloud-appapi-harp:latest
+
+      - name: Start nginx proxy
+        run: |
+          docker run --net host --name nextcloud --rm \
+            -v $(pwd)/apps/app_api/tests/simple-nginx-NOT-FOR-PRODUCTION.conf:/etc/nginx/conf.d/default.conf:ro \
+            -d nginx
+
+      - name: Wait for HaRP K8s readiness
+        run: |
+          for i in $(seq 1 30); do
+            if curl -sf http://${{ env.NODE_IP }}:8780/exapps/app_api/info \
+                -H "harp-shared-key: ${{ env.HP_SHARED_KEY }}" 2>/dev/null | grep -q '"kubernetes"'; then
+              echo "HaRP is ready with K8s backend"
+              exit 0
+            fi
+            echo "Waiting for HaRP... ($i/30)"
+            sleep 2
+          done
+          echo "HaRP K8s readiness check failed"
+          docker logs appapi-harp
+          exit 1
+
+      - name: Register K8s daemon
+        run: |
+          ./occ app_api:daemon:register \
+            k8s_test "K8s Test" "kubernetes-install" "http" "${{ env.NODE_IP }}:8780" "http://${{ env.NODE_IP }}" \
+            --harp --harp_shared_key "${{ env.HP_SHARED_KEY }}" \
+            --k8s --k8s_expose_type=nodeport --set-default
+          ./occ app_api:daemon:list
+
+      - name: Register backend
+        run: |
+          sed -i 's;<image>.*</image>;<image>ccb-cpu</image>;' context_chat_backend/appinfo/info.xml
+          sed -i 's;<image-tag>.*</image-tag>;<image-tag>latest</image-tag>;' context_chat_backend/appinfo/info.xml
+          timeout 120 ./occ app_api:app:register context_chat_backend k8s_test --info-xml context_chat_backend/appinfo/info.xml
+
+      - name: Run cron jobs
+        run: |
+          # every 10 seconds indefinitely
+          while true; do
+            php cron.php
+            sleep 10
+          done &
+          sleep 30
+          # list all the bg jobs
+          ./occ background-job:list
+
+      - name: Initial dump of DB with context_chat_queue populated
+        run: |
+          docker exec postgres pg_dump nextcloud > /tmp/0_pgdump_nextcloud
+
+      - name: Periodically check context_chat stats for 15 minutes to allow the backend to index the files
+        run: |
+          success=0
+          echo "::group::Checking stats periodically for 15 minutes to allow the backend to index the files"
+          for i in {1..90}; do
+            echo "Checking stats, attempt $i..."
+
+            stats_err=$(mktemp)
+            stats=$(timeout 5 ./occ context_chat:stats --json 2>"$stats_err")
+            stats_exit=$?
+            echo "Stats output:"
+            echo "$stats"
+            if [ -s "$stats_err" ]; then
+              echo "Stderr:"
+              cat "$stats_err"
+            fi
+            echo "---"
+            rm -f "$stats_err"
+
+            # Check for critical errors in output
+            if [ $stats_exit -ne 0 ] || echo "$stats" | grep -q "Error during request"; then
+              echo "Backend connection error detected (exit=$stats_exit), retrying..."
+              sleep 10
+              continue
+            fi
+
+            # Extract total eligible files
+            total_eligible_files=$(echo "$stats" | jq '.eligible_files_count' || echo "")
+
+            # Extract indexed documents count (files__default)
+            indexed_count=$(echo "$stats" | jq '.vectordb_document_counts.files__default' || echo "")
+
+            echo "Total eligible files: $total_eligible_files"
+            echo "Indexed documents (files__default): $indexed_count"
+
+            diff=$((total_eligible_files - indexed_count))
+            threshold=$((total_eligible_files * 3 / 100))
+
+            # Check if difference is within tolerance
+            if [ $diff -le $threshold ]; then
+              echo "Indexing within 3% tolerance (diff=$diff, threshold=$threshold)"
+              success=1
+              break
+            else
+              progress=$((diff * 100 / total_eligible_files))
+              echo "Outside 3% tolerance: diff=$diff (${progress}%), threshold=$threshold"
+            fi
+
+            sleep 10
+          done
+
+          echo "::endgroup::"
+
+          if [ $success -ne 1 ]; then
+            echo "Max attempts reached"
+            exit 1
+          fi
+
+      - name: Run the prompts
+        run: |
+          ./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' > worker1_logs 2>&1 &
+          ./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' > worker2_logs 2>&1 &
+
+          OUT1=$(./occ context_chat:prompt admin "Which factors are taken into account for the Ethical AI Rating?")
+          echo "$OUT1"
+          echo '--------------------------------------------------'
+          OUT2=$(./occ context_chat:prompt admin "Welche Faktoren beeinflussen das Ethical AI Rating?")
+          echo "$OUT2"
+
+          echo "$OUT1" | grep -q "If all of these points are met, we give a Green label." || exit 1
+          echo "$OUT2" | grep -q "If all of these points are met, we give a Green label." || exit 1
+
+      - name: Final dump of DB with vectordb populated
+        run: |
+          docker exec postgres pg_dump nextcloud > /tmp/1_pgdump_nextcloud
+
+      - name: Show server logs
+        if: always()
+        run: |
+          cat data/nextcloud.log
+
+      - name: Show context_chat specific logs
+        if: always()
+        run: |
+          cat data/context_chat.log
+
+      - name: Show task processing worker logs
+        if: always()
+        run: |
+          tail -v -n +1 worker?_logs || echo "No worker logs"
+
+      - name: Show main app indexing logs
+        if: always()
+        run: |
+          kubectl logs -n nextcloud-exapps -l app=nc-app-context-chat-backend-indexing
+
+      - name: Show main app updates processing logs
+        if: always()
+        run: |
+          kubectl logs -n nextcloud-exapps -l app=nc-app-context-chat-backend-updatesproc
+
+      - name: Show main app request processing logs
+        if: always()
+        run: |
+          kubectl logs -n nextcloud-exapps -l app=nc-app-context-chat-backend-requestproc
+
+      - name: Upload database dumps
+        uses: actions/upload-artifact@v4
+        with:
+          name: database-dumps-${{ matrix.server-versions }}-php@${{ matrix.php-versions }}
+          path: |
+            /tmp/0_pgdump_nextcloud
+            /tmp/1_pgdump_nextcloud
+
+      - name: Final stats log
+        run: |
+          ./occ context_chat:stats
+          ./occ context_chat:stats --json
+
+  summary:
+    permissions:
+      contents: none
+    runs-on: ubuntu-latest-low
+    needs: [changes, integration]
+
+    if: always()
+
+    # This is the summary job; it is deliberately not renamed so that branch protection rules still match
+    name: integration-test-k8s
+
+    steps:
+      - name: Summary status
+        run: if ${{ needs.changes.outputs.src != 'false' && needs.integration.result != 'success' }}; then exit 1; fi

From 68199023559bc62819f617f84054efdc3e09285f Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 23 Apr 2026 15:01:31 +0530
Subject: [PATCH 78/96] fix: checkout app_api

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 7 +++++++
 .github/workflows/integration-test.yml     | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index 12e3a11..15fc132 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -120,6 +120,13 @@ jobs:
           path: context_chat_backend/
           persist-credentials: false
 
+      - name: Checkout app_api
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        with:
+          repository: nextcloud/app_api
+          path: apps/app_api
+          persist-credentials: false
+
       - name: Get app version
         id: appinfo
         uses: skjnldsv/xpath-action@7e6a7c379d0e9abc8acaef43df403ab4fc4f770c # master
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index d30073a..69d5945 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -122,6 +122,13 @@ jobs:
           path: context_chat_backend/
           persist-credentials: false
 
+      - name: Checkout app_api
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        with:
+          repository: nextcloud/app_api
+          path: apps/app_api
+          persist-credentials: false
+
       - name: Get app version
         id: appinfo
         uses: skjnldsv/xpath-action@7e6a7c379d0e9abc8acaef43df403ab4fc4f770c # master

From 2376535191016713e109c620ce08e341602871f2 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 23 Apr 2026 15:21:34 +0530
Subject: [PATCH 79/96] fix: cache docker build image

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 6 ++----
 .github/workflows/integration-test.yml     | 2 --
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index 15fc132..2e751f5 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -208,13 +208,9 @@ jobs:
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
-        with:
-          cache-image: false
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3
-        with:
-          cache-binary: false
 
       - name: Build the context_chat_backend cpu image
         uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6
@@ -225,6 +221,8 @@ jobs:
           tags: ccb-cpu:latest
           target: runtime-cpu
           load: true
+          cache-from: type=registry,ref=nextcloud/context_chat_backend:cpu-build-cache,compression=zstd,mode=max
+          cache-to: type=registry,ref=nextcloud/context_chat_backend:cpu-build-cache
 
       - name: Pre-load CCB ExApp image into k3s
         run: docker save ccb-cpu:latest | sudo k3s ctr images import -
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 69d5945..7c8a4cf 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -113,8 +113,6 @@ jobs:
           repository: nextcloud/context_chat
           path: apps/context_chat
           persist-credentials: false
-          # todo: remove later
-          ref: feat/reverse-content-flow
 
       - name: Checkout backend
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

From cf6ba4c7da78e2587b37af00658da787fd7f128e Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 23 Apr 2026 15:53:19 +0530
Subject: [PATCH 80/96] fix: correct info.xml path + register command fixes

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index 2e751f5..ea0b051 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -69,8 +69,6 @@ jobs:
     env:
       MYSQL_PORT: 4444
       PGSQL_PORT: 4445
-      # use the same db for ccb and nextcloud
-      CCB_DB_URL: postgresql+psycopg://root:rootpassword@localhost:4445/nextcloud
       HP_SHARED_KEY: test_shared_key_12345
 
     services:
@@ -81,6 +79,7 @@ jobs:
         env:
           MYSQL_ROOT_PASSWORD: rootpassword
         options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 5
+      # use the same db for ccb and nextcloud
       postgres:
         image: pgvector/pgvector:pg17
         ports:
@@ -272,9 +271,12 @@ jobs:
 
       - name: Register backend
         run: |
-          sed -i 's;<image>.*</image>;<image>ccb-cpu</image>;' appinfo/info.xml
-          sed -i 's;<image-tag>.*</image-tag>;<image-tag>latest</image-tag>;' appinfo/info.xml
-          timeout 120 ./occ app_api:app:register context_chat_backend k8s_test --info-xml context_chat_backend/appinfo/info.xml
+          sed -i 's;<image>.*</image>;<image>ccb-cpu</image>;' context_chat_backend/appinfo/info.xml
+          sed -i 's;<image-tag>.*</image-tag>;<image-tag>latest</image-tag>;' context_chat_backend/appinfo/info.xml
+          timeout 120 ./occ app_api:app:register context_chat_backend k8s_test \
+            --info-xml context_chat_backend/appinfo/info.xml \
+            --env EXTERNAL_DB="postgresql+psycopg://root:rootpassword@localhost:4445/nextcloud" \
+            --wait-finish
 
       - name: Run cron jobs
         run: |

From 7413e5e1195812bd58974f1f7c8ba414675291bf Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 23 Apr 2026 16:05:29 +0530
Subject: [PATCH 81/96] fix: use gha as cache backend for docker images

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index ea0b051..b944d55 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -211,6 +211,13 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3
 
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Build the context_chat_backend cpu image
         uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6
         with:
@@ -220,8 +227,8 @@ jobs:
           tags: ccb-cpu:latest
           target: runtime-cpu
           load: true
-          cache-from: type=registry,ref=nextcloud/context_chat_backend:cpu-build-cache,compression=zstd,mode=max
-          cache-to: type=registry,ref=nextcloud/context_chat_backend:cpu-build-cache
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
 
       - name: Pre-load CCB ExApp image into k3s
         run: docker save ccb-cpu:latest | sudo k3s ctr images import -

From 4f86758735d16f51e4da612ac8c06203ac8bab60 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 23 Apr 2026 16:30:41 +0530
Subject: [PATCH 82/96] fix: replace role names in info.xml

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 appinfo/info.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/appinfo/info.xml b/appinfo/info.xml
index c65a8e8..e589638 100644
--- a/appinfo/info.xml
+++ b/appinfo/info.xml
@@ -84,13 +84,13 @@ Setup background job workers as described here: https://docs.nextcloud.com/serve
 		</environment-variables>
 		<k8s-service-roles>
 			<role>
-				<name>rp</name>
+				<name>requestproc</name>
 				<display-name>Request Processing Mode</display-name>
 				<env>APP_ROLE=requestproc</env>
 				<expose>true</expose>
 			</role>
 			<role>
-				<name>up</name>
+				<name>updatesproc</name>
 				<display-name>Metadata Updates Processing Mode</display-name>
 				<env>APP_ROLE=updatesproc</env>
 				<expose>false</expose>

From 9ca5dd7d5e69add0dab4334f9e89484feb8a52c3 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 23 Apr 2026 16:47:29 +0530
Subject: [PATCH 83/96] fix: use local tag so image is not pulled from remote

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index b944d55..a018784 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -224,14 +224,15 @@ jobs:
           context: context_chat_backend
           push: false
           platforms: linux/amd64
-          tags: ccb-cpu:latest
+          # use local tag so image is not pulled from remote
+          tags: ccb-cpu:local
           target: runtime-cpu
           load: true
           cache-from: type=gha
           cache-to: type=gha,mode=max
 
       - name: Pre-load CCB ExApp image into k3s
-        run: docker save ccb-cpu:latest | sudo k3s ctr images import -
+        run: docker save ccb-cpu:local | sudo k3s ctr images import -
 
       - name: Start HaRP with K8s backend
         run: |
@@ -279,7 +280,7 @@ jobs:
       - name: Register backend
         run: |
           sed -i 's;<image>.*</image>;<image>ccb-cpu</image>;' context_chat_backend/appinfo/info.xml
-          sed -i 's;<image-tag>.*</image-tag>;<image-tag>latest</image-tag>;' context_chat_backend/appinfo/info.xml
+          sed -i 's;<image-tag>.*</image-tag>;<image-tag>local</image-tag>;' context_chat_backend/appinfo/info.xml
           timeout 120 ./occ app_api:app:register context_chat_backend k8s_test \
             --info-xml context_chat_backend/appinfo/info.xml \
             --env EXTERNAL_DB="postgresql+psycopg://root:rootpassword@localhost:4445/nextcloud" \

From 41c85fef99dec2cb742d96949150f99fba923e5b Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 23 Apr 2026 17:04:52 +0530
Subject: [PATCH 84/96] chore: show HaRP container's logs

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index a018784..ab9e439 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -392,6 +392,11 @@ jobs:
         run: |
           tail -v -n +1 worker?_logs || echo "No worker logs"
 
+      - name: Show HaRP logs
+        if: always()
+        run: |
+          docker logs appapi-harp
+
       - name: Show main app indexing logs
         if: always()
         run: |

From cfbc2a17b7c0889376a9ebc8d1bc967905a20de4 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Thu, 23 Apr 2026 17:41:16 +0530
Subject: [PATCH 85/96] fix: add ghcr.io to the docker image name

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index ab9e439..d2939ed 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -225,14 +225,14 @@ jobs:
           push: false
           platforms: linux/amd64
           # use local tag so image is not pulled from remote
-          tags: ccb-cpu:local
+          tags: ghcr.io/ccb-cpu:local
           target: runtime-cpu
           load: true
           cache-from: type=gha
           cache-to: type=gha,mode=max
 
       - name: Pre-load CCB ExApp image into k3s
-        run: docker save ccb-cpu:local | sudo k3s ctr images import -
+        run: docker save ghcr.io/ccb-cpu:local | sudo k3s ctr images import -
 
       - name: Start HaRP with K8s backend
         run: |

From 25ba6880d58ebac384e0f9a5d1eb2742a31354fb Mon Sep 17 00:00:00 2001
From: Marcel Klehr <mklehr@gmx.net>
Date: Thu, 23 Apr 2026 15:24:19 +0200
Subject: [PATCH 86/96] tests(k8s): make php listen on all interfaces

Signed-off-by: Marcel Klehr <mklehr@gmx.net>
---
 .github/workflows/integration-test-k8s.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index d2939ed..953892b 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -139,7 +139,6 @@ jobs:
           sleep 25
           mkdir data
           ./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud --database-host=127.0.0.1 --database-port=$MYSQL_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass password
-          composer run serve &
 
       - name: Set up Nextcloud PGSQL
         if: ${{ matrix.databases == 'pgsql'}}
@@ -147,7 +146,6 @@ jobs:
           sleep 25
           mkdir data
           ./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud --database-host=127.0.0.1 --database-port=$PGSQL_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass password
-          composer run serve &
 
       - name: Enable context_chat, app_api and testing
         run: ./occ app:enable -vvv -f context_chat app_api testing
@@ -254,6 +252,9 @@ jobs:
             -v $(pwd)/apps/app_api/tests/simple-nginx-NOT-FOR-PRODUCTION.conf:/etc/nginx/conf.d/default.conf:ro \
             -d nginx
 
+      - name: Start nextcloud
+        run: PHP_CLI_SERVER_WORKERS=2 php -S 0.0.0.0:8080 &
+
       - name: Wait for HaRP K8s readiness
         run: |
           for i in $(seq 1 30); do

From 3df8fdcfe07f2b2cdafea0007371aa1ab2fb59f3 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 28 Apr 2026 05:54:00 +0530
Subject: [PATCH 87/96] fix(ci): use NODE_IP to reach the vector db

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index 953892b..3e9adb7 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -284,7 +284,7 @@ jobs:
           sed -i 's;<image-tag>.*</image-tag>;<image-tag>local</image-tag>;' context_chat_backend/appinfo/info.xml
           timeout 120 ./occ app_api:app:register context_chat_backend k8s_test \
             --info-xml context_chat_backend/appinfo/info.xml \
-            --env EXTERNAL_DB="postgresql+psycopg://root:rootpassword@localhost:4445/nextcloud" \
+            --env EXTERNAL_DB="postgresql+psycopg://root:rootpassword@${{ env.NODE_IP }}:4445/nextcloud" \
             --wait-finish
 
       - name: Run cron jobs

From fd12d841fb0630b6e04f92652487e03beaf5016e Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 28 Apr 2026 07:25:55 +0530
Subject: [PATCH 88/96] fix(ci): increase timeout for context chat stats and
 handle exit status better

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 4 ++--
 .github/workflows/integration-test.yml     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index 3e9adb7..cb1aa53 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -310,8 +310,8 @@ jobs:
             echo "Checking stats, attempt $i..."
 
             stats_err=$(mktemp)
-            stats=$(timeout 5 ./occ context_chat:stats --json 2>"$stats_err")
-            stats_exit=$?
+            stats_exit=0
+            stats=$(timeout 30 ./occ context_chat:stats --json 2>"$stats_err") || stats_exit=$?
             echo "Stats output:"
             echo "$stats"
             if [ -s "$stats_err" ]; then
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 7c8a4cf..ba536b8 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -235,8 +235,8 @@ jobs:
             echo "Checking stats, attempt $i..."
 
             stats_err=$(mktemp)
-            stats=$(timeout 5 ./occ context_chat:stats --json 2>"$stats_err")
-            stats_exit=$?
+            stats_exit=0
+            stats=$(timeout 30 ./occ context_chat:stats --json 2>"$stats_err") || stats_exit=$?
             echo "Stats output:"
             echo "$stats"
             if [ -s "$stats_err" ]; then

From 98d2765df442b8d10ff433e4c21a8bc974fae8c8 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 28 Apr 2026 10:10:54 +0530
Subject: [PATCH 89/96] fix(ci): checkout the correct branch of app_api

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 1 +
 .github/workflows/integration-test.yml     | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index cb1aa53..9511066 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -123,6 +123,7 @@ jobs:
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
         with:
           repository: nextcloud/app_api
+          ref: ${{ matrix.server-versions }}
           path: apps/app_api
           persist-credentials: false
 
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index ba536b8..6ebdd36 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -224,6 +224,7 @@ jobs:
           ./occ background-job:list
 
       - name: Initial dump of DB with context_chat_queue populated
+        if: always()
         run: |
           docker exec postgres pg_dump nextcloud > /tmp/0_pgdump_nextcloud
 
@@ -365,6 +366,7 @@ jobs:
 
       - name: Upload database dumps
         uses: actions/upload-artifact@v4
+        if: always()
         with:
           name: database-dumps-${{ matrix.server-versions }}-php@${{ matrix.php-versions }}
           path: |
@@ -372,6 +374,7 @@ jobs:
             /tmp/1_pgdump_nextcloud
 
       - name: Final stats log
+        if: always()
         run: |
           ./occ context_chat:stats
           ./occ context_chat:stats --json

From 50547915dc8a9a113bb427c958c4b14adf13d7a3 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 28 Apr 2026 10:33:32 +0530
Subject: [PATCH 90/96] fix(ci): show all k8s pods' logs

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index 9511066..fe43f45 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -178,7 +178,7 @@ jobs:
 
       - name: Install k3s
         run: |
-          curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="--disable traefik --disable servicelb" sh -
+          curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="--disable traefik --disable servicelb --kubelet-arg=container-log-max-size=0" sh -
           sudo chmod 644 /etc/rancher/k3s/k3s.yaml
           echo "KUBECONFIG=/etc/rancher/k3s/k3s.yaml" >> $GITHUB_ENV
 
@@ -300,6 +300,7 @@ jobs:
           ./occ background-job:list
 
       - name: Initial dump of DB with context_chat_queue populated
+        if: always()
         run: |
           docker exec postgres pg_dump nextcloud > /tmp/0_pgdump_nextcloud
 
@@ -402,20 +403,21 @@ jobs:
       - name: Show main app indexing logs
         if: always()
         run: |
-          kubectl logs -n nextcloud-exapps -l app=nc-app-context-chat-backend-indexing
+          kubectl logs -n nextcloud-exapps -l app=nc-app-context-chat-backend-indexing --prefix --tail=-1 --ignore-errors
 
       - name: Show main app updates processing logs
         if: always()
         run: |
-          kubectl logs -n nextcloud-exapps -l app=nc-app-context-chat-backend-updatesproc
+          kubectl logs -n nextcloud-exapps -l app=nc-app-context-chat-backend-updatesproc --prefix --tail=-1 --ignore-errors
 
       - name: Show main app request processing logs
         if: always()
         run: |
-          kubectl logs -n nextcloud-exapps -l app=nc-app-context-chat-backend-requestproc
+          kubectl logs -n nextcloud-exapps -l app=nc-app-context-chat-backend-requestproc --prefix --tail=-1 --ignore-errors
 
       - name: Upload database dumps
         uses: actions/upload-artifact@v4
+        if: always()
         with:
           name: database-dumps-${{ matrix.server-versions }}-php@${{ matrix.php-versions }}
           path: |
@@ -423,6 +425,7 @@ jobs:
             /tmp/1_pgdump_nextcloud
 
       - name: Final stats log
+        if: always()
         run: |
           ./occ context_chat:stats
           ./occ context_chat:stats --json

From f74908b6a00eb4787034807463e57fe5dbef3506 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 28 Apr 2026 10:49:29 +0530
Subject: [PATCH 91/96] fix(ci): app_api branch translation + only run k8s for
 master

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 4 ++--
 .github/workflows/integration-test.yml     | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index fe43f45..db3f760 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -62,7 +62,7 @@ jobs:
       matrix:
         php-versions: [ '8.2' ]
         databases: [ 'pgsql' ]
-        server-versions: [ 'stable32', 'stable33', 'master' ]
+        server-versions: [ 'master' ]
 
     name: Integration test k8s on ${{ matrix.server-versions }} php@${{ matrix.php-versions }}
 
@@ -123,7 +123,7 @@ jobs:
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
         with:
           repository: nextcloud/app_api
-          ref: ${{ matrix.server-versions }}
+          ref: ${{ matrix.server-versions == 'master' && 'main' || matrix.server-versions }}
           path: apps/app_api
           persist-credentials: false
 
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 6ebdd36..d211d16 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -124,6 +124,7 @@ jobs:
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
         with:
           repository: nextcloud/app_api
+          ref: ${{ matrix.server-versions == 'master' && 'main' || matrix.server-versions }}
           path: apps/app_api
           persist-credentials: false
 

From d3f957558ab0a451cbc557b8a668f17222a7ab2d Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 28 Apr 2026 13:16:11 +0530
Subject: [PATCH 92/96] fix(ci): separate prompt responses in groups

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 9 ++++++---
 .github/workflows/integration-test.yml     | 9 ++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index db3f760..0438c81 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -367,14 +367,17 @@ jobs:
           ./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' > worker1_logs 2>&1 &
           ./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' > worker2_logs 2>&1 &
 
+          echo ::group::English prompt
           OUT1=$(./occ context_chat:prompt admin "Which factors are taken into account for the Ethical AI Rating?")
           echo "$OUT1"
-          echo '--------------------------------------------------'
+          echo "$OUT1" | grep -q "If all of these points are met, we give a Green label." || exit 1
+          echo ::endgroup::
+
+          echo ::group::German prompt
           OUT2=$(./occ context_chat:prompt admin "Welche Faktoren beeinflussen das Ethical AI Rating?")
           echo "$OUT2"
-
-          echo "$OUT1" | grep -q "If all of these points are met, we give a Green label." || exit 1
           echo "$OUT2" | grep -q "If all of these points are met, we give a Green label." || exit 1
+          echo ::endgroup::
 
       - name: Final dump of DB with vectordb populated
         run: |
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index d211d16..6649735 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -299,14 +299,17 @@ jobs:
           ./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' > worker1_logs 2>&1 &
           ./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' > worker2_logs 2>&1 &
 
+          echo ::group::English prompt
           OUT1=$(./occ context_chat:prompt admin "Which factors are taken into account for the Ethical AI Rating?")
           echo "$OUT1"
-          echo '--------------------------------------------------'
+          echo "$OUT1" | grep -q "If all of these points are met, we give a Green label." || exit 1
+          echo ::endgroup::
+
+          echo ::group::German prompt
           OUT2=$(./occ context_chat:prompt admin "Welche Faktoren beeinflussen das Ethical AI Rating?")
           echo "$OUT2"
-
-          echo "$OUT1" | grep -q "If all of these points are met, we give a Green label." || exit 1
           echo "$OUT2" | grep -q "If all of these points are met, we give a Green label." || exit 1
+          echo ::endgroup::
 
       - name: Check python memory usage
         run: |

From c9edba6f6396363d1f026a2efe5d6a86abcc14ad Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Tue, 28 Apr 2026 13:16:29 +0530
Subject: [PATCH 93/96] fix(ci): always dump db

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 .github/workflows/integration-test-k8s.yml | 1 +
 .github/workflows/integration-test.yml     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index 0438c81..5555f7c 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -380,6 +380,7 @@ jobs:
           echo ::endgroup::
 
       - name: Final dump of DB with vectordb populated
+        if: always()
         run: |
           docker exec postgres pg_dump nextcloud > /tmp/1_pgdump_nextcloud
 
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 6649735..4a6123c 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -330,6 +330,7 @@ jobs:
           fi
 
       - name: Final dump of DB with vectordb populated
+        if: always()
         run: |
           docker exec postgres pg_dump nextcloud > /tmp/1_pgdump_nextcloud
 

From 7da999a82e62062c0f3c1d348e30a8569f7b5dc0 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 29 Apr 2026 18:49:19 +0530
Subject: [PATCH 94/96] fix(context): break the loop after the first chunk does
 not fit in the context

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/chain/context.py    | 10 ----------
 context_chat_backend/chain/one_shot.py   |  6 ++----
 context_chat_backend/chain/query_proc.py | 17 ++++++++++-------
 3 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/context_chat_backend/chain/context.py b/context_chat_backend/chain/context.py
index adbac2d..c575d1a 100644
--- a/context_chat_backend/chain/context.py
+++ b/context_chat_backend/chain/context.py
@@ -32,16 +32,6 @@ def get_context_docs(
 	return vectordb.doc_search(user_id, query, ctx_limit, scope_type, scope_list)
 
 
-def get_context_chunks(context_docs: list[Document]) -> list[str]:
-	context_chunks = []
-	for doc in context_docs:
-		if title := doc.metadata.get('title'):
-			context_chunks.append(title)
-		context_chunks.append(doc.page_content)
-
-	return context_chunks
-
-
 def do_doc_search(
 	user_id: str,
 	query: str,
diff --git a/context_chat_backend/chain/one_shot.py b/context_chat_backend/chain/one_shot.py
index 3b4224c..d723e9e 100644
--- a/context_chat_backend/chain/one_shot.py
+++ b/context_chat_backend/chain/one_shot.py
@@ -8,7 +8,7 @@
 
 from ..dyn_loader import VectorDBLoader
 from ..types import TConfig
-from .context import get_context_chunks, get_context_docs
+from .context import get_context_docs
 from .query_proc import get_pruned_query
 from .types import ContextException, LLMOutput, ScopeType, SearchResult
 
@@ -56,14 +56,12 @@ def process_context_query(
 			raise ContextException('No documents retrieved, please choose a wider scope of documents to search from')
 		raise ContextException('No documents retrieved, please index a few documents first')
 
-	context_chunks = get_context_chunks(context_docs)
 	logger.debug('context retrieved', extra={
 		'len(context_docs)': len(context_docs),
-		'len(context_chunks)': len(context_chunks),
 	})
 
 	output = llm.invoke(
-		get_pruned_query(llm, app_config, query, template or _LLM_TEMPLATE, context_chunks),
+		get_pruned_query(llm, app_config, query, template or _LLM_TEMPLATE, context_docs),
 		userid=user_id,
 	).strip()
 	unique_sources = [SearchResult(
diff --git a/context_chat_backend/chain/query_proc.py b/context_chat_backend/chain/query_proc.py
index b6a9982..685246b 100644
--- a/context_chat_backend/chain/query_proc.py
+++ b/context_chat_backend/chain/query_proc.py
@@ -7,6 +7,7 @@
 from sys import maxsize as SYS_MAXSIZE
 
 from langchain.llms.base import LLM
+from langchain.schema import Document
 from transformers import GPT2Tokenizer
 
 from ..types import TConfig
@@ -22,7 +23,7 @@ def get_num_tokens(text: str, tokenizer: GPT2Tokenizer) -> int:
 	return len(tokenizer.encode(text, max_length=SYS_MAXSIZE, truncation=True))
 
 
-def get_pruned_query(llm: LLM, config: TConfig, query: str, template: str, text_chunks: list[str]) -> str:
+def get_pruned_query(llm: LLM, config: TConfig, query: str, template: str, doc_chunks: list[Document]) -> str:
 	'''
 	Truncates the input to fit the model's maximum context length
 	and returns the model's prediction
@@ -69,19 +70,21 @@ def get_pruned_query(llm: LLM, config: TConfig, query: str, template: str, text_
 
 	accepted_chunks = []
 
-	while text_chunks and remaining_tokens > 0:
-		context = text_chunks.pop(0)
+	for chunk in doc_chunks:
+		context = f'{chunk.metadata.get("title", "")}:\n\n{chunk.page_content}'
 		context_tokens = get_num_tokens(context, tokenizer)
 
-		if context_tokens <= remaining_tokens:
-			accepted_chunks.append(context)
-			remaining_tokens -= context_tokens
+		if context_tokens > remaining_tokens or remaining_tokens <= 0:
+			break
+
+		accepted_chunks.append(context)
+		remaining_tokens -= context_tokens
 
 	logger.debug('pruned query stats', extra={
 		'total tokens': n_ctx - remaining_tokens,
 		'remaining tokens': remaining_tokens,
 		'accepted chunks': len(accepted_chunks),
-		'total chunks': len(text_chunks),
+		'total chunks': len(doc_chunks),
 	})
 
 	return template.format(context='\n\n'.join(accepted_chunks), question=query)

From 4bbb08ce5679056492934f5fe1d862121bdba615 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 29 Apr 2026 18:50:53 +0530
Subject: [PATCH 95/96] chore(context): increase default context size to 16384

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/chain/query_proc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/context_chat_backend/chain/query_proc.py b/context_chat_backend/chain/query_proc.py
index 685246b..1fe6827 100644
--- a/context_chat_backend/chain/query_proc.py
+++ b/context_chat_backend/chain/query_proc.py
@@ -40,7 +40,7 @@ def get_pruned_query(llm: LLM, config: TConfig, query: str, template: str, doc_c
 	n_ctx = llm_config.get('n_ctx') \
 		or llm_config.get('config', {}).get('context_length') \
 		or llm_config.get('pipeline_kwargs', {}).get('config', {}).get('max_length') \
-		or 8192
+		or 16384
 
 	# fav: tokens to generate
 	n_gen = llm_config.get('max_tokens') \

From 205dba71e4b69936c765c0b1b8b1776eadb20a57 Mon Sep 17 00:00:00 2001
From: Anupam Kumar <kyteinsky@gmail.com>
Date: Wed, 29 Apr 2026 18:55:54 +0530
Subject: [PATCH 96/96] chore: increase context chunks fetched to 30

Signed-off-by: Anupam Kumar <kyteinsky@gmail.com>
---
 context_chat_backend/chain/context.py     | 2 +-
 context_chat_backend/chain/one_shot.py    | 2 +-
 context_chat_backend/task_fetcher.py      | 2 +-
 context_chat_backend/vectordb/pgvector.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/context_chat_backend/chain/context.py b/context_chat_backend/chain/context.py
index c575d1a..81a58f9 100644
--- a/context_chat_backend/chain/context.py
+++ b/context_chat_backend/chain/context.py
@@ -36,7 +36,7 @@ def do_doc_search(
 	user_id: str,
 	query: str,
 	vectordb_loader: VectorDBLoader,
-	ctx_limit: int = 20,
+	ctx_limit: int = 30,
 	scope_type: ScopeType | None = None,
 	scope_list: list[str] | None = None,
 ) -> list[SearchResult]:
diff --git a/context_chat_backend/chain/one_shot.py b/context_chat_backend/chain/one_shot.py
index d723e9e..3bd4557 100644
--- a/context_chat_backend/chain/one_shot.py
+++ b/context_chat_backend/chain/one_shot.py
@@ -38,7 +38,7 @@ def process_context_query(
 	llm: LLM,
 	app_config: TConfig,
 	query: str,
-	ctx_limit: int = 20,
+	ctx_limit: int = 30,
 	scope_type: ScopeType | None = None,
 	scope_list: list[str] | None = None,
 	template: str | None = None,
diff --git a/context_chat_backend/task_fetcher.py b/context_chat_backend/task_fetcher.py
index a41ae06..baa882d 100644
--- a/context_chat_backend/task_fetcher.py
+++ b/context_chat_backend/task_fetcher.py
@@ -60,7 +60,7 @@
 TP_CHECK_INTERVAL = 5
 TP_CHECK_INTERVAL_WITH_TRIGGER = 5 * 60
 TP_CHECK_INTERVAL_ON_ERROR = 15
-CONTEXT_LIMIT=20
+CONTEXT_LIMIT = 30
 
 
 class ThreadType(Enum):
diff --git a/context_chat_backend/vectordb/pgvector.py b/context_chat_backend/vectordb/pgvector.py
index e833356..4b820cd 100644
--- a/context_chat_backend/vectordb/pgvector.py
+++ b/context_chat_backend/vectordb/pgvector.py
@@ -640,7 +640,7 @@ def _similarity_search(
 		session: orm.Session,
 		query: str,
 		chunk_ids: list[str],
-		k: int = 20,
+		k: int,
 	) -> list[Document]:
 		embedding = self.client.embeddings.embed_query(query)
 		collection = self.client.get_collection(session)