Skip to content

Implement next.js #1730

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
51ce063
feat: add ColPali reranker implementation with Qdrant binary quantiza…
athrael-soju Jul 22, 2025
da9c42d
feat: add ColPali and ColQwen2 PDF retrieval implementation with vect…
athrael-soju Jul 22, 2025
0b0540d
feat: implement ColNomic server-client architecture for efficient mod…
athrael-soju Jul 22, 2025
d24d4ea
feat: add ColNomic model server with image and query embedding endpoints
athrael-soju Jul 22, 2025
1fa9199
feat: add nomic AI image embedding and vector search implementation w…
athrael-soju Jul 22, 2025
a41d5b8
feat: implement ColPali image search with Qwen2.5 model and Qdrant ve…
athrael-soju Jul 23, 2025
ba6b33a
feat: add nomic image embedding with UFO dataset integration
athrael-soju Jul 23, 2025
cac6b44
Add vector padding to allow batch processing
athrael-soju Jul 23, 2025
0cacfa7
feat: initialize FastAPI backend with ColPali integration and OpenAPI…
athrael-soju Jul 23, 2025
04112e3
chore: initialize project with Python dependencies and Docker config
athrael-soju Jul 23, 2025
9955165
No changes detected in the provided diff - please share the code chan…
athrael-soju Jul 23, 2025
06db935
feat: initialize Next.js frontend with authentication, UI components …
athrael-soju Jul 23, 2025
3225ff0
feat: initialize Next.js frontend with app router, TailwindCSS and au…
athrael-soju Jul 23, 2025
4605d85
feat: initialize NextJS frontend with UI components and dashboard layout
athrael-soju Jul 23, 2025
219a0f3
feat: initialize Next.js frontend with UI dependencies and build scripts
athrael-soju Jul 23, 2025
2fde636
feat: implement login page with API client integration for authentica…
athrael-soju Jul 23, 2025
abb453e
feat: add dashboard pages and colpali search interface with sidebar n…
athrael-soju Jul 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ PROJECT_NAME="Full Stack FastAPI Project"
STACK_NAME=full-stack-fastapi-project

# Backend
BACKEND_CORS_ORIGINS="http://localhost,http://localhost:5173,https://localhost,https://localhost:5173,http://localhost.tiangolo.com"
BACKEND_CORS_ORIGINS="http://localhost,http://localhost:5173,https://localhost,https://localhost:5173,http://localhost.tiangolo.com,http://localhost:3000"
SECRET_KEY=changethis
FIRST_SUPERUSER=admin@example.com
FIRST_SUPERUSER_PASSWORD=changethis
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ node_modules/
/playwright-report/
/blob-report/
/playwright/.cache/
.env
__pycache__
3 changes: 2 additions & 1 deletion backend/app/api/main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from fastapi import APIRouter

from app.api.routes import items, login, private, users, utils
from app.api.routes import colpali, items, login, private, users, utils
from app.core.config import settings

# Top-level API router; each feature module's router is mounted onto it below.
api_router = APIRouter()
api_router.include_router(login.router)
api_router.include_router(users.router)
api_router.include_router(utils.router)
api_router.include_router(items.router)
# ColPali document search/upload endpoints (that router declares the "/colpali" prefix).
api_router.include_router(colpali.router)


if settings.ENVIRONMENT == "local":
Expand Down
244 changes: 244 additions & 0 deletions backend/app/api/routes/colpali.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
"""
ColPali API routes for document embedding and search functionality.
"""
from typing import Any

from fastapi import APIRouter, HTTPException, BackgroundTasks
from fastapi.responses import JSONResponse

from app.api.deps import CurrentUser
from app.models import (
ColPaliSearchRequest,
ColPaliSearchResponse,
ColPaliSearchResult,
ColPaliUploadRequest,
ColPaliUploadResponse,
Message,
)
from app.services.colpali import colpali_service

router = APIRouter(prefix="/colpali", tags=["colpali"])


@router.post("/search", response_model=ColPaliSearchResponse)
def search_documents(
    current_user: CurrentUser,
    search_request: ColPaliSearchRequest,
) -> Any:
    """
    Search for documents using ColPali semantic search.

    Forwards the query, collection name and both limits from the request to
    ``colpali_service.search`` and wraps every returned hit in a
    ``ColPaliSearchResult``.

    Args:
        current_user: Authenticated user (declared so the route requires auth).
        search_request: Query text, target collection and result limits.

    Returns:
        A ``ColPaliSearchResponse`` echoing the query and collection name,
        with the formatted hits and their count.

    Raises:
        HTTPException: 500 when the search or result formatting fails.
    """
    try:
        # Perform search using ColPali service
        results = colpali_service.search(
            query=search_request.query,
            collection_name=search_request.collection_name,
            search_limit=search_request.search_limit,
            prefetch_limit=search_request.prefetch_limit,
        )

        search_results = [
            ColPaliSearchResult(
                # The response model declares ``id`` as ``str`` while Qdrant
                # point ids can be integers; coerce so integer ids do not
                # fail response validation.
                id=str(result["id"]),
                score=result["score"],
                payload=result["payload"],
            )
            for result in results
        ]

        return ColPaliSearchResponse(
            results=search_results,
            query=search_request.query,
            collection_name=search_request.collection_name,
            total_results=len(search_results),
        )

    except HTTPException:
        # Keep the status code of HTTP errors raised further down.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Search failed: {str(e)}"
        ) from e


@router.post("/upload", response_model=ColPaliUploadResponse)
def upload_dataset(
    current_user: CurrentUser,
    upload_request: ColPaliUploadRequest,
    background_tasks: BackgroundTasks,
) -> Any:
    """
    Upload a dataset to Qdrant collection for ColPali search.

    Delegates to ``colpali_service.upload_dataset`` with the dataset name,
    collection name and batch size from the request.

    NOTE: the upload runs synchronously. The response is only sent after the
    service call returns, which is what allows it to report the final counts.
    ``background_tasks`` is accepted for interface compatibility but is not
    used yet, so long uploads can still hit request timeouts.

    Args:
        current_user: Authenticated user (declared so the route requires auth).
        upload_request: Dataset name, target collection and batch size.
        background_tasks: Currently unused; reserved for a future async upload.

    Returns:
        A ``ColPaliUploadResponse`` with the uploaded/total item counts and
        the success flag reported by the service.

    Raises:
        HTTPException: 500 when the upload fails.
    """
    try:
        result = colpali_service.upload_dataset(
            dataset_name=upload_request.dataset_name,
            collection_name=upload_request.collection_name,
            batch_size=upload_request.batch_size,
        )

        return ColPaliUploadResponse(
            message=f"Upload completed. {result['total_uploaded']} out of {result['total_items']} items uploaded successfully.",
            collection_name=upload_request.collection_name,
            total_uploaded=result["total_uploaded"],
            total_items=result["total_items"],
            success=result["success"],
        )

    except HTTPException:
        # Keep the status code of HTTP errors raised further down.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Upload failed: {str(e)}"
        ) from e


@router.get("/collections", response_model=list[str])
def list_collections(current_user: CurrentUser) -> Any:
    """
    Return the names of all collections reported by the Qdrant client.

    Any failure while querying Qdrant is reported as an HTTP 500.
    """
    try:
        listing = colpali_service.client.get_collections()
        names = []
        for entry in listing.collections:
            names.append(entry.name)
        return names
    except Exception as e:
        failure = f"Failed to list collections: {str(e)}"
        raise HTTPException(status_code=500, detail=failure)


@router.get("/collections/{collection_name}/info")
def get_collection_info(
    current_user: CurrentUser,
    collection_name: str,
) -> Any:
    """
    Describe one collection: its counters plus selected config sections.

    The counters are read straight from the object returned by the Qdrant
    client; each config section is converted with ``.dict()`` or reported as
    ``None`` when it is falsy. Any failure is reported as an HTTP 500.
    """

    def _as_dict(section: Any) -> Any:
        # A missing (falsy) config section is reported as None.
        return section.dict() if section else None

    try:
        info = colpali_service.client.get_collection(collection_name)

        summary = {"name": collection_name}
        summary["vectors_count"] = info.vectors_count
        summary["indexed_vectors_count"] = info.indexed_vectors_count
        summary["points_count"] = info.points_count
        summary["segments_count"] = info.segments_count
        summary["config"] = {
            "params": _as_dict(info.config.params),
            "hnsw_config": _as_dict(info.config.hnsw_config),
            "optimizer_config": _as_dict(info.config.optimizer_config),
        }
        return summary
    except Exception as e:
        failure = f"Failed to get collection info: {str(e)}"
        raise HTTPException(status_code=500, detail=failure)


@router.delete("/collections/{collection_name}", response_model=Message)
def delete_collection(
    current_user: CurrentUser,
    collection_name: str,
) -> Any:
    """
    Delete a collection and all its data.

    WARNING: This operation is irreversible and will delete all embedded documents
    in the specified collection.

    Args:
        current_user: Authenticated user; must be a superuser.
        collection_name: Name of the Qdrant collection to delete.

    Returns:
        A ``Message`` confirming the deletion.

    Raises:
        HTTPException: 403 when the caller is not a superuser,
            500 when the Qdrant client fails to delete the collection.
    """
    # The permission check must live outside the try block: inside it, the
    # 403 below would be caught by ``except Exception`` and turned into a 500.
    if not current_user.is_superuser:
        raise HTTPException(
            status_code=403,
            detail="Only superusers can delete collections"
        )

    try:
        colpali_service.client.delete_collection(collection_name)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to delete collection: {str(e)}"
        ) from e

    return Message(message=f"Collection '{collection_name}' deleted successfully")


@router.post("/collections/{collection_name}/create", response_model=Message)
def create_collection(
    current_user: CurrentUser,
    collection_name: str,
) -> Any:
    """
    Ensure a collection with the given name exists.

    Calls ``colpali_service.create_collection_if_not_exists`` and reports
    whether the collection was newly created or was already present.
    Any failure is reported as an HTTP 500.
    """
    try:
        was_created = colpali_service.create_collection_if_not_exists(collection_name)
        text = (
            f"Collection '{collection_name}' created successfully"
            if was_created
            else f"Collection '{collection_name}' already exists"
        )
        return Message(message=text)
    except Exception as e:
        failure = f"Failed to create collection: {str(e)}"
        raise HTTPException(status_code=500, detail=failure)


@router.get("/health")
def health_check() -> Any:
    """
    Report whether the ColPali service dependencies respond.

    Lists the Qdrant collections to prove connectivity, then reports whether
    both the model and the processor are set and which device the model is on.
    Any exception produces a 503 response with an "unhealthy" payload instead.
    """
    try:
        # Listing collections doubles as the Qdrant connectivity probe.
        qdrant_state = colpali_service.client.get_collections()

        # Ready only when neither the model nor the processor is missing.
        is_model_ready = not (
            colpali_service.model is None or colpali_service.processor is None
        )

        report = {
            "status": "healthy",
            "qdrant_connected": True,
            "collections_count": len(qdrant_state.collections),
            "model_ready": is_model_ready,
        }
        if colpali_service.model:
            report["device"] = str(colpali_service.model.device)
        else:
            report["device"] = "unknown"
        return report
    except Exception as e:
        unhealthy = {
            "status": "unhealthy",
            "error": str(e),
            "qdrant_connected": False,
            "model_ready": False,
        }
        return JSONResponse(status_code=503, content=unhealthy)
3 changes: 3 additions & 0 deletions backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def all_cors_origins(self) -> list[str]:
POSTGRES_PASSWORD: str = ""
POSTGRES_DB: str = ""

# ColPali/Qdrant settings
QDRANT_URL: str = "http://localhost:6333"

@computed_field # type: ignore[prop-decorator]
@property
def SQLALCHEMY_DATABASE_URI(self) -> PostgresDsn:
Expand Down
35 changes: 35 additions & 0 deletions backend/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,38 @@ class TokenPayload(SQLModel):
class NewPassword(SQLModel):
    # Token submitted together with the replacement password.
    token: str
    # Replacement password; must be 8 to 40 characters long.
    new_password: str = Field(
        min_length=8,
        max_length=40,
    )


# ColPali models
class ColPaliSearchRequest(SQLModel):
    # Request body of the ColPali search endpoint.

    # Query text; must be 1 to 1000 characters long.
    query: str = Field(
        min_length=1,
        max_length=1000,
        description="Search query for ColPali",
    )
    # Collection to search; falls back to "le-collection".
    collection_name: str = Field(
        default="le-collection",
        description="Qdrant collection name",
    )
    # Number of hits to return (1-100).
    search_limit: int = Field(
        default=20,
        ge=1,
        le=100,
        description="Number of results to return",
    )
    # Number of candidates to prefetch (1-1000).
    prefetch_limit: int = Field(
        default=200,
        ge=1,
        le=1000,
        description="Number of results to prefetch",
    )


class ColPaliSearchResult(SQLModel):
    # One hit of a ColPali search.

    score: float = Field(
        description="Similarity score",
    )
    payload: dict = Field(
        description="Document metadata",
    )
    id: str = Field(
        description="Document ID",
    )


class ColPaliSearchResponse(SQLModel):
    # Response body of the ColPali search endpoint.

    # Hits produced for the query.
    results: list[ColPaliSearchResult]
    # Query text of the request this response answers.
    query: str
    # Collection named in the request.
    collection_name: str
    # Count of entries reported for this response.
    total_results: int


class ColPaliUploadRequest(SQLModel):
    # Request body of the ColPali dataset upload endpoint.

    # Dataset to process; required, no default.
    dataset_name: str = Field(
        description="Name of the dataset to upload",
    )
    # Target collection; falls back to "le-collection".
    collection_name: str = Field(
        default="le-collection",
        description="Qdrant collection name",
    )
    # Items handled per batch (1-16).
    batch_size: int = Field(
        default=4,
        ge=1,
        le=16,
        description="Batch size for processing",
    )


class ColPaliUploadResponse(SQLModel):
    # Response body of the ColPali dataset upload endpoint.

    # Human-readable summary of the upload.
    message: str
    # Collection the upload targeted.
    collection_name: str
    # Count of items that were uploaded.
    total_uploaded: int
    # Count of items in the dataset.
    total_items: int
    # Overall success flag for the upload.
    success: bool
1 change: 1 addition & 0 deletions backend/app/services/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Services module
Loading
Loading