Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ RUN prisma generate
EXPOSE 8000

# Command to run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
55 changes: 55 additions & 0 deletions Dockerfile-airgapped
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# ---------------------------------------------------------------------------
# Stage 1 - builder (ONLINE)
#
# Builds everything that has to be downloaded: the Python dependencies (into
# a virtualenv at /venv), the Prisma engine binaries (into /opt/prisma-engines)
# and the generated Prisma client. The runtime stage copies /venv and
# /opt/prisma-engines out of this stage.
# ---------------------------------------------------------------------------
FROM python:3.11-slim-bookworm AS builder

# No .pyc files in the image, and pip keeps no download cache.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1

WORKDIR /app

# Minimal system dependencies; the apt package lists are removed in the same
# layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Isolated virtualenv that is later copied into the runtime stage. Putting its
# bin directory first on PATH makes `python` resolve to the venv interpreter
# for every following instruction.
RUN python -m venv /venv
ENV PATH="/venv/bin:$PATH"

# The requirements file is copied on its own so the dependency layer is only
# rebuilt when requirements.txt changes, not on every source edit.
COPY requirements.txt .
RUN python -m pip install --requirement requirements.txt

# Application source (includes prisma/schema.prisma used below).
COPY . .

# Download the Prisma engines into a fixed cache directory and pre-generate
# the client so neither step is needed later.
# NOTE: PRISMA_QUERY_ENGINE_BINARY is deliberately NOT set here; this stage
# only populates the binary cache.
ENV PRISMA_BINARY_CACHE_DIR=/opt/prisma-engines
RUN mkdir -p "$PRISMA_BINARY_CACHE_DIR" \
    && python -m prisma py fetch \
    && python -m prisma generate --schema=prisma/schema.prisma

##############################
# Stage 2: runtime (AIR-GAPPED)
#
# Final image. Nothing is downloaded here: the virtualenv (dependencies plus
# generated Prisma client) and the Prisma engine cache are copied from the
# builder stage. The service runs as an unprivileged user.
##############################
FROM python:3.11-slim-bookworm AS runtime
ENV PYTHONDONTWRITEBYTECODE=1 PIP_DISABLE_PIP_VERSION_CHECK=1
WORKDIR /app

# Dedicated unprivileged account. A fixed numeric UID/GID lets orchestrators
# (e.g. Kubernetes runAsNonRoot) verify that the container is not root.
# /app is handed to that account because WORKDIR created it as root.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app \
        --home-dir /app --no-create-home --shell /usr/sbin/nologin app \
    && chown app:app /app

# copy venv with installed deps + generated client
COPY --from=builder --chown=app:app /venv /venv
ENV PATH="/venv/bin:$PATH"

# copy app code
COPY --chown=app:app . .

# copy pre-fetched Prisma engines; owned by the runtime user so the engine
# binaries stay executable for it even if only the owner execute bit is set
COPY --from=builder --chown=app:app /opt/prisma-engines /opt/prisma-engines

# Tell Prisma Python where the baked-in cache lives (no network at runtime)
# Important: don't set PRISMA_QUERY_ENGINE_BINARY here; let Prisma pick from the cache.
ENV PRISMA_BINARY_CACHE_DIR=/opt/prisma-engines \
    PRISMA_HIDE_UPDATE_MESSAGE=true

EXPOSE 8000
# NOTE(review): the CMD below passes host and port explicitly, so these two
# variables only matter if the application itself reads them - confirm.
ENV HOST=0.0.0.0 PORT=8000

# Drop root before starting the server (numeric form so it can be verified
# without resolving /etc/passwd).
USER 10001:10001
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

53 changes: 53 additions & 0 deletions k8s/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Deployment running one replica of the litellm-pgvector API container.
# Replace the {{ ... }} placeholders before applying.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-pgvector
  namespace: {{ YOUR_NAMESPACE }}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: litellm-pgvector
  template:
    metadata:
      labels:
        app: litellm-pgvector
    spec:
      containers:
        - name: api
          image: {{ YOUR_IMAGE }}
          imagePullPolicy: IfNotPresent
          ports:
            # Named port; the probes below refer to it as "http".
            - name: http
              containerPort: 8000
          # Every key of the litellm-pgvector-env Secret becomes an
          # environment variable of the container.
          envFrom:
            - secretRef:
                name: litellm-pgvector-env
          # do NOT run prisma generate at runtime
          env:
            - name: RUN_DB_PUSH
              value: "false"
          resources:
            requests:
              cpu: "250m"
              memory: "512Mi"
            limits:
              cpu: "1"
              memory: "1Gi"
          # Readiness: GET /health every 5s after a 10s delay; 6 consecutive
          # failures mark the pod as not ready.
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 6
          # Liveness: GET /health every 10s after a 20s delay; 6 consecutive
          # failures restart the container.
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 20
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 6

33 changes: 33 additions & 0 deletions k8s/secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Secret holding the environment configuration consumed by the
# litellm-pgvector Deployment (via envFrom). All values shown are
# placeholders; replace them before applying.
#
# stringData is a string-to-string map: every value MUST be a YAML string.
# Numeric-looking values (ports, dimensions) are therefore quoted; an
# unquoted integer makes the API server reject the manifest.
apiVersion: v1
kind: Secret
metadata:
  name: litellm-pgvector-env
  namespace: {{ YOUR_NAMESPACE }}
type: Opaque
stringData:
  # Server
  SERVER_API_KEY: "your-api-key-here"
  # Database Configuration
  DATABASE_URL: "postgresql://username:password@localhost:5432/vectordb?schema=public"

  # API Configuration
  OPENAI_API_KEY: "your-api-key-here"

  # Server Configuration
  HOST: "0.0.0.0"
  PORT: "8000"

  # LiteLLM Proxy Configuration
  EMBEDDING__MODEL: "text-embedding-ada-002"
  EMBEDDING__BASE_URL: "http://localhost:4000"
  EMBEDDING__API_KEY: "sk-1234"
  EMBEDDING__DIMENSIONS: "1536"

  # Database Field Configuration (optional)
  DB_FIELDS__ID_FIELD: "id"
  DB_FIELDS__CONTENT_FIELD: "content"
  DB_FIELDS__METADATA_FIELD: "metadata"
  DB_FIELDS__EMBEDDING_FIELD: "embedding"
  DB_FIELDS__VECTOR_STORE_ID_FIELD: "vector_store_id"
  DB_FIELDS__CREATED_AT_FIELD: "created_at"

14 changes: 14 additions & 0 deletions k8s/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Service exposing the litellm-pgvector pods on port 8000 through a
# LoadBalancer. Replace the {{ ... }} placeholder before applying.
apiVersion: v1
kind: Service
metadata:
  name: litellm-pgvector-svc
  namespace: {{ YOUR_NAMESPACE }}
spec:
  type: LoadBalancer
  # Traffic goes to pods carrying this label.
  selector:
    app: litellm-pgvector
  ports:
    # targetPort refers to the container port named "http".
    - name: http
      port: 8000
      targetPort: http

Loading