BerriAI · Badlybear · Sep 17, 2025 · Sep 17, 2025
diff --git a/Dockerfile b/Dockerfile
@@ -29,4 +29,4 @@ RUN prisma generate
 EXPOSE 8000
 
 # Command to run the application
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] 
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] 
diff --git a/Dockerfile-airgapped b/Dockerfile-airgapped
@@ -0,0 +1,66 @@
+##############################
+# Stage 1: builder (ONLINE)
+# Needs network access: pip installs the deps and Prisma engines are fetched here.
+##############################
+FROM python:3.11-slim-bookworm AS builder
+ENV PYTHONDONTWRITEBYTECODE=1 PIP_NO_CACHE_DIR=1
+WORKDIR /app
+
+# minimal system deps
+RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates \
+  && rm -rf /var/lib/apt/lists/*
+
+# isolated venv we'll copy into the runtime
+RUN python -m venv /venv
+ENV PATH="/venv/bin:$PATH"
+
+# install deps (manifest is copied on its own so code edits don't bust this layer)
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+
+# bring in app code
+COPY . .
+
+# Bake Prisma engines into a deterministic cache dir, and pre-generate client
+# NOTE: we DO NOT set PRISMA_QUERY_ENGINE_BINARY here; we only cache binaries.
+ENV PRISMA_BINARY_CACHE_DIR=/opt/prisma-engines
+RUN mkdir -p "$PRISMA_BINARY_CACHE_DIR" \
+ && python -m prisma py fetch \
+ && python -m prisma generate --schema=prisma/schema.prisma
+
+##############################
+# Stage 2: runtime (AIR-GAPPED)
+# Only copies artifacts in; no package or engine downloads happen in this stage.
+##############################
+FROM python:3.11-slim-bookworm AS runtime
+ENV PYTHONDONTWRITEBYTECODE=1 PIP_DISABLE_PIP_VERSION_CHECK=1
+WORKDIR /app
+
+# unprivileged account for the server; fixed numeric IDs so an orchestrator's
+# runAsNonRoot check can verify the image does not run as root
+RUN groupadd --gid 10001 app \
+ && useradd --uid 10001 --gid app --home-dir /app --no-create-home --no-log-init app
+
+# copy venv with installed deps + generated client
+COPY --from=builder /venv /venv
+ENV PATH="/venv/bin:$PATH"
+
+# copy app code
+COPY . .
+
+# copy pre-fetched Prisma engines
+COPY --from=builder /opt/prisma-engines /opt/prisma-engines
+
+# Tell Prisma Python where the baked-in cache lives (no network at runtime)
+# Important: don't set PRISMA_QUERY_ENGINE_BINARY here; let Prisma pick from the cache.
+ENV PRISMA_BINARY_CACHE_DIR=/opt/prisma-engines \
+    PRISMA_HIDE_UPDATE_MESSAGE=true
+
+# Run the server as the unprivileged user created above. The files copied in
+# earlier stay root-owned, so the process can read them but not modify them.
+USER 10001:10001
+
+EXPOSE 8000
+ENV HOST=0.0.0.0 PORT=8000
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+
diff --git a/k8s/deployment.yaml b/k8s/deployment.yaml
@@ -0,0 +1,53 @@
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  namespace: {{ YOUR_NAMESPACE }}
+  name: litellm-pgvector
+spec:
+  replicas: 1  # single pod
+  selector:
+    matchLabels:
+      app: litellm-pgvector  # must equal the pod template label below
+  template:
+    metadata:
+      labels:
+        app: litellm-pgvector
+    spec:
+      containers:
+        - name: api
+          image: {{ YOUR_IMAGE }}
+          imagePullPolicy: IfNotPresent
+          ports:
+            - containerPort: 8000
+              name: http  # the probes below refer to this port by name
+          # prisma generate must NOT run at runtime
+          env:
+            - name: RUN_DB_PUSH
+              value: "false"
+          envFrom:
+            - secretRef:
+                name: litellm-pgvector-env  # every key of this Secret becomes an env var
+          resources:
+            limits:
+              cpu: "1"
+              memory: "1Gi"
+            requests:
+              cpu: "250m"
+              memory: "512Mi"
+          readinessProbe:
+            httpGet:
+              port: http
+              path: /health
+            initialDelaySeconds: 10
+            periodSeconds: 5
+            timeoutSeconds: 3
+            failureThreshold: 6
+          livenessProbe:
+            httpGet:
+              port: http
+              path: /health
+            initialDelaySeconds: 20
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 6
+
diff --git a/k8s/secret.yaml b/k8s/secret.yaml
@@ -0,0 +1,33 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: litellm-pgvector-env
+  namespace: {{ YOUR_NAMESPACE }}
+type: Opaque
+stringData:  # plain-text values; every value must be a YAML string
+  # Server (placeholder value, replace before applying)
+  SERVER_API_KEY: "your-api-key-here"
+  # Database Configuration
+  DATABASE_URL: "postgresql://username:password@localhost:5432/vectordb?schema=public"
+
+  # API Configuration (placeholder value, replace before applying)
+  OPENAI_API_KEY: "your-api-key-here"
+
+  # Server Configuration
+  HOST: "0.0.0.0"
+  PORT: "8000"  # quoted: a bare integer is rejected under stringData
+
+  # LiteLLM Proxy Configuration
+  EMBEDDING__MODEL: "text-embedding-ada-002"
+  EMBEDDING__BASE_URL: "http://localhost:4000"
+  EMBEDDING__API_KEY: "sk-1234"
+  EMBEDDING__DIMENSIONS: "1536"  # quoted for the same reason as PORT
+
+  # Database Field Configuration (optional)
+  DB_FIELDS__ID_FIELD: "id"
+  DB_FIELDS__CONTENT_FIELD: "content"
+  DB_FIELDS__METADATA_FIELD: "metadata"
+  DB_FIELDS__EMBEDDING_FIELD: "embedding"
+  DB_FIELDS__VECTOR_STORE_ID_FIELD: "vector_store_id"
+  DB_FIELDS__CREATED_AT_FIELD: "created_at"
+
diff --git a/k8s/service.yaml b/k8s/service.yaml
@@ -0,0 +1,14 @@
+kind: Service
+apiVersion: v1
+metadata:
+  namespace: {{ YOUR_NAMESPACE }}
+  name: litellm-pgvector-svc
+spec:
+  selector:
+    app: litellm-pgvector  # routes to pods carrying this label
+  type: LoadBalancer
+  ports:
+    - port: 8000  # port exposed by the Service
+      targetPort: http  # named port on the selected pods
+      name: http
+