diff --git a/.gitignore b/.gitignore index b7faf40..7baf72d 100644 --- a/.gitignore +++ b/.gitignore @@ -143,6 +143,7 @@ venv/ ENV/ env.bak/ venv.bak/ +my_env/ # Spyder project settings .spyderproject @@ -205,3 +206,6 @@ cython_debug/ marimo/_static/ marimo/_lsp/ __marimo__/ + +#kfp +pipelines/github_rag_pipeline.yaml \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 5d3a8e7..0000000 --- a/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# docs-agent -Kubeflow Documentation AI Agent to power the Kubeflow Website diff --git a/manifests/inference-service.yaml b/manifests/inference-service.yaml new file mode 100644 index 0000000..ca61feb --- /dev/null +++ b/manifests/inference-service.yaml @@ -0,0 +1,38 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: llama + namespace: santhosh +spec: + predictor: + model: + modelFormat: + name: huggingface + version: "1" + runtime: llm-runtime + args: + - --model_name=llama3.1-8B + - --model_id=RedHatAI/Llama-3.1-8B-Instruct + - --backend=vllm + - --max-model-len=32768 + - --gpu-memory-utilization=0.90 + - --enable-auto-tool-choice + - --tool-call-parser=llama3_json + - --enable-tool-call-parser + env: + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: huggingface-secret + key: token + - name: CUDA_VISIBLE_DEVICES + value: "0" + resources: + requests: + cpu: "4" + memory: "16Gi" + nvidia.com/gpu: "1" + limits: + cpu: "6" + memory: "24Gi" + nvidia.com/gpu: "1" diff --git a/manifests/milvus-deployment.yaml b/manifests/milvus-deployment.yaml new file mode 100644 index 0000000..38c5dd9 --- /dev/null +++ b/manifests/milvus-deployment.yaml @@ -0,0 +1,256 @@ +apiVersion: v1 +kind: Pod +metadata: + annotations: + sidecar.istio.io/inject: "false" + creationTimestamp: "2025-08-03T16:17:18Z" + generateName: milvus-standalone-final-5cb655b8d6- + generation: 1 + labels: + app: milvus-standalone-final + pod-template-hash: 5cb655b8d6 + name: 
milvus-standalone-final-5cb655b8d6-6ngrn + namespace: santhosh + ownerReferences: + - apiVersion: apps/v1 + blockOwnerDeletion: true + controller: true + kind: ReplicaSet + name: milvus-standalone-final-5cb655b8d6 + uid: 7859a7a3-5ff8-41af-8e07-a533a298b141 + resourceVersion: "14225268" + uid: 804d95ff-f4e4-47a4-a7b3-d5c6edaa5042 +spec: + containers: + - command: + - milvus + - run + - standalone + env: + - name: ETCD_ENDPOINTS + value: localhost:2379 + - name: MINIO_ADDRESS + value: localhost:9000 + - name: MINIO_ACCESS_KEY_ID + value: minioadmin + - name: MINIO_SECRET_ACCESS_KEY + value: minioadmin + image: milvusdb/milvus:v2.3.4 + imagePullPolicy: IfNotPresent + name: milvus + ports: + - containerPort: 19530 + protocol: TCP + - containerPort: 9091 + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: 9091 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access-2hjlp + readOnly: true + - command: + - etcd + - --advertise-client-urls=http://127.0.0.1:2379 + - --listen-client-urls=http://0.0.0.0:2379 + - --data-dir=/etcd-data + image: quay.io/coreos/etcd:v3.5.0 + imagePullPolicy: IfNotPresent + name: etcd + ports: + - containerPort: 2379 + protocol: TCP + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etcd-data + name: etcd-data + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access-2hjlp + readOnly: true + - command: + - minio + - server + - /minio-data + - --console-address + - :9001 + env: + - name: MINIO_ROOT_USER + value: minioadmin + - name: MINIO_ROOT_PASSWORD + value: minioadmin + image: minio/minio:RELEASE.2023-03-20T20-16-18Z + imagePullPolicy: IfNotPresent + name: minio 
+ ports: + - containerPort: 9000 + protocol: TCP + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /minio-data + name: minio-data + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access-2hjlp + readOnly: true + dnsPolicy: ClusterFirst + enableServiceLinks: true + nodeName: 10.0.10.183 + preemptionPolicy: PreemptLowerPriority + priority: 0 + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + serviceAccount: default + serviceAccountName: default + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 300 + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 300 + volumes: + - emptyDir: {} + name: etcd-data + - emptyDir: {} + name: minio-data + - name: kube-api-access-2hjlp + projected: + defaultMode: 420 + sources: + - serviceAccountToken: + expirationSeconds: 3607 + path: token + - configMap: + items: + - key: ca.crt + path: ca.crt + name: kube-root-ca.crt + - downwardAPI: + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + path: namespace +status: + conditions: + - lastProbeTime: null + lastTransitionTime: "2025-08-03T16:17:30Z" + status: "True" + type: PodReadyToStartContainers + - lastProbeTime: null + lastTransitionTime: "2025-08-03T16:17:18Z" + status: "True" + type: Initialized + - lastProbeTime: null + lastTransitionTime: "2025-08-03T16:18:01Z" + status: "True" + type: Ready + - lastProbeTime: null + lastTransitionTime: "2025-08-03T16:18:01Z" + status: "True" + type: ContainersReady + - lastProbeTime: null + lastTransitionTime: "2025-08-03T16:17:18Z" + status: "True" + type: PodScheduled + containerStatuses: + - containerID: cri-o://974cb9b3a881ab0f7965bd5e31621686474764d8d01550ef3710e9b2058e48a7 + image: quay.io/coreos/etcd:v3.5.0 + imageID: 
quay.io/coreos/etcd@sha256:28759af54acd6924b2191dc1a1d096e2fa2e219717a21b9d8edf89717db3631b + lastState: {} + name: etcd + ready: true + resources: {} + restartCount: 0 + started: true + state: + running: + startedAt: "2025-08-03T16:17:27Z" + user: + linux: + gid: 0 + supplementalGroups: + - 0 + uid: 0 + volumeMounts: + - mountPath: /etcd-data + name: etcd-data + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access-2hjlp + readOnly: true + recursiveReadOnly: Disabled + - containerID: cri-o://b864b918e771fcb9e1fdb41baeb8f46f5024ecfd08f88679210603506682463b + image: docker.io/milvusdb/milvus:v2.3.4 + imageID: docker.io/milvusdb/milvus@sha256:efd6ef720b6ad0de62d006319996ba18504842ffaa543e3b072aeb5963305907 + lastState: {} + name: milvus + ready: true + resources: {} + restartCount: 0 + started: true + state: + running: + startedAt: "2025-08-03T16:17:25Z" + user: + linux: + gid: 0 + supplementalGroups: + - 0 + uid: 0 + volumeMounts: + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access-2hjlp + readOnly: true + recursiveReadOnly: Disabled + - containerID: cri-o://4a19f6821fa31b1d3f5db68a0b49f0df03cba496770ba4d46922cb3308ff781e + image: docker.io/minio/minio:RELEASE.2023-03-20T20-16-18Z + imageID: docker.io/minio/minio@sha256:6d770d7f255cda1f18d841ffc4365cb7e0d237f6af6a15fcdb587480cd7c3b93 + lastState: {} + name: minio + ready: true + resources: {} + restartCount: 0 + started: true + state: + running: + startedAt: "2025-08-03T16:17:29Z" + user: + linux: + gid: 0 + supplementalGroups: + - 0 + uid: 0 + volumeMounts: + - mountPath: /minio-data + name: minio-data + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access-2hjlp + readOnly: true + recursiveReadOnly: Disabled + hostIP: 10.0.10.183 + hostIPs: + - ip: 10.0.10.183 + phase: Running + podIP: 10.0.10.93 + podIPs: + - ip: 10.0.10.93 + qosClass: BestEffort + startTime: "2025-08-03T16:17:18Z" diff --git 
a/manifests/serving-runtime.yaml b/manifests/serving-runtime.yaml new file mode 100644 index 0000000..5f45810 --- /dev/null +++ b/manifests/serving-runtime.yaml @@ -0,0 +1,23 @@ +apiVersion: serving.kserve.io/v1alpha1 +kind: ServingRuntime +metadata: + name: llm-runtime + namespace: santhosh +spec: + supportedModelFormats: + - name: huggingface + version: "1" + autoSelect: true + containers: + - name: kserve-container + image: kserve/huggingfaceserver:latest-gpu + command: ["python", "-m", "huggingfaceserver"] + resources: + requests: + cpu: "4" + memory: "16Gi" + nvidia.com/gpu: "1" + limits: + cpu: "6" + memory: "24Gi" + nvidia.com/gpu: "1" \ No newline at end of file diff --git a/pipelines/github_rag_pipeline.yaml b/pipelines/github_rag_pipeline.yaml new file mode 100644 index 0000000..e6c8bc8 --- /dev/null +++ b/pipelines/github_rag_pipeline.yaml @@ -0,0 +1,393 @@ +# PIPELINE DEFINITION +# Name: github-rag-full-build +# Description: RAG pipeline for processing GitHub documentation +# Inputs: +# base_url: str [Default: 'https://www.kubeflow.org/docs'] +# chunk_overlap: int [Default: 100.0] +# chunk_size: int [Default: 1000.0] +# collection_name: str [Default: 'docs_rag'] +# directory_path: str [Default: 'content/en'] +# github_token: str [Default: ''] +# milvus_host: str [Default: 'milvus-standalone-final.santhosh.svc.cluster.local'] +# milvus_port: str [Default: '19530'] +# repo_name: str [Default: 'website'] +# repo_owner: str [Default: 'kubeflow'] +components: + comp-chunk-and-embed: + executorLabel: exec-chunk-and-embed + inputDefinitions: + artifacts: + github_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + base_url: + parameterType: STRING + chunk_overlap: + parameterType: NUMBER_INTEGER + chunk_size: + parameterType: NUMBER_INTEGER + repo_name: + parameterType: STRING + outputDefinitions: + artifacts: + embedded_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + 
comp-download-github-directory: + executorLabel: exec-download-github-directory + inputDefinitions: + parameters: + directory_path: + parameterType: STRING + github_token: + parameterType: STRING + repo_name: + parameterType: STRING + repo_owner: + parameterType: STRING + outputDefinitions: + artifacts: + github_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-store-milvus: + executorLabel: exec-store-milvus + inputDefinitions: + artifacts: + embedded_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + collection_name: + parameterType: STRING + milvus_host: + parameterType: STRING + milvus_port: + parameterType: STRING +deploymentSpec: + executors: + exec-chunk-and-embed: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - chunk_and_embed + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'sentence-transformers'\ + \ 'langchain' && python3 -m pip install --quiet --no-warn-script-location\ + \ 'kfp==2.14.2' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"\ + 3.9\"' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef chunk_and_embed(\n github_data: dsl.Input[dsl.Dataset],\n\ + \ repo_name: str,\n base_url: str,\n chunk_size: int,\n chunk_overlap:\ + \ int,\n embedded_data: dsl.Output[dsl.Dataset]\n):\n import json\n\ + \ import os\n import re\n import torch\n from sentence_transformers\ + \ import SentenceTransformer\n from langchain.text_splitter 
import RecursiveCharacterTextSplitter\n\ + \n device = 'cuda' if torch.cuda.is_available() else 'cpu'\n model\ + \ = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device=device)\n\ + \ print(f\"Model loaded on {device}\")\n\n records = []\n\n with\ + \ open(github_data.path, 'r', encoding='utf-8') as f:\n for line\ + \ in f:\n file_data = json.loads(line)\n content =\ + \ file_data['content']\n\n # AGGRESSIVE CLEANING FOR BETTER EMBEDDINGS\n\ + \n # Remove Hugo frontmatter (both --- and +++ styles)\n \ + \ content = re.sub(r'^\\s*[+\\-]{3,}.*?[+\\-]{3,}\\s*', '', content,\ + \ flags=re.DOTALL | re.MULTILINE)\n\n # Remove Hugo template\ + \ syntax\n content = re.sub(r'\\{\\{.*?\\}\\}', '', content,\ + \ flags=re.DOTALL)\n\n # Remove HTML comments and tags\n \ + \ content = re.sub(r'', '', content, flags=re.DOTALL)\n\ + \ content = re.sub(r'<[^>]+>', ' ', content)\n\n #\ + \ Remove navigation/menu artifacts\n content = re.sub(r'\\b(Get\ + \ Started|Contribute|GenAI|Home|Menu|Navigation)\\b', '', content, flags=re.IGNORECASE)\n\ + \n # Clean up URLs and links\n content = re.sub(r'https?://[^\\\ + s]+', '', content)\n content = re.sub(r'\\[([^\\]]+)\\]\\([^\\\ + )]+\\)', r'\\1', content) # Convert [text](url) to text\n\n \ + \ # Remove excessive whitespace and normalize\n content = re.sub(r'\\\ + s+', ' ', content) # Multiple spaces to single\n content = re.sub(r'\\\ + n\\s*\\n\\s*\\n+', '\\n\\n', content) # Multiple newlines to double\n \ + \ content = content.strip()\n\n # Skip files that are\ + \ too short after cleaning\n if len(content) < 50:\n \ + \ print(f\"Skipping file after cleaning: {file_data['path']} ({len(content)}\ + \ chars)\")\n continue\n\n # Build citation URL\ + \ (same as before)\n path_parts = file_data['path'].split('/')\n\ + \ if 'content/en/docs' in file_data['path']:\n \ + \ docs_index = path_parts.index('docs')\n url_path = '/'.join(path_parts[docs_index+1:])\n\ + \ url_path = os.path.splitext(url_path)[0]\n \ + \ 
citation_url = f\"{base_url}/{url_path}\"\n else:\n \ + \ citation_url = f\"{base_url}/{file_data['path']}\"\n\n \ + \ file_unique_id = f\"{repo_name}:{file_data['path']}\"\n\n \ + \ # Create splitter\n text_splitter = RecursiveCharacterTextSplitter(\n\ + \ chunk_size=chunk_size,\n chunk_overlap=chunk_overlap,\n\ + \ length_function=len,\n separators=[\"\\\ + n\\n\", \"\\n\", \". \", \" \", \"\"]\n )\n\n # Split\ + \ into chunks\n chunks = text_splitter.split_text(content)\n\n\ + \ print(f\"File: {file_data['path']} -> {len(chunks)} chunks\ + \ (avg: {sum(len(c) for c in chunks)/len(chunks):.0f} chars)\")\n\n \ + \ # Create embeddings\n for chunk_idx, chunk in enumerate(chunks):\n\ + \ embedding = model.encode(chunk).tolist()\n \ + \ records.append({\n 'file_unique_id': file_unique_id,\n\ + \ 'repo_name': repo_name,\n 'file_path':\ + \ file_data['path'],\n 'file_name': file_data['file_name'],\n\ + \ 'citation_url': citation_url[:1024],\n \ + \ 'chunk_index': chunk_idx,\n 'content_text':\ + \ chunk[:2000],\n 'embedding': embedding\n \ + \ })\n\n print(f\"Created {len(records)} total chunks\")\n\n \ + \ with open(embedded_data.path, 'w', encoding='utf-8') as f:\n for\ + \ record in records:\n f.write(json.dumps(record, ensure_ascii=False)\ + \ + '\\n')\n\n" + image: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime + exec-download-github-directory: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - download_github_directory + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'requests' 'beautifulsoup4'\ + \ && python3 -m pip install --quiet --no-warn-script-location 'kfp==2.14.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef download_github_directory(\n repo_owner: str,\n repo_name:\ + \ str,\n directory_path: str,\n github_token: str,\n github_data:\ + \ dsl.Output[dsl.Dataset]\n):\n import requests\n import json\n \ + \ import base64\n from bs4 import BeautifulSoup\n\n headers = {\"\ + Authorization\": f\"token {github_token}\"} if github_token else {}\n \ + \ api_url = f\"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{directory_path}\"\ + \n\n def get_files_recursive(url):\n files = []\n try:\n\ + \ response = requests.get(url, headers=headers)\n \ + \ response.raise_for_status()\n items = response.json()\n\n \ + \ for item in items:\n if item['type'] == 'file'\ + \ and (item['name'].endswith('.md') or item['name'].endswith('.html')):\n\ + \ file_response = requests.get(item['url'], headers=headers)\n\ + \ file_response.raise_for_status()\n \ + \ file_data = file_response.json()\n content = base64.b64decode(file_data['content']).decode('utf-8')\n\ + \n # Extract text from HTML files\n \ + \ if item['name'].endswith('.html'):\n soup = BeautifulSoup(content,\ + \ 'html.parser')\n content = soup.get_text(separator='\ + \ ', strip=True)\n\n files.append({\n \ + \ 'path': item['path'],\n 'content': content,\n\ + \ 
'file_name': item['name']\n \ + \ })\n elif item['type'] == 'dir':\n files.extend(get_files_recursive(item['url']))\n\ + \ except Exception as e:\n print(f\"Error fetching {url}:\ + \ {e}\")\n return files\n\n files = get_files_recursive(api_url)\n\ + \ print(f\"Downloaded {len(files)} files\")\n\n with open(github_data.path,\ + \ 'w', encoding='utf-8') as f:\n for file_data in files:\n \ + \ f.write(json.dumps(file_data, ensure_ascii=False) + '\\n')\n\n" + image: python:3.9 + exec-store-milvus: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - store_milvus + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'pymilvus' 'numpy'\ + \ && python3 -m pip install --quiet --no-warn-script-location 'kfp==2.14.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef store_milvus(\n embedded_data: dsl.Input[dsl.Dataset],\n \ + \ milvus_host: str,\n milvus_port: str,\n collection_name: str\n\ + ):\n from pymilvus import connections, utility, FieldSchema, CollectionSchema,\ + \ DataType, Collection\n import json\n from datetime import datetime\n\ + \n connections.connect(\"default\", host=milvus_host, port=milvus_port)\n\ + \n # DROP existing collection to fix schema mismatch\n if utility.has_collection(collection_name):\n\ + \ utility.drop_collection(collection_name)\n print(f\"Dropped\ + \ existing collection: {collection_name}\")\n\n # Enhanced schema with\ + \ 768 
dimensions\n fields = [\n FieldSchema(name=\"id\", dtype=DataType.INT64,\ + \ is_primary=True, auto_id=True),\n FieldSchema(name=\"file_unique_id\"\ + , dtype=DataType.VARCHAR, max_length=512),\n FieldSchema(name=\"\ + repo_name\", dtype=DataType.VARCHAR, max_length=256),\n FieldSchema(name=\"\ + file_path\", dtype=DataType.VARCHAR, max_length=512),\n FieldSchema(name=\"\ + file_name\", dtype=DataType.VARCHAR, max_length=256),\n FieldSchema(name=\"\ + citation_url\", dtype=DataType.VARCHAR, max_length=1024),\n FieldSchema(name=\"\ + chunk_index\", dtype=DataType.INT64),\n FieldSchema(name=\"content_text\"\ + , dtype=DataType.VARCHAR, max_length=2000),\n FieldSchema(name=\"\ + vector\", dtype=DataType.FLOAT_VECTOR, dim=768), # Updated for all-mpnet-base-v2\n\ + \ FieldSchema(name=\"last_updated\", dtype=DataType.INT64)\n ]\n\ + \n # Create new collection with correct schema\n schema = CollectionSchema(fields,\ + \ \"RAG collection for documentation\")\n collection = Collection(collection_name,\ + \ schema)\n print(f\"Created new collection: {collection_name}\")\n\n\ + \ # Rest of your existing code remains the same...\n records = []\n\ + \ timestamp = int(datetime.now().timestamp())\n\n with open(embedded_data.path,\ + \ 'r', encoding='utf-8') as f:\n for line in f:\n record\ + \ = json.loads(line)\n records.append({\n \"file_unique_id\"\ + : record[\"file_unique_id\"],\n \"repo_name\": record[\"\ + repo_name\"],\n \"file_path\": record[\"file_path\"],\n \ + \ \"file_name\": record[\"file_name\"],\n \"\ + citation_url\": record[\"citation_url\"],\n \"chunk_index\"\ + : record[\"chunk_index\"],\n \"content_text\": record[\"\ + content_text\"],\n \"vector\": record[\"embedding\"],\n \ + \ \"last_updated\": timestamp\n })\n\n if records:\n\ + \ batch_size = 1000\n for i in range(0, len(records), batch_size):\n\ + \ batch = records[i:i + batch_size]\n collection.insert(batch)\n\ + \n collection.flush()\n\n # Create index\n index_params\ + \ = {\n \"metric_type\": 
\"COSINE\",\n \"index_type\"\ + : \"IVF_FLAT\", \n \"params\": {\"nlist\": min(1024, len(records))}\n\ + \ }\n collection.create_index(\"vector\", index_params)\n\ + \ collection.load()\n print(f\"\u2705 Inserted {len(records)}\ + \ records. Total: {collection.num_entities}\")\n\n" + image: python:3.9 +pipelineInfo: + description: RAG pipeline for processing GitHub documentation + name: github-rag-full-build +root: + dag: + tasks: + chunk-and-embed: + cachingOptions: + enableCache: true + componentRef: + name: comp-chunk-and-embed + dependentTasks: + - download-github-directory + inputs: + artifacts: + github_data: + taskOutputArtifact: + outputArtifactKey: github_data + producerTask: download-github-directory + parameters: + base_url: + componentInputParameter: base_url + chunk_overlap: + componentInputParameter: chunk_overlap + chunk_size: + componentInputParameter: chunk_size + repo_name: + componentInputParameter: repo_name + taskInfo: + name: chunk-and-embed + download-github-directory: + cachingOptions: + enableCache: true + componentRef: + name: comp-download-github-directory + inputs: + parameters: + directory_path: + componentInputParameter: directory_path + github_token: + componentInputParameter: github_token + repo_name: + componentInputParameter: repo_name + repo_owner: + componentInputParameter: repo_owner + taskInfo: + name: download-github-directory + store-milvus: + cachingOptions: + enableCache: true + componentRef: + name: comp-store-milvus + dependentTasks: + - chunk-and-embed + inputs: + artifacts: + embedded_data: + taskOutputArtifact: + outputArtifactKey: embedded_data + producerTask: chunk-and-embed + parameters: + collection_name: + componentInputParameter: collection_name + milvus_host: + componentInputParameter: milvus_host + milvus_port: + componentInputParameter: milvus_port + taskInfo: + name: store-milvus + inputDefinitions: + parameters: + base_url: + defaultValue: https://www.kubeflow.org/docs + isOptional: true + parameterType: 
STRING + chunk_overlap: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + chunk_size: + defaultValue: 1000.0 + isOptional: true + parameterType: NUMBER_INTEGER + collection_name: + defaultValue: docs_rag + isOptional: true + parameterType: STRING + directory_path: + defaultValue: content/en + isOptional: true + parameterType: STRING + github_token: + defaultValue: '' + isOptional: true + parameterType: STRING + milvus_host: + defaultValue: milvus-standalone-final.santhosh.svc.cluster.local + isOptional: true + parameterType: STRING + milvus_port: + defaultValue: '19530' + isOptional: true + parameterType: STRING + repo_name: + defaultValue: website + isOptional: true + parameterType: STRING + repo_owner: + defaultValue: kubeflow + isOptional: true + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.14.2 diff --git a/pipelines/kubeflow-pipeline.py b/pipelines/kubeflow-pipeline.py new file mode 100644 index 0000000..96fd83f --- /dev/null +++ b/pipelines/kubeflow-pipeline.py @@ -0,0 +1,292 @@ +import kfp +from kfp import dsl +from kfp.dsl import * +from typing import * + +@dsl.component( + base_image="python:3.9", + packages_to_install=["requests", "beautifulsoup4"] +) +def download_github_directory( + repo_owner: str, + repo_name: str, + directory_path: str, + github_token: str, + github_data: dsl.Output[dsl.Dataset] +): + import requests + import json + import base64 + from bs4 import BeautifulSoup + + headers = {"Authorization": f"token {github_token}"} if github_token else {} + api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{directory_path}" + + def get_files_recursive(url): + files = [] + try: + response = requests.get(url, headers=headers) + response.raise_for_status() + items = response.json() + + for item in items: + if item['type'] == 'file' and (item['name'].endswith('.md') or item['name'].endswith('.html')): + file_response = requests.get(item['url'], headers=headers) + 
file_response.raise_for_status() + file_data = file_response.json() + content = base64.b64decode(file_data['content']).decode('utf-8') + + # Extract text from HTML files + if item['name'].endswith('.html'): + soup = BeautifulSoup(content, 'html.parser') + content = soup.get_text(separator=' ', strip=True) + + files.append({ + 'path': item['path'], + 'content': content, + 'file_name': item['name'] + }) + elif item['type'] == 'dir': + files.extend(get_files_recursive(item['url'])) + except Exception as e: + print(f"Error fetching {url}: {e}") + return files + + files = get_files_recursive(api_url) + print(f"Downloaded {len(files)} files") + + with open(github_data.path, 'w', encoding='utf-8') as f: + for file_data in files: + f.write(json.dumps(file_data, ensure_ascii=False) + '\n') + + +@dsl.component( + base_image="pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime", + packages_to_install=["sentence-transformers", "langchain"] +) +def chunk_and_embed( + github_data: dsl.Input[dsl.Dataset], + repo_name: str, + base_url: str, + chunk_size: int, + chunk_overlap: int, + embedded_data: dsl.Output[dsl.Dataset] +): + import json + import os + import re + import torch + from sentence_transformers import SentenceTransformer + from langchain.text_splitter import RecursiveCharacterTextSplitter + + device = 'cuda' if torch.cuda.is_available() else 'cpu' + model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device=device) + print(f"Model loaded on {device}") + + records = [] + + with open(github_data.path, 'r', encoding='utf-8') as f: + for line in f: + file_data = json.loads(line) + content = file_data['content'] + + # AGGRESSIVE CLEANING FOR BETTER EMBEDDINGS + + # Remove Hugo frontmatter (both --- and +++ styles) + content = re.sub(r'^\s*[+\-]{3,}.*?[+\-]{3,}\s*', '', content, flags=re.DOTALL | re.MULTILINE) + + # Remove Hugo template syntax + content = re.sub(r'\{\{.*?\}\}', '', content, flags=re.DOTALL) + + # Remove HTML comments and tags + content = 
re.sub(r'', '', content, flags=re.DOTALL) + content = re.sub(r'<[^>]+>', ' ', content) + + # Remove navigation/menu artifacts + content = re.sub(r'\b(Get Started|Contribute|GenAI|Home|Menu|Navigation)\b', '', content, flags=re.IGNORECASE) + + # Clean up URLs and links + content = re.sub(r'https?://[^\s]+', '', content) + content = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', content) # Convert [text](url) to text + + # Remove excessive whitespace and normalize + content = re.sub(r'\s+', ' ', content) # Multiple spaces to single + content = re.sub(r'\n\s*\n\s*\n+', '\n\n', content) # Multiple newlines to double + content = content.strip() + + # Skip files that are too short after cleaning + if len(content) < 50: + print(f"Skipping file after cleaning: {file_data['path']} ({len(content)} chars)") + continue + + # Build citation URL (same as before) + path_parts = file_data['path'].split('/') + if 'content/en/docs' in file_data['path']: + docs_index = path_parts.index('docs') + url_path = '/'.join(path_parts[docs_index+1:]) + url_path = os.path.splitext(url_path)[0] + citation_url = f"{base_url}/{url_path}" + else: + citation_url = f"{base_url}/{file_data['path']}" + + file_unique_id = f"{repo_name}:{file_data['path']}" + + # Create splitter + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + length_function=len, + separators=["\n\n", "\n", ". 
", " ", ""] + ) + + # Split into chunks + chunks = text_splitter.split_text(content) + + print(f"File: {file_data['path']} -> {len(chunks)} chunks (avg: {sum(len(c) for c in chunks)/len(chunks):.0f} chars)") + + # Create embeddings + for chunk_idx, chunk in enumerate(chunks): + embedding = model.encode(chunk).tolist() + records.append({ + 'file_unique_id': file_unique_id, + 'repo_name': repo_name, + 'file_path': file_data['path'], + 'file_name': file_data['file_name'], + 'citation_url': citation_url[:1024], + 'chunk_index': chunk_idx, + 'content_text': chunk[:2000], + 'embedding': embedding + }) + + print(f"Created {len(records)} total chunks") + + with open(embedded_data.path, 'w', encoding='utf-8') as f: + for record in records: + f.write(json.dumps(record, ensure_ascii=False) + '\n') + + +@dsl.component( + base_image="python:3.9", + packages_to_install=["pymilvus", "numpy"] +) +def store_milvus( + embedded_data: dsl.Input[dsl.Dataset], + milvus_host: str, + milvus_port: str, + collection_name: str +): + from pymilvus import connections, utility, FieldSchema, CollectionSchema, DataType, Collection + import json + from datetime import datetime + + connections.connect("default", host=milvus_host, port=milvus_port) + + # DROP existing collection to fix schema mismatch + if utility.has_collection(collection_name): + utility.drop_collection(collection_name) + print(f"Dropped existing collection: {collection_name}") + + # Enhanced schema with 768 dimensions + fields = [ + FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True), + FieldSchema(name="file_unique_id", dtype=DataType.VARCHAR, max_length=512), + FieldSchema(name="repo_name", dtype=DataType.VARCHAR, max_length=256), + FieldSchema(name="file_path", dtype=DataType.VARCHAR, max_length=512), + FieldSchema(name="file_name", dtype=DataType.VARCHAR, max_length=256), + FieldSchema(name="citation_url", dtype=DataType.VARCHAR, max_length=1024), + FieldSchema(name="chunk_index", 
dtype=DataType.INT64), + FieldSchema(name="content_text", dtype=DataType.VARCHAR, max_length=2000), + FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=768), # Updated for all-mpnet-base-v2 + FieldSchema(name="last_updated", dtype=DataType.INT64) + ] + + # Create new collection with correct schema + schema = CollectionSchema(fields, "RAG collection for documentation") + collection = Collection(collection_name, schema) + print(f"Created new collection: {collection_name}") + + # Rest of your existing code remains the same... + records = [] + timestamp = int(datetime.now().timestamp()) + + with open(embedded_data.path, 'r', encoding='utf-8') as f: + for line in f: + record = json.loads(line) + records.append({ + "file_unique_id": record["file_unique_id"], + "repo_name": record["repo_name"], + "file_path": record["file_path"], + "file_name": record["file_name"], + "citation_url": record["citation_url"], + "chunk_index": record["chunk_index"], + "content_text": record["content_text"], + "vector": record["embedding"], + "last_updated": timestamp + }) + + if records: + batch_size = 1000 + for i in range(0, len(records), batch_size): + batch = records[i:i + batch_size] + collection.insert(batch) + + collection.flush() + + # Create index + index_params = { + "metric_type": "COSINE", + "index_type": "IVF_FLAT", + "params": {"nlist": min(1024, len(records))} + } + collection.create_index("vector", index_params) + collection.load() + print(f"✅ Inserted {len(records)} records. 
Total: {collection.num_entities}") + + +@dsl.pipeline( + name="github-rag-full-build", + description="RAG pipeline for processing GitHub documentation" +) +def github_rag_pipeline( + repo_owner: str = "kubeflow", + repo_name: str = "website", + directory_path: str = "content/en", + github_token: str = "", + base_url: str = "https://www.kubeflow.org/docs", + chunk_size: int = 1000, + chunk_overlap: int = 100, + milvus_host: str = "milvus-standalone-final.santhosh.svc.cluster.local", + milvus_port: str = "19530", + collection_name: str = "docs_rag" +): + # Download GitHub directory + download_task = download_github_directory( + repo_owner=repo_owner, + repo_name=repo_name, + directory_path=directory_path, + github_token=github_token + ) + + # Chunk and embed the content + chunk_task = chunk_and_embed( + github_data=download_task.outputs["github_data"], + repo_name=repo_name, + base_url=base_url, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap + ) + + # Store in Milvus + store_task = store_milvus( + embedded_data=chunk_task.outputs["embedded_data"], + milvus_host=milvus_host, + milvus_port=milvus_port, + collection_name=collection_name + ) + +if __name__ == "__main__": + # Compile the pipeline + kfp.compiler.Compiler().compile( + pipeline_func=github_rag_pipeline, + package_path="github_rag_pipeline.yaml" + ) \ No newline at end of file diff --git a/server/Dockerfile b/server/Dockerfile new file mode 100644 index 0000000..0eb7d80 --- /dev/null +++ b/server/Dockerfile @@ -0,0 +1,29 @@ +FROM python:3.11-slim + +# Create non-root user +RUN useradd -m -u 1000 appuser +WORKDIR /app + +# Install deps +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +# Writable caches for HF/Transformers/Sentence-Transformers +ENV HF_HOME=/home/appuser/.cache/huggingface \ + TRANSFORMERS_CACHE=/home/appuser/.cache/huggingface \ + SENTENCE_TRANSFORMERS_HOME=/home/appuser/.cache/sentence_transformers \ + XDG_CACHE_HOME=/home/appuser/.cache \ + PORT=8000 + 
# Ensure directories exist and owned by appuser
RUN mkdir -p $HF_HOME $SENTENCE_TRANSFORMERS_HOME $XDG_CACHE_HOME && \
    chown -R appuser:appuser /home/appuser

# Switch to non-root before running the app
USER appuser

# App
# NOTE(review): COPY after USER still writes a root-owned file (no --chown);
# it is world-readable, which is all the interpreter needs -- confirm intent.
COPY app.py /app/

EXPOSE 8000
CMD ["python", "-u", "app.py"]
diff --git a/server/app.py b/server/app.py
new file mode 100644
index 0000000..8fd4596
--- /dev/null
+++ b/server/app.py
@@ -0,0 +1,247 @@
import os
import json
import asyncio
import httpx
import websockets
from websockets.server import serve
from websockets.exceptions import InvalidMessage, ConnectionClosedError
import logging
from pymilvus import connections, Collection
from sentence_transformers import SentenceTransformer

# Config -- KSERVE_URL/MODEL/PORT are env-overridable; Milvus settings are fixed.
KSERVE_URL = os.getenv("KSERVE_URL", "http://llama.santhosh.svc.cluster.local/openai/v1/chat/completions")
MODEL = os.getenv("MODEL", "llama3.1-8B")
PORT = int(os.getenv("PORT", "8000"))
MILVUS_HOST = "milvus-standalone-final.santhosh.svc.cluster.local"
MILVUS_PORT = "19530"
COLLECTION_NAME = "docs_rag"

# Load embedding model once (module import time -- first request is not slowed).
embedding_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

def rag_search(query: str) -> str:
    """Search Milvus for the top-3 chunks relevant to `query`.

    Returns a prompt-ready context string, or a string starting with
    "RAG search failed" on any error (handle_chat matches on that prefix).
    """
    try:
        connections.connect("default", host=MILVUS_HOST, port=MILVUS_PORT)
        collection = Collection(COLLECTION_NAME)
        collection.load()

        query_vec = embedding_model.encode(query).tolist()
        results = collection.search(
            data=[query_vec],
            anns_field="vector",
            param={"metric_type": "COSINE", "params": {"nprobe": 32}},
            limit=3,
            output_fields=["file_path", "content_text", "citation_url"]
        )

        # Concatenate the hits into a single block with their citation URLs.
        context = ""
        for hit in results[0]:
            content = hit.entity.get('content_text', '')
            url = hit.entity.get('citation_url', '')
            context += f"Source: {url}\nContent: {content}\n\n"

        # NOTE(review): disconnect is skipped when an exception fires above;
        # presumably harmless because connect() reuses the "default" alias --
        # verify against the pinned pymilvus version.
        connections.disconnect("default")
        return context
    except Exception as e:
        return f"RAG search failed: {e}"

# System prompt to control tool usage
SYSTEM_PROMPT = """You are a helpful AI assistant with access to Kubeflow documentation through a search function.

IMPORTANT: You have two response modes:

1. KUBEFLOW MODE:
   - For questions about: Kubeflow, KServe, Katib
   - Use the search function silently (don't mention it)
   - Start responses with "[KUBEFLOW]"
   - Be detailed but concise

2. GENERAL MODE:
   - For everything else (jokes, math, chat)
   - Don't use search function
   - Start responses with "[GENERAL]"
   - Keep responses short

CRITICAL RULES:
- NEVER describe or mention the search function
- NEVER output function calls or JSON
- NEVER explain what you're going to do
- Just DO it and give the answer

Example good responses:
[KUBEFLOW] Kubeflow can be installed using...
[GENERAL] Here's a joke: Why did the...

Example BAD responses:
"I'll use the search function..."
"The function to use is..."
{any JSON or function calls}"""

# OpenAI-style tool schema advertised to the model; the name must match the
# checks in handle_chat below.
RAG_TOOL = {
    "type": "function",
    "function": {
        "name": "search_kubeflow_docs",
        "description": "Search Kubeflow documentation. ONLY call this function, do not describe it. NEVER output the function call details.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"}
            },
            "required": ["query"]
        }
    }
}

async def handle_chat(message: str, websocket) -> None:
    """Handle chat with RAG tool support - simple non-streaming approach.

    Flow: one tools-enabled completion; if the model requests
    search_kubeflow_docs, run rag_search and ask again with the retrieved
    context (falling back to a plain completion when retrieval fails);
    otherwise relay the model's direct answer. All replies go out over
    `websocket`; nothing is returned.
    """

    try:
        async with httpx.AsyncClient(timeout=120) as client:
            # Single request with tools - let LLM decide if RAG is needed
            payload = {
                "model": MODEL,
                "messages": [
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": message}
                ],
                "tools": [RAG_TOOL],
                "tool_choice": "auto",
                "stream": False,
                "max_tokens": 1500
            }

            response = await client.post(KSERVE_URL, json=payload, headers={"content-type": "application/json"})

            if response.status_code != 200:
                await websocket.send(f"ERROR: HTTP {response.status_code}: {response.text}")
                return

            result = response.json()
            choice = result.get("choices", [{}])[0]
            message_obj = choice.get("message", {})

            # Get direct content and tool calls
            direct_content = message_obj.get("content", "")
            tool_calls = message_obj.get("tool_calls", [])

            # Handle Kubeflow questions with tool calls
            if tool_calls and any(tc["function"]["name"] == "search_kubeflow_docs" for tc in tool_calls):
                try:
                    # Get the search query
                    tool_call = next(tc for tc in tool_calls if tc["function"]["name"] == "search_kubeflow_docs")
                    args = json.loads(tool_call["function"]["arguments"])
                    query = args["query"]

                    # Perform RAG search silently
                    context = rag_search(query)

                    if "RAG search failed" in context:
                        # Fallback to general response
                        fallback_payload = {
                            "model": MODEL,
                            "messages": [
                                {"role": "system", "content": SYSTEM_PROMPT},
                                {"role": "user", "content": message}
                            ],
                            "stream": False,
                            "max_tokens": 800
                        }
                        fallback_response = await client.post(KSERVE_URL, json=fallback_payload, headers={"content-type": "application/json"})

                        if fallback_response.status_code == 200:
                            fallback_result = fallback_response.json()
                            fallback_content = fallback_result.get("choices", [{}])[0].get("message", {}).get("content", "")
                            await websocket.send(f"[GENERAL] {fallback_content}")
                        else:
                            await websocket.send("[ERROR] Could not generate response")
                        return

                    # Generate response with context
                    final_payload = {
                        "model": MODEL,
                        "messages": [
                            {"role": "system", "content": SYSTEM_PROMPT},
                            {"role": "user", "content": message},
                            {"role": "system", "content": f"Use this documentation to answer:\n\n{context}"}
                        ],
                        "stream": False,
                        "max_tokens": 800
                    }

                    final_response = await client.post(KSERVE_URL, json=final_payload, headers={"content-type": "application/json"})

                    if final_response.status_code == 200:
                        final_result = final_response.json()
                        final_content = final_result.get("choices", [{}])[0].get("message", {}).get("content", "")

                        if final_content.strip():
                            # Ensure response starts with [KUBEFLOW]
                            if not final_content.startswith("[KUBEFLOW]"):
                                final_content = f"[KUBEFLOW] {final_content}"
                            await websocket.send(final_content)
                        else:
                            await websocket.send("[ERROR] Empty response from LLM")
                    else:
                        await websocket.send(f"[ERROR] Failed to generate response: {final_response.status_code}")

                except Exception as e:
                    await websocket.send(f"[ERROR] Processing failed: {e}")

            else:
                # Handle direct responses (non-Kubeflow)
                if direct_content.strip():
                    # Ensure response starts with [GENERAL]
                    if not direct_content.startswith("[GENERAL]"):
                        direct_content = f"[GENERAL] {direct_content}"
                    await websocket.send(direct_content)
                else:
                    await websocket.send("[ERROR] No response generated")

    except Exception as e:
        await websocket.send(f"ERROR: Request failed: {e}")

async def handle_websocket(websocket, path):
    """Handle WebSocket connections with proper error handling.

    NOTE(review): the (websocket, path) two-argument handler is the legacy
    websockets API (matching the `websockets.server.serve` import above);
    pin the websockets version in requirements.txt to keep it working.
    """
    try:
        print(f"New WebSocket connection from {websocket.remote_address}")
        async for message in
websocket: + print(f"Received message: {message[:100]}...") + await handle_chat(message, websocket) + except ConnectionClosedError: + print("WebSocket connection closed normally") + except Exception as e: + print(f"WebSocket error: {e}") + try: + await websocket.send(f"ERROR: Connection error: {str(e)}") + except: + pass + +async def health_check(path, request_headers): + """Handle HTTP health checks""" + if path == "/health": + return 200, [("Content-Type", "text/plain")], b"OK" + return None + +async def main(): + print(f"Starting RAG WebSocket server on 0.0.0.0:{PORT}") + print(f"Llama service: {KSERVE_URL}") + print(f"Milvus: {MILVUS_HOST}:{MILVUS_PORT}") + + # Configure logging to reduce noise + logging.getLogger("websockets").setLevel(logging.WARNING) + + async with serve( + handle_websocket, + "0.0.0.0", + PORT, + process_request=health_check, + ping_interval=None, # Disable ping/pong + ping_timeout=None + ): + print("WebSocket server is running...") + await asyncio.Future() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/server/deployment.yaml b/server/deployment.yaml new file mode 100644 index 0000000..dcdec4c --- /dev/null +++ b/server/deployment.yaml @@ -0,0 +1,96 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ws-proxy + labels: + app: ws-proxy + version: v1 +spec: + replicas: 1 + strategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 0 + maxSurge: 1 + selector: + matchLabels: + app: ws-proxy + template: + metadata: + labels: + app: ws-proxy + version: v1 + spec: + containers: + - name: ws-proxy + image: santhoshtoorpu/ws-proxy:latest + imagePullPolicy: IfNotPresent # actually reduce pulls + resources: + requests: + cpu: "300m" + memory: "1Gi" + ephemeral-storage: "4Gi" + limits: + cpu: "1500m" + memory: "2Gi" + ephemeral-storage: "8Gi" + env: + # Match your app.py variable names + - name: KSERVE_URL + value: "http://llama.santhosh.svc.cluster.local/openai/v1/chat/completions" + - 
name: MODEL
          value: "llama3.1-8B"
        - name: PORT
          value: "8000"
        - name: PYTHONUNBUFFERED
          value: "1"

        # Make cache paths explicit and writable (align with Dockerfile)
        - name: HF_HOME
          value: "/home/appuser/.cache/huggingface"
        # NOTE(review): TRANSFORMERS_CACHE is deprecated in newer
        # transformers releases in favor of HF_HOME -- confirm the installed
        # library version still honors it.
        - name: TRANSFORMERS_CACHE
          value: "/home/appuser/.cache/huggingface"
        - name: SENTENCE_TRANSFORMERS_HOME
          value: "/home/appuser/.cache/sentence_transformers"
        - name: XDG_CACHE_HOME
          value: "/home/appuser/.cache"

        ports:
        - containerPort: 8000
          name: http
          protocol: TCP

        # Probes hit the HTTP /health endpoint served by app.py's
        # process_request hook on the same port as the WebSocket listener.
        livenessProbe:
          httpGet:
            path: /health
            port: 8000
          initialDelaySeconds: 30
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3

        readinessProbe:
          httpGet:
            path: /health
            port: 8000
          initialDelaySeconds: 10
          periodSeconds: 5
          timeoutSeconds: 3
          failureThreshold: 3

        securityContext:
          allowPrivilegeEscalation: false
          runAsNonRoot: true
          runAsUser: 1000
          capabilities:
            drop: ["ALL"]

        # Optional: mount an emptyDir to persist cache across container restarts
        volumeMounts:
        - name: hf-cache
          mountPath: /home/appuser/.cache

      volumes:
      - name: hf-cache
        emptyDir:
          sizeLimit: 5Gi
diff --git a/server/requirements.txt b/server/requirements.txt
new file mode 100644
index 0000000..fba0111
--- /dev/null
+++ b/server/requirements.txt
@@ -0,0 +1,13 @@
# Core dependencies
websockets
httpx

# Milvus client
pymilvus

# Optimized ML dependencies - use lighter alternatives
sentence-transformers
# BUG FIX: pip only honors --extra-index-url as a standalone line in a
# requirements file; appended to a requirement spec ("torch --extra-index-url
# ...") it aborts the install. The CPU-only wheel index now stands alone.
--extra-index-url https://download.pytorch.org/whl/cpu
torch

# Essential only
numpy
diff --git a/server/service.yaml b/server/service.yaml
new file mode 100644
index 0000000..f7a058d
--- /dev/null
+++ b/server/service.yaml
@@ -0,0 +1,28 @@
apiVersion: v1
kind: Service
metadata:
  name: ws-proxy
  labels:
    app: ws-proxy
    version: v1
  annotations:
    # Add service annotations for better observability
    prometheus.io/scrape: "false" # Disable if no metrics endpoint
spec:
  type: ClusterIP # Explicit service type
  selector:
    app: ws-proxy
  ports:
  # NOTE(review): both service ports forward to the same container port 8000;
  # the server speaks WebSocket (plus /health) on either, so port 80 is a
  # convenience alias.
  - name: http
    port: 80
    targetPort: 8000
    protocol: TCP
  - name: websocket
    port: 8000
    targetPort: 8000
    protocol: TCP
  # Session affinity for websocket connections
  sessionAffinity: ClientIP
  sessionAffinityConfig:
    clientIP:
      timeoutSeconds: 3600 # 1 hour timeout for websocket sessions
\ No newline at end of file