Skip to content

v0 initial commit #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ venv/
ENV/
env.bak/
venv.bak/
my_env/

# Spyder project settings
.spyderproject
Expand Down Expand Up @@ -205,3 +206,6 @@ cython_debug/
marimo/_static/
marimo/_lsp/
__marimo__/

# kfp (Kubeflow Pipelines) pipeline spec
pipelines/github_rag_pipeline.yaml
2 changes: 0 additions & 2 deletions README.md

This file was deleted.

38 changes: 38 additions & 0 deletions manifests/inference-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
# KServe InferenceService "llama" (namespace "santhosh").
# Serves the Hugging Face model RedHatAI/Llama-3.1-8B-Instruct through the
# ServingRuntime named "llm-runtime", using the vLLM backend on a single GPU.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: llama
  namespace: santhosh
spec:
  predictor:
    model:
      modelFormat:
        name: huggingface
        version: "1"
      # Must match metadata.name of a ServingRuntime in this namespace.
      runtime: llm-runtime
      args:
        - --model_name=llama3.1-8B
        - --model_id=RedHatAI/Llama-3.1-8B-Instruct
        - --backend=vllm
        - --max-model-len=32768
        - --gpu-memory-utilization=0.90
        # Tool calling in vLLM is switched on by exactly these two options.
        # The previous extra entry "--enable-tool-call-parser" was removed:
        # vLLM defines no option with that name.
        - --enable-auto-tool-choice
        - --tool-call-parser=llama3_json
      env:
        # Hugging Face access token, read from the "token" key of the
        # "huggingface-secret" Secret rather than stored in this manifest.
        - name: HF_TOKEN
          valueFrom:
            secretKeyRef:
              name: huggingface-secret
              key: token
        # Quoted so the value stays a string, as env values must be.
        - name: CUDA_VISIBLE_DEVICES
          value: "0"
      resources:
        requests:
          cpu: "4"
          memory: "16Gi"
          nvidia.com/gpu: "1"
        limits:
          cpu: "6"
          memory: "24Gi"
          nvidia.com/gpu: "1"
256 changes: 256 additions & 0 deletions manifests/milvus-deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
---
# Milvus standalone with co-located etcd and MinIO containers in one pod.
#
# This manifest was previously a raw dump of a running Pod, including
# server-populated fields (status, uid, resourceVersion, creationTimestamp,
# ownerReferences, nodeName, the injected kube-api-access volume). Such a dump
# cannot be re-applied: the API server rejects creates that carry a
# resourceVersion, and the owner ReplicaSet UID does not exist elsewhere.
# The owning ReplicaSet name and the pod-template-hash label indicate the Pod
# was created by a Deployment named "milvus-standalone-final"; that Deployment
# is reconstructed here with the same containers, env, ports, probe and
# volumes. Fields that only restated Kubernetes defaults were dropped.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: milvus-standalone-final
  namespace: santhosh
  labels:
    app: milvus-standalone-final
spec:
  # etcd and MinIO run inside the pod on emptyDir volumes and are reached via
  # localhost, so each replica would be an independent, unshared store.
  replicas: 1
  selector:
    matchLabels:
      app: milvus-standalone-final
  template:
    metadata:
      labels:
        app: milvus-standalone-final
      annotations:
        # Keep the Istio sidecar out of this pod.
        sidecar.istio.io/inject: "false"
    spec:
      containers:
        - name: milvus
          image: milvusdb/milvus:v2.3.4
          imagePullPolicy: IfNotPresent
          command:
            - milvus
            - run
            - standalone
          env:
            # The etcd and minio containers share this pod's network
            # namespace, hence the localhost addresses.
            - name: ETCD_ENDPOINTS
              value: localhost:2379
            - name: MINIO_ADDRESS
              value: localhost:9000
            # NOTE(review): these are the well-known MinIO default
            # credentials in plain text; consider moving them to a Secret.
            - name: MINIO_ACCESS_KEY_ID
              value: minioadmin
            - name: MINIO_SECRET_ACCESS_KEY
              value: minioadmin
          ports:
            - containerPort: 19530
              protocol: TCP
            - containerPort: 9091
              protocol: TCP
          readinessProbe:
            httpGet:
              path: /healthz
              port: 9091
              scheme: HTTP
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          # NOTE(review): no resource requests/limits are set on any
          # container (the original pod ran as QoS class BestEffort).

        - name: etcd
          image: quay.io/coreos/etcd:v3.5.0
          imagePullPolicy: IfNotPresent
          command:
            - etcd
            - --advertise-client-urls=http://127.0.0.1:2379
            - --listen-client-urls=http://0.0.0.0:2379
            - --data-dir=/etcd-data
          ports:
            - containerPort: 2379
              protocol: TCP
          volumeMounts:
            - name: etcd-data
              mountPath: /etcd-data

        - name: minio
          image: minio/minio:RELEASE.2023-03-20T20-16-18Z
          imagePullPolicy: IfNotPresent
          command:
            - minio
            - server
            - /minio-data
            - --console-address
            # Quoted: the value starts with a YAML indicator character.
            - ":9001"
          env:
            # Must agree with MINIO_ACCESS_KEY_ID / MINIO_SECRET_ACCESS_KEY
            # on the milvus container above.
            - name: MINIO_ROOT_USER
              value: minioadmin
            - name: MINIO_ROOT_PASSWORD
              value: minioadmin
          ports:
            - containerPort: 9000
              protocol: TCP
          volumeMounts:
            - name: minio-data
              mountPath: /minio-data

      volumes:
        # emptyDir storage lives only as long as the pod: etcd metadata and
        # MinIO objects are lost whenever the pod is deleted or rescheduled.
        - name: etcd-data
          emptyDir: {}
        - name: minio-data
          emptyDir: {}
23 changes: 23 additions & 0 deletions manifests/serving-runtime.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
# KServe ServingRuntime "llm-runtime" (namespace "santhosh").
# Declares a single "kserve-container" that launches the huggingfaceserver
# Python module and advertises support for the "huggingface" model format.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  namespace: santhosh
  name: llm-runtime
spec:
  containers:
    - name: kserve-container
      # NOTE(review): "latest-gpu" is a floating tag; the image actually
      # pulled can change between rollouts.
      image: kserve/huggingfaceserver:latest-gpu
      command:
        - python
        - "-m"
        - huggingfaceserver
      resources:
        limits:
          nvidia.com/gpu: "1"
          memory: "24Gi"
          cpu: "6"
        requests:
          nvidia.com/gpu: "1"
          memory: "16Gi"
          cpu: "4"
  supportedModelFormats:
    # autoSelect lets InferenceServices with this model format pick this
    # runtime without naming it explicitly.
    - autoSelect: true
      name: huggingface
      version: "1"
Loading