diff --git a/.gitignore b/.gitignore
index 08519c7..dc79d9f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,10 @@ models
 generated
 .idea
 .DS_Store
+milvus/seed/data/*
+milvus/build/volumes/milvus/*data*
+*.venv
+*venv
 
 # UI assets
 **/node_modules
diff --git a/milvus/build/Containerfile b/milvus/build/Containerfile
new file mode 100644
index 0000000..779a32b
--- /dev/null
+++ b/milvus/build/Containerfile
@@ -0,0 +1,2 @@
+FROM docker.io/milvusdb/milvus:master-20240426-bed6363f
+ADD embedEtcd.yaml /milvus/configs/embedEtcd.yaml
diff --git a/milvus/build/Makefile b/milvus/build/Makefile
new file mode 100644
index 0000000..cdf6bea
--- /dev/null
+++ b/milvus/build/Makefile
@@ -0,0 +1,55 @@
+REGISTRY ?= quay.io
+REGISTRY_ORG ?= ai-lab
+COMPONENT = vector_dbs
+
+IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/milvus:latest
+
+ARCH ?= $(shell uname -m)
+PLATFORM ?= linux/$(ARCH)
+
+gRPC_PORT := 19530
+REST_PORT := 9091
+CLIENT_PORT := 2379
+
+LIB_MILVUS_DIR_MOUNTPATH := $(shell pwd)/volumes/milvus
+
+.PHONY: build
+build:
+	podman build --platform $(PLATFORM) -f Containerfile -t ${IMAGE} .
+
+.PHONY: run
+run:
+	podman run -d \
+	--name milvus-standalone \
+	--security-opt seccomp:unconfined \
+	-e ETCD_USE_EMBED=true \
+	-e ETCD_CONFIG_PATH=/milvus/configs/embedEtcd.yaml \
+	-e COMMON_STORAGETYPE=local \
+	-v $(LIB_MILVUS_DIR_MOUNTPATH):/var/lib/milvus \
+	-p $(gRPC_PORT):$(gRPC_PORT) \
+	-p $(REST_PORT):$(REST_PORT) \
+	-p $(CLIENT_PORT):$(CLIENT_PORT) \
+	--health-cmd="curl -f http://localhost:$(REST_PORT)/healthz" \
+	--health-interval=30s \
+	--health-start-period=90s \
+	--health-timeout=20s \
+	--health-retries=3 \
+	$(IMAGE) \
+	milvus run standalone 1> /dev/null
+
+.PHONY: stop
+stop:
+	-podman stop milvus-standalone
+
+.PHONY: delete
+delete:
+	-podman rm milvus-standalone -f
+
+.PHONY: podman-clean
+podman-clean:
+	@container_ids=$$(podman ps -a --format "{{.ID}} {{.Image}}" | awk '$$2 == "$(IMAGE)" {print $$1}'); \
+	echo "removing all containers with IMAGE=$(IMAGE)"; \
+	for id in $$container_ids; do \
+		echo "Removing container: $$id"; \
+		podman rm -f $$id; \
+	done
diff --git a/milvus/build/embedEtcd.yaml b/milvus/build/embedEtcd.yaml
new file mode 100644
index 0000000..32954fa
--- /dev/null
+++ b/milvus/build/embedEtcd.yaml
@@ -0,0 +1,5 @@
+listen-client-urls: http://0.0.0.0:2379
+advertise-client-urls: http://0.0.0.0:2379
+quota-backend-bytes: 4294967296
+auto-compaction-mode: revision
+auto-compaction-retention: '1000'
diff --git a/milvus/build/volumes/Containerfile b/milvus/build/volumes/Containerfile
new file mode 100644
index 0000000..779a32b
--- /dev/null
+++ b/milvus/build/volumes/Containerfile
@@ -0,0 +1,2 @@
+FROM docker.io/milvusdb/milvus:master-20240426-bed6363f
+ADD embedEtcd.yaml /milvus/configs/embedEtcd.yaml
diff --git a/milvus/build/volumes/Makefile b/milvus/build/volumes/Makefile
new file mode 100644
index 0000000..1113215
--- /dev/null
+++ b/milvus/build/volumes/Makefile
@@ -0,0 +1,55 @@
+REGISTRY ?= quay.io
+REGISTRY_ORG ?= ai-lab
+COMPONENT = vector_dbs
+
+IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/milvus:latest
+
+ARCH ?= $(shell uname -m)
+PLATFORM ?= linux/$(ARCH)
+
+gRPC_PORT := 19530
+REST_PORT := 9091
+CLIENT_PORT := 2379
+
+LIB_MILVUS_DIR_MOUNTPATH := $(shell pwd)/volumes/milvus
+
+.PHONY: build
+build:
+	podman build --platform $(PLATFORM) -f Containerfile -t ${IMAGE} .
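+# Example local workflow (a sketch; assumes podman is installed and the default
+# REGISTRY/REGISTRY_ORG values are acceptable -- override them as needed):
+#   make build                                  # builds $(IMAGE) for $(PLATFORM)
+#   make run                                    # starts the milvus-standalone container from the run target below
+#   podman healthcheck run milvus-standalone    # optional: confirm the configured health check passes
+#   make stop && make delete                    # tear the container back down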
+
+.PHONY: run
+run:
+	podman run -it \
+	--name milvus-standalone \
+	--security-opt seccomp:unconfined \
+	-e ETCD_USE_EMBED=true \
+	-e ETCD_CONFIG_PATH=/milvus/configs/embedEtcd.yaml \
+	-e COMMON_STORAGETYPE=local \
+	-v $(LIB_MILVUS_DIR_MOUNTPATH):/var/lib/milvus \
+	-p $(gRPC_PORT):$(gRPC_PORT) \
+	-p $(REST_PORT):$(REST_PORT) \
+	-p $(CLIENT_PORT):$(CLIENT_PORT) \
+	--health-cmd="curl -f http://localhost:$(REST_PORT)/healthz" \
+	--health-interval=30s \
+	--health-start-period=90s \
+	--health-timeout=20s \
+	--health-retries=3 \
+	$(IMAGE) \
+	milvus run standalone 1> /dev/null
+
+.PHONY: stop
+stop:
+	-podman stop milvus-standalone
+
+.PHONY: delete
+delete:
+	-podman rm milvus-standalone -f
+
+.PHONY: podman-clean
+podman-clean:
+	@container_ids=$$(podman ps --format "{{.ID}} {{.Image}}" | awk '$$2 == "$(IMAGE)" {print $$1}'); \
+	echo "removing all containers with IMAGE=$(IMAGE)"; \
+	for id in $$container_ids; do \
+		echo "Removing container: $$id"; \
+		podman rm -f $$id; \
+	done
diff --git a/milvus/build/volumes/embedEtcd.yaml b/milvus/build/volumes/embedEtcd.yaml
new file mode 100644
index 0000000..32954fa
--- /dev/null
+++ b/milvus/build/volumes/embedEtcd.yaml
@@ -0,0 +1,5 @@
+listen-client-urls: http://0.0.0.0:2379
+advertise-client-urls: http://0.0.0.0:2379
+quota-backend-bytes: 4294967296
+auto-compaction-mode: revision
+auto-compaction-retention: '1000'
diff --git a/milvus/build/volumes/milvus/.gitkeep b/milvus/build/volumes/milvus/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/milvus/seed/.env.example b/milvus/seed/.env.example
new file mode 100644
index 0000000..798bc46
--- /dev/null
+++ b/milvus/seed/.env.example
@@ -0,0 +1,3 @@
+MODEL_NAME=
+MODEL_ENDPOINT=
+MODEL_TOKEN=
\ No newline at end of file
diff --git a/milvus/seed/README.md b/milvus/seed/README.md
new file mode 100644
index 0000000..5df33a2
--- /dev/null
+++ b/milvus/seed/README.md
@@ -0,0 +1,29 @@
+RAG application with ILAB
+
+1. Set up a vector DB (Milvus)
+
+Development story:
+  0. Starting goal:
+    - Naive RAG, not KG-aided
+    - Additions:
+      1. Identify what the model lacks knowledge in
+      2. Can I use the internally trained model or do I have to use the HF model?
+      -
+
+- UI integration
+
+-----------------------------------------------
+
+variable definition
+class Config
+
+_identify_params,
+_llm_type, _extract_token_usage,
+
+Inherent in defining this spec, which could eventually live as a contribution to langchain, are some assumptions / questions I made:
+  - Is the model serializable: Assumed no
+  - Max tokens for merlinite and granite: Both assumed 4096
+  - Does this model have attention / memory?
+  - Do these models have a verbosity option for output?
+  - Recommended default values:
+    -
\ No newline at end of file
diff --git a/milvus/seed/__pycache__/ilab_model.cpython-311.pyc b/milvus/seed/__pycache__/ilab_model.cpython-311.pyc
new file mode 100644
index 0000000..2b8da03
Binary files /dev/null and b/milvus/seed/__pycache__/ilab_model.cpython-311.pyc differ
diff --git a/milvus/seed/__pycache__/merlinite_model.cpython-311.pyc b/milvus/seed/__pycache__/merlinite_model.cpython-311.pyc
new file mode 100644
index 0000000..19d0734
Binary files /dev/null and b/milvus/seed/__pycache__/merlinite_model.cpython-311.pyc differ
diff --git a/milvus/seed/client.py b/milvus/seed/client.py
new file mode 100644
index 0000000..53d1e4e
--- /dev/null
+++ b/milvus/seed/client.py
@@ -0,0 +1,66 @@
+import requests
+import json
+import os
+from ilab_model import IlabLLM
+from dotenv import load_dotenv
+from langchain_core.prompts import PromptTemplate
+from langchain.chains import LLMChain
+
+load_dotenv()
+
+# manage ENV
+# os.getenv returns None when a variable is unset, so fall back on any empty value
+model_endpoint = os.getenv('MODEL_ENDPOINT')
+if not model_endpoint:
+    model_endpoint = "http://localhost:8001"
+
+model_name = os.getenv('MODEL_NAME')
+if not model_name:
+    model_name = "ibm/merlinite-7b"
+
+model_token = os.getenv('ILAB_API_TOKEN')
+
+# HTTPS client
+# client_key_path = "/home/fedora/client-tls-key.pem2"
+# client_crt_path = "/home/fedora/client-tls-crt.pem2"
+# server_ca_crt = "/home/fedora/server-ca-crt.pem2"
+
+# ssl_context = ssl.create_default_context(cafile=server_ca_crt)
+# ssl_context.load_cert_chain(certfile=client_crt_path, keyfile=client_key_path)
+
+# client = httpx.Client(verify=ssl_context)
+
+# data = {
+#     "model": "instructlab/granite-7b-lab",
+#     "messages": [
+#         {"role": "system", "content": "your name is carl"},
+#         {"role": "user", "content": "what is your name?"}
+#     ],
+#     "temperature": 1,
+#     "max_tokens": 1792,
+#     "top_p": 1,
+#     "repetition_penalty": 1.05,
+#     "stop": ["<|endoftext|>"],
+#     "logprobs": False,
+#     "stream": False
+# }
+
+# response = requests.post(url, headers=headers, data=json.dumps(data), verify=False)
+# print(response.json())
+
+print(f'model_name={model_name}')
+llm = IlabLLM(
+    model_endpoint=model_endpoint,
+    model_name=model_name,
+    apikey=model_token,
+    temperature=1,
+    max_tokens=500,
+    top_p=1,
+    repetition_penalty=1.05,
+    stop=["<|endoftext|>"],
+    streaming=False
+)
+
+prompt = "I am training for a marathon in 12 weeks. Can you help me build an exercise plan to help prepare myself?"
+prompts = [prompt]
+# prompt_template = PromptTemplate.from_template(prompt)
+llm.generate(prompts)
+# llm.invoke("dog")
diff --git a/milvus/seed/dumb_client.py b/milvus/seed/dumb_client.py
new file mode 100644
index 0000000..e08c912
--- /dev/null
+++ b/milvus/seed/dumb_client.py
@@ -0,0 +1,40 @@
+import requests
+import json
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# manage ENV
+# os.getenv returns None when a variable is unset, so fall back on any empty value
+model_endpoint = os.getenv('MODEL_ENDPOINT')
+if not model_endpoint:
+    model_endpoint = "http://localhost:8001"
+
+model_name = os.getenv('MODEL_NAME')
+if not model_name:
+    model_name = "ibm/merlinite-7b"
+
+model_token = os.getenv('MODEL_TOKEN')
+
+headers = {
+    "Content-Type": "application/json",
+    "Authorization": f"Bearer {model_token}"
+}
+
+data = {
+    "model": model_name,
+    "messages": [
+        {"role": "system", "content": "your name is carl"},
+        {"role": "user", "content": "what is your name?"}
+    ],
+    "temperature": 1,
+    "max_tokens": 1792,
+    "top_p": 1,
+    "repetition_penalty": 1.05,
+    "stop": ["<|endoftext|>"],
+    "logprobs": False,
+    "stream": False
+}
+
+response = requests.post(model_endpoint, headers=headers, data=json.dumps(data), verify=False)
+print(response.json())
\ No newline at end of file
diff --git a/milvus/seed/ilab_model.py b/milvus/seed/ilab_model.py
new file mode 100644
index 0000000..bc0b009
--- /dev/null
+++ b/milvus/seed/ilab_model.py
@@ -0,0 +1,372 @@
+#!/bin/python3
+
+## This is a langchain-compatible implementation for the Ilab models. It will remain in this repo until we publish API-key
+## functionality and route backend service endpoints through a proxy that can be exposed, similarly to OpenAI. At that point
+## we can move this PR as a contribution to langchain and easily scale our usage!
+
+### Fixes in progress:
+    ### - override self params with calls to invoke or generate for temperature, etc.
+    ### - test that invoke works, generate starts
+    ### - Feat: streaming implementation
+    ### - Callbacks with streaming
+    ### - Authentication enablement via user and password rather than just API keys
+    ### - Authentication checking for API keys (whole backend API setup)
+    ### - Utilize tags and metadata with langserve
+    ### - Allow logprobs as an option
+
+import os
+import httpx
+import requests
+import json
+from langchain_core.language_models.llms import BaseLLM
+from dotenv import load_dotenv
+from langchain_core.outputs import Generation, LLMResult
+from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
+from langchain_core.utils import (
+    convert_to_secret_str,
+    get_from_dict_or_env,
+    get_pydantic_field_names,
+)
+from langchain_core.utils.utils import build_extra_kwargs
+
+load_dotenv()
+from typing import (
+    Any,
+    Dict,
+    List,
+    Set,
+    Optional,
+    Mapping
+)
+
+class IlabLLM(BaseLLM):
+    """
+    Instructlab large language model.
+
+    As this model is currently private, you must have pre-arranged access.
+    """
+
+    # REQUIRED PARAMS
+
+    model_endpoint: str = ""
+    """The model endpoint to use."""
+
+    model_name: str = Field(alias="model")
+    """Type of deployed model to use."""
+
+    # OPTIONAL BUT DEFAULTS
+
+    system_prompt: Optional[str] = "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
+ """Default system prompt to use.""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not explicitly specified.""" + + max_tokens: int = 4096 + """The maximum number of tokens to generate in the completion. + -1 returns as many tokens as possible given the prompt and + the models maximal context size.""" + + # TOTALLY OPTIONAL + + apikey: Optional[SecretStr] = None + """Apikey to the Ilab model APIs (merlinte or granite)""" + + top_p: Optional[float] = 1 + """Total probability mass of tokens to consider at each step.""" + + frequency_penalty: Optional[float] = 0 + """Penalizes repeated tokens according to frequency.""" + + repetition_penalty: Optional[float] = 0 + """Penalizes repeated tokens.""" + + temperature: Optional[float] = 0.7 + """What sampling temperature to use.""" + + # verbose: Optional[str] = None + # """If the model should return verbose output or standard""" + + streaming: bool = False + """ Whether to stream the results or not. """ + + # FUTURE EXTENSIONS + + tags: Optional[List[str]] = None + """Tags to add to the run trace.""" + + metadata: Optional[Dict[str, Any]] = None + """Metadata to add to the run trace.""" + + # This gets implemented with stream + # callbacks: Optional[SecretStr] = None + # """callbacks""" + + # END PARMS + + class Config: + """Configuration for this pydantic object.""" + allow_population_by_field_name = True + + @property + def lc_secrets(self) -> Dict[str, str]: + """A map of constructor argument names to secret ids. + + For example: + { + "apikey": "ILAB_API_KEY", + } + """ + return { + "apikey": "ILAB_API_KEY", + } + + @classmethod + def is_lc_serializable(cls) -> bool: + """Return whether this model can be serialized by Langchain.""" + return False + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = get_pydantic_field_names(cls) + extra = values.get("model_kwargs", {}) + values["model_kwargs"] = build_extra_kwargs( + extra, values, all_required_field_names + ) + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + if values["streaming"] == True: + raise ValueError("streaming has not yet been implemented.") + if values["apikey"] or "ILAB_API_KEY" in os.environ: + values["apikey"] = convert_to_secret_str( + get_from_dict_or_env(values, "apikey", "ILAB_API_KEY") + ) + values['model_name'] = get_from_dict_or_env( + values, + "model_name", + "MODEL_NAME", + ) + ## extension for more options for required auth params + ## client_params = { + ## "api_key": ( + ## values["apikey"].get_secret_value() + ## if values["apikey"] + ## else None + ## ) + ## } + # CURRENTLY WE DONT CHECK KEYS + ## if not client_params['values']['apikey']: + ## raise ValueError("Did not find token `apikey`.") + return values + + @property + def _params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + params = {**{ + "model_name": self.model_name, + "model_endpoint": self.model_endpoint, + }, **self._default_params} + if self.apikey: + params['apikey'] = self.apikey + if self.model_name: + params['model_name'] = self.model_name + return params + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling Merlinite API.""" + normal_params: Dict[str, Any] = { + "temperature": self.temperature, + "top_p": self.top_p, + "frequency_penalty": self.frequency_penalty, + 
"presence_penalty": self.repetition_penalty, + } + + if self.max_tokens is not None: + normal_params["max_tokens"] = self.max_tokens + + return {**normal_params, **self.model_kwargs} + + + def _invocation_params(self) -> Dict[str, Any]: + """Get the parameters used to invoke the model.""" + return self._params + + def make_request(self, params: Dict[str, Any], prompt: str, stop: Optional[List[str]]) -> Dict[str, Any]: + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.apikey}" + } + + data = { + "model": params['model_name'], + "messages": [ + { + "role": "system", + "content": self.system_prompt + }, + { + "role": "user", + "content": prompt + } + ], + "temperature": params['temperature'], + "max_tokens": params['max_tokens'], + "top_p": params['top_p'], + "stop": stop, + "logprobs": False, + } + + if 'repetition_penalty' in params: + data["repetition_penalty"] = params['repetition_penalty'] + + if 'streaming' in params: + # Shadowing basemodel re-route for streaming + data["stream"] = params["streaming"] + + response = requests.post(self.model_endpoint, headers=headers, data=json.dumps(data), verify=False) + response_json = response.json() + + def _call(self, prompt: str, stop:Optional[List[str]] = None, **kwargs: Any) -> str: + """Call the ilab inference endpoint. The result of invoke. + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + run_manager: Optional callback manager. + Returns: + The string generated by the model. + Example: + .. code-block:: python + + response = merlinite.invoke("What is a molecule") + """ + + invocation_params = self._invocation_params + params = {**invocation_params, **kwargs} + + if stop == None: + stop = ["<|endoftext|>"] + response_json = self.make_request( + params=params, prompt=prompt, stop=stop, **kwargs + ) + return response_json['choices'][0]['messages']['content'] + + def _generate( + self, + prompts: List[str], + stop: Optional[List[str]] = None, + **kwargs: Any, + ) -> LLMResult: + """Call out to Ilab's endpoint with prompt. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The full LLM output. + + Example: + .. code-block:: python + + response = ilab.generate(["Tell me a joke."]) + """ + + invocation_params = self._invocation_params() + params = {**invocation_params, **kwargs} + token_usage: Dict[str, int] = {} + system_fingerprint: Optional[str] = None + + response_json = self.make_request( + params=params, prompt=prompts[0], stop=stop, **kwargs + ) + + if not ('choices' in response_json and len(response_json['choices']) > 0): + raise ValueError("No valid response from the model") + + if response_json.get("error"): + raise ValueError(response_json.get("error")) + + if not system_fingerprint: + system_fingerprint = response_json.get("system_fingerprint") + return self._create_llm_result( + response_json=response_json, + ) + + def _llm_type(self) -> str: + """Get the type of language model used by this chat model. 
+        Used for logging purposes only."""
+        return "instructlab"
+
+    @property
+    def max_context_size(self) -> int:
+        """Get max context size for this model."""
+        return self.modelname_to_contextsize(self.model_name)
+
+    def _create_llm_result(self, response: List[dict]) -> LLMResult:
+        """Create the LLMResult from the choices and prompt."""
+        generations = []
+        for res in response:
+            results = res.get("results")
+            if results:
+                finish_reason = results[0].get("choices")[0].get('finished_reason')
+                gen = Generation(
+                    text=results[0].get("choices")[0].get('message').get('content'),
+                    generation_info={"finish_reason": finish_reason},
+                )
+                generations.append([gen])
+        final_token_usage = self._extract_token_usage(response)
+        llm_output = {
+            "token_usage": final_token_usage,
+            "model_name": self.model_name
+        }
+        return LLMResult(generations=generations, llm_output=llm_output)
+
+    @staticmethod
+    def _extract_token_usage(
+        response: Optional[List[Dict[str, Any]]] = None,
+    ) -> Dict[str, Any]:
+        if response is None:
+            return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+
+        prompt_tokens = 0
+        completion_tokens = 0
+        total_tokens = 0
+
+        def get_count_value(key: str, result: Dict[str, Any]) -> int:
+            return result.get(key, 0) or 0
+
+        for res in response:
+            results = res.get("results")
+            if results:
+                prompt_tokens += get_count_value("prompt_tokens", results[0])
+                completion_tokens += get_count_value(
+                    "completion_tokens", results[0]
+                )
+                total_tokens += get_count_value("total_tokens", results[0])
+
+        return {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens
+        }
+
+    @staticmethod
+    def modelname_to_contextsize(modelname: str) -> int:
+        """Calculate the maximum number of tokens possible to generate for a model."""
+        model_token_mapping = {
+            "ibm/merlinite-7b": 4096,
+            "instructlab/granite-7b-lab": 4096
+        }
+
+        context_size = model_token_mapping.get(modelname, None)
+
+        if context_size is None:
+            raise ValueError(
+                f"Unknown model: {modelname}. Please provide a valid Ilab model name. "
+                "Known models are: " + ", ".join(model_token_mapping.keys())
+            )
+
+        return context_size
diff --git a/milvus/seed/new_seed.py b/milvus/seed/new_seed.py
new file mode 100644
index 0000000..60311c7
--- /dev/null
+++ b/milvus/seed/new_seed.py
@@ -0,0 +1,41 @@
+import os
+from pymilvus import MilvusClient, DataType
+from langchain_community.vectorstores import Milvus
+from langchain_experimental.text_splitter import SemanticChunker
+from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
+from langchain import hub
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser
+from tika import parser  # pip install tika
+from langchain_openai import OpenAI
+from ilab_models import IlabOpenAILLM
+
+
+def log_step(step_num, step_name) -> None:
+    print("-----------------------------------------------")
+    print(f"{step_num}. {step_name}")
{step_name}") + print("-----------------------------------------------") + +embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") + +text_splitter = SemanticChunker(embeddings=embeddings) # fails + +loader = PyPDFLoader('./data/DnD-5e-Handbook.pdf') +data = loader.load() +split_data = text_splitter.split_documents(data) +print(len(split_data)) +vector_store = Milvus.from_documents( + documents=split_data, + embedding=embeddings, + connection_args={"host": "localhost", "port": 19530}, + collection_name="dnd" +) + +llm = IlabOpenAILLM( + +) + +retreiver = vector_store.as_retreiver() +prompt = hub.pull("rlm/rag-prompt") \ No newline at end of file diff --git a/milvus/seed/requirements-lock.txt b/milvus/seed/requirements-lock.txt new file mode 100644 index 0000000..0d2c865 --- /dev/null +++ b/milvus/seed/requirements-lock.txt @@ -0,0 +1,78 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.3.0 +attrs==23.2.0 +beautifulsoup4==4.12.3 +bs4==0.0.2 +certifi==2024.2.2 +charset-normalizer==3.3.2 +dataclasses-json==0.6.6 +distro==1.9.0 +environs==9.5.0 +filelock==3.14.0 +frozenlist==1.4.1 +fsspec==2024.5.0 +grpcio==1.63.0 +h11==0.14.0 +httpcore==1.0.5 +httpx==0.27.0 +huggingface-hub==0.23.1 +idna==3.7 +Jinja2==3.1.4 +joblib==1.4.2 +jsonpatch==1.33 +jsonpointer==2.4 +langchain==0.2.1 +langchain-community==0.2.1 +langchain-core==0.2.1 +langchain-experimental==0.0.59 +langchain-openai==0.1.7 +langchain-text-splitters==0.2.0 +langsmith==0.1.63 +MarkupSafe==2.1.5 +marshmallow==3.21.2 +milvus-lite==2.4.5 +mpmath==1.3.0 +multidict==6.0.5 +mypy-extensions==1.0.0 +networkx==3.3 +numpy==1.26.4 +openai==1.30.3 +orjson==3.10.3 +packaging==23.2 +pandas==2.2.2 +pillow==10.3.0 +protobuf==5.27.0 +pydantic==2.7.1 +pydantic_core==2.18.2 +pymilvus==2.4.3 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.1 +PyYAML==6.0.1 +regex==2024.5.15 +requests==2.32.2 +safetensors==0.4.3 +scikit-learn==1.5.0 +scipy==1.13.1 +sentence-transformers==2.7.0 +six==1.16.0 +sniffio==1.3.1 +soupsieve==2.5 +SQLAlchemy==2.0.30 +sympy==1.12 +tenacity==8.3.0 +threadpoolctl==3.5.0 +tika==2.6.0 +tiktoken==0.7.0 +tokenizers==0.19.1 +torch==2.3.0 +tqdm==4.66.4 +transformers==4.41.1 +typing-inspect==0.9.0 +typing_extensions==4.12.0 +tzdata==2024.1 +ujson==5.10.0 +urllib3==2.2.1 +yarl==1.9.4 diff --git a/milvus/seed/requirements.txt b/milvus/seed/requirements.txt new file mode 100644 index 0000000..431c4f8 --- /dev/null +++ b/milvus/seed/requirements.txt @@ -0,0 +1,10 @@ +pymilvus==2.4.3 +langchain==0.2.1 +langchain-community==0.2.1 +langchain-core==0.2.1 +langchain-openai==0.1.7 +langchain-experimental==0.0.59 +tika==2.6.0 +sentence-transformers==2.7.0 +beautifulsoup4==4.12.3 +python-dotenv==1.0.1 diff --git a/milvus/seed/seed.py b/milvus/seed/seed.py new file mode 100644 index 0000000..044158e --- /dev/null +++ b/milvus/seed/seed.py @@ -0,0 +1,83 @@ +import os +from pymilvus import MilvusClient, DataType +from langchain_community.vectorstores import Milvus +from langchain_experimental.text_splitter import SemanticChunker +from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter +from langchain import hub +from langchain_core.runnables import RunnablePassthrough +from langchain_core.output_parsers import StrOutputParser +from tika import parser # pip install tika + + +def log_step(step_num, 
+    print("-----------------------------------------------")
+    print(f"{step_num}. {step_name}")
+    print("-----------------------------------------------")
+
+def milvus_init() -> MilvusClient:
+    client = MilvusClient()
+    # drop any existing 'dnd' collection so reseeding starts from a clean slate
+    if client.has_collection('dnd'):
+        client.drop_collection('dnd')
+    return client
+
+def fill_dnd_collection(text_splitter: any, embeddings: any) -> None:
+    # local
+    # raw = parser.from_file("data/DnD-5e-Handbook.pdf")
+    # print(len(raw['content']))
+    # docs = text_splitter.create_documents([raw['content']])
+    # vector_store = Milvus.from_documents(
+    #     docs,
+    #     embedding=embeddings,
+    #     connection_args={"host": "localhost", "port": 19530},
+    #     collection_name="dnd"
+    # )
+    # remote
+    loader = PyPDFLoader('https://orkerhulen.dk/onewebmedia/DnD%205e%20Players%20Handbook%20%28BnW%20OCR%29.pdf')
+    data = loader.load()
+    split_data = text_splitter.split_documents(data)
+    vector_store = Milvus.from_documents(
+        documents=split_data,
+        embedding=embeddings,
+        connection_args={"host": "localhost", "port": 19530},
+        collection_name="dnd"
+    )
+
+def generate_embeddings() -> any:
+    # model_name = "ibm/merlinite-7b"
+    # model_kwargs={"device": "cuda"},
+    # encode_kwargs = {"device": "cuda", "batch_size": 100, "normalize_embeddings": True}
+    model_name = "all-MiniLM-L6-v2"
+    model_kwargs = {"device": "cpu"}
+    encode_kwargs = {"normalize_embeddings": True}
+    embeddings = HuggingFaceBgeEmbeddings(
+        model_name=model_name,
+        # model_kwargs=model_kwargs,
+        encode_kwargs=encode_kwargs,
+        query_instruction="search_query:",
+        embed_instruction="search_document:"
+    )
+    return embeddings
+
+def generate_text_splitter(chunk_size=512, chunk_overlap=50) -> any:
+    # text_splitter = SemanticChunker(embeddings=embeddings) # fails
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+        length_function=len,
+        is_separator_regex=False
+    )
+    return text_splitter
+
+log_step(0, "Generate embeddings")
+embeddings = generate_embeddings()
+log_step(1, "Init text splitter")
+text_splitter = generate_text_splitter()
+log_step(2, "Read Raw data from PDF")
+log_step(3, "Text splitting")
+log_step(4, "Log result")
+fill_dnd_collection(embeddings=embeddings, text_splitter=text_splitter)
+
+
+# retriever = vector_store.as_retriever()
+# prompt = hub.pull("rlm/rag-prompt")
\ No newline at end of file
diff --git a/ui/compose.ui b/ui/compose.ui
index 1a1c761..6428e11 100644
--- a/ui/compose.ui
+++ b/ui/compose.ui
@@ -81,3 +81,27 @@ services:
       devices:
         - driver: nvidia
           capabilities: [gpu]
+
+  milvus:
+    container_name: milvus-standalone
+    image: quay.io/ai-lab/vector_dbs/milvus@sha256:7d0be442cbcafeebdb056c56b16f7f7fe96b235d7a82cfdd57edc302042d00eb
+    security_opt:
+      - seccomp:unconfined
+    environment:
+      ETCD_USE_EMBED: "true"
+      ETCD_CONFIG_PATH: "/milvus/configs/embedEtcd.yaml"
+      COMMON_STORAGETYPE: "local"
+    volumes:
+      - /home/fedora/milvus-volume:/var/lib/milvus
+      - /home/fedora/instruct-lab-bot/milvus/seed:/data/milvus/seed
+    ports:
+      - 19530:19530
+      - 9091:9091
+      - 2379:2379
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:9091/healthz || exit 1"]
+      interval: 30s
+      timeout: 20s
+      retries: 3
+      start_period: 90s
+    command: milvus run standalone
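-----------------------------------------------

Usage sketch for the naive RAG query step that the commented-out retriever / rag-prompt lines in seed.py hint at. This is only a sketch, not part of the diff: it assumes seed.py has already populated the "dnd" collection in a local Milvus on port 19530, that the MODEL_* variables from .env.example are set, and the file name (rag_query.py), k value, and sample question are illustrative.

# rag_query.py -- minimal naive RAG query against the seeded "dnd" collection.
import os

from dotenv import load_dotenv
from langchain import hub
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Milvus
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from ilab_model import IlabLLM

load_dotenv()

# Reuse the same embedding model as seed.py so query vectors live in the same
# space as the stored document vectors.
embeddings = HuggingFaceBgeEmbeddings(
    model_name="all-MiniLM-L6-v2",
    encode_kwargs={"normalize_embeddings": True},
    query_instruction="search_query:",
    embed_instruction="search_document:",
)

# Connect to the collection that seed.py filled, rather than re-ingesting.
vector_store = Milvus(
    embedding_function=embeddings,
    connection_args={"host": "localhost", "port": 19530},
    collection_name="dnd",
)
retriever = vector_store.as_retriever(search_kwargs={"k": 4})

llm = IlabLLM(
    model_endpoint=os.getenv("MODEL_ENDPOINT", "http://localhost:8001"),
    model_name=os.getenv("MODEL_NAME", "ibm/merlinite-7b"),
    apikey=os.getenv("MODEL_TOKEN"),
    temperature=0.7,
    max_tokens=500,
)

# Standard "stuff the retrieved chunks into the prompt" chain.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print(chain.invoke("How does initiative work in D&D 5e?"))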