diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 094d7cac..f9dbedb4 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -@ehhuang @ashwinb @raghotham @reluctantfuturist +@ehhuang @ashwinb @raghotham @reluctantfuturist @leseb diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index dd3f6e2b..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: CI -on: - push: - branches-ignore: - - 'generated' - - 'codegen/**' - - 'integrated/**' - - 'stl-preview-head/**' - - 'stl-preview-base/**' - pull_request: - branches-ignore: - - 'stl-preview-head/**' - - 'stl-preview-base/**' - -jobs: - lint: - timeout-minutes: 10 - name: lint - runs-on: ${{ github.repository == 'stainless-sdks/llama-stack-client-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} - steps: - - uses: actions/checkout@v4 - - - name: Install Rye - run: | - curl -sSf https://rye.astral.sh/get | bash - echo "$HOME/.rye/shims" >> $GITHUB_PATH - env: - RYE_VERSION: '0.44.0' - RYE_INSTALL_OPTION: '--yes' - - - name: Install dependencies - run: rye sync --all-features - - - name: Run lints - run: ./scripts/lint - - upload: - if: github.repository == 'stainless-sdks/llama-stack-client-python' - timeout-minutes: 10 - name: upload - permissions: - contents: read - id-token: write - runs-on: depot-ubuntu-24.04 - steps: - - uses: actions/checkout@v4 - - - name: Get GitHub OIDC Token - id: github-oidc - uses: actions/github-script@v6 - with: - script: core.setOutput('github_token', await core.getIDToken()); - - - name: Upload tarball - env: - URL: https://pkg.stainless.com/s - AUTH: ${{ steps.github-oidc.outputs.github_token }} - SHA: ${{ github.sha }} - run: ./scripts/utils/upload-artifact.sh - - test: - timeout-minutes: 10 - name: test - runs-on: ${{ github.repository == 'stainless-sdks/llama-stack-client-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} - steps: - - uses: actions/checkout@v4 - - - name: Install Rye - run: | - curl -sSf https://rye.astral.sh/get | bash - echo "$HOME/.rye/shims" >> $GITHUB_PATH - env: - RYE_VERSION: '0.44.0' - RYE_INSTALL_OPTION: '--yes' - - - name: Bootstrap - run: ./scripts/bootstrap - - - name: Run tests - run: ./scripts/test diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 00000000..8bcc292a --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,25 @@ +name: Pre-commit + +on: + pull_request: + push: + branches: [main] + +jobs: + pre-commit: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + + - name: Set up Python + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + with: + python-version: '3.12' + cache: pip + cache-dependency-path: | + **/requirements*.txt + .pre-commit-config.yaml + + - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml deleted file mode 100644 index 1ac94793..00000000 --- a/.github/workflows/release-doctor.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Release Doctor -on: - pull_request: - branches: - - main - workflow_dispatch: - -jobs: - release_doctor: - name: release doctor - runs-on: ubuntu-latest - if: github.repository == 'llamastack/llama-stack-client-python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') - - steps: - - uses: 
actions/checkout@v4 - - - name: Check release environment - run: | - bash ./bin/check-release-environment - env: - PYPI_TOKEN: ${{ secrets.LLAMA_STACK_CLIENT_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..b55a0a86 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,60 @@ +exclude: 'build/' + +default_language_version: + python: python3 + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 # Latest stable version + hooks: + - id: check-merge-conflict + - id: check-added-large-files + args: ['--maxkb=1000'] + - id: end-of-file-fixer + exclude: '^(.*\.svg)$' + +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.4 + hooks: + - id: ruff + files: ^src/llama_stack_client/lib/.* + args: [ + --fix, + --exit-non-zero-on-fix + ] + - id: ruff-format + files: ^src/llama_stack_client/lib/.* + +- repo: https://github.com/adamchainz/blacken-docs + rev: 1.19.0 + hooks: + - id: blacken-docs + files: ^src/llama_stack_client/lib/.* + additional_dependencies: + - black==24.3.0 + +# - repo: https://github.com/pre-commit/mirrors-mypy +# rev: v1.14.0 +# hooks: +# - id: mypy +# additional_dependencies: +# - types-requests +# - types-setuptools +# - pydantic +# args: [--ignore-missing-imports] + +# - repo: https://github.com/jsh9/pydoclint +# rev: d88180a8632bb1602a4d81344085cf320f288c5a +# hooks: +# - id: pydoclint +# args: [--config=pyproject.toml] + +# - repo: https://github.com/tcort/markdown-link-check +# rev: v3.11.2 +# hooks: +# - id: markdown-link-check +# args: ['--quiet'] + +ci: + autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks + autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate diff --git a/.python-version b/.python-version index 43077b24..e4fba218 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.9.18 +3.12 diff --git a/.release-please-manifest.json b/.release-please-manifest.json index aaf968a1..17473a20 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { ".": "0.1.0-alpha.3" -} \ No newline at end of file +} diff --git a/Brewfile b/Brewfile index 492ca37b..fc55cbe7 100644 --- a/Brewfile +++ b/Brewfile @@ -1,2 +1 @@ brew "rye" - diff --git a/README.md b/README.md index 9e1e26ab..62812e1a 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,85 @@ # Llama Stack Client Python API library -[![PyPI version]()](https://pypi.org/project/llama_stack_client/) +[![PyPI version](https://img.shields.io/pypi/v/llama_stack_client.svg)](https://pypi.org/project/llama_stack_client/) [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack-client)](https://pypi.org/project/llama-stack-client/) +[![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack) -The Llama Stack Client Python library provides convenient access to the Llama Stack Client REST API from any Python 3.8+ +The Llama Stack Client Python library provides convenient access to the Llama Stack Client REST API from any Python 3.7+ application. The library includes type definitions for all request params and response fields, and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx). -It is generated with [Stainless](https://www.stainless.com/). +It is generated with [Stainless](https://www.stainlessapi.com/). 
## Documentation -The REST API documentation can be found on [llama-stack.readthedocs.io](https://llama-stack.readthedocs.io/en/latest/). The full API of this library can be found in [api.md](api.md). +For starting up a Llama Stack server, please checkout our guides in our [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/docs/resources/llama-stack-spec.html) repo. + +The REST API documentation can be found on our [llama-stack OpenAPI spec](https://github.com/meta-llama/llama-stack/blob/main/docs/resources/llama-stack-spec.html). The full API of this library can be found in [api.md](api.md). + +You can find more example apps with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) repo. ## Installation ```sh -# install from the production repo -pip install git+ssh://git@github.com/llamastack/llama-stack-client-python.git +pip install llama-stack-client ``` -> [!NOTE] -> Once this package is [published to PyPI](https://www.stainless.com/docs/guides/publish), this will become: `pip install --pre llama_stack_client` - ## Usage -The full API of this library can be found in [api.md](api.md). +The full API of this library can be found in [api.md](api.md). You may find basic client examples in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) repo. ```python from llama_stack_client import LlamaStackClient -client = LlamaStackClient() +client = LlamaStackClient( + base_url=f"http://{host}:{port}", +) -model = client.models.register( - model_id="model_id", +response = client.chat.completions.create( + messages=[{"role": "user", "content": "hello world, write me a 2 sentence poem about the moon"}], + model="meta-llama/Llama-3.2-3B-Instruct", + stream=False, ) -print(model.identifier) +print(response) ``` -While you can provide an `api_key` keyword argument, -we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) -to add `LLAMA_STACK_CLIENT_API_KEY="My API Key"` to your `.env` file -so that your API Key is not stored in source control. +While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `LLAMA_STACK_CLIENT_API_KEY="My API Key"` to your `.env` file so that your API Key is not stored in source control. + +After installing the `llama-stack-client` package, you can also use the [`llama-stack-client` CLI](https://github.com/meta-llama/llama-stack/tree/main/llama-stack-client) to interact with the Llama Stack server. +```bash +llama-stack-client inference chat-completion --message "hello, what model are you" +``` + +```python +OpenAIChatCompletion( + id='AmivnS0iMv-mmEE4_A0DK1T', + choices=[ + OpenAIChatCompletionChoice( + finish_reason='stop', + index=0, + message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam( + role='assistant', + content="Hello! I am an AI designed by Meta AI, and my model is a type of recurrent neural network (RNN) called a transformer. 
My specific architecture is based on the BERT (Bidirectional Encoder Representations from Transformers) model, which is a pre-trained language model that has been fine-tuned for a variety of natural language processing tasks.\n\nHere are some key details about my model:\n\n* **Model type:** Transformer-based language model\n* **Architecture:** BERT (Bidirectional Encoder Representations from Transformers)\n* **Training data:** A massive corpus of text data, including but not limited to:\n\t+ Web pages\n\t+ Books\n\t+ Articles\n\t+ Forums\n\t+ Social media platforms\n* **Parameters:** My model has approximately 1.5 billion parameters, which allows me to understand and generate human-like language.\n* **Capabilities:** I can perform a wide range of tasks, including but not limited to:\n\t+ Answering questions\n\t+ Generating text\n\t+ Translating languages\n\t+ Summarizing content\n\t+ Offering suggestions and ideas\n\nI'm constantly learning and improving, so please bear with me if I make any mistakes or don't quite understand what you're asking. How can I assist you today?", + name=None, + tool_calls=None, + function_call=None + ), + logprobs=OpenAIChatCompletionChoiceLogprobs(content=None, refusal=None) + ) + ], + created=1749825661, + model='Llama-3.3-70B-Instruct', + object='chat.completion', + system_fingerprint=None, + usage={ + 'completion_tokens': 258, + 'prompt_tokens': 16, + 'total_tokens': 274, + 'completion_tokens_details': None, + 'prompt_tokens_details': None + }, + service_tier=None +) +``` ## Async usage @@ -50,14 +89,18 @@ Simply import `AsyncLlamaStackClient` instead of `LlamaStackClient` and use `awa import asyncio from llama_stack_client import AsyncLlamaStackClient -client = AsyncLlamaStackClient() +client = AsyncLlamaStackClient( + # defaults to "production". + environment="sandbox", +) async def main() -> None: - model = await client.models.register( - model_id="model_id", + session = await client.agents.sessions.create( + agent_id="agent_id", + session_name="session_name", ) - print(model.identifier) + print(session.session_id) asyncio.run(main()) @@ -65,82 +108,6 @@ asyncio.run(main()) Functionality between the synchronous and asynchronous clients is otherwise identical. -### With aiohttp - -By default, the async client uses `httpx` for HTTP requests. However, for improved concurrency performance you may also use `aiohttp` as the HTTP backend. - -You can enable this by installing `aiohttp`: - -```sh -# install from the production repo -pip install 'llama_stack_client[aiohttp] @ git+ssh://git@github.com/llamastack/llama-stack-client-python.git' -``` - -Then you can enable it by instantiating the client with `http_client=DefaultAioHttpClient()`: - -```python -import asyncio -from llama_stack_client import DefaultAioHttpClient -from llama_stack_client import AsyncLlamaStackClient - - -async def main() -> None: - async with AsyncLlamaStackClient( - http_client=DefaultAioHttpClient(), - ) as client: - model = await client.models.register( - model_id="model_id", - ) - print(model.identifier) - - -asyncio.run(main()) -``` - -## Streaming responses - -We provide support for streaming responses using Server Side Events (SSE). 
- -```python -from llama_stack_client import LlamaStackClient - -client = LlamaStackClient() - -stream = client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - stream=True, -) -for chat_completion_response in stream: - print(chat_completion_response.completion_message) -``` - -The async client uses the exact same interface. - -```python -from llama_stack_client import AsyncLlamaStackClient - -client = AsyncLlamaStackClient() - -stream = await client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - stream=True, -) -async for chat_completion_response in stream: - print(chat_completion_response.completion_message) -``` - ## Using types Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like: @@ -150,46 +117,6 @@ Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typ Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`. -## Nested params - -Nested parameters are dictionaries, typed using `TypedDict`, for example: - -```python -from llama_stack_client import LlamaStackClient - -client = LlamaStackClient() - -chat_completion_response = client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - logprobs={}, -) -print(chat_completion_response.logprobs) -``` - -## File uploads - -Request parameters that correspond to file uploads can be passed as `bytes`, or a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance or a tuple of `(filename, contents, media type)`. - -```python -from pathlib import Path -from llama_stack_client import LlamaStackClient - -client = LlamaStackClient() - -client.files.create( - file=Path("/path/to/file"), - purpose="assistants", -) -``` - -The async client uses the exact same interface. If you pass a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance, the file contents will be read asynchronously automatically. - ## Handling errors When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `llama_stack_client.APIConnectionError` is raised. 
@@ -206,14 +133,9 @@ from llama_stack_client import LlamaStackClient client = LlamaStackClient() try: - client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", + client.agents.sessions.create( + agent_id="agent_id", + session_name="session_name", ) except llama_stack_client.APIConnectionError as e: print("The server could not be reached") @@ -226,7 +148,7 @@ except llama_stack_client.APIStatusError as e: print(e.response) ``` -Error codes are as follows: +Error codes are as followed: | Status Code | Error Type | | ----------- | -------------------------- | @@ -257,21 +179,16 @@ client = LlamaStackClient( ) # Or, configure per-request: -client.with_options(max_retries=5).inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", +client.with_options(max_retries=5).agents.sessions.create( + agent_id="agent_id", + session_name="session_name", ) ``` ### Timeouts By default requests time out after 1 minute. You can configure this with a `timeout` option, -which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/timeouts/#fine-tuning-the-configuration) object: +which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/#fine-tuning-the-configuration) object: ```python from llama_stack_client import LlamaStackClient @@ -288,14 +205,9 @@ client = LlamaStackClient( ) # Override per-request: -client.with_options(timeout=5.0).inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", +client.with_options(timeout=5.0).agents.sessions.create( + agent_id="agent_id", + session_name="session_name", ) ``` @@ -309,14 +221,12 @@ Note that requests that time out are [retried twice by default](#retries). We use the standard library [`logging`](https://docs.python.org/3/library/logging.html) module. -You can enable logging by setting the environment variable `LLAMA_STACK_CLIENT_LOG` to `info`. +You can enable logging by setting the environment variable `LLAMA_STACK_CLIENT_LOG` to `debug`. ```shell -$ export LLAMA_STACK_CLIENT_LOG=info +$ export LLAMA_STACK_CLIENT_LOG=debug ``` -Or to `debug` for more verbose logging. - ### How to tell whether `None` means `null` or missing In an API response, a field may be explicitly `null`, or missing entirely; in either case, its value is `None` in this library. You can differentiate the two cases with `.model_fields_set`: @@ -337,22 +247,19 @@ The "raw" Response object can be accessed by prefixing `.with_raw_response.` to from llama_stack_client import LlamaStackClient client = LlamaStackClient() -response = client.inference.with_raw_response.chat_completion( - messages=[{ - "content": "string", - "role": "user", - }], - model_id="model_id", +response = client.agents.sessions.with_raw_response.create( + agent_id="agent_id", + session_name="session_name", ) print(response.headers.get('X-My-Header')) -inference = response.parse() # get the object that `inference.chat_completion()` would have returned -print(inference.completion_message) +session = response.parse() # get the object that `agents.sessions.create()` would have returned +print(session.session_id) ``` -These methods return an [`APIResponse`](https://github.com/llamastack/llama-stack-client-python/tree/main/src/llama_stack_client/_response.py) object. 
+These methods return an [`APIResponse`](https://github.com/meta-llama/llama-stack-python/tree/main/src/llama_stack_client/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/llamastack/llama-stack-client-python/tree/main/src/llama_stack_client/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/meta-llama/llama-stack-python/tree/main/src/llama_stack_client/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` @@ -361,14 +268,9 @@ The above interface eagerly reads the full response body when you make the reque To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods. ```python -with client.inference.with_streaming_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", +with client.agents.sessions.with_streaming_response.create( + agent_id="agent_id", + session_name="session_name", ) as response: print(response.headers.get("X-My-Header")) @@ -387,7 +289,8 @@ If you need to access undocumented endpoints, params, or response properties, th #### Undocumented endpoints To make requests to undocumented endpoints, you can make requests using `client.get`, `client.post`, and other -http verbs. Options on the client will be respected (such as retries) when making this request. +http verbs. Options on the client will be respected (such as retries) will be respected when making this +request. ```py import httpx @@ -416,19 +319,18 @@ can also get all the extra fields on the Pydantic model as a dict with You can directly override the [httpx client](https://www.python-httpx.org/api/#client) to customize it for your use case, including: -- Support for [proxies](https://www.python-httpx.org/advanced/proxies/) -- Custom [transports](https://www.python-httpx.org/advanced/transports/) +- Support for proxies +- Custom transports - Additional [advanced](https://www.python-httpx.org/advanced/clients/) functionality ```python -import httpx from llama_stack_client import LlamaStackClient, DefaultHttpxClient client = LlamaStackClient( # Or use the `LLAMA_STACK_CLIENT_BASE_URL` env var base_url="http://my.test.server.example.com:8083", http_client=DefaultHttpxClient( - proxy="http://my.test.proxy.example.com", + proxies="http://my.test.proxy.example.com", transport=httpx.HTTPTransport(local_address="0.0.0.0"), ), ) @@ -444,27 +346,17 @@ client.with_options(http_client=DefaultHttpxClient(...)) By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting. -```py -from llama_stack_client import LlamaStackClient - -with LlamaStackClient() as client: - # make requests here - ... - -# HTTP client is now closed -``` - ## Versioning This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions: 1. 
Changes that only affect static types, without breaking runtime behavior. -2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals.)_ +2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals)_. 3. Changes that we do not expect to impact the vast majority of users in practice. We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. -We are keen for your feedback; please open an [issue](https://www.github.com/llamastack/llama-stack-client-python/issues) with questions, bugs, or suggestions. +We are keen for your feedback; please open an [issue](https://www.github.com/meta-llama/llama-stack-python/issues) with questions, bugs, or suggestions. ### Determining the installed version @@ -479,8 +371,4 @@ print(llama_stack_client.__version__) ## Requirements -Python 3.8 or higher. - -## Contributing - -See [the contributing documentation](./CONTRIBUTING.md). +Python 3.10 or higher. diff --git a/examples/.keep b/examples/.keep index d8c73e93..0651c89c 100644 --- a/examples/.keep +++ b/examples/.keep @@ -1,4 +1,4 @@ File generated from our OpenAPI spec by Stainless. This directory can be used to store example files demonstrating usage of this SDK. -It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. \ No newline at end of file +It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. diff --git a/pyproject.toml b/pyproject.toml index 25aa09e2..ed2b910f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,25 +4,28 @@ version = "0.1.0-alpha.3" description = "The official Python library for the llama-stack-client API" dynamic = ["readme"] license = "Apache-2.0" -authors = [ -{ name = "Llama Stack Client", email = "llamastack@meta.com" }, -] +authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }] dependencies = [ "httpx>=0.23.0, <1", "pydantic>=1.9.0, <3", - "typing-extensions>=4.10, <5", + "typing-extensions>=4.7, <5", "anyio>=3.5.0, <5", "distro>=1.7.0, <2", "sniffio", + "tqdm", + "rich", + "click", + "pyaml", + "prompt_toolkit", + "pandas", + "termcolor", + "fire", + "requests", ] -requires-python = ">= 3.8" +requires-python = ">= 3.12" classifiers = [ "Typing :: Typed", "Intended Audience :: Developers", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Operating System :: OS Independent", "Operating System :: POSIX", @@ -33,60 +36,23 @@ classifiers = [ "License :: OSI Approved :: Apache Software License" ] +[dependency-groups] +dev = [ + "pytest>=7.1.1", + "pytest-asyncio", + "pre-commit", + "black", + "ruff", + "mypy", + "respx", + "dirty-equals" +] + [project.urls] Homepage = "https://github.com/llamastack/llama-stack-client-python" Repository = "https://github.com/llamastack/llama-stack-client-python" -[project.optional-dependencies] -aiohttp = ["aiohttp", "httpx_aiohttp>=0.1.6"] - -[tool.rye] -managed = true -# version pins are in requirements-dev.lock -dev-dependencies = [ - "pyright==1.1.399", - "mypy", - "respx", - "pytest", - "pytest-asyncio", - "ruff", - "time-machine", - "nox", - 
"dirty-equals>=0.6.0", - "importlib-metadata>=6.7.0", - "rich>=13.7.1", - "nest_asyncio==1.6.0", - "pytest-xdist>=3.6.1", -] -[tool.rye.scripts] -format = { chain = [ - "format:ruff", - "format:docs", - "fix:ruff", - # run formatting again to fix any inconsistencies when imports are stripped - "format:ruff", -]} -"format:docs" = "python scripts/utils/ruffen-docs.py README.md api.md" -"format:ruff" = "ruff format" - -"lint" = { chain = [ - "check:ruff", - "typecheck", - "check:importable", -]} -"check:ruff" = "ruff check ." -"fix:ruff" = "ruff check --fix ." - -"check:importable" = "python -c 'import llama_stack_client'" - -typecheck = { chain = [ - "typecheck:pyright", - "typecheck:mypy" -]} -"typecheck:pyright" = "pyright" -"typecheck:verify-types" = "pyright --verifytypes llama_stack_client --ignoreexternal" -"typecheck:mypy" = "mypy ." [build-system] requires = ["hatchling==1.26.3", "hatch-fancy-pypi-readme"] @@ -136,29 +102,9 @@ filterwarnings = [ "error" ] -[tool.pyright] -# this enables practically every flag given by pyright. -# there are a couple of flags that are still disabled by -# default in strict mode as they are experimental and niche. -typeCheckingMode = "strict" -pythonVersion = "3.8" - -exclude = [ - "_dev", - ".venv", - ".nox", -] - -reportImplicitOverride = true -reportOverlappingOverload = false - -reportImportCycles = false -reportPrivateUsage = false - [tool.ruff] line-length = 120 output-format = "grouped" -target-version = "py37" [tool.ruff.format] docstring-code-format = true @@ -179,7 +125,7 @@ select = [ "T201", "T203", # misuse of typing.TYPE_CHECKING - "TC004", + "TCH004", # import rules "TID251", ] @@ -207,4 +153,8 @@ known-first-party = ["llama_stack_client", "tests"] "bin/**.py" = ["T201", "T203"] "scripts/**.py" = ["T201", "T203"] "tests/**.py" = ["T201", "T203"] -"examples/**.py" = ["T201", "T203"] +"examples/**.py" = ["T201", "T203", "TCH004", "I", "B"] +"src/llama_stack_client/lib/**.py" = ["T201", "T203", "TCH004", "I", "B"] + +[project.scripts] +llama-stack-client = "llama_stack_client.lib.cli.llama_stack_client:main" diff --git a/release-please-config.json b/release-please-config.json index 3602e961..04870019 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -63,4 +63,4 @@ "extra-files": [ "src/llama_stack_client/_version.py" ] -} \ No newline at end of file +} diff --git a/src/llama_stack_client/lib/agents/event_logger.py b/src/llama_stack_client/lib/agents/event_logger.py index 731c7b2f..6abdad8f 100644 --- a/src/llama_stack_client/lib/agents/event_logger.py +++ b/src/llama_stack_client/lib/agents/event_logger.py @@ -66,9 +66,7 @@ def yield_printable_events(self, chunk: Any) -> Iterator[TurnStreamPrintableEven if not hasattr(chunk, "error"): self.previous_event_type, self.previous_step_type = self._get_event_type_step_type(chunk) - def _yield_printable_events( - self, chunk: Any, previous_event_type: Optional[str] = None, previous_step_type: Optional[str] = None - ) -> Iterator[TurnStreamPrintableEvent]: + def _yield_printable_events(self, chunk: Any) -> Iterator[TurnStreamPrintableEvent]: if hasattr(chunk, "error"): yield TurnStreamPrintableEvent(role=None, content=chunk.error["message"], color="red") return diff --git a/src/llama_stack_client/lib/cli/eval/run_scoring.py b/src/llama_stack_client/lib/cli/eval/run_scoring.py index 78560a0a..a9b29bbb 100644 --- a/src/llama_stack_client/lib/cli/eval/run_scoring.py +++ b/src/llama_stack_client/lib/cli/eval/run_scoring.py @@ -61,7 +61,6 @@ def run_scoring( 
scoring_params_config: Optional[str], num_examples: Optional[int], output_dir: str, - visualize: bool, ): """Run scoring from application datasets""" # one of dataset_id or dataset_path is required diff --git a/src/llama_stack_client/lib/tools/mcp_oauth.py b/src/llama_stack_client/lib/tools/mcp_oauth.py index a3c03416..503b9c69 100644 --- a/src/llama_stack_client/lib/tools/mcp_oauth.py +++ b/src/llama_stack_client/lib/tools/mcp_oauth.py @@ -253,7 +253,7 @@ def do_GET(self): self.send_response(404) self.end_headers() - def log_message(self, format, *args): + def log_message(self): """Override to suppress HTTP server logs.""" return
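
Note on the final `mcp_oauth.py` hunk: if the handler class derives from `http.server.BaseHTTPRequestHandler` (which the surrounding `do_GET` / `send_response` context suggests, though the class definition is outside this hunk), the standard library calls `log_message` with a format string and arguments for every request it serves, so a log-suppressing override normally keeps that signature. A minimal, self-contained sketch under that assumption (the `_CallbackHandler` name and the port are placeholders, not taken from the library):

```python
from http.server import BaseHTTPRequestHandler, HTTPServer


class _CallbackHandler(BaseHTTPRequestHandler):  # hypothetical name, for illustration only
    def do_GET(self):
        # Acknowledge every request with an empty 200 response.
        self.send_response(200)
        self.end_headers()

    def log_message(self, format, *args):
        """Override to suppress HTTP server logs.

        BaseHTTPRequestHandler invokes this as log_message(format, *args)
        whenever it logs a request, so the no-op override keeps the same
        signature and simply returns.
        """
        return


if __name__ == "__main__":
    # Usage sketch: serve a single request on a placeholder port without console output.
    server = HTTPServer(("localhost", 8400), _CallbackHandler)
    server.handle_request()
```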