Skip to content

Commit 6bd76fb

Browse files
feat: Add template for structured extraction (#185)
1 parent a553d50 commit 6bd76fb

File tree

13 files changed

+237
-63
lines changed

13 files changed

+237
-63
lines changed

.changeset/proud-seals-yell.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"create-llama": patch
3+
---
4+
5+
Add template for structured extraction

helpers/index.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,11 @@ export const installTemplate = async (
163163
// This is a backend, so we need to copy the test data and create the env file.
164164

165165
// Copy the environment file to the target directory.
166-
if (props.template === "streaming" || props.template === "multiagent") {
166+
if (
167+
props.template === "streaming" ||
168+
props.template === "multiagent" ||
169+
props.template === "extractor"
170+
) {
167171
await createBackendEnvFile(props.root, {
168172
modelConfig: props.modelConfig,
169173
llamaCloudKey: props.llamaCloudKey,

helpers/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export type ModelConfig = {
1818
isConfigured(): boolean;
1919
};
2020
export type TemplateType =
21+
| "extractor"
2122
| "streaming"
2223
| "community"
2324
| "llamapack"

questions.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ export const askQuestions = async (
342342
title: "Multi-agent app (using llama-agents)",
343343
value: "multiagent",
344344
},
345+
{ title: "Structured Extractor", value: "extractor" },
345346
{
346347
title: `Community template from ${styledRepo}`,
347348
value: "community",
@@ -405,7 +406,7 @@ export const askQuestions = async (
405406
return; // early return - no further questions needed for llamapack projects
406407
}
407408

408-
if (program.template === "multiagent") {
409+
if (program.template === "multiagent" || program.template === "extractor") {
409410
// TODO: multi-agents currently only supports FastAPI
410411
program.framework = preferences.framework = "fastapi";
411412
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
This is a [LlamaIndex](https://www.llamaindex.ai/) project using [FastAPI](https://fastapi.tiangolo.com/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama) featuring [structured extraction](https://docs.llamaindex.ai/en/stable/examples/structured_outputs/structured_outputs/?h=structured+output).
2+
3+
## Getting Started
4+
5+
First, set up the environment with Poetry:
6+
7+
> **_Note:_** This step is not needed if you are using the dev-container.
8+
9+
```shell
10+
poetry install
11+
poetry shell
12+
```
13+
14+
Then check the parameters that have been pre-configured in the `.env` file in this directory. (E.g. you might need to configure an `OPENAI_API_KEY` if you're using OpenAI as model provider).
15+
16+
Second, generate the embeddings of the documents in the `./data` directory (if this folder exists - otherwise, skip this step):
17+
18+
```shell
19+
poetry run generate
20+
```
21+
22+
Third, run the API in one command:
23+
24+
```shell
25+
poetry run python main.py
26+
```
27+
28+
The example provides the `/api/extractor/query` API endpoint.
29+
30+
This query endpoint returns structured data in the format of the [Output](./app/api/routers/output.py) class. Modify this class to change the output format.
31+
32+
You can test the endpoint with the following curl request:
33+
34+
```shell
35+
curl --location 'localhost:8000/api/extractor/query' \
36+
--header 'Content-Type: application/json' \
37+
--data '{ "query": "What is the maximum weight for a parcel?" }'
38+
```
39+
40+
This will return a response that the RAG pipeline is confident about, since the answer is found in the provided documents.
41+
42+
Try
43+
44+
```shell
45+
curl --location 'localhost:8000/api/extractor/query' \
46+
--header 'Content-Type: application/json' \
47+
--data '{ "query": "What is the weather today?" }'
48+
```
49+
50+
to retrieve a response with low confidence, since the question is not related to the documents provided in the `./data` directory.
51+
52+
You can start editing the API endpoint by modifying [`extractor.py`](./app/api/routers/extractor.py). The endpoints auto-update as you save the file.
53+
54+
Open [http://localhost:8000/docs](http://localhost:8000/docs) with your browser to see the Swagger UI of the API.
55+
56+
The API allows CORS for all origins to simplify development. You can change this behavior by setting the `ENVIRONMENT` environment variable to `prod`:
57+
58+
```
59+
ENVIRONMENT=prod python main.py
60+
```
61+
62+
## Learn More
63+
64+
To learn more about LlamaIndex, take a look at the following resources:
65+
66+
- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
67+
68+
You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!

templates/types/extractor/fastapi/app/__init__.py

Whitespace-only changes.

templates/types/extractor/fastapi/app/api/__init__.py

Whitespace-only changes.

templates/types/extractor/fastapi/app/api/routers/__init__.py

Whitespace-only changes.
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import logging
2+
import os
3+
4+
from fastapi import APIRouter, HTTPException
5+
from llama_index.core.settings import Settings
6+
from pydantic import BaseModel
7+
8+
from app.api.routers.output import Output
9+
from app.engine.index import get_index
10+
# Router for the extractor endpoints; `r` is a short alias used by the
# route decorators below. Mounted by the app under /api/extractor.
extractor_router = r = APIRouter()

# Reuse uvicorn's logger so messages show up in the running server's output.
logger = logging.getLogger("uvicorn")
15+
class RequestData(BaseModel):
    """Payload accepted by the extractor query endpoint."""

    query: str

    class Config:
        # Example request body surfaced in the generated OpenAPI/Swagger docs.
        json_schema_extra = {
            "examples": [{"query": "What's the maximum weight for a parcel?"}],
        }
25+
26+
@r.post("/query")
async def query_request(
    data: RequestData,
):
    """Query the index and return the answer as structured `Output` data."""
    # Build a query engine whose LLM responses conform to the Output schema.
    query_engine = get_query_engine(Output)

    result = await query_engine.aquery(data.query)

    # The structured LLM already yields an Output-shaped object; round-trip
    # through a dict so the endpoint returns a freshly validated Output.
    fields = result.response.dict()
    return Output(**fields)
39+
def get_query_engine(output_cls: type[BaseModel]):
    """Create a query engine whose responses are structured as `output_cls`.

    Args:
        output_cls: The pydantic model class the LLM output is parsed into.
            (The class itself is passed, not an instance.)

    Raises:
        HTTPException: 500 if the vector index has not been generated yet.
    """
    # Environment variables are strings; normalize TOP_K to int exactly once
    # (the original deferred the cast to the call site below).
    top_k = int(os.getenv("TOP_K", "3"))

    index = get_index()
    if index is None:
        raise HTTPException(
            status_code=500,
            detail="StorageContext is empty - call 'poetry run generate' to generate the storage first",
        )

    # Wrap the configured LLM so its completions are parsed into output_cls.
    sllm = Settings.llm.as_structured_llm(output_cls)

    return index.as_query_engine(
        similarity_top_k=top_k,
        llm=sllm,
        response_mode="tree_summarize",
    )
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import logging
2+
from llama_index.core.schema import BaseModel, Field
3+
from typing import List
# Reuse uvicorn's logger for consistency with the rest of the app.
# NOTE(review): not referenced anywhere in this module as shown — possibly
# kept for future use; confirm before removing.
logger = logging.getLogger("uvicorn")
7+
class Output(BaseModel):
    """Schema of the structured answer returned by the extractor endpoint.

    The structured LLM fills in these fields; the `description` strings are
    part of the prompt/schema, so edits to them change extraction behavior.
    Modify this class to change the API's output format.
    """

    # The answer text produced by the RAG pipeline.
    response: str = Field(..., description="The answer to the question.")
    # Source pages backing the answer (empty when the context is irrelevant).
    page_numbers: List[int] = Field(
        ...,
        description="The page numbers of the sources used to answer this question. Do not include a page number if the context is irrelevant.",
    )
    # Self-reported confidence; pydantic enforces the [0, 1] bounds.
    confidence: float = Field(
        ...,
        ge=0,
        le=1,
        description="Confidence value between 0-1 of the correctness of the result.",
    )
    # Free-text justification for the confidence value above.
    confidence_explanation: str = Field(
        ..., description="Explanation for the confidence score"
    )

    class Config:
        # Example response surfaced in the generated OpenAPI/Swagger docs.
        json_schema_extra = {
            "example": {
                "response": "This is an example answer.",
                "page_numbers": [1, 2, 3],
                "confidence": 0.85,
                "confidence_explanation": "This is an explanation for the confidence score.",
            }
        }

0 commit comments

Comments
 (0)