Skip to content

Commit aeb6fef

Browse files
feat: use LlamaCloud for TS/Python (#149)
--------- Co-authored-by: Marcus Schiesser <[email protected]>
1 parent 64732f0 commit aeb6fef

File tree

17 files changed

+259
-48
lines changed

17 files changed

+259
-48
lines changed

.changeset/tough-pugs-destroy.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"create-llama": patch
3+
---
4+
5+
use llamacloud for chat

helpers/env-variables.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,26 @@ const getVectorDBEnvs = (
133133
"Optional API key for authenticating requests to Qdrant.",
134134
},
135135
];
136+
case "llamacloud":
137+
return [
138+
{
139+
name: "LLAMA_CLOUD_INDEX_NAME",
140+
description:
141+
"The name of the LlamaCloud index to use (part of the LlamaCloud project).",
142+
value: "test",
143+
},
144+
{
145+
name: "LLAMA_CLOUD_PROJECT_NAME",
146+
description: "The name of the LlamaCloud project.",
147+
value: "Default",
148+
},
149+
{
150+
name: "LLAMA_CLOUD_BASE_URL",
151+
description:
152+
"The base URL for the LlamaCloud API. Only change this for non-production environments",
153+
value: "https://api.cloud.llamaindex.ai",
154+
},
155+
];
136156
case "chroma":
137157
const envs = [
138158
{

helpers/python.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,12 @@ const getAdditionalDependencies = (
118118
version: "^2.9.9",
119119
});
120120
break;
121+
case "llamacloud":
122+
dependencies.push({
123+
name: "llama-index-indices-managed-llama-cloud",
124+
version: "^0.2.1",
125+
});
126+
break;
121127
}
122128
}
123129
}

helpers/types.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ export type TemplateVectorDB =
2626
| "milvus"
2727
| "astra"
2828
| "qdrant"
29-
| "chroma";
29+
| "chroma"
30+
| "llamacloud";
3031
export type TemplatePostInstallAction =
3132
| "none"
3233
| "VSCode"
@@ -36,7 +37,7 @@ export type TemplateDataSource = {
3637
type: TemplateDataSourceType;
3738
config: TemplateDataSourceConfig;
3839
};
39-
export type TemplateDataSourceType = "file" | "web" | "db";
40+
export type TemplateDataSourceType = "file" | "web" | "db" | "llamacloud";
4041
export type TemplateObservability = "none" | "opentelemetry";
4142
// Config for both file and folder
4243
export type FileSourceConfig = {

helpers/typescript.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import fs from "fs/promises";
22
import os from "os";
33
import path from "path";
4-
import { bold, cyan } from "picocolors";
4+
import { bold, cyan, yellow } from "picocolors";
55
import { assetRelocator, copy } from "../helpers/copy";
66
import { callPackageManager } from "../helpers/install";
77
import { templatesDir } from "./dir";
@@ -105,7 +105,13 @@ export const installTSTemplate = async ({
105105
const enginePath = path.join(root, relativeEngineDestPath, "engine");
106106

107107
// copy vector db component
108-
console.log("\nUsing vector DB:", vectorDb ?? "none", "\n");
108+
if (vectorDb === "llamacloud") {
109+
console.log(
110+
`\nUsing managed index from LlamaCloud. Ensure the ${yellow("LLAMA_CLOUD_* environment variables are set correctly.")}`,
111+
);
112+
} else {
113+
console.log("\nUsing vector DB:", vectorDb ?? "none");
114+
}
109115
await copy("**", enginePath, {
110116
parents: true,
111117
cwd: path.join(compPath, "vectordbs", "typescript", vectorDb ?? "none"),

questions.ts

Lines changed: 73 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,13 @@ export const getDataSourceChoices = (
123123
framework: TemplateFramework,
124124
selectedDataSource: TemplateDataSource[],
125125
) => {
126+
// If LlamaCloud is already selected, don't show any other options
127+
if (selectedDataSource.find((s) => s.type === "llamacloud")) {
128+
return [];
129+
}
130+
126131
const choices = [];
132+
127133
if (selectedDataSource.length > 0) {
128134
choices.push({
129135
title: "No",
@@ -171,6 +177,13 @@ export const getDataSourceChoices = (
171177
value: "db",
172178
});
173179
}
180+
181+
if (!selectedDataSource.length) {
182+
choices.push({
183+
title: "Use managed index from LlamaCloud",
184+
value: "llamacloud",
185+
});
186+
}
174187
return choices;
175188
};
176189

@@ -484,17 +497,19 @@ export const askQuestions = async (
484497
// continue asking user for data sources if none are initially provided
485498
while (true) {
486499
const firstQuestion = program.dataSources.length === 0;
500+
const choices = getDataSourceChoices(
501+
program.framework,
502+
program.dataSources,
503+
);
504+
if (choices.length === 0) break;
487505
const { selectedSource } = await prompts(
488506
{
489507
type: "select",
490508
name: "selectedSource",
491509
message: firstQuestion
492510
? "Which data source would you like to use?"
493511
: "Would you like to add another data source?",
494-
choices: getDataSourceChoices(
495-
program.framework,
496-
program.dataSources,
497-
),
512+
choices,
498513
initial: firstQuestion ? 1 : 0,
499514
},
500515
questionHandlers,
@@ -591,51 +606,76 @@ export const askQuestions = async (
591606
config: await prompts(dbPrompts, questionHandlers),
592607
});
593608
}
609+
case "llamacloud": {
610+
program.dataSources.push({
611+
type: "llamacloud",
612+
config: {},
613+
});
614+
program.dataSources.push(EXAMPLE_FILE);
615+
break;
616+
}
594617
}
595618
}
596619
}
597620
}
598621

599-
// Asking for LlamaParse if user selected file or folder data source
600-
if (
601-
program.dataSources.some((ds) => ds.type === "file") &&
602-
program.useLlamaParse === undefined
603-
) {
622+
const isUsingLlamaCloud = program.dataSources.some(
623+
(ds) => ds.type === "llamacloud",
624+
);
625+
626+
// Asking for LlamaParse if user selected file data source
627+
if (isUsingLlamaCloud) {
628+
// default to use LlamaParse if using LlamaCloud
629+
program.useLlamaParse = preferences.useLlamaParse = true;
630+
} else {
631+
if (program.dataSources.some((ds) => ds.type === "file")) {
632+
if (ciInfo.isCI) {
633+
program.useLlamaParse = getPrefOrDefault("useLlamaParse");
634+
} else {
635+
const { useLlamaParse } = await prompts(
636+
{
637+
type: "toggle",
638+
name: "useLlamaParse",
639+
message:
640+
"Would you like to use LlamaParse (improved parser for RAG - requires API key)?",
641+
initial: false,
642+
active: "yes",
643+
inactive: "no",
644+
},
645+
questionHandlers,
646+
);
647+
program.useLlamaParse = useLlamaParse;
648+
preferences.useLlamaParse = useLlamaParse;
649+
}
650+
}
651+
}
652+
653+
// Ask for LlamaCloud API key when using a LlamaCloud index or LlamaParse
654+
if (isUsingLlamaCloud || program.useLlamaParse) {
604655
if (ciInfo.isCI) {
605-
program.useLlamaParse = getPrefOrDefault("useLlamaParse");
606656
program.llamaCloudKey = getPrefOrDefault("llamaCloudKey");
607657
} else {
608-
const { useLlamaParse } = await prompts(
658+
// Ask for LlamaCloud API key
659+
const { llamaCloudKey } = await prompts(
609660
{
610-
type: "toggle",
611-
name: "useLlamaParse",
661+
type: "text",
662+
name: "llamaCloudKey",
612663
message:
613-
"Would you like to use LlamaParse (improved parser for RAG - requires API key)?",
614-
initial: false,
615-
active: "yes",
616-
inactive: "no",
664+
"Please provide your LlamaCloud API key (leave blank to skip):",
617665
},
618666
questionHandlers,
619667
);
620-
program.useLlamaParse = useLlamaParse;
621-
622-
// Ask for LlamaCloud API key
623-
if (useLlamaParse && program.llamaCloudKey === undefined) {
624-
const { llamaCloudKey } = await prompts(
625-
{
626-
type: "text",
627-
name: "llamaCloudKey",
628-
message:
629-
"Please provide your LlamaIndex Cloud API key (leave blank to skip):",
630-
},
631-
questionHandlers,
632-
);
633-
program.llamaCloudKey = llamaCloudKey;
634-
}
668+
program.llamaCloudKey = preferences.llamaCloudKey =
669+
llamaCloudKey || process.env.LLAMA_CLOUD_API_KEY;
635670
}
636671
}
637672

638-
if (program.dataSources.length > 0 && !program.vectorDb) {
673+
if (isUsingLlamaCloud) {
674+
// When using a LlamaCloud index, don't ask for vector database and use code in `llamacloud` folder for vector database
675+
const vectorDb = "llamacloud";
676+
program.vectorDb = vectorDb;
677+
preferences.vectorDb = vectorDb;
678+
} else if (program.dataSources.length > 0 && !program.vectorDb) {
639679
if (ciInfo.isCI) {
640680
program.vectorDb = getPrefOrDefault("vectorDb");
641681
} else {

templates/components/engines/typescript/agent/chat.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import { BaseToolWithCall, OpenAIAgent, QueryEngineTool } from "llamaindex";
22
import fs from "node:fs/promises";
33
import path from "node:path";
44
import { getDataSource } from "./index";
5-
import { STORAGE_CACHE_DIR } from "./shared";
65
import { createTools } from "./tools";
76

87
export async function createChatEngine() {
@@ -17,7 +16,7 @@ export async function createChatEngine() {
1716
queryEngine: index.asQueryEngine(),
1817
metadata: {
1918
name: "data_query_engine",
20-
description: `A query engine for documents in storage folder: ${STORAGE_CACHE_DIR}`,
19+
description: `A query engine for documents from your data source.`,
2120
},
2221
}),
2322
);

templates/components/engines/typescript/chat/chat.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@ export async function createChatEngine() {
88
`StorageContext is empty - call 'npm run generate' to generate the storage first`,
99
);
1010
}
11-
const retriever = index.asRetriever();
12-
retriever.similarityTopK = process.env.TOP_K
13-
? parseInt(process.env.TOP_K)
14-
: 3;
11+
const retriever = index.asRetriever({
12+
similarityTopK: process.env.TOP_K ? parseInt(process.env.TOP_K) : 3,
13+
});
1514

1615
return new ContextChatEngine({
1716
chatModel: Settings.llm,
1817
retriever,
19-
systemPrompt: process.env.SYSTEM_PROMPT,
18+
// disable as a custom system prompt disables the generated context
19+
// systemPrompt: process.env.SYSTEM_PROMPT,
2020
});
2121
}

templates/components/vectordbs/python/llamacloud/__init__.py

Whitespace-only changes.

templates/components/vectordbs/python/llamacloud/generate.py (filename missing from the extract — inferred from the file's contents; verify against the commit)

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
from dotenv import load_dotenv

# Load the .env file before importing any application modules so that
# settings resolved at import time already see the environment variables.
load_dotenv()

import logging
import os

from app.engine.loaders import get_documents
from app.settings import init_settings
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()


def generate_datasource():
    """Build the managed LlamaCloud index from the configured data sources.

    Reads the index/project/API-key configuration from environment
    variables and uploads the loaded documents to LlamaCloud.

    Raises:
        ValueError: if any of the mandatory LLAMA_CLOUD_* variables is unset.
    """
    init_settings()
    logger.info("Generate index for the provided data")

    index_name = os.getenv("LLAMA_CLOUD_INDEX_NAME")
    project = os.getenv("LLAMA_CLOUD_PROJECT_NAME")
    cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
    # Base URL is optional; it is only overridden for non-production
    # environments and may be passed through as None.
    cloud_base_url = os.getenv("LLAMA_CLOUD_BASE_URL")

    if any(value is None for value in (index_name, project, cloud_api_key)):
        raise ValueError(
            "Please set LLAMA_CLOUD_INDEX_NAME, LLAMA_CLOUD_PROJECT_NAME and LLAMA_CLOUD_API_KEY"
            " to your environment variables or config them in .env file"
        )

    # Load the documents only after the configuration check so we fail
    # fast on misconfiguration before doing any expensive parsing.
    LlamaCloudIndex.from_documents(
        documents=get_documents(),
        name=index_name,
        project_name=project,
        api_key=cloud_api_key,
        base_url=cloud_base_url,
    )

    logger.info("Finished generating the index")


if __name__ == "__main__":
    generate_datasource()

0 commit comments

Comments
 (0)