Skip to content

Commit 2a94cc1

Browse files
committed
Add indexing script
1 parent 9472a40 commit 2a94cc1

10 files changed

+141
-29
lines changed

scripts/sdk-indexer/ast/traverse.ts

Lines changed: 0 additions & 1 deletion
This file was deleted.

scripts/sdk-indexer/go-symbol-extractor.test.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { describe, it, expect } from "vitest";
22
import { GoSymbolExtractor } from "./go-symbol-extractor";
33
import { FunctionSymbol, TypeSymbol } from "./types";
44

5-
const extractor = new GoSymbolExtractor();
5+
const extractor = new GoSymbolExtractor(["test.go"]);
66

77
function extractFunctionSymbols(code: string): FunctionSymbol[] {
88
const symbols = extractor["extractFromFile"]("test.go", code);
@@ -288,14 +288,15 @@ type UserRepo interface {
288288
const allSymbols = extractor["extractFromFile"]("test.go", code);
289289
const functions = allSymbols.filter((s) => s.type === "function");
290290
const types = allSymbols.filter((s) => s.type === "type");
291-
291+
292292
expect(functions).toHaveLength(2);
293293
expect(types).toHaveLength(2);
294-
294+
295295
expect(functions[0]?.name).toBe("NewUser");
296296
expect(functions[1]?.name).toBe("GetName");
297297
expect(types[0]?.name).toBe("User");
298298
expect(types[1]?.name).toBe("UserRepo");
299299
});
300300
});
301-
});
301+
});
302+

scripts/sdk-indexer/go-symbol-extractor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ export class GoSymbolExtractor extends BaseSymbolExtractor {
99
include = ["**/*.go"];
1010
exclude = ["**/vendor/**", "**/node_modules/**", "**/dist/**", "**/build/**", "**/.git/**"];
1111

12-
constructor() {
12+
constructor(public files: string[]) {
1313
super();
1414
this.parser.setLanguage(Go);
1515
}

scripts/sdk-indexer/index.ts

Lines changed: 117 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,123 @@
1-
import { writeFileSync } from "fs";
1+
import { mkdir, exists } from "node:fs/promises";
2+
import { resolve, join } from "node:path";
3+
import { $, file, write } from "bun";
24
import { TypeScriptSymbolExtractor } from "./typescript-symbol-extractor";
5+
import { GoSymbolExtractor } from "./go-symbol-extractor";
6+
import { PythonSymbolExtractor } from "./python-symbol-extractor";
7+
import { SymbolExtractor, Symbol } from "./types";
8+
import { BaseSymbolExtractor } from "./symbol-extractor";
39

4-
const extractor = new TypeScriptSymbolExtractor();
10+
const TMP_DIR_PATH = "./tmp";
511

6-
const Config = {
7-
repository: "https://github.com/supertokens/supertokens-node",
8-
files: [],
12+
/** A source repository to index, together with the files whose symbols are extracted. */
type Repository = {
  /** Git URL handed to `git clone`. */
  url: string;
  /** Files to index; each `path` is resolved against the repository checkout directory. */
  files: Array<{ path: string; module: string; extract?: string[] }>;
  /** Name of the checkout directory inside the temp directory; also used in log output. */
  name: string;
  /** Selects which symbol extractor processes the repository. */
  language: "typescript" | "go" | "python";
};
1018

11-
const symbols = extractor.extract("/Users/bogdan/src/supertokens/docs/scripts/sdk-indexer/samples");
19+
/** One entry of the generated `index.json`, describing a single extracted symbol. */
type IndexDocument = {
  /** Symbol name as reported by the extractor. */
  name: string;
  /** Kind of symbol. */
  type: "function" | "class" | "variable" | "type" | "method";
  /** Extractor-provided metadata; an empty object when the symbol carries none. */
  meta: Record<string, unknown>;
  /** Symbol content as reported by the extractor. */
  content: string;
  /** Path of the file the symbol was found in, derived from the repository's configured file path. */
  file: string;
  /** Line number as reported by the extractor. */
  line: number;
};
27+
28+
(async () => {
29+
const repositories: Repository[] = [
30+
{
31+
url: "https://github.com/supertokens/supertokens-node",
32+
files: [{ path: "./lib/ts/index.ts", module: "SuperTokens" }],
33+
name: "supertokens-node",
34+
language: "typescript",
35+
},
36+
];
37+
const documents: IndexDocument[] = [];
38+
39+
console.log("Initializing SDK indexer");
40+
try {
41+
await init();
42+
43+
for (const repository of repositories) {
44+
console.log(`Extracting symbols from ${repository.name}`);
45+
await cloneRepository(repository);
46+
const symbols = await extractSymbols(repository);
47+
console.log(symbols);
48+
const repositoryDocuments = transformSymbolsToDocuments(symbols, repository);
49+
console.log(repositoryDocuments);
50+
documents.push(...repositoryDocuments);
51+
}
52+
53+
await write("index.json", JSON.stringify(documents, null, 2));
54+
55+
// await updateIndex(documents);
56+
} catch (e) {
57+
console.error("Indexing failed");
58+
console.error(e);
59+
process.exit(1);
60+
}
61+
})();
62+
63+
/**
 * Ensures the temporary working directory (`./tmp`) exists.
 *
 * The directory creation is awaited, so once the returned promise resolves the
 * directory is guaranteed to be present, and a creation failure rejects the
 * promise instead of surfacing as an unhandled rejection.
 *
 * @returns A promise that resolves when the directory exists.
 */
async function init(): Promise<void> {
  const tmpDirPath = "./tmp";
  // `recursive: true` makes this a no-op when the directory already exists,
  // which removes the need for a separate (racy) existence check.
  await mkdir(tmpDirPath, { recursive: true });
}
70+
71+
async function cloneRepository(repository: Repository) {
72+
const repositoryDirectoryPath = `./tmp/${repository.name}`;
73+
const repositoryDirectoryExists = await exists(repositoryDirectoryPath);
74+
if (repositoryDirectoryExists) {
75+
return;
76+
}
77+
console.log(`Cloning ${repository.url}`);
78+
await $`git clone ${repository.url} ${repository.name} --depth 1`.cwd("./tmp");
79+
}
80+
81+
/** Maps each supported repository language to the extractor class that handles it. */
const ExtractorsMap = {
  typescript: TypeScriptSymbolExtractor,
  go: GoSymbolExtractor,
  python: PythonSymbolExtractor,
} as const;

/**
 * Runs the language-specific extractor over the configured files of a repository checkout.
 *
 * @param repository Repository whose `language` selects the extractor and whose
 *   `files` paths are passed to it.
 * @returns The symbols returned by the extractor.
 * @throws Error when no extractor is registered for the repository's language.
 */
async function extractSymbols(repository: Repository): Promise<Symbol[]> {
  const { language, files, name } = repository;

  const ExtractorClass = ExtractorsMap[language];
  if (!ExtractorClass) {
    throw new Error(`No extractor found for language ${language}`);
  }

  const extractor = new ExtractorClass(files.map((entry) => entry.path));
  // The extractor receives the absolute checkout directory plus the repository-relative file paths.
  const checkoutRoot = resolve(`${TMP_DIR_PATH}/${name}`);
  const filePaths = files.map((entry) => entry.path);

  return extractor.extract(checkoutRoot, filePaths);
}
97+
98+
function transformSymbolsToDocuments(symbols: Symbol[], repository: Repository): IndexDocument[] {
99+
return repository.files
100+
.map((file) => {
101+
const absolutePath = resolve(`${TMP_DIR_PATH}/${repository.name}/${file.path}`);
102+
const symbolsInFile = symbols.filter((symbol) => symbol.file === absolutePath);
103+
const relativePath = file.path.replace(`${TMP_DIR_PATH}/${repository.name}/`, "");
104+
105+
return symbolsInFile.map((symbol) => {
106+
const meta: Record<string, unknown> = symbol.meta || {};
107+
108+
return {
109+
name: symbol.name,
110+
type: symbol.type,
111+
meta,
112+
content: symbol.content,
113+
file: relativePath,
114+
line: symbol.line,
115+
};
116+
});
117+
})
118+
.flat();
119+
}
12120

13-
console.log(symbols);
14-
writeFileSync("./result.json", JSON.stringify(symbols, null, 2));
121+
/**
 * Placeholder for publishing the generated documents to an index.
 * Currently a no-op: it resolves without using `documents`.
 *
 * @param documents Documents that would be published.
 */
async function updateIndex(documents: IndexDocument[]): Promise<void> {
  // TODO: Implement index update logic
  return;
}

scripts/sdk-indexer/python-symbol-extractor.test.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { describe, it, expect } from "vitest";
22
import { PythonSymbolExtractor } from "./python-symbol-extractor";
33
import { FunctionSymbol, TypeSymbol } from "./types";
44

5-
const extractor = new PythonSymbolExtractor();
5+
const extractor = new PythonSymbolExtractor(["test.py"]);
66

77
function extractFunctionSymbols(code: string): FunctionSymbol[] {
88
const symbols = extractor["extractFromFile"]("test.py", code);
@@ -345,19 +345,20 @@ class UserRepository:
345345
const allSymbols = extractor["extractFromFile"]("test.py", code);
346346
const functions = allSymbols.filter((s) => s.type === "function");
347347
const types = allSymbols.filter((s) => s.type === "type");
348-
348+
349349
expect(functions).toHaveLength(5);
350350
expect(types).toHaveLength(2);
351-
351+
352352
expect(functions[0]?.name).toBe("__init__");
353353
expect(functions[1]?.name).toBe("get_name");
354354
expect(functions[2]?.name).toBe("create_user");
355355
expect(functions[3]?.name).toBe("save_user");
356356
expect(functions[3]?.meta.isAsync).toBe(true);
357357
expect(functions[4]?.name).toBe("find_by_name");
358-
358+
359359
expect(types[0]?.name).toBe("User");
360360
expect(types[1]?.name).toBe("UserRepository");
361361
});
362362
});
363-
});
363+
});
364+

scripts/sdk-indexer/python-symbol-extractor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ export class PythonSymbolExtractor extends BaseSymbolExtractor {
1717
"**/.git/**",
1818
];
1919

20-
constructor() {
20+
constructor(public files: string[]) {
2121
super();
2222
this.parser.setLanguage(Python);
2323
}

scripts/sdk-indexer/symbol-extractor.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ export abstract class BaseSymbolExtractor implements SymbolExtractor {
1414
this.parser = new Parser();
1515
}
1616

17-
extract(rootPath: string) {
18-
const files = this.findFiles(rootPath);
17+
extract(rootPath: string, _files?: string[]) {
18+
const files = this.findFiles(rootPath, _files);
1919
const symbols: Symbol[] = [];
2020

2121
for (const file of files) {
@@ -30,7 +30,9 @@ export abstract class BaseSymbolExtractor implements SymbolExtractor {
3030
return symbols;
3131
}
3232

33-
private findFiles(rootPath: string): string[] {
33+
private findFiles(rootPath: string, _files?: string[]): string[] {
34+
if (_files) return _files.map((f) => join(rootPath, f));
35+
3436
const files: string[] = [];
3537

3638
for (const pattern of this.include) {

scripts/sdk-indexer/types.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ export interface SymbolExtractor {
22
language: "typescript" | "go" | "python";
33
include: string[];
44
exclude: string[];
5-
extract(rootPath: string): void;
5+
extract(rootPath: string, files?: string[]): void;
66
}
77

88
export interface Symbol {
@@ -45,6 +45,7 @@ export interface ClassSymbol extends Symbol {
4545
isStatic: boolean;
4646
isAsync: boolean;
4747
line: number;
48+
content: string;
4849
}[];
4950
properties: {
5051
name: string;

scripts/sdk-indexer/typescript-symbol-extractor.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { describe, it, expect } from "vitest";
22
import { TypeScriptSymbolExtractor } from "./typescript-symbol-extractor";
33
import { ClassSymbol, TypeSymbol } from "./types";
44

5-
const extractor = new TypeScriptSymbolExtractor();
5+
const extractor = new TypeScriptSymbolExtractor(["test.ts"]);
66

77
function extractClassSymbols(code: string): ClassSymbol[] {
88
const symbols = extractor["extractFromFile"]("test.ts", code);

scripts/sdk-indexer/typescript-symbol-extractor.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,14 @@ import Parser from "tree-sitter";
22
import TypeScript from "tree-sitter-typescript";
33

44
import { BaseSymbolExtractor } from "./symbol-extractor";
5-
import { Symbol, VariableSymbol, FunctionSymbol, ClassSymbol, TypeSymbol } from "./types";
6-
import { writeFileSync } from "fs";
5+
import { Symbol, VariableSymbol, FunctionSymbol, ClassSymbol, TypeSymbol, SymbolExtractor } from "./types";
76

8-
export class TypeScriptSymbolExtractor extends BaseSymbolExtractor {
7+
export class TypeScriptSymbolExtractor extends BaseSymbolExtractor implements SymbolExtractor {
98
language: "typescript" = "typescript";
109
include = ["**/*.ts", "**/*.tsx"];
1110
exclude = ["**/node_modules/**", "**/dist/**", "**/build/**", "**/.git/**"];
1211

13-
constructor() {
12+
constructor(public files: string[]) {
1413
super();
1514
this.parser.setLanguage(TypeScript.typescript);
1615
}

0 commit comments

Comments
 (0)