Skip to content

Commit ebf3695

Browse files
authored
Implement Create/Drop Index/Metadata index in GO (#13791)
### What problem does this PR solve?

Implement Create/Drop Index and Create/Drop Metadata Index in Go.

New API handling in Go:
- POST /kb/index
- DELETE /kb/index
- POST /tenant/doc_meta_index
- DELETE /tenant/doc_meta_index

New CLI statements:
- CREATE INDEX FOR DATASET 'dataset_name' VECTOR_SIZE 1024;
- DROP INDEX FOR DATASET 'dataset_name';
- CREATE INDEX DOC_META;
- DROP INDEX DOC_META;

### Type of change

- [x] Refactoring
1 parent d19ca71 commit ebf3695

File tree

20 files changed

+1165
-30
lines changed

20 files changed

+1165
-30
lines changed

admin/client/parser.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@
8484
| list_user_chats
8585
| create_user_chat
8686
| drop_user_chat
87+
| create_index
88+
| drop_index
89+
| create_doc_meta_index
90+
| drop_doc_meta_index
8791
| list_user_model_providers
8892
| list_user_default_models
8993
| parse_dataset_docs
@@ -176,6 +180,7 @@
176180
INTO: "INTO"i
177181
IN: "IN"i
178182
WITH: "WITH"i
183+
VECTOR_SIZE: "VECTOR_SIZE"i
179184
PARSER: "PARSER"i
180185
PIPELINE: "PIPELINE"i
181186
SEARCH: "SEARCH"i
@@ -197,6 +202,8 @@
197202
LICENSE: "LICENSE"i
198203
CHECK: "CHECK"i
199204
CONFIG: "CONFIG"i
205+
INDEX: "INDEX"i
206+
DOC_META: "DOC_META"i
200207
CHUNK: "CHUNK"i
201208
CHUNKS: "CHUNKS"i
202209
GET: "GET"i
@@ -323,6 +330,10 @@
323330
list_user_chats: LIST CHATS ";"
324331
create_user_chat: CREATE CHAT quoted_string ";"
325332
drop_user_chat: DROP CHAT quoted_string ";"
333+
create_index: CREATE INDEX FOR DATASET quoted_string VECTOR_SIZE NUMBER ";"
334+
drop_index: DROP INDEX FOR DATASET quoted_string ";"
335+
create_doc_meta_index: CREATE INDEX DOC_META ";"
336+
drop_doc_meta_index: DROP INDEX DOC_META ";"
326337
create_chat_session: CREATE CHAT quoted_string SESSION ";"
327338
drop_chat_session: DROP CHAT quoted_string SESSION quoted_string ";"
328339
list_chat_sessions: LIST CHAT quoted_string SESSIONS ";"
@@ -650,6 +661,31 @@ def drop_user_chat(self, items):
650661
chat_name = items[2].children[0].strip("'\"")
651662
return {"type": "drop_user_chat", "chat_name": chat_name}
652663

664+
def create_index(self, items):
    """Turn the children of a ``create_index`` rule into a command dict.

    The matching grammar rule is
    ``CREATE INDEX FOR DATASET quoted_string VECTOR_SIZE NUMBER ";"``.

    Args:
        items: Children of the rule. The ``quoted_string`` child is
            recognised by its ``.data`` attribute, the other children by
            their ``.type`` attribute.

    Returns:
        ``{"type": "create_index", "dataset_name": ..., "vector_size": ...}``
        where ``dataset_name`` is the quoted string without its surrounding
        quotes and ``vector_size`` is the integer that directly follows
        ``VECTOR_SIZE``. Either value is ``None`` when no matching child
        is present.
    """
    dataset_name = None
    vector_size = None
    previous = None
    for item in items:
        if getattr(item, "data", None) == "quoted_string":
            dataset_name = item.children[0].strip("'\"")
        # The preceding child may not expose ``.type`` (the quoted_string
        # child is only identified through ``.data``), so read it with
        # getattr rather than risking an AttributeError.
        if (
            getattr(item, "type", None) == "NUMBER"
            and getattr(previous, "type", None) == "VECTOR_SIZE"
        ):
            vector_size = int(item)
        previous = item
    return {"type": "create_index", "dataset_name": dataset_name, "vector_size": vector_size}
675+
676+
def drop_index(self, items):
    """Turn the children of a ``drop_index`` rule into a command dict.

    The dataset name is taken from the ``quoted_string`` child with its
    surrounding quotes stripped; when several are present the last one
    wins, and ``None`` is used when there is none.
    """
    names = [
        node.children[0].strip("'\"")
        for node in items
        if hasattr(node, "data") and node.data == "quoted_string"
    ]
    return {"type": "drop_index", "dataset_name": names[-1] if names else None}
682+
683+
def create_doc_meta_index(self, items):
    """Return the command dict for ``CREATE INDEX DOC_META;`` (``items`` is unused)."""
    command = dict(type="create_doc_meta_index")
    return command
685+
686+
def drop_doc_meta_index(self, items):
    """Return the command dict for ``DROP INDEX DOC_META;`` (``items`` is unused)."""
    command = dict(type="drop_doc_meta_index")
    return command
688+
653689
def list_user_model_providers(self, items):
654690
return {"type": "list_user_model_providers"}
655691

admin/client/ragflow_client.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,75 @@ def create_user_chat(self, command):
10801080
else:
10811081
print(f"Fail to create chat {chat_name}, code: {res_json['code']}, message: {res_json['message']}")
10821082

1083+
def create_index(self, command):
    """Create the index of a dataset via ``POST /kb/index``.

    Only runs when ``self.server_type`` is ``"user"``. ``command`` must
    contain ``dataset_name`` and a truthy ``vector_size``. The dataset name
    is resolved with ``self._get_dataset_id``; a ``None`` id ends the call
    silently here. The outcome is printed and nothing is returned.
    """
    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    dataset_name = command["dataset_name"]
    vector_size = command.get("vector_size")
    if not vector_size:
        print("vector_size is required")
        return

    dataset_id = self._get_dataset_id(dataset_name)
    if dataset_id is None:
        return

    response = self.http_client.request(
        "POST",
        "/kb/index",
        json_body={"kb_id": dataset_id, "vector_size": vector_size},
        use_api_base=False,
        auth_kind="web",
    )
    body = response.json()
    code = body.get("code")
    # Success needs both HTTP 200 and an application-level code of 0.
    if response.status_code != 200 or code != 0:
        print(f"Fail to create index for dataset {dataset_name}, code: {code}, message: {body.get('message')}")
        return
    print(f"Success to create index for dataset: {dataset_name}")
1106+
1107+
def drop_index(self, command):
    """Drop the index of a dataset via ``DELETE /kb/index``.

    Only runs when ``self.server_type`` is ``"user"``. The dataset name in
    ``command["dataset_name"]`` is resolved with ``self._get_dataset_id``;
    a ``None`` id ends the call silently here. The outcome is printed and
    nothing is returned.
    """
    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    dataset_name = command["dataset_name"]
    dataset_id = self._get_dataset_id(dataset_name)
    if dataset_id is None:
        return

    response = self.http_client.request(
        "DELETE",
        "/kb/index",
        json_body={"kb_id": dataset_id},
        use_api_base=False,
        auth_kind="web",
    )
    body = response.json()
    code = body.get("code")
    # Success needs both HTTP 200 and an application-level code of 0.
    if response.status_code != 200 or code != 0:
        print(f"Fail to drop index for dataset {dataset_name}, code: {code}, message: {body.get('message')}")
        return
    print(f"Success to drop index for dataset: {dataset_name}")
1125+
1126+
def create_doc_meta_index(self, command):
    """Create the doc meta index via ``POST /tenant/doc_meta_index``.

    Only runs when ``self.server_type`` is ``"user"``; ``command`` is not
    read. The outcome is printed and nothing is returned.
    """
    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    response = self.http_client.request(
        "POST",
        "/tenant/doc_meta_index",
        use_api_base=False,
        auth_kind="web",
    )
    body = response.json()
    code = body.get("code")
    # Success needs both HTTP 200 and an application-level code of 0.
    if response.status_code != 200 or code != 0:
        print(f"Fail to create doc meta index, code: {code}, message: {body.get('message')}")
        return
    print("Success to create doc meta index")
1138+
1139+
def drop_doc_meta_index(self, command):
    """Drop the doc meta index via ``DELETE /tenant/doc_meta_index``.

    Only runs when ``self.server_type`` is ``"user"``; ``command`` is not
    read. The outcome is printed and nothing is returned.
    """
    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    response = self.http_client.request(
        "DELETE",
        "/tenant/doc_meta_index",
        use_api_base=False,
        auth_kind="web",
    )
    body = response.json()
    code = body.get("code")
    # Success needs both HTTP 200 and an application-level code of 0.
    if response.status_code != 200 or code != 0:
        print(f"Fail to drop doc meta index, code: {code}, message: {body.get('message')}")
        return
    print("Success to drop doc meta index")
1151+
10831152
def drop_user_chat(self, command):
10841153
if self.server_type != "user":
10851154
print("This command is only allowed in USER mode")
@@ -1804,6 +1873,14 @@ def run_command(client: RAGFlowClient, command_dict: dict):
18041873
client.create_user_chat(command_dict)
18051874
case "drop_user_chat":
18061875
client.drop_user_chat(command_dict)
1876+
case "create_index":
1877+
client.create_index(command_dict)
1878+
case "drop_index":
1879+
client.drop_index(command_dict)
1880+
case "create_doc_meta_index":
1881+
client.create_doc_meta_index(command_dict)
1882+
case "drop_doc_meta_index":
1883+
client.drop_doc_meta_index(command_dict)
18071884
case "create_chat_session":
18081885
client.create_chat_session(command_dict)
18091886
case "drop_chat_session":
@@ -1887,6 +1964,10 @@ def show_help():
18871964
LIST METADATA SUMMARY OF DATASET <dataset> DOCUMENTS <doc_id>[, <doc_id>]*
18881965
GET CHUNK <chunk_id>
18891966
LIST CHUNKS OF DOCUMENT <doc_id> [PAGE <page>] [SIZE <size>] [KEYWORDS <keywords>] [AVAILABLE <0|1>]
1967+
CREATE INDEX FOR DATASET <dataset> VECTOR_SIZE <vector_size>
1968+
DROP INDEX FOR DATASET <dataset>
1969+
CREATE INDEX DOC_META
1970+
DROP INDEX DOC_META
18901971
18911972
Meta Commands:
18921973
\\?, \\h, \\help Show this help

internal/cli/cli.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,10 @@ Commands (User Mode):
564564
SET TOKEN 'token_value'; - Set and validate API token
565565
SHOW TOKEN; - Show current API token
566566
UNSET TOKEN; - Remove current API token
567+
CREATE INDEX FOR DATASET 'name' VECTOR_SIZE N; - Create index for dataset
568+
DROP INDEX FOR DATASET 'name'; - Drop index for dataset
569+
CREATE INDEX DOC_META; - Create doc meta index
570+
DROP INDEX DOC_META; - Drop doc meta index
567571
568572
Commands (Admin Mode):
569573
LIST USERS; - List all users

internal/cli/client.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,14 @@ func (c *RAGFlowClient) ExecuteUserCommand(cmd *Command) (ResponseIf, error) {
372372
return c.UnsetToken(cmd)
373373
case "show_version":
374374
return c.ShowServerVersion(cmd)
375+
case "create_index":
376+
return c.CreateIndex(cmd)
377+
case "drop_index":
378+
return c.DropIndex(cmd)
379+
case "create_doc_meta_index":
380+
return c.CreateDocMetaIndex(cmd)
381+
case "drop_doc_meta_index":
382+
return c.DropDocMetaIndex(cmd)
375383
// TODO: Implement other commands
376384
default:
377385
return nil, fmt.Errorf("command '%s' would be executed with API", cmd.Type)

internal/cli/lexer.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,12 @@ func (l *Lexer) lookupIdent(ident string) Token {
293293
return Token{Type: TokenToken, Value: ident}
294294
case "TOKENS":
295295
return Token{Type: TokenTokens, Value: ident}
296+
case "INDEX":
297+
return Token{Type: TokenIndex, Value: ident}
298+
case "VECTOR_SIZE":
299+
return Token{Type: TokenVectorSize, Value: ident}
300+
case "DOC_META":
301+
return Token{Type: TokenDocMeta, Value: ident}
296302
default:
297303
return Token{Type: TokenIdentifier, Value: ident}
298304
}

internal/cli/parser.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ func (p *Parser) expectSemicolon() error {
208208
}
209209

210210
func isKeyword(tokenType int) bool {
211-
return tokenType >= TokenLogin && tokenType <= TokenPing
211+
return tokenType >= TokenLogin && tokenType <= TokenDocMeta
212212
}
213213

214214
// Helper functions for parsing

internal/cli/types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ const (
9898
TokenToken
9999
TokenTokens
100100
TokenUnset
101+
TokenIndex
102+
TokenVectorSize
103+
TokenDocMeta
101104

102105
// Literals
103106
TokenIdentifier

0 commit comments

Comments
 (0)