Skip to content

Commit c7f9da0

Browse files
authored
fix(milvus): Fix upsert operations when autoId is false (#8501)
2 parents cd5f6fc + 9f491d6 commit c7f9da0

File tree

3 files changed

+267
-1
lines changed

3 files changed

+267
-1
lines changed

.changeset/puny-ways-cry.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@langchain/community": patch
3+
---
4+
5+
milvus: Fix upsert operations when autoId is false

libs/langchain-community/src/vectorstores/milvus.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,9 @@ export class Milvus extends VectorStore {
574574
});
575575
desc.schema.fields.forEach((field) => {
576576
this.fields.push(field.name);
577-
if (field.autoID) {
577+
// Only remove autoID fields from this.fields if we're using autoId mode
578+
// When autoId is false, we need to include the primary field for upsert operations
579+
if (field.autoID && this.autoId) {
578580
const index = this.fields.indexOf(field.name);
579581
if (index !== -1) {
580582
this.fields.splice(index, 1);
Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
/* eslint-disable @typescript-eslint/no-explicit-any */
2+
import { jest, test, expect, beforeEach } from "@jest/globals";
3+
import { FakeEmbeddings } from "@langchain/core/utils/testing";
4+
import { Document } from "@langchain/core/documents";
5+
import { ErrorCode } from "@zilliz/milvus2-sdk-node";
6+
7+
import { Milvus } from "../milvus.js";
8+
9+
const fields = [
10+
{
11+
name: "id",
12+
is_primary_key: true,
13+
autoID: true,
14+
data_type: 21,
15+
},
16+
{
17+
name: "text",
18+
is_primary_key: false,
19+
autoID: false,
20+
data_type: 21,
21+
},
22+
{
23+
name: "vector",
24+
is_primary_key: false,
25+
autoID: false,
26+
data_type: 101,
27+
},
28+
{
29+
name: "custom_field",
30+
is_primary_key: false,
31+
autoID: false,
32+
data_type: 21,
33+
},
34+
];
35+
36+
// Mock successful responses
37+
const mockSuccessResponse = {
38+
status: { error_code: ErrorCode.SUCCESS },
39+
};
40+
41+
// Mock the Milvus client
42+
const mockMilvusClient = {
43+
describeCollection: jest.fn(),
44+
insert: jest.fn(),
45+
upsert: jest.fn(),
46+
flushSync: jest.fn(),
47+
createCollection: jest.fn(),
48+
hasCollection: jest.fn(),
49+
loadCollection: jest.fn(),
50+
createPartition: jest.fn(),
51+
hasPartition: jest.fn(),
52+
loadPartition: jest.fn(),
53+
} as any;
54+
55+
jest.mock("@zilliz/milvus2-sdk-node", () => ({
56+
MilvusClient: jest.fn().mockImplementation(() => mockMilvusClient),
57+
}));
58+
59+
beforeEach(() => {
60+
jest.clearAllMocks();
61+
mockMilvusClient.describeCollection.mockResolvedValue({});
62+
mockMilvusClient.insert.mockResolvedValue(mockSuccessResponse);
63+
mockMilvusClient.upsert.mockResolvedValue(mockSuccessResponse);
64+
mockMilvusClient.flushSync.mockResolvedValue(mockSuccessResponse);
65+
mockMilvusClient.hasCollection.mockResolvedValue(mockSuccessResponse);
66+
mockMilvusClient.loadCollection.mockResolvedValue(mockSuccessResponse);
67+
mockMilvusClient.createCollection.mockResolvedValue(mockSuccessResponse);
68+
mockMilvusClient.createPartition.mockResolvedValue(mockSuccessResponse);
69+
mockMilvusClient.hasPartition.mockResolvedValue(mockSuccessResponse);
70+
mockMilvusClient.loadPartition.mockResolvedValue(mockSuccessResponse);
71+
});
72+
73+
test("Milvus upsert with autoId: false includes primary field from metadata", async () => {
74+
// Mock collection schema with autoID primary field
75+
const mockSchema = {
76+
schema: {
77+
fields,
78+
},
79+
};
80+
81+
mockMilvusClient.describeCollection.mockResolvedValue(mockSchema);
82+
83+
const embeddings = new FakeEmbeddings();
84+
const milvus = new Milvus(embeddings, {
85+
collectionName: "test_collection",
86+
autoId: false, // User wants to provide their own IDs for upsert
87+
primaryField: "id",
88+
textField: "text",
89+
vectorField: "vector",
90+
clientConfig: {
91+
address: "localhost:19530",
92+
},
93+
});
94+
95+
// Replace the client with our mock after construction
96+
(milvus as any).client = mockMilvusClient;
97+
98+
// Test document with primary field in metadata
99+
const documents = [
100+
new Document({
101+
pageContent: "Test content for upsert",
102+
metadata: {
103+
id: "test_id_123",
104+
custom_field: "custom_value",
105+
},
106+
}),
107+
];
108+
109+
await milvus.addDocuments(documents);
110+
111+
// Verify upsert was called (not insert)
112+
expect(mockMilvusClient.upsert).toHaveBeenCalledTimes(1);
113+
expect(mockMilvusClient.insert).not.toHaveBeenCalled();
114+
115+
// Verify the upsert call includes the primary field
116+
const upsertCall = mockMilvusClient.upsert.mock.calls[0][0];
117+
expect(upsertCall.collection_name).toBe("test_collection");
118+
expect(upsertCall.fields_data).toHaveLength(1);
119+
120+
const upsertData = upsertCall.fields_data[0];
121+
expect(upsertData.id).toBe("test_id_123"); // Primary field should be included
122+
expect(upsertData.text).toBe("Test content for upsert");
123+
expect(upsertData.vector).toBeDefined();
124+
expect(upsertData.custom_field).toBe("custom_value");
125+
});
126+
127+
test("Milvus upsert with autoId: false throws error when primary field missing from metadata", async () => {
128+
// Mock collection schema
129+
const mockSchema = {
130+
schema: {
131+
fields,
132+
},
133+
};
134+
135+
mockMilvusClient.describeCollection.mockResolvedValue(mockSchema);
136+
137+
const embeddings = new FakeEmbeddings();
138+
const milvus = new Milvus(embeddings, {
139+
collectionName: "test_collection",
140+
autoId: false,
141+
primaryField: "id",
142+
textField: "text",
143+
vectorField: "vector",
144+
clientConfig: {
145+
address: "localhost:19530",
146+
},
147+
});
148+
149+
// Replace the client with our mock after construction
150+
(milvus as any).client = mockMilvusClient;
151+
152+
// Test document WITHOUT primary field in metadata
153+
const documents = [
154+
new Document({
155+
pageContent: "Test content",
156+
metadata: {
157+
custom_field: "custom_value",
158+
// Missing 'id' field
159+
},
160+
}),
161+
];
162+
163+
await expect(milvus.addDocuments(documents)).rejects.toThrow(
164+
"The Collection's primaryField is configured with autoId=false, thus its value must be provided through metadata."
165+
);
166+
});
167+
168+
test("Milvus insert with autoId: true excludes primary field from data", async () => {
169+
// Mock collection schema
170+
const mockSchema = {
171+
schema: {
172+
fields,
173+
},
174+
};
175+
176+
mockMilvusClient.describeCollection.mockResolvedValue(mockSchema);
177+
178+
const embeddings = new FakeEmbeddings();
179+
const milvus = new Milvus(embeddings, {
180+
collectionName: "test_collection",
181+
autoId: true, // Auto-generate IDs
182+
primaryField: "id",
183+
textField: "text",
184+
vectorField: "vector",
185+
clientConfig: {
186+
address: "localhost:19530",
187+
},
188+
});
189+
190+
// Replace the client with our mock after construction
191+
(milvus as any).client = mockMilvusClient;
192+
193+
const documents = [
194+
new Document({
195+
pageContent: "Test content for insert",
196+
metadata: {
197+
custom_field: "custom_value",
198+
},
199+
}),
200+
];
201+
202+
await milvus.addDocuments(documents);
203+
204+
// Verify insert was called (not upsert)
205+
expect(mockMilvusClient.insert).toHaveBeenCalledTimes(1);
206+
expect(mockMilvusClient.upsert).not.toHaveBeenCalled();
207+
208+
// Verify the insert call excludes the primary field (since autoId: true)
209+
const insertCall = mockMilvusClient.insert.mock.calls[0][0];
210+
const insertData = insertCall.fields_data[0];
211+
expect(insertData.id).toBeUndefined(); // Primary field should be excluded
212+
expect(insertData.text).toBe("Test content for insert");
213+
expect(insertData.vector).toBeDefined();
214+
expect(insertData.custom_field).toBe("custom_value");
215+
});
216+
217+
test("Milvus upsert with provided IDs uses those IDs instead of metadata", async () => {
218+
// Mock collection schema
219+
const mockSchema = {
220+
schema: {
221+
fields,
222+
},
223+
};
224+
225+
mockMilvusClient.describeCollection.mockResolvedValue(mockSchema);
226+
227+
const embeddings = new FakeEmbeddings();
228+
const milvus = new Milvus(embeddings, {
229+
collectionName: "test_collection",
230+
autoId: false,
231+
primaryField: "id",
232+
textField: "text",
233+
vectorField: "vector",
234+
clientConfig: {
235+
address: "localhost:19530",
236+
},
237+
});
238+
239+
// Replace the client with our mock after construction
240+
(milvus as any).client = mockMilvusClient;
241+
242+
const documents = [
243+
new Document({
244+
pageContent: "Test content",
245+
metadata: {
246+
id: "metadata_id", // This should be ignored
247+
custom_field: "custom_value",
248+
},
249+
}),
250+
];
251+
252+
// Provide explicit IDs
253+
await milvus.addDocuments(documents, { ids: ["explicit_id"] });
254+
255+
// Verify upsert was called with explicit ID
256+
const upsertCall = mockMilvusClient.upsert.mock.calls[0][0];
257+
const upsertData = upsertCall.fields_data[0];
258+
expect(upsertData.id).toBe("explicit_id"); // Should use explicit ID, not metadata ID
259+
});

0 commit comments

Comments
 (0)