|
| 1 | +/* eslint-disable @typescript-eslint/no-explicit-any */ |
| 2 | +import { jest, test, expect, beforeEach } from "@jest/globals"; |
| 3 | +import { FakeEmbeddings } from "@langchain/core/utils/testing"; |
| 4 | +import { Document } from "@langchain/core/documents"; |
| 5 | +import { ErrorCode } from "@zilliz/milvus2-sdk-node"; |
| 6 | + |
| 7 | +import { Milvus } from "../milvus.js"; |
| 8 | + |
| 9 | +const fields = [ |
| 10 | + { |
| 11 | + name: "id", |
| 12 | + is_primary_key: true, |
| 13 | + autoID: true, |
| 14 | + data_type: 21, |
| 15 | + }, |
| 16 | + { |
| 17 | + name: "text", |
| 18 | + is_primary_key: false, |
| 19 | + autoID: false, |
| 20 | + data_type: 21, |
| 21 | + }, |
| 22 | + { |
| 23 | + name: "vector", |
| 24 | + is_primary_key: false, |
| 25 | + autoID: false, |
| 26 | + data_type: 101, |
| 27 | + }, |
| 28 | + { |
| 29 | + name: "custom_field", |
| 30 | + is_primary_key: false, |
| 31 | + autoID: false, |
| 32 | + data_type: 21, |
| 33 | + }, |
| 34 | +]; |
| 35 | + |
| 36 | +// Mock successful responses |
| 37 | +const mockSuccessResponse = { |
| 38 | + status: { error_code: ErrorCode.SUCCESS }, |
| 39 | +}; |
| 40 | + |
| 41 | +// Mock the Milvus client |
| 42 | +const mockMilvusClient = { |
| 43 | + describeCollection: jest.fn(), |
| 44 | + insert: jest.fn(), |
| 45 | + upsert: jest.fn(), |
| 46 | + flushSync: jest.fn(), |
| 47 | + createCollection: jest.fn(), |
| 48 | + hasCollection: jest.fn(), |
| 49 | + loadCollection: jest.fn(), |
| 50 | + createPartition: jest.fn(), |
| 51 | + hasPartition: jest.fn(), |
| 52 | + loadPartition: jest.fn(), |
| 53 | +} as any; |
| 54 | + |
| 55 | +jest.mock("@zilliz/milvus2-sdk-node", () => ({ |
| 56 | + MilvusClient: jest.fn().mockImplementation(() => mockMilvusClient), |
| 57 | +})); |
| 58 | + |
| 59 | +beforeEach(() => { |
| 60 | + jest.clearAllMocks(); |
| 61 | + mockMilvusClient.describeCollection.mockResolvedValue({}); |
| 62 | + mockMilvusClient.insert.mockResolvedValue(mockSuccessResponse); |
| 63 | + mockMilvusClient.upsert.mockResolvedValue(mockSuccessResponse); |
| 64 | + mockMilvusClient.flushSync.mockResolvedValue(mockSuccessResponse); |
| 65 | + mockMilvusClient.hasCollection.mockResolvedValue(mockSuccessResponse); |
| 66 | + mockMilvusClient.loadCollection.mockResolvedValue(mockSuccessResponse); |
| 67 | + mockMilvusClient.createCollection.mockResolvedValue(mockSuccessResponse); |
| 68 | + mockMilvusClient.createPartition.mockResolvedValue(mockSuccessResponse); |
| 69 | + mockMilvusClient.hasPartition.mockResolvedValue(mockSuccessResponse); |
| 70 | + mockMilvusClient.loadPartition.mockResolvedValue(mockSuccessResponse); |
| 71 | +}); |
| 72 | + |
| 73 | +test("Milvus upsert with autoId: false includes primary field from metadata", async () => { |
| 74 | + // Mock collection schema with autoID primary field |
| 75 | + const mockSchema = { |
| 76 | + schema: { |
| 77 | + fields, |
| 78 | + }, |
| 79 | + }; |
| 80 | + |
| 81 | + mockMilvusClient.describeCollection.mockResolvedValue(mockSchema); |
| 82 | + |
| 83 | + const embeddings = new FakeEmbeddings(); |
| 84 | + const milvus = new Milvus(embeddings, { |
| 85 | + collectionName: "test_collection", |
| 86 | + autoId: false, // User wants to provide their own IDs for upsert |
| 87 | + primaryField: "id", |
| 88 | + textField: "text", |
| 89 | + vectorField: "vector", |
| 90 | + clientConfig: { |
| 91 | + address: "localhost:19530", |
| 92 | + }, |
| 93 | + }); |
| 94 | + |
| 95 | + // Replace the client with our mock after construction |
| 96 | + (milvus as any).client = mockMilvusClient; |
| 97 | + |
| 98 | + // Test document with primary field in metadata |
| 99 | + const documents = [ |
| 100 | + new Document({ |
| 101 | + pageContent: "Test content for upsert", |
| 102 | + metadata: { |
| 103 | + id: "test_id_123", |
| 104 | + custom_field: "custom_value", |
| 105 | + }, |
| 106 | + }), |
| 107 | + ]; |
| 108 | + |
| 109 | + await milvus.addDocuments(documents); |
| 110 | + |
| 111 | + // Verify upsert was called (not insert) |
| 112 | + expect(mockMilvusClient.upsert).toHaveBeenCalledTimes(1); |
| 113 | + expect(mockMilvusClient.insert).not.toHaveBeenCalled(); |
| 114 | + |
| 115 | + // Verify the upsert call includes the primary field |
| 116 | + const upsertCall = mockMilvusClient.upsert.mock.calls[0][0]; |
| 117 | + expect(upsertCall.collection_name).toBe("test_collection"); |
| 118 | + expect(upsertCall.fields_data).toHaveLength(1); |
| 119 | + |
| 120 | + const upsertData = upsertCall.fields_data[0]; |
| 121 | + expect(upsertData.id).toBe("test_id_123"); // Primary field should be included |
| 122 | + expect(upsertData.text).toBe("Test content for upsert"); |
| 123 | + expect(upsertData.vector).toBeDefined(); |
| 124 | + expect(upsertData.custom_field).toBe("custom_value"); |
| 125 | +}); |
| 126 | + |
| 127 | +test("Milvus upsert with autoId: false throws error when primary field missing from metadata", async () => { |
| 128 | + // Mock collection schema |
| 129 | + const mockSchema = { |
| 130 | + schema: { |
| 131 | + fields, |
| 132 | + }, |
| 133 | + }; |
| 134 | + |
| 135 | + mockMilvusClient.describeCollection.mockResolvedValue(mockSchema); |
| 136 | + |
| 137 | + const embeddings = new FakeEmbeddings(); |
| 138 | + const milvus = new Milvus(embeddings, { |
| 139 | + collectionName: "test_collection", |
| 140 | + autoId: false, |
| 141 | + primaryField: "id", |
| 142 | + textField: "text", |
| 143 | + vectorField: "vector", |
| 144 | + clientConfig: { |
| 145 | + address: "localhost:19530", |
| 146 | + }, |
| 147 | + }); |
| 148 | + |
| 149 | + // Replace the client with our mock after construction |
| 150 | + (milvus as any).client = mockMilvusClient; |
| 151 | + |
| 152 | + // Test document WITHOUT primary field in metadata |
| 153 | + const documents = [ |
| 154 | + new Document({ |
| 155 | + pageContent: "Test content", |
| 156 | + metadata: { |
| 157 | + custom_field: "custom_value", |
| 158 | + // Missing 'id' field |
| 159 | + }, |
| 160 | + }), |
| 161 | + ]; |
| 162 | + |
| 163 | + await expect(milvus.addDocuments(documents)).rejects.toThrow( |
| 164 | + "The Collection's primaryField is configured with autoId=false, thus its value must be provided through metadata." |
| 165 | + ); |
| 166 | +}); |
| 167 | + |
| 168 | +test("Milvus insert with autoId: true excludes primary field from data", async () => { |
| 169 | + // Mock collection schema |
| 170 | + const mockSchema = { |
| 171 | + schema: { |
| 172 | + fields, |
| 173 | + }, |
| 174 | + }; |
| 175 | + |
| 176 | + mockMilvusClient.describeCollection.mockResolvedValue(mockSchema); |
| 177 | + |
| 178 | + const embeddings = new FakeEmbeddings(); |
| 179 | + const milvus = new Milvus(embeddings, { |
| 180 | + collectionName: "test_collection", |
| 181 | + autoId: true, // Auto-generate IDs |
| 182 | + primaryField: "id", |
| 183 | + textField: "text", |
| 184 | + vectorField: "vector", |
| 185 | + clientConfig: { |
| 186 | + address: "localhost:19530", |
| 187 | + }, |
| 188 | + }); |
| 189 | + |
| 190 | + // Replace the client with our mock after construction |
| 191 | + (milvus as any).client = mockMilvusClient; |
| 192 | + |
| 193 | + const documents = [ |
| 194 | + new Document({ |
| 195 | + pageContent: "Test content for insert", |
| 196 | + metadata: { |
| 197 | + custom_field: "custom_value", |
| 198 | + }, |
| 199 | + }), |
| 200 | + ]; |
| 201 | + |
| 202 | + await milvus.addDocuments(documents); |
| 203 | + |
| 204 | + // Verify insert was called (not upsert) |
| 205 | + expect(mockMilvusClient.insert).toHaveBeenCalledTimes(1); |
| 206 | + expect(mockMilvusClient.upsert).not.toHaveBeenCalled(); |
| 207 | + |
| 208 | + // Verify the insert call excludes the primary field (since autoId: true) |
| 209 | + const insertCall = mockMilvusClient.insert.mock.calls[0][0]; |
| 210 | + const insertData = insertCall.fields_data[0]; |
| 211 | + expect(insertData.id).toBeUndefined(); // Primary field should be excluded |
| 212 | + expect(insertData.text).toBe("Test content for insert"); |
| 213 | + expect(insertData.vector).toBeDefined(); |
| 214 | + expect(insertData.custom_field).toBe("custom_value"); |
| 215 | +}); |
| 216 | + |
| 217 | +test("Milvus upsert with provided IDs uses those IDs instead of metadata", async () => { |
| 218 | + // Mock collection schema |
| 219 | + const mockSchema = { |
| 220 | + schema: { |
| 221 | + fields, |
| 222 | + }, |
| 223 | + }; |
| 224 | + |
| 225 | + mockMilvusClient.describeCollection.mockResolvedValue(mockSchema); |
| 226 | + |
| 227 | + const embeddings = new FakeEmbeddings(); |
| 228 | + const milvus = new Milvus(embeddings, { |
| 229 | + collectionName: "test_collection", |
| 230 | + autoId: false, |
| 231 | + primaryField: "id", |
| 232 | + textField: "text", |
| 233 | + vectorField: "vector", |
| 234 | + clientConfig: { |
| 235 | + address: "localhost:19530", |
| 236 | + }, |
| 237 | + }); |
| 238 | + |
| 239 | + // Replace the client with our mock after construction |
| 240 | + (milvus as any).client = mockMilvusClient; |
| 241 | + |
| 242 | + const documents = [ |
| 243 | + new Document({ |
| 244 | + pageContent: "Test content", |
| 245 | + metadata: { |
| 246 | + id: "metadata_id", // This should be ignored |
| 247 | + custom_field: "custom_value", |
| 248 | + }, |
| 249 | + }), |
| 250 | + ]; |
| 251 | + |
| 252 | + // Provide explicit IDs |
| 253 | + await milvus.addDocuments(documents, { ids: ["explicit_id"] }); |
| 254 | + |
| 255 | + // Verify upsert was called with explicit ID |
| 256 | + const upsertCall = mockMilvusClient.upsert.mock.calls[0][0]; |
| 257 | + const upsertData = upsertCall.fields_data[0]; |
| 258 | + expect(upsertData.id).toBe("explicit_id"); // Should use explicit ID, not metadata ID |
| 259 | +}); |
0 commit comments