Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,37 @@ docker run -it --rm -p 3000:3000 pqnet/minivec
```

add new documents to the store with a HTTP POST:

```bash
curl -H "Content-Type: application/json" -d '{ "documents": [{ "content": "hello world", "metadata":{} }]}' localhost:3000/api/documents
```

And search them using HTTP GET:

```bash
curl 'localhost:3000/api/documents?q=hello'
```

### Backup and Restore

Backup the database to a JSON file:

```bash
curl 'localhost:3000/api/backup' > minivec-backup.json
```

Restore from a backup file:

```bash
# Restore while preserving existing documents
curl -X POST -H "Content-Type: application/json" -d @minivec-backup.json localhost:3000/api/backup

# Restore and clear existing documents
curl -X POST -H "Content-Type: application/json" -d @minivec-backup.json 'localhost:3000/api/backup?clear=true'
```

## Persistence

By default models are downloaded in the `/models` directory and the database is saved in the `/app/.data` directory (inside the container).
To allow re-using model cache, or to persist the saved vectors between runs, you can map host directories or mount named volumes at these paths, e.g.

Expand All @@ -33,13 +54,17 @@ podman run -it --rm -p 3000:3000 -v minivec-models-cache:/models -v minivec-data
```

(similarly with `docker`)

```bash
docker run -it --rm -p 3000:3000 -v minivec-models-cache:/models -v minivec-data:/app/.data pqnet/minivec
```

## Configuration

Use environment variables to configure which models to load. See [nitro.config.ts](nitro.config.ts) for a full list of the usable variables.

### Model choice

`bge-m3` (for embedding) and `bge-reranker-v2-m3` (for reranking) are automatically downloaded and used by the container.
It is possible to choose different models by specifying a local file name, an http/https URL or a Hugging Face repository to download the models automatically.
See https://node-llama-cpp.withcat.ai/guide/downloading-models for the list of compatible URL schemes and parameters.
Expand Down
41 changes: 41 additions & 0 deletions server/api/backup/index.get.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
export default defineLazyEventHandler(async () => {
  const db = await getDb();

  return defineEventHandler(async (event) => {
    try {
      // Fetch every stored document; json() renders the jsonb column as text.
      const result = await db.sql<{
        rows: Array<{ id: number; content: string; metadata: string }>;
      }>`
      SELECT id, content, json(metadata) metadata FROM documents
    `;

      // Deserialize the metadata text back into a plain object per row.
      const documents = result.rows.map(({ id, content, metadata }) => ({
        id,
        content,
        metadata: JSON.parse(metadata),
      }));

      // Timestamped attachment name so browsers save the response as a file.
      const timestamp = new Date().toISOString().replace(/:/g, "-");
      setResponseHeaders(event, {
        "Content-Type": "application/json",
        "Content-Disposition": `attachment; filename="minivec-backup-${timestamp}.json"`,
      });

      return { documents };
    } catch (error) {
      // Surface a generic 500 while keeping the original cause attached.
      console.error("Error creating backup:", error);
      throw createError({
        statusCode: 500,
        statusMessage: "Internal Server Error",
        cause: error,
        message: "Failed to create backup",
      });
    }
  });
});
90 changes: 90 additions & 0 deletions server/api/backup/index.post.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { Primitive } from "db0";

export default defineLazyEventHandler(async () => {
  const db = await getDb();
  const { embeddingContext } = await useAiContext();

  // Restore documents from a backup payload ({ documents: [...] }).
  // Optional query parameter `clear=true` wipes the table first.
  return defineEventHandler(async (event) => {
    // Respect the read-only deployment flag before touching the database.
    const { disableWrite } = useRuntimeConfig(event);
    if (disableWrite) {
      throw createError({
        statusCode: 403,
        statusMessage: "Forbidden",
        message: "Write operations are disabled",
      });
    }

    try {
      const { documents } = await readBody<{
        documents: Array<{ id?: number; content: string; metadata: unknown }>;
      }>(event);

      if (!Array.isArray(documents) || documents.length === 0) {
        throw createError({
          statusCode: 400,
          statusMessage: "Bad Request",
          message: "No documents provided in backup file",
        });
      }

      // Wrap the whole restore in a transaction so it is all-or-nothing.
      await db.sql`BEGIN TRANSACTION`;

      try {
        // Clear existing documents if requested (optional query parameter)
        const query = getQuery(event);
        const clearExisting = query.clear === "true";

        if (clearExisting) {
          await db.sql`DELETE FROM documents`;
        }

        // Embed all documents in parallel before inserting any of them.
        const embeddedDocuments = await Promise.all(
          documents.map(async (document) => {
            const embedding = await embeddingContext.getEmbeddingFor(
              document.content
            );
            return { ...document, embedding };
          })
        );

        const prepst = db.prepare(
          `insert into documents (content, metadata, embedding) values (?, jsonb(?), ?)`
        );

        for (const { content, metadata, embedding } of embeddedDocuments) {
          // db0's Primitive type doesn't include Float32Array, but the
          // underlying SQLite driver accepts it as a blob — hence the cast.
          const { success } = await prepst.run(
            content,
            JSON.stringify(metadata),
            new Float32Array(embedding.vector) as unknown as Primitive
          );
          if (!success) {
            throw new Error("Failed to insert document");
          }
        }

        // Commit the transaction
        await db.sql`COMMIT`;

        return {
          message: `Successfully restored ${documents.length} documents`,
          cleared: clearExisting,
        };
      } catch (error) {
        // Rollback on error
        await db.sql`ROLLBACK`;
        throw error;
      }
    } catch (error) {
      // BUGFIX: errors that already carry an HTTP status (e.g. the 400
      // thrown above for an empty payload) must pass through unchanged;
      // previously they were re-wrapped as a generic 500.
      if (error && typeof error === "object" && "statusCode" in error) {
        throw error;
      }
      console.error("Error restoring backup:", error);
      throw createError({
        statusCode: 500,
        statusMessage: "Internal Server Error",
        cause: error,
        message: "Failed to restore backup",
      });
    }
  });
});