From a14c19d242ff44ecef319c9ee2f1142b9b9856bf Mon Sep 17 00:00:00 2001 From: Guillaume Loulier Date: Tue, 15 Jul 2025 17:48:22 +0200 Subject: [PATCH] [Store] Add SurrealDB --- examples/.env | 7 +- examples/compose.yaml | 8 + .../store/surrealdb-similarity-search.php | 75 ++++ src/store/CHANGELOG.md | 1 + src/store/doc/index.rst | 4 + src/store/src/Bridge/SurrealDB/Store.php | 173 +++++++++ .../tests/Bridge/SurrealDB/StoreTest.php | 347 ++++++++++++++++++ 7 files changed, 614 insertions(+), 1 deletion(-) create mode 100644 examples/store/surrealdb-similarity-search.php create mode 100644 src/store/src/Bridge/SurrealDB/Store.php create mode 100644 src/store/tests/Bridge/SurrealDB/StoreTest.php diff --git a/examples/.env b/examples/.env index 77e26828..d4f193ea 100644 --- a/examples/.env +++ b/examples/.env @@ -83,5 +83,10 @@ MEILISEARCH_API_KEY=changeMe LMSTUDIO_HOST_URL=http://127.0.0.1:1234 # Qdrant -QDRANT_HOST='http://127.0.0.1:6333' +QDRANT_HOST=http://127.0.0.1:6333 QDRANT_SERVICE_API_KEY=changeMe + +# SurrealDB +SURREALDB_HOST=http://127.0.0.1:8000 +SURREALDB_USER=symfony +SURREALDB_PASS=symfony diff --git a/examples/compose.yaml b/examples/compose.yaml index 7e2b3365..ba488e42 100644 --- a/examples/compose.yaml +++ b/examples/compose.yaml @@ -29,3 +29,11 @@ services: QDRANT__SERVICE__API_KEY: '${QDRAT_SERVICE_API_KEY:-changeMe}' ports: - '6333:6333' + + surrealdb: + image: surrealdb/surrealdb:v2 + command: ['start', '--user', 'symfony', '--pass', 'symfony'] + environment: + SURREAL_HTTP_MAX_KEY_BODY_SIZE: 49152 + ports: + - '8000:8000' diff --git a/examples/store/surrealdb-similarity-search.php b/examples/store/surrealdb-similarity-search.php new file mode 100644 index 00000000..82174d79 --- /dev/null +++ b/examples/store/surrealdb-similarity-search.php @@ -0,0 +1,75 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
use Symfony\AI\Agent\Agent;
use Symfony\AI\Agent\Toolbox\AgentProcessor;
use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
use Symfony\AI\Agent\Toolbox\Toolbox;
use Symfony\AI\Fixtures\Movies;
use Symfony\AI\Platform\Bridge\OpenAI\Embeddings;
use Symfony\AI\Platform\Bridge\OpenAI\GPT;
use Symfony\AI\Platform\Bridge\OpenAI\PlatformFactory;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Store\Bridge\SurrealDB\Store;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Document\Vectorizer;
use Symfony\AI\Store\Indexer;
use Symfony\Component\HttpClient\HttpClient;
use Symfony\Component\Uid\Uuid;

require_once dirname(__DIR__).'/bootstrap.php';

// Configure the SurrealDB store from the SURREALDB_* environment variables.
$store = new Store(
    httpClient: HttpClient::create(),
    endpointUrl: env('SURREALDB_HOST'),
    user: env('SURREALDB_USER'),
    password: env('SURREALDB_PASS'),
    namespace: 'default',
    database: 'movies',
    table: 'movies',
);

// Sign in and define the vector index on the table.
$store->initialize();

// Build one text document per movie fixture; the raw fixture row is kept as metadata.
$documents = [];
foreach (Movies::all() as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// Vectorize the documents with the embeddings model and index them into the store.
$platform = PlatformFactory::create($_SERVER['OPENAI_API_KEY']);
$vectorizer = new Vectorizer($platform, $embeddings = new Embeddings());
$indexer = new Indexer($vectorizer, $store);
$indexer->index($documents);

$model = new GPT(GPT::GPT_4O_MINI);

// Expose the store to the agent through the SimilaritySearch tool.
$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = new Toolbox([$similaritySearch], logger: logger());
$processor = new AgentProcessor($toolbox);
$agent = new Agent($platform, $model, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of technology?')
);
$response = $agent->call($messages);

echo $response->getContent().\PHP_EOL;
_`Similarity Search with memory storage (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/memory-similarity-search.php .. _`Similarity Search with Qdrant (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/qdrant-similarity-search.php .. _`Azure AI Search`: https://azure.microsoft.com/products/ai-services/ai-search @@ -102,6 +105,7 @@ This leads to a store implementing two methods:: .. _`Pinecone`: https://www.pinecone.io/ .. _`Postgres`: https://www.postgresql.org/about/news/pgvector-070-released-2852/ .. _`Meilisearch`: https://www.meilisearch.com/ +.. _`SurrealDB`: https://surrealdb.com/ .. _`InMemory`: https://www.php.net/manual/en/language.types.array.php .. _`Qdrant`: https://qdrant.tech/ .. _`GitHub`: https://github.com/symfony/ai/issues/16 diff --git a/src/store/src/Bridge/SurrealDB/Store.php b/src/store/src/Bridge/SurrealDB/Store.php new file mode 100644 index 00000000..85c7adfd --- /dev/null +++ b/src/store/src/Bridge/SurrealDB/Store.php @@ -0,0 +1,173 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
namespace Symfony\AI\Store\Bridge\SurrealDB;

use Symfony\AI\Platform\Vector\NullVector;
use Symfony\AI\Platform\Vector\Vector;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\VectorDocument;
use Symfony\AI\Store\Exception\InvalidArgumentException;
use Symfony\AI\Store\Exception\RuntimeException;
use Symfony\AI\Store\InitializableStoreInterface;
use Symfony\AI\Store\VectorStoreInterface;
use Symfony\Component\Uid\Uuid;
use Symfony\Contracts\HttpClient\HttpClientInterface;

/**
 * Vector store talking to SurrealDB over HTTP.
 *
 * Documents are written with "POST key/{table}", searched with a SurrealQL
 * statement sent to "POST sql", and every call carries a bearer token obtained
 * once from "POST signin" and then reused for the lifetime of the instance.
 *
 * @author Guillaume Loulier
 */
final class Store implements InitializableStoreInterface, VectorStoreInterface
{
    /** Bearer token returned by "signin"; an empty string means "not signed in yet". */
    private string $authenticationToken = '';

    /**
     * @param string $endpointUrl         Base URL of the SurrealDB HTTP API, without trailing slash
     * @param string $namespace           Sent as the "Surreal-NS" header (and as "ns" on sign-in for namespaced users)
     * @param string $database            Sent as the "Surreal-DB" header (and as "db" on sign-in for namespaced users)
     * @param string $table               Table receiving the documents; also used to name the index "{table}_vectors"
     * @param string $vectorFieldName     Record field holding the embedding
     * @param string $strategy            Name inserted after "vector::similarity::" and after "DIST" in the index definition
     * @param int    $embeddingsDimension Dimension declared on the MTREE index
     * @param bool   $isNamespacedUser    When true, the namespace and database are added to the sign-in payload
     */
    public function __construct(
        private readonly HttpClientInterface $httpClient,
        private readonly string $endpointUrl,
        #[\SensitiveParameter] private readonly string $user,
        #[\SensitiveParameter] private readonly string $password,
        #[\SensitiveParameter] private readonly string $namespace,
        #[\SensitiveParameter] private readonly string $database,
        private readonly string $table = 'vectors',
        private readonly string $vectorFieldName = '_vectors',
        private readonly string $strategy = 'cosine',
        private readonly int $embeddingsDimension = 1536,
        private readonly bool $isNamespacedUser = false,
    ) {
    }

    /**
     * Stores each document as a record of the configured table, one HTTP call per document.
     */
    public function add(VectorDocument ...$documents): void
    {
        foreach ($documents as $document) {
            $this->request('POST', \sprintf('key/%s', $this->table), $this->convertToIndexableArray($document));
        }
    }

    /**
     * Runs a K-nearest-neighbours search against the vector field.
     *
     * @param array<string, mixed> $options  Supports "limit" (int, default 2): the K of the "<|K|>" operator
     * @param float|null           $minScore Accepted for interface compatibility; not applied by this store
     *
     * @return VectorDocument[]
     *
     * @throws \JsonException   When the vector cannot be encoded (e.g. it contains NAN or INF)
     * @throws RuntimeException When SurrealDB does not return a result set for the statement
     */
    public function query(Vector $vector, array $options = [], ?float $minScore = null): array
    {
        // Fail loudly instead of letting a "false" from json_encode() end up inside the statement.
        $vectors = json_encode($vector->getData(), \JSON_THROW_ON_ERROR);

        // Cast to int and clamp so that only a positive number can reach the statement.
        $limit = max(1, (int) ($options['limit'] ?? 2));

        $results = $this->request('POST', 'sql', \sprintf(
            'SELECT id, %s, _metadata, vector::similarity::%s(%s, %s) AS distance FROM %s WHERE %s <|%d|> %s;',
            $this->vectorFieldName, $this->strategy, $this->vectorFieldName, $vectors, $this->table, $this->vectorFieldName, $limit, $vectors,
        ));

        // The first entry of the response carries the outcome of the single statement sent above.
        $rows = $results[0]['result'] ?? null;

        if (!\is_array($rows)) {
            throw new RuntimeException(\is_string($rows) ? \sprintf('The SurrealDB query failed: "%s".', $rows) : 'The SurrealDB query did not return a result set.');
        }

        return array_map($this->convertToVectorDocument(...), $rows);
    }

    /**
     * Defines the MTREE index "{table}_vectors" on the vector field.
     *
     * @param array<string, mixed> $options Currently unused
     */
    public function initialize(array $options = []): void
    {
        $this->request('POST', 'sql', \sprintf(
            'DEFINE INDEX %s_vectors ON %s FIELDS %s MTREE DIMENSION %d DIST %s TYPE F32',
            $this->table, $this->table, $this->vectorFieldName, $this->embeddingsDimension, $this->strategy
        ));
    }

    /**
     * Sends an authenticated request and returns the decoded JSON response.
     *
     * Array payloads are sent JSON-encoded, string payloads (SurrealQL) as the raw body.
     *
     * @param array<string, mixed>|string $payload
     *
     * @return array<string|int, mixed>
     */
    private function request(string $method, string $endpoint, array|string $payload): array
    {
        // Sign in lazily so that add() and query() also work when initialize() was not called on this instance.
        $this->authenticate();

        $url = \sprintf('%s/%s', $this->endpointUrl, $endpoint);

        $finalPayload = \is_string($payload) ? ['body' => $payload] : ['json' => $payload];

        $response = $this->httpClient->request($method, $url, array_merge($finalPayload, [
            'headers' => [
                'Accept' => 'application/json',
                'Content-Type' => 'application/json',
                'Surreal-NS' => $this->namespace,
                'Surreal-DB' => $this->database,
                'Authorization' => \sprintf('Bearer %s', $this->authenticationToken),
            ],
        ]));

        return $response->toArray();
    }

    /**
     * Maps a document to the record sent to SurrealDB.
     *
     * The document id is duplicated under "_metadata._id", which is the value read back when hydrating results.
     *
     * @return array<string, mixed>
     */
    private function convertToIndexableArray(VectorDocument $document): array
    {
        return [
            'id' => $document->id->toRfc4122(),
            $this->vectorFieldName => $document->vector->getData(),
            '_metadata' => array_merge($document->metadata->getArrayCopy(), [
                '_id' => $document->id->toRfc4122(),
            ]),
        ];
    }

    /**
     * Rebuilds a document from a result row.
     *
     * @param array<string, mixed> $data
     *
     * @throws InvalidArgumentException When the row has no "_metadata._id" entry
     */
    private function convertToVectorDocument(array $data): VectorDocument
    {
        $id = $data['_metadata']['_id'] ?? throw new InvalidArgumentException('Missing "_id" field in the document metadata.');

        // A missing or null vector field is represented by a NullVector.
        $vector = !\array_key_exists($this->vectorFieldName, $data) || null === $data[$this->vectorFieldName]
            ? new NullVector()
            : new Vector($data[$this->vectorFieldName]);

        // "_id" is bookkeeping added by convertToIndexableArray(); it is not part of the user metadata.
        unset($data['_metadata']['_id']);

        return new VectorDocument(
            id: Uuid::fromString($id),
            vector: $vector,
            metadata: new Metadata($data['_metadata']),
        );
    }

    /**
     * Signs in against "signin" and memoizes the returned token; a no-op once a token is held.
     *
     * @throws RuntimeException When the response carries no usable token
     */
    private function authenticate(): void
    {
        if ('' !== $this->authenticationToken) {
            return;
        }

        $authenticationPayload = [
            'user' => $this->user,
            'pass' => $this->password,
        ];

        if ($this->isNamespacedUser) {
            $authenticationPayload['ns'] = $this->namespace;
            $authenticationPayload['db'] = $this->database;
        }

        $authenticationResponse = $this->httpClient->request('POST', \sprintf('%s/signin', $this->endpointUrl), [
            'headers' => [
                'Accept' => 'application/json',
            ],
            'json' => $authenticationPayload,
        ]);

        $token = $authenticationResponse->toArray()['token'] ?? null;

        // Reject null/non-string/empty tokens here rather than failing later with a TypeError or an empty bearer.
        if (!\is_string($token) || '' === $token) {
            throw new RuntimeException('The SurrealDB authentication response does not contain a token.');
        }

        $this->authenticationToken = $token;
    }
}
namespace Symfony\AI\Store\Tests\Bridge\SurrealDB;

use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\TestCase;
use Symfony\AI\Platform\Vector\Vector;
use Symfony\AI\Store\Bridge\SurrealDB\Store;
use Symfony\AI\Store\Document\VectorDocument;
use Symfony\Component\HttpClient\Exception\ClientException;
use Symfony\Component\HttpClient\MockHttpClient;
use Symfony\Component\HttpClient\Response\JsonMockResponse;
use Symfony\Component\Uid\Uuid;

/**
 * Exercises the SurrealDB store against a MockHttpClient.
 *
 * Every scenario queues its responses in the exact order the store is expected
 * to consume them: sign-in, index definition, then one response per add/query call.
 */
#[CoversClass(Store::class)]
final class StoreTest extends TestCase
{
    private const ENDPOINT = 'http://localhost:8000';

    public function testStoreCannotInitializeOnInvalidResponse(): void
    {
        $store = $this->createStore($this->createHttpClient(self::errorResponse()));

        $this->expectException(ClientException::class);
        $this->expectExceptionMessage('HTTP 400 returned for "http://localhost:8000/signin".');
        $this->expectExceptionCode(400);
        $store->initialize();
    }

    public function testStoreCannotInitializeOnValidAuthenticationResponse(): void
    {
        // Sign-in succeeds, the index definition is rejected.
        $store = $this->createStore($this->createHttpClient(
            self::authenticationResponse(),
            self::errorResponse(),
        ));

        $this->expectException(ClientException::class);
        $this->expectExceptionMessage('HTTP 400 returned for "http://localhost:8000/sql".');
        $this->expectExceptionCode(400);
        $store->initialize();
    }

    public function testStoreCanInitialize(): void
    {
        $httpClient = $this->createHttpClient(
            self::authenticationResponse(),
            self::indexResponse(),
        );

        $this->createStore($httpClient)->initialize();

        self::assertSame(2, $httpClient->getRequestsCount());
    }

    public function testStoreCannotAddOnInvalidResponse(): void
    {
        $this->assertAddIsRejected(new Vector([0.1, 0.2, 0.3]));
    }

    public function testStoreCannotAddOnInvalidAddResponse(): void
    {
        $this->assertAddIsRejected(new Vector(array_fill(0, 1275, 0.1)));
    }

    public function testStoreCanAdd(): void
    {
        $httpClient = $this->createHttpClient(
            self::authenticationResponse(),
            self::indexResponse(),
            self::documentsResponse(),
        );

        $store = $this->createStore($httpClient, 'test');
        $store->initialize();

        $store->add(new VectorDocument(Uuid::v4(), new Vector(array_fill(0, 1275, 0.1))));

        self::assertSame(3, $httpClient->getRequestsCount());
    }

    public function testStoreCannotQueryOnInvalidResponse(): void
    {
        $store = $this->createStore($this->createHttpClient(
            self::authenticationResponse(),
            self::indexResponse(),
            self::documentsResponse(),
            self::errorResponse(),
        ), 'test');
        $store->initialize();

        $store->add(new VectorDocument(Uuid::v4(), new Vector(array_fill(0, 1275, 0.1))));

        $this->expectException(ClientException::class);
        $this->expectExceptionMessage('HTTP 400 returned for "http://localhost:8000/sql".');
        $this->expectExceptionCode(400);
        $store->query(new Vector(array_fill(0, 1275, 0.1)));
    }

    public function testStoreCanQueryOnValidEmbeddings(): void
    {
        // One response per request, in order: sign-in, index, add, query.
        $httpClient = $this->createHttpClient(
            self::authenticationResponse(),
            self::indexResponse(),
            self::documentsResponse(),
            self::documentsResponse(),
        );

        $store = $this->createStore($httpClient, 'test');
        $store->initialize();

        $store->add(new VectorDocument(Uuid::v4(), new Vector(array_fill(0, 1275, 0.1))));

        $results = $store->query(new Vector(array_fill(0, 1275, 0.1)));

        self::assertCount(2, $results);
        self::assertContainsOnlyInstancesOf(VectorDocument::class, $results);
        self::assertSame(4, $httpClient->getRequestsCount());
    }

    /**
     * Initializes a store, then asserts that a rejected "key/test" call surfaces as a ClientException.
     */
    private function assertAddIsRejected(Vector $vector): void
    {
        $store = $this->createStore($this->createHttpClient(
            self::authenticationResponse(),
            self::indexResponse(),
            self::errorResponse(),
        ), 'test');
        $store->initialize();

        $this->expectException(ClientException::class);
        $this->expectExceptionMessage('HTTP 400 returned for "http://localhost:8000/key/test".');
        $this->expectExceptionCode(400);
        $store->add(new VectorDocument(Uuid::v4(), $vector));
    }

    /**
     * Builds a client replaying the given responses in order.
     */
    private function createHttpClient(JsonMockResponse ...$responses): MockHttpClient
    {
        return new MockHttpClient($responses, self::ENDPOINT);
    }

    /**
     * Builds a store with "test" credentials; a null table keeps the store default.
     */
    private function createStore(MockHttpClient $httpClient, ?string $table = null): Store
    {
        if (null === $table) {
            return new Store($httpClient, self::ENDPOINT, 'test', 'test', 'test', 'test');
        }

        return new Store($httpClient, self::ENDPOINT, 'test', 'test', 'test', 'test', $table);
    }

    /** Successful "signin" response carrying the token "bar". */
    private static function authenticationResponse(): JsonMockResponse
    {
        return new JsonMockResponse([
            'code' => 200,
            'details' => 'Authentication succeeded.',
            'token' => 'bar',
        ], [
            'http_code' => 200,
        ]);
    }

    /** Successful "sql" response for the index definition. */
    private static function indexResponse(): JsonMockResponse
    {
        return new JsonMockResponse([
            [
                'result' => 'DEFINE INDEX test_vectors ON movies FIELDS _vectors MTREE DIMENSION 1275 DIST cosine TYPE F32',
                'status' => 'OK',
                'time' => '263.208µs',
            ],
        ], [
            'http_code' => 200,
        ]);
    }

    /** Successful response whose single statement result holds two stored records. */
    private static function documentsResponse(): JsonMockResponse
    {
        return new JsonMockResponse([
            [
                'result' => [
                    self::record(),
                    self::record(),
                ],
                'status' => 'OK',
                'time' => '263.208µs',
            ],
        ], [
            'http_code' => 200,
        ]);
    }

    /** Empty-bodied HTTP 400 response. */
    private static function errorResponse(): JsonMockResponse
    {
        return new JsonMockResponse([], [
            'http_code' => 400,
        ]);
    }

    /**
     * @return array{id: string, _vectors: list<float>, _metadata: array{_id: string}}
     */
    private static function record(): array
    {
        return [
            'id' => Uuid::v4()->toRfc4122(),
            '_vectors' => [0.1, 0.1, 0.1],
            '_metadata' => [
                '_id' => Uuid::v4()->toRfc4122(),
            ],
        ];
    }
}