Skip to content

Commit e9b7322

Browse files
committed
feat(store): surrealDB started
1 parent aee7a35 commit e9b7322

File tree

6 files changed

+675
-0
lines changed

6 files changed

+675
-0
lines changed

examples/.env

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,8 @@ MARIADB_URI=pdo-mysql://[email protected]:3309/my_database
7474
# Meilisearch
7575
MEILISEARCH_HOST=http://127.0.0.1:7700
7676
MEILISEARCH_API_KEY=changeMe
77+
78+
# SurrealDB
79+
SURREALDB_HOST=http://127.0.0.1:8000
80+
SURREALDB_USER=symfony
81+
SURREALDB_PASS=symfony

examples/compose.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,9 @@ services:
2222
MEILI_MASTER_KEY: "${MEILISEARCH_MASTER_KEY:-changeMe}"
2323
ports:
2424
- "7700:7700"
25+
26+
surrealdb:
27+
image: surrealdb/surrealdb:v2
28+
command: ['start', '--user', 'symfony', '--pass', 'symfony']
29+
ports:
30+
- "8000:8000"
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Agent\Toolbox\AgentProcessor;
14+
use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
15+
use Symfony\AI\Agent\Toolbox\Toolbox;
16+
use Symfony\AI\Fixtures\Movies;
17+
use Symfony\AI\Platform\Bridge\OpenAI\Embeddings;
18+
use Symfony\AI\Platform\Bridge\OpenAI\GPT;
19+
use Symfony\AI\Platform\Bridge\OpenAI\PlatformFactory;
20+
use Symfony\AI\Platform\Message\Message;
21+
use Symfony\AI\Platform\Message\MessageBag;
22+
use Symfony\AI\Store\Bridge\SurrealDB\Store;
23+
use Symfony\AI\Store\Document\Metadata;
24+
use Symfony\AI\Store\Document\TextDocument;
25+
use Symfony\AI\Store\Document\Vectorizer;
26+
use Symfony\AI\Store\Indexer;
27+
use Symfony\Component\Dotenv\Dotenv;
28+
use Symfony\Component\HttpClient\HttpClient;
29+
use Symfony\Component\Uid\Uuid;
30+
31+
require_once dirname(__DIR__).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__).'/.env');

// The example talks to both OpenAI and SurrealDB, so bail out early when any credential is missing.
if (!isset($_SERVER['OPENAI_API_KEY'], $_SERVER['SURREALDB_HOST'], $_SERVER['SURREALDB_USER'], $_SERVER['SURREALDB_PASS'])) {
    echo 'Please set OPENAI_API_KEY, SURREALDB_HOST, SURREALDB_USER and SURREALDB_PASS environment variables.'.\PHP_EOL;
    exit(1);
}

// initialize the store
$store = new Store(
    httpClient: HttpClient::create(),
    endpointUrl: $_SERVER['SURREALDB_HOST'],
    user: $_SERVER['SURREALDB_USER'],
    password: $_SERVER['SURREALDB_PASS'],
    namespace: 'default',
    database: 'movies',
    table: 'movies',
);

// define the vector index on the table
$store->initialize();

// create one text document per fixture movie
$documents = [];
foreach (Movies::all() as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// create embeddings for documents and index them in the store
$platform = PlatformFactory::create($_SERVER['OPENAI_API_KEY']);
// The same embeddings model is shared by the indexer and by the similarity search tool below.
$embeddings = new Embeddings();
$vectorizer = new Vectorizer($platform, $embeddings);
$indexer = new Indexer($vectorizer, $store);
$indexer->index($documents);

$model = new GPT(GPT::GPT_4O_MINI);

// expose the store to the agent through the similarity search tool
$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = Toolbox::create($similaritySearch);
$processor = new AgentProcessor($toolbox);
// The processor is registered both as input and as output processor.
$agent = new Agent($platform, $model, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of technology?')
);
$response = $agent->call($messages);

echo $response->getContent().\PHP_EOL;

src/store/doc/index.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ You can find more advanced usage in combination with an Agent using the store fo
4141
* `Similarity Search with MongoDB (RAG)`_
4242
* `Similarity Search with Pinecone (RAG)`_
4343
* `Similarity Search with Meilisearch (RAG)`_
44+
* `Similarity Search with SurrealDB (RAG)`_
4445
* `Similarity Search with memory storage (RAG)`_
4546

4647
Supported Stores
@@ -53,6 +54,7 @@ Supported Stores
5354
* `Pinecone`_ (requires `probots-io/pinecone-php` as additional dependency)
5455
* `Postgres`_ (requires `ext-pdo`)
5556
* `Meilisearch`_
57+
* `SurrealDB`_
5658
* `InMemory`_
5759

5860
.. note::
@@ -91,6 +93,7 @@ This leads to a store implementing two methods::
9193
.. _`Similarity Search with MongoDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/mongodb-similarity-search.php
9294
.. _`Similarity Search with Pinecone (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/pinecone-similarity-search.php
9395
.. _`Similarity Search with Meilisearch (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/meilisearch-similarity-search.php
96+
.. _`Similarity Search with SurrealDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/surrealdb-similarity-search.php
9497
.. _`Similarity Search with memory storage (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/memory-similarity-search.php
9598
.. _`Azure AI Search`: https://azure.microsoft.com/products/ai-services/ai-search
9699
.. _`Chroma`: https://www.trychroma.com/
@@ -99,5 +102,6 @@ This leads to a store implementing two methods::
99102
.. _`Pinecone`: https://www.pinecone.io/
100103
.. _`Postgres`: https://www.postgresql.org/about/news/pgvector-070-released-2852/
101104
.. _`Meilisearch`: https://www.meilisearch.com/
105+
.. _`SurrealDB`: https://surrealdb.com/
102106
.. _`InMemory`: https://www.php.net/manual/en/language.types.array.php
103107
.. _`GitHub`: https://github.com/symfony/ai/issues/16
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Bridge\SurrealDB;
13+
14+
use Symfony\AI\Platform\Vector\NullVector;
15+
use Symfony\AI\Platform\Vector\Vector;
16+
use Symfony\AI\Store\Document\Metadata;
17+
use Symfony\AI\Store\Document\VectorDocument;
18+
use Symfony\AI\Store\Exception\InvalidArgumentException;
19+
use Symfony\AI\Store\Exception\RuntimeException;
20+
use Symfony\AI\Store\InitializableStoreInterface;
21+
use Symfony\AI\Store\VectorStoreInterface;
22+
use Symfony\Component\Uid\Uuid;
23+
use Symfony\Contracts\HttpClient\HttpClientInterface;
24+
25+
/**
 * Vector store talking to SurrealDB through its HTTP endpoints.
 *
 * Every public operation first signs in through the "signin" endpoint and then sends the
 * returned token as a bearer token together with the configured namespace and database.
 *
 * @author Guillaume Loulier <[email protected]>
 */
final class Store implements InitializableStoreInterface, VectorStoreInterface
{
    private const MAXIMUM_EMBEDDINGS_DIMENSIONS = 1275;

    /**
     * @param string $endpointUrl         Base URL of the SurrealDB HTTP API; a trailing slash is tolerated
     * @param string $table               Table receiving the documents and carrying the vector index
     * @param string $vectorFieldName     Name of the record field holding the embedding
     * @param string $strategy            Used both as index distance (DIST) and as vector::similarity function name
     * @param int    $embeddingsDimension Dimension declared on the index created by initialize()
     */
    public function __construct(
        private readonly HttpClientInterface $httpClient,
        private readonly string $endpointUrl,
        #[\SensitiveParameter]
        private readonly string $user,
        #[\SensitiveParameter]
        private readonly string $password,
        #[\SensitiveParameter]
        private readonly string $namespace,
        #[\SensitiveParameter]
        private readonly string $database,
        private readonly string $table = 'vectors',
        private readonly string $vectorFieldName = '_vectors',
        private readonly string $strategy = 'cosine',
        private readonly int $embeddingsDimension = self::MAXIMUM_EMBEDDINGS_DIMENSIONS,
    ) {
    }

    /**
     * Stores the given documents in the configured table, one HTTP request per document.
     *
     * @throws InvalidArgumentException When a vector has more than MAXIMUM_EMBEDDINGS_DIMENSIONS dimensions
     */
    public function add(VectorDocument ...$documents): void
    {
        if ([] === $documents) {
            // Nothing to persist, so skip the sign-in round trip.
            return;
        }

        // Validate every document before the first write so that an oversized vector
        // cannot leave the table with only a part of the batch stored.
        foreach ($documents as $document) {
            $dimensions = $document->vector->getDimensions();

            if (self::MAXIMUM_EMBEDDINGS_DIMENSIONS < $dimensions) {
                throw new InvalidArgumentException(\sprintf('The SurrealDB HTTP API does not support embeddings with more than %d dimensions, found %d', self::MAXIMUM_EMBEDDINGS_DIMENSIONS, $dimensions));
            }
        }

        // A single sign-in is shared by the whole batch.
        $headers = $this->createRequestHeaders($this->authenticate([]));
        $endpoint = \sprintf('key/%s', $this->table);

        foreach ($documents as $document) {
            $this->request('POST', $endpoint, $this->convertToIndexableArray($document), $headers);
        }
    }

    /**
     * Runs a nearest-neighbour search against the vector field and maps the rows to documents.
     *
     * The $options are only forwarded to the sign-in step; $minScore is accepted for
     * interface compatibility but is not applied by this implementation.
     *
     * @param array{namespacedUser?: bool} $options
     *
     * @return VectorDocument[]
     *
     * @throws InvalidArgumentException When the vector has more than MAXIMUM_EMBEDDINGS_DIMENSIONS dimensions
     * @throws \JsonException           When the vector data cannot be encoded
     * @throws RuntimeException         When the response does not carry a list of rows
     */
    public function query(Vector $vector, array $options = [], ?float $minScore = null): array
    {
        if (self::MAXIMUM_EMBEDDINGS_DIMENSIONS < $vector->getDimensions()) {
            throw new InvalidArgumentException(\sprintf('The dimensions of the vector must be less than or equal to %d, found %d', self::MAXIMUM_EMBEDDINGS_DIMENSIONS, $vector->getDimensions()));
        }

        // Fail loudly instead of silently injecting an empty string into the statement.
        $vectors = json_encode($vector->getData(), \JSON_THROW_ON_ERROR);

        $authenticationToken = $this->authenticate($options);

        // "<|2|>" restricts the search to the 2 nearest neighbours.
        $results = $this->request('POST', 'sql', \sprintf(
            'SELECT id, %s, _metadata, vector::similarity::%s(%s, %s) AS distance FROM %s WHERE %s <|2|> %s;',
            $this->vectorFieldName, $this->strategy, $this->vectorFieldName, $vectors, $this->table, $this->vectorFieldName, $vectors,
        ), $this->createRequestHeaders($authenticationToken));

        // NOTE(review): a failed statement presumably comes back with a non-list "result"
        // (e.g. an error message); guard it rather than let array_map() raise a TypeError.
        $rows = $results[0]['result'] ?? null;

        if (!\is_array($rows)) {
            throw new RuntimeException('The SurrealDB query response does not contain a result set.');
        }

        return array_map($this->convertToVectorDocument(...), $rows);
    }

    /**
     * Defines the MTREE index "<table>_vectors" on the vector field of the configured table.
     *
     * @param array{namespacedUser?: bool} $options Forwarded to the sign-in step
     */
    public function initialize(array $options = []): void
    {
        $authenticationToken = $this->authenticate($options);

        $this->request('POST', 'sql', \sprintf(
            'DEFINE INDEX %s_vectors ON %s FIELDS %s MTREE DIMENSION %d DIST %s TYPE F32',
            $this->table, $this->table, $this->vectorFieldName, $this->embeddingsDimension, $this->strategy
        ), $this->createRequestHeaders($authenticationToken));
    }

    /**
     * Sends a request to the given endpoint and returns the decoded response.
     *
     * Array payloads are sent JSON-encoded, string payloads (SQL statements) as raw body.
     *
     * @param array<string, mixed>|string $payload
     * @param array<string, mixed>        $extraHeaders
     *
     * @return array<string|int, mixed>
     */
    private function request(string $method, string $endpoint, array|string $payload, array $extraHeaders = []): array
    {
        // Tolerate a base URL configured with a trailing slash.
        $url = \sprintf('%s/%s', rtrim($this->endpointUrl, '/'), $endpoint);

        $finalPayload = \is_string($payload) ? ['body' => $payload] : ['json' => $payload];

        $response = $this->httpClient->request($method, $url, array_merge($finalPayload, [
            // The content negotiation headers always win over the extra headers.
            'headers' => array_merge($extraHeaders, [
                'Accept' => 'application/json',
                'Content-Type' => 'application/json',
            ]),
        ]));

        return $response->toArray();
    }

    /**
     * Builds the headers selecting the namespace/database and carrying the bearer token.
     *
     * @return array<string, string>
     */
    private function createRequestHeaders(string $authenticationToken): array
    {
        return [
            'Surreal-NS' => $this->namespace,
            'Surreal-DB' => $this->database,
            'Authorization' => \sprintf('Bearer %s', $authenticationToken),
        ];
    }

    /**
     * Converts a document to the record payload: id, vector field and metadata.
     *
     * The document id is duplicated under "_metadata._id", which is the value read back
     * by convertToVectorDocument().
     *
     * @return array<string, mixed>
     */
    private function convertToIndexableArray(VectorDocument $document): array
    {
        $id = $document->id->toRfc4122();

        return [
            'id' => $id,
            $this->vectorFieldName => $document->vector->getData(),
            '_metadata' => array_merge($document->metadata->getArrayCopy(), [
                '_id' => $id,
            ]),
        ];
    }

    /**
     * Converts a result row back to a document.
     *
     * @param array<string, mixed> $data
     *
     * @throws InvalidArgumentException When the row has no "_metadata._id" entry
     */
    private function convertToVectorDocument(array $data): VectorDocument
    {
        $id = $data['_metadata']['_id'] ?? throw new InvalidArgumentException('Missing "id" field in the document data');

        // Read the vector field once so that a row without it maps to a NullVector and a
        // null metadata entry instead of triggering an undefined array key warning.
        $rawVector = $data[$this->vectorFieldName] ?? null;
        $vector = null === $rawVector ? new NullVector() : new Vector($rawVector);

        // The id is exposed through the document itself, not through its metadata.
        unset($data['_metadata']['_id']);

        return new VectorDocument(
            id: Uuid::fromString($id),
            vector: $vector,
            metadata: new Metadata(array_merge($data['_metadata'], [
                $this->vectorFieldName => $rawVector,
            ])),
        );
    }

    /**
     * Signs in with the configured credentials and returns the token of the response.
     *
     * @param array{
     *     namespacedUser?: bool
     * } $options When namespacedUser is true the user is not a root user, so both the namespace and the database are sent along with the credentials
     *
     * @throws RuntimeException When the response does not contain a token
     */
    private function authenticate(array $options): string
    {
        $authenticationPayload = [
            'user' => $this->user,
            'pass' => $this->password,
        ];

        // Only a namespaced (non-root) user has to be scoped to a namespace and a database.
        if (true === ($options['namespacedUser'] ?? false)) {
            $authenticationPayload['ns'] = $this->namespace;
            $authenticationPayload['db'] = $this->database;
        }

        $authenticationResponse = $this->request('POST', 'signin', $authenticationPayload);

        $token = $authenticationResponse['token'] ?? null;

        // Also rejects a null or non-string token, which could not be returned as string.
        if (!\is_string($token)) {
            throw new RuntimeException('The SurrealDB authentication response does not contain a token.');
        }

        return $token;
    }
}

0 commit comments

Comments
 (0)