Skip to content

Commit 2972efe

Browse files
committed
IBX-9846: Added search using embeddings
1 parent 86c9c16 commit 2972efe

28 files changed

+1283
-1
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
<?php
2+
3+
/**
4+
* @copyright Copyright (C) Ibexa AS. All rights reserved.
5+
* @license For full copyright and license information view LICENSE file distributed with this source code.
6+
*/
7+
namespace Ibexa\Bundle\Core\DependencyInjection\Configuration\Parser;
8+
9+
use Ibexa\Bundle\Core\DependencyInjection\Configuration\AbstractParser;
10+
use Ibexa\Bundle\Core\DependencyInjection\Configuration\SiteAccessAware\ContextualizerInterface;
11+
use Symfony\Component\Config\Definition\Builder\NodeBuilder;
12+
13+
/**
 * Configuration parser for embedding models.
 *
 * `embedding_models` is a map: every entry is keyed by a model identifier and
 * holds that model's settings. Keys are kept verbatim (they are not normalized),
 * so identifiers containing dashes survive unchanged.
 *
 * Example configuration:
 * ```yaml
 * ibexa:
 *     system:
 *         default: # configuration per siteaccess or siteaccess group
 *             embedding_models:
 *                 text-embedding-3-small: # model identifier
 *                     name: "text-embedding-3-small"
 *                     dimensions: 1536
 *                     field_suffix: "3small"
 *                     embedding_provider: "ibexa_openai"
 *             # presumably one of the identifiers defined above - confirm with the consumer of this setting
 *             default_embedding_model: text-embedding-3-small
 * ```
 */
class Embeddings extends AbstractParser
{
    /**
     * Declares the `embedding_models` map and the `default_embedding_model` scalar.
     *
     * Every model entry must provide all four settings. Values that can never be
     * usable (empty strings, a vector size below 1) are rejected while the
     * configuration is processed instead of surfacing later at query time.
     */
    public function addSemanticConfig(NodeBuilder $nodeBuilder): void
    {
        $nodeBuilder
            ->arrayNode('embedding_models')
                // Model identifiers are used as-is; do not turn dashes into underscores.
                ->normalizeKeys(false)
                ->info('Defines available embedding models')
                ->arrayPrototype()
                    ->children()
                        ->scalarNode('name')->isRequired()->cannotBeEmpty()->end()
                        // An embedding vector needs at least one dimension.
                        ->integerNode('dimensions')->isRequired()->min(1)->end()
                        ->scalarNode('field_suffix')->isRequired()->cannotBeEmpty()->end()
                        ->scalarNode('embedding_provider')->isRequired()->cannotBeEmpty()->end()
                    ->end()
                ->end()
            ->end()
            ->scalarNode('default_embedding_model')
                ->info('Default embedding model identifier')
                ->defaultValue('text-embedding-ada-002')
            ->end();
    }

    /**
     * Hands both settings over to the contextualizer: `embedding_models` as a
     * config array and `default_embedding_model` as a plain setting.
     *
     * @param array<mixed> $config
     */
    public function preMap(array $config, ContextualizerInterface $contextualizer): void
    {
        $contextualizer->mapConfigArray('embedding_models', $config);
        $contextualizer->mapSetting('default_embedding_model', $config);
    }

    /**
     * Intentionally empty: all mapping is done in {@see self::preMap()}.
     *
     * NOTE(review): $currentScope is left untyped exactly as in the original
     * signature, presumably to stay compatible with the parent declaration - confirm
     * before adding a type.
     *
     * @param array<mixed> $scopeSettings
     */
    public function mapConfig(array &$scopeSettings, $currentScope, ContextualizerInterface $contextualizer): void
    {
        // Nothing to do here.
    }
}

src/bundle/Core/IbexaCoreBundle.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ public function getContainerExtension()
123123
new ConfigParser\UrlChecker(),
124124
new ConfigParser\TwigVariablesParser(),
125125
new ConfigParser\UserContentTypeIdentifier(),
126+
new ConfigParser\Embeddings(),
126127
],
127128
[
128129
new RepositoryConfigParser\Storage(),

src/bundle/Core/Resources/config/default_settings.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,3 +272,6 @@ parameters:
272272
writeFlags: ~
273273
linkHandling: ~
274274
permissions: [ ]
275+
276+
ibexa.site_access.config.default.embedding_models: []
277+
ibexa.site_access.config.default.default_embedding_model: 'text-embedding-ada-002'
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
services:
    # Defaults applied to every service declared in this file.
    _defaults:
        autowire: true
        autoconfigure: true
        public: false

    # Provider registry: receives every service tagged `ibexa.embedding_provider`,
    # indexed by the tag's `provider_name` attribute.
    Ibexa\Core\Search\Embedding\EmbeddingProviderRegistry:
        arguments:
            $embeddingProviders: !tagged_iterator { tag: 'ibexa.embedding_provider', index_by: 'provider_name' }

    Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderRegistryInterface: '@Ibexa\Core\Search\Embedding\EmbeddingProviderRegistry'

    # Provider resolver, reachable through its contract interface.
    Ibexa\Core\Search\Embedding\EmbeddingProviderResolver: ~

    Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderResolverInterface: '@Ibexa\Core\Search\Embedding\EmbeddingProviderResolver'

    # Embedding configuration, reachable through its contract interface.
    Ibexa\Core\Search\Embedding\EmbeddingConfiguration: ~

    Ibexa\Contracts\Core\Search\Embedding\EmbeddingConfigurationInterface: '@Ibexa\Core\Search\Embedding\EmbeddingConfiguration'

    # Registered under its own class name with the defaults above.
    Ibexa\Contracts\Core\Search\FieldType\EmbeddingFieldFactory: ~

src/bundle/Core/Resources/config/services.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
imports:
22
- { resource: commands.yml }
3+
- { resource: embeddings.yml }
34

45
parameters:
56
ibexa.site_access.default.name: default
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
<?php
2+
3+
/**
4+
* @copyright Copyright (C) Ibexa AS. All rights reserved.
5+
* @license For full copyright and license information view LICENSE file distributed with this source code.
6+
*/
7+
declare(strict_types=1);
8+
9+
namespace Ibexa\Contracts\Core\Repository\Values\Content;
10+
11+
use Ibexa\Contracts\Core\Repository\Values\Content\Query\Criterion;
12+
use Ibexa\Contracts\Core\Repository\Values\Content\Query\Embedding;
13+
use InvalidArgumentException;
14+
15+
/**
 * Query used to perform an embedding search.
 *
 * Adds an optional {@see Embedding} on top of the inherited query settings and
 * exposes typed accessors for the inherited settings it works with.
 */
class EmbeddingQuery extends Query
{
    /** Embedding to search with; stays null until one is assigned. */
    private ?Embedding $embedding = null;

    /**
     * Returns the embedding assigned to this query, or null when none was set.
     */
    public function getEmbedding(): ?Embedding
    {
        return $this->embedding;
    }

    /**
     * Assigns the embedding; passing null clears it.
     */
    public function setEmbedding(?Embedding $embedding): void
    {
        $this->embedding = $embedding;
    }

    /**
     * Returns the filter criterion, if any.
     */
    public function getFilter(): ?Criterion
    {
        return $this->filter;
    }

    /**
     * Replaces the filter criterion.
     */
    public function setFilter(Criterion $filter): void
    {
        $this->filter = $filter;
    }

    /**
     * Returns the aggregations attached to this query.
     *
     * @return \Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation[]
     */
    public function getAggregations(): array
    {
        return $this->aggregations;
    }

    /**
     * Replaces the aggregations attached to this query.
     *
     * @param \Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation[] $aggregations
     */
    public function setAggregations(array $aggregations): void
    {
        $this->aggregations = $aggregations;
    }

    /**
     * Returns the result offset.
     */
    public function getOffset(): int
    {
        return $this->offset;
    }

    /**
     * Sets the result offset.
     */
    public function setOffset(int $offset): void
    {
        $this->offset = $offset;
    }

    /**
     * Returns the result limit.
     */
    public function getLimit(): int
    {
        return $this->limit;
    }

    /**
     * Sets the result limit.
     */
    public function setLimit(int $limit): void
    {
        $this->limit = $limit;
    }

    /**
     * Sets the performCount flag.
     */
    public function setPerformCount(bool $performCount): void
    {
        $this->performCount = $performCount;
    }

    /**
     * Returns the performCount flag.
     */
    public function getPerformCount(): bool
    {
        return $this->performCount;
    }

    /**
     * Checks that none of the settings an embedding query may not carry are set.
     *
     * The rejected settings are `query`, `sortClauses`, `facetBuilders` and
     * `spellcheck`. This method never returns false: a violation is reported
     * through the exception below.
     *
     * @throws \InvalidArgumentException listing every offending setting
     */
    public function isValid(): bool
    {
        // Setting name => whether it currently holds something it should not.
        $usage = [
            'query' => $this->query !== null,
            'sortClauses' => !empty($this->sortClauses),
            'facetBuilders' => !empty($this->facetBuilders),
            'spellcheck' => $this->spellcheck !== null,
        ];

        $rejected = [];
        foreach ($usage as $setting => $isUsed) {
            if ($isUsed) {
                $rejected[] = $setting;
            }
        }

        if ($rejected === []) {
            return true;
        }

        $message = sprintf(
            'EmbeddingQuery may not set [%s].',
            implode(', ', $rejected)
        );

        throw new InvalidArgumentException($message);
    }
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
<?php
2+
3+
/**
4+
* @copyright Copyright (C) Ibexa AS. All rights reserved.
5+
* @license For full copyright and license information view LICENSE file distributed with this source code.
6+
*/
7+
declare(strict_types=1);
8+
9+
namespace Ibexa\Contracts\Core\Repository\Values\Content;
10+
11+
use Ibexa\Contracts\Core\Repository\Values\Content\Query\Criterion;
12+
use Ibexa\Contracts\Core\Repository\Values\Content\Query\Embedding;
13+
14+
/**
 * Fluent builder for {@see EmbeddingQuery} instances.
 *
 * Obtain a builder through {@see self::create()}, chain the configuration
 * methods, then call {@see self::build()} to receive the query.
 */
class EmbeddingQueryBuilder
{
    /** Query being configured; {@see self::build()} hands out copies of it. */
    private EmbeddingQuery $query;

    /**
     * Private on purpose: builders are obtained through {@see self::create()}.
     */
    private function __construct()
    {
        $this->query = new EmbeddingQuery();
    }

    /**
     * Starts a new builder around a fresh, empty query.
     */
    public static function create(): self
    {
        return new self();
    }

    /**
     * Sets the embedding the query searches with.
     */
    public function withEmbedding(Embedding $embed): self
    {
        $this->query->setEmbedding($embed);

        return $this;
    }

    /**
     * Sets the result limit.
     */
    public function setLimit(int $limit): self
    {
        $this->query->setLimit($limit);

        return $this;
    }

    /**
     * Sets the result offset.
     */
    public function setOffset(int $offset): self
    {
        $this->query->setOffset($offset);

        return $this;
    }

    /**
     * Sets the filter criterion.
     */
    public function setFilter(Criterion $filter): self
    {
        $this->query->setFilter($filter);

        return $this;
    }

    /**
     * Replaces the aggregations attached to the query.
     *
     * @param array<\Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation> $aggregations
     */
    public function setAggregations(array $aggregations): self
    {
        $this->query->setAggregations($aggregations);

        return $this;
    }

    /**
     * Sets the performCount flag.
     */
    public function setPerformCount(bool $performCount): self
    {
        $this->query->setPerformCount($performCount);

        return $this;
    }

    /**
     * Returns the configured query.
     *
     * A copy is handed out so that further calls on this builder, or a second
     * build(), cannot alter a query that was already returned. The copy is
     * shallow: the embedding, filter and aggregation objects are shared with
     * the builder's internal query.
     */
    public function build(): EmbeddingQuery
    {
        return clone $this->query;
    }
}

src/contracts/Repository/Values/Content/Query.php

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
/**
1515
* This class is used to perform a Content query.
1616
*/
17-
class Query extends ValueObject
17+
class Query extends ValueObject implements QueryValidatorInterface
1818
{
1919
public const SORT_ASC = 'ascending';
2020
public const SORT_DESC = 'descending';
@@ -102,6 +102,11 @@ class Query extends ValueObject
102102
* @var bool
103103
*/
104104
public $performCount = true;
105+
106+
/**
 * Reports this query as valid.
 *
 * This implementation performs no checks and always returns true.
 */
public function isValid(): bool
{
    return true;
}
105110
}
106111

107112
class_alias(Query::class, 'eZ\Publish\API\Repository\Values\Content\Query');
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?php
2+
3+
/**
4+
* @copyright Copyright (C) Ibexa AS. All rights reserved.
5+
* @license For full copyright and license information view LICENSE file distributed with this source code.
6+
*/
7+
declare(strict_types=1);
8+
9+
namespace Ibexa\Contracts\Core\Repository\Values\Content\Query;
10+
11+
use Ibexa\Contracts\Core\Repository\Values\ValueObject;
12+
13+
/**
 * Base value object holding an embedding as a list of floats.
 */
abstract class Embedding extends ValueObject
{
    /**
     * Embedding components.
     *
     * @var float[]
     */
    protected array $value;

    /**
     * Stores the given components as-is; the parent constructor is not invoked.
     *
     * @param float[] $value
     */
    public function __construct(array $value)
    {
        $this->value = $value;
    }

    /**
     * Returns the components exactly as they were passed to the constructor.
     *
     * @return float[]
     */
    public function getValue(): array
    {
        return $this->value;
    }
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<?php
2+
3+
/**
4+
* @copyright Copyright (C) Ibexa AS. All rights reserved.
5+
* @license For full copyright and license information view LICENSE file distributed with this source code.
6+
*/
7+
namespace Ibexa\Contracts\Core\Repository\Values\Content;
8+
9+
/**
 * Contract for query objects that can check their own state.
 */
interface QueryValidatorInterface
{
    /**
     * Tells whether the object is in a valid state.
     *
     * NOTE(review): the signature does not fix whether an invalid state is
     * reported by returning false or by throwing - confirm with implementers.
     */
    public function isValid(): bool;
}

0 commit comments

Comments
 (0)