Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions config/schema/0005_searchable_defaults.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
-- 2.2.0 — the per-field `searchable` flag becomes load-bearing.
--
-- Prior to 2.2.0, the FTS5 writer ignored `searchable` and flattened every
-- string/numeric value from `items.data` into `items_fts.body`. The
-- `Field` constructor defaulted `searchable` to false, so an honest read
-- of the column on an existing install would say "no field is searchable"
-- — which, applied as a behavioral switch, would silently drop ALL FTS
-- body coverage on upgrade.
--
-- This migration preserves the de-facto coverage for prose-typed content
-- so existing installs keep finding the same items via search. The four
-- promoted types are exactly those whose 2.2.0 factories
-- (`Field::text|longText|editor|slug()`) default to `searchable: true`.
--
-- Side effects (all deliberate, documented in CHANGELOG):
-- * `password` fields stop being indexed (was a bcrypt hash anyway).
-- * `fileupload`/`imageupload`/`filepicker` paths stop being indexed.
-- * `integer`/`decimal`/`money`/`datepicker`/`checkbox`/`dropdown`/
-- `hidden`/`arrayList` values stop being indexed.
--
-- After this migration, callers should run `vendor/bin/imanager fts:rebuild`
-- so the body column actually drops the now-excluded values. The flag is
-- already honored by per-save syncFts from this release onward; the
-- rebuild reconciles pre-existing rows.

-- Promote the four prose-typed field kinds to searchable. Rows that are
-- already flagged (searchable <> 0) are left untouched.
UPDATE fields
   SET searchable = 1
 WHERE type IN ('text', 'longtext', 'editor', 'slug')
   AND searchable = 0;
52 changes: 52 additions & 0 deletions src/Search/FtsBody.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

declare(strict_types=1);

namespace Imanager\Search;

/**
* Composes the `body` column written into `items_fts`.
*
* Centralized so both the per-save writer
* ({@see \Imanager\Storage\Sqlite\SqliteItemRepository::syncFts()}) and the
* bulk rebuilder ({@see FullTextSearch::rebuild()}) flatten data the same
* way — drift between the two would silently corrupt search results.
*/
final readonly class FtsBody
{
    /**
     * Build the single string stored in `items_fts.body` for one item.
     *
     * The result is `$name`, `$label` and the flattened dynamic values,
     * joined by single spaces. A `null` name or label contributes an empty
     * string, so the separators are always present.
     *
     * `$allowedKeys` controls which top-level entries of `$data` take part:
     *  - a list: only entries whose key is in the list are walked (this is
     *    how the per-field `searchable` flag, honored from 2.2.0, applies);
     *  - `null`: the whole `$data` bag is flattened (legacy behavior, kept
     *    for the 2.0/2.1 constructor signature of `SqliteItemRepository`).
     *
     * Only string, int and float leaves are emitted; every other leaf type
     * (bool, null, ...) is skipped.
     *
     * @param array<string, mixed> $data
     * @param list<string>|null $allowedKeys
     */
    public static function compose(
        ?string $name,
        ?string $label,
        array $data,
        ?array $allowedKeys,
    ): string {
        $searchable = $allowedKeys === null
            ? $data
            : array_intersect_key($data, array_flip($allowedKeys));

        $tokens = [];
        foreach (self::scalarLeaves($searchable) as $leaf) {
            $tokens[] = (string) $leaf;
        }

        return implode(' ', [$name ?? '', $label ?? '', implode(' ', $tokens)]);
    }

    /**
     * Depth-first, in-order walk over a nested array that yields only the
     * string / int / float leaves. Nested arrays are descended into and
     * never yielded themselves.
     *
     * @param array<array-key, mixed> $node
     *
     * @return \Generator<int|string, string|int|float>
     */
    private static function scalarLeaves(array $node): \Generator
    {
        foreach ($node as $child) {
            if (\is_array($child)) {
                yield from self::scalarLeaves($child);
            } elseif (\is_string($child) || \is_int($child) || \is_float($child)) {
                yield $child;
            }
        }
    }
}
58 changes: 52 additions & 6 deletions src/Search/FullTextSearch.php
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,64 @@ public function count(string $query, ?int $categoryId = null): int

/**
* Drop and rebuild the FTS index from scratch. Useful as a CLI op when
* tokenizer settings or migration content changes.
* tokenizer settings or migration content changes, and the canonical
* step after upgrading to 2.2.0 so the body column drops values whose
* field's `searchable` flag is now false.
*
* The rebuild iterates items in PHP rather than running a single bulk
* INSERT…SELECT because the per-category set of searchable field names
* varies per row. This is a CLI op, not a hot path — per-row iteration
* is acceptable at the install sizes iManager realistically targets.
*/
public function rebuild(): void
{
try {
// Per-category set of searchable field names. One query, used
// for the entire rebuild.
$allowedByCategory = [];
$fieldsStmt = $this->connection->query(
'SELECT category_id, name FROM fields WHERE searchable = 1',
);
if ($fieldsStmt !== false) {
foreach ($fieldsStmt->fetchAll(\PDO::FETCH_ASSOC) as $row) {
$allowedByCategory[(int) $row['category_id']][] = (string) $row['name'];
}
}

$this->connection->exec('DELETE FROM items_fts');
$this->connection->exec(
'INSERT INTO items_fts(rowid, name, label, body) '
. 'SELECT i.id, IFNULL(i.name, \'\'), IFNULL(i.label, \'\'), '
. 'IFNULL(i.name, \'\') || \' \' || IFNULL(i.label, \'\') || \' \' || IFNULL(i.data, \'\') '
. 'FROM items i',

$itemsStmt = $this->connection->query(
'SELECT id, category_id, name, label, data FROM items',
);
if ($itemsStmt === false) {
return;
}

$insert = $this->connection->prepare(
'INSERT INTO items_fts (rowid, name, label, body) '
. 'VALUES (:id, :name, :label, :body)',
);

foreach ($itemsStmt->fetchAll(\PDO::FETCH_ASSOC) as $row) {
$categoryId = (int) $row['category_id'];
$allowed = $allowedByCategory[$categoryId] ?? [];

$rawData = $row['data'] !== null ? (string) $row['data'] : '';
$data = $rawData !== '' ? json_decode($rawData, true) : [];
if (! \is_array($data)) {
$data = [];
}

$name = $row['name'] !== null ? (string) $row['name'] : '';
$label = $row['label'] !== null ? (string) $row['label'] : '';

$insert->execute([
':id' => (int) $row['id'],
':name' => $name,
':label' => $label,
':body' => FtsBody::compose($name, $label, $data, $allowed),
]);
}
} catch (\PDOException $e) {
throw StorageException::fromPdo($e, 'Full-text index rebuild failed');
}
Expand Down
73 changes: 56 additions & 17 deletions src/Storage/Sqlite/SqliteItemRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
use Imanager\Query\Clause;
use Imanager\Query\Direction;
use Imanager\Query\Query;
use Imanager\Search\FtsBody;
use Imanager\Storage\FieldRepository;
use Imanager\Storage\ItemRepository;
use Psr\EventDispatcher\EventDispatcherInterface;

Expand All @@ -39,11 +41,21 @@

private readonly EventDispatcherInterface $events;

/**
* Optional repository used to look up the per-field `searchable`
* flag. When `null` (the 2.0/2.1 constructor signature), syncFts
* indexes every value — preserving legacy behavior for direct
* callers, with a one-time deprecation notice on first FTS write.
*/
private readonly ?FieldRepository $fields;

/**
 * @param \PDO $connection Database handle used for all item and FTS statements.
 * @param EventDispatcherInterface|null $events Optional dispatcher; a
 *        `NullEventDispatcher` is substituted when omitted.
 * @param FieldRepository|null $fields Optional source of the per-field
 *        `searchable` flag; `null` keeps the 2.0/2.1 signature working.
 */
public function __construct(
    private \PDO $connection,
    ?EventDispatcherInterface $events = null,
    ?FieldRepository $fields = null,
) {
    $this->fields = $fields;

    $events ??= new NullEventDispatcher();
    $this->events = $events;
}

public function find(int $id): ?Item
Expand Down Expand Up @@ -118,7 +130,7 @@ public function save(Item $item): Item
}

$newId = (int) $this->connection->lastInsertId();
$this->syncFts($newId, $item->name, $item->label, $item->data->toArray());
$this->syncFts($newId, $item->categoryId, $item->name, $item->label, $item->data->toArray());

$created_item = new Item(
id: $newId,
Expand Down Expand Up @@ -161,7 +173,7 @@ public function save(Item $item): Item
throw self::translatePdoException($e);
}

$this->syncFts($item->id, $item->name, $item->label, $item->data->toArray());
$this->syncFts($item->id, $item->categoryId, $item->name, $item->label, $item->data->toArray());

$updated = new Item(
id: $item->id,
Expand Down Expand Up @@ -201,17 +213,17 @@ public function delete(int $id): void
}

/**
* Insert-or-replace the FTS index row for `$id`. Body is a flattened
* concatenation of all string / numeric values in `$data` so search
* matches across every dynamic field — see the `0002_fts.sql` migration
* comment for the rationale (hybrid index, post-Phase-8 we'll respect
* the per-field `searchable` flag once a use case asks for opt-out).
* Insert-or-replace the FTS index row for `$id`. When a
* `FieldRepository` was wired into this repository, only the fields
* whose `searchable` flag is true are written to the body; otherwise
* (the legacy 2.0/2.1 constructor signature) every dynamic value goes
* in and a one-time deprecation notice fires.
*
* @param array<string, mixed> $data
*/
private function syncFts(int $id, ?string $name, ?string $label, array $data): void
private function syncFts(int $id, int $categoryId, ?string $name, ?string $label, array $data): void
{
$body = ($name ?? '') . ' ' . ($label ?? '') . ' ' . self::flattenForSearch($data);
$body = FtsBody::compose($name, $label, $data, $this->searchableKeysFor($categoryId));

$delete = $this->connection->prepare('DELETE FROM items_fts WHERE rowid = :id');
$delete->execute([':id' => $id]);
Expand All @@ -228,17 +240,44 @@ private function syncFts(int $id, ?string $name, ?string $label, array $data): v
}

/**
 * Return the list of field names whose `searchable` flag is true for
 * `$categoryId`, or `null` when no `FieldRepository` was wired (legacy
 * 2.0/2.1 signature — fall back to "index everything"). The first such
 * fall-through emits an `E_USER_DEPRECATED` notice once per process so
 * external integrators get a heads-up without breaking.
 *
 * Each call re-queries the fields table. The query is local SQLite
 * (sub-millisecond for the dozens of fields per category iManager
 * realistically targets), and skipping the cache avoids staleness in
 * long-running CLI processes that mutate the schema mid-run.
 *
 * @return list<string>|null
 */
private function searchableKeysFor(int $categoryId): ?array
{
    if ($this->fields === null) {
        // Function-level static: the flag outlives the call, so the
        // notice is raised only on the first fall-through.
        static $warned = false;
        if (! $warned) {
            $warned = true;
            // Silenced on purpose: the deprecation still reaches custom
            // error handlers but does not surface as output by default.
            @trigger_error(
                'SqliteItemRepository was constructed without a FieldRepository — '
                . 'FTS will index every field value (legacy 2.0/2.1 behavior). Pass '
                . 'the FieldRepository into the third constructor argument to honor '
                . 'per-field searchable flags. The no-arg form will become an error '
                . 'in 3.0.',
                \E_USER_DEPRECATED,
            );
        }

        // `null` means "no filtering": every dynamic value is indexed.
        return null;
    }

    $keys = [];
    foreach ($this->fields->findByCategory($categoryId) as $field) {
        if ($field->searchable) {
            $keys[] = $field->name;
        }
    }

    // May be an empty list: a category with no searchable fields indexes
    // none of its dynamic values.
    return $keys;
}

public function query(Query $query): array
Expand Down
2 changes: 1 addition & 1 deletion src/Storage/Sqlite/SqliteStorage.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public function fields(): FieldRepository

/**
 * Build the item repository for this storage.
 *
 * The field repository is passed as the third argument so FTS writes can
 * honor the per-field `searchable` flag; without it the item repository
 * falls back to indexing every value and raises a deprecation notice.
 */
public function items(): ItemRepository
{
    return new SqliteItemRepository($this->connection, $this->events, $this->fields());
}

public function files(): FileRepository
Expand Down
7 changes: 7 additions & 0 deletions tests/Unit/Search/FullTextSearchTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Imanager\Tests\Unit\Search;

use Imanager\Domain\Category;
use Imanager\Domain\Field;
use Imanager\Domain\Item;
use Imanager\Exception\StorageException;
use Imanager\Search\FullTextSearch;
Expand Down Expand Up @@ -35,6 +36,12 @@ protected function setUp(): void
$this->blogId = $blog->id;
$this->newsId = $news->id;

// Declare the `body` field on both categories — Field::longText()
// defaults to searchable:true (2.2.0+), which is what these tests
// need so per-save syncFts writes body content into FTS.
$this->storage->fields()->ensure(Field::longText($this->blogId, 'body', 'Body'));
$this->storage->fields()->ensure(Field::longText($this->newsId, 'body', 'Body'));

$this->storage->items()->save(new Item(
id: null,
categoryId: $this->blogId,
Expand Down
Loading
Loading