diff --git a/config/schema/0005_searchable_defaults.sql b/config/schema/0005_searchable_defaults.sql new file mode 100644 index 0000000..44f715a --- /dev/null +++ b/config/schema/0005_searchable_defaults.sql @@ -0,0 +1,29 @@ +-- 2.2.0 — the per-field `searchable` flag becomes load-bearing. +-- +-- Prior to 2.2.0, the FTS5 writer ignored `searchable` and flattened every +-- string/numeric value from `items.data` into `items_fts.body`. The +-- `Field` constructor defaulted `searchable` to false, so an honest read +-- of the column on an existing install would say "no field is searchable" +-- — which, applied as a behavioral switch, would silently drop ALL FTS +-- body coverage on upgrade. +-- +-- This migration preserves the de-facto coverage for prose-typed content +-- so existing installs keep finding the same items via search. The four +-- promoted types are exactly those whose 2.2.0 factories +-- (`Field::text|longText|editor|slug()`) default to `searchable: true`. +-- +-- Side effects (all deliberate, documented in CHANGELOG): +-- * `password` fields stop being indexed (was a bcrypt hash anyway). +-- * `fileupload`/`imageupload`/`filepicker` paths stop being indexed. +-- * `integer`/`decimal`/`money`/`datepicker`/`checkbox`/`dropdown`/ +-- `hidden`/`arrayList` values stop being indexed. +-- +-- After this migration, callers should run `vendor/bin/imanager fts:rebuild` +-- so the body column actually drops the now-excluded values. The flag is +-- already honored by per-save syncFts from this release onward; the +-- rebuild reconciles pre-existing rows. + +UPDATE fields + SET searchable = 1 + WHERE searchable = 0 + AND type IN ('text', 'longtext', 'editor', 'slug'); diff --git a/src/Search/FtsBody.php b/src/Search/FtsBody.php new file mode 100644 index 0000000..8147870 --- /dev/null +++ b/src/Search/FtsBody.php @@ -0,0 +1,52 @@ + $data + * @param list|null $allowedKeys + */ + public static function compose( + ?string $name, + ?string $label, + array $data, + ?array $allowedKeys, + ): string { + if ($allowedKeys !== null) { + $data = array_intersect_key($data, array_flip($allowedKeys)); + } + + $parts = []; + array_walk_recursive($data, static function (mixed $value) use (&$parts): void { + if (\is_string($value) || \is_int($value) || \is_float($value)) { + $parts[] = (string) $value; + } + }); + + return ($name ?? '') . ' ' . ($label ?? '') . ' ' . implode(' ', $parts); + } +} diff --git a/src/Search/FullTextSearch.php b/src/Search/FullTextSearch.php index f4b1bc7..1324188 100644 --- a/src/Search/FullTextSearch.php +++ b/src/Search/FullTextSearch.php @@ -106,18 +106,64 @@ public function count(string $query, ?int $categoryId = null): int /** * Drop and rebuild the FTS index from scratch. Useful as a CLI op when - * tokenizer settings or migration content changes. + * tokenizer settings or migration content changes, and the canonical + * step after upgrading to 2.2.0 so the body column drops values whose + * field's `searchable` flag is now false. + * + * The rebuild iterates items in PHP rather than running a single bulk + * INSERT…SELECT because the per-category set of searchable field names + * varies per row. This is a CLI op, not a hot path — per-row iteration + * is acceptable at the install sizes iManager realistically targets. */ public function rebuild(): void { try { + // Per-category set of searchable field names. One query, used + // for the entire rebuild. + $allowedByCategory = []; + $fieldsStmt = $this->connection->query( + 'SELECT category_id, name FROM fields WHERE searchable = 1', + ); + if ($fieldsStmt !== false) { + foreach ($fieldsStmt->fetchAll(\PDO::FETCH_ASSOC) as $row) { + $allowedByCategory[(int) $row['category_id']][] = (string) $row['name']; + } + } + $this->connection->exec('DELETE FROM items_fts'); - $this->connection->exec( - 'INSERT INTO items_fts(rowid, name, label, body) ' - . 'SELECT i.id, IFNULL(i.name, \'\'), IFNULL(i.label, \'\'), ' - . 'IFNULL(i.name, \'\') || \' \' || IFNULL(i.label, \'\') || \' \' || IFNULL(i.data, \'\') ' - . 'FROM items i', + + $itemsStmt = $this->connection->query( + 'SELECT id, category_id, name, label, data FROM items', + ); + if ($itemsStmt === false) { + return; + } + + $insert = $this->connection->prepare( + 'INSERT INTO items_fts (rowid, name, label, body) ' + . 'VALUES (:id, :name, :label, :body)', ); + + foreach ($itemsStmt->fetchAll(\PDO::FETCH_ASSOC) as $row) { + $categoryId = (int) $row['category_id']; + $allowed = $allowedByCategory[$categoryId] ?? []; + + $rawData = $row['data'] !== null ? (string) $row['data'] : ''; + $data = $rawData !== '' ? json_decode($rawData, true) : []; + if (! \is_array($data)) { + $data = []; + } + + $name = $row['name'] !== null ? (string) $row['name'] : ''; + $label = $row['label'] !== null ? (string) $row['label'] : ''; + + $insert->execute([ + ':id' => (int) $row['id'], + ':name' => $name, + ':label' => $label, + ':body' => FtsBody::compose($name, $label, $data, $allowed), + ]); + } } catch (\PDOException $e) { throw StorageException::fromPdo($e, 'Full-text index rebuild failed'); } diff --git a/src/Storage/Sqlite/SqliteItemRepository.php b/src/Storage/Sqlite/SqliteItemRepository.php index 4dd63f3..ab5e0a2 100644 --- a/src/Storage/Sqlite/SqliteItemRepository.php +++ b/src/Storage/Sqlite/SqliteItemRepository.php @@ -14,6 +14,8 @@ use Imanager\Query\Clause; use Imanager\Query\Direction; use Imanager\Query\Query; +use Imanager\Search\FtsBody; +use Imanager\Storage\FieldRepository; use Imanager\Storage\ItemRepository; use Psr\EventDispatcher\EventDispatcherInterface; @@ -39,11 +41,21 @@ private readonly EventDispatcherInterface $events; + /** + * Optional repository used to look up the per-field `searchable` + * flag. When `null` (the 2.0/2.1 constructor signature), syncFts + * indexes every value — preserving legacy behavior for direct + * callers, with a one-time deprecation notice on first FTS write. + */ + private readonly ?FieldRepository $fields; + public function __construct( private \PDO $connection, ?EventDispatcherInterface $events = null, + ?FieldRepository $fields = null, ) { $this->events = $events ?? new NullEventDispatcher(); + $this->fields = $fields; } public function find(int $id): ?Item @@ -118,7 +130,7 @@ public function save(Item $item): Item } $newId = (int) $this->connection->lastInsertId(); - $this->syncFts($newId, $item->name, $item->label, $item->data->toArray()); + $this->syncFts($newId, $item->categoryId, $item->name, $item->label, $item->data->toArray()); $created_item = new Item( id: $newId, @@ -161,7 +173,7 @@ public function save(Item $item): Item throw self::translatePdoException($e); } - $this->syncFts($item->id, $item->name, $item->label, $item->data->toArray()); + $this->syncFts($item->id, $item->categoryId, $item->name, $item->label, $item->data->toArray()); $updated = new Item( id: $item->id, @@ -201,17 +213,17 @@ public function delete(int $id): void } /** - * Insert-or-replace the FTS index row for `$id`. Body is a flattened - * concatenation of all string / numeric values in `$data` so search - * matches across every dynamic field — see the `0002_fts.sql` migration - * comment for the rationale (hybrid index, post-Phase-8 we'll respect - * the per-field `searchable` flag once a use case asks for opt-out). + * Insert-or-replace the FTS index row for `$id`. When a + * `FieldRepository` was wired into this repository, only the fields + * whose `searchable` flag is true are written to the body; otherwise + * (the legacy 2.0/2.1 constructor signature) every dynamic value goes + * in and a one-time deprecation notice fires. * * @param array $data */ - private function syncFts(int $id, ?string $name, ?string $label, array $data): void + private function syncFts(int $id, int $categoryId, ?string $name, ?string $label, array $data): void { - $body = ($name ?? '') . ' ' . ($label ?? '') . ' ' . self::flattenForSearch($data); + $body = FtsBody::compose($name, $label, $data, $this->searchableKeysFor($categoryId)); $delete = $this->connection->prepare('DELETE FROM items_fts WHERE rowid = :id'); $delete->execute([':id' => $id]); @@ -228,17 +240,44 @@ private function syncFts(int $id, ?string $name, ?string $label, array $data): v } /** - * @param array $data + * Return the list of field names whose `searchable` flag is true for + * `$categoryId`, or `null` when no `FieldRepository` was wired (legacy + * 2.0/2.1 signature — fall back to "index everything"). The first such + * fall-through emits an `E_USER_DEPRECATED` notice once per process so + * external integrators get a heads-up without breaking. + * + * Each call re-queries the fields table. The query is local SQLite + * (sub-millisecond for the dozens of fields per category iManager + * realistically targets), and skipping the cache avoids staleness in + * long-running CLI processes that mutate the schema mid-run. + * + * @return list|null */ - private static function flattenForSearch(array $data): string + private function searchableKeysFor(int $categoryId): ?array { - $parts = []; - array_walk_recursive($data, static function (mixed $value) use (&$parts): void { - if (\is_string($value) || \is_int($value) || \is_float($value)) { - $parts[] = (string) $value; + if ($this->fields === null) { + static $warned = false; + if (! $warned) { + $warned = true; + @trigger_error( + 'SqliteItemRepository was constructed without a FieldRepository — ' + . 'FTS will index every field value (legacy 2.0/2.1 behavior). Pass ' + . 'the FieldRepository into the third constructor argument to honor ' + . 'per-field searchable flags. The no-arg form will become an error ' + . 'in 3.0.', + \E_USER_DEPRECATED, + ); + } + return null; + } + + $keys = []; + foreach ($this->fields->findByCategory($categoryId) as $field) { + if ($field->searchable) { + $keys[] = $field->name; } - }); - return implode(' ', $parts); + } + return $keys; } public function query(Query $query): array diff --git a/src/Storage/Sqlite/SqliteStorage.php b/src/Storage/Sqlite/SqliteStorage.php index c428d3a..43df21f 100644 --- a/src/Storage/Sqlite/SqliteStorage.php +++ b/src/Storage/Sqlite/SqliteStorage.php @@ -49,7 +49,7 @@ public function fields(): FieldRepository public function items(): ItemRepository { - return new SqliteItemRepository($this->connection, $this->events); + return new SqliteItemRepository($this->connection, $this->events, $this->fields()); } public function files(): FileRepository diff --git a/tests/Unit/Search/FullTextSearchTest.php b/tests/Unit/Search/FullTextSearchTest.php index 095c7da..5dd7bd6 100644 --- a/tests/Unit/Search/FullTextSearchTest.php +++ b/tests/Unit/Search/FullTextSearchTest.php @@ -5,6 +5,7 @@ namespace Imanager\Tests\Unit\Search; use Imanager\Domain\Category; +use Imanager\Domain\Field; use Imanager\Domain\Item; use Imanager\Exception\StorageException; use Imanager\Search\FullTextSearch; @@ -35,6 +36,12 @@ protected function setUp(): void $this->blogId = $blog->id; $this->newsId = $news->id; + // Declare the `body` field on both categories — Field::longText() + // defaults to searchable:true (2.2.0+), which is what these tests + // need so per-save syncFts writes body content into FTS. + $this->storage->fields()->ensure(Field::longText($this->blogId, 'body', 'Body')); + $this->storage->fields()->ensure(Field::longText($this->newsId, 'body', 'Body')); + $this->storage->items()->save(new Item( id: null, categoryId: $this->blogId, diff --git a/tests/Unit/Search/SearchableFlagTest.php b/tests/Unit/Search/SearchableFlagTest.php new file mode 100644 index 0000000..5abee11 --- /dev/null +++ b/tests/Unit/Search/SearchableFlagTest.php @@ -0,0 +1,305 @@ +storage = SqliteStorageFactory::inMemory(); + $this->pdo = (new \ReflectionProperty($this->storage, 'connection'))->getValue($this->storage); + $this->search = new FullTextSearch($this->pdo); + } + + public function testSaveExcludesNonSearchableFields(): void + { + $cat = $this->storage->categories()->save(new Category(null, 'Posts', 'posts')); + \assert($cat->id !== null); + + // title is searchable, secret is not. + $this->storage->fields()->ensure(Field::text($cat->id, 'title')); + $this->storage->fields()->ensure(Field::text($cat->id, 'secret')->searchable(false)); + + $this->storage->items()->save(new Item( + id: null, + categoryId: $cat->id, + name: 'post-1', + label: 'Post One', + data: ['title' => 'Indexed prose', 'secret' => 'CLASSIFIED_PAYLOAD'], + )); + + self::assertNotEmpty($this->search->search('Indexed')); + self::assertSame([], $this->search->search('CLASSIFIED_PAYLOAD')); + } + + public function testStructuralNameAndLabelAlwaysIndexed(): void + { + $cat = $this->storage->categories()->save(new Category(null, 'Notes', 'notes')); + \assert($cat->id !== null); + + // Zero searchable fields declared. name+label must still index. + $this->storage->fields()->ensure(Field::text($cat->id, 'body')->searchable(false)); + + $this->storage->items()->save(new Item( + id: null, + categoryId: $cat->id, + name: 'uniqueNameToken', + label: 'Unique Label Token', + data: ['body' => 'BODY_PAYLOAD_EXCLUDED'], + )); + + self::assertNotEmpty($this->search->search('uniqueNameToken')); + self::assertNotEmpty($this->search->search('Token')); + self::assertSame([], $this->search->search('BODY_PAYLOAD_EXCLUDED')); + } + + public function testUpdateRefreshesFtsWithCurrentSearchableSet(): void + { + $cat = $this->storage->categories()->save(new Category(null, 'Posts', 'posts')); + \assert($cat->id !== null); + $this->storage->fields()->ensure(Field::text($cat->id, 'tagline')); + + $created = $this->storage->items()->save(new Item( + id: null, + categoryId: $cat->id, + name: 'p1', + label: 'Post', + data: ['tagline' => 'FIRST_VALUE'], + )); + \assert($created->id !== null); + + self::assertNotEmpty($this->search->search('FIRST_VALUE')); + + $this->storage->items()->save(new Item( + id: $created->id, + categoryId: $cat->id, + name: $created->name, + label: $created->label, + data: ['tagline' => 'SECOND_VALUE'], + )); + + self::assertSame([], $this->search->search('FIRST_VALUE')); + self::assertNotEmpty($this->search->search('SECOND_VALUE')); + } + + public function testRebuildRespectsSearchableFlag(): void + { + $cat = $this->storage->categories()->save(new Category(null, 'Posts', 'posts')); + \assert($cat->id !== null); + $this->storage->fields()->ensure(Field::text($cat->id, 'visible')); + $this->storage->fields()->ensure(Field::text($cat->id, 'hidden')->searchable(false)); + + $this->storage->items()->save(new Item( + id: null, + categoryId: $cat->id, + name: 'p1', + label: 'Post', + data: ['visible' => 'VISIBLE_TOKEN', 'hidden' => 'HIDDEN_TOKEN'], + )); + + // Wipe FTS, rebuild from scratch. + $this->pdo->exec('DELETE FROM items_fts'); + $this->search->rebuild(); + + self::assertNotEmpty($this->search->search('VISIBLE_TOKEN')); + self::assertSame([], $this->search->search('HIDDEN_TOKEN')); + } + + public function testRebuildHonorsFlagPerCategory(): void + { + $blog = $this->storage->categories()->save(new Category(null, 'Blog', 'blog')); + $vault = $this->storage->categories()->save(new Category(null, 'Vault', 'vault')); + \assert($blog->id !== null && $vault->id !== null); + + // Same field name, different searchable flag per category. + $this->storage->fields()->ensure(Field::text($blog->id, 'note')); + $this->storage->fields()->ensure(Field::text($vault->id, 'note')->searchable(false)); + + $this->storage->items()->save(new Item( + id: null, + categoryId: $blog->id, + name: 'blog-1', + label: 'Blog Post', + data: ['note' => 'CROSS_CATEGORY_TOKEN'], + )); + $this->storage->items()->save(new Item( + id: null, + categoryId: $vault->id, + name: 'vault-1', + label: 'Vault Entry', + data: ['note' => 'CROSS_CATEGORY_TOKEN'], + )); + + $this->pdo->exec('DELETE FROM items_fts'); + $this->search->rebuild(); + + $hits = $this->search->search('CROSS_CATEGORY_TOKEN'); + self::assertCount(1, $hits); + self::assertSame($blog->id, $hits[0]->categoryId); + } + + public function testFlippingFieldFromSearchableToNotRequiresRebuild(): void + { + // Per-save syncFts honors the CURRENT flag, but rows already in FTS + // retain their old body. This documents that contract: flipping the + // flag affects future writes; pre-existing rows need a rebuild. + $cat = $this->storage->categories()->save(new Category(null, 'Posts', 'posts')); + \assert($cat->id !== null); + $field = $this->storage->fields()->ensure(Field::text($cat->id, 'tagline')); + \assert($field->id !== null); + + $this->storage->items()->save(new Item( + id: null, + categoryId: $cat->id, + name: 'p1', + label: 'Post', + data: ['tagline' => 'WILL_BE_HIDDEN'], + )); + self::assertNotEmpty($this->search->search('WILL_BE_HIDDEN')); + + // Flip the flag. Save() to apply the new state. + $this->storage->fields()->save( + (new Field( + id: $field->id, + categoryId: $field->categoryId, + name: $field->name, + label: $field->label, + type: $field->type, + ))->searchable(false), + ); + + // Untouched: pre-existing FTS row still has the old body. + self::assertNotEmpty($this->search->search('WILL_BE_HIDDEN')); + + // Rebuild reconciles it. + $this->search->rebuild(); + self::assertSame([], $this->search->search('WILL_BE_HIDDEN')); + } + + /** + * Verifies migration 0005: existing fields rows with `searchable = 0` + * get promoted to `searchable = 1` iff their type is prose-typed. + * Other types stay at 0. Anything already at 1 is left alone. + */ + public function testMigration0005PromotesProseTypedRowsOnly(): void + { + // SqliteStorageFactory has already applied 0005. The migration is + // a one-time UPDATE; re-running it directly is idempotent, which + // is what we want: insert seed rows with searchable=0 directly + // (bypassing the factory smart defaults), re-execute the file, + // assert the promotion happened. + $cat = $this->storage->categories()->save(new Category(null, 'Mixed', 'mixed')); + \assert($cat->id !== null); + + $insert = $this->pdo->prepare( + 'INSERT INTO fields (category_id, name, label, type, position, ' + . 'required, indexed, searchable, config, created, updated) ' + . 'VALUES (:cid, :name, :label, :type, 0, 0, 0, 0, \'{}\', 0, 0)', + ); + $cases = [ + ['text', 'a_text', 'A'], + ['longtext', 'a_long', 'B'], + ['editor', 'a_editor', 'C'], + ['slug', 'a_slug', 'D'], + ['password', 'a_pw', 'E'], + ['integer', 'a_int', 'F'], + ['fileupload', 'a_file', 'G'], + ['datepicker', 'a_date', 'H'], + ]; + foreach ($cases as [$type, $name, $label]) { + $insert->execute([ + ':cid' => $cat->id, ':name' => $name, ':label' => $label, ':type' => $type, + ]); + } + + $sql = (string) file_get_contents( + SqliteStorageFactory::schemaDir() . '/0005_searchable_defaults.sql', + ); + $this->pdo->exec($sql); + + $rows = $this->pdo->query( + 'SELECT name, type, searchable FROM fields WHERE category_id = ' . $cat->id . ' ORDER BY name', + ); + \assert($rows !== false); + + $byName = []; + foreach ($rows->fetchAll(\PDO::FETCH_ASSOC) as $row) { + $byName[(string) $row['name']] = (int) $row['searchable']; + } + + self::assertSame(1, $byName['a_text']); + self::assertSame(1, $byName['a_long']); + self::assertSame(1, $byName['a_editor']); + self::assertSame(1, $byName['a_slug']); + self::assertSame(0, $byName['a_pw']); + self::assertSame(0, $byName['a_int']); + self::assertSame(0, $byName['a_file']); + self::assertSame(0, $byName['a_date']); + } + + public function testLegacyConstructorWithoutFieldRepoIndexesEverything(): void + { + // 2.0/2.1 callers that constructed SqliteItemRepository directly + // with the 2-arg signature must keep getting "index everything" + // behavior, plus a deprecation notice once per process. + $items = new SqliteItemRepository($this->pdo, new NullEventDispatcher()); + + $cat = $this->storage->categories()->save(new Category(null, 'Legacy', 'legacy')); + \assert($cat->id !== null); + // No fields declared; legacy code path indexes the raw data blob. + + $deprecationSeen = false; + $previousHandler = set_error_handler( + static function (int $errno) use (&$deprecationSeen): bool { + if ($errno === \E_USER_DEPRECATED) { + $deprecationSeen = true; + return true; + } + return false; + }, + \E_USER_DEPRECATED, + ); + + try { + $items->save(new Item( + id: null, + categoryId: $cat->id, + name: 'legacy-1', + label: 'Legacy', + data: ['undeclared' => 'LEGACY_TOKEN'], + )); + } finally { + restore_error_handler(); + unset($previousHandler); + } + + self::assertTrue($deprecationSeen, 'expected E_USER_DEPRECATED notice'); + self::assertNotEmpty($this->search->search('LEGACY_TOKEN')); + } +}