diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..2de711a --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @Esity @LegionIO/core diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..79ea87c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,18 @@ +version: 2 +updates: + - package-ecosystem: bundler + directory: / + schedule: + interval: weekly + day: monday + open-pull-requests-limit: 5 + labels: + - "type:dependencies" + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly + day: monday + open-pull-requests-limit: 5 + labels: + - "type:dependencies" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..cc24499 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,93 @@ +name: CI +on: + push: + branches: [main] + pull_request: + schedule: + - cron: '0 9 * * 1' + +permissions: {} + +jobs: + ci: + permissions: + contents: read + uses: LegionIO/.github/.github/workflows/ci.yml@main + + ci-postgres: + name: "RSpec (PostgreSQL)" + permissions: + contents: read + timeout-minutes: 15 + runs-on: ubuntu-latest + services: + postgres: + image: postgres:16-alpine + env: + POSTGRES_USER: legion + POSTGRES_PASSWORD: legion + POSTGRES_DB: legionio + ports: + - 5432:5432 + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - uses: actions/checkout@v4 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.4' + bundler-cache: true + - name: Run RSpec (PostgreSQL adapter) + env: + LEGION_DATA_ADAPTER: postgres + LEGION_DATA_HOST: 127.0.0.1 + LEGION_DATA_PORT: 5432 + LEGION_DATA_USER: legion + LEGION_DATA_PASSWORD: legion + LEGION_DATA_DATABASE: legionio + run: bundle exec rspec + + lint: + permissions: + checks: write + contents: read + pull-requests: read + uses: LegionIO/.github/.github/workflows/lint-patterns.yml@main + + security: + permissions: 
+ contents: read + security-events: write + uses: LegionIO/.github/.github/workflows/security-scan.yml@main + + version-changelog: + permissions: + contents: read + pull-requests: read + uses: LegionIO/.github/.github/workflows/version-changelog.yml@main + + dependency-review: + permissions: + contents: read + pull-requests: write + uses: LegionIO/.github/.github/workflows/dependency-review.yml@main + + stale: + if: github.event_name == 'schedule' + permissions: + issues: write + pull-requests: write + uses: LegionIO/.github/.github/workflows/stale.yml@main + + release: + needs: [ci, ci-postgres, lint] + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + permissions: + contents: write + packages: write + uses: LegionIO/.github/.github/workflows/release.yml@main + secrets: + rubygems-api-key: ${{ secrets.RUBYGEMS_API_KEY }} diff --git a/.github/workflows/rubocop-analysis.yml b/.github/workflows/rubocop-analysis.yml deleted file mode 100644 index 0a07e18..0000000 --- a/.github/workflows/rubocop-analysis.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Rubocop -on: [push, pull_request] -jobs: - rubocop: - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - ruby: [2.7] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v2 - - uses: ruby/setup-ruby@v1 - with: - ruby-version: ${{ matrix.ruby }} - bundler-cache: true - - name: Install Rubocop - run: gem install rubocop code-scanning-rubocop - - name: Rubocop run --no-doc - run: | - bash -c " - rubocop --require code_scanning --format CodeScanning::SarifFormatter -o rubocop.sarif - [[ $? 
-ne 2 ]] - " - - name: Upload Sarif output - uses: github/codeql-action/upload-sarif@v1 - with: - sarif_file: rubocop.sarif \ No newline at end of file diff --git a/.github/workflows/sourcehawk-scan.yml b/.github/workflows/sourcehawk-scan.yml deleted file mode 100644 index 72a2af8..0000000 --- a/.github/workflows/sourcehawk-scan.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: Sourcehawk Scan -on: - push: - branches: - - main - - master - pull_request: - branches: - - main - - master -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Sourcehawk Scan - uses: optum/sourcehawk-scan-github-action@main - - - diff --git a/.gitignore b/.gitignore index 54781f1..5d5e2e0 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,12 @@ # rspec failure tracking .rspec_status legionio.key +# logs and OS artifacts +legion.log +.DS_Store +# gem build artifacts +*.gem +# SQLite database files +*.db +.worktrees +/docs/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..4e23453 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,29 @@ +# Standard LegionIO pre-commit configuration +# Install: pre-commit install +# Manual: pre-commit run --all-files +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-json + exclude: Gemfile\.lock + - id: check-merge-conflict + + - repo: local + hooks: + - id: rubocop + name: RuboCop (autofix) + entry: scripts/pre-commit-rubocop.sh + language: script + types: [ruby] + pass_filenames: true + + - id: ruby-syntax + name: Ruby syntax check + entry: bash -c 'status=0; for file in "$@"; do ruby -c "$file" || status=$?; done; exit $status' -- + language: system + types: [ruby] + pass_filenames: true diff --git a/.rubocop.yml b/.rubocop.yml index a23bdf8..5de70dc 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,22 +1,56 @@ +inherit_gem: + rubocop-legion: 
config/core.yml +AllCops: + TargetRubyVersion: 3.4 + NewCops: enable + SuggestExtensions: false Layout/LineLength: - Max: 120 - Exclude: - - 'lib/legion/data/migrations/*.rb' + Max: 160 +Layout/SpaceAroundEqualsInParameterDefault: + EnforcedStyle: space +Layout/HashAlignment: + EnforcedHashRocketStyle: table + EnforcedColonStyle: table Metrics/MethodLength: - Max: 30 + Max: 50 Metrics/ClassLength: Max: 1500 -Metrics/AbcSize: - Max: 34 +Metrics/ModuleLength: + Max: 1500 +Naming/VariableNumber: + Exclude: + - 'spec/**/*' + - lib/legion/data/connection.rb +Legion/Framework/EagerSequelModel: + Enabled: false Metrics/BlockLength: - Max: 50 + Max: 100 Exclude: + - 'spec/**/*' - 'lib/legion/data/migrations/*' +ThreadSafety/ClassInstanceVariable: + Enabled: false +ThreadSafety/ClassAndModuleAttributes: + Enabled: false +Metrics/AbcSize: + Max: 60 + Exclude: + - 'spec/**/*' +Metrics/CyclomaticComplexity: + Max: 15 + Exclude: + - 'spec/**/*' +Metrics/PerceivedComplexity: + Max: 17 + Exclude: + - 'spec/**/*' + Style/Documentation: Enabled: false -AllCops: - TargetRubyVersion: 2.5 - NewCops: enable - SuggestExtensions: false +Style/SymbolArray: + Enabled: true Style/FrozenStringLiteralComment: + Enabled: true + EnforcedStyle: always +Naming/PredicateMethod: Enabled: false diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..12ce9c6 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,77 @@ +Always run a full `bundle exec rspec` and `bundle exec rubocop -A` and fix all errors before committing. + +# AGENTS.md - legion-data + +## Repo Role + +`legion-data` owns persistent storage for LegionIO. Keep this repo focused on database connectivity, Sequel migrations, Sequel models, local SQLite state, extraction persistence, audit/governance storage, identity/RBAC storage, Apollo storage, and the LLM lifecycle ledger. + +HTTP APIs, runtime orchestration, extension behavior, and UI concerns belong in their owning repos. 
This repo should expose clean model contracts that those layers can call. + +## Required Commands + +Run from the repo root: + +```bash +bundle exec rubocop -A +bundle exec rspec --format json --out tmp/rspec_results.json --format progress --out tmp/rspec_progress.txt +``` + +If RSpec fails, extract failures with: + +```bash +jq '[.examples[] | select(.status != "passed") | {file_path, line_number, full_description, status, exception: .exception}]' tmp/rspec_results.json > tmp/rspec_failures.json +``` + +Do not run partial RSpec or partial RuboCop for release validation. + +## Migration Rules + +- Never edit published migrations. Add a new migration instead. +- Do not guard migrations with `create_table?`, `drop_table?`, `table_exists?`, `if_exists`, `if_not_exists`, `next if`, or `next unless`. +- Keep migrations split by domain and dependency. Do not hide a whole schema rewrite in one large migration. +- Use portable Sequel DSL by default. Adapter-specific code is acceptable only for adapter-specific features, such as PostgreSQL vector columns. +- Prefer `id` integer primary keys for joins and `uuid` public identifiers for APIs, logs, and external references. +- Avoid JSON columns unless the data is genuinely dynamic provider evidence or cannot be normalized without losing meaning. 
+ +## Sequel Association Rules + +Use the official Sequel association APIs as the model contract: + +- Association API reference: https://sequel.jeremyevans.net/rdoc/classes/Sequel/Model/Associations/ClassMethods.html +- Association basics: https://github.com/jeremyevans/sequel/blob/master/doc/association_basics.rdoc + +Required mapping: + +| Schema shape | Sequel association | +|--------------|--------------------| +| This table has the foreign key | `many_to_one` | +| Other table has the foreign key | `one_to_many` or `one_to_one` | +| Join table connects both sides | `many_to_many` | +| One associated row through a join table | `one_through_one` | + +Rules: + +- Define associations for real foreign-key relationships when adding or changing models. +- Prefer association methods and association datasets over ad hoc `where(foreign_key: ...)` lookups in model helpers. +- When names are not inferable, explicitly set `:class`, `:key`, `:primary_key`, `:join_table`, `:left_key`, and `:right_key`. +- Do not create association names that collide with actual column names; Sequel creates methods using the association name. +- Keep namespace models aligned with API/domain shape, for example `Legion::Data::Model::Identity::*`, `LLM::*`, `Apollo::*`, and `RBAC::*`. + +## Current Schema Notes + +- Migrations currently run through `096`. +- `074`-`076` are mainline Apollo/task/extract migrations. +- `077`-`090` define the LLM lifecycle ledger. +- `091`-`096` define portable identity companion tables. +- Published PostgreSQL identity migrations remain in place; portable identity tables are additive. + +## Release Hygiene + +For behavior, model, migration, or Ruby code changes: + +- Update `lib/legion/data/version.rb`. +- Update `CHANGELOG.md`. +- Update `README.md` when public behavior, schema, configuration, or model surface changes. +- Keep `.gitignore` ignoring `/Gemfile.lock` and `*.gem`. 
+- Do not include generated DBs, logs, coverage output, built gems, or repo-external `/docs` workspace files in commits. diff --git a/CHANGELOG.md b/CHANGELOG.md index 9781de4..da202db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,706 @@ # Legion::Data Changelog +## [1.10.5] - 2026-06-16 + +### Added +- Migration 135: adds context token accounting columns to `llm_message_inference_metrics` — `llm_message_inference_metrics` is now the canonical source of truth for all pipeline context token metrics (request messages, loaded/curated/archived history, thinking strip savings, context-window enforcement savings, RAG injection, system/baseline prompt, tool definitions, final context estimate). Includes `context_accounting_status` and `context_accounting_json` for provenance. +- Migration 135: creates `llm_context_accounting_events` table for drill-down evidence rows (not a second source of token truth — totals reconcile to the canonical metrics row). +- Model: `Legion::Data::Models::LLM::ContextAccountingEvent` with foreign key associations to request, response, and metric. +- Association: `MessageInferenceMetric#context_accounting_events`. + +## [1.10.4] - 2026-06-12 + +### Added +- Migration 134: adds `operation` (String 64), `dispatch_path` (String 32), and `idempotency_key` (String 128) columns to `llm_route_attempts` with indexes on `operation` and `idempotency_key`. Enables per-attempt tracking of the LLM operation type, routing path, and deduplication key. + +## [1.10.3] - 2026-06-10 + +### Changed +- `connection_validation` now defaults to `false` — the Sequel connection_validator extension issues a `SELECT NULL` on every pool checkout/checkin and before real queries, which measurably degrades throughput; query-time error handling already recovers stale/dead connections. 
Set `connection_validation: true` in settings to opt back in (timeout semantics unchanged: `-1` validates every checkout) +- Pool timeout assignments read `connection_validation_timeout` / `connection_expiration_timeout` directly from settings — removed inline `|| 600` / `|| 14_400` shadow defaults that disagreed with the documented settings defaults + +## [1.10.2] - 2026-06-02 + +### Fixed +- Replace `return` with `next` in migration blocks — Sequel uses `instance_exec`, bare `return` raises `LocalJumpError` (migrations 019, 044, 045, 046, 118, 120) +- Make migrations 118-130 idempotent with schema column checks for safe re-run after partial failures +- Restore migration 131 (`add_column :schema_version` to `llm_tool_calls`) with idempotent guard — preserves contiguous migration sequence for existing installations + +### Added +- Migration 132: drops unused `schema_version` column from `llm_tool_calls` (no code reads/writes it) +- Migration 133: allows NULL on `context_tokens` in `llm_message_inference_requests` — prevents NOT NULL violations when token counts are unavailable + +## [1.10.1] - 2026-06-01 + +### Added + +- Migration 130: adds `pii_types_json` (TEXT) and `jurisdictions_json` (TEXT) to `llm_conversations`. Required by lex-llm-ledger OfficialRecordWriter for compliance metadata. + +### Removed + +- `schema_version` column removed from lex-llm-ledger writer — no longer written to any table. Column remains on `llm_skill_events` (migration 129) but is not actively populated. + +## [1.10.0] - 2026-06-01 + +### Added + +- Migration 129: creates `llm_skill_events` table as a core LLM lifecycle table (moved from lex-llm-ledger extension). Columns: `uuid`, `conversation_id`, `request_ref`, `skill_name`, `skill_version`, `trigger`, `status`, `duration_ms`, `identity_canonical_name`, `identity_principal_id`, `identity_id`, `schema_version`, `recorded_at`, `inserted_at`. 
Indexes on `conversation_id`, `request_ref`, `skill_name`, `identity_canonical_name`, `recorded_at`, `inserted_at`. + +## [1.9.0] - 2026-06-01 + +### Added + +- Migration 123: audit columns on `llm_tool_calls` — `tool_arguments_json` (TEXT), `tool_result_json` (TEXT), `tool_category` (String(64)), `data_handling_classification` (String(32)), `policy_decision` (String(32)), `requires_human_approval` (Boolean) plus indexes on `tool_category`, `data_handling_classification`, `policy_decision`. +- Migration 124: audit columns on `llm_tool_call_attempts` — `attempt_input_json` (TEXT), `attempt_output_json` (TEXT), `error_details_json` (TEXT). +- Migration 125: audit columns on `llm_escalation_events` — `history_json` (TEXT), `outcome` (String(32)), `total_attempts` (Integer) plus index on `outcome`. +- Migration 126: audit columns on `llm_message_inference_responses` — `route_attempts` (Integer, default 0), `escalation_chain_ref` (String(128)) plus index on `escalation_chain_ref`. Skips `response_content_hash` (already exists since migration 080). +- Migration 127: audit columns on `llm_message_inference_requests` — `parent_request_id` (Integer, self-referencing FK on_delete: :set_null). Skips `request_content_hash`, `curation_strategy`, `tool_policy` (all already exist since migration 079). + +## [1.8.9] - 2026-05-26 + +### Changed +- Migration 116: make `llm_tool_calls.message_inference_response_id` nullable and drop composite unique index on `[message_inference_response_id, tool_call_index]`. Eliminates 30-40% dead-letter rate on tool audit messages caused by AMQP race between response and tool call writers. +- Migration 117: add nullable `conversation_id` FK to `llm_tool_calls` referencing `llm_conversations`, so tool call rows can track their conversation even when the response row hasn't been written yet. +- Add `many_to_one :conversation` association to `LLM::ToolCall` model. 
+ +## [1.8.8] - 2026-05-20 + +### Added +- `Legion::Data::Connection.reconnect_with_fresh_creds` — updates Sequel's internal connection opts with fresh credentials from `Legion::Settings[:data][:creds]` and reconnects the pool. Called by `LeaseManager#trigger_postgresql_reconnect` after Vault dynamic PostgreSQL lease rotation. + +### Fixed +- Vault dynamic PostgreSQL credential rotation: after lease expiry, connections would fail with `PG::ConnectionBad: role "v-legionio-node-..." does not exist` because Sequel retained the original (revoked) credentials in `@opts`. The legacy fallback (`disconnect` + `test_connection`) was insufficient since it doesn't update stored credentials. + +## [1.8.7] - 2026-05-17 + +### Added +- Migrations 103-114: adds standardized identity columns (`access_scope`, `identity_principal_id`, `identity_id`, `identity_canonical_name`) to all 12 LLM lifecycle tables. Tables that already carried identity columns under legacy names (`principal_id`/`identity_id` on `llm_conversations`, `caller_principal_id`/`caller_identity_id` on `llm_message_inference_requests`) receive only `access_scope` and `identity_canonical_name` — existing columns are not renamed. Each table is its own migration with full `access_scope` index and partial `identity_principal_id` index. + +## [1.8.6] - 2026-05-15 + +### Added +- Migration 100: creates `apollo_entries` and `apollo_entries_archive` tables on non-Postgres adapters (SQLite etc.), bringing them to parity with the existing Postgres schema. +- Migration 101: adds `access_scope VARCHAR(20) NOT NULL DEFAULT 'global'`, `identity_principal_id INTEGER`, `identity_id INTEGER`, `identity_canonical_name VARCHAR(255)` columns on both `apollo_entries` and `apollo_entries_archive` across all adapters. Existing rows default to `global` access scope. 
+- Migration 102: adds indexes on `apollo_entries` — full index on `access_scope`, partial indexes on `identity_principal_id` and `identity_id` (WHERE NOT NULL) across all adapters. + +## [1.8.5] - 2026-05-09 + +### Removed +- Unnecessary `defined?(Legion::Logging)` guards from connection and local database setup — legion-logging is a hard gemspec dependency and always available + +## [1.8.4] - 2026-05-08 + +### Removed +- Dropped legacy postgres-only identity tables (`principals`, `identity_providers`, `identities`, `identity_groups`, `identity_group_memberships`, `identity_audit_log`) via migration 098. +- Removed `table_available?` guards from all identity model files — models load unconditionally. + +### Changed +- Renamed `portable_identity_*` tables to canonical names (`identity_principals`, `identity_providers`, `identities`, `identity_groups`, `identity_group_memberships`, `identity_audit_log`, `identity_provider_capabilities`) via migration 099. +- Updated all identity models to reference the new table names. + +## [1.8.3] - 2026-05-07 + +### Removed +- Legacy top-level identity model files (`identity.rb`, `principal.rb`, `identity_provider.rb`, `identity_group.rb`, `identity_group_membership.rb`, `identity_audit_log.rb`) — superseded by the portable `identity/` namespace models backed by `portable_*` tables. + +### Fixed +- `TypeError: superclass mismatch for class Identity` on postgres startup caused by `model_helpers.rb` defining `class Identity` as a plain namespace before `identity.rb` tried to reopen it as `< Sequel::Model(:identities)`. + +## [1.8.2] - 2026-05-07 + +### Changed +- Refactored `Legion::Data.setup` to call `setup_global`, `setup_cache`, then `setup_local` in explicit order — eliminates the `ensure setup_local` footgun that ran local SQLite even when global setup failed. 
+- Extracted `setup_global` (connection + migrate + load_models) and promoted `setup_local` and `setup_cache` to top-level public methods with their own `rescue` blocks (`fatal` for local/global, `error` for cache). +- SQLite main database now resolves to `~/.legionio/data/legionio.db` instead of a relative path in the process working directory; existing absolute path overrides in settings are unchanged. + +## [1.8.1] - 2026-05-07 + +### Fixed +- `TypeError: superclass mismatch for class Identity` on startup: moved `require_relative 'identity/model_helpers'` inside the `Identity < Sequel::Model(:identities)` class body so the Sequel superclass is established before `model_helpers.rb` reopens the constant. + +## [1.8.0] - 2026-05-06 + +### Added +- Migration 097 adds official LLM dispatch fields for fleet operation, correlation, idempotency, provider instance, and dispatch path tracking. + +### Changed +- LLM lifecycle Sequel models now live under `Legion::Data::Models::LLM` to match the official data model namespace. + +## [1.7.5] - 2026-05-05 + +### Added +- Migrations 077-090: portable LLM lifecycle schema covering conversations, messages, message inference requests/responses, route attempts, inference metrics, provider-requested tool calls, tool call attempts, conversation compactions, policy evaluations, security events, and registry events. +- Migrations 091-096: portable identity companion schema with integer primary keys, public UUIDs, normalized provider capabilities, principals, identities, groups, memberships, and audit events. +- Sequel models and associations for the new `Legion::Data::Models::LLM` lifecycle tables. +- Nested Sequel model namespaces for Identity, Apollo, and RBAC tables. +- Lookup helpers for nested and legacy identity models. +- LLM reconstruction query helpers for audit lineage, finance rollups, security incident lineage, and message-to-tool incident flow. 
+- Additional Sequel associations for core execution and identity models, including function/task, relationship/chain, task/worker, task log aliases, and principal/group many-to-many membership helpers. + +## [1.7.4] - 2026-04-28 + +### Fixed +- Pre-commit RuboCop hook now distinguishes missing tools from real RuboCop failures and propagates failures instead of silently passing. +- Ruby syntax pre-commit hook now checks every staged Ruby file instead of only the first argument. +- Connection setup now refreshes the configured adapter before each setup call and clears fallback state on shutdown so fallback health checks do not stay stale across reconnects. + +### Changed +- README refreshed for the current migration count, version line, fallback diagnostics, pre-commit workflow, and recent model surface. + +## [1.7.3] - 2026-04-27 + +### Added +- Migration 074: widens Apollo `content_hash` to 64 fixed characters and `knowledge_domain` / `source_provider` / `source_agent` to 255 characters so SHA-256 hashes and real-world identifiers fit without ingestion truncation failures. (Fixes #33, #34) +- Migration 075: adds task `idempotency_key` and `idempotency_expires_at` columns plus indexes for SHA-256 payload deduplication windows. (Fixes #14) +- Migration 076: adds `extract_step_timings` for per-step Extract pipeline timing visibility. (Fixes #15) +- `Task.idempotency_key_for`, `Task.find_active_by_idempotency_key`, and `Task.create_idempotent` for stable content-addressed task dispatch deduplication. (Fixes #14) +- Extract results now include `extract_id` and `step_timings`, and persist timing rows when the migration is present. (Fixes #15) +- `AuditLogHashChain` plus `AuditLog.compute_hash` / `AuditLog.verify_chain` as the canonical data-side audit log hash-chain implementation for standard write paths to share. 
(Refs #13) + +### Fixed +- Migration 051 now adds SQLite/MySQL `tasks.created_at` without a non-constant default before backfilling from `created`, allowing later migration specs and fresh SQLite databases to migrate cleanly. + +## [1.7.2] - 2026-04-27 + +### Fixed +- Dev-fallback to SQLite now logs at `:error` level with explicit warnings that data written to SQLite will not be visible when the configured network database reconnects. + +### Added +- `Connection.connection_info` — returns adapter, connection state, and fallback status for health checks and diagnostics +- `Connection.fallback_active?` — returns true when the data layer fell back to SQLite from a configured network database; Apollo and other services can check this to detect degraded mode and log appropriate warnings + +## [1.7.1] - 2026-04-27 + +### Fixed +- `QueryFileLogger` now treats writes after `close` as no-ops, preventing repeated `IOError: closed stream` warnings from late Sequel query callbacks during shutdown. (Fixes #35) + +## [1.7.0] - 2026-04-24 + +### Added +- Migration 072: `identity_audit_log` table (Postgres-only) with indexes +- Migration 073: `employee_id` on principals, `account_type`/`qualifier`/`is_default`/`link_evidence` on identities, partial unique index for one-default-per-provider +- `IdentityAuditLog` model added to model loader + +## [1.6.30] - 2026-04-22 + +### Fixed +- `Spool.extension_path` now accepts any module under `Legion::`, not just `Legion::Extensions::` — fixes `ArgumentError` when core gems like `legion-llm` spool events via `Spool.for(Legion::LLM)` + +## [1.6.29] - 2026-04-17 + +### Fixed +- `Connection#log_connection_info`: renamed local variables `user`/`host`/`port`/`db` to `conn_user`/`conn_host`/`conn_port`/`conn_db` to avoid shadowing outer-scope names and resolve `rb/uninitialized-local-variable` CodeQL alert +- CI workflow: added explicit `permissions:` block to all jobs (`contents: read` for checkout jobs, `{}` for reusable workflow calls) to 
satisfy `actions/missing-workflow-permissions` code scanning alerts +- Spec: replaced deprecated `raise_exception` matcher with `raise_error` in `connection_spec.rb` and `model_spec.rb`; updated stale test description in `model_spec.rb` + +## [1.6.28] - 2026-04-17 + +### Changed +- `legion-json` added as explicit gemspec runtime dependency — `Legion::JSON` is used throughout and was previously only an implicit transitive dependency +- Rewrote `README.md` with accurate architecture diagram, full model table, migration history, configuration reference, and usage examples +- Updated `CLAUDE.md` with mandatory `bundle exec rspec` + `bundle exec rubocop -A` reminder for AI agents +- Added `AGENTS.md` with mandatory rspec/rubocop reminder and gem overview +- Updated `.github/CODEOWNERS` to `@Esity @LegionIO/core` +- Added `*.gem` to `.gitignore` to prevent build artifacts from being committed +- Removed `sonar-project.properties` + +## [1.6.27] - 2026-04-17 + +### Fixed +- `load_sequel_model` now reads `settings[:models][:continue_on_load_fail]` (was erroneously reading `continue_on_fail`, a key that never existed — LoadError was always re-raised regardless of the setting). (Fixes #22) +- `load_models` now skips model loading when `settings[:models][:autoload]` is `false`, honoring the documented knob. (Fixes #22) +- Audit record live-path specs (`append`, `verify`, `walk`, `query_by_type`, immutability guards) are now self-sufficient: the `before` block reconnects the DB if a prior spec tore it down, eliminating 19 pending examples when running the full suite. (Fixes #22) +- Default `preconnect` changed from `'concurrently'` to `false`. The concurrent preconnect mode spawned background threads that emitted noisy connection errors when a network adapter was unreachable before dev-fallback could catch the failure. `false` preserves identical behavior for SQLite (default) and avoids the noise for production deployments where operators can opt-in explicitly. 
(Fixes #22) + +## [1.6.26] - 2026-04-17 + +### Fixed +- `connection_validation_timeout` default reduced from 600s to -1 (validate every checkout) for non-SQLite adapters. The previous 10-minute window meant stale PG connections from a VPN drop, sleep/wake, or network interface change were not evicted until the next scheduled validation cycle, causing `Sequel::DatabaseDisconnectError` to repeat on every actor tick. With -1, Sequel pings the connection on every pool checkout and discards dead connections immediately. (Fixes #28) + +## [1.6.24] - 2026-04-13 + +### Added +- Migration 070: `resume_routing_key` and `resume_exchange` columns on `approval_queue` table (nullable String 255) to support fleet pipeline resume on approval +- Migration 071: `engine` VARCHAR(50) NULL column on `relationships` table — enables fleet pipeline to store explicit transformer engine selection per relationship + +## [1.6.23] - 2026-04-07 + +### Fixed +- Migration 067: parenthesize CASE expression in `idx_memberships_trust_tiebreak` index (PG syntax error) + +## [1.6.22] - 2026-04-06 + +### Added +- Migration 063: `identity_providers` table (provider_type, facing, priority, trust_weight, capabilities) +- Migration 064: `principals` table (canonical_name regex constraint, kind, unique composite) +- Migration 065: `identities` table (principal/provider FKs, partial unique index on active) +- Migration 066: `identity_groups` table (source: ldap/entra/manual) +- Migration 067: `identity_group_memberships` table (status, trust_weight, discovered_by, tie-break index) +- Migration 068: `entity_type` column on `audit_records` with index +- Migration 069: `principal_id` FK on `nodes` table +- 5 Sequel models: `IdentityProvider`, `Principal`, `Identity`, `IdentityGroup`, `IdentityGroupMembership` +- `Identity` model wired through `SequelPlugin` `encrypted_column :profile` for at-rest encryption +- `Node` model gains `many_to_one :principal` association + +### Changed +- Migration mode gate: only 
`:infra` mode runs migrations when `Legion::Mode` is available +- `auto_migrate` settings check wired into `Data.setup` (skips migrations when `auto_migrate: false`) +- Mode guard added to both `Data.migrate` and `Migration.migrate` for defense-in-depth + +## [1.6.21] - 2026-04-05 + +### Added +- Migration 062: `tool_embedding_cache` table for global embedding persistence + +## [1.6.20] - 2026-04-03 + +### Fixed +- Local SQLite now uses WAL journal mode, 30s busy_timeout, and synchronous=NORMAL to reduce write contention +- Local SQLite path resolved to `~/.legionio/` absolute path instead of using relative CWD + +## [1.6.19] - 2026-04-02 + +### Changed +- Logging uplift across non-API `lib/` modules to use `Legion::Logging::Helper` and `log.*` instead of direct `Legion::Logging.*` calls +- Removed direct `log_info` / `log_warn` wrapper usage in partition management and aligned logging with helper-backed tagged loggers +- Added broader info-level operational logs for archival, retention, spool, extract, storage-tier, and partition workflows + +### Fixed +- Added `handle_exception(...)` coverage to rescue paths across non-API data modules so failures are logged consistently without changing existing fallback behavior +- Added compatibility fallback for `handle_exception` when older `legion-logging` releases are present in the runtime +- Included `metadata_json` in EventStore integrity hashes for new events while preserving verification compatibility for legacy rows +- Fixed encrypted Sequel columns to re-encrypt newly-created rows with their persisted primary key and maintain legacy read compatibility +- Hardened spool persistence with atomic writes, deterministic replay ordering, and corrupt-file quarantine during read/flush +- Updated partition manager specs to assert against helper-backed logger behavior + +## [1.6.18] - 2026-03-30 + +### Added +- Migration 061: versioning and expiry columns on `apollo_entries` — `parent_knowledge_id` (UUID), `is_latest` 
(boolean, default true), `supersession_type` (VARCHAR 20), `expires_at` (timestamptz), `forget_reason` (VARCHAR 255), `is_inference` (boolean, default false) — postgres only +- Migration 061: 4 named indexes including partial indexes: `idx_apollo_parent_knowledge`, `idx_apollo_version_chain` (partial WHERE is_latest), `idx_apollo_expiry` (partial WHERE expires_at IS NOT NULL), `idx_apollo_inference` (partial WHERE is_inference) +- Spec for migration 061 covering column presence, types, nullability, defaults, all 4 indexes, and idempotency + +## [1.6.17] - 2026-03-30 + +### Added +- Migration 060: L0/L1 summary columns on `apollo_entries` (`summary_l0` VARCHAR 500, `summary_l1` TEXT, `knowledge_tier` VARCHAR 4 default 'L2', `parent_entry_id` UUID, `l0_generated_at` timestamptz, `l1_generated_at` timestamptz) — postgres only +- Migration 060: named indexes `idx_apollo_knowledge_tier` and `idx_apollo_parent_entry` on `apollo_entries` +- Spec for migration 060 covering column presence, types, nullability, defaults, indexes, and idempotency + +## [1.6.16] - 2026-03-30 + +### Fixed +- Migration 019: widen `record_hash` column to size 255 via `set_column_type` (column added in migration 017) +- Migration 019: rename `prev_hash` to `previous_hash` via `rename_column` instead of adding a duplicate column +- Migration 019: decouple index creation from column existence checks so indexes are always guarded by their own `idxs.key?` check +- Migration 019: `down` no longer drops `record_hash` (owned by migration 017, not 019) +- Migration 019: replace `db.indexes` with bare `indexes()` — inside a `Sequel.migration` block `self` is the DB object, so `db` is undefined +- Updated to rubocop-legion (`inherit_gem: config/core.yml`) for shared LegionIO cop configuration + +### Added +- Migration 019 spec: 8 examples covering column presence, defaults, indexes, idempotency, and rollback + +## [1.6.15] - 2026-03-29 + +### Added +- `data_adapter` — current database adapter type (:sqlite, 
:mysql2, :postgres) +- `data_pool_stats` — connection pool metrics (size, available, in_use, waiting) +- `data_stats` — combined shared + local database statistics +- `local_data_stats` — local SQLite database statistics +- `data_can_read?(table_name)` — table read permission check +- `data_can_write?(table_name)` — table write permission check + +## [1.6.14] - 2026-03-29 + +### Added +- Migration 059: `chains` table (id, name, active, created, updated) for workflow bundle chain tracking +- `Legion::Data::Model::Chain` — Sequel model with `one_to_many :relationships` association + +## [1.6.13] - 2026-03-28 + +### Added +- `Legion::Data::AuditRecord` — tamper-evident audit record primitive with SHA-256 hash chain (closes #7) + - `append(chain_id:, content_type:, content_hash:, metadata: {}, sign: false)` — inserts a new record, linking it to the previous tail via `parent_hash` and `chain_hash` + - `verify(chain_id:)` — walks the chain and re-derives every hash, returning `{ valid:, length: }` or `{ valid: false, broken_at:, reason: }` on tampering + - `walk(chain_id:, since: nil, limit: 1000)` — return deserialized records in chronological order + - `query_by_type(content_type:, since: nil, limit: 100)` — cross-chain query by content_type + - `compute_chain_hash(parent_hash, content_hash, timestamp, content_type)` — public for independent verification + - Multiple independent chains share a single `audit_records` table, keyed by `chain_id` + - Chain hash formula: `SHA256("parent_hash:content_hash:unix_ns:content_type")` — timezone-independent via nanosecond epoch + - Optional signing via `Legion::Crypt.sign` when `sign: true`; signature column is nil when signing is unavailable +- Migration 058: `audit_records` table with `chain_id`, `content_type`, `content_hash`, `parent_hash`, `chain_hash` (unique), `signature`, `metadata`, `created_at`; PostgreSQL `NO UPDATE/DELETE` rules for DB-level append-only enforcement +- `Legion::Data::Model::AuditRecord` — Sequel model 
with `before_update`/`before_destroy` immutability guards and `parsed_metadata` helper +- 29 new specs covering constant, hash computation, DB-unavailable guards, chain creation, chain verification, tamper detection, walk/query operations, and model immutability + +## [1.6.12] - 2026-03-28 + +### Added +- VTT (WebVTT) extract handler for meeting transcript parsing (`Handlers::Vtt`) + - Parses speaker tags (`<v Speaker>`), timestamps, and WEBVTT header + - `preserve_speakers: true` (default) prefixes each line with speaker name + - Accepts inline VTT string content or a file path + - Returns `{ text:, metadata: { bytes:, speakers:, line_count: } }` +- `.vtt` extension registered in `TypeDetector::EXTENSION_MAP` (maps to `:vtt`) + +## [1.6.11] - 2026-03-28 + +### Added +- Migration 050: critical missing indexes across 13 tables (runners, tasks, digital_workers, audit_log, webhook_deliveries, webhook_dead_letters, conversations, approval_queue, rbac_role_assignments, rbac_cross_team_grants, memory_traces, agent_cluster_tasks, finlog_executions) +- Migration 051: fix tasks archival column mismatch — adds `created_at` column (PG: generated from `created`, SQLite/MySQL: backfilled) +- Migration 052: drop redundant Apollo indexes (PG only) — auto-named duplicates from migration 012 superseded by explicit indexes in migration 047 +- Migration 053: FK constraint for `tasks.relationship_id` (PG only) with orphan cleanup and ON DELETE SET NULL +- Migration 054: add `component_type` column to functions table (v3.0 naming convention — runner/hook/absorber) +- Migration 055: add nullable `definition` text column to functions table (v3.0 method contract storage) +- Migration 056: add `absorber_patterns` table for pattern-matched content acquisition (v3.0) +- Migration 057: add `routing_key` column to runners table (v3.0 AMQP routing key storage) + +### Fixed +- `Archival::Policy` now includes `DATE_COLUMN_OVERRIDES` map for legacy tables using non-standard date columns + +## [1.6.10] - 
2026-03-28 + +### Added +- Migration 049: adds `remote_invocable` boolean column (default: true) to the `functions` table. Allows per-function control over whether a registered function can be dispatched remotely via AMQP from the `LexDispatch` API layer. + +## [1.6.9] - 2026-03-27 + +### Added +- Migration 048: financial logging schemas (7 tables) for UAIS cost recovery model + - `finlog_identities` — worker/owner identity for cost attribution (worker_id, owner_msid, cost_center, business_segment) + - `finlog_assets` — Entra app / service principal metadata (entra_app_id, asset_type, extension_name, risk_tier) + - `finlog_environments` — cloud/infrastructure environment context (csp, account_id, askid, region, environment) + - `finlog_accounting` — financial classification per execution (aide_id, ucmg_id, billing_group, classification, recovery_ratio, rate_card_multiplier, provider_discount, chargeback_amount) + - `finlog_executions` — per-request execution record / central fact table (worker_id, task_id, provider, model_id, tokens, costs, latency) + - `finlog_tags` — flexible key-value metadata tags per execution + - `finlog_usages` — aggregated consumption rollup (daily period, per worker/provider/model) + +## [1.6.8] - 2026-03-27 + +### Changed +- Documentation updates (CLAUDE.md, README.md) + +## [1.6.7] - 2026-03-26 + +### Removed +- Legacy Vault credential fetch in `Connection#creds_builder` — hardcoded `database/creds/legion` path removed. Database credentials are now exclusively managed by the LeaseManager via `lease://postgresql#username` / `lease://postgresql#password` URI references in data settings. 
+ +## [1.6.6] - 2026-03-25 + +### Added +- `connected?` — returns true when the shared DB is connected (reads `Settings[:data][:connected]`) +- `can_write?(table_name)` — checks INSERT privilege; sqlite always returns true, postgres queries `has_table_privilege`, results cached per table +- `can_read?(table_name)` — checks SELECT privilege; sqlite always returns true, postgres queries `has_table_privilege`, results cached per table +- `reset_privileges!` — clears cached privilege results (used in tests and after re-connect) +- `Legion::Data::Extract` — file format extraction with handler registry +- Built-in handlers: text, markdown, csv, json, jsonl (no external gems required) +- Optional handlers: pdf (pdf-reader), docx (docx), pptx (rubyzip), xlsx (rubyXL), html (nokogiri) — lazy-loaded, degrade gracefully if gem not installed +- `Extract.register_handler(type, klass)` — register custom format handlers +- `Extract.can_extract?(type)` — check if a type can be extracted (handler present and gem available) +- `Extract.supported_types` — list all registered types +- Added `csv` gem dependency (Ruby 3.4 stdlib split) + +## [1.6.4] - 2026-03-25 + +### Added +- Migration 047: Apollo identity columns (submitted_by, submitted_from), content hash dedup, apollo_operations table, apollo_entries_archive table, comprehensive indexes including partial HNSW on active entries only + +## [1.6.2] - 2026-03-25 + +### Changed +- Migration 041: Resize all pgvector columns from `vector(1536)` to `vector(1024)` for cross-provider embedding compatibility (apollo_entries.embedding, functions.embedding_vector, memory_traces.embedding). Drops and recreates HNSW cosine indexes. 
+ +## [1.6.1] - 2026-03-25 + +### Fixed +- Load Sequel `pg_array` extension on Postgres connections — required by Apollo for `text[]` column inserts + +## [1.6.0] - 2026-03-25 + +### Fixed +- **Connection pool starvation**: `max_connections`, `pool_timeout`, `preconnect`, and all other Sequel options were never forwarded to `Sequel.connect` — pool was stuck at Sequel's default of 4 connections regardless of settings. 5+ second "slow queries" in daemon logs were actually pool wait time (5s `pool_timeout`) + fast query (~19ms). Now all configured options flow through properly. +- **Local DB had same issue**: `Legion::Data::Local.setup` used bare `Sequel.sqlite(path)` with no options. Now forwards SQLite adapter options (`timeout`, `readonly`, `disable_dqs`) via `Sequel.connect`. + +### Changed +- **Flat settings structure**: all connection settings now live directly on `data.*` instead of nested `data.connection.*` or `data.adapter_opts.*`. Users configure `data.max_connections`, `data.pool_timeout`, `data.connect_timeout`, etc. regardless of adapter — legion-data figures out which options apply. 
+- Default `max_connections` raised from 10 to 25 (was never applied before anyway) +- Default `preconnect` set to `'concurrently'` (warm pool at boot) +- Default `pool_timeout` remains 5s (now actually enforced) +- Per-adapter defaults applied at connection time via `ADAPTER_DEFAULTS`: sqlite (`timeout: 5000`, `readonly: false`, `disable_dqs: true`), postgres (`connect_timeout: 20`, `sslmode: 'disable'`), mysql2 (`connect_timeout: 120`, `encoding: 'utf8mb4'`) +- Adapter-specific settings (`connect_timeout`, `read_timeout`, `write_timeout`, `encoding`, `sql_mode`, `sslmode`, `sslrootcert`, `search_path`, `timeout`, `readonly`, `disable_dqs`) default to nil in settings and resolve to adapter built-in defaults — only forwarded when the current adapter supports them + +### Added +- `GENERIC_KEYS`, `ADAPTER_KEYS`, `ADAPTER_DEFAULTS` constants on `Connection` for option whitelisting and defaults +- Connection health extensions (non-SQLite only): `connection_validator` (pings idle connections, default timeout 600s) and `connection_expiration` (retires old connections, default timeout 14400s) — both enabled by default via `data.connection_validation` and `data.connection_expiration` +- `Legion::Data::Connection.stats` — comprehensive connection metrics: pool stats (type, size, available, in_use, waiting), tuning snapshot, and adapter-specific database stats (postgres: `pg_stat_activity`, `pg_database_size`, server settings; sqlite: PRAGMAs, file size; mysql: `information_schema`, `SHOW STATUS`) +- `Legion::Data::Connection.pool_stats` — works across all Sequel pool types (`timed_queue`, `threaded`, `single`, sharded variants) +- `Legion::Data::Local.stats` — local SQLite metrics: PRAGMAs, file size, database size, registered migrations +- `Legion::Data.stats` — combined `{ shared: Connection.stats, local: Local.stats }` for `/api/stats` endpoint +- `data.query_log` flag (default `false`): when enabled, pipes ALL SQL queries to `~/.legionio/logs/data-shared-query.log` 
(shared) or `data-local-query.log` (local) via dedicated `QueryFileLogger` — isolated from the main `Legion::Logging` domain so debug query floods don't pollute application logs +- `Legion::Data::Connection::QueryFileLogger` — thread-safe file-based logger with timestamped entries, used by both shared and local query log modes +- `Legion::Data::Connection::SlowQueryLogger` — wraps tagged `Legion::Logging::Logger`, prefixes warn-level messages with `[slow-query]` +- `data.local.query_log` flag (default `false`): same as above but for the local SQLite connection +- **StaticCache infrastructure** for lookup models: `Legion::Data.setup_static_cache` applies `Sequel::Plugins::StaticCache` to `Extension`, `Runner`, `Function` — loads entire tables into frozen in-memory hashes for zero-DB-hit reads. Enabled via `data.cache.static_cache: true` (default `false`). +- `Legion::Data.reload_static_cache` — refreshes in-memory static cache after hot-loading new extensions +- **External cache infrastructure**: `Legion::Data.setup_external_cache` applies `Sequel::Plugins::Caching` to `Relationship` (ttl 10s), `Node` (ttl 10s), `Setting` (ttl configurable) via `Legion::Cache` backend. Activates when `data.cache.auto_enable` is true and `Legion::Cache` is loaded. 
+- `data.cache.static_cache` setting (default `false`) + +## [1.5.3] - 2026-03-25 + +### Added +- Migration 040: add indexes on tasks table for slow query optimization (`idx_tasks_created`, `idx_tasks_status_func_rel`) + +## [1.5.2] - 2026-03-24 + +### Fixed +- TLS spec mock `resolve` methods used `_port:` keyword which mismatched production `port:` call, causing `ArgumentError: unknown keyword: :port` on CI + +## [1.5.1] - 2026-03-24 + +### Changed +- `Legion::Data::Connection#merge_tls_creds` — now respects explicit `data.tls.enabled` flag; TLS opt-in only (no behavior change when flag is absent or false) + +### Added +- Migration 039: `audit_archive_manifests` table for tracking cold storage uploads (tier, storage_url, date range, entry count, SHA-256 checksum, hash chain anchors) +- `spec/legion/data/tls_spec.rb` — full coverage for merge_tls_creds feature flag behavior + +## [1.5.0] - 2026-03-24 + +### Fixed +- Slow query warnings now tagged with `[data][slow-query]` instead of bare timestamps +- SQL log output uses tagged Legion::Logging::Logger for consistent `[data]` prefix +- Fix Style/SymbolArray in conversations migration + +## [1.4.18] - 2026-03-23 + +### Fixed +- Fix extension migration timing: late `register_migrations` calls now run immediately if DB is connected +- Fix cross-extension schema_migrations conflicts with per-extension migration tables + +## [1.4.17] - 2026-03-22 + +### Added +- `Legion::Data::Helper` mixin module with data convenience methods for LEX extensions (data_path, data_class, models_class, data_connected?, data_connection, local_data_connected?, local_data_connection, local_data_model) + +### Fixed +- Add missing `require 'spec_helper'` in `helper_spec.rb` that caused `NameError: uninitialized constant Legion::Data::Helper` + +## [1.4.16] - 2026-03-22 + +### Changed +- Add version constraints to gemspec dependencies: `legion-logging >= 1.2.8`, `legion-settings >= 1.3.12` + +## [1.4.15] - 2026-03-22 + +### Changed +- Added 
`Legion::Logging` calls (guarded with `defined?`) to all previously silent rescue blocks +- `archival/policy.rb`: warn log on `Policy.from_settings` failure +- `archival.rb`: debug log on `db_ready?` failure +- `connection.rb`: debug log on `data_tls_settings` failure +- `event_store.rb`: debug log on `db_ready?` failure +- `models/audit_log.rb`: warn log on `parsed_detail` JSON parse failure +- `models/function.rb`: debug log on `embedding_vector` JSON parse failure +- `models/node.rb`: debug log on `parsed_metrics` and `parsed_hosted_worker_ids` JSON parse failures +- `partition_manager.rb`: warn log (via `log_warn`) on `partition_names_for` failure +- `storage_tiers.rb`: debug log on `count_tier` failure +- `vector.rb`: debug log on `available?` check failure + +## [1.4.14] - 2026-03-22 + +### Changed +- Boot connection log for non-SQLite adapters now includes username: `adapter://user@host:port/db` + +## [1.4.13] - 2026-03-22 + +### Added +- Comprehensive logging across data operations: connection lifecycle, archival, retention, storage tiers, event store, encryption key provider, spool drain, and vector search +- `Connection.setup`: `.info` on successful connect (adapter://host:port/db or SQLite path) +- `Connection.shutdown`: `.info` on disconnect +- `Connection.connect_with_replicas`: `.debug` with replica count +- `Data.setup`: `.info` on setup completion +- `Data.shutdown`: `.info` on shutdown +- `Archiver.archive_table`: `.info` on start and completion with table name and row count; `.warn` before re-raising S3/Azure upload failures +- `Archival.archive!`: `.info` with table, destination, cutoff, and dry_run flag; `.info` on restore with row count +- `Retention.archive_old_records`: `.info` with table name and archived row count +- `Retention.purge_expired_records`: `.info` with archive table name and purged row count +- `StorageTiers.archive_to_warm`: `.info` with table name and row count +- `StorageTiers.export_to_cold`: `.info` with exported row count 
+- `EventStore.append`: `.debug` with stream, event type, and sequence number +- `EventStore.verify_chain`: `.warn` when hash chain is broken, with stream and sequence number +- `Encryption::KeyProvider`: `.warn` on dev key fallback; `.debug` on Vault key derivation +- `Encryption::SequelPlugin`: `.warn` on decrypt failure before re-raise +- `Spool#write`: `.debug` with sub-namespace and filename +- `Spool#flush`: `.info` with sub-namespace and drained item count +- `Vector.ensure_extension!`: `.info` on successful pgvector setup +- `Vector.cosine_search` / `Vector.l2_search`: `.debug` with table, column, and limit + +## [1.4.12] - 2026-03-21 + +### Added +- Migration 035: apollo_entries source_channel column (postgres-only) +- Migration 036: audit_log context_snapshot column +- Migration 037: apollo_entries knowledge_domain column with index (postgres-only) + +## v1.4.11 + +### Added +- Read replica support: `read_replica_url` and `replicas` settings, `Connection.connect_with_replicas` via Sequel `server_block` extension, `read_server` and `replica_servers` class methods for read/write splitting +- `PartitionManager`: PostgreSQL range partitioning helper — `ensure_partitions`, `drop_old_partitions`, `list_partitions` for monthly table partitioning +- `Archiver`: cold storage archival pipeline — batch export to JSONL+gzip, SHA-256 manifest, pluggable upload backends (S3, Azure, local tmpdir) +- Migration 034: `archive_manifest` table (PostgreSQL only) for tracking archived batches +- Archival settings: `retention_days`, `batch_size`, `storage_backend` defaults +- 58 new specs (257 total, 0 failures) + +## v1.4.10 + +### Added +- TLS support for PostgreSQL connections: `sslmode`, `sslrootcert`, `sslcert`, `sslkey` +- TLS support for MySQL connections: `ssl_mode`, `sslca`, `sslcert`, `sslkey` +- `Connection.merge_tls_creds` resolves TLS config via `Legion::Crypt::TLS.resolve` +- SQLite connections skip TLS entirely (local file, no network) + +## v1.4.8 + +### Fixed 
+- Migration 033: adds `task_delay` column (Integer, nullable) to tasks table to resolve `PG::UndefinedColumn` error when lex-tasker queries `tasks.task_delay` + +## v1.4.7 + +### Added +- Migration 031: adds `depth` column (Integer, default 0) to tasks table for sub-agent recursion tracking +- Migration 032: adds `cancelled_at` column (DateTime, nullable) to tasks table for cancellation support +- `cancelled?` predicate method on Task model + +## v1.4.6 + +### Added +- Migration 028: agent_cluster_nodes table (stub for agent cluster support) +- Migration 029: agent_cluster_tasks table (stub for agent cluster task tracking) +- Migration 030: approval_queue table for governance board with status, requester, reviewer, and tenant filtering + +## v1.4.5 + +### Added +- Migration 027: add `source_provider` column to `apollo_entries` (postgres-only) + Tracks the LLM provider or data origin of each knowledge entry for source diversity + enforcement in Apollo corroboration + +## v1.4.4 + +### Added +- Migration 026: `description` (TEXT) and `embedding` (TEXT, JSON-serialized vector) columns on `functions` table +- Postgres-only: `embedding_vector vector(1536)` column with HNSW cosine index for semantic similarity search +- `Function#embedding_vector` / `Function#embedding_vector=` helper methods for JSON serialization + +## v1.4.3 + +### Added +- `Legion::Data::Spool`: filesystem-based event buffer at `~/.legionio/data/spool/` + +## v1.4.2 + +### Fixed +- Migration 015: use `create_table?` instead of `create_table` for idempotent RBAC table creation + +## v1.4.1 + +### Added +- Migration 025: tenants table (tenant_id, name, status, quotas, token limits) + +## v1.4.0 + +### Added +- `Legion::Data::Vector`: reusable pgvector helpers (available?, cosine_search, l2_search, ensure_extension!) 
+- `Legion::Data::StorageTiers`: hot/warm/cold archival lifecycle (archive_to_warm, export_to_cold, stats) +- Migration 022: memory_traces table with optional pgvector embedding column (1536-dim, HNSW index) +- Migration 023: data_archive table for generic storage tier archival +- Migration 024: tenant_id partition columns on tasks, digital_workers, audit_log, memory_traces + +## v1.3.8 + +### Added +- `Legion::Data::Archival`: hot/warm/cold archival pipeline for tasks and metering records +- `Legion::Data::Archival::Policy`: configurable retention policies (warm_after_days, cold_after_days, batch_size) +- Archive, restore, and cross-table search operations with dry-run support +- Migration 021: archive tables for tasks and metering_records + +## v1.3.7 + +### Added +- Migration 020: `webhooks`, `webhook_deliveries`, `webhook_dead_letters` tables + +### Fixed +- Migration 019: guard against duplicate column adds when `record_hash` already exists from migration 017 + +## v1.3.6 + +### Added +- Migration 019: adds `record_hash`, `previous_hash`, `retention_tier` columns to `audit_log` + +## v1.3.5 + +### Added +- `Legion::Data::EventStore`: append-only governance event store with stream semantics +- Hash chain integrity verification for tamper detection +- `EventStore::Projection` base class with `build_from` stream replay +- `ConsentState` projection: rebuild consent state from event history +- `GovernanceTimeline` projection: chronological governance event timeline +- Migration 018: governance_events table with stream/sequence indexing + +## v1.3.4 + +### Added +- `Legion::Data::Encryption::Cipher`: AES-256-GCM with versioned binary format, random IV, and AAD +- `Legion::Data::Encryption::KeyProvider`: Vault-backed key derivation with local fallback for dev mode +- `Legion::Data::Encryption::SequelPlugin`: transparent `encrypted_column` DSL for Sequel models +- Per-tenant key scope support for cryptographic erasure compliance + +## v1.3.3 + +### Added +- Migration 
017: `audit_log` table with SHA-256 hash chain columns (`record_hash`, `prev_hash`) +- `Legion::Data::Model::AuditLog` immutable Sequel model with event type/status validation +- Indexes on `event_type`, `principal_id`, and `created_at` for audit query performance + +## v1.3.2 + +### Added +- Migration 016: worker health columns (`health_status`, `last_heartbeat_at`, `health_node` on digital_workers; `metrics`, `hosted_worker_ids`, `version` on nodes) +- `DigitalWorker#health_status` validation against `HEALTH_STATUSES` (`online`, `offline`, `unknown`) +- `DigitalWorker#online?` and `DigitalWorker#offline?` convenience methods +- `Node#parsed_metrics` and `Node#parsed_hosted_worker_ids` JSON deserialization helpers + +## v1.3.1 + +### Added +- Migration 015: RBAC tables (rbac_role_assignments, rbac_runner_grants, rbac_cross_team_grants) +- `Legion::Data::Model::RbacRoleAssignment` Sequel model with expiry and validation +- `Legion::Data::Model::RbacRunnerGrant` Sequel model with actions_list helper +- `Legion::Data::Model::RbacCrossTeamGrant` Sequel model with cross-team validation + +## v1.3.0 + +### Added +- `Legion::Data::Local` module — parallel local SQLite database for agentic cognitive state persistence +- TimestampMigrator-based migration registration for per-extension local schemas +- `Legion::Data::Local.model(:table)` helper for local-bound Sequel models +- Dev mode fallback: shared DB falls back to SQLite when `dev_mode: true` and network DB unreachable +- New settings: `data.local.enabled`, `data.local.database`, `data.dev_mode`, `data.dev_fallback` +- `Legion::Data.local` accessor for the Local module +- Local connection lifecycle wired into `Legion::Data.setup` / `.shutdown` +- 13 new specs (62 total) + +## v1.2.2 + +### Added +- Migration 014: add missing columns to `relationships` table (`delay`, `chain_id`, `debug`, `allow_new_chains`, `conditions`, `transformation`, `active`) required by lex-tasker query helpers + +## v1.2.1 + +### Added +- 
Migration 013: `relationships` table with trigger/action foreign keys to functions +- `Legion::Data::Model::Relationship` Sequel model with trigger/action associations +- Relationship model registered in model loader (loaded before Task for association resolution) +- Uncommented `trigger_relationships` and `action_relationships` associations on Function model + ## v1.2.0 -Moving from BitBucket to GitHub inside the Optum org. All git history is reset from this point on +Moving from BitBucket to GitHub. All git history is reset from this point on diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..55d4f22 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,209 @@ +Always run a full `bundle exec rspec` and `bundle exec rubocop -A` and fix all errors before committing. + +# legion-data + +Persistent storage gem for LegionIO. Owns Sequel database connections, numbered migrations, models, local SQLite state, extract timing persistence, audit/governance storage, identity/RBAC storage, Apollo storage, and the LLM lifecycle ledger. + +## Commands + +```bash +bundle install +bundle exec rubocop -A +bundle exec rspec --format json --out tmp/rspec_results.json --format progress --out tmp/rspec_progress.txt +``` + +RSpec output belongs in `tmp/`. On failure, extract only failures: + +```bash +jq '[.examples[] | select(.status != "passed") | {file_path, line_number, full_description, status, exception: .exception}]' tmp/rspec_results.json > tmp/rspec_failures.json +``` + +## Architecture + +- `lib/legion/data/connection.rb`: Sequel connection setup, diagnostics, fallback, query logging. +- `lib/legion/data/migration.rb`: numbered Sequel migrations. +- `lib/legion/data/model.rb`: shared model loader. +- `lib/legion/data/models/`: flat and namespaced Sequel model classes. +- `lib/legion/data/local.rb`: local SQLite database for on-node state. +- `lib/legion/data/extract.rb`: text extraction and persisted extract step timings. 
+- `lib/legion/data/spool.rb`: filesystem write buffer when DB writes are unavailable. + +## Migration Rules + +- Never edit published migrations. Add a new migration. +- Do not guard migrations with `create_table?`, `drop_table?`, `table_exists?`, `if_exists`, `if_not_exists`, `next if`, or `next unless`. +- **One change per migration file.** Each migration modifies exactly ONE table. Never loop over tables. If a migration fails, you must be able to identify exactly what broke and roll back cleanly. +- Never use `.each`, `.map`, or any iterator in a migration. If 12 tables need the same column, that's 12 migration files. +- Never use raw SQL (`run '...'`) when Sequel DSL supports the operation. Use `add_index`, `drop_index`, `add_column`, `drop_column`, etc. +- Use portable Sequel DSL unless the feature truly requires adapter-specific behavior. +- Use integer `id` primary keys for joins and public `uuid` columns for APIs/logs/external references. +- Normalize stable fields. Use JSON only for genuinely dynamic provider payloads or evidence. + +### Sequel Migration DSL Reference + +**Create table**: https://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html#method-i-create_table +**Column options**: https://sequel.jeremyevans.net/rdoc/classes/Sequel/Schema/CreateTableGenerator.html#method-i-column + +### Create Table Pattern + +```ruby +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:example_records) do + primary_key :id + String :uuid, size: 36, null: false, unique: true + + # Identity columns (required on every table) + String :access_scope, size: 20, null: false, default: 'global', index: true + foreign_key :identity_principal_id, :identity_principals, null: true, on_delete: :set_null, on_update: :cascade + foreign_key :identity_id, :identities, null: true, on_delete: :set_null, on_update: :cascade + String :identity_canonical_name, size: 255, null: true, index: true + + # Domain columns here... 
+ + # Timestamps (required on every table) + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP # reflects when the event happened (request/AMQP timestamp) + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP # when the row was physically written to the database + DateTime :updated_at, null: true # set on row update; NULL means never updated + + index :identity_principal_id + end + end +end +``` + +### Alter Table Pattern (adding a column) + +```ruby +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:target_table) do + add_column :new_column, String, size: 128, null: true, index: true + end + end + + down do + alter_table(:target_table) do + drop_index :new_column + drop_column :new_column + end + end +end +``` + +### Column Option Reference + +| Option | Purpose | +|--------|---------| +| `:null` | `false` = NOT NULL, `true` = nullable | +| `:default` | Default value (use `Sequel::CURRENT_TIMESTAMP` for timestamps) | +| `:index` | `true` creates an index on this column; pass a Hash for index options | +| `:unique` | `true` adds a UNIQUE constraint | +| `:on_delete` | FK behavior: `:cascade`, `:set_null`, `:restrict`, `:no_action` | +| `:on_update` | FK behavior: `:cascade`, `:set_null`, `:restrict`, `:no_action` | +| `:key` | For FKs — the referenced column (unnecessary if referencing primary key) | +| `:size` | Column width for String/Decimal | +| `:text` | `true` for TEXT columns (unlimited length) | + +### Foreign Key Conventions + +```ruby +# FK to identity tables — always cascade updates, set null on delete +foreign_key :identity_principal_id, :identity_principals, null: true, on_delete: :set_null, on_update: :cascade +foreign_key :identity_id, :identities, null: true, on_delete: :set_null, on_update: :cascade + +# FK to domain tables — cascade delete (child dies with parent) +foreign_key :conversation_id, :llm_conversations, null: false, on_delete: :cascade + +# FK to optional parent — set null 
on delete (orphan is ok) +foreign_key :parent_message_id, :llm_messages, null: true, on_delete: :set_null +``` + +### Timestamp Semantics + +| Column | Meaning | Default | Nullable | +|--------|---------|---------|----------| +| `created_at` | When the event/action occurred in the real world (e.g. AMQP message timestamp, API request time) | `CURRENT_TIMESTAMP` | NOT NULL | +| `inserted_at` | When the row was physically written to this database — always DB clock time | `CURRENT_TIMESTAMP` | NOT NULL | +| `updated_at` | Last time the row was modified after initial insert. NULL means never updated. | none | NULL | + +`created_at` vs `inserted_at`: a message published at 14:00:00 that gets consumed and written at 14:00:03 has `created_at = 14:00:00` and `inserted_at = 14:00:03`. For synchronous writes they will be the same. + +### Index Conventions + +- `access_scope` — always indexed (low-cardinality scope filter applied to nearly every multi-tenant query) +- `identity_canonical_name` — always indexed (user-facing search/filter) +- `identity_principal_id` — always indexed (join path to identity tables) +- `uuid` — always unique index (external reference lookups) +- Timestamp columns used in WHERE clauses — indexed +- Composite indexes for common query patterns: `index [:provider, :model_key]` + +## Sequel ORM Rules + +Use Sequel associations as the object graph. References: +- https://sequel.jeremyevans.net/rdoc/classes/Sequel/Model/Associations/ClassMethods.html +- https://github.com/jeremyevans/sequel/blob/master/doc/association_basics.rdoc + +Association mapping: +- Foreign key on this model: `many_to_one`. +- Foreign key on the associated model: `one_to_many` or `one_to_one`. +- Join table between models: `many_to_many`. +- Single associated record through a join table: `one_through_one`. + +When Sequel cannot infer names, set `:class`, `:key`, `:primary_key`, `:join_table`, `:left_key`, and `:right_key` explicitly. Do not create association names that collide with real columns.
+ +## Common Fields Standard + +All new tables in legion-data should follow this column convention. Required fields must be present on every table. Optional fields are added when the domain warrants them. + +### Required (every table, in this order) + +| Column | Sequel DSL | Purpose | +|--------|-----------|---------| +| `id` | `primary_key :id` | Auto-increment integer PK — internal join key, never exposed externally | +| `uuid` | `String :uuid, size: 36, null: false, unique: true` | External reference — used in APIs, logs, AMQP correlation | +| `access_scope` | `String :access_scope, size: 20, null: false, default: 'global', index: true` | Multi-tenant scoping (global, personal, team, org) | +| `identity_principal_id` | `foreign_key :identity_principal_id, :identity_principals, null: true, on_delete: :set_null, on_update: :cascade` | FK to the principal who caused this row | +| `identity_id` | `foreign_key :identity_id, :identities, null: true, on_delete: :set_null, on_update: :cascade` | FK to the specific provider-bound identity credential | +| `identity_canonical_name` | `String :identity_canonical_name, size: 255, null: true, index: true` | Point-in-time snapshot of the identity's canonical name. NOT a FK. May become stale if principal is renamed — use FK join for authoritative lookups. 
| +| `created_at` | `DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP` | When the event/action occurred (AMQP timestamp, request time) | +| `inserted_at` | `DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP` | When the row was physically written to the database | +| `updated_at` | `DateTime :updated_at, null: true` | Set on row update; NULL means never updated after insert | + +### Optional (add when applicable) + +| Column | Type | Purpose | +|--------|------|---------| +| `expires_at` | `DateTime, null: true` | TTL / archival eligibility | +| `content_type` | `String, size: 64` | Classifier for the row's payload kind | +| `conversation_id` | `foreign_key ..., :llm_conversations, on_delete: :cascade` | Links to the LLM conversation that produced this row | +| `task_id` | `foreign_key ..., :tasks, on_delete: :set_null` | Links to the task that triggered this row | +| `contains_phi` | `TrueClass, default: false` | Row contains Protected Health Information | +| `contains_pii` | `TrueClass, default: false` | Row contains Personally Identifiable Information | + +### Naming rules + +- Identity FKs always use `identity_principal_id` and `identity_id` — never `agent_id`, `principal_id`, `user_id`, or other loose variants for new tables. +- The denormalized string field is always `identity_canonical_name` — not `canonical_name`, `actor`, `agent_id`, or `identity_name`. +- Existing columns (`agent_id`, `source_agent`, `submitted_by`, `actor`, etc.) on pre-existing tables are **not renamed or removed** — they are historical record and intentionally left as-is. New identity columns are purely additive. + +## Current Schema Landmarks + +- `074`-`076`: Apollo field width, task idempotency, extract step timings. +- `077`-`090`: LLM lifecycle ledger. +- `091`-`096`: portable identity companion tables. +- `097`: LLM dispatch fields (operation, correlation_id, provider_instance, dispatch_path). 
+- `098`-`099`: Legacy identity table drop + rename (portable_identity_* → identity_*). +- `100`-`102`: Apollo identity columns + access_scope + indexes. +- `103`-`114`: LLM table identity standardization (access_scope, identity_principal_id, identity_id, identity_canonical_name). +- Namespaced models: `Identity::*`, `Apollo::*`, `RBAC::*`, `LLM::*`. + +## Boundaries + +- REST APIs belong in LegionIO, not this gem. +- Extension runtime behavior belongs in the owning extension repos. +- Do not commit generated DBs, logs, coverage output, built gems, or workspace `/docs` files from outside this repo. diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..2bd7cf5 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,41 @@ +# Default owner — all files +* @Esity + +# Core library code +# lib/ @Esity @future-core-team + +# Database connection +# lib/legion/data/connection.rb @Esity @future-infra-team + +# Migrations +# lib/legion/data/migrations/ @Esity @future-core-team + +# Models +# lib/legion/data/models/ @Esity @future-core-team + +# Local SQLite (agentic cognitive state) +# lib/legion/data/local.rb @Esity @future-ai-team +# lib/legion/data/local/ @Esity @future-ai-team + +# Encryption at rest +# lib/legion/data/encryption/ @Esity @future-security-team + +# Event store (governance) +# lib/legion/data/event_store/ @Esity @future-security-team +# lib/legion/data/event_store.rb @Esity @future-security-team + +# Vector helpers (pgvector / Apollo) +# lib/legion/data/vector.rb @Esity @future-ai-team + +# Storage tiers and archival +# lib/legion/data/storage_tiers.rb @Esity @future-infra-team +# lib/legion/data/archival/ @Esity @future-infra-team + +# Specs +# spec/ @Esity @future-contributors + +# Documentation +# *.md @Esity @future-docs-team + +# CI/CD +# .github/ @Esity diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 52c7f95..0000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,75 +0,0 @@ -# Contributor Covenant Code of Conduct 
- -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, gender identity and expression, level of experience, -nationality, personal appearance, race, religion, or sexual identity and -orientation. - -## Our Standards - -Examples of behavior that contributes to creating a positive environment -include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or -advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic - address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. 
Examples of -representing a project or community include using an official project email -address, posting via an official social media account, or acting as an appointed -representative at an online or offline event. Representation of a project may be -further defined and clarified by project maintainers. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the project team at [opensource@optum.com][email]. All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at [http://contributor-covenant.org/version/1/4][version] - -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ -[email]: mailto:opensource@optum.com \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index b0c397d..0000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,55 +0,0 @@ -# Contribution Guidelines - -Please note that this project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms. Please also review our [Contributor License Agreement ("CLA")](INDIVIDUAL_CONTRIBUTOR_LICENSE.md) prior to submitting changes to the project. You will need to attest to this agreement following the instructions in the [Paperwork for Pull Requests](#paperwork-for-pull-requests) section below. 
- ---- - -# How to Contribute - -Now that we have the disclaimer out of the way, let's get into how you can be a part of our project. There are many different ways to contribute. - -## Issues - -We track our work using Issues in GitHub. Feel free to open up your own issue to point out areas for improvement or to suggest your own new experiment. If you are comfortable with signing the waiver linked above and contributing code or documentation, grab your own issue and start working. - -## Coding Standards - -We have some general guidelines towards contributing to this project. -Please run RSpec and Rubocop while developing code for LegionIO - -### Languages - -*Ruby* - -## Pull Requests - -If you've gotten as far as reading this section, then thank you for your suggestions. - -## Paperwork for Pull Requests - -* Please read this guide and make sure you agree with our [Contributor License Agreement ("CLA")](INDIVIDUAL_CONTRIBUTOR_LICENSE.md). -* Make sure git knows your name and email address: - ``` - $ git config user.name "J. Random User" - $ git config user.email "j.random.user@example.com" - ``` ->The name and email address must be valid as we cannot accept anonymous contributions. -* Write good commit messages. -> Concise commit messages that describe your changes help us better understand your contributions. -* The first time you open a pull request in this repository, you will see a comment on your PR with a link that will allow you to sign our Contributor License Agreement (CLA) if necessary. -> The link will take you to a page that allows you to view our CLA. You will need to click the `Sign in with GitHub to agree button` and authorize the cla-assistant application to access the email addresses associated with your GitHub account. Agreeing to the CLA is also considered to be an attestation that you either wrote or have the rights to contribute the code. All committers to the PR branch will be required to sign the CLA, but you will only need to sign once. 
This CLA applies to all repositories in the Optum org. - -## General Guidelines - -Ensure your pull request (PR) adheres to the following guidelines: - -* Try to make the name concise and descriptive. -* Give a good description of the change being made. Since this is very subjective, see the [Updating Your Pull Request (PR)](#updating-your-pull-request-pr) section below for further details. -* Every pull request should be associated with one or more issues. If no issue exists yet, please create your own. -* Make sure that all applicable issues are mentioned somewhere in the PR description. This can be done by typing # to bring up a list of issues. - -### Updating Your Pull Request (PR) - -A lot of times, making a PR adhere to the standards above can be difficult. If the maintainers notice anything that we'd like changed, we'll ask you to edit your PR before we merge it. This applies to both the content documented in the PR and the changed contained within the branch being merged. There's no need to open a new PR. Just edit the existing one. - -[email]: mailto:opensource@optum.com \ No newline at end of file diff --git a/Gemfile b/Gemfile index edaf657..d639d18 100644 --- a/Gemfile +++ b/Gemfile @@ -1,10 +1,15 @@ +# frozen_string_literal: true + source 'https://rubygems.org' gemspec + group :test do gem 'rake' gem 'rspec' gem 'rspec_junit_formatter' - gem 'rubocop' + gem 'rubocop-legion' gem 'simplecov' end +gem 'mysql2', '>= 0.5.5' +gem 'pg', '>= 1.5' diff --git a/INDIVIDUAL_CONTRIBUTOR_LICENSE.md b/INDIVIDUAL_CONTRIBUTOR_LICENSE.md deleted file mode 100644 index 79460dc..0000000 --- a/INDIVIDUAL_CONTRIBUTOR_LICENSE.md +++ /dev/null @@ -1,30 +0,0 @@ -# Individual Contributor License Agreement ("Agreement") V2.0 - -Thank you for your interest in this Optum project (the "PROJECT"). 
In order to clarify the intellectual property license granted with Contributions from any person or entity, the PROJECT must have a Contributor License Agreement ("CLA") on file that has been signed by each Contributor, indicating agreement to the license terms below. This license is for your protection as a Contributor as well as the protection of the PROJECT and its users; it does not change your rights to use your own Contributions for any other purpose. - -You accept and agree to the following terms and conditions for Your present and future Contributions submitted to the PROJECT. In return, the PROJECT shall not use Your Contributions in a way that is inconsistent with stated project goals in effect at the time of the Contribution. Except for the license granted herein to the PROJECT and recipients of software distributed by the PROJECT, You reserve all right, title, and interest in and to Your Contributions. -1. Definitions. - -"You" (or "Your") shall mean the copyright owner or legal entity authorized by the copyright owner that is making this Agreement with the PROJECT. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. - -"Contribution" shall mean any original work of authorship, including any modifications or additions to an existing work, that is intentionally submitted by You to the PROJECT for inclusion in, or documentation of, any of the products owned or managed by the PROJECT (the "Work"). 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the PROJECT or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the PROJECT for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." - -2. Grant of Copyright License. - -Subject to the terms and conditions of this Agreement, You hereby grant to the PROJECT and to recipients of software distributed by the PROJECT a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works. - -3. Grant of Patent License. - -Subject to the terms and conditions of this Agreement, You hereby grant to the PROJECT and to recipients of software distributed by the PROJECT a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) was submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that your Contribution, or the Work to which you have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed. - -4. Representations. - - (a) You represent that you are legally entitled to grant the above license. If your employer(s) has rights to intellectual property that you create that includes your Contributions, you represent that you have received permission to make Contributions on behalf of that employer, that your employer has waived such rights for your Contributions to the PROJECT, or that your employer has executed a separate Corporate CLA with the PROJECT. - - (b) You represent that each of Your Contributions is Your original creation (see section 6 for submissions on behalf of others). You represent that Your Contribution submissions include complete details of any third-party license or other restriction (including, but not limited to, related patents and trademarks) of which you are personally aware and which are associated with any part of Your Contributions. - -5. You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. You may provide support for free, for a fee, or not at all. 
Unless required by applicable law or agreed to in writing, You provide Your Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. - -6. Should You wish to submit work that is not Your original creation, You may submit it to the PROJECT separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which you are personally aware, and conspicuously marking the work as "Submitted on behalf of a third-party: [named here]". - -7. You agree to notify the PROJECT of any facts or circumstances of which you become aware that would make these representations inaccurate in any respect. \ No newline at end of file diff --git a/LICENSE b/LICENSE index 93234d8..20cba51 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2021 Optum + Copyright 2021 Esity Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/NOTICE.txt b/NOTICE.txt deleted file mode 100644 index 0f20a32..0000000 --- a/NOTICE.txt +++ /dev/null @@ -1,9 +0,0 @@ -Legion::Crypt(legion-crypt) -Copyright 2021 Optum - -Project Description: -==================== -Manage - -Author(s): -Esity \ No newline at end of file diff --git a/README.md b/README.md index 63f0eed..000a700 100644 --- a/README.md +++ b/README.md @@ -1,74 +1,614 @@ -Legion::Data -===== +# legion-data -Legion::Data is a gem for the LegionIO framework to use persistent storage. 
Currently only MySQL is supported +Persistent database storage for the [LegionIO](https://github.com/LegionIO/LegionIO) async job engine and AI coding assistant platform. Provides database connectivity via the [Sequel ORM](https://sequel.jeremyevans.net/), automatic schema migrations (100+ numbered migrations), Sequel models for the full LegionIO control plane, and a parallel local SQLite database for on-node agentic cognitive state. -Supported Ruby versions and implementations ------------------------------------------------- +**Version**: 1.8.0 | **Ruby**: >= 3.4 | **License**: Apache-2.0 -Legion::Json should work identically on: +--- -* Ruby 2.5+ +## What It Owns +`legion-data` is the data contract for LegionIO. It owns database connectivity, migrations, model loading, and portable Sequel model definitions for shared platform state. HTTP routes, runtime orchestration, and extension behavior live in other LegionIO repos and call into these models. -Installation and Usage ------------------------- +Core responsibilities: -You can verify your installation using this piece of code: +| Area | Tables and models | +|------|-------------------| +| Control plane | extensions, functions, runners, nodes, tasks, settings, workers, relationships, chains | +| Audit and governance | `audit_log`, `audit_records`, `governance_events`, archive manifests | +| Identity and RBAC | providers, principals, identities, groups, memberships, role grants, runner grants | +| LLM ledger | conversations, model-visible messages, inference requests/responses, routing, metrics, tool calls, policy/security events | +| Apollo knowledge | PostgreSQL `pgvector` knowledge entries, relations, expertise, access logs | +| Local state | on-node SQLite cognitive state, independent of the shared database | + +The schema is portable by default across SQLite, MySQL, and PostgreSQL. PostgreSQL-only behavior is isolated to features that need PostgreSQL, such as Apollo vector columns. 
+ +## Supported Databases + +| Database | Adapter | Gem | Default | +|----------|---------|-----|---------| +| SQLite | `sqlite` | `sqlite3` (bundled) | Yes | +| MySQL | `mysql2` | `mysql2` (optional) | No | +| PostgreSQL | `postgres` | `pg` (optional) | No | + +SQLite is the default and requires no additional gems. For MySQL or PostgreSQL, install the corresponding gem and configure the adapter. + +--- + +## Installation ```bash gem install legion-data ``` +Or add to your `Gemfile`: + +```ruby +gem 'legion-data' + +# For production databases, add one of these: +# gem 'mysql2', '>= 0.5.5' +# gem 'pg', '>= 1.5' +``` + +--- + +## Architecture Overview + +``` +Legion::Data (singleton module) +├── .setup # Connect, migrate, load models, set up local DB +├── .connection # Sequel::Database handle (shared/central) +├── .local # Legion::Data::Local (local SQLite accessor) +├── .stats # Combined { shared: ..., local: ... } metrics +├── .reload_static_cache # Refresh in-memory StaticCache after extension hot-load +├── .shutdown # Close both shared and local connections +│ +├── Connection # Sequel database connection management +│ ├── .adapter # Reads adapter from settings (:sqlite, :mysql2, :postgres) +│ ├── .setup # Establish connection (dev_mode fallback to SQLite if unreachable) +│ ├── .sequel # Raw Sequel::Database accessor +│ ├── .connection_info # Adapter, liveness, and fallback diagnostics +│ ├── .fallback_active? 
# True when dev fallback moved a network DB to SQLite +│ ├── .stats # Pool metrics, tuning snapshot, adapter-specific DB stats +│ └── .shutdown # Disconnect and close query file logger +│ +├── Migration # Auto-migration system (100+ numbered Sequel DSL migrations) +│ +├── Model # Sequel model autoloader +│ └── Models: Extension, Function, Runner, Node, Task, TaskLog, Setting, +│ DigitalWorker, Relationship, AuditLog, AuditRecord, Chain, +│ RbacRoleAssignment, RbacRunnerGrant, RbacCrossTeamGrant, +│ IdentityProvider, Principal, Identity, IdentityGroup, +│ IdentityGroupMembership, IdentityAuditLog, ExtractStepTiming, +│ ApolloEntry, ApolloRelation, ApolloExpertise, ApolloAccessLog (PG only), +│ LLM::Conversation, LLM::Message, LLM::MessageInferenceRequest, +│ LLM::MessageInferenceResponse, LLM::RouteAttempt, +│ LLM::MessageInferenceMetric, LLM::ToolCall, LLM::ToolCallAttempt, +│ LLM::ConversationCompaction, LLM::PolicyEvaluation, +│ LLM::SecurityEvent, LLM::RegistryEvent +│ +├── Local # Parallel local SQLite for agentic cognitive state +│ ├── .setup # Lazy init — creates legionio_local.db on first access +│ ├── .connection # Sequel::SQLite::Database handle +│ ├── .model(:table) # Create Sequel::Model bound to local connection +│ ├── .register_migrations(name:, path:) # Extensions add their own migration dirs +│ ├── .stats # Local SQLite metrics (PRAGMAs, file size, registered migrations) +│ └── .shutdown # Close local connection +│ +├── Extract # 10-handler text extraction registry (txt/md/csv/json/jsonl/html/xlsx/docx/pdf/pptx/vtt) +├── Spool # Filesystem write buffer for DB-unavailable scenarios +├── Rls # PostgreSQL row-level security helpers (tenant isolation) +├── StorageTiers # Hot/warm/cold archival lifecycle +├── EventStore # Append-only governance event store with hash chain integrity +├── Vector # Reusable pgvector helpers (cosine_search, l2_search, ensure_extension!) 
+└── Settings # Default configuration with per-adapter credential presets +``` + +### Two-Database Architecture + +`legion-data` maintains two independent databases: + +1. **Shared DB** (SQLite / MySQL / PostgreSQL) — control plane data: extensions, tasks, runners, nodes, settings, audit logs, relationships. Shared across the cluster. +2. **Local DB** (always SQLite) — agentic cognitive state: memory traces, trust scores, dream journals. On-node only; no cross-database joins. + +Deleting `legionio_local.db` erases all local cognitive state for that node in one step. This is ordinary file deletion, not cryptographic erasure in the strict sense — use an encrypted volume or secure deletion where on-disk remnants matter. + +--- + +## Usage + +```ruby require 'legion/data' +# Set up shared DB + local SQLite, run migrations, load models Legion::Data.setup -Legion::Data.connected? # => true -Legion::Data::Model::Extension.all # Sequel::Dataset + +# Access the Sequel database handle +Legion::Data.connection # => Sequel::Database + +# Access models +Legion::Data::Model::Extension.all # => Sequel::Dataset +Legion::Data::Model::Task.first(id: 42) +Legion::Data::Model::Setting.where(key: 'my_setting').first + +# Access local cognitive state DB +Legion::Data.local.connection # => Sequel::SQLite::Database +Legion::Data.local.connected? # => true +Legion::Data.local.db_path # => "legionio_local.db" + +# Check connection health +Legion::Data.connected? # => true +Legion::Data.stats # => { shared: {...}, local: {...} } + +# Inspect shared DB diagnostics, including dev fallback state +Legion::Data::Connection.connection_info +# => { adapter: :sqlite, connected: true, fallback_active: false, ... } + +# Shut down both connections +Legion::Data.shutdown +``` + +### Model Associations + +Models use Sequel associations as the public object graph. Prefer association methods and association datasets over hand-written foreign-key lookups when the relationship is part of the schema contract. 
+ +```ruby +task = Legion::Data::Model::Task.first(id: 42) +task.function # many_to_one :function +task.relationship # many_to_one :relationship +task.task_logs_dataset # further filter/order without losing the relationship + +conversation = Legion::Data::Models::LLM::Conversation.first(uuid: conversation_uuid) +conversation.messages_dataset.order(:seq).all +conversation.security_incident_lineage +``` + +Official LLM lifecycle data lives under `Legion::Data::Models::LLM`. `legion-llm` and `lex-llm-ledger` should use these models and the `llm_*` migration tables for conversations, model-visible messages, inference requests, responses, route attempts, metrics, tool calls, policy decisions, security events, and registry events. Legacy ledger-only tables are not the canonical schema. + +Association rules used in this repo follow Sequel's own association model: + +| Relationship | Use this Sequel association | +|--------------|-----------------------------| +| Current table has the foreign key | `many_to_one` | +| Associated table has the foreign key | `one_to_many` or `one_to_one` | +| Join table connects both sides | `many_to_many` | +| One associated record through a join table | `one_through_one` | + +When Sequel cannot infer names from the schema, models must be explicit with `:class`, `:key`, `:primary_key`, `:join_table`, `:left_key`, and `:right_key`. Association names must not collide with real column names because Sequel creates methods with the association name. 
+ +### Local Database (Agentic Cognitive State) + +Extensions register their own migration directories and create models bound to the local connection: + +```ruby +# Register extension migrations (called during extension setup) +Legion::Data::Local.register_migrations(name: :memory, path: '/path/to/migrations') + +# Create a model class bound to the local DB +MyMemoryTrace = Legion::Data::Local.model(:memory_traces) +MyMemoryTrace.all # queries legionio_local.db, never the shared DB +``` + +### Text Extraction + +`Legion::Data::Extract` provides a handler registry for extracting text from documents, used by `lex-knowledge` for corpus ingestion: + +```ruby +result = Legion::Data::Extract.extract('/path/to/document.pdf') +text = result[:text] +result[:step_timings] # per-step name, start_time, end_time, status, error, duration_ms +``` + +Supported formats: `.txt`, `.md`, `.csv`, `.json`, `.jsonl`, `.html`, `.xlsx`, `.docx`, `.pdf`, `.pptx`, `.vtt` + +When migration 076 is present, Extract also persists the same per-step timing rows to `extract_step_timings` +under the returned `extract_id`. + +### Task Idempotency + +`Task.idempotency_key_for` computes a stable SHA-256 key from canonical JSON payloads. 
`Task.create_idempotent` +returns an existing non-terminal task for the same key inside the optional TTL window, or creates a new task +with `idempotency_key` and `idempotency_expires_at` populated: + +```ruby +task = Legion::Data::Model::Task.create_idempotent( + { status: 'pending', payload: Legion::JSON.dump(payload) }, + payload: payload, + ttl: 300 +) +``` + +### Filesystem Spool (Write Buffer) + +When the database is unavailable, `Legion::Data::Spool` buffers writes to `~/.legionio/data/spool/` and replays once the connection is restored: + +```ruby +spool = Legion::Data::Spool.for(Legion::Extensions::MyLex) +spool.write({ task_id: SecureRandom.uuid, data: payload }) +spool.drain { |entry| process(entry) } +``` + +### Row-Level Security (PostgreSQL) + +`Legion::Data::Rls` provides tenant isolation via PostgreSQL session variables (migration 043): + +```ruby +Legion::Data::Rls.with_tenant(tenant_id) do + Legion::Data::Model::Task.all # scoped to tenant_id via RLS policy +end +``` + +### Permission Checks + +```ruby +Legion::Data.can_write?(:tasks) # => true (SQLite always true) +Legion::Data.can_read?(:tasks) # => true +Legion::Data.reset_privileges! # clear cached privilege checks +``` + +--- + +## Configuration + +All settings live under the `data` key. The adapter controls which options apply. 
+ +### SQLite (default) + +```json +{ + "data": { + "adapter": "sqlite", + "creds": { + "database": "legionio.db" + } + } +} +``` + +### MySQL + +```json +{ + "data": { + "adapter": "mysql2", + "creds": { + "username": "legion", + "password": "legion", + "database": "legionio", + "host": "127.0.0.1", + "port": 3306 + } + } +} +``` + +### PostgreSQL + +```json +{ + "data": { + "adapter": "postgres", + "creds": { + "user": "legion", + "password": "legion", + "database": "legionio", + "host": "127.0.0.1", + "port": 5432 + } + } +} ``` -Settings ----------- +PostgreSQL with `pgvector` is required for Apollo models: + +```sql +CREATE EXTENSION IF NOT EXISTS vector; +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; +``` + +### Full Configuration Reference ```json { - "connected": false, - "cache": { + "data": { + "adapter": "sqlite", "connected": false, - "auto_enable": null, - "ttl": 60 - }, - "connection": { + "dev_mode": false, + "dev_fallback": true, + "connect_on_start": true, + + "max_connections": 25, + "pool_timeout": 5, + "preconnect": "concurrently", + "single_threaded": false, + "test": true, + "log": false, - "log_connection_info": false, + "query_log": false, "log_warn_duration": 1, - "log_warn_duration": "debug", - "max_connections": 10, - "preconnect": false - }, - "creds": { - "username": "legion", - "password": "legion", - "database": "legionio", - "host": "127.0.0.1", - "port": 3306 - }, - "migrations": { - "continue_on_fail": false, - "auto_migrate": true, - "ran": false, - "version": null - }, - "models": { - "continue_on_load_fail": false, - "autoload": true - }, - "connect_on_start": true + "sql_log_level": "debug", + + "connection_validation": true, + "connection_validation_timeout": 600, + "connection_expiration": true, + "connection_expiration_timeout": 14400, + + "read_replica_url": null, + "replicas": [], + + "creds": { "database": "legionio.db" }, + + "migrations": { + "continue_on_fail": false, + "auto_migrate": true + }, + "models": { + 
"continue_on_load_fail": false, + "autoload": true + }, + "local": { + "enabled": true, + "database": "legionio_local.db", + "migrations": { "auto_migrate": true } + }, + "cache": { + "connected": false, + "auto_enable": false, + "static_cache": false, + "ttl": 60 + } + } } ``` -Authors ----------- +### Dev Mode Fallback + +When `dev_mode: true` and a network database is unreachable, the shared connection automatically falls back to SQLite: + +```json +{ "data": { "dev_mode": true, "dev_fallback": true } } +``` + +Fallback is intentionally loud. `Connection.setup` logs the degraded mode at error level, `Connection.fallback_active?` returns `true`, and `Connection.connection_info` reports the configured adapter, actual adapter, connection state, and Sequel liveness. Data written during fallback is local-only SQLite data and will not appear in the configured network database after reconnect. + +### HashiCorp Vault Integration + +When Vault is connected, credentials are fetched dynamically from `database/creds/legion`, overriding any static `creds` block. + +### Caching + +Two independent caching tiers, both disabled by default: + +| Tier | Setting | Models | Backend | +|------|---------|--------|---------| +| **StaticCache** | `data.cache.static_cache: true` | Extension, Runner, Function | In-process frozen Ruby hash | +| **External Cache** | `data.cache.auto_enable: true` + `Legion::Cache` | Relationship, Node, Setting | Redis/Memcached/Memory | + +```ruby +# After hot-loading extensions, refresh the static cache: +Legion::Data.reload_static_cache +``` + +### Read Replicas (PostgreSQL) + +```json +{ + "data": { + "read_replica_url": "postgres://user:pass@replica1/db", + "replicas": ["postgres://user:pass@replica2/db"] + } +} +``` + +--- + +## Common Fields Standard + +All new tables follow a column convention. Required fields are present on every table. Optional fields are added when the domain warrants them. 
+ +### Required + +| Column | Type | Notes | +|--------|------|-------| +| `id` | `INTEGER PRIMARY KEY` (auto-increment) | Internal join key. Never expose externally — use a `uuid` column for API/log references. | +| `identity_principal_id` | `INTEGER` FK → `identity_principals.id` | The principal who caused this row to exist. | +| `identity_id` | `INTEGER` FK → `identities.id` | The specific provider-bound identity credential. | +| `identity_canonical_name` | `VARCHAR(255)` | Denormalized snapshot of the principal's canonical name. Point-in-time copy — may become stale if the principal is renamed. Use the FK join for authoritative lookups. Exists for fast filtering without joins. | +| `created_at` | `TIMESTAMPTZ` | Row creation time. | +| `updated_at` | `TIMESTAMPTZ` | Last modification time. | + +### Optional (add when applicable) + +| Column | Type | Notes | +|--------|------|-------| +| `expires_at` | `TIMESTAMPTZ` | TTL / archival eligibility. | +| `content_type` | `VARCHAR(...)` | Classifier for the row's payload kind. | +| `conversation_id` | `INTEGER` FK → `llm_conversations.id` | Links to the LLM conversation that produced this row. | +| `contains_phi` | `BOOLEAN` | Row contains Protected Health Information. | +| `contains_pii` | `BOOLEAN` | Row contains Personally Identifiable Information. | + +### Naming rules + +- Identity FKs are always `identity_principal_id` and `identity_id` — not `principal_id`, `agent_id`, `user_id`, or other loose variants on new tables. +- The denormalized string column is always `identity_canonical_name` — not `canonical_name`, `actor`, `agent_id`, or `identity_name`. +- **Existing columns on pre-existing tables are never renamed or removed.** Columns like `agent_id`, `source_agent`, `submitted_by`, and `actor` are part of the historical record. The new identity columns are purely additive. 
+ +--- + +## Data Models + +| Model | Table | Description | +|-------|-------|-------------| +| `Extension` | `extensions` | Installed LEX extensions | +| `Function` | `functions` | Available functions per extension (with embeddings) | +| `Runner` | `runners` | Runner definitions (AMQP routing keys) | +| `Node` | `nodes` | Cluster node registry | +| `Task` | `tasks` | Task instances | +| `TaskLog` | `task_logs` | Task execution logs | +| `Setting` | `settings` | Persistent settings store | +| `DigitalWorker` | `digital_workers` | Digital worker registry | +| `Relationship` | `relationships` | Task trigger/action chains between functions | +| `Chain` | `chains` | Task execution chains | +| `AuditLog` | `audit_log` | Tamper-evident audit trail with hash chain | +| `AuditRecord` | `audit_records` | Structured audit records | +| `ExtractStepTiming` | `extract_step_timings` | Per-step Extract pipeline timing metadata | +| `RbacRoleAssignment` | `rbac_role_assignments` | RBAC principal -> role mappings | +| `RbacRunnerGrant` | `rbac_runner_grants` | Per-runner permission grants | +| `RbacCrossTeamGrant` | `rbac_cross_team_grants` | Cross-team access grants | +| `IdentityProvider` | `identity_providers` | Identity provider registrations | +| `Principal` | `principals` | Authentication principals | +| `Identity` | `identities` | Identity records tied to principals | +| `IdentityGroup` | `identity_groups` | Identity groups | +| `IdentityGroupMembership` | `identity_group_memberships` | Group membership records | +| `ApolloEntry` | `apollo_entries` | Knowledge entries — PostgreSQL only (pgvector) | +| `ApolloRelation` | `apollo_relations` | Relations between Apollo entries — PostgreSQL only | +| `ApolloExpertise` | `apollo_expertise` | Per-agent domain expertise — PostgreSQL only | +| `ApolloAccessLog` | `apollo_access_log` | Apollo access audit log — PostgreSQL only | + +Apollo models require PostgreSQL with the `pgvector` extension. 
They are skipped silently on SQLite and MySQL. + +The `Legion::Data::Model::Identity::*`, `Apollo::*`, and `RBAC::*` namespaces provide cleaner Sequel model names for API-facing code while preserving the legacy flat model classes. Official LLM lifecycle models live under `Legion::Data::Models::LLM`. + +### Identity Namespace Models + +| Model | Table | Description | +|-------|-------|-------------| +| `Identity::Provider` | `identity_providers` | Provider records with integer primary keys and public UUIDs | +| `Identity::ProviderCapability` | `identity_provider_capabilities` | Normalized provider capability declarations | +| `Identity::Principal` | `identity_principals` | Human, service, worker, or system principals | +| `Identity::Identity` | `identities` | Provider-bound identities for principals | +| `Identity::Group` | `identity_groups` | Identity groups | +| `Identity::GroupMembership` | `identity_group_memberships` | Principal and identity group membership rows | +| `Identity::AuditLog` | `identity_audit_log` | Identity lifecycle and lookup audit events | + +### LLM Lifecycle Models + +| Model | Table | Description | +|-------|-------|-------------| +| `LLM::Conversation` | `llm_conversations` | Conversation container tied to the base user identity | +| `LLM::Message` | `llm_messages` | Model-visible conversation transcript messages | +| `LLM::MessageInferenceRequest` | `llm_message_inference_requests` | Provider request assembled from message, context, tools, policy, and routing inputs | +| `LLM::MessageInferenceResponse` | `llm_message_inference_responses` | Provider/runtime response for one inference request | +| `LLM::RouteAttempt` | `llm_route_attempts` | Provider/model/runner routing attempts, including failures and escalations | +| `LLM::MessageInferenceMetric` | `llm_message_inference_metrics` | Token, latency, cost, and finance usage metrics for an inference pair | +| `LLM::ToolCall` | `llm_tool_calls` | Tool calls requested by an LLM provider 
response | +| `LLM::ToolCallAttempt` | `llm_tool_call_attempts` | Execution attempts, retries, failures, and results for provider-requested tool calls | +| `LLM::ConversationCompaction` | `llm_conversation_compactions` | Conversation-scoped compaction events | +| `LLM::PolicyEvaluation` | `llm_policy_evaluations` | Policy, classification, RBAC, and enforcement decisions for inference requests | +| `LLM::SecurityEvent` | `llm_security_events` | Security-relevant events tied to conversation, inference, response, or tool attempts | +| `LLM::RegistryEvent` | `llm_registry_events` | Provider/model registry availability and health events | + +--- + +## Dependencies + +| Gem | Purpose | +|-----|---------| +| `sequel` (>= 5.70) | ORM and migration framework | +| `sqlite3` (>= 2.0) | SQLite adapter (default, bundled) | +| `csv` (>= 3.2) | CSV extraction handler | +| `legion-json` | JSON serialization via Legion::JSON | +| `legion-logging` (>= 1.5.0) | Structured logging | +| `legion-settings` (>= 1.3.26) | Configuration management | +| `mysql2` (>= 0.5.5) | MySQL adapter (optional) | +| `pg` (>= 1.5) | PostgreSQL adapter (optional) | + +--- + +## Migrations + +97 numbered Sequel DSL migrations run automatically on startup when `data.migrations.auto_migrate` is `true`; if `Legion::Mode` is available, they run only in `infra` mode. 
Key milestones: + +| Range | What was added | +|-------|---------------| +| 001–011 | Core schema: nodes, settings, extensions, runners, functions, tasks, digital workers, value metrics | +| 012 | Apollo tables (PG only: pgvector, uuid-ossp, 4 tables) | +| 013–014 | Relationships table with trigger/action FK chains | +| 015 | RBAC tables | +| 017–019 | Audit log with tamper-evident hash chain | +| 020–025 | Webhooks, archive tables, memory traces, tenant partitions | +| 026 | Function embeddings (description + vector on functions) | +| 028–030 | Agent clusters and approval queue | +| 047–048 | Apollo knowledge capture + financial logging (UAIS cost recovery, 7 tables) | +| 050 | Critical indexes across 13 tables | +| 058–067 | Audit records, chains, knowledge tiers, tool embedding cache, identity system (providers, principals, identities, groups) | +| 068–071 | Entity type on audit records, principal on nodes, approval queue resume, engine on relationships | +| 072–073 | Identity audit log and multi-instance identity columns | +| 074–076 | Apollo field width fixes, task idempotency columns, and Extract step timing rows | +| 077–090 | Portable LLM lifecycle schema: conversations, messages, inference requests/responses, route attempts, inference metrics, provider-requested tool calls, compactions, policy/security, and registry events | +| 091–096 | Portable identity companion schema with integer primary keys, public UUIDs, provider capabilities, principals, identities, groups, memberships, and audit log | +| 097 | LLM dispatch identifiers for fleet operation, correlation, idempotency, provider instance, and dispatch path | + +Run migrations standalone: + +```bash +bundle exec legionio_migrate +``` + +Migration rules: + +- Do not edit published migrations. +- Do not guard migrations with `create_table?`, `table_exists?`, `if_not_exists`, or similar conditional schema logic. 
+- Add new migrations in the next available number and keep domains split by dependency and rollback risk. +- Use portable Sequel DSL unless a feature truly requires adapter-specific behavior. +- Prefer integer `id` primary keys for joins plus public `uuid` columns for APIs, logs, and external references. +- Avoid JSON columns unless the shape is genuinely provider-specific or dynamic evidence. + +--- + +## CLI Executable + +`exe/legionio_migrate` runs database migrations standalone, outside the full LegionIO service: + +```bash +bundle exec legionio_migrate +``` + +--- + +## Role in LegionIO + +`legion-data` is optional but provides core platform persistence. It initializes during `Legion::Service` startup (after transport). Key responsibilities: + +1. Extension and function registry +2. Task scheduling, logging, and relationship chains +3. Node cluster membership tracking +4. Persistent settings storage +5. Digital worker registry (AI-as-labor platform) +6. RBAC assignment tables +7. Audit log with tamper-evident hash chain +8. Governance event store with append-only integrity +9. Apollo shared knowledge store (PostgreSQL + pgvector, used by `lex-apollo`) +10. Local SQLite for agentic cognitive state — always on-node, independent of shared DB +11. Financial logging for UAIS cost recovery +12. Global tool embedding cache (L4 tier for `Legion::Tools::EmbeddingCache`) +13. Unified identity system (providers, principals, identities, groups) +14. 
LLM lifecycle ledger for audit, finance metrics, routing reconstruction, tool calls, and security incident lineage + +--- + +## Contributing + +```bash +git clone https://github.com/LegionIO/legion-data +cd legion-data +bundle install +bundle exec rspec # all tests must pass +bundle exec rubocop -A # zero offenses expected +``` + +This repo also includes a pre-commit configuration: + +```bash +pre-commit install +pre-commit run --all-files +``` + +The local RuboCop hook auto-corrects staged Ruby files when RuboCop is available and fails the commit when RuboCop reports real offenses. The Ruby syntax hook checks every staged Ruby file. + +Follow the [LegionIO contribution guide](https://github.com/LegionIO/.github/blob/main/CONTRIBUTING.md). Open a PR against `main`. + +--- -* [Matthew Iverson](https://github.com/Esity) - current maintainer \ No newline at end of file +**Maintained by**: Matthew Iverson ([@Esity](https://github.com/Esity)) diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index acc4d53..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,9 +0,0 @@ -# Security Policy - -## Supported Versions -| Version | Supported | -| ------- | ------------------ | -| 1.x.x | :white_check_mark: | - -## Reporting a Vulnerability -To be added diff --git a/attribution.txt b/attribution.txt deleted file mode 100644 index e4c875c..0000000 --- a/attribution.txt +++ /dev/null @@ -1 +0,0 @@ -Add attributions here. 
\ No newline at end of file diff --git a/legion-data.gemspec b/legion-data.gemspec index 968f294..49f1bf8 100644 --- a/legion-data.gemspec +++ b/legion-data.gemspec @@ -6,28 +6,30 @@ Gem::Specification.new do |spec| spec.name = 'legion-data' spec.version = Legion::Data::VERSION spec.authors = ['Esity'] - spec.email = %w[matthewdiverson@gmail.com ruby@optum.com] + spec.email = ['matthewdiverson@gmail.com'] spec.summary = 'Manages the connects to the backend database' spec.description = 'A LegionIO gem to connect to a persistent data store' - spec.homepage = 'https://github.com/Optum/legion-data' + spec.homepage = 'https://github.com/LegionIO/legion-data' spec.license = 'Apache-2.0' - spec.required_ruby_version = '>= 2.5' + spec.required_ruby_version = '>= 3.4' spec.require_paths = ['lib'] spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } - spec.test_files = spec.files.select { |p| p =~ %r{^test/.*_test.rb} } - spec.extra_rdoc_files = %w[README.md LICENSE CHANGELOG.md] + spec.extra_rdoc_files = %w[README.md LICENSE CHANGELOG.md] spec.metadata = { - 'bug_tracker_uri' => 'https://github.com/Optum/legion-data/issues', - 'changelog_uri' => 'https://github.com/Optum/legion-data/src/main/CHANGELOG.md', - 'documentation_uri' => 'https://github.com/Optum/legion-data', - 'homepage_uri' => 'https://github.com/Optum/LegionIO', - 'source_code_uri' => 'https://github.com/Optum/legion-data', - 'wiki_uri' => 'https://github.com/Optum/legion-data/wiki' + 'bug_tracker_uri' => 'https://github.com/LegionIO/legion-data/issues', + 'changelog_uri' => 'https://github.com/LegionIO/legion-data/blob/main/CHANGELOG.md', + 'documentation_uri' => 'https://github.com/LegionIO/legion-data', + 'homepage_uri' => 'https://github.com/LegionIO/LegionIO', + 'source_code_uri' => 'https://github.com/LegionIO/legion-data', + 'wiki_uri' => 'https://github.com/LegionIO/legion-data/wiki', + 'rubygems_mfa_required' => 'true' } - spec.add_dependency 'legion-logging' 
- spec.add_dependency 'legion-settings' - spec.add_dependency 'mysql2' - spec.add_dependency 'sequel' + spec.add_dependency 'csv', '>= 3.2' + spec.add_dependency 'legion-json' + spec.add_dependency 'legion-logging', '>= 1.5.0' + spec.add_dependency 'legion-settings', '>= 1.3.26' + spec.add_dependency 'sequel', '>= 5.70' + spec.add_dependency 'sqlite3', '>= 2.0' end diff --git a/lib/legion/data.rb b/lib/legion/data.rb index 675c936..2e09dab 100755 --- a/lib/legion/data.rb +++ b/lib/legion/data.rb @@ -1,3 +1,6 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' require 'legion/data/version' require 'legion/data/settings' require 'sequel' @@ -5,15 +8,72 @@ require 'legion/data/connection' require 'legion/data/model' require 'legion/data/migration' +require_relative 'data/local' +require_relative 'data/spool' +require_relative 'data/partition_manager' +require_relative 'data/archiver' +require_relative 'data/helper' +require_relative 'data/rls' +require_relative 'data/extract' +require_relative 'data/audit_record' +require_relative 'data/audit_log_hash_chain' + +unless Legion::Logging::Helper.method_defined?(:handle_exception) + module Legion + module Logging + module Helper + def handle_exception(exception, task_id: nil, level: :error, handled: true, **opts) + context = opts.map { |key, value| "#{key}=#{value.inspect}" }.join(' ') + message = "#{exception.class}: #{exception.message}" + message = "#{message} task_id=#{task_id}" if task_id + message = "#{message} handled=#{handled}" + message = "#{message} #{context}" unless context.empty? 
+ warn("[#{level}] #{message}") + rescue StandardError => e + warn("handle_exception fallback failed: #{e.class}: #{e.message}") + end + end + end + end +end module Legion module Data class << self + include Legion::Logging::Helper + def setup + log.info 'Legion::Data setup starting' + setup_global + setup_cache + setup_local + log.info 'Legion::Data setup complete' + end + + def setup_local + return if Legion::Settings[:data].dig(:local, :enabled) == false + + Legion::Data::Local.setup + log.info "Legion::Data::Local connected to #{Legion::Data::Local.db_path}" + rescue StandardError => e + handle_exception(e, level: :fatal, operation: :setup_local) + raise + end + + def setup_global connection_setup migrate load_models - setup_cache + rescue StandardError => e + handle_exception(e, level: :fatal, operation: :setup_global) + end + + def setup_cache + cache_settings = Legion::Settings[:data][:cache] + setup_static_cache if cache_settings[:static_cache] + setup_external_cache if cache_settings[:auto_enable] && defined?(::Legion::Cache) + rescue StandardError => e + handle_exception(e, level: :error, operation: :setup_cache) end def connection_setup @@ -23,10 +83,14 @@ def connection_setup end def migrate + return if skip_migrations? 
+ Legion::Data::Migration.migrate end def load_models + return unless Legion::Settings[:data][:models][:autoload] != false + Legion::Data::Models.load end @@ -34,26 +98,120 @@ def connection Legion::Data::Connection.sequel end - def setup_cache - return if Legion::Settings[:data][:cache][:enabled] + def local + Legion::Data::Local + end - return unless defined?(::Legion::Cache) + def stats + { + shared: Legion::Data::Connection.stats, + local: Legion::Data::Local.stats + } + end - # Legion::Data::Model::Relationship.plugin :caching, Legion::Cache, ttl: 10 - # Legion::Data::Model::Runner.plugin :caching, Legion::Cache, ttl: 60 - # Legion::Data::Model::Chain.plugin :caching, Legion::Cache, ttl: 60 - # Legion::Data::Model::Function.plugin :caching, Legion::Cache, ttl: 120 - # Legion::Data::Model::Extension.plugin :caching, Legion::Cache, ttl: 120 - # Legion::Data::Model::Node.plugin :caching, Legion::Cache, ttl: 10 - # Legion::Data::Model::TaskLog.plugin :caching, Legion::Cache, ttl: 12 - # Legion::Data::Model::Task.plugin :caching, Legion::Cache, ttl: 10 - # Legion::Data::Model::User.plugin :caching, Legion::Cache, ttl: 120 - # Legion::Data::Model::Group.plugin :caching, Legion::Cache, ttl: 120 - # Legion::Logging.info 'Legion::Data connected to Legion::Cache' + def connected? + Legion::Settings[:data][:connected] == true + rescue StandardError => e + handle_exception(e, level: :debug, handled: true, operation: :connected?) + false + end + + def can_write?(table_name) + return false unless connected? + + adapter = Legion::Settings[:data][:adapter]&.to_s + return true if adapter == 'sqlite' + + @write_privileges ||= {} + return @write_privileges[table_name] unless @write_privileges[table_name].nil? 
+ + @write_privileges[table_name] = connection + .fetch("SELECT has_table_privilege(current_user, ?, 'INSERT') AS can", table_name.to_s) + .first[:can] == true + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :can_write?, table: table_name) + @write_privileges[table_name] = false if @write_privileges + false + end + + def can_read?(table_name) + return false unless connected? + + adapter = Legion::Settings[:data][:adapter]&.to_s + return true if adapter == 'sqlite' + + @read_privileges ||= {} + return @read_privileges[table_name] unless @read_privileges[table_name].nil? + + @read_privileges[table_name] = connection + .fetch("SELECT has_table_privilege(current_user, ?, 'SELECT') AS can", table_name.to_s) + .first[:can] == true + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :can_read?, table: table_name) + @read_privileges[table_name] = false if @read_privileges + false + end + + def reset_privileges! + @write_privileges = nil + @read_privileges = nil + end + + def setup_static_cache + [Model::Extension, Model::Runner, Model::Function].each do |model| + model.plugin :static_cache + log.debug("StaticCache enabled for #{model}") + rescue StandardError => e + handle_exception(e, level: :warn, operation: :setup_static_cache, model: model.to_s) + end + log.info 'Legion::Data static cache loaded' + end + + def reload_static_cache + [Model::Extension, Model::Runner, Model::Function].each do |model| + model.load_cache if model.respond_to?(:load_cache) + end + log.info 'Legion::Data static cache reloaded' + end + + def setup_external_cache + ttl = Legion::Settings[:data][:cache][:ttl] || 60 + { + Model::Relationship => 10, + Model::Node => 10, + Model::Setting => ttl + }.each do |model, model_ttl| + model.plugin :caching, ::Legion::Cache, ttl: model_ttl + log.debug("Caching enabled for #{model} (ttl: #{model_ttl})") + rescue StandardError => e + handle_exception(e, level: :warn, operation: 
:setup_external_cache, model: model.to_s, ttl: model_ttl) + end + log.info 'Legion::Data external cache connected' end def shutdown + Legion::Data::Local.shutdown if defined?(Legion::Data::Local) && Legion::Data::Local.connected? Legion::Data::Connection.shutdown + log.info 'Legion::Data shutdown complete' + end + + private + + def skip_migrations? + # Check auto_migrate setting + auto_migrate = Legion::Settings[:data][:migrations][:auto_migrate] + unless auto_migrate + log.info 'Legion::Data migrations skipped (auto_migrate: false)' + return true + end + + # Check mode gate: only infra mode runs migrations (when Mode is available) + if defined?(Legion::Mode) && Legion::Mode.respond_to?(:current) && !Legion::Mode.infra? + log.info "Legion::Data migrations skipped (mode: #{Legion::Mode.current}, requires: infra)" + return true + end + + false end end end diff --git a/lib/legion/data/archival.rb b/lib/legion/data/archival.rb new file mode 100644 index 0000000..78b722b --- /dev/null +++ b/lib/legion/data/archival.rb @@ -0,0 +1,167 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' +require_relative 'archival/policy' + +module Legion + module Data + module Archival + ARCHIVE_TABLE_MAP = { + tasks: :tasks_archive, + metering_records: :metering_records_archive + }.freeze + + class << self + include Legion::Logging::Helper + + def archive!(policy: Policy.new, dry_run: false) + log.info "Archival run started dry_run=#{dry_run} tables=#{policy.tables.size}" + results = {} + policy.tables.each do |table_name| + table = table_name.to_sym + archive_table = ARCHIVE_TABLE_MAP[table] + next unless archive_table && db_ready?(table) && db_ready?(archive_table) + + log.info "Archiving #{table} -> #{archive_table} (cutoff: #{policy.warm_cutoff}, dry_run: #{dry_run})" + count = archive_table!( + source: table, destination: archive_table, + cutoff: policy.warm_cutoff, batch_size: policy.batch_size, dry_run: dry_run + ) + results[table] = count + end + log.info 
"Archival run completed tables=#{results.keys.join(',')}" unless results.empty? + results + rescue StandardError => e + handle_exception(e, level: :error, handled: false, operation: :archive!, dry_run: dry_run) + raise + end + + def restore(table:, ids:) + source_table = table.to_sym + archive_table = ARCHIVE_TABLE_MAP[source_table] + return 0 unless archive_table && db_ready?(archive_table) + + conn = Legion::Data.connection + restored = 0 + conn.transaction do + conn[archive_table].where(original_id: ids).each do |row| + restore_row = row.except(:id, :archived_at, :original_id, :original_created_at, :original_updated_at) + restore_row[:id] = row[:original_id] + restore_row[:created_at] = row[:original_created_at] + restore_row[:updated_at] = row[:original_updated_at] + conn[source_table].insert(restore_row) + restored += 1 + end + conn[archive_table].where(original_id: ids).delete + end + log.info "Restored #{restored} row(s) from #{archive_table} -> #{source_table}" + restored + rescue StandardError => e + handle_exception(e, level: :error, handled: false, operation: :restore, table: source_table, ids: Array(ids)) + raise + end + + def search(table:, where: {}) + source_table = table.to_sym + archive_table = ARCHIVE_TABLE_MAP[source_table] + return [] unless db_ready?(source_table) + + log.info "Archival search table=#{source_table} where_keys=#{where.keys.join(',')}" + conn = Legion::Data.connection + hot = conn[source_table].where(where).all + warm = db_ready?(archive_table) ? 
conn[archive_table].where(where).all : [] + hot + warm + rescue StandardError => e + handle_exception(e, level: :error, handled: false, operation: :search, table: source_table, where_keys: where.keys) + raise + end + + def archive_completed_tasks(days_old: 90, batch_size: 1000) + conn = Legion::Data.connection + cutoff = Time.now - (days_old * 86_400) + + return { archived: 0, cutoff: cutoff.iso8601 } unless conn&.table_exists?(:tasks) && conn.table_exists?(:tasks_archive) + + candidates = conn[:tasks] + .where(status: %w[completed failed]) + .where(Sequel.lit('created < ?', cutoff)) + .limit(batch_size) + + count = candidates.count + if count.positive? + archive_cols = conn.schema(:tasks_archive).to_set(&:first) + conn.transaction do + candidates.each do |row| + archive_row = { + original_id: row[:id], + status: row[:status], + relationship_id: row[:relationship_id], + original_created_at: row[:created], + original_updated_at: row[:updated], + archived_at: Time.now + } + archive_row[:archive_reason] = 'completed_task_archival' if archive_cols.include?(:archive_reason) + conn[:tasks_archive].insert(archive_row) + end + conn[:tasks].where(id: candidates.select(:id)).delete + end + end + + log.info "archive_completed_tasks: archived #{count} tasks (cutoff: #{cutoff.iso8601})" + { archived: count, cutoff: cutoff.iso8601 } + rescue StandardError => e + handle_exception(e, level: :error, handled: false, operation: :archive_completed_tasks, days_old: days_old, batch_size: batch_size) + raise + end + + def run_scheduled_archival + log.info 'Running scheduled archival' + results = {} + results[:tasks] = archive_completed_tasks + + conn = Legion::Data.connection + if conn&.table_exists?(:metering_records) + results[:metering] = Legion::Data::Retention.archive_old_records( + table: :metering_records, date_column: :recorded_at + ) + end + + log.info "Scheduled archival completed keys=#{results.keys.join(',')}" + results + rescue StandardError => e + handle_exception(e, level: 
:error, handled: false, operation: :run_scheduled_archival) + raise + end + + private + + def archive_table!(source:, destination:, cutoff:, batch_size:, dry_run:) + conn = Legion::Data.connection + candidates = conn[source].where { created_at < cutoff }.limit(batch_size) + count = candidates.count + return count if dry_run || count.zero? + + conn.transaction do + candidates.each do |row| + archive_row = row.dup + archive_row[:original_id] = archive_row.delete(:id) + archive_row[:original_created_at] = archive_row.delete(:created_at) + archive_row[:original_updated_at] = archive_row.delete(:updated_at) + archive_row[:archived_at] = Time.now + conn[destination].insert(archive_row) + end + conn[source].where(id: candidates.select(:id)).delete + end + count + end + + def db_ready?(table) + defined?(Legion::Data) && Legion::Data.connection&.table_exists?(table) + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :archival_db_ready, table: table) + false + end + end + end + end +end diff --git a/lib/legion/data/archival/policy.rb b/lib/legion/data/archival/policy.rb new file mode 100644 index 0000000..9b35723 --- /dev/null +++ b/lib/legion/data/archival/policy.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' + +module Legion + module Data + module Archival + class Policy + DEFAULTS = { + warm_after_days: 7, + cold_after_days: 90, + batch_size: 1000, + tables: %w[tasks metering_records].freeze + }.freeze + + # Per-table date column overrides. The Retention module defaults to :created_at, + # but legacy tables (like tasks) use :created. Migration 051 may add a created_at + # column/alias for tasks (implementation varies by adapter); this map forces use of + # :created so behavior is consistent across legacy schemas and adapters. 
+ DATE_COLUMN_OVERRIDES = { + 'tasks' => :created + }.freeze + + attr_reader :warm_after_days, :cold_after_days, :batch_size, :tables + + def initialize(**opts) + config = DEFAULTS.merge(opts) + @warm_after_days = config[:warm_after_days] + @cold_after_days = config[:cold_after_days] + @batch_size = config[:batch_size] + @tables = config[:tables] + end + + def warm_cutoff + Time.now - (warm_after_days * 86_400) + end + + def cold_cutoff + Time.now - (cold_after_days * 86_400) + end + + class << self + include Legion::Logging::Helper + end + + def self.from_settings + return new unless defined?(Legion::Settings) + + data_settings = Legion::Settings[:data] + archival = data_settings.is_a?(Hash) ? data_settings[:archival] : nil + return new unless archival.is_a?(Hash) + + new(**archival.slice(:warm_after_days, :cold_after_days, :batch_size, :tables)) + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :policy_from_settings) + new + end + end + end + end +end diff --git a/lib/legion/data/archiver.rb b/lib/legion/data/archiver.rb new file mode 100644 index 0000000..5793d28 --- /dev/null +++ b/lib/legion/data/archiver.rb @@ -0,0 +1,219 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' +require 'digest' +require 'fileutils' +require 'json' +require 'securerandom' +require 'tmpdir' +require 'zlib' + +module Legion + module Data + module Archiver + class UploadError < StandardError; end + + class << self + include Legion::Logging::Helper + + def archive_table(table:, retention_days: 90, batch_size: 1000, storage_backend: nil) + return { skipped: true, reason: 'not_postgres' } unless postgres? 
+ + log.info "Archiving table #{table} (retention: #{retention_days}d)" + + conn = Legion::Data.connection + cutoff = Time.now - (retention_days * 86_400) + archive_results = archive_batches( + conn: conn, + table: table, + cutoff: cutoff, + batch_size: batch_size, + storage_backend: storage_backend + ) + + log.info "Archived #{archive_results[:total_rows]} rows from #{table} in #{archive_results[:batches]} batch(es)" + archive_results + rescue StandardError => e + handle_exception( + e, + level: :error, + handled: false, + operation: :archive_table, + table: table, + retention_days: retention_days, + batch_size: batch_size, + storage_backend: storage_backend + ) + raise + end + + def upload_batch(data:, table:, year:, month:, batch_n:, backend:) + log.info "Archiver storing batch table=#{table} backend=#{backend || :tmpdir} year=#{year} month=#{month} batch=#{batch_n}" + case backend + when :s3 + upload_s3(data: data, table: table, year: year, month: month, batch_n: batch_n) + when :azure + upload_azure(data: data, table: table, year: year, month: month, batch_n: batch_n) + else + upload_tmpdir(data: data, table: table, year: year, month: month, batch_n: batch_n) + end + end + + def manifest_stats + return {} unless postgres? + return {} unless Legion::Data.connection.table_exists?(:archive_manifest) + + Legion::Data.connection[:archive_manifest] + .group_and_count(:source_table) + .select_append( + Sequel.function(:sum, :row_count).as(:total_rows), + Sequel.function(:min, :archived_at).as(:earliest), + Sequel.function(:max, :archived_at).as(:latest) + ) + .all + .to_h do |row| + [row[:source_table], { + batches: row[:count], + total_rows: row[:total_rows].to_i, + earliest: row[:earliest], + latest: row[:latest] + }] + end + end + + private + + def postgres? 
+ Legion::Data::Connection.adapter == :postgres + end + + def serialize_rows(rows) + rows.map { |row| json_dump(row) }.join("\n") + end + + def archive_batches(conn:, table:, cutoff:, batch_size:, storage_backend:) + now = Time.now.utc + batches = 0 + total_rows = 0 + paths = [] + + loop do + batch_result = archive_batch( + conn: conn, + table: table, + cutoff: cutoff, + batch_size: batch_size, + batch_n: batches + 1, + now: now, + storage_backend: storage_backend + ) + break unless batch_result + + batches += 1 + total_rows += batch_result[:row_count] + paths << batch_result[:path] + end + + { batches: batches, total_rows: total_rows, paths: paths } + end + + def archive_batch(conn:, table:, cutoff:, batch_size:, batch_n:, now:, storage_backend:) + rows = conn[table].where { created_at < cutoff }.limit(batch_size).all + return if rows.empty? + + compressed = gzip_compress(serialize_rows(rows)) + path = upload_batch( + data: compressed, + table: table.to_s, + year: now.year, + month: now.month, + batch_n: batch_n, + backend: storage_backend + ) + + record_archived_batch( + conn: conn, + table: table, + rows: rows, + compressed: compressed, + path: path, + now: now + ) + + { row_count: rows.size, path: path } + end + + def record_archived_batch(conn:, table:, rows:, compressed:, path:, now:) + conn.transaction do + conn[:archive_manifest].insert( + batch_id: SecureRandom.uuid, + source_table: table.to_s, + row_count: rows.size, + checksum: Digest::SHA256.hexdigest(compressed), + storage_path: path, + archived_at: now + ) + conn[table].where(id: rows.map { |row| row[:id] }).delete + end + end + + def json_dump(obj) + Legion::JSON.generate(obj) + end + + def gzip_compress(data) + output = StringIO.new + output.binmode + gz = Zlib::GzipWriter.new(output) + gz.write(data) + gz.close + output.string + end + + def upload_s3(data:, table:, year:, month:, batch_n:) + raise UploadError, 'S3 backend not available: Legion::Extensions::S3::Runners::Put not defined' unless 
defined?(Legion::Extensions::S3::Runners::Put) + + key = "legion-archive/#{table}/#{year}/#{month}/batch_#{batch_n}.jsonl.gz" + Legion::Extensions::S3::Runners::Put.run(key: key, body: data) + log.info "Archiver uploaded batch to s3 key=#{key}" + "s3://#{key}" + rescue UploadError => e + handle_exception(e, level: :error, handled: false, operation: :upload_s3, table: table, year: year, month: month, batch_n: batch_n) + raise + rescue StandardError => e + handle_exception(e, level: :error, handled: true, operation: :upload_s3, table: table, year: year, month: month, batch_n: batch_n) + raise UploadError, "S3 upload failed: #{e.message}" + end + + def upload_azure(data:, table:, year:, month:, batch_n:) + unless defined?(Legion::Extensions::AzureStorage::Runners::Upload) + raise UploadError, 'Azure backend not available: Legion::Extensions::AzureStorage::Runners::Upload not defined' + end + + blob_name = "legion-archive/#{table}/#{year}/#{month}/batch_#{batch_n}.jsonl.gz" + Legion::Extensions::AzureStorage::Runners::Upload.run(blob_name: blob_name, data: data) + log.info "Archiver uploaded batch to azure blob=#{blob_name}" + "azure://#{blob_name}" + rescue UploadError => e + handle_exception(e, level: :error, handled: false, operation: :upload_azure, table: table, year: year, month: month, batch_n: batch_n) + raise + rescue StandardError => e + handle_exception(e, level: :error, handled: false, operation: :upload_azure, table: table, year: year, month: month, batch_n: batch_n) + raise UploadError, "Azure upload failed: #{e.message}" + end + + def upload_tmpdir(data:, table:, year:, month:, batch_n:) + dir = File.join(Dir.tmpdir, 'legion-archive', table.to_s, year.to_s, month.to_s) + FileUtils.mkdir_p(dir) + path = File.join(dir, "batch_#{batch_n}.jsonl.gz") + File.binwrite(path, data) + log.info "Archiver stored batch locally path=#{path}" + "file://#{path}" + rescue StandardError => e + handle_exception(e, level: :error, handled: true, operation: :upload_tmpdir, 
table: table, year: year, month: month, batch_n: batch_n) + raise UploadError, "Tmpdir upload failed: #{e.message}" + end + end + end + end +end diff --git a/lib/legion/data/audit_log_hash_chain.rb b/lib/legion/data/audit_log_hash_chain.rb new file mode 100644 index 0000000..2139f04 --- /dev/null +++ b/lib/legion/data/audit_log_hash_chain.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +require 'digest' +require 'legion/json' +require 'time' + +module Legion + module Data + module AuditLogHashChain + GENESIS_HASH = ('0' * 64).freeze + CANONICAL_FIELDS = %i[ + principal_id action resource source status detail created_at previous_hash + ].freeze + + class << self + def compute_hash(record) + Digest::SHA256.hexdigest(canonical_payload(record)) + end + + def verify(records) + previous_hash = GENESIS_HASH + records.each do |record| + return invalid(record, :parent_mismatch) unless value_for(record, :previous_hash).to_s == previous_hash + + expected = compute_hash(record) + return invalid(record, :hash_mismatch) unless value_for(record, :record_hash).to_s == expected + + previous_hash = expected + end + + { valid: true, length: records.size } + end + + def canonical_payload(record) + CANONICAL_FIELDS.map do |field| + "#{field}:#{canonical_value(value_for(record, field))}" + end.join('|') + end + + private + + def invalid(record, reason) + { valid: false, broken_at: value_for(record, :id), reason: reason } + end + + def canonical_value(value) + case value + when Time + value.utc.iso8601(6) + when DateTime + value.to_time.utc.iso8601(6) + when Hash + Legion::JSON.dump(canonical_hash(value)) + when Array + Legion::JSON.dump(value.map { |item| canonical_json_value(item) }) + else + value.to_s + end + end + + def canonical_json_value(value) + case value + when Hash then canonical_hash(value) + when Array then value.map { |item| canonical_json_value(item) } + else value + end + end + + def canonical_hash(hash) + hash.keys.map(&:to_s).sort.to_h do |key| + [key, 
canonical_json_value(hash.fetch(key) { hash.fetch(key.to_sym) })] + end + end + + def value_for(record, field) + return record[field] if record.respond_to?(:[]) && !record[field].nil? + return record[field.to_s] if record.respond_to?(:[]) && !record[field.to_s].nil? + return record.public_send(field) if record.respond_to?(field) + + nil + end + end + end + end +end diff --git a/lib/legion/data/audit_record.rb b/lib/legion/data/audit_record.rb new file mode 100644 index 0000000..50ae3c2 --- /dev/null +++ b/lib/legion/data/audit_record.rb @@ -0,0 +1,179 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' +require 'digest' + +module Legion + module Data + module AuditRecord + GENESIS_HASH = ('0' * 64).freeze + + class << self + include Legion::Logging::Helper + + # Append a new record to the named chain. Returns the persisted record hash + # on success, or an error hash when the database is unavailable. + # + # @param chain_id [String] chain identifier (scopes the sequence) + # @param content_type [String] caller-defined type label + # @param content_hash [String] SHA-256 hex digest of the content being recorded + # @param metadata [Hash] optional structured context (serialised to JSON) + # @param sign [Boolean] when true, attempt signing via legion-crypt + def append(chain_id:, content_type:, content_hash:, metadata: {}, sign: false) + return { error: 'db unavailable' } unless db_ready? + + conn = Legion::Data.connection + conn.transaction do + parent_hash = latest_chain_hash(conn, chain_id) + ts = truncate_to_us(Time.now) + ch = compute_chain_hash(parent_hash, content_hash, ts, content_type) + sig = sign ? sign_record(ch) : nil + meta_json = metadata.empty? ? 
nil : Legion::JSON.dump(metadata) + + id = conn[:audit_records].insert( + chain_id: chain_id, + content_type: content_type, + content_hash: content_hash, + parent_hash: parent_hash, + chain_hash: ch, + signature: sig, + metadata: meta_json, + created_at: ts + ) + + log.debug "AuditRecord append: chain=#{chain_id} type=#{content_type} id=#{id}" + { id: id, chain_id: chain_id, chain_hash: ch, parent_hash: parent_hash } + end + end + + # Walk all records in the chain ordered by creation time and verify that + # each record's stored chain_hash matches a freshly computed one. + # + # @param chain_id [String] + # @return [Hash] { valid: Boolean, length: Integer, broken_at: Integer? } + def verify(chain_id:) + return { valid: false, error: 'db unavailable' } unless db_ready? + + records = Legion::Data.connection[:audit_records] + .where(chain_id: chain_id) + .order(:created_at, :id) + .all + + prev_hash = GENESIS_HASH + records.each do |r| + unless r[:parent_hash] == prev_hash + log.warn "AuditRecord chain broken: chain=#{chain_id} id=#{r[:id]}" + return { valid: false, broken_at: r[:id], reason: :parent_mismatch } + end + + expected = compute_chain_hash(prev_hash, r[:content_hash], r[:created_at], r[:content_type]) + unless r[:chain_hash] == expected + log.warn "AuditRecord hash mismatch: chain=#{chain_id} id=#{r[:id]}" + return { valid: false, broken_at: r[:id], reason: :hash_mismatch } + end + + prev_hash = r[:chain_hash] + end + + { valid: true, length: records.size } + end + + # Return all records for a chain as deserialised hashes. + # + # @param chain_id [String] + # @param since [Time, nil] optional lower bound on created_at + # @param limit [Integer] + def walk(chain_id:, since: nil, limit: 1000) + return [] unless db_ready? 
+ + ds = Legion::Data.connection[:audit_records].where(chain_id: chain_id) + ds = ds.where { created_at >= since } if since + ds.order(:created_at, :id).limit(limit).all.map { |r| deserialize(r) } + end + + # Return records filtered by content_type across all chains. + # + # @param content_type [String] + # @param since [Time, nil] + # @param limit [Integer] + def query_by_type(content_type:, since: nil, limit: 100) + return [] unless db_ready? + + ds = Legion::Data.connection[:audit_records].where(content_type: content_type) + ds = ds.where { created_at >= since } if since + ds.order(Sequel.desc(:created_at)).limit(limit).all.map { |r| deserialize(r) } + end + + # SHA-256 of "parent_hash:content_hash:unix_ts_us:content_type". + # + # The timestamp is normalised to microseconds-since-epoch. PostgreSQL + # TIMESTAMP columns have microsecond precision, so nanosecond values + # written by Ruby would be truncated on read, causing recomputed hashes + # to diverge. Microsecond normalisation keeps write-time and read-time + # hashes identical across all supported adapters. + def compute_chain_hash(parent_hash, content_hash, timestamp, content_type) + ts_us = normalise_timestamp_us(timestamp) + Digest::SHA256.hexdigest("#{parent_hash}:#{content_hash}:#{ts_us}:#{content_type}") + end + + private + + # Normalise a timestamp to integer microseconds-since-epoch regardless of + # whether the database returned a Time, DateTime, or String. Always uses + # the absolute epoch value so timezone differences don't affect the hash. 
+ def normalise_timestamp_us(timestamp) + t = case timestamp + when ::Time then timestamp + when ::DateTime then timestamp.to_time + else ::Time.parse(timestamp.to_s) + end + (t.to_r * 1_000_000).to_i + end + + def truncate_to_us(time) + us = (time.to_r * 1_000_000).to_i + ::Time.at(Rational(us, 1_000_000)) + end + + def latest_chain_hash(conn, chain_id) + last = conn[:audit_records] + .select(:chain_hash) + .where(chain_id: chain_id) + .order(Sequel.desc(:created_at), Sequel.desc(:id)) + .first + last ? last[:chain_hash] : GENESIS_HASH + end + + def sign_record(chain_hash) + return nil unless defined?(Legion::Crypt) && Legion::Crypt.respond_to?(:sign) + + Legion::Crypt.sign(chain_hash) + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :sign_record) + nil + end + + def deserialize(row) + { + id: row[:id], + chain_id: row[:chain_id], + content_type: row[:content_type], + content_hash: row[:content_hash], + parent_hash: row[:parent_hash], + chain_hash: row[:chain_hash], + signature: row[:signature], + metadata: row[:metadata] ? Legion::JSON.load(row[:metadata]) : {}, + created_at: row[:created_at] + } + end + + def db_ready? + defined?(Legion::Data) && Legion::Data.connection&.table_exists?(:audit_records) + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :audit_record_db_ready?) 
+ false + end + end + end + end +end diff --git a/lib/legion/data/connection.rb b/lib/legion/data/connection.rb index df140d2..49d9c88 100755 --- a/lib/legion/data/connection.rb +++ b/lib/legion/data/connection.rb @@ -1,64 +1,614 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' + +require 'fileutils' require 'sequel' module Legion module Data module Connection + ADAPTERS = %i[sqlite mysql2 postgres].freeze + + GENERIC_KEYS = %i[max_connections pool_timeout preconnect single_threaded test name].freeze + + ADAPTER_KEYS = { + sqlite: %i[timeout readonly disable_dqs], + postgres: %i[connect_timeout sslmode sslrootcert search_path], + mysql2: %i[connect_timeout read_timeout write_timeout encoding sql_mode] + }.freeze + + ADAPTER_DEFAULTS = { + sqlite: { timeout: 5000, readonly: false, disable_dqs: true }, + postgres: { connect_timeout: 20, sslmode: 'disable' }, + mysql2: { connect_timeout: 120, encoding: 'utf8mb4' } + }.freeze + + QUERY_LOG_DIR = File.expand_path('~/.legionio/logs').freeze + + # Wraps a tagged Legion::Logging::Logger for Sequel's logger interface. + # Prefixes warn-level messages with [slow-query] since Sequel uses warn + # for queries exceeding log_warn_duration. 
+ class SlowQueryLogger + attr_reader :tagged + + def initialize(tagged_logger) + @tagged = tagged_logger + end + + def warn(message) + @tagged.warn("[slow-query] #{message}") + end + + def info(message) + @tagged.info(message) + end + + def debug(message) + @tagged.debug(message) + end + + def error(message) + @tagged.error(message) + end + end + + class SegmentedTaggedLogger + attr_reader :segments + + def initialize(segments:, logger: nil) + @segments = segments + @logger = logger || Legion::Logging + end + + def warn(message) + with_segments { dispatch(:warn, message) } + end + + def info(message) + with_segments { dispatch(:info, message) } + end + + def debug(message) + with_segments { dispatch(:debug, message) } + end + + def error(message) + with_segments { dispatch(:error, message) } + end + + private + + def dispatch(level, message) + return unless @logger.respond_to?(level) + + @logger.public_send(level, message) + end + + def with_segments + previous = Thread.current[:legion_log_segments] + Thread.current[:legion_log_segments] = @segments + yield + ensure + Thread.current[:legion_log_segments] = previous + end + end + + # File-based query logger that writes all SQL to a dedicated log file. + # Isolated from the main Legion::Logging domain. + class QueryFileLogger + include Legion::Logging::Helper + + attr_reader :path + + def initialize(path) + @path = path + @closed = false + @mutex = Mutex.new + dir = File.dirname(path) + FileUtils.mkdir_p(dir) + FileUtils.chmod(0o700, dir) if File.directory?(dir) + @file = File.open(path, File::WRONLY | File::APPEND | File::CREAT, 0o600) + @file.sync = true + end + + def debug(message) + write('DEBUG', message) + end + + def info(message) + write('INFO', message) + end + + def warn(message) + write('WARN', message) + end + + def error(message) + write('ERROR', message) + end + + def close + @mutex.synchronize do + @closed = true + @file.close unless @file.closed? 
+ end + end + + private + + def write(level, message) + @mutex.synchronize do + return if @closed || @file.closed? + + @file.puts "[#{Time.now.strftime('%Y-%m-%d %H:%M:%S.%L')}] #{level} #{message}" + end + rescue IOError => e + return nil if @closed || @file.closed? + + handle_exception(e, level: :warn, handled: true, operation: :query_file_write, path: @path) + nil + end + end + class << self + include Legion::Logging::Helper + attr_accessor :sequel def adapter - @adapter ||= RUBY_ENGINE == 'jruby' ? :jdbc : :mysql2 + @adapter ||= Legion::Settings[:data][:adapter]&.to_sym || :sqlite end def setup - @sequel = if adapter == :mysql2 - ::Sequel.connect(adapter: adapter, **creds_builder) + @adapter = Legion::Settings[:data][:adapter]&.to_sym || :sqlite + opts = sequel_opts + log.info("Legion::Data::Connection setup adapter=#{adapter}") + @fallback_active = false + @sequel = if adapter == :sqlite + ::Sequel.connect(opts.merge(adapter: :sqlite, database: sqlite_path)) else - ::Sequel.connect("jdbc:mysql://#{creds_builder[:host]}:#{creds_builder[:port]}/#{creds_builder[:database]}?user=#{creds_builder[:username]}&password=#{creds_builder[:password]}&serverTimezone=UTC") # rubocop:disable Layout/LineLength + attempted_adapter = adapter + begin + ::Sequel.connect(connection_opts_for(adapter: attempted_adapter, opts: opts)) + rescue StandardError => e + raise unless dev_fallback? + + log.error("Legion::Data FALLING BACK TO SQLITE — #{attempted_adapter} network DB connection failed: #{e.message}") + log.error("Legion::Data WARNING: Data written to SQLite will NOT be visible when #{attempted_adapter} reconnects. 
" \ + 'Apollo knowledge, audit logs, and other DB-backed services will use a local-only store.') + handle_exception(e, level: :error, handled: true, operation: :shared_connect, fallback: :sqlite) + @adapter = :sqlite + @fallback_active = true + sqlite_opts = sequel_opts + ::Sequel.connect(sqlite_opts.merge(adapter: :sqlite, database: sqlite_path)) + end end Legion::Settings[:data][:connected] = true - return if Legion::Settings[:data][:connection].nil? || Legion::Settings[:data][:connection][:log].nil? + log_connection_info + configure_extensions + connect_with_replicas + end + + # Returns connection metadata for health checks and diagnostics. + # Apollo and other services can use this to detect silent fallback. + def connection_info + { + adapter: adapter, + connected: Legion::Settings[:data][:connected], + fallback_active: @fallback_active || false, + configured_adapter: Legion::Settings[:data][:adapter]&.to_sym || :sqlite, + sequel_alive: (begin + !@sequel&.test_connection.nil? + rescue StandardError => e + log.debug("connection health check failed: #{e.message}") + false + end) + } + end - @sequel.logger = Legion::Logging - @sequel.sql_log_level = Legion::Settings[:data][:connection][:sql_log_level] - @sequel.log_warn_duration = Legion::Settings[:data][:connection][:log_warn_duration] + # Returns true if the data layer fell back to SQLite from a configured + # network database (PostgreSQL/MySQL). Services should check this and + # log warnings when operating in degraded mode. + def fallback_active? 
+ @fallback_active == true + end + + def stats + return { connected: false } unless @sequel + + data = Legion::Settings[:data] + { + connected: data[:connected], + adapter: adapter, + pool: pool_stats, + tuning: tuning_stats(data), + database: database_stats + } + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :data_connection_stats, adapter: adapter) + { connected: (data[:connected] if data.is_a?(Hash)), adapter: adapter, error: e.message } + end + + def pool_stats + return {} unless @sequel + + pool = @sequel.pool + stats = { + type: pool.pool_type, + size: pool.size, + max_size: pool.respond_to?(:max_size) ? pool.max_size : nil + } + + case pool.pool_type + when :timed_queue, :sharded_timed_queue + queue_size = pool.instance_variable_get(:@queue)&.size || 0 + stats[:available] = queue_size + stats[:in_use] = stats[:size] - queue_size + stats[:waiting] = pool.num_waiting + when :threaded, :sharded_threaded + avail = pool.instance_variable_get(:@available_connections) + stats[:available] = avail&.size || 0 + stats[:in_use] = stats[:size] - stats[:available] + stats[:waiting] = pool.num_waiting + when :single, :sharded_single + stats[:available] = pool.size + stats[:in_use] = 0 + stats[:waiting] = 0 + end + + stats.compact + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :data_pool_stats, adapter: adapter) + {} end def shutdown @sequel&.disconnect + @query_file_logger&.close + @query_file_logger = nil + @fallback_active = false Legion::Settings[:data][:connected] = false + log.info 'Legion::Data connection closed' + end + + def reconnect_with_fresh_creds + return false unless @sequel + return false if adapter == :sqlite + + fresh_creds = Legion::Settings[:data][:creds] + return false unless fresh_creds.is_a?(Hash) + + new_user = fresh_creds[:user] || fresh_creds[:username] + new_pass = fresh_creds[:password] + + unless new_user && new_pass + log.error('reconnect_with_fresh_creds: no 
user/password in Settings[:data][:creds]') + return false + end + + old_user = @sequel.opts[:user] + @sequel.opts[:user] = new_user + @sequel.opts[:password] = new_pass + + @sequel.disconnect + + @sequel.test_connection + log.info("reconnect_with_fresh_creds: rotated credentials (#{old_user} → #{new_user})") + true + rescue StandardError => e + handle_exception(e, level: :error, handled: true, operation: :reconnect_with_fresh_creds, + old_user: old_user, new_user: new_user) + false + end + + def connect_with_replicas + return unless adapter == :postgres + + replica_url = Legion::Settings[:data][:read_replica_url] + replica_list = Array(Legion::Settings[:data][:replicas]).dup + + replica_list.prepend(replica_url) if replica_url && !replica_url.empty? + replica_list.uniq! + replica_list.compact! + + return if replica_list.empty? + + @sequel.extension(:server_block) + + replica_list.each_with_index do |url, idx| + @sequel.add_servers("read_#{idx}": url) + end + + @replica_servers = replica_list.each_with_index.map { |_, idx| :"read_#{idx}" } + log.debug "Registered #{@replica_servers.size} read replica(s)" + end + + def read_server + return :default if @replica_servers.nil? || @replica_servers.empty? + + :read_0 + end + + def replica_servers + @replica_servers || [] + end + + def merge_tls_creds(creds, adapter:, port:) + return creds if adapter == :sqlite + return creds unless defined?(Legion::Crypt::TLS) + + tls_settings = data_tls_settings + return creds unless tls_settings[:enabled] == true + + tls = Legion::Crypt::TLS.resolve(tls_settings, port: port) + return creds unless tls[:enabled] + + case adapter + when :postgres + creds[:sslmode] = tls[:verify] == :none ? 'require' : 'verify-full' + creds[:sslrootcert] = tls[:ca] if tls[:ca] + creds[:sslcert] = tls[:cert] if tls[:cert] + creds[:sslkey] = tls[:key] if tls[:key] + when :mysql2 + creds[:ssl_mode] = tls[:verify] == :none ? 
'required' : 'verify_identity' + creds[:sslca] = tls[:ca] if tls[:ca] + creds[:sslcert] = tls[:cert] if tls[:cert] + creds[:sslkey] = tls[:key] if tls[:key] + end + + creds end def creds_builder(final_creds = {}) - final_creds.merge! Legion::Data::Settings.creds + final_creds.merge! Legion::Data::Settings.creds(adapter) final_creds.merge! Legion::Settings[:data][:creds] if Legion::Settings[:data][:creds].is_a? Hash - # if Legion::Settings[:data][:connection][:max_connections].is_a? Integer - # final_creds[:max_connections] = Legion::Settings[:data][:connection][:max_connections] - # end + port = final_creds[:port] + merge_tls_creds(final_creds, adapter: adapter, port: port) + + final_creds + end - # final_creds[:preconnect] = :concurrently if Legion::Settings[:data][:connection][:preconnect] + private - return final_creds if Legion::Settings[:vault].nil? + def data_tls_settings + return {} unless defined?(Legion::Settings) - if Legion::Settings[:vault][:connected] && ::Vault.sys.mounts.key?(:database) - temp_vault_creds = Legion::Crypt.read('database/creds/legion') - final_creds[:user] = temp_vault_creds[:username] - final_creds[:password] = temp_vault_creds[:password] + Legion::Settings[:data][:tls] || {} + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :data_tls_settings) + {} + end + + def log_connection_info + if adapter == :sqlite + log.info "Connected to SQLite at #{sqlite_path}" + else + actual = Legion::Settings[:data][:creds] || {} + conn_user = actual[:user] || actual[:username] || 'unknown' + conn_host = actual[:host] || '127.0.0.1' + conn_port = actual[:port] + conn_db = actual[:database] || actual[:db] + log.info "Connected to #{adapter}://#{conn_user}@#{conn_host}:#{conn_port}/#{conn_db}" end + end - final_creds + def dev_fallback? 
+ data_settings = Legion::Settings[:data] + data_settings[:dev_mode] == true && data_settings[:dev_fallback] != false end - def default_creds - { - host: '127.0.0.1', - port: 3306, - username: 'legion', - password: 'legion', - database: 'legion', - max_connections: 4 + def sqlite_path + path = Legion::Settings[:data][:creds][:database] || 'legionio.db' + return path if File.absolute_path?(path) + + base_dir = File.expand_path('~/.legionio/data') + FileUtils.mkdir_p(base_dir) + File.join(base_dir, path) + end + + def connection_opts_for(adapter:, opts:) + connection_opts = opts.merge(adapter: adapter, **creds_builder) + connection_opts[:preconnect] = false if adapter != :sqlite && dev_fallback? + connection_opts + end + + def sequel_opts + data = Legion::Settings[:data] + opts = {} + + # Generic pool options + GENERIC_KEYS.each do |key| + val = data[key] + opts[key] = val unless val.nil? + end + + # Query log mode: all queries to dedicated file, isolated from main domain + if data[:query_log] + log_path = File.join(QUERY_LOG_DIR, 'data-shared-query.log') + @query_file_logger = QueryFileLogger.new(log_path) + opts[:logger] = @query_file_logger + opts[:sql_log_level] = :debug + opts[:log_connection_info] = data[:log_connection_info] || false + elsif data[:log] + # Standard mode: slow-query warnings through Legion::Logging domain + opts[:logger] = build_data_logger + opts[:sql_log_level] = data[:sql_log_level]&.to_sym || :debug + opts[:log_warn_duration] = data[:log_warn_duration] + opts[:log_connection_info] = data[:log_connection_info] || false + end + + # Adapter-specific: user setting wins, then built-in default, skip if nil + defaults = ADAPTER_DEFAULTS.fetch(adapter, {}) + ADAPTER_KEYS.fetch(adapter, []).each do |key| + val = data.key?(key) && !data[key].nil? ? data[key] : defaults[key] + opts[key] = val unless val.nil? 
+ end + + opts + end + + def tuning_stats(data) + tuning = {} + + # Pool tuning + GENERIC_KEYS.each { |key| tuning[key] = data[key] } + + # Logging + tuning[:log] = data[:log] + tuning[:query_log] = data[:query_log] + tuning[:query_log_path] = @query_file_logger&.path + tuning[:log_warn_duration] = data[:log_warn_duration] + tuning[:sql_log_level] = data[:sql_log_level] + tuning[:log_connection_info] = data[:log_connection_info] + + # Connection health + tuning[:connection_validation] = data[:connection_validation] + tuning[:connection_validation_timeout] = data[:connection_validation_timeout] + tuning[:connection_expiration] = data[:connection_expiration] + tuning[:connection_expiration_timeout] = data[:connection_expiration_timeout] + + # Adapter-specific (only keys relevant to current adapter) + defaults = ADAPTER_DEFAULTS.fetch(adapter, {}) + ADAPTER_KEYS.fetch(adapter, []).each do |key| + tuning[key] = data.key?(key) && !data[key].nil? ? data[key] : defaults[key] + end + + tuning + end + + def database_stats + case adapter + when :sqlite then sqlite_stats + when :postgres then postgres_stats + when :mysql2 then mysql_stats + else {} + end + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :data_database_stats, adapter: adapter) + { error: e.message } + end + + def sqlite_stats + db = @sequel + stats = {} + %w[page_size page_count freelist_count journal_mode wal_autocheckpoint + cache_size busy_timeout].each do |pragma| + val = begin + db.fetch("PRAGMA #{pragma}").single_value + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :sqlite_stats_pragma, pragma: pragma) + nil + end + stats[pragma.to_sym] = val unless val.nil? 
+ end + + db_path = Legion::Settings[:data][:creds][:database] || 'legionio.db' + stats[:file_size] = File.size(db_path) if File.exist?(db_path) + stats[:database_size_bytes] = (stats[:page_size].to_i * stats[:page_count].to_i) if stats[:page_size] && stats[:page_count] + stats + end + + def postgres_stats + db = @sequel + stats = {} + + row = db.fetch('SELECT current_database() AS db, pg_database_size(current_database()) AS size_bytes').first + stats[:database_name] = row[:db] + stats[:database_size_bytes] = row[:size_bytes] + + activity = db.fetch(<<~SQL).first + SELECT + count(*) FILTER (WHERE state = 'active') AS active, + count(*) FILTER (WHERE state = 'idle') AS idle, + count(*) FILTER (WHERE state = 'idle in transaction') AS idle_in_transaction, + count(*) AS total + FROM pg_stat_activity + WHERE datname = current_database() + SQL + stats[:server_connections] = activity + + settings = db.fetch(<<~SQL).first + SELECT + current_setting('max_connections')::int AS max_connections, + current_setting('shared_buffers') AS shared_buffers, + current_setting('work_mem') AS work_mem, + current_setting('server_version') AS server_version + SQL + stats[:server] = settings + + stats + end + + def mysql_stats + db = @sequel + stats = {} + + size_row = db.fetch(<<~SQL).first + SELECT SUM(data_length + index_length) AS size_bytes + FROM information_schema.tables + WHERE table_schema = DATABASE() + SQL + stats[:database_name] = db.fetch('SELECT DATABASE() AS db').single_value + stats[:database_size_bytes] = size_row[:size_bytes]&.to_i + + threads = {} + db.fetch("SHOW STATUS WHERE Variable_name IN ('Threads_connected','Threads_running','Max_used_connections')").each do |row| + threads[row[:Variable_name].downcase.to_sym] = row[:Value].to_i + end + stats[:server_connections] = threads + + max_conn = db.fetch("SHOW VARIABLES LIKE 'max_connections'").first + version = db.fetch('SELECT VERSION() AS v').single_value + stats[:server] = { + max_connections: max_conn ? 
max_conn[:Value].to_i : nil, + server_version: version } + + stats + end + + def configure_extensions + return if adapter == :sqlite + + data = Legion::Settings[:data] + + if adapter == :postgres + Sequel.extension(:pg_array) + @sequel.extension(:pg_array) + end + + if data[:connection_validation] != false + @sequel.extension(:connection_validator) + @sequel.pool.connection_validation_timeout = data[:connection_validation_timeout] + end + + if data[:connection_expiration] != false + @sequel.extension(:connection_expiration) + @sequel.pool.connection_expiration_timeout = data[:connection_expiration_timeout] + end + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :configure_extensions, adapter: adapter) + end + + def build_data_logger + tagged = if defined?(Legion::Logging::TaggedLogger) && respond_to?(:tagged_logger_settings, true) + Legion::Logging::TaggedLogger.new( + segments: %w[data connection], + **send(:tagged_logger_settings) + ) + else + SegmentedTaggedLogger.new(segments: %w[data connection]) + end + SlowQueryLogger.new(tagged) + rescue StandardError => e + if respond_to?(:handle_exception, true) + handle_exception(e, level: :warn, handled: true, operation: :build_data_logger) + else + log.warn("build_data_logger failed: #{e.class}: #{e.message}") + end + SlowQueryLogger.new(SegmentedTaggedLogger.new(segments: %w[data connection], logger: log)) end end end diff --git a/lib/legion/data/encryption/cipher.rb b/lib/legion/data/encryption/cipher.rb new file mode 100644 index 0000000..cddb027 --- /dev/null +++ b/lib/legion/data/encryption/cipher.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +require 'openssl' + +module Legion + module Data + module Encryption + module Cipher + VERSION_BYTE = "\x01".b.freeze + IV_LENGTH = 12 + TAG_LENGTH = 16 + + class << self + def encrypt(plaintext, key:, aad: '') + cipher = OpenSSL::Cipher.new('aes-256-gcm').encrypt + iv = OpenSSL::Random.random_bytes(IV_LENGTH) + cipher.key = key 
+ cipher.iv = iv + cipher.auth_data = aad + + ciphertext = cipher.update(plaintext.to_s) + cipher.final + tag = cipher.auth_tag(TAG_LENGTH) + + VERSION_BYTE + iv + ciphertext + tag + end + + def decrypt(blob, key:, aad: '') + raise ArgumentError, 'data too short' if blob.bytesize < 1 + IV_LENGTH + TAG_LENGTH + + version = blob.byteslice(0, 1) + raise ArgumentError, "unsupported version: #{version.unpack1('C')}" unless version == VERSION_BYTE + + iv = blob.byteslice(1, IV_LENGTH) + tag = blob.byteslice(-TAG_LENGTH, TAG_LENGTH) + ciphertext = blob.byteslice(1 + IV_LENGTH, blob.bytesize - 1 - IV_LENGTH - TAG_LENGTH) + + cipher = OpenSSL::Cipher.new('aes-256-gcm').decrypt + cipher.key = key + cipher.iv = iv + cipher.auth_tag = tag + cipher.auth_data = aad + + cipher.update(ciphertext) + cipher.final + end + end + end + end + end +end diff --git a/lib/legion/data/encryption/key_provider.rb b/lib/legion/data/encryption/key_provider.rb new file mode 100644 index 0000000..05cb448 --- /dev/null +++ b/lib/legion/data/encryption/key_provider.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' +require 'openssl' + +module Legion + module Data + module Encryption + class KeyProvider + include Legion::Logging::Helper + + def initialize(mode: :auto) + @mode = mode + @key_cache = {} + end + + def key_for(tenant_id: nil) + cache_key = tenant_id || '__default__' + @key_cache[cache_key] ||= derive_key(tenant_id) + end + + def clear_cache! + @key_cache.clear + log.debug 'Cleared encryption key cache' + end + + private + + def derive_key(tenant_id) + if tenant_id && crypt_available? + log.debug "Deriving Vault key for tenant #{tenant_id}" + Legion::Crypt::PartitionKeys.derive(tenant_id: tenant_id) + elsif crypt_available? 
+ Legion::Crypt.default_encryption_key + else + log.warn 'Legion::Crypt unavailable, falling back to dev encryption key' + local_key + end + rescue StandardError => e + handle_exception(e, level: :error, handled: false, operation: :derive_key, tenant_id: tenant_id) + raise + end + + def crypt_available? + defined?(Legion::Crypt::PartitionKeys) + end + + def local_key + OpenSSL::Digest.digest('SHA256', 'legion-dev-encryption-key') + end + end + end + end +end diff --git a/lib/legion/data/encryption/sequel_plugin.rb b/lib/legion/data/encryption/sequel_plugin.rb new file mode 100644 index 0000000..eb4155d --- /dev/null +++ b/lib/legion/data/encryption/sequel_plugin.rb @@ -0,0 +1,173 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' +require_relative 'cipher' +require_relative 'key_provider' + +module Legion + module Data + module Encryption + module SequelPlugin + extend Legion::Logging::Helper + + class << self + def aad_for(table_name:, primary_key:, column:) + "#{table_name}:#{primary_key || 0}:#{column}" + end + + def decrypt_value(blob:, key:, table_name:, primary_key:, column:) + errors = [] + + aad_candidates(primary_key).each do |aad_primary_key| + aad = aad_for(table_name: table_name, primary_key: aad_primary_key, column: column) + return Legion::Data::Encryption::Cipher.decrypt(blob, key: key, aad: aad) + rescue OpenSSL::Cipher::CipherError, ArgumentError => e + errors << e + end + + raise errors.last if errors.any? + end + + private + + def aad_candidates(primary_key) + [primary_key, 0].compact.uniq + end + end + + module ClassMethods + def encrypted_columns + @encrypted_columns ||= {} + end + + def encrypted_column(name, key_scope: :default) + col_scope = key_scope + encrypted_columns[name] = { key_scope: col_scope } + + define_method(name) do + raw = super() + return nil if raw.nil? 
# Decrypts the stored blob of +column+ for this record.
#
# The key comes from the model's key provider; it is tenant-scoped (using
# this row's :tenant_id) when +key_scope+ is :tenant, otherwise the default
# key is used.
#
# OpenSSL hands plaintext back tagged as ASCII-8BIT. Because the writer
# encrypts `value.to_s`, the bytes are re-tagged as UTF-8 whenever they form
# valid UTF-8, so non-ASCII text compares equal to what was assigned. Bytes
# that are not valid UTF-8 are returned untouched (binary).
#
# @param column [Symbol] encrypted column name
# @param raw [String] stored ciphertext blob
# @param key_scope [Symbol] :tenant or any other value for the default key
# @return [String] decrypted value
def decrypt_encrypted_column(column, raw, key_scope:)
  provider = self.class.encryption_key_provider
  tenant = key_scope == :tenant ? self[:tenant_id] : nil
  key = provider.key_for(tenant_id: tenant)

  plaintext = Legion::Data::Encryption::SequelPlugin.decrypt_value(
    blob: raw.b,
    key: key,
    table_name: self.class.table_name,
    primary_key: pk,
    column: column
  )
  return plaintext unless plaintext.is_a?(String)

  as_text = plaintext.dup.force_encoding(Encoding::UTF_8)
  as_text.valid_encoding? ? as_text : plaintext
end
# Re-encrypts every value remembered while the record was still new, this
# time binding the ciphertext to the record's current primary key, writes
# the new ciphertexts with a single dataset update, mirrors them into
# +values+ and empties the pending list. Failures are reported through
# handle_exception and re-raised.
def reencrypt_pending_encrypted_columns
  pending = pending_encrypted_columns
  return if pending.empty?

  updates = pending.to_h do |column, config|
    [column, encrypt_encrypted_column(column, config[:value], key_scope: config[:key_scope], primary_key: pk)]
  end

  self.class.where(pk_hash).update(updates)
  values.merge!(updates)
  pending.clear
rescue StandardError => e
  Legion::Data::Encryption::SequelPlugin.handle_exception(
    e,
    level: :error,
    handled: false,
    operation: :reencrypt_pending_columns,
    table: self.class.table_name,
    primary_key: pk
  )
  raise
end
scope.reconciled + audit.retention_applied audit.exported + ].freeze + + class << self + include Legion::Logging::Helper + + def append(stream:, type:, data: {}, metadata: {}) + return { error: 'db unavailable' } unless db_ready? + + conn = Legion::Data.connection + conn.transaction do + last = conn[:governance_events] + .where(stream_id: stream) + .order(Sequel.desc(:sequence_number)) + .first + + seq = (last&.[](:sequence_number) || 0) + 1 + prev_hash = last&.[](:event_hash) || ('0' * 64) + + data_json = Legion::JSON.dump(data) + metadata_json = Legion::JSON.dump(metadata) + event_hash = compute_hash(stream, seq, type, data_json, metadata_json, prev_hash) + + conn[:governance_events].insert( + stream_id: stream, + event_type: type, + sequence_number: seq, + data_json: data_json, + metadata_json: metadata_json, + event_hash: event_hash, + previous_hash: prev_hash, + created_at: Time.now + ) + + log.debug "EventStore append: stream=#{stream} type=#{type} seq=#{seq}" + { stream: stream, sequence: seq, hash: event_hash } + end + end + + def read_stream(stream, since: nil) + return [] unless db_ready? + + ds = Legion::Data.connection[:governance_events].where(stream_id: stream) + ds = ds.where { created_at >= since } if since + ds.order(:sequence_number).all.map { |e| deserialize(e) } + end + + def read_by_type(type, since: nil, limit: 100) + return [] unless db_ready? + + ds = Legion::Data.connection[:governance_events].where(event_type: type) + ds = ds.where { created_at >= since } if since + ds.order(Sequel.desc(:created_at)).limit(limit).all.map { |e| deserialize(e) } + end + + def verify_chain(stream) + return { valid: false, error: 'db unavailable' } unless db_ready? 
# Converts a governance_events row into the public event hash, decoding the
# JSON payload and metadata columns (a missing column is read as '{}').
#
# @param event [Hash] database row
# @return [Hash] event with :id, :stream, :type, :sequence, :data,
#   :metadata, :hash and :created_at
def deserialize(event)
  payload = Legion::JSON.load(event[:data_json] || '{}')
  meta = Legion::JSON.load(event[:metadata_json] || '{}')

  {
    id: event[:id],
    stream: event[:stream_id],
    type: event[:event_type],
    sequence: event[:sequence_number],
    data: payload,
    metadata: meta,
    hash: event[:event_hash],
    created_at: event[:created_at]
  }
end
# Extracts plain text from +source+.
#
# Steps (each one is timed and appended to :step_timings): detect the type,
# resolve the handler, check that the handler's gem is available, run the
# handler. Step timings are persisted on every exit path.
#
# Handler files are loaded here before the registry lookup, exactly as
# #supported_types and #can_extract? do; without that, a lookup made before
# any handler file was required finds an empty registry and reports
# :no_handler for a supported type.
#
# @param source [Object] whatever the handlers accept (path or IO-like object)
# @param type [Symbol, String] :auto to detect from the source, otherwise the type to use
# @return [Hash] always contains :success, :text, :extract_id and :step_timings;
#   on failure :error is :unknown_type, :no_handler, :gem_not_installed, the
#   handler's error, or the message of a rescued exception
def extract(source, type: :auto)
  extract_id = SecureRandom.uuid
  timings = []
  detected_type = timed_step(:detect_type, timings) do
    type == :auto ? TypeDetector.detect(source) : type&.to_sym
  end
  unless detected_type
    result = { success: false, text: nil, error: :unknown_type, extract_id: extract_id,
               step_timings: timings }
    persist_step_timings(extract_id, timings)
    return result
  end

  handler = timed_step(:resolve_handler, timings) do
    load_all_handlers
    Handlers::Base.for_type(detected_type)
  end
  unless handler
    result = { success: false, text: nil, error: :no_handler, type: detected_type, extract_id: extract_id,
               step_timings: timings }
    persist_step_timings(extract_id, timings)
    return result
  end

  available = timed_step(:check_availability, timings) { handler.available? }
  unless available
    return { success: false, text: nil, error: :gem_not_installed,
             gem: handler.gem_name, type: detected_type, extract_id: extract_id,
             step_timings: timings }.tap { persist_step_timings(extract_id, timings) }
  end

  log.info "Extract starting type=#{detected_type} handler=#{handler.name}"
  result = timed_step(:handler_extract, timings) { handler.extract(source) }
  if result[:text]
    log.info "Extract succeeded type=#{detected_type}"
    { success: true, text: result[:text], metadata: result[:metadata], type: detected_type,
      extract_id: extract_id, step_timings: timings }
  else
    log.warn "Extract failed type=#{detected_type} error=#{result[:error]}"
    { success: false, text: nil, error: result[:error], type: detected_type,
      extract_id: extract_id, step_timings: timings }
  end.tap { persist_step_timings(extract_id, timings) }
rescue StandardError => e
  handle_exception(e, level: :error, handled: true, operation: :extract, type: detected_type)
  persist_step_timings(extract_id, timings) if extract_id
  { success: false, text: nil, error: e.message, type: detected_type, extract_id: extract_id,
    step_timings: timings }
end
# Appends one timing entry for a finished step to +timings+.
#
# @param timings [Array<Hash>] accumulator, mutated in place
# @param name [#to_s] step name
# @param start_time [Time] wall-clock start of the step
# @param monotonic_start [Float] CLOCK_MONOTONIC reading taken at the start
# @param status [#to_s] outcome label
# @param error [String, nil] error description, if any
# @return [Array<Hash>] +timings+
def record_step_timing(timings, name:, start_time:, monotonic_start:, status:, error: nil)
  finished_at = Time.now.utc
  elapsed_seconds = Process.clock_gettime(Process::CLOCK_MONOTONIC) - monotonic_start

  entry = {
    name: name.to_s,
    start_time: start_time,
    end_time: finished_at,
    status: status.to_s,
    error: error,
    duration_ms: (elapsed_seconds * 1000).round
  }
  timings.push(entry)
end
# Requires every handler file under extract/handlers (everything except
# base.rb) once; later calls return immediately.
def load_all_handlers
  return if @handlers_loaded

  pattern = File.join(__dir__, 'extract', 'handlers', '*.rb')
  Dir.glob(pattern)
     .reject { |file| file.end_with?('base.rb') }
     .each { |file| require file }
  @handlers_loaded = true
end
# Renders CSV content (first row is the header) as one "header: value, ..."
# line per data row.
#
# @param source [#read, #to_s] readable object, or a file path
# @return [Hash] { text:, metadata: { rows:, columns:, headers: } } on
#   success, { text: nil, error: } when reading or parsing fails
def self.extract(source)
  raw = if source.respond_to?(:read)
          source.read
        else
          File.read(source.to_s)
        end
  table = ::CSV.parse(raw, headers: true)
  headers = table.headers

  rendered_rows = table.map do |row|
    row.to_h.map { |header, value| format('%s: %s', header, value) }.join(', ')
  end

  {
    text: rendered_rows.join("\n"),
    metadata: { rows: table.size, columns: headers.size, headers: headers }
  }
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: :extract_csv)
  { text: nil, error: e.message }
end
+ text = paragraphs.join("\n\n") + { text: text, metadata: { paragraphs: paragraphs.size } } + rescue LoadError => e + handle_exception(e, level: :warn, handled: true, operation: :extract_docx, gem: gem_name) + { text: nil, error: :gem_not_installed, gem: gem_name } + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :extract_docx) + { text: nil, error: e.message } + end + end + end + end + end +end diff --git a/lib/legion/data/extract/handlers/html.rb b/lib/legion/data/extract/handlers/html.rb new file mode 100644 index 0000000..e8ee58a --- /dev/null +++ b/lib/legion/data/extract/handlers/html.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module Legion + module Data + module Extract + module Handlers + class Html < Base + def self.type = :html + def self.extensions = %w[.html .htm] + def self.gem_name = 'nokogiri' + + def self.extract(source) + require 'nokogiri' + + content = source.respond_to?(:read) ? source.read : File.read(source.to_s) + doc = ::Nokogiri::HTML(content) + + # Remove script and style elements + doc.css('script, style, noscript').each(&:remove) + + title = doc.at_css('title')&.text&.strip + text = doc.text.gsub(/\s+/, ' ').strip + { text: text, metadata: { title: title } } + rescue LoadError => e + handle_exception(e, level: :warn, handled: true, operation: :extract_html, gem: gem_name) + { text: nil, error: :gem_not_installed, gem: gem_name } + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :extract_html) + { text: nil, error: e.message } + end + end + end + end + end +end diff --git a/lib/legion/data/extract/handlers/json.rb b/lib/legion/data/extract/handlers/json.rb new file mode 100644 index 0000000..0939c83 --- /dev/null +++ b/lib/legion/data/extract/handlers/json.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require 'json' + +module Legion + module Data + module Extract + module Handlers + class Json < Base + def self.type = :json + def 
# Renders JSON Lines content as text, one record per non-blank line,
# separated by "\n---\n".
#
# * Objects and arrays are pretty-printed. (Arrays must be rendered
#   explicitly: handing a nested Array to Array#join would flatten its
#   elements into the record separator stream.)
# * Scalars are rendered with #to_s.
# * A line that is not valid JSON is kept verbatim (stripped).
# * Blank lines are skipped instead of being counted as empty records.
#
# @param source [#read, #to_s] readable object, or a file path
# @return [Hash] { text:, metadata: { lines: } } on success,
#   { text: nil, error: } when reading fails
def self.extract(source)
  content = source.respond_to?(:read) ? source.read : File.read(source.to_s)

  records = content.each_line.map(&:strip).reject(&:empty?).map do |line|
    ::JSON.parse(line)
  rescue ::JSON::ParserError
    line
  end

  rendered = records.map do |record|
    case record
    when Hash, Array then ::JSON.pretty_generate(record)
    else record.to_s
    end
  end

  { text: rendered.join("\n---\n"), metadata: { lines: records.size } }
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: :extract_jsonl)
  { text: nil, error: e.message }
end
# Extract handler for PowerPoint (.pptx) files: reads the slide XML parts
# out of the archive and collects the text runs of every slide.
class Pptx < Base
  def self.type = :pptx
  def self.extensions = %w[.pptx]
  def self.gem_name = 'rubyzip'

  # The rubyzip gem is loaded with `require 'zip'`. The inherited check
  # requires +gem_name+ ('rubyzip'), which is not a loadable feature, so it
  # would report this handler as unavailable even with the gem installed.
  #
  # @return [Boolean] true when rubyzip can be loaded
  def self.available?
    require 'zip'
    true
  rescue LoadError => e
    handle_exception(e, level: :debug, handled: true, operation: :extract_handler_available, handler: name,
                        gem: gem_name)
    false
  end

  # @param source [Object] passed straight to Zip::File.open
  # @return [Hash] { text:, metadata: { slides: } } on success; slides
  #   without any text run are skipped and do not count.
  #   { text: nil, error: } on failure (error is :gem_not_installed when a
  #   required library cannot be loaded)
  def self.extract(source)
    require 'zip'
    require 'rexml/document'

    slides = []
    ::Zip::File.open(source) do |zip|
      zip.glob('ppt/slides/slide*.xml').sort_by(&:name).each do |entry|
        doc = REXML::Document.new(entry.get_input_stream.read)
        texts = []
        doc.each_element('//a:t') { |e| texts << e.text }
        slides << texts.join(' ') unless texts.empty?
      end
    end
    text = slides.each_with_index.map { |s, i| "Slide #{i + 1}: #{s}" }.join("\n\n")
    { text: text, metadata: { slides: slides.size } }
  rescue LoadError => e
    handle_exception(e, level: :warn, handled: true, operation: :extract_pptx, gem: gem_name)
    { text: nil, error: :gem_not_installed, gem: gem_name }
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true, operation: :extract_pptx)
    { text: nil, error: e.message }
  end
end
# Extract handler for WebVTT (.vtt) transcripts.
class Vtt < Base
  # Cue timing line, e.g. "00:00:01.000 --> 00:00:04.000".
  TIMESTAMP_PATTERN = /^\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}/
  # Voice span at the start of a line: "<v Speaker Name>spoken text".
  # Capture 1 is the speaker, capture 2 the rest of the line.
  SPEAKER_TAG_PATTERN = /^<v\s+([^>]+)>(.*)$/

  def self.type = :vtt
  def self.extensions = %w[.vtt]
  def self.gem_name = nil

  # @param source [#read, String] readable object, raw VTT content (a String
  #   containing a newline), or a file path
  # @param preserve_speakers [Boolean] prefix voice lines with "Speaker: "
  # @return [Hash] { text:, metadata: { bytes:, speakers:, line_count: } } on
  #   success, { text: nil, error: } on failure
  def self.extract(source, preserve_speakers: true)
    content = if source.respond_to?(:read)
                source.read
              elsif source.is_a?(String) && source.include?("\n")
                source
              else
                File.read(source.to_s)
              end
    lines = parse_vtt(content, preserve_speakers: preserve_speakers)
    text = lines.join("\n")
    speakers = extract_speakers(content)
    {
      text: text,
      metadata: {
        bytes: content.bytesize,
        speakers: speakers,
        line_count: lines.size
      }
    }
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true, operation: :extract_vtt)
    { text: nil, error: e.message }
  end

  # Returns the spoken lines: blank lines, the "WEBVTT" header line and cue
  # timing lines are dropped; voice spans are unwrapped.
  def self.parse_vtt(content, preserve_speakers: true)
    lines = []
    content.each_line do |raw|
      line = raw.strip
      next if line.empty?
      next if line == 'WEBVTT'
      next if TIMESTAMP_PATTERN.match?(line)

      if (match = SPEAKER_TAG_PATTERN.match(line))
        speaker = match[1].strip
        text = match[2].strip
        lines << (preserve_speakers ? "#{speaker}: #{text}" : text)
      else
        lines << line
      end
    end
    lines
  end

  # Unique speaker names, in order of first appearance.
  def self.extract_speakers(content)
    content.scan(SPEAKER_TAG_PATTERN).map { |m| m[0].strip }.uniq
  end
  private_class_method :parse_vtt, :extract_speakers
end
# Maps a source (path or IO-like object) to an extract type by file extension.
module TypeDetector
  EXTENSION_MAP = {
    '.pdf' => :pdf,
    '.docx' => :docx,
    '.pptx' => :pptx,
    '.xlsx' => :xlsx,
    '.xls' => :xlsx,
    '.md' => :markdown,
    '.markdown' => :markdown,
    '.txt' => :text,
    '.csv' => :csv,
    '.json' => :json,
    '.jsonl' => :jsonl,
    '.html' => :html,
    '.htm' => :html,
    '.vtt' => :vtt
  }.freeze

  module_function

  # Detects the type of +source+.
  #
  # * an object responding to #path (File, Tempfile, ...) is classified by that path
  # * a String, or an object responding to #to_path (Pathname), is
  #   classified by its extension, but only when the file exists
  # * anything else, including a String that cannot be a path (for example
  #   one containing a NUL byte), yields nil
  #
  # @param source [Object]
  # @return [Symbol, nil] extract type, or nil when it cannot be determined
  def detect(source)
    return detect_from_io(source) if source.respond_to?(:path)

    candidate = source.respond_to?(:to_path) ? source.to_path : source
    return nil unless candidate.is_a?(String)

    exists = begin
      File.exist?(candidate)
    rescue ArgumentError
      # File.exist? raises for strings that cannot be a path (NUL byte).
      false
    end
    exists ? detect_from_path(candidate) : nil
  end

  # @param path [String] file name or path; the extension match is case-insensitive
  # @return [Symbol, nil]
  def detect_from_path(path)
    ext = File.extname(path).downcase
    EXTENSION_MAP[ext]
  end

  # @param io [#path] IO-like object
  # @return [Symbol, nil] nil when the object has no path
  def detect_from_io(io)
    return nil unless io.respond_to?(:path) && io.path

    detect_from_path(io.path)
  end
end
# Whether the settings report an established data connection.
#
# Safe to call before settings are loaded: returns false when
# Legion::Settings is not defined or has no :data section, instead of
# raising NoMethodError on nil.
#
# @return [Boolean]
def data_connected?
  return false unless defined?(Legion::Settings)

  data_settings = Legion::Settings[:data]
  data_settings.respond_to?(:[]) && data_settings[:connected] ? true : false
end
+ + Legion::Data::Local.stats + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :local_data_stats) + {} + end + + # --- Permission Helpers --- + + def data_can_read?(table_name) + Legion::Data.can_read?(table_name) + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :data_can_read, table: table_name) + false + end + + def data_can_write?(table_name) + Legion::Data.can_write?(table_name) + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :data_can_write, table: table_name) + false + end + end + end +end diff --git a/lib/legion/data/local.rb b/lib/legion/data/local.rb new file mode 100644 index 0000000..91e42c7 --- /dev/null +++ b/lib/legion/data/local.rb @@ -0,0 +1,192 @@ +# frozen_string_literal: true + +require 'fileutils' +require 'legion/logging/helper' + +require 'sequel' +require 'sequel/extensions/migration' + +module Legion + module Data + module Local + class << self + include Legion::Logging::Helper + + attr_reader :connection, :db_path + + def setup(database: nil, **) + return if @connected + + db_file = database || local_settings[:database] || 'legionio_local.db' + unless File.absolute_path?(db_file) + base_dir = File.expand_path('~/.legionio') + FileUtils.mkdir_p(base_dir) + db_file = File.join(base_dir, db_file) + end + @db_path = db_file + + sqlite_defaults = Legion::Data::Connection::ADAPTER_DEFAULTS.fetch(:sqlite, {}) + data = defined?(Legion::Settings) ? Legion::Settings[:data] : {} + opts = { adapter: :sqlite, database: db_file } + Legion::Data::Connection::ADAPTER_KEYS.fetch(:sqlite, []).each do |key| + val = data.key?(key) && !data[key].nil? ? data[key] : sqlite_defaults[key] + opts[key] = val unless val.nil? 
+ end + + if local_settings[:query_log] + log_path = File.join(Legion::Data::Connection::QUERY_LOG_DIR, 'data-local-query.log') + @query_file_logger = Legion::Data::Connection::QueryFileLogger.new(log_path) + opts[:logger] = @query_file_logger + opts[:sql_log_level] = :debug + elsif data[:log] + opts[:logger] = build_local_logger + opts[:sql_log_level] = resolved_sql_log_level + opts[:log_warn_duration] = resolved_log_warn_duration + end + + @connection = ::Sequel.connect(opts) + @connection.run('PRAGMA journal_mode=WAL') + @connection.run('PRAGMA busy_timeout=30000') + @connection.run('PRAGMA synchronous=NORMAL') + @connection.run('PRAGMA cache_size=-20000') + @connection.run('PRAGMA mmap_size=268435456') + @connected = true + run_migrations + log.info "Legion::Data::Local connected to #{db_file} (WAL mode, 30s busy_timeout, 20MB cache, 256MB mmap)" + rescue StandardError => e + handle_exception(e, level: :error, handled: false, operation: :local_setup, database: db_file) + raise + end + + def shutdown + @connection&.disconnect + @query_file_logger&.close + @query_file_logger = nil + @connection = nil + @connected = false + end + + def connected? + @connected == true + end + + def register_migrations(name:, path:) + @registered_migrations ||= {} + @registered_migrations[name] = path + run_single_migration(name, path) if connected? + end + + def registered_migrations + @registered_migrations || {} + end + + def model(table_name) + raise 'Legion::Data::Local not connected' unless connected? + + ::Sequel::Model(connection[table_name]) + end + + def stats + return { connected: false } unless connected? 
+ + stats = { + connected: true, + adapter: :sqlite, + path: @db_path, + query_log: local_settings[:query_log] || false, + query_log_path: @query_file_logger&.path, + registered_migrations: registered_migrations.keys + } + + stats[:file_size] = File.size(@db_path) if @db_path && File.exist?(@db_path) + + %w[page_size page_count freelist_count journal_mode + wal_autocheckpoint cache_size busy_timeout mmap_size].each do |pragma| + val = begin + @connection.fetch("PRAGMA #{pragma}").single_value + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :local_stats_pragma, pragma: pragma) + nil + end + stats[pragma.to_sym] = val unless val.nil? + end + + stats[:database_size_bytes] = stats[:page_size].to_i * stats[:page_count].to_i if stats[:page_size] && stats[:page_count] + + stats + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :local_stats, database: @db_path) + { connected: connected?, error: e.message } + end + + def reset! 
+ @connection = nil + @connected = false + @db_path = nil + @registered_migrations = nil + end + + private + + def run_migrations + return unless local_settings.dig(:migrations, :auto_migrate) != false + + registered_migrations.each do |name, path| + run_single_migration(name, path) + end + end + + def run_single_migration(name, path) + return unless local_settings.dig(:migrations, :auto_migrate) != false + return unless File.directory?(path) + + table = :"schema_migrations_#{name}" + ::Sequel::TimestampMigrator.new(@connection, path, table: table).run + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :local_migration, name: name, path: path) + end + + def local_settings + return {} unless defined?(Legion::Settings) + + Legion::Settings[:data]&.dig(:local) || {} + end + + def build_local_logger + tagged = if defined?(Legion::Logging::TaggedLogger) && respond_to?(:tagged_logger_settings, true) + Legion::Logging::TaggedLogger.new( + segments: %w[data local], + **send(:tagged_logger_settings) + ) + else + Legion::Data::Connection::SegmentedTaggedLogger.new(segments: %w[data local]) + end + Legion::Data::Connection::SlowQueryLogger.new(tagged) + rescue StandardError => e + if respond_to?(:handle_exception, true) + handle_exception(e, level: :warn, handled: true, operation: :build_local_logger) + else + log.warn("build_local_logger failed: #{e.class}: #{e.message}") + end + Legion::Data::Connection::SlowQueryLogger.new( + Legion::Data::Connection::SegmentedTaggedLogger.new(segments: %w[data local], logger: log) + ) + end + + def resolved_sql_log_level + (local_settings[:sql_log_level] || Legion::Settings[:data][:sql_log_level] || 'debug').to_sym + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :resolved_sql_log_level) + :debug + end + + def resolved_log_warn_duration + local_settings[:log_warn_duration] || Legion::Settings[:data][:log_warn_duration] + rescue StandardError => e + 
handle_exception(e, level: :warn, handled: true, operation: :resolved_log_warn_duration) + nil + end + end + end + end +end diff --git a/lib/legion/data/migration.rb b/lib/legion/data/migration.rb index 526858a..e5bc215 100755 --- a/lib/legion/data/migration.rb +++ b/lib/legion/data/migration.rb @@ -1,13 +1,32 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' + require 'sequel/extensions/migration' module Legion module Data module Migration class << self - def migrate(connection = Legion::Data.connection, path = "#{__dir__}/migrations", **opts) - Legion::Settings[:data][:migrations][:version] = Sequel::Migrator.run(connection, path, **opts) - Legion::Logging.info("Legion::Data::Migration ran successfully to version #{Legion::Settings[:data][:migrations][:version]}") # rubocop:disable Layout/LineLength + include Legion::Logging::Helper + + def migrate(connection = Legion::Data.connection, path = "#{__dir__}/migrations", **) + if defined?(Legion::Mode) && Legion::Mode.respond_to?(:current) && !Legion::Mode.infra? + log.info "Legion::Data::Migration skipped (mode: #{Legion::Mode.current}, requires: infra)" + return + end + + Legion::Settings[:data][:migrations][:version] = Sequel::Migrator.run(connection, path, **) + log.info("Legion::Data::Migration ran successfully to version #{Legion::Settings[:data][:migrations][:version]}") Legion::Settings[:data][:migrations][:ran] = true + rescue Sequel::DatabaseError => e + handle_exception(e, level: :error, handled: false, operation: :migrate, path: path) + if e.message.include?('InsufficientPrivilege') || e.message.include?('permission denied') + raise Sequel::DatabaseError, + "#{e.message}\n Hint: the database user lacks CREATE on schema public " \ + '(required for PG 15+). 
Grant via: GRANT CREATE ON SCHEMA public TO ;' + end + raise end end end diff --git a/lib/legion/data/migrations/001_add_schema_columns.rb b/lib/legion/data/migrations/001_add_schema_columns.rb index 31dbbfd..1ad573d 100755 --- a/lib/legion/data/migrations/001_add_schema_columns.rb +++ b/lib/legion/data/migrations/001_add_schema_columns.rb @@ -1,10 +1,16 @@ +# frozen_string_literal: true + require 'sequel/extensions/migration' Sequel.migration do up do - run 'ALTER TABLE `schema_info` ADD `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP AFTER `version`;' - run 'ALTER TABLE `schema_info` ADD `updated_at` TIMESTAMP NULL ON UPDATE CURRENT_TIMESTAMP AFTER `created_at`;' - run 'ALTER TABLE `schema_info` ADD `catalog` VARCHAR(255) NULL DEFAULT NULL AFTER `version`;' + alter_table(:schema_info) do + # SQLite does not support non-constant defaults in ALTER TABLE ADD COLUMN, + # so we omit the default here and let the application set timestamps. + add_column :created_at, DateTime, null: true + add_column :updated_at, DateTime, null: true + add_column :catalog, String, size: 255, null: true + end end down do diff --git a/lib/legion/data/migrations/002_add_nodes.rb b/lib/legion/data/migrations/002_add_nodes.rb index 337d97e..4db7fa1 100755 --- a/lib/legion/data/migrations/002_add_nodes.rb +++ b/lib/legion/data/migrations/002_add_nodes.rb @@ -1,17 +1,15 @@ +# frozen_string_literal: true + Sequel.migration do up do - run "CREATE TABLE `nodes` ( - `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `name` varchar(128) NOT NULL DEFAULT '', - `status` varchar(255) NOT NULL DEFAULT 'unknown', - `active` tinyint(1) unsigned NOT NULL DEFAULT '1', - `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, - `updated` timestamp NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP, - PRIMARY KEY (`id`), - UNIQUE KEY `name` (`name`), - KEY `active` (`active`), - KEY `status` (`status`) - ) ENGINE=InnoDB DEFAULT CHARSET=utf8;" + create_table(:nodes) do + primary_key :id + String :name, 
size: 128, null: false, default: '', unique: true + String :status, size: 255, null: false, default: 'unknown', index: true + TrueClass :active, null: false, default: true, index: true + DateTime :created, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated, null: true + end end down do diff --git a/lib/legion/data/migrations/003_add_settings.rb b/lib/legion/data/migrations/003_add_settings.rb index 6b57601..9ccda46 100755 --- a/lib/legion/data/migrations/003_add_settings.rb +++ b/lib/legion/data/migrations/003_add_settings.rb @@ -1,15 +1,15 @@ +# frozen_string_literal: true + Sequel.migration do up do - run "CREATE TABLE `settings` ( - `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `key` varchar(128) NOT NULL, - `value` varchar(256) NOT NULL, - `encrypted` tinyint(1) unsigned NOT NULL DEFAULT '0', - `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, - `updated` timestamp NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP, - PRIMARY KEY (`id`), - UNIQUE KEY `key` (`key`) - ) ENGINE=InnoDB DEFAULT CHARSET=utf8;" + create_table(:settings) do + primary_key :id + String :key, size: 128, null: false, unique: true + String :value, size: 256, null: false + TrueClass :encrypted, null: false, default: false + DateTime :created, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated, null: true + end end down do diff --git a/lib/legion/data/migrations/004_add_extensions.rb b/lib/legion/data/migrations/004_add_extensions.rb index 10e5431..8324eed 100755 --- a/lib/legion/data/migrations/004_add_extensions.rb +++ b/lib/legion/data/migrations/004_add_extensions.rb @@ -1,22 +1,20 @@ +# frozen_string_literal: true + Sequel.migration do up do - run "CREATE TABLE `extensions` ( - `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `active` tinyint(1) unsigned NOT NULL DEFAULT '1', - `name` varchar(128) NOT NULL, - `namespace` varchar(128) NOT NULL DEFAULT '', - `exchange` varchar(255) DEFAULT NULL, - `uri` varchar(256) DEFAULT NULL, - `schema_version` 
int(11) unsigned NOT NULL DEFAULT 0, - `updated` timestamp NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP, - `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (`id`), - UNIQUE KEY `name_namespace` (`name`,`namespace`), - KEY `active` (`active`), - KEY `name` (`name`), - KEY `namespace` (`namespace`), - key `schema_version` (`schema_version`) - ) ENGINE=InnoDB DEFAULT CHARSET=utf8;" + create_table(:extensions) do + primary_key :id + TrueClass :active, null: false, default: true, index: true + String :name, size: 128, null: false, index: true + String :namespace, size: 128, null: false, default: '', index: true + String :exchange, size: 255, null: true + String :uri, size: 256, null: true + Integer :schema_version, null: false, default: 0, index: true + DateTime :updated, null: true + DateTime :created, null: false, default: Sequel::CURRENT_TIMESTAMP + + unique %i[name namespace] + end end down do diff --git a/lib/legion/data/migrations/005_add_runners.rb b/lib/legion/data/migrations/005_add_runners.rb index aa4a2bb..d407c3c 100755 --- a/lib/legion/data/migrations/005_add_runners.rb +++ b/lib/legion/data/migrations/005_add_runners.rb @@ -1,18 +1,18 @@ +# frozen_string_literal: true + Sequel.migration do up do - run "CREATE TABLE `runners` ( - `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `extension_id` int(11) unsigned NOT NULL, - `name` varchar(256) NOT NULL DEFAULT '', - `namespace` varchar(256) NOT NULL DEFAULT '', - `active` tinyint(1) unsigned NOT NULL DEFAULT '1', - `queue` varchar(256) DEFAULT NULL, - `uri` varchar(256) DEFAULT NULL, - `created` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, - `updated` datetime DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP, - PRIMARY KEY (`id`), - CONSTRAINT `runner_extension_id` FOREIGN KEY (`extension_id`) REFERENCES `extensions` (`id`) ON DELETE CASCADE ON UPDATE CASCADE - ) ENGINE=InnoDB DEFAULT CHARSET=utf8;" + create_table(:runners) do + primary_key :id + foreign_key :extension_id, :extensions, null: 
false, on_delete: :cascade, on_update: :cascade + String :name, size: 256, null: false, default: '' + String :namespace, size: 256, null: false, default: '' + TrueClass :active, null: false, default: true + String :queue, size: 256, null: true + String :uri, size: 256, null: true + DateTime :created, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated, null: true + end end down do diff --git a/lib/legion/data/migrations/006_add_functions.rb b/lib/legion/data/migrations/006_add_functions.rb index 81fbe70..9829aef 100755 --- a/lib/legion/data/migrations/006_add_functions.rb +++ b/lib/legion/data/migrations/006_add_functions.rb @@ -1,20 +1,18 @@ +# frozen_string_literal: true + Sequel.migration do up do - run "CREATE TABLE `functions` ( - `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `name` varchar(128) NOT NULL, - `active` tinyint(1) unsigned NOT NULL DEFAULT '1', - `runner_id` int(11) unsigned NOT NULL, - `args` text, - `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, - `updated` timestamp NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP, - PRIMARY KEY (`id`), - UNIQUE KEY `runner_id` (`runner_id`,`name`), - KEY `active` (`active`), - KEY `namespace` (`runner_id`), - KEY `name` (`name`), - CONSTRAINT `function_runner_id` FOREIGN KEY (`runner_id`) REFERENCES `runners` (`id`) ON DELETE CASCADE ON UPDATE CASCADE - ) ENGINE=InnoDB DEFAULT CHARSET=utf8;" + create_table(:functions) do + primary_key :id + String :name, size: 128, null: false, index: true + TrueClass :active, null: false, default: true, index: true + foreign_key :runner_id, :runners, null: false, on_delete: :cascade, on_update: :cascade, index: true + String :args, text: true, null: true + DateTime :created, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated, null: true + + unique %i[runner_id name] + end end down do diff --git a/lib/legion/data/migrations/007_add_default_extensions.rb b/lib/legion/data/migrations/007_add_default_extensions.rb index 8ee6c29..8b306aa 
100755 --- a/lib/legion/data/migrations/007_add_default_extensions.rb +++ b/lib/legion/data/migrations/007_add_default_extensions.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + Sequel.migration do up do lex = from(:extensions).insert(namespace: 'Legion::Extensions::Lex', name: 'lex', exchange: 'lex', uri: 'lex') diff --git a/lib/legion/data/migrations/008_add_tasks.rb b/lib/legion/data/migrations/008_add_tasks.rb index d186db7..c9c1522 100755 --- a/lib/legion/data/migrations/008_add_tasks.rb +++ b/lib/legion/data/migrations/008_add_tasks.rb @@ -1,26 +1,20 @@ +# frozen_string_literal: true + Sequel.migration do up do - run "CREATE TABLE `tasks` ( - `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `relationship_id` int(11) unsigned DEFAULT NULL, - `function_id` int(11) unsigned DEFAULT NULL, - `status` varchar(255) NOT NULL, - `parent_id` int(11) unsigned DEFAULT NULL, - `master_id` int(11) unsigned DEFAULT NULL, - `function_args` text, - `results` text, - `payload` text, - `created` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, - `updated` datetime DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP, - PRIMARY KEY (`id`), - KEY `status` (`status`), - KEY `parent_id` (`parent_id`), - KEY `master_id` (`master_id`), - KEY `relationship_id` (`relationship_id`), - KEY `function_id` (`function_id`), - CONSTRAINT `parent_id` FOREIGN KEY (`parent_id`) REFERENCES `tasks` (`id`) ON DELETE SET NULL ON UPDATE CASCADE, - CONSTRAINT `master_id` FOREIGN KEY (`master_id`) REFERENCES `tasks` (`id`) ON DELETE SET NULL ON UPDATE CASCADE - ) ENGINE=InnoDB DEFAULT CHARSET=utf8;" + create_table(:tasks) do + primary_key :id + Integer :relationship_id, null: true + foreign_key :function_id, :functions, null: true + String :status, size: 255, null: false, index: true + foreign_key :parent_id, :tasks, null: true, on_delete: :set_null, on_update: :cascade, index: true + foreign_key :master_id, :tasks, null: true, on_delete: :set_null, on_update: :cascade, index: true + String :function_args, text: 
true, null: true + String :results, text: true, null: true + String :payload, text: true, null: true + DateTime :created, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated, null: true + end end down do diff --git a/lib/legion/data/migrations/009_add_digital_workers.rb b/lib/legion/data/migrations/009_add_digital_workers.rb new file mode 100644 index 0000000..68a044e --- /dev/null +++ b/lib/legion/data/migrations/009_add_digital_workers.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table(:digital_workers) do + primary_key :id + String :worker_id, null: false, unique: true, size: 36 + String :name, null: false, size: 255 + String :entra_app_id, null: false, unique: true, size: 255 + String :entra_object_id, null: true, size: 255 + String :owner_msid, null: false, size: 255 + String :owner_name, null: true, size: 255 + String :extension_name, null: false, size: 255 + String :business_role, null: true, size: 255 + String :risk_tier, null: true, size: 50 + String :lifecycle_state, null: false, default: 'bootstrap', size: 50 + String :consent_tier, null: false, default: 'supervised', size: 50 + Float :trust_score, null: false, default: 0.0 + String :team, null: true, size: 255 + String :manager_msid, null: true, size: 255 + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: true + DateTime :retired_at, null: true + String :retired_by, null: true, size: 255 + String :retired_reason, null: true, text: true + index :owner_msid + index :lifecycle_state + index :team + end + + alter_table(:tasks) do + add_column :worker_id, String, null: true, size: 36 + add_index :worker_id + end + end + + down do + alter_table(:tasks) do + drop_index :worker_id + drop_column :worker_id + end + + drop_table :digital_workers + end +end diff --git a/lib/legion/data/migrations/010_add_value_metrics.rb b/lib/legion/data/migrations/010_add_value_metrics.rb new file mode 100644 index 
0000000..0dbb3eb --- /dev/null +++ b/lib/legion/data/migrations/010_add_value_metrics.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table(:value_metrics) do + primary_key :id + String :worker_id, null: false, size: 36, index: true + String :metric_name, null: false, size: 255, index: true + String :metric_type, null: false, size: 50 + Float :value, null: false, default: 0.0 + String :metadata, text: true, null: true + DateTime :recorded_at, null: false, default: Sequel::CURRENT_TIMESTAMP, index: true + end + end + + down do + drop_table :value_metrics + end +end diff --git a/lib/legion/data/migrations/011_add_extensions_registry.rb b/lib/legion/data/migrations/011_add_extensions_registry.rb new file mode 100644 index 0000000..957a659 --- /dev/null +++ b/lib/legion/data/migrations/011_add_extensions_registry.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:extensions_registry) do + primary_key :id + String :name, null: false, unique: true, size: 100 + String :module_name, null: false, size: 100 + String :category, null: false, size: 50, default: 'cognition' + String :description, text: true + String :cognitive_concept, text: true + String :metaphor_description, text: true + Integer :build_batch + DateTime :build_date + String :status, null: false, size: 20, default: 'active' + Integer :spec_count, default: 0 + Integer :spec_pass_count, default: 0 + String :wired_phase, size: 100 + Float :health_score, default: 1.0 + Integer :invocation_count, default: 0 + DateTime :last_invoked_at + DateTime :created_at + DateTime :updated_at + + index :category + index :status + index :health_score + end + end +end diff --git a/lib/legion/data/migrations/012_add_apollo_tables.rb b/lib/legion/data/migrations/012_add_apollo_tables.rb new file mode 100644 index 0000000..ebbd564 --- /dev/null +++ b/lib/legion/data/migrations/012_add_apollo_tables.rb @@ -0,0 +1,66 @@ +# 
frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + run 'CREATE EXTENSION IF NOT EXISTS vector' + run 'CREATE EXTENSION IF NOT EXISTS "uuid-ossp"' + + create_table(:apollo_entries) do + column :id, :uuid, default: Sequel.lit('uuid_generate_v4()'), primary_key: true + String :content, text: true, null: false + String :content_type, null: false, size: 50 + Float :confidence, default: 0.5 + String :source_agent, null: false, size: 100 + column :source_context, :jsonb, default: Sequel.lit("'{}'::jsonb") + column :tags, :'text[]', default: Sequel.lit("'{}'::text[]") + String :status, null: false, size: 20, default: 'candidate' + Integer :access_count, default: 0 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :confirmed_at + + index :status + end + run 'ALTER TABLE apollo_entries ADD COLUMN embedding vector(1536)' + run 'CREATE INDEX idx_apollo_entries_embedding ON apollo_entries USING hnsw (embedding vector_cosine_ops)' + run 'CREATE INDEX idx_apollo_entries_tags ON apollo_entries USING gin (tags)' + + create_table(:apollo_relations) do + column :id, :uuid, default: Sequel.lit('uuid_generate_v4()'), primary_key: true + foreign_key :from_entry_id, :apollo_entries, type: :uuid, null: false, index: true + foreign_key :to_entry_id, :apollo_entries, type: :uuid, null: false, index: true + String :relation_type, null: false, size: 50 + Float :weight, default: 1.0 + String :source_agent, size: 100 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + end + + create_table(:apollo_expertise) do + column :id, :uuid, default: Sequel.lit('uuid_generate_v4()'), primary_key: true + String :agent_id, null: false, size: 100, index: true + String :domain, null: false, size: 100, index: true + Float :proficiency, default: 0.0 + Integer :entry_count, default: 0 + DateTime :last_active_at, default: Sequel::CURRENT_TIMESTAMP + end + + 
create_table(:apollo_access_log) do + column :id, :uuid, default: Sequel.lit('uuid_generate_v4()'), primary_key: true + foreign_key :entry_id, :apollo_entries, type: :uuid, index: true + String :agent_id, null: false, size: 100 + String :action, null: false, size: 20 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + end + end + + down do + next unless adapter_scheme == :postgres + + drop_table(:apollo_access_log) if table_exists?(:apollo_access_log) + drop_table(:apollo_expertise) if table_exists?(:apollo_expertise) + drop_table(:apollo_relations) if table_exists?(:apollo_relations) + drop_table(:apollo_entries) if table_exists?(:apollo_entries) + end +end diff --git a/lib/legion/data/migrations/013_add_relationships.rb b/lib/legion/data/migrations/013_add_relationships.rb new file mode 100644 index 0000000..09e7c38 --- /dev/null +++ b/lib/legion/data/migrations/013_add_relationships.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table(:relationships) do + primary_key :id + foreign_key :trigger_id, :functions, null: true, on_delete: :set_null, index: true + foreign_key :action_id, :functions, null: true, on_delete: :set_null, index: true + String :name, size: 255, null: true + String :status, size: 50, null: false, default: 'active', index: true + String :relationship_type, size: 50, null: false, default: 'chain' + String :options, text: true, null: true + DateTime :created, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated, null: true + end + end + + down do + drop_table :relationships + end +end diff --git a/lib/legion/data/migrations/014_add_relationship_columns.rb b/lib/legion/data/migrations/014_add_relationship_columns.rb new file mode 100644 index 0000000..7905c62 --- /dev/null +++ b/lib/legion/data/migrations/014_add_relationship_columns.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:relationships) do + add_column :delay, Integer, null: 
false, default: 0 + add_column :chain_id, Integer, null: true, index: true + add_column :debug, TrueClass, null: false, default: false + add_column :allow_new_chains, TrueClass, null: false, default: false + add_column :conditions, String, text: true, null: true + add_column :transformation, String, text: true, null: true + add_column :active, TrueClass, null: false, default: true, index: true + end + end + + down do + alter_table(:relationships) do + drop_column :delay + drop_column :chain_id + drop_column :debug + drop_column :allow_new_chains + drop_column :conditions + drop_column :transformation + drop_column :active + end + end +end diff --git a/lib/legion/data/migrations/015_add_rbac_tables.rb b/lib/legion/data/migrations/015_add_rbac_tables.rb new file mode 100644 index 0000000..bd68cc7 --- /dev/null +++ b/lib/legion/data/migrations/015_add_rbac_tables.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table?(:rbac_role_assignments) do + primary_key :id + String :principal_type, null: false, size: 10 + String :principal_id, null: false, size: 255 + String :role, null: false, size: 100 + String :team, null: true, size: 255 + String :granted_by, null: false, size: 255 + DateTime :granted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :expires_at, null: true + unique %i[principal_type principal_id role team] + index :principal_id + index :team + end + + create_table?(:rbac_runner_grants) do + primary_key :id + String :team, null: false, size: 255 + String :runner_pattern, null: false, size: 500 + String :actions, null: false, size: 255 + String :granted_by, null: false, size: 255 + DateTime :granted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + unique %i[team runner_pattern] + index :team + end + + create_table?(:rbac_cross_team_grants) do + primary_key :id + String :source_team, null: false, size: 255 + String :target_team, null: false, size: 255 + String :runner_pattern, null: false, size: 500 + 
String :actions, null: false, size: 255 + String :granted_by, null: false, size: 255 + DateTime :granted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :expires_at, null: true + unique %i[source_team target_team runner_pattern] + index :source_team + end + end + + down do + drop_table :rbac_cross_team_grants + drop_table :rbac_runner_grants + drop_table :rbac_role_assignments + end +end diff --git a/lib/legion/data/migrations/016_add_worker_health.rb b/lib/legion/data/migrations/016_add_worker_health.rb new file mode 100644 index 0000000..2fdd81b --- /dev/null +++ b/lib/legion/data/migrations/016_add_worker_health.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:digital_workers) do + add_column :health_status, String, size: 20, default: 'unknown', null: false + add_column :last_heartbeat_at, DateTime, null: true + add_column :health_node, String, size: 255, null: true + add_index :health_status + end + + alter_table(:nodes) do + add_column :metrics, :text, null: true + add_column :hosted_worker_ids, :text, null: true + add_column :version, String, size: 50, null: true + end + end + + down do + alter_table(:digital_workers) do + drop_index :health_status + drop_column :health_node + drop_column :last_heartbeat_at + drop_column :health_status + end + + alter_table(:nodes) do + drop_column :version + drop_column :hosted_worker_ids + drop_column :metrics + end + end +end diff --git a/lib/legion/data/migrations/017_add_audit_log.rb b/lib/legion/data/migrations/017_add_audit_log.rb new file mode 100644 index 0000000..55739b9 --- /dev/null +++ b/lib/legion/data/migrations/017_add_audit_log.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table(:audit_log) do + primary_key :id + String :event_type, null: false, size: 50 + String :principal_id, null: false, size: 255 + String :principal_type, null: false, size: 20 + String :action, null: false, size: 100 + String 
:resource, null: false, size: 500 + String :source, null: false, size: 20 + String :node, null: false, size: 255 + String :status, null: false, size: 20 + Integer :duration_ms, null: true + column :detail, :text, null: true + String :record_hash, null: false, size: 64 + String :prev_hash, null: false, size: 64 + DateTime :created_at, null: false + + index :event_type + index :principal_id + index :created_at + end + end + + down do + drop_table :audit_log + end +end diff --git a/lib/legion/data/migrations/018_add_governance_events.rb b/lib/legion/data/migrations/018_add_governance_events.rb new file mode 100644 index 0000000..3ad6c79 --- /dev/null +++ b/lib/legion/data/migrations/018_add_governance_events.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:governance_events) do + primary_key :id + String :stream_id, null: false + String :event_type, null: false + Integer :sequence_number, null: false + column :data_json, :text + column :metadata_json, :text + String :event_hash, size: 64 + String :previous_hash, size: 64 + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index %i[stream_id sequence_number], unique: true + index :event_type + index :created_at + end + end +end diff --git a/lib/legion/data/migrations/019_add_audit_hash_chain.rb b/lib/legion/data/migrations/019_add_audit_hash_chain.rb new file mode 100644 index 0000000..7ee24ad --- /dev/null +++ b/lib/legion/data/migrations/019_add_audit_hash_chain.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:audit_log) + + cols = schema(:audit_log).map(&:first) + idxs = indexes(:audit_log) + + # record_hash exists from migration 017 at size 64; widen to 255 if needed. 
+ if cols.include?(:record_hash) + set_column_type :audit_log, :record_hash, String, size: 255 + else + alter_table(:audit_log) { add_column :record_hash, String, size: 255 } + end + + add_index :audit_log, :record_hash unless idxs.key?(:audit_log_record_hash_index) + + # Rename prev_hash (introduced in migration 017) to previous_hash for clarity, + # then widen it to 255 to match record_hash. + if cols.include?(:prev_hash) && !cols.include?(:previous_hash) + rename_column :audit_log, :prev_hash, :previous_hash + set_column_type :audit_log, :previous_hash, String, size: 255 + elsif !cols.include?(:previous_hash) + alter_table(:audit_log) { add_column :previous_hash, String, size: 255 } + elsif cols.include?(:previous_hash) + set_column_type :audit_log, :previous_hash, String, size: 255 + end + + alter_table(:audit_log) { add_column :retention_tier, String, size: 10, default: 'hot' } unless cols.include?(:retention_tier) + + add_index :audit_log, :retention_tier unless idxs.key?(:audit_log_retention_tier_index) + end + + down do + next unless table_exists?(:audit_log) + + cols = schema(:audit_log).map(&:first) + + drop_index :audit_log, :record_hash, if_exists: true + + # Restore record_hash to its original size (64 from migration 017). + set_column_type :audit_log, :record_hash, String, size: 64 if cols.include?(:record_hash) + + # Rename previous_hash back to prev_hash (reverse of the up rename) and restore size to 64. 
+ if cols.include?(:previous_hash) && !cols.include?(:prev_hash) + rename_column :audit_log, :previous_hash, :prev_hash + set_column_type :audit_log, :prev_hash, String, size: 64 + elsif cols.include?(:previous_hash) + alter_table(:audit_log) { drop_column :previous_hash } + end + + if cols.include?(:retention_tier) + drop_index :audit_log, :retention_tier, if_exists: true + alter_table(:audit_log) { drop_column :retention_tier } + end + end +end diff --git a/lib/legion/data/migrations/020_add_webhooks.rb b/lib/legion/data/migrations/020_add_webhooks.rb new file mode 100644 index 0000000..20e3a43 --- /dev/null +++ b/lib/legion/data/migrations/020_add_webhooks.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table(:webhooks) do + primary_key :id + String :url, null: false, size: 2048 + String :secret, null: false, size: 255 + String :event_types, text: true + String :status, default: 'active', size: 20 + Integer :max_retries, default: 5 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP + end + + create_table(:webhook_deliveries) do + primary_key :id + foreign_key :webhook_id, :webhooks, null: false, index: true + String :event_name, null: false, size: 255 + Integer :response_status + TrueClass :success + Integer :attempt, default: 1 + String :error, text: true + DateTime :delivered_at, default: Sequel::CURRENT_TIMESTAMP + end + + create_table(:webhook_dead_letters) do + primary_key :id + foreign_key :webhook_id, :webhooks, null: false, index: true + String :event_name, null: false, size: 255 + String :payload, text: true + Integer :attempts + String :last_error, text: true + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + end + end + + down do + drop_table?(:webhook_dead_letters) + drop_table?(:webhook_deliveries) + drop_table?(:webhooks) + end +end diff --git a/lib/legion/data/migrations/021_add_archive_tables.rb 
b/lib/legion/data/migrations/021_add_archive_tables.rb new file mode 100644 index 0000000..30eb147 --- /dev/null +++ b/lib/legion/data/migrations/021_add_archive_tables.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + unless table_exists?(:tasks_archive) + create_table(:tasks_archive) do + primary_key :id + Integer :original_id, null: false + String :function_name + String :status + String :runner_class + column :args, :text + column :result, :text + String :queue + Integer :relationship_id + String :chain_id + DateTime :original_created_at + DateTime :original_updated_at + DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP + index :original_id + index :chain_id + index :archived_at + end + end + + unless table_exists?(:metering_records_archive) + create_table(:metering_records_archive) do + primary_key :id + Integer :original_id, null: false + String :worker_id + String :event_type + String :extension + String :runner_function + String :status + Integer :tokens_in + Integer :tokens_out + Float :cost_usd + Integer :wall_clock_ms + Integer :cpu_time_ms + Integer :external_api_calls + String :model + String :tenant_id + DateTime :original_created_at + DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP + index :original_id + index :worker_id + index :tenant_id + index :archived_at + end + end + end + + down do + drop_table(:metering_records_archive) if table_exists?(:metering_records_archive) + drop_table(:tasks_archive) if table_exists?(:tasks_archive) + end +end diff --git a/lib/legion/data/migrations/022_add_memory_traces.rb b/lib/legion/data/migrations/022_add_memory_traces.rb new file mode 100644 index 0000000..bc43e5c --- /dev/null +++ b/lib/legion/data/migrations/022_add_memory_traces.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table(:memory_traces) do + primary_key :id + String :agent_id, null: false, size: 64, index: true + String 
:trace_type, null: false, size: 32 + String :content, text: true, null: false + Float :significance, default: 0.5 + Float :confidence, default: 1.0 + String :associations, text: true + String :metadata, text: true + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :accessed_at + DateTime :decayed_at + index %i[agent_id trace_type] + end + + next unless adapter_scheme == :postgres + + run 'ALTER TABLE memory_traces ADD COLUMN IF NOT EXISTS embedding vector(1536)' + run 'CREATE INDEX IF NOT EXISTS idx_memory_traces_embedding ON memory_traces USING hnsw (embedding vector_cosine_ops)' + end + + down do + drop_table?(:memory_traces) + end +end diff --git a/lib/legion/data/migrations/023_add_data_archive.rb b/lib/legion/data/migrations/023_add_data_archive.rb new file mode 100644 index 0000000..1725611 --- /dev/null +++ b/lib/legion/data/migrations/023_add_data_archive.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table(:data_archive) do + primary_key :id + String :source_table, null: false, size: 64, index: true + Integer :source_id, null: false + String :data, text: true, null: false + Integer :tier, default: 1 + DateTime :archived_at, default: Sequel::CURRENT_TIMESTAMP + index %i[source_table source_id] + index :tier + end + end + + down do + drop_table?(:data_archive) + end +end diff --git a/lib/legion/data/migrations/024_add_tenant_partition_columns.rb b/lib/legion/data/migrations/024_add_tenant_partition_columns.rb new file mode 100644 index 0000000..0c09b2d --- /dev/null +++ b/lib/legion/data/migrations/024_add_tenant_partition_columns.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + %i[tasks digital_workers audit_log memory_traces].each do |table| + next unless table_exists?(table) + next if schema(table).any? 
{ |col, _| col == :tenant_id } + + alter_table(table) do + add_column :tenant_id, String, size: 64 + add_index :tenant_id + end + end + end + + down do + %i[tasks digital_workers audit_log memory_traces].each do |table| + next unless table_exists?(table) + next unless schema(table).any? { |col, _| col == :tenant_id } + + alter_table(table) do + drop_index :tenant_id + drop_column :tenant_id + end + end + end +end diff --git a/lib/legion/data/migrations/025_add_tenants_table.rb b/lib/legion/data/migrations/025_add_tenants_table.rb new file mode 100644 index 0000000..bb3d57c --- /dev/null +++ b/lib/legion/data/migrations/025_add_tenants_table.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table(:tenants) do + primary_key :id + String :tenant_id, null: false, unique: true, size: 100 + String :name, size: 255 + String :status, default: 'active', size: 20 + Integer :max_workers, default: 10 + Integer :max_queue_depth, default: 10_000 + Float :monthly_token_limit + Float :daily_token_limit + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP + index :status + end + end + + down do + drop_table?(:tenants) + end +end diff --git a/lib/legion/data/migrations/026_add_function_embeddings.rb b/lib/legion/data/migrations/026_add_function_embeddings.rb new file mode 100644 index 0000000..a94bf25 --- /dev/null +++ b/lib/legion/data/migrations/026_add_function_embeddings.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:functions) do + add_column :description, String, text: true, null: true + add_column :embedding, String, text: true, null: true + end + + next unless adapter_scheme == :postgres + + run 'ALTER TABLE functions ADD COLUMN IF NOT EXISTS embedding_vector vector(1536)' + run 'CREATE INDEX IF NOT EXISTS idx_functions_embedding ON functions USING hnsw (embedding_vector vector_cosine_ops)' + end + + down do + 
alter_table(:functions) do + drop_column :embedding + drop_column :description + end + + if adapter_scheme == :postgres + run 'DROP INDEX IF EXISTS idx_functions_embedding' + run 'ALTER TABLE functions DROP COLUMN IF EXISTS embedding_vector' + end + end +end diff --git a/lib/legion/data/migrations/027_add_apollo_source_provider.rb b/lib/legion/data/migrations/027_add_apollo_source_provider.rb new file mode 100644 index 0000000..f304a56 --- /dev/null +++ b/lib/legion/data/migrations/027_add_apollo_source_provider.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + alter_table(:apollo_entries) do + add_column :source_provider, String, size: 50, null: true + end + + run "UPDATE apollo_entries SET source_provider = 'unknown' WHERE source_provider IS NULL" + end + + down do + next unless adapter_scheme == :postgres + + alter_table(:apollo_entries) do + drop_column :source_provider + end + end +end diff --git a/lib/legion/data/migrations/028_add_agent_cluster.rb b/lib/legion/data/migrations/028_add_agent_cluster.rb new file mode 100644 index 0000000..ae282b5 --- /dev/null +++ b/lib/legion/data/migrations/028_add_agent_cluster.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next if table_exists?(:agent_cluster_nodes) + + create_table(:agent_cluster_nodes) do + primary_key :id + String :node_id, null: false, unique: true + String :role, null: false, default: 'worker' + String :status, null: false, default: 'active' + DateTime :joined_at, null: false + DateTime :last_seen + String :tenant_id + index :status + index :tenant_id + end + end + + down do + drop_table?(:agent_cluster_nodes) + end +end diff --git a/lib/legion/data/migrations/029_add_agent_cluster_tasks.rb b/lib/legion/data/migrations/029_add_agent_cluster_tasks.rb new file mode 100644 index 0000000..a917452 --- /dev/null +++ b/lib/legion/data/migrations/029_add_agent_cluster_tasks.rb @@ -0,0 +1,25 @@ +# 
frozen_string_literal: true + +Sequel.migration do + up do + next if table_exists?(:agent_cluster_tasks) + + create_table(:agent_cluster_tasks) do + primary_key :id + String :task_type, null: false + Text :payload + String :assigned_to + String :status, null: false, default: 'pending' + DateTime :created_at, null: false + DateTime :completed_at + String :tenant_id + index :status + index :assigned_to + index :tenant_id + end + end + + down do + drop_table?(:agent_cluster_tasks) + end +end diff --git a/lib/legion/data/migrations/030_add_approval_queue.rb b/lib/legion/data/migrations/030_add_approval_queue.rb new file mode 100644 index 0000000..909dbf5 --- /dev/null +++ b/lib/legion/data/migrations/030_add_approval_queue.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next if table_exists?(:approval_queue) + + create_table(:approval_queue) do + primary_key :id + String :approval_type, null: false + Text :payload + String :requester_id, null: false + String :status, null: false, default: 'pending' + String :reviewer_id + DateTime :reviewed_at + DateTime :created_at, null: false + String :tenant_id + index :status + index :tenant_id + index :created_at + end + end + + down do + drop_table?(:approval_queue) + end +end diff --git a/lib/legion/data/migrations/031_add_task_depth.rb b/lib/legion/data/migrations/031_add_task_depth.rb new file mode 100644 index 0000000..cc24e2b --- /dev/null +++ b/lib/legion/data/migrations/031_add_task_depth.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:tasks) + next if schema(:tasks).any? { |col, _| col == :depth } + + alter_table(:tasks) do + add_column :depth, Integer, default: 0, null: false + end + end + + down do + next unless table_exists?(:tasks) + next unless schema(:tasks).any? 
{ |col, _| col == :depth } + + alter_table(:tasks) do + drop_column :depth + end + end +end diff --git a/lib/legion/data/migrations/032_add_task_cancelled_at.rb b/lib/legion/data/migrations/032_add_task_cancelled_at.rb new file mode 100644 index 0000000..6d00171 --- /dev/null +++ b/lib/legion/data/migrations/032_add_task_cancelled_at.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:tasks) + next if schema(:tasks).any? { |col, _| col == :cancelled_at } + + alter_table(:tasks) do + add_column :cancelled_at, DateTime, null: true + end + end + + down do + next unless table_exists?(:tasks) + next unless schema(:tasks).any? { |col, _| col == :cancelled_at } + + alter_table(:tasks) do + drop_column :cancelled_at + end + end +end diff --git a/lib/legion/data/migrations/033_add_task_delay.rb b/lib/legion/data/migrations/033_add_task_delay.rb new file mode 100644 index 0000000..adf9fa2 --- /dev/null +++ b/lib/legion/data/migrations/033_add_task_delay.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:tasks) + next if schema(:tasks).any? { |col, _| col == :task_delay } + + alter_table(:tasks) do + add_column :task_delay, Integer, null: true + end + end + + down do + next unless table_exists?(:tasks) + next unless schema(:tasks).any? 
{ |col, _| col == :task_delay } + + alter_table(:tasks) do + drop_column :task_delay + end + end +end diff --git a/lib/legion/data/migrations/034_add_archive_manifest.rb b/lib/legion/data/migrations/034_add_archive_manifest.rb new file mode 100644 index 0000000..abdd37e --- /dev/null +++ b/lib/legion/data/migrations/034_add_archive_manifest.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless [:postgres].include?(adapter_scheme) + next if table_exists?(:archive_manifest) + + create_table(:archive_manifest) do + primary_key :id + String :batch_id, null: false, unique: true + String :source_table, null: false + Integer :row_count, null: false + String :checksum, null: false + String :storage_path, null: false + DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP + column :metadata, :jsonb + + index :source_table + index :archived_at + end + end + + down do + next unless [:postgres].include?(adapter_scheme) + + drop_table(:archive_manifest) if table_exists?(:archive_manifest) + end +end diff --git a/lib/legion/data/migrations/035_add_apollo_source_channel.rb b/lib/legion/data/migrations/035_add_apollo_source_channel.rb new file mode 100644 index 0000000..d13e346 --- /dev/null +++ b/lib/legion/data/migrations/035_add_apollo_source_channel.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + alter_table(:apollo_entries) do + add_column :source_channel, String, size: 100, null: true + end + end + + down do + next unless adapter_scheme == :postgres + + alter_table(:apollo_entries) do + drop_column :source_channel + end + end +end diff --git a/lib/legion/data/migrations/036_add_audit_context_snapshot.rb b/lib/legion/data/migrations/036_add_audit_context_snapshot.rb new file mode 100644 index 0000000..f899778 --- /dev/null +++ b/lib/legion/data/migrations/036_add_audit_context_snapshot.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + 
+Sequel.migration do + up do + alter_table(:audit_log) do + add_column :context_snapshot, :text, null: true + end + end + + down do + alter_table(:audit_log) do + drop_column :context_snapshot + end + end +end diff --git a/lib/legion/data/migrations/037_add_apollo_knowledge_domain.rb b/lib/legion/data/migrations/037_add_apollo_knowledge_domain.rb new file mode 100644 index 0000000..0775c04 --- /dev/null +++ b/lib/legion/data/migrations/037_add_apollo_knowledge_domain.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + alter_table(:apollo_entries) do + add_column :knowledge_domain, String, size: 50, default: 'general' + end + + add_index :apollo_entries, :knowledge_domain + end + + down do + next unless adapter_scheme == :postgres + + drop_index :apollo_entries, :knowledge_domain + alter_table(:apollo_entries) do + drop_column :knowledge_domain + end + end +end diff --git a/lib/legion/data/migrations/038_add_conversations.rb b/lib/legion/data/migrations/038_add_conversations.rb new file mode 100644 index 0000000..3232bf7 --- /dev/null +++ b/lib/legion/data/migrations/038_add_conversations.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table(:conversations) do + String :id, primary_key: true, size: 64 + String :caller_identity, size: 255 + String :metadata, text: true + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP + end + + create_table(:conversation_messages) do + primary_key :id + String :conversation_id, size: 64, null: false + Integer :seq, null: false + String :role, size: 32, null: false + String :content, text: true + String :provider, size: 64 + String :model, size: 128 + Integer :input_tokens + Integer :output_tokens + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + + index %i[conversation_id seq], unique: true + foreign_key [:conversation_id], :conversations, 
key: :id + end + end + + down do + drop_table(:conversation_messages) + drop_table(:conversations) + end +end diff --git a/lib/legion/data/migrations/039_add_audit_archive_manifest.rb b/lib/legion/data/migrations/039_add_audit_archive_manifest.rb new file mode 100644 index 0000000..be2095e --- /dev/null +++ b/lib/legion/data/migrations/039_add_audit_archive_manifest.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + unless table_exists?(:audit_archive_manifests) + create_table(:audit_archive_manifests) do + primary_key :id + String :tier, null: false, size: 10 # hot, warm, cold + String :storage_url, null: false, size: 2000 + DateTime :start_date, null: false + DateTime :end_date, null: false + Integer :entry_count, null: false + String :checksum, null: false, size: 64 # SHA-256 hex + String :first_hash, null: false, size: 64 # record_hash of first entry + String :last_hash, null: false, size: 64 # record_hash of last entry + DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :tier + index :archived_at + index %i[start_date end_date] + end + end + end + + down do + drop_table(:audit_archive_manifests) if table_exists?(:audit_archive_manifests) + end +end diff --git a/lib/legion/data/migrations/040_add_slow_query_indexes.rb b/lib/legion/data/migrations/040_add_slow_query_indexes.rb new file mode 100644 index 0000000..43448a9 --- /dev/null +++ b/lib/legion/data/migrations/040_add_slow_query_indexes.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + # tasks.created — used by tasker check_subtask time-range scans + next unless table_exists?(:tasks) + + alter_table(:tasks) do + add_index :created, name: :idx_tasks_created, if_not_exists: true + add_index %i[status function_id relationship_id], name: :idx_tasks_status_func_rel, if_not_exists: true + end + end + + down do + next unless table_exists?(:tasks) + + alter_table(:tasks) do + drop_index :created, name: 
:idx_tasks_created, if_exists: true + drop_index %i[status function_id relationship_id], name: :idx_tasks_status_func_rel, if_exists: true + end + end +end diff --git a/lib/legion/data/migrations/041_resize_vector_columns.rb b/lib/legion/data/migrations/041_resize_vector_columns.rb new file mode 100644 index 0000000..73d1308 --- /dev/null +++ b/lib/legion/data/migrations/041_resize_vector_columns.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + # Resize embedding columns from 1536 to 1024 for cross-provider compatibility + # (Bedrock Titan v2, OpenAI with dimensions:, Ollama mxbai-embed-large all support 1024) + # Knowledge store is empty so no data re-embedding needed. + + if table_exists?(:apollo_entries) + run 'DROP INDEX IF EXISTS idx_apollo_entries_embedding' + run 'ALTER TABLE apollo_entries ALTER COLUMN embedding TYPE vector(1024)' + run 'CREATE INDEX idx_apollo_entries_embedding ON apollo_entries USING hnsw (embedding vector_cosine_ops)' + end + + if table_exists?(:functions) + run 'DROP INDEX IF EXISTS idx_functions_embedding' + run 'ALTER TABLE functions ALTER COLUMN embedding_vector TYPE vector(1024)' + run 'CREATE INDEX idx_functions_embedding ON functions USING hnsw (embedding_vector vector_cosine_ops)' + end + + if table_exists?(:memory_traces) + run 'DROP INDEX IF EXISTS idx_memory_traces_embedding' + run 'ALTER TABLE memory_traces ALTER COLUMN embedding TYPE vector(1024)' + run 'CREATE INDEX idx_memory_traces_embedding ON memory_traces USING hnsw (embedding vector_cosine_ops)' + end + end + + down do + next unless adapter_scheme == :postgres + + if table_exists?(:apollo_entries) + run 'DROP INDEX IF EXISTS idx_apollo_entries_embedding' + run 'ALTER TABLE apollo_entries ALTER COLUMN embedding TYPE vector(1536)' + run 'CREATE INDEX idx_apollo_entries_embedding ON apollo_entries USING hnsw (embedding vector_cosine_ops)' + end + + if table_exists?(:functions) + run 'DROP 
INDEX IF EXISTS idx_functions_embedding' + run 'ALTER TABLE functions ALTER COLUMN embedding_vector TYPE vector(1536)' + run 'CREATE INDEX idx_functions_embedding ON functions USING hnsw (embedding_vector vector_cosine_ops)' + end + + if table_exists?(:memory_traces) + run 'DROP INDEX IF EXISTS idx_memory_traces_embedding' + run 'ALTER TABLE memory_traces ALTER COLUMN embedding TYPE vector(1536)' + run 'CREATE INDEX idx_memory_traces_embedding ON memory_traces USING hnsw (embedding vector_cosine_ops)' + end + end +end diff --git a/lib/legion/data/migrations/042_add_tenant_to_registry_tables.rb b/lib/legion/data/migrations/042_add_tenant_to_registry_tables.rb new file mode 100644 index 0000000..10d0034 --- /dev/null +++ b/lib/legion/data/migrations/042_add_tenant_to_registry_tables.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + %i[extensions functions runners nodes settings value_metrics].each do |table| + next unless table_exists?(table) + next if schema(table).any? { |col, _| col == :tenant_id } + + alter_table(table) do + add_column :tenant_id, String, size: 64 + add_index :tenant_id, name: :"idx_#{table}_tenant_id" + end + end + end + + down do + %i[extensions functions runners nodes settings value_metrics].each do |table| + next unless table_exists?(table) + next unless schema(table).any? 
{ |col, _| col == :tenant_id } + + alter_table(table) do + drop_index :tenant_id, name: :"idx_#{table}_tenant_id" + drop_column :tenant_id + end + end + end +end diff --git a/lib/legion/data/migrations/043_add_rls_placeholder.rb b/lib/legion/data/migrations/043_add_rls_placeholder.rb new file mode 100644 index 0000000..2a7c3c7 --- /dev/null +++ b/lib/legion/data/migrations/043_add_rls_placeholder.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + tables = %i[ + tasks digital_workers audit_log memory_traces extensions + functions runners nodes settings value_metrics + ] + + tables.each do |table| + next unless table_exists?(table) + next unless schema(table).any? { |col, _| col == :tenant_id } + + run "ALTER TABLE #{table} ENABLE ROW LEVEL SECURITY" + run <<~SQL + DO $$ BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_policies WHERE tablename = '#{table}' AND policyname = 'tenant_isolation_#{table}' + ) THEN + CREATE POLICY tenant_isolation_#{table} ON #{table} + USING (tenant_id = current_setting('app.current_tenant', true)); + END IF; + END $$; + SQL + end + end + + down do + next unless adapter_scheme == :postgres + + tables = %i[ + tasks digital_workers audit_log memory_traces extensions + functions runners nodes settings value_metrics + ] + + tables.each do |table| + next unless table_exists?(table) + + run "DROP POLICY IF EXISTS tenant_isolation_#{table} ON #{table}" + run "ALTER TABLE #{table} DISABLE ROW LEVEL SECURITY" + end + end +end diff --git a/lib/legion/data/migrations/044_expand_memory_traces.rb b/lib/legion/data/migrations/044_expand_memory_traces.rb new file mode 100644 index 0000000..7407bec --- /dev/null +++ b/lib/legion/data/migrations/044_expand_memory_traces.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:memory_traces) + + existing = schema(:memory_traces).map(&:first) + + alter_table(:memory_traces) do + 
add_column :trace_id, String, size: 36 unless existing.include?(:trace_id) + add_column :strength, Float, default: 0.5 unless existing.include?(:strength) + add_column :peak_strength, Float, default: 0.5 unless existing.include?(:peak_strength) + add_column :base_decay_rate, Float, default: 0.05 unless existing.include?(:base_decay_rate) + add_column :emotional_valence, Float, default: 0.0 unless existing.include?(:emotional_valence) + add_column :emotional_intensity, Float, default: 0.0 unless existing.include?(:emotional_intensity) + add_column :domain_tags, :text unless existing.include?(:domain_tags) + add_column :origin, String, size: 50 unless existing.include?(:origin) + add_column :source_agent_id, String, size: 255 unless existing.include?(:source_agent_id) + add_column :storage_tier, String, size: 10, default: 'warm' unless existing.include?(:storage_tier) + add_column :last_reinforced, DateTime unless existing.include?(:last_reinforced) + add_column :last_decayed, DateTime unless existing.include?(:last_decayed) + add_column :reinforcement_count, Integer, default: 0 unless existing.include?(:reinforcement_count) + add_column :unresolved, TrueClass, default: false unless existing.include?(:unresolved) + add_column :consolidation_candidate, TrueClass, default: false unless existing.include?(:consolidation_candidate) + add_column :parent_trace_id, String, size: 36 unless existing.include?(:parent_trace_id) + add_column :encryption_key_id, String, size: 255 unless existing.include?(:encryption_key_id) + add_column :partition_id, String, size: 255 unless existing.include?(:partition_id) + end + + indexes = begin + db.indexes(:memory_traces).keys + rescue StandardError => e + if defined?(Legion::Data) && Legion::Data.respond_to?(:handle_exception) + Legion::Data.handle_exception(e, level: :warn, handled: true, operation: :migration_044_indexes) + end + [] + end + + add_index :memory_traces, :trace_id, unique: true, name: :idx_memory_traces_trace_id unless 
existing.include?(:trace_id) + + add_index :memory_traces, :storage_tier, name: :idx_memory_traces_storage_tier unless indexes.include?(:idx_memory_traces_storage_tier) + add_index :memory_traces, :partition_id, name: :idx_memory_traces_partition_id unless indexes.include?(:idx_memory_traces_partition_id) + add_index :memory_traces, %i[partition_id trace_type], name: :idx_memory_traces_partition_type unless indexes.include?(:idx_memory_traces_partition_type) + add_index :memory_traces, :unresolved, name: :idx_memory_traces_unresolved unless indexes.include?(:idx_memory_traces_unresolved) + end + + down do + next unless table_exists?(:memory_traces) + + existing = schema(:memory_traces).map(&:first) + + %i[trace_id strength peak_strength base_decay_rate emotional_valence emotional_intensity + domain_tags origin source_agent_id storage_tier last_reinforced last_decayed + reinforcement_count unresolved consolidation_candidate parent_trace_id + encryption_key_id partition_id].each do |col| + alter_table(:memory_traces) { drop_column col } if existing.include?(col) + end + end +end diff --git a/lib/legion/data/migrations/045_add_memory_associations.rb b/lib/legion/data/migrations/045_add_memory_associations.rb new file mode 100644 index 0000000..815b153 --- /dev/null +++ b/lib/legion/data/migrations/045_add_memory_associations.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next if table_exists?(:memory_associations) + + create_table(:memory_associations) do + primary_key :id + String :trace_id_a, size: 36, null: false + String :trace_id_b, size: 36, null: false + Integer :coactivation_count, default: 1, null: false + TrueClass :linked, default: false, null: false + String :tenant_id, size: 64 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP + + unique %i[trace_id_a trace_id_b] + index :trace_id_a + index :trace_id_b + index :tenant_id + end + end + + down do + 
drop_table?(:memory_associations) + end +end diff --git a/lib/legion/data/migrations/046_add_metering_hourly_rollup.rb b/lib/legion/data/migrations/046_add_metering_hourly_rollup.rb new file mode 100644 index 0000000..75863c3 --- /dev/null +++ b/lib/legion/data/migrations/046_add_metering_hourly_rollup.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next if table_exists?(:metering_hourly_rollup) + + create_table(:metering_hourly_rollup) do + primary_key :id + String :worker_id, size: 36, null: false + String :provider, size: 100, null: false + String :model_id, size: 255, null: false + DateTime :hour, null: false + Integer :total_input_tokens, default: 0, null: false + Integer :total_output_tokens, default: 0, null: false + Integer :total_thinking_tokens, default: 0, null: false + Integer :total_calls, default: 0, null: false + Float :total_cost_usd, default: 0.0, null: false + Float :avg_latency_ms, default: 0.0, null: false + String :tenant_id, size: 64 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + + unique %i[worker_id provider model_id hour], name: :idx_rollup_unique_hour + index :hour + index :tenant_id + index %i[worker_id hour] + end + end + + down do + drop_table?(:metering_hourly_rollup) + end +end diff --git a/lib/legion/data/migrations/047_apollo_knowledge_capture.rb b/lib/legion/data/migrations/047_apollo_knowledge_capture.rb new file mode 100644 index 0000000..3548066 --- /dev/null +++ b/lib/legion/data/migrations/047_apollo_knowledge_capture.rb @@ -0,0 +1,152 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + # --- Identity columns on apollo_entries --- + alter_table(:apollo_entries) do + add_column :submitted_by, String, size: 255 + add_column :submitted_from, String, size: 255 + add_column :content_hash, String, fixed: true, size: 32 + end + + # --- apollo_operations table --- + run <<~SQL + CREATE TABLE IF NOT EXISTS apollo_operations ( + 
id BIGSERIAL PRIMARY KEY, + operation VARCHAR(50) NOT NULL, + actor VARCHAR(100) NOT NULL, + target_type VARCHAR(50), + target_ids INTEGER[], + summary TEXT, + detail JSONB, + old_state JSONB, + new_state JSONB, + reason TEXT, + principal_id VARCHAR(255), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ); + SQL + + # --- apollo_entries_archive table --- + run <<~SQL + CREATE TABLE IF NOT EXISTS apollo_entries_archive ( + LIKE apollo_entries INCLUDING ALL, + archived_at TIMESTAMPTZ DEFAULT NOW(), + archive_reason TEXT + ); + SQL + + # --- Indexes: apollo_entries --- + run 'CREATE INDEX IF NOT EXISTS idx_apollo_submitted_by ON apollo_entries (submitted_by);' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_submitted_from ON apollo_entries (submitted_from);' + + # Content hash dedup (unique among active entries only) + run <<~SQL + CREATE UNIQUE INDEX IF NOT EXISTS idx_apollo_content_hash + ON apollo_entries (content_hash) + WHERE status != 'archived'; + SQL + + # Status filtering (every read query filters on status) + run 'CREATE INDEX IF NOT EXISTS idx_apollo_status ON apollo_entries (status);' + + # Partial index: active entries only (hot path) + run <<~SQL + CREATE INDEX IF NOT EXISTS idx_apollo_active + ON apollo_entries (id) + WHERE status IN ('candidate', 'confirmed', 'disputed'); + SQL + + # Confidence ranking and decay targeting + run 'CREATE INDEX IF NOT EXISTS idx_apollo_confidence ON apollo_entries (confidence);' + + # Time-based: decay age, archival sweep + run 'CREATE INDEX IF NOT EXISTS idx_apollo_created ON apollo_entries (created_at);' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_updated ON apollo_entries (updated_at);' + + # Composite: decay cycle targets + run <<~SQL + CREATE INDEX IF NOT EXISTS idx_apollo_decay_target + ON apollo_entries (updated_at) + WHERE status != 'archived'; + SQL + + # Composite: corroboration targets + run <<~SQL + CREATE INDEX IF NOT EXISTS idx_apollo_candidates + ON apollo_entries (status, source_provider, source_channel) + 
WHERE status = 'candidate' AND embedding IS NOT NULL; + SQL + + # Knowledge domain (expertise, RBAC) + run 'CREATE INDEX IF NOT EXISTS idx_apollo_domain ON apollo_entries (knowledge_domain);' + + # Source agent (expertise aggregation) + run 'CREATE INDEX IF NOT EXISTS idx_apollo_source_agent ON apollo_entries (source_agent);' + + # Drop existing HNSW index and recreate as partial (active entries only) + run 'DROP INDEX IF EXISTS apollo_entries_embedding_idx;' + run <<~SQL + CREATE INDEX IF NOT EXISTS idx_apollo_embedding_active + ON apollo_entries USING hnsw (embedding vector_cosine_ops) + WITH (m = 16, ef_construction = 64) + WHERE status IN ('candidate', 'confirmed', 'disputed'); + SQL + + # --- Indexes: apollo_relations --- + run 'CREATE INDEX IF NOT EXISTS idx_apollo_rel_from ON apollo_relations (from_entry_id);' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_rel_to ON apollo_relations (to_entry_id);' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_rel_type ON apollo_relations (relation_type);' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_rel_composite ON apollo_relations (from_entry_id, relation_type);' + + # --- Indexes: apollo_expertise --- + run 'CREATE INDEX IF NOT EXISTS idx_apollo_exp_agent ON apollo_expertise (agent_id);' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_exp_domain ON apollo_expertise (domain);' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_exp_composite ON apollo_expertise (agent_id, domain);' + + # --- Indexes: apollo_operations --- + run 'CREATE INDEX IF NOT EXISTS idx_apollo_ops_created ON apollo_operations (created_at);' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_ops_operation ON apollo_operations (operation);' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_ops_actor ON apollo_operations (actor);' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_ops_target ON apollo_operations USING GIN (target_ids);' + + # --- Indexes: apollo_entries_archive --- + run 'CREATE INDEX IF NOT EXISTS idx_archive_content_hash ON apollo_entries_archive (content_hash);' 
+ run 'CREATE INDEX IF NOT EXISTS idx_archive_source_agent ON apollo_entries_archive (source_agent);' + run 'CREATE INDEX IF NOT EXISTS idx_archive_archived_at ON apollo_entries_archive (archived_at);' + end + + down do + next unless adapter_scheme == :postgres + + # Restore original HNSW index (non-partial) + run 'DROP INDEX IF EXISTS idx_apollo_embedding_active;' + run <<~SQL + CREATE INDEX IF NOT EXISTS apollo_entries_embedding_idx + ON apollo_entries USING hnsw (embedding vector_cosine_ops); + SQL + + drop_table?(:apollo_entries_archive) + drop_table?(:apollo_operations) + + # Drop new indexes + %w[ + idx_apollo_submitted_by idx_apollo_submitted_from idx_apollo_content_hash + idx_apollo_status idx_apollo_active idx_apollo_confidence + idx_apollo_created idx_apollo_updated idx_apollo_decay_target + idx_apollo_candidates idx_apollo_domain idx_apollo_source_agent + idx_apollo_rel_from idx_apollo_rel_to idx_apollo_rel_type idx_apollo_rel_composite + idx_apollo_exp_agent idx_apollo_exp_domain idx_apollo_exp_composite + idx_apollo_ops_created idx_apollo_ops_operation idx_apollo_ops_actor idx_apollo_ops_target + idx_archive_content_hash idx_archive_source_agent idx_archive_archived_at + ].each { |idx| run "DROP INDEX IF EXISTS #{idx};" } + + alter_table(:apollo_entries) do + drop_column :content_hash + drop_column :submitted_from + drop_column :submitted_by + end + end +end diff --git a/lib/legion/data/migrations/048_add_financial_logging.rb b/lib/legion/data/migrations/048_add_financial_logging.rb new file mode 100644 index 0000000..b8e6bef --- /dev/null +++ b/lib/legion/data/migrations/048_add_financial_logging.rb @@ -0,0 +1,188 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + # 1. 
Identity — who owns the cost (worker, owner, cost center) + unless table_exists?(:finlog_identities) + create_table(:finlog_identities) do + primary_key :id + String :worker_id, size: 36, null: false + String :owner_msid, size: 64, null: false + String :owner_name, size: 255 + String :team, size: 255 + String :cost_center, size: 64 + String :department, size: 255 + String :business_segment, size: 64 + String :tenant_id, size: 64 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP + + unique :worker_id, name: :idx_finlog_ident_worker + index :owner_msid, name: :idx_finlog_ident_owner + index :cost_center, name: :idx_finlog_ident_cost_center + index :tenant_id, name: :idx_finlog_ident_tenant + end + end + + # 2. Asset — what Entra app / service principal generated the cost + unless table_exists?(:finlog_assets) + create_table(:finlog_assets) do + primary_key :id + String :worker_id, size: 36, null: false + String :entra_app_id, size: 36 + String :entra_object_id, size: 36 + String :asset_name, size: 255, null: false + String :asset_type, size: 64, null: false, default: 'extension' + String :extension_name, size: 128 + String :risk_tier, size: 32 + String :tenant_id, size: 64 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP + + index :worker_id, name: :idx_finlog_asset_worker + index :entra_app_id, name: :idx_finlog_asset_entra + index :asset_type, name: :idx_finlog_asset_type + index :tenant_id, name: :idx_finlog_asset_tenant + end + end + + # 3. 
Environment — where the cost was incurred (cloud, region, account) + unless table_exists?(:finlog_environments) + create_table(:finlog_environments) do + primary_key :id + String :csp, size: 16, null: false + String :account_id, size: 64, null: false + String :account_name, size: 255 + String :askid, size: 64 + String :region, size: 64 + String :environment, size: 32, default: 'prod' + String :subscription_id, size: 64 + String :resource_group, size: 255 + String :tenant_id, size: 64 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + + index :csp, name: :idx_finlog_env_csp + index :account_id, name: :idx_finlog_env_account + index :askid, name: :idx_finlog_env_askid + index %i[csp region], name: :idx_finlog_env_csp_region + index :tenant_id, name: :idx_finlog_env_tenant + end + end + + # 4. Accounting — how the cost is classified financially + unless table_exists?(:finlog_accounting) + create_table(:finlog_accounting) do + primary_key :id + String :execution_id, size: 36, null: false + String :aide_id, size: 64 + String :ucmg_id, size: 64 + String :billing_group, size: 128 + String :funding_source, size: 128 + String :classification, size: 16, null: false, default: 'expense' + Float :recovery_ratio, default: 2.0 + Float :rate_card_multiplier, default: 1.28 + Float :provider_discount, default: 1.0 + Float :chargeback_amount, default: 0.0 + String :tenant_id, size: 64 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + + index :execution_id, name: :idx_finlog_acct_exec + index :aide_id, name: :idx_finlog_acct_aide + index :ucmg_id, name: :idx_finlog_acct_ucmg + index :billing_group, name: :idx_finlog_acct_billing + index :classification, name: :idx_finlog_acct_class + index :tenant_id, name: :idx_finlog_acct_tenant + end + end + + # 5. 
Execution — per-request/task execution record (central fact table) + unless table_exists?(:finlog_executions) + create_table(:finlog_executions) do + primary_key :id + String :execution_id, size: 36, null: false + String :worker_id, size: 36, null: false + Integer :task_id + String :request_id, size: 64 + String :provider, size: 100, null: false + String :model_id, size: 255, null: false + Integer :input_tokens, default: 0 + Integer :output_tokens, default: 0 + Integer :thinking_tokens, default: 0 + Float :latency_ms, default: 0.0 + Float :raw_cost_usd, default: 0.0, null: false + Float :discounted_cost_usd, default: 0.0 + Float :chargeback_usd, default: 0.0 + String :status, size: 32, default: 'completed' + Integer :environment_id + String :tenant_id, size: 64 + DateTime :started_at + DateTime :completed_at + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + + unique :execution_id, name: :idx_finlog_exec_id + index :worker_id, name: :idx_finlog_exec_worker + index :task_id, name: :idx_finlog_exec_task + index :provider, name: :idx_finlog_exec_provider + index :model_id, name: :idx_finlog_exec_model + index :status, name: :idx_finlog_exec_status + index :created_at, name: :idx_finlog_exec_created + index %i[worker_id created_at], name: :idx_finlog_exec_worker_time + index %i[provider model_id created_at], name: :idx_finlog_exec_prov_model_time + index :tenant_id, name: :idx_finlog_exec_tenant + end + end + + # 6. 
Tags — flexible key-value metadata for cost events + unless table_exists?(:finlog_tags) + create_table(:finlog_tags) do + primary_key :id + String :execution_id, size: 36, null: false + String :tag_key, size: 128, null: false + String :tag_value, size: 512, null: false + String :tenant_id, size: 64 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + + index :execution_id, name: :idx_finlog_tag_exec + index :tag_key, name: :idx_finlog_tag_key + index %i[execution_id tag_key], name: :idx_finlog_tag_exec_key, unique: true + index :tenant_id, name: :idx_finlog_tag_tenant + end + end + + # 7. Usage — aggregated consumption data (daily rollup) + unless table_exists?(:finlog_usages) + create_table(:finlog_usages) do + primary_key :id + String :worker_id, size: 36, null: false + DateTime :period_start, null: false + DateTime :period_end, null: false + String :provider, size: 100, null: false + String :model_id, size: 255, null: false + Integer :total_requests, default: 0, null: false + Integer :total_input_tokens, default: 0, null: false + Integer :total_output_tokens, default: 0, null: false + Integer :total_thinking_tokens, default: 0, null: false + Float :total_raw_cost_usd, default: 0.0, null: false + Float :total_discounted_cost_usd, default: 0.0, null: false + Float :total_chargeback_usd, default: 0.0, null: false + String :tenant_id, size: 64 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + + unique %i[worker_id provider model_id period_start], name: :idx_finlog_usage_unique + index :period_start, name: :idx_finlog_usage_period + index %i[worker_id period_start], name: :idx_finlog_usage_worker_period + index :tenant_id, name: :idx_finlog_usage_tenant + end + end + end + + down do + drop_table?(:finlog_usages) + drop_table?(:finlog_tags) + drop_table?(:finlog_executions) + drop_table?(:finlog_accounting) + drop_table?(:finlog_environments) + drop_table?(:finlog_assets) + drop_table?(:finlog_identities) + end +end diff --git 
a/lib/legion/data/migrations/049_add_remote_invocable_to_functions.rb b/lib/legion/data/migrations/049_add_remote_invocable_to_functions.rb new file mode 100644 index 0000000..19425c9 --- /dev/null +++ b/lib/legion/data/migrations/049_add_remote_invocable_to_functions.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:functions) + next if schema(:functions).any? { |c, _| c == :remote_invocable } + + alter_table(:functions) do + add_column :remote_invocable, TrueClass, default: true, null: false + end + end + + down do + next unless table_exists?(:functions) + next unless schema(:functions).any? { |c, _| c == :remote_invocable } + + alter_table(:functions) do + drop_column :remote_invocable + end + end +end diff --git a/lib/legion/data/migrations/050_add_missing_indexes.rb b/lib/legion/data/migrations/050_add_missing_indexes.rb new file mode 100644 index 0000000..04f91b4 --- /dev/null +++ b/lib/legion/data/migrations/050_add_missing_indexes.rb @@ -0,0 +1,177 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + # runners: FK without index, hot-path lookups, duplicate prevention + if table_exists?(:runners) + # Remove any duplicate (extension_id, name) rows before adding the unique index. + # Keep the active and most recently updated row per pair; use id DESC as tie-breaker. 
+ run <<~SQL + WITH ranked AS ( + SELECT + id, + ROW_NUMBER() OVER ( + PARTITION BY extension_id, name + ORDER BY + active DESC, + updated DESC, + id DESC + ) AS rn + FROM runners + ) + DELETE FROM runners + WHERE id IN ( + SELECT id + FROM ranked + WHERE rn > 1 + ) + SQL + + alter_table(:runners) do + add_index :extension_id, name: :idx_runners_extension_id, if_not_exists: true + add_index :namespace, name: :idx_runners_namespace, if_not_exists: true + add_index :name, name: :idx_runners_name, if_not_exists: true + add_index %i[extension_id name], name: :idx_runners_extension_name, unique: true, if_not_exists: true + end + end + + # tasks: plain Integer relationship_id used by ORM association + if table_exists?(:tasks) + alter_table(:tasks) do + add_index :relationship_id, name: :idx_tasks_relationship_id, if_not_exists: true + end + end + + # digital_workers: consent/trust-based queries + if table_exists?(:digital_workers) + alter_table(:digital_workers) do + add_index :consent_tier, name: :idx_digital_workers_consent_tier, if_not_exists: true + add_index :trust_score, name: :idx_digital_workers_trust_score, if_not_exists: true + end + end + + # audit_log: composite principal+time query, action/node lookups + if table_exists?(:audit_log) + alter_table(:audit_log) do + add_index %i[principal_id created_at], name: :idx_audit_log_principal_time, if_not_exists: true + add_index :action, name: :idx_audit_log_action, if_not_exists: true + add_index :node, name: :idx_audit_log_node, if_not_exists: true + end + end + + # webhook_deliveries: event/time/success filtering + if table_exists?(:webhook_deliveries) + alter_table(:webhook_deliveries) do + add_index :event_name, name: :idx_webhook_deliveries_event_name, if_not_exists: true + add_index :delivered_at, name: :idx_webhook_deliveries_delivered_at, if_not_exists: true + add_index :success, name: :idx_webhook_deliveries_success, if_not_exists: true + end + end + + # webhook_dead_letters: event/time filtering + if 
table_exists?(:webhook_dead_letters) + alter_table(:webhook_dead_letters) do + add_index :event_name, name: :idx_webhook_dead_letters_event_name, if_not_exists: true + add_index :created_at, name: :idx_webhook_dead_letters_created_at, if_not_exists: true + end + end + + # conversations: identity and recency lookups + if table_exists?(:conversations) + alter_table(:conversations) do + add_index :caller_identity, name: :idx_conversations_caller_identity, if_not_exists: true + add_index :updated_at, name: :idx_conversations_updated_at, if_not_exists: true + end + end + + # approval_queue: requester/reviewer lookups + if table_exists?(:approval_queue) + alter_table(:approval_queue) do + add_index :requester_id, name: :idx_approval_queue_requester_id, if_not_exists: true + add_index :reviewer_id, name: :idx_approval_queue_reviewer_id, if_not_exists: true + end + end + + # rbac_role_assignments: role and expiry lookups + if table_exists?(:rbac_role_assignments) + alter_table(:rbac_role_assignments) do + add_index :role, name: :idx_rbac_role_assignments_role, if_not_exists: true + add_index :expires_at, name: :idx_rbac_role_assignments_expires_at, if_not_exists: true + end + end + + # rbac_cross_team_grants: target team and expiry lookups + if table_exists?(:rbac_cross_team_grants) + alter_table(:rbac_cross_team_grants) do + add_index :target_team, name: :idx_rbac_cross_team_grants_target_team, if_not_exists: true + add_index :expires_at, name: :idx_rbac_cross_team_grants_expires_at, if_not_exists: true + end + end + + # memory_traces: consolidation and source agent lookups + if table_exists?(:memory_traces) + existing_cols = schema(:memory_traces).map(&:first) + + if existing_cols.include?(:consolidation_candidate) + alter_table(:memory_traces) do + add_index :consolidation_candidate, name: :idx_memory_traces_consolidation, if_not_exists: true + end + end + + if existing_cols.include?(:source_agent_id) + alter_table(:memory_traces) do + add_index :source_agent_id, name: 
:idx_memory_traces_source_agent_id, if_not_exists: true + end + end + end + + # agent_cluster_tasks: time-based querying + if table_exists?(:agent_cluster_tasks) + alter_table(:agent_cluster_tasks) do + add_index :created_at, name: :idx_agent_cluster_tasks_created_at, if_not_exists: true + end + end + + # finlog_executions: environment_id FK without index + if table_exists?(:finlog_executions) + alter_table(:finlog_executions) do + add_index :environment_id, name: :idx_finlog_exec_environment_id, if_not_exists: true + end + end + end + + down do + [ + [:runners, %i[ + idx_runners_extension_id idx_runners_namespace idx_runners_name idx_runners_extension_name + ]], + [:tasks, %i[idx_tasks_relationship_id]], + [:digital_workers, %i[idx_digital_workers_consent_tier idx_digital_workers_trust_score]], + [:audit_log, %i[idx_audit_log_principal_time idx_audit_log_action idx_audit_log_node]], + [:webhook_deliveries, %i[ + idx_webhook_deliveries_event_name idx_webhook_deliveries_delivered_at idx_webhook_deliveries_success + ]], + [:webhook_dead_letters, %i[ + idx_webhook_dead_letters_event_name idx_webhook_dead_letters_created_at + ]], + [:conversations, %i[idx_conversations_caller_identity idx_conversations_updated_at]], + [:approval_queue, %i[idx_approval_queue_requester_id idx_approval_queue_reviewer_id]], + [:rbac_role_assignments, %i[idx_rbac_role_assignments_role idx_rbac_role_assignments_expires_at]], + [:rbac_cross_team_grants, %i[ + idx_rbac_cross_team_grants_target_team idx_rbac_cross_team_grants_expires_at + ]], + [:memory_traces, %i[idx_memory_traces_consolidation idx_memory_traces_source_agent_id]], + [:agent_cluster_tasks, %i[idx_agent_cluster_tasks_created_at]], + [:finlog_executions, %i[idx_finlog_exec_environment_id]] + ].each do |table, indexes| + next unless table_exists?(table) + + existing_indexes = indexes(table).keys + indexes.each do |idx_name| + next unless existing_indexes.include?(idx_name) + + alter_table(table) { drop_index nil, name: idx_name } 
+ end + end + end +end diff --git a/lib/legion/data/migrations/051_fix_tasks_created_at.rb b/lib/legion/data/migrations/051_fix_tasks_created_at.rb new file mode 100644 index 0000000..a060c1b --- /dev/null +++ b/lib/legion/data/migrations/051_fix_tasks_created_at.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:tasks) + + existing_cols = schema(:tasks).map(&:first) + next if existing_cols.include?(:created_at) + + if adapter_scheme == :postgres + # Add a generated column so retention/archival queries using created_at work transparently + run 'ALTER TABLE tasks ADD COLUMN created_at TIMESTAMPTZ GENERATED ALWAYS AS (created) STORED' + run 'CREATE INDEX IF NOT EXISTS idx_tasks_created_at ON tasks (created_at)' + else + # SQLite/MySQL: add real column and backfill from created + alter_table(:tasks) do + add_column :created_at, DateTime + end + + run 'UPDATE tasks SET created_at = created WHERE created_at IS NULL' + + alter_table(:tasks) do + add_index :created_at, name: :idx_tasks_created_at, if_not_exists: true + end + end + end + + down do + next unless table_exists?(:tasks) + + existing_cols = schema(:tasks).map(&:first) + next unless existing_cols.include?(:created_at) + + if adapter_scheme == :postgres + run 'DROP INDEX IF EXISTS idx_tasks_created_at' + run 'ALTER TABLE tasks DROP COLUMN IF EXISTS created_at' + else + alter_table(:tasks) do + drop_index :created_at, name: :idx_tasks_created_at, if_exists: true + drop_column :created_at + end + end + end +end diff --git a/lib/legion/data/migrations/052_drop_redundant_apollo_indexes.rb b/lib/legion/data/migrations/052_drop_redundant_apollo_indexes.rb new file mode 100644 index 0000000..70b45b3 --- /dev/null +++ b/lib/legion/data/migrations/052_drop_redundant_apollo_indexes.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + # PostgreSQL only — these auto-named indexes from migration 012 are exact duplicates + # of 
explicitly named indexes added in migration 047. + next unless adapter_scheme == :postgres + + run 'DROP INDEX IF EXISTS apollo_entries_status_index' + run 'DROP INDEX IF EXISTS apollo_relations_from_entry_id_index' + run 'DROP INDEX IF EXISTS apollo_relations_to_entry_id_index' + run 'DROP INDEX IF EXISTS apollo_expertise_agent_id_index' + run 'DROP INDEX IF EXISTS apollo_expertise_domain_index' + end + + down do + next unless adapter_scheme == :postgres + + # Recreate the auto-named indexes that migration 012 created inline. + # idx_apollo_status, idx_apollo_rel_from, etc. from migration 047 remain in place. + run 'CREATE INDEX IF NOT EXISTS apollo_entries_status_index ON apollo_entries (status)' \ + if table_exists?(:apollo_entries) + run 'CREATE INDEX IF NOT EXISTS apollo_relations_from_entry_id_index ON apollo_relations (from_entry_id)' \ + if table_exists?(:apollo_relations) + run 'CREATE INDEX IF NOT EXISTS apollo_relations_to_entry_id_index ON apollo_relations (to_entry_id)' \ + if table_exists?(:apollo_relations) + run 'CREATE INDEX IF NOT EXISTS apollo_expertise_agent_id_index ON apollo_expertise (agent_id)' \ + if table_exists?(:apollo_expertise) + run 'CREATE INDEX IF NOT EXISTS apollo_expertise_domain_index ON apollo_expertise (domain)' \ + if table_exists?(:apollo_expertise) + end +end diff --git a/lib/legion/data/migrations/053_add_tasks_relationship_fk.rb b/lib/legion/data/migrations/053_add_tasks_relationship_fk.rb new file mode 100644 index 0000000..7e7b56f --- /dev/null +++ b/lib/legion/data/migrations/053_add_tasks_relationship_fk.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + # PostgreSQL only — add FK constraint for tasks.relationship_id with ON DELETE SET NULL. + # Orphaned values must be cleaned first. 
+ next unless adapter_scheme == :postgres + next unless table_exists?(:tasks) + next unless table_exists?(:relationships) + + # Clean orphaned relationship_id values before adding constraint + run <<~SQL + UPDATE tasks + SET relationship_id = NULL + WHERE relationship_id IS NOT NULL + AND relationship_id NOT IN (SELECT id FROM relationships); + SQL + + # Skip if constraint already exists (idempotency guard) + constraint_exists = self[:pg_constraint].where(conname: 'fk_tasks_relationship_id').any? + + unless constraint_exists + run <<~SQL + ALTER TABLE tasks + ADD CONSTRAINT fk_tasks_relationship_id + FOREIGN KEY (relationship_id) REFERENCES relationships(id) + ON DELETE SET NULL; + SQL + end + end + + down do + next unless adapter_scheme == :postgres + next unless table_exists?(:tasks) + + run 'ALTER TABLE tasks DROP CONSTRAINT IF EXISTS fk_tasks_relationship_id' + end +end diff --git a/lib/legion/data/migrations/054_add_component_type_to_functions.rb b/lib/legion/data/migrations/054_add_component_type_to_functions.rb new file mode 100644 index 0000000..8fb9cea --- /dev/null +++ b/lib/legion/data/migrations/054_add_component_type_to_functions.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:functions) + next if schema(:functions).any? { |c, _| c == :component_type } + + alter_table(:functions) do + add_column :component_type, String, size: 32, null: false, default: 'runner' + add_index :component_type, name: :idx_functions_component_type, if_not_exists: true + end + end + + down do + next unless table_exists?(:functions) + next unless schema(:functions).any? 
{ |c, _| c == :component_type } + + alter_table(:functions) do + drop_index :component_type, name: :idx_functions_component_type, if_exists: true + drop_column :component_type + end + end +end diff --git a/lib/legion/data/migrations/055_add_definition_to_functions.rb b/lib/legion/data/migrations/055_add_definition_to_functions.rb new file mode 100644 index 0000000..2a9dde0 --- /dev/null +++ b/lib/legion/data/migrations/055_add_definition_to_functions.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:functions) + next if schema(:functions).any? { |c, _| c == :definition } + + alter_table(:functions) do + add_column :definition, String, text: true, null: true + end + end + + down do + next unless table_exists?(:functions) + next unless schema(:functions).any? { |c, _| c == :definition } + + alter_table(:functions) do + drop_column :definition + end + end +end diff --git a/lib/legion/data/migrations/056_add_absorber_patterns.rb b/lib/legion/data/migrations/056_add_absorber_patterns.rb new file mode 100644 index 0000000..174eb88 --- /dev/null +++ b/lib/legion/data/migrations/056_add_absorber_patterns.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next if table_exists?(:absorber_patterns) + + create_table(:absorber_patterns) do + primary_key :id + foreign_key :function_id, :functions, null: false, on_delete: :cascade, index: true + String :pattern_type, size: 32, null: false, default: 'url' + String :pattern, size: 1024, null: false + Integer :priority, null: false, default: 0 + TrueClass :active, null: false, default: true + String :tenant_id, size: 64, null: true + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: true + + index :pattern_type, name: :idx_absorber_patterns_pattern_type + index :active, name: :idx_absorber_patterns_active + index :tenant_id, name: :idx_absorber_patterns_tenant_id + index %i[pattern_type 
active], name: :idx_absorber_patterns_type_active + end + end + + down do + drop_table?(:absorber_patterns) + end +end diff --git a/lib/legion/data/migrations/057_add_routing_key_to_runners.rb b/lib/legion/data/migrations/057_add_routing_key_to_runners.rb new file mode 100644 index 0000000..044f9e3 --- /dev/null +++ b/lib/legion/data/migrations/057_add_routing_key_to_runners.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:runners) + next if schema(:runners).any? { |c, _| c == :routing_key } + + alter_table(:runners) do + add_column :routing_key, String, size: 512, null: true + add_index :routing_key, name: :idx_runners_routing_key, if_not_exists: true + end + end + + down do + next unless table_exists?(:runners) + next unless schema(:runners).any? { |c, _| c == :routing_key } + + alter_table(:runners) do + drop_index :routing_key, name: :idx_runners_routing_key, if_exists: true + drop_column :routing_key + end + end +end diff --git a/lib/legion/data/migrations/058_add_audit_records.rb b/lib/legion/data/migrations/058_add_audit_records.rb new file mode 100644 index 0000000..1d0912d --- /dev/null +++ b/lib/legion/data/migrations/058_add_audit_records.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next if table_exists?(:audit_records) + + create_table(:audit_records) do + primary_key :id + String :chain_id, size: 255, null: false + String :content_type, size: 100, null: false + column :metadata, :text, null: true + String :content_hash, size: 64, null: false + String :parent_hash, size: 64, null: false + String :chain_hash, size: 64, null: false, unique: true + String :signature, size: 512, null: true + DateTime :created_at, null: false + + index :chain_id, name: :idx_audit_records_chain_id + index :content_type, name: :idx_audit_records_content_type + index :created_at, name: :idx_audit_records_created_at + index %i[chain_id created_at], name: 
:idx_audit_records_chain_time + end + + if database_type == :postgres + run <<~SQL + CREATE RULE no_update_audit_records AS ON UPDATE TO audit_records DO INSTEAD NOTHING; + CREATE RULE no_delete_audit_records AS ON DELETE TO audit_records DO INSTEAD NOTHING; + SQL + end + end + + down do + next unless table_exists?(:audit_records) + + if database_type == :postgres + run 'DROP RULE IF EXISTS no_update_audit_records ON audit_records;' + run 'DROP RULE IF EXISTS no_delete_audit_records ON audit_records;' + end + + drop_table :audit_records + end +end diff --git a/lib/legion/data/migrations/059_create_chains.rb b/lib/legion/data/migrations/059_create_chains.rb new file mode 100644 index 0000000..6c4af61 --- /dev/null +++ b/lib/legion/data/migrations/059_create_chains.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table :chains do + primary_key :id + String :name, null: false, size: 255, index: true + TrueClass :active, null: false, default: true, index: true + DateTime :created, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated, null: true + end + end +end diff --git a/lib/legion/data/migrations/060_add_knowledge_tiers.rb b/lib/legion/data/migrations/060_add_knowledge_tiers.rb new file mode 100644 index 0000000..52d54b8 --- /dev/null +++ b/lib/legion/data/migrations/060_add_knowledge_tiers.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + next unless table_exists?(:apollo_entries) + + existing_columns = schema(:apollo_entries).map(&:first) + + alter_table(:apollo_entries) do + add_column :summary_l0, String, size: 500, null: true unless existing_columns.include?(:summary_l0) + add_column :summary_l1, :text, null: true unless existing_columns.include?(:summary_l1) + add_column :knowledge_tier, String, size: 4, null: false, default: 'L2' unless existing_columns.include?(:knowledge_tier) + add_column :parent_entry_id, :uuid, null: 
true unless existing_columns.include?(:parent_entry_id) + add_column :l0_generated_at, :timestamptz, null: true unless existing_columns.include?(:l0_generated_at) + add_column :l1_generated_at, :timestamptz, null: true unless existing_columns.include?(:l1_generated_at) + end + + add_index :apollo_entries, :knowledge_tier, name: :idx_apollo_knowledge_tier, if_not_exists: true + add_index :apollo_entries, :parent_entry_id, name: :idx_apollo_parent_entry, if_not_exists: true + end + + down do + next unless adapter_scheme == :postgres + next unless table_exists?(:apollo_entries) + + existing_columns = schema(:apollo_entries).map(&:first) + + alter_table(:apollo_entries) do + drop_column :summary_l0 if existing_columns.include?(:summary_l0) + drop_column :summary_l1 if existing_columns.include?(:summary_l1) + drop_column :knowledge_tier if existing_columns.include?(:knowledge_tier) + drop_column :parent_entry_id if existing_columns.include?(:parent_entry_id) + drop_column :l0_generated_at if existing_columns.include?(:l0_generated_at) + drop_column :l1_generated_at if existing_columns.include?(:l1_generated_at) + end + end +end diff --git a/lib/legion/data/migrations/061_add_versioning_and_expiry.rb b/lib/legion/data/migrations/061_add_versioning_and_expiry.rb new file mode 100644 index 0000000..8ba98cb --- /dev/null +++ b/lib/legion/data/migrations/061_add_versioning_and_expiry.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + next unless table_exists?(:apollo_entries) + + existing_columns = schema(:apollo_entries).map(&:first) + + alter_table(:apollo_entries) do + add_column :parent_knowledge_id, :uuid, null: true unless existing_columns.include?(:parent_knowledge_id) + add_column :is_latest, :boolean, null: false, default: true unless existing_columns.include?(:is_latest) + add_column :supersession_type, String, size: 20, null: true unless existing_columns.include?(:supersession_type) + 
add_column :expires_at, :timestamptz, null: true unless existing_columns.include?(:expires_at) + add_column :forget_reason, String, size: 255, null: true unless existing_columns.include?(:forget_reason) + add_column :is_inference, :boolean, null: false, default: false unless existing_columns.include?(:is_inference) + end + + add_index :apollo_entries, :parent_knowledge_id, name: :idx_apollo_parent_knowledge, if_not_exists: true + add_index :apollo_entries, %i[parent_knowledge_id is_latest], + name: :idx_apollo_version_chain, + where: Sequel.lit('is_latest = true'), + if_not_exists: true + add_index :apollo_entries, :expires_at, + name: :idx_apollo_expiry, + where: Sequel.lit("expires_at IS NOT NULL AND status != 'archived'"), + if_not_exists: true + add_index :apollo_entries, :is_inference, + name: :idx_apollo_inference, + where: Sequel.lit('is_inference = true'), + if_not_exists: true + end + + down do + next unless adapter_scheme == :postgres + next unless table_exists?(:apollo_entries) + + existing_columns = schema(:apollo_entries).map(&:first) + + alter_table(:apollo_entries) do + drop_column :parent_knowledge_id if existing_columns.include?(:parent_knowledge_id) + drop_column :is_latest if existing_columns.include?(:is_latest) + drop_column :supersession_type if existing_columns.include?(:supersession_type) + drop_column :expires_at if existing_columns.include?(:expires_at) + drop_column :forget_reason if existing_columns.include?(:forget_reason) + drop_column :is_inference if existing_columns.include?(:is_inference) + end + end +end diff --git a/lib/legion/data/migrations/062_create_tool_embedding_cache.rb b/lib/legion/data/migrations/062_create_tool_embedding_cache.rb new file mode 100644 index 0000000..898dfad --- /dev/null +++ b/lib/legion/data/migrations/062_create_tool_embedding_cache.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:tool_embedding_cache) do + primary_key :id + String :content_hash, 
size: 32, null: false + String :model, size: 100, null: false + String :tool_name, size: 200, null: false + column :vector, :text, null: false + DateTime :embedded_at, null: false + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + unique %i[content_hash model] + index :tool_name + end + end +end diff --git a/lib/legion/data/migrations/063_create_identity_providers.rb b/lib/legion/data/migrations/063_create_identity_providers.rb new file mode 100644 index 0000000..642f636 --- /dev/null +++ b/lib/legion/data/migrations/063_create_identity_providers.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + create_table(:identity_providers) do + column :id, :uuid, default: Sequel.lit('gen_random_uuid()'), primary_key: true + String :name, null: false, unique: true + String :provider_type, null: false # authenticate, profile, fallback + String :facing, null: false # human, machine, both + Integer :priority, null: false, default: 100 + Integer :trust_weight, null: false, default: 50 + column :capabilities, :'text[]', default: Sequel.lit("'{}'") + String :source, null: false, default: 'gem' # gem, db + TrueClass :enabled, null: false, default: true + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + end + end + + down do + next unless adapter_scheme == :postgres + + drop_table?(:identity_providers) + end +end diff --git a/lib/legion/data/migrations/064_create_principals.rb b/lib/legion/data/migrations/064_create_principals.rb new file mode 100644 index 0000000..914feda --- /dev/null +++ b/lib/legion/data/migrations/064_create_principals.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + create_table(:principals) do + column :id, :uuid, default: Sequel.lit('gen_random_uuid()'), primary_key: true + String 
:canonical_name, null: false + String :kind, null: false # human, service, machine + String :display_name + TrueClass :active, null: false, default: true + DateTime :last_seen_at + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + constraint(:canonical_name_format, Sequel.lit("canonical_name ~ '^[a-z0-9][a-z0-9_-]*$'")) + unique %i[canonical_name kind] + end + + add_index :principals, :canonical_name + add_index :principals, :kind + end + + down do + next unless adapter_scheme == :postgres + + drop_table?(:principals) + end +end diff --git a/lib/legion/data/migrations/065_create_identities.rb b/lib/legion/data/migrations/065_create_identities.rb new file mode 100644 index 0000000..3b7043b --- /dev/null +++ b/lib/legion/data/migrations/065_create_identities.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + create_table(:identities) do + column :id, :uuid, default: Sequel.lit('gen_random_uuid()'), primary_key: true + foreign_key :principal_id, :principals, type: :uuid, null: false, on_delete: :cascade + foreign_key :provider_id, :identity_providers, type: :uuid, null: false, on_delete: :cascade + String :provider_identity, null: false # external ID from provider + column :profile, :bytea + TrueClass :active, null: false, default: true + DateTime :last_authenticated_at + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + unique %i[principal_id provider_id provider_identity] + end + + # Partial unique index: only one active identity per provider+provider_identity + run 'CREATE UNIQUE INDEX identities_active_provider_uniq ON identities (provider_id, provider_identity) WHERE active = true' + + add_index :identities, :principal_id + add_index :identities, :provider_id + end + + down do + next unless 
adapter_scheme == :postgres + + drop_table?(:identities) + end +end diff --git a/lib/legion/data/migrations/066_create_identity_groups.rb b/lib/legion/data/migrations/066_create_identity_groups.rb new file mode 100644 index 0000000..9fd2e80 --- /dev/null +++ b/lib/legion/data/migrations/066_create_identity_groups.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + create_table(:identity_groups) do + column :id, :uuid, default: Sequel.lit('gen_random_uuid()'), primary_key: true + String :name, null: false, unique: true + String :source, null: false, default: 'ldap' # ldap, entra, manual + String :description + TrueClass :active, null: false, default: true + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + end + end + + down do + next unless adapter_scheme == :postgres + + drop_table?(:identity_groups) + end +end diff --git a/lib/legion/data/migrations/067_create_identity_group_memberships.rb b/lib/legion/data/migrations/067_create_identity_group_memberships.rb new file mode 100644 index 0000000..52e72d5 --- /dev/null +++ b/lib/legion/data/migrations/067_create_identity_group_memberships.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + create_table(:identity_group_memberships) do + column :id, :uuid, default: Sequel.lit('gen_random_uuid()'), primary_key: true + foreign_key :principal_id, :principals, type: :uuid, null: false, on_delete: :cascade + foreign_key :group_id, :identity_groups, type: :uuid, null: false, on_delete: :cascade + String :status, null: false, default: 'active' # active, stale, expired + String :discovered_by, null: false # provider name that discovered this membership + Integer :trust_weight, null: false, default: 50 + DateTime :expires_at + DateTime :created_at, null: false, default: 
Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + unique %i[principal_id group_id discovered_by] + end + + add_index :identity_group_memberships, :principal_id + add_index :identity_group_memberships, :group_id + add_index :identity_group_memberships, :status + run <<~SQL + CREATE INDEX idx_memberships_trust_tiebreak + ON identity_group_memberships (principal_id, trust_weight ASC, + (CASE status WHEN 'expired' THEN 0 WHEN 'stale' THEN 1 WHEN 'active' THEN 2 END) ASC) + SQL + end + + down do + next unless adapter_scheme == :postgres + + drop_table?(:identity_group_memberships) + end +end diff --git a/lib/legion/data/migrations/068_add_entity_type_to_audit_records.rb b/lib/legion/data/migrations/068_add_entity_type_to_audit_records.rb new file mode 100644 index 0000000..20aac0e --- /dev/null +++ b/lib/legion/data/migrations/068_add_entity_type_to_audit_records.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + next unless table_exists?(:audit_records) + + alter_table(:audit_records) do + add_column :entity_type, String, size: 100, null: true + end + + add_index :audit_records, :entity_type, name: :idx_audit_records_entity_type + end + + down do + next unless adapter_scheme == :postgres + next unless table_exists?(:audit_records) + + alter_table(:audit_records) do + drop_column :entity_type + end + end +end diff --git a/lib/legion/data/migrations/069_add_principal_id_to_nodes.rb b/lib/legion/data/migrations/069_add_principal_id_to_nodes.rb new file mode 100644 index 0000000..28c91b4 --- /dev/null +++ b/lib/legion/data/migrations/069_add_principal_id_to_nodes.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + alter_table(:nodes) do + add_column :principal_id, :uuid + add_foreign_key [:principal_id], :principals + end + + add_index :nodes, :principal_id + end + + down 
do + next unless adapter_scheme == :postgres + + alter_table(:nodes) do + drop_column :principal_id + end + end +end diff --git a/lib/legion/data/migrations/070_add_approval_queue_resume.rb b/lib/legion/data/migrations/070_add_approval_queue_resume.rb new file mode 100644 index 0000000..9413e59 --- /dev/null +++ b/lib/legion/data/migrations/070_add_approval_queue_resume.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:approval_queue) do + add_column :resume_routing_key, String, size: 255, null: true + add_column :resume_exchange, String, size: 255, null: true + end + end + + down do + alter_table(:approval_queue) do + drop_column :resume_routing_key + drop_column :resume_exchange + end + end +end diff --git a/lib/legion/data/migrations/071_add_engine_to_relationships.rb b/lib/legion/data/migrations/071_add_engine_to_relationships.rb new file mode 100644 index 0000000..875533c --- /dev/null +++ b/lib/legion/data/migrations/071_add_engine_to_relationships.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:relationships) do + add_column :engine, String, size: 50, null: true + end + end + + down do + alter_table(:relationships) do + drop_column :engine + end + end +end diff --git a/lib/legion/data/migrations/072_create_identity_audit_log.rb b/lib/legion/data/migrations/072_create_identity_audit_log.rb new file mode 100644 index 0000000..282a6ca --- /dev/null +++ b/lib/legion/data/migrations/072_create_identity_audit_log.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + create_table(:identity_audit_log) do + column :id, :uuid, default: Sequel.lit('gen_random_uuid()'), primary_key: true + foreign_key :principal_id, :principals, type: :uuid, on_delete: :set_null + foreign_key :identity_id, :identities, type: :uuid, on_delete: :set_null + String :provider_name, null: false + String :event_type, null: 
false + String :trust_level + column :detail, :jsonb, null: false, default: Sequel.lit("'{}'") + String :node_id + String :session_id + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + end + + add_index :identity_audit_log, :principal_id + add_index :identity_audit_log, :event_type + add_index :identity_audit_log, :created_at + add_index :identity_audit_log, %i[principal_id event_type created_at] + end + + down do + next unless adapter_scheme == :postgres + + drop_table?(:identity_audit_log) + end +end diff --git a/lib/legion/data/migrations/073_add_identity_multi_instance_columns.rb b/lib/legion/data/migrations/073_add_identity_multi_instance_columns.rb new file mode 100644 index 0000000..548a27e --- /dev/null +++ b/lib/legion/data/migrations/073_add_identity_multi_instance_columns.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + + alter_table(:principals) do + add_column :employee_id, String + end + run 'CREATE INDEX idx_principals_employee_id ON principals (employee_id) WHERE employee_id IS NOT NULL' + + alter_table(:identities) do + add_column :account_type, String, null: false, default: 'primary' + add_column :qualifier, String + add_column :is_default, TrueClass, null: false, default: false + add_column :link_evidence, String + end + + run 'CREATE UNIQUE INDEX identities_one_default_per_provider ON identities (principal_id, provider_id) WHERE is_default = true AND active = true' + end + + down do + next unless adapter_scheme == :postgres + + run 'DROP INDEX IF EXISTS identities_one_default_per_provider' + + alter_table(:identities) do + drop_column :link_evidence + drop_column :is_default + drop_column :qualifier + drop_column :account_type + end + + run 'DROP INDEX IF EXISTS idx_principals_employee_id' + alter_table(:principals) do + drop_column :employee_id + end + end +end diff --git a/lib/legion/data/migrations/074_widen_apollo_entry_identifiers.rb 
b/lib/legion/data/migrations/074_widen_apollo_entry_identifiers.rb new file mode 100644 index 0000000..2b65dea --- /dev/null +++ b/lib/legion/data/migrations/074_widen_apollo_entry_identifiers.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless adapter_scheme == :postgres + next unless table_exists?(:apollo_entries) + + apollo_columns = schema(:apollo_entries).map(&:first) + alter_table(:apollo_entries) do + set_column_type :content_hash, String, fixed: true, size: 64 if apollo_columns.include?(:content_hash) + set_column_type :knowledge_domain, String, size: 255 if apollo_columns.include?(:knowledge_domain) + set_column_type :source_provider, String, size: 255 if apollo_columns.include?(:source_provider) + set_column_type :source_agent, String, size: 255 if apollo_columns.include?(:source_agent) + end + + next unless table_exists?(:apollo_entries_archive) + + archive_columns = schema(:apollo_entries_archive).map(&:first) + alter_table(:apollo_entries_archive) do + set_column_type :content_hash, String, fixed: true, size: 64 if archive_columns.include?(:content_hash) + set_column_type :knowledge_domain, String, size: 255 if archive_columns.include?(:knowledge_domain) + set_column_type :source_provider, String, size: 255 if archive_columns.include?(:source_provider) + set_column_type :source_agent, String, size: 255 if archive_columns.include?(:source_agent) + end + end + + down do + next unless adapter_scheme == :postgres + next unless table_exists?(:apollo_entries) + + apollo_columns = schema(:apollo_entries).map(&:first) + alter_table(:apollo_entries) do + set_column_type :content_hash, String, fixed: true, size: 32 if apollo_columns.include?(:content_hash) + set_column_type :knowledge_domain, String, size: 50 if apollo_columns.include?(:knowledge_domain) + set_column_type :source_provider, String, size: 50 if apollo_columns.include?(:source_provider) + set_column_type :source_agent, String, size: 50 if 
apollo_columns.include?(:source_agent) + end + + next unless table_exists?(:apollo_entries_archive) + + archive_columns = schema(:apollo_entries_archive).map(&:first) + alter_table(:apollo_entries_archive) do + set_column_type :content_hash, String, fixed: true, size: 32 if archive_columns.include?(:content_hash) + set_column_type :knowledge_domain, String, size: 50 if archive_columns.include?(:knowledge_domain) + set_column_type :source_provider, String, size: 50 if archive_columns.include?(:source_provider) + set_column_type :source_agent, String, size: 50 if archive_columns.include?(:source_agent) + end + end +end diff --git a/lib/legion/data/migrations/075_add_task_idempotency.rb b/lib/legion/data/migrations/075_add_task_idempotency.rb new file mode 100644 index 0000000..ed3d8ea --- /dev/null +++ b/lib/legion/data/migrations/075_add_task_idempotency.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:tasks) + + existing_columns = schema(:tasks).map(&:first) + alter_table(:tasks) do + add_column :idempotency_key, String, size: 64 unless existing_columns.include?(:idempotency_key) + add_column :idempotency_expires_at, DateTime unless existing_columns.include?(:idempotency_expires_at) + end + + add_index :tasks, :idempotency_key, name: :idx_tasks_idempotency_key, if_not_exists: true + add_index :tasks, :idempotency_expires_at, name: :idx_tasks_idempotency_expires_at, if_not_exists: true + end + + down do + next unless table_exists?(:tasks) + + existing_columns = schema(:tasks).map(&:first) + alter_table(:tasks) do + drop_index :idempotency_key, name: :idx_tasks_idempotency_key, if_exists: true + drop_index :idempotency_expires_at, name: :idx_tasks_idempotency_expires_at, if_exists: true + drop_column :idempotency_expires_at if existing_columns.include?(:idempotency_expires_at) + drop_column :idempotency_key if existing_columns.include?(:idempotency_key) + end + end +end diff --git 
a/lib/legion/data/migrations/076_create_extract_step_timings.rb b/lib/legion/data/migrations/076_create_extract_step_timings.rb new file mode 100644 index 0000000..60219bf --- /dev/null +++ b/lib/legion/data/migrations/076_create_extract_step_timings.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + create_table?(:extract_step_timings) do + primary_key :id + String :extract_id, size: 36, null: false + String :name, size: 100, null: false + DateTime :start_time, null: false + DateTime :end_time, null: false + String :status, size: 20, null: false + String :error, text: true + Integer :duration_ms, null: false, default: 0 + + index :extract_id, name: :idx_extract_step_timings_extract_id + index %i[extract_id name], name: :idx_extract_step_timings_extract_name + index :status, name: :idx_extract_step_timings_status + end + end + + down do + drop_table?(:extract_step_timings) + end +end diff --git a/lib/legion/data/migrations/077_create_llm_conversations.rb b/lib/legion/data/migrations/077_create_llm_conversations.rb new file mode 100644 index 0000000..0f203d9 --- /dev/null +++ b/lib/legion/data/migrations/077_create_llm_conversations.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:llm_conversations) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + Integer :principal_id + Integer :identity_id + String :title, size: 255 + String :status, size: 32, null: false, default: 'active' + String :system_prompt_key, size: 255 + String :system_prompt_hash, size: 128 + String :classification_level, size: 64 + TrueClass :contains_phi, null: false, default: false + TrueClass :contains_pii, null: false, default: false + String :retention_policy, size: 64, null: false, default: 'default' + DateTime :expires_at + DateTime :recorded_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :created_at, null: false, default: 
Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :principal_id + index :identity_id + index :status + index :retention_policy + index :expires_at + end + end +end diff --git a/lib/legion/data/migrations/078_create_llm_messages.rb b/lib/legion/data/migrations/078_create_llm_messages.rb new file mode 100644 index 0000000..24e140a --- /dev/null +++ b/lib/legion/data/migrations/078_create_llm_messages.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:llm_messages) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + foreign_key :conversation_id, :llm_conversations, null: false, on_delete: :cascade + foreign_key :parent_message_id, :llm_messages, null: true, on_delete: :set_null + Integer :message_inference_request_id + Integer :message_inference_response_id + Integer :tool_call_id + Integer :seq, null: false + String :role, size: 32, null: false + String :content_type, size: 64, null: false, default: 'text' + String :content, text: true + Integer :input_tokens, null: false, default: 0 + Integer :output_tokens, null: false, default: 0 + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + unique %i[conversation_id seq] + index :conversation_id + index :parent_message_id + index :message_inference_request_id + index :message_inference_response_id + index :tool_call_id + index %i[conversation_id role] + index :created_at + end + end +end diff --git a/lib/legion/data/migrations/079_create_llm_message_inference_requests.rb b/lib/legion/data/migrations/079_create_llm_message_inference_requests.rb new file mode 100644 index 0000000..4eb83a8 --- /dev/null +++ b/lib/legion/data/migrations/079_create_llm_message_inference_requests.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + 
create_table(:llm_message_inference_requests) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + foreign_key :conversation_id, :llm_conversations, null: false, on_delete: :cascade + foreign_key :latest_message_id, :llm_messages, null: true, on_delete: :set_null + Integer :caller_principal_id + Integer :caller_identity_id + String :runtime_caller_type, size: 64 + String :request_ref, size: 128 + String :correlation_ref, size: 128 + String :exchange_ref, size: 128 + String :request_type, size: 64, null: false, default: 'chat' + String :status, size: 64, null: false, default: 'created' + Integer :context_message_count, null: false, default: 0 + Integer :context_tokens, null: false, default: 0 + Integer :token_budget, null: false, default: 0 + String :curation_strategy, size: 128 + Integer :injected_tool_count, null: false, default: 0 + String :tool_policy, size: 128 + String :request_capture_mode, size: 64, null: false, default: 'metadata_only' + String :request_content_hash, size: 128 + String :request_json, text: true + String :classification_level, size: 64 + String :rbac_decision, size: 64 + String :cost_center, size: 128 + String :budget_key, size: 128 + DateTime :requested_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :conversation_id + index :latest_message_id + index :caller_principal_id + index :caller_identity_id + index :request_ref + index :correlation_ref + index :exchange_ref + index :status + index %i[cost_center requested_at] + index :requested_at + end + end +end diff --git a/lib/legion/data/migrations/080_create_llm_message_inference_responses.rb b/lib/legion/data/migrations/080_create_llm_message_inference_responses.rb new file mode 100644 index 0000000..21afbc4 --- /dev/null +++ b/lib/legion/data/migrations/080_create_llm_message_inference_responses.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + 
create_table(:llm_message_inference_responses) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + foreign_key :message_inference_request_id, :llm_message_inference_requests, null: false, on_delete: :cascade + foreign_key :response_message_id, :llm_messages, null: true, on_delete: :set_null + String :provider, size: 128 + String :model_key, size: 255 + String :tier, size: 64 + String :runner_ref, size: 128 + String :provider_response_ref, size: 255 + String :status, size: 64, null: false, default: 'created' + String :finish_reason, size: 128 + String :error_category, size: 128 + String :error_code, size: 128 + String :error_message, text: true + Integer :latency_ms, null: false, default: 0 + Integer :wall_clock_ms, null: false, default: 0 + String :response_capture_mode, size: 64, null: false, default: 'metadata_only' + String :response_content_hash, size: 128 + String :response_json, text: true + String :response_thinking_json, text: true + DateTime :responded_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :message_inference_request_id + index :response_message_id + index %i[provider model_key] + index :runner_ref + index :provider_response_ref + index :status + index :responded_at + end + end +end diff --git a/lib/legion/data/migrations/081_add_llm_message_inference_foreign_keys.rb b/lib/legion/data/migrations/081_add_llm_message_inference_foreign_keys.rb new file mode 100644 index 0000000..2a9829d --- /dev/null +++ b/lib/legion/data/migrations/081_add_llm_message_inference_foreign_keys.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_messages) do + add_foreign_key [:message_inference_request_id], :llm_message_inference_requests, key: :id, on_delete: :set_null + add_foreign_key [:message_inference_response_id], :llm_message_inference_responses, key: :id, on_delete: :set_null + end + end + + down do + 
alter_table(:llm_messages) do + 
drop_foreign_key [:message_inference_response_id] + drop_foreign_key [:message_inference_request_id] + end + end +end diff --git a/lib/legion/data/migrations/082_create_llm_route_attempts.rb b/lib/legion/data/migrations/082_create_llm_route_attempts.rb new file mode 100644 index 0000000..cd95e48 --- /dev/null +++ b/lib/legion/data/migrations/082_create_llm_route_attempts.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:llm_route_attempts) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + foreign_key :message_inference_request_id, :llm_message_inference_requests, null: false, on_delete: :cascade + foreign_key :message_inference_response_id, :llm_message_inference_responses, null: true, on_delete: :set_null + Integer :attempt_no, null: false + String :provider, size: 128 + String :model_key, size: 255 + String :tier, size: 64 + String :route_target, size: 255 + String :status, size: 64, null: false + String :failure_reason, text: true + Integer :latency_ms, null: false, default: 0 + DateTime :started_at + DateTime :ended_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + unique %i[message_inference_request_id attempt_no] + index :message_inference_request_id + index :message_inference_response_id + index %i[provider model_key] + index :status + index :started_at + end + end +end diff --git a/lib/legion/data/migrations/083_create_llm_message_inference_metrics.rb b/lib/legion/data/migrations/083_create_llm_message_inference_metrics.rb new file mode 100644 index 0000000..b2be36d --- /dev/null +++ b/lib/legion/data/migrations/083_create_llm_message_inference_metrics.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:llm_message_inference_metrics) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + foreign_key :message_inference_request_id, :llm_message_inference_requests, null: 
false, on_delete: :cascade + foreign_key :message_inference_response_id, :llm_message_inference_responses, null: true, on_delete: :set_null + String :provider, size: 128 + String :model_key, size: 255 + String :tier, size: 64 + Integer :input_tokens, null: false, default: 0 + Integer :output_tokens, null: false, default: 0 + Integer :thinking_tokens, null: false, default: 0 + Integer :total_tokens, null: false, default: 0 + Integer :latency_ms, null: false, default: 0 + Integer :wall_clock_ms, null: false, default: 0 + BigDecimal :cost_usd, size: [20, 8], null: false, default: 0 + String :currency, size: 3, null: false, default: 'USD' + String :cost_center, size: 128 + String :budget_key, size: 128 + DateTime :recorded_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :message_inference_request_id + index :message_inference_response_id + index %i[provider model_key] + index :cost_center + index :budget_key + index :recorded_at + index %i[cost_center recorded_at] + end + end +end diff --git a/lib/legion/data/migrations/084_create_llm_tool_calls.rb b/lib/legion/data/migrations/084_create_llm_tool_calls.rb new file mode 100644 index 0000000..33b1946 --- /dev/null +++ b/lib/legion/data/migrations/084_create_llm_tool_calls.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:llm_tool_calls) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + foreign_key :message_inference_response_id, :llm_message_inference_responses, null: false, on_delete: :cascade + foreign_key :requested_by_message_id, :llm_messages, null: true, on_delete: :set_null + foreign_key :result_message_id, :llm_messages, null: true, on_delete: :set_null + Integer :tool_call_index, null: false, default: 0 + String :provider_tool_call_ref, size: 255 + String :tool_name, size: 255, null: false + String :tool_source_type, size: 128 + String :tool_source_server, size: 255 + String :status, 
size: 64, null: false, default: 'requested' + DateTime :requested_at + DateTime :completed_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + unique %i[message_inference_response_id tool_call_index] + index :message_inference_response_id + index :requested_by_message_id + index :result_message_id + index :provider_tool_call_ref + index :tool_name + index :status + index :requested_at + end + end +end diff --git a/lib/legion/data/migrations/085_add_llm_message_tool_call_foreign_key.rb b/lib/legion/data/migrations/085_add_llm_message_tool_call_foreign_key.rb new file mode 100644 index 0000000..8b63608 --- /dev/null +++ b/lib/legion/data/migrations/085_add_llm_message_tool_call_foreign_key.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_messages) do + add_foreign_key [:tool_call_id], :llm_tool_calls, key: :id, on_delete: :set_null + end + end + + down do + alter_table(:llm_messages) do + drop_foreign_key [:tool_call_id] + end + end +end diff --git a/lib/legion/data/migrations/086_create_llm_tool_call_attempts.rb b/lib/legion/data/migrations/086_create_llm_tool_call_attempts.rb new file mode 100644 index 0000000..49b89be --- /dev/null +++ b/lib/legion/data/migrations/086_create_llm_tool_call_attempts.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:llm_tool_call_attempts) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + foreign_key :tool_call_id, :llm_tool_calls, null: false, on_delete: :cascade + Integer :attempt_no, null: false + String :runner_ref, size: 128 + String :status, size: 64, null: false + String :error_category, size: 128 + String :error_code, size: 128 + String :error_message, text: true + Integer :duration_ms, null: false, default: 0 + String :arguments_ref, size: 255 + String :result_ref, size: 255 + DateTime :started_at + DateTime :ended_at + DateTime :inserted_at, null: false, 
default: Sequel::CURRENT_TIMESTAMP + + unique %i[tool_call_id attempt_no] + index :tool_call_id + index :runner_ref + index :status + index :started_at + end + end +end diff --git a/lib/legion/data/migrations/087_create_llm_conversation_compactions.rb b/lib/legion/data/migrations/087_create_llm_conversation_compactions.rb new file mode 100644 index 0000000..370d802 --- /dev/null +++ b/lib/legion/data/migrations/087_create_llm_conversation_compactions.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:llm_conversation_compactions) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + foreign_key :conversation_id, :llm_conversations, null: false, on_delete: :cascade + foreign_key :triggered_by_message_inference_request_id, :llm_message_inference_requests, null: true, + on_delete: :set_null + foreign_key :replaces_message_from_id, :llm_messages, null: true, on_delete: :set_null + foreign_key :replaces_message_to_id, :llm_messages, null: true, on_delete: :set_null + String :strategy, size: 128 + String :status, size: 64, null: false, default: 'created' + Integer :source_message_count, null: false, default: 0 + Integer :source_token_count, null: false, default: 0 + Integer :compacted_token_count, null: false, default: 0 + String :content_hash, size: 128 + String :summary, text: true + String :error_message, text: true + DateTime :compacted_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :conversation_id + index :triggered_by_message_inference_request_id + index :status + index :compacted_at + end + end +end diff --git a/lib/legion/data/migrations/088_create_llm_policy_evaluations.rb b/lib/legion/data/migrations/088_create_llm_policy_evaluations.rb new file mode 100644 index 0000000..3d4398a --- /dev/null +++ b/lib/legion/data/migrations/088_create_llm_policy_evaluations.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + 
+Sequel.migration do + change do + create_table(:llm_policy_evaluations) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + foreign_key :conversation_id, :llm_conversations, null: true, on_delete: :set_null + foreign_key :message_inference_request_id, :llm_message_inference_requests, null: true, on_delete: :set_null + foreign_key :message_inference_response_id, :llm_message_inference_responses, null: true, on_delete: :set_null + String :policy_key, size: 128, null: false + String :policy_version, size: 64 + String :evaluation_type, size: 64, null: false + String :decision, size: 64, null: false + String :enforcement_action, size: 64 + String :classification_level, size: 64 + TrueClass :contains_phi, null: false, default: false + TrueClass :contains_pii, null: false, default: false + String :reason_code, size: 128 + String :reason, text: true + DateTime :evaluated_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :conversation_id + index :message_inference_request_id + index :message_inference_response_id + index :policy_key + index :decision + index :evaluated_at + end + end +end diff --git a/lib/legion/data/migrations/089_create_llm_security_events.rb b/lib/legion/data/migrations/089_create_llm_security_events.rb new file mode 100644 index 0000000..c274353 --- /dev/null +++ b/lib/legion/data/migrations/089_create_llm_security_events.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:llm_security_events) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + foreign_key :conversation_id, :llm_conversations, null: true, on_delete: :set_null + foreign_key :message_inference_request_id, :llm_message_inference_requests, null: true, on_delete: :set_null + foreign_key :message_inference_response_id, :llm_message_inference_responses, null: true, on_delete: :set_null + foreign_key :tool_call_id, :llm_tool_calls, null: true, 
on_delete: :set_null + foreign_key :tool_call_attempt_id, :llm_tool_call_attempts, null: true, on_delete: :set_null + foreign_key :policy_evaluation_id, :llm_policy_evaluations, null: true, on_delete: :set_null + String :event_type, size: 128, null: false + String :severity, size: 32, null: false, default: 'info' + String :status, size: 64, null: false, default: 'open' + String :description, text: true + DateTime :detected_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :conversation_id + index :message_inference_request_id + index :message_inference_response_id + index :tool_call_id + index :tool_call_attempt_id + index :policy_evaluation_id + index :event_type + index :severity + index :detected_at + end + end +end diff --git a/lib/legion/data/migrations/090_create_llm_registry_events.rb b/lib/legion/data/migrations/090_create_llm_registry_events.rb new file mode 100644 index 0000000..79abc19 --- /dev/null +++ b/lib/legion/data/migrations/090_create_llm_registry_events.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:llm_registry_events) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + String :provider, size: 128 + String :model_key, size: 255 + String :event_type, size: 128, null: false + String :status, size: 64, null: false + String :reason, text: true + DateTime :recorded_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index %i[provider model_key] + index :event_type + index :status + index :recorded_at + end + end +end diff --git a/lib/legion/data/migrations/091_create_portable_identity_providers.rb b/lib/legion/data/migrations/091_create_portable_identity_providers.rb new file mode 100644 index 0000000..f6bafa4 --- /dev/null +++ b/lib/legion/data/migrations/091_create_portable_identity_providers.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + 
create_table(:portable_identity_providers) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + String :name, size: 255, null: false, unique: true # unique: true already indexes this column + String :provider_type, size: 64, null: false + String :facing, size: 32, null: false + Integer :priority, null: false, default: 100 + Integer :trust_weight, null: false, default: 50 + String :source, size: 64, null: false, default: 'gem' + TrueClass :enabled, null: false, default: true + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :provider_type + index :enabled + end + + create_table(:portable_identity_provider_capabilities) do + primary_key :id + foreign_key :provider_id, :portable_identity_providers, null: false, on_delete: :cascade + String :capability_key, size: 128, null: false + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + unique %i[provider_id capability_key] + index :provider_id + index :capability_key + end + end +end diff --git a/lib/legion/data/migrations/092_create_portable_identity_principals.rb b/lib/legion/data/migrations/092_create_portable_identity_principals.rb new file mode 100644 index 0000000..aafbb9a --- /dev/null +++ b/lib/legion/data/migrations/092_create_portable_identity_principals.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:portable_identity_principals) do + primary_key :id + String :uuid, size: 36, null: false, unique: true # unique: true already indexes this column + String :canonical_name, size: 255, null: false + String :kind, size: 64, null: false + String :employee_key, size: 255 + String :display_name, size: 255 + TrueClass :active, null: false, default: true + DateTime :last_seen_at + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + unique %i[canonical_name kind] + index 
:canonical_name + index :kind + index :employee_key + index :active + end + end +end diff --git a/lib/legion/data/migrations/093_create_portable_identities.rb b/lib/legion/data/migrations/093_create_portable_identities.rb new file mode 100644 index 0000000..fbb6c98 --- /dev/null +++ b/lib/legion/data/migrations/093_create_portable_identities.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:portable_identities) do + primary_key :id + String :uuid, size: 36, null: false, unique: true + foreign_key :principal_id, :portable_identity_principals, null: false, on_delete: :cascade + foreign_key :provider_id, :portable_identity_providers, null: false, on_delete: :cascade + String :provider_identity_key, size: 255, null: false + String :profile_ciphertext, text: true + TrueClass :active, null: false, default: true + DateTime :last_authenticated_at + String :account_type, size: 64, null: false, default: 'primary' + String :qualifier, size: 255 + TrueClass :is_default, null: false, default: false + String :link_evidence, text: true + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + unique %i[principal_id provider_id provider_identity_key] + index :uuid + index :principal_id + index :provider_id + index :provider_identity_key + index %i[provider_id provider_identity_key] + index :active + index :is_default + end + end +end diff --git a/lib/legion/data/migrations/094_create_portable_identity_groups.rb b/lib/legion/data/migrations/094_create_portable_identity_groups.rb new file mode 100644 index 0000000..a8a8e50 --- /dev/null +++ b/lib/legion/data/migrations/094_create_portable_identity_groups.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:portable_identity_groups) do + primary_key :id + String :uuid, size: 36, null: false, unique: true + String :name, size: 255, null: false, 
unique: true + String :source, size: 64, null: false, default: 'ldap' + String :description, text: true + TrueClass :active, null: false, default: true + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :uuid + index :name + index :source + index :active + end + end +end diff --git a/lib/legion/data/migrations/095_create_portable_identity_group_memberships.rb b/lib/legion/data/migrations/095_create_portable_identity_group_memberships.rb new file mode 100644 index 0000000..4fd088a --- /dev/null +++ b/lib/legion/data/migrations/095_create_portable_identity_group_memberships.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:portable_identity_group_memberships) do + primary_key :id + String :uuid, size: 36, null: false, unique: true + foreign_key :principal_id, :portable_identity_principals, null: false, on_delete: :cascade + foreign_key :group_id, :portable_identity_groups, null: false, on_delete: :cascade + String :status, size: 32, null: false, default: 'active' + String :discovered_by, size: 255, null: false + Integer :trust_weight, null: false, default: 50 + DateTime :expires_at + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + unique %i[principal_id group_id discovered_by] + index :uuid + index :principal_id + index :group_id + index :status + index %i[principal_id status] + end + end +end diff --git a/lib/legion/data/migrations/096_create_portable_identity_audit_log.rb b/lib/legion/data/migrations/096_create_portable_identity_audit_log.rb new file mode 100644 index 0000000..ea42132 --- /dev/null +++ b/lib/legion/data/migrations/096_create_portable_identity_audit_log.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + create_table(:portable_identity_audit_log) do + primary_key 
:id + String :uuid, size: 36, null: false, unique: true + foreign_key :principal_id, :portable_identity_principals, on_delete: :set_null + foreign_key :identity_id, :portable_identities, on_delete: :set_null + String :provider_name, size: 255, null: false + String :event_type, size: 128, null: false + String :trust_level, size: 64 + String :detail_payload, text: true + String :node_ref, size: 255 + String :session_ref, size: 255 + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :uuid + index :principal_id + index :identity_id + index :event_type + index :created_at + index %i[principal_id event_type created_at] + end + end +end diff --git a/lib/legion/data/migrations/097_add_llm_dispatch_fields.rb b/lib/legion/data/migrations/097_add_llm_dispatch_fields.rb new file mode 100644 index 0000000..13c71d9 --- /dev/null +++ b/lib/legion/data/migrations/097_add_llm_dispatch_fields.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + alter_table(:llm_message_inference_requests) do + add_column :operation, String, size: 64, null: false, default: 'chat' + add_column :correlation_id, String, size: 64 + add_column :idempotency_key, String, size: 128 + end + + alter_table(:llm_message_inference_responses) do + add_column :provider_instance, String, size: 128 + add_column :dispatch_path, String, size: 32 + end + end +end diff --git a/lib/legion/data/migrations/098_drop_legacy_identity_tables.rb b/lib/legion/data/migrations/098_drop_legacy_identity_tables.rb new file mode 100644 index 0000000..5bf8e66 --- /dev/null +++ b/lib/legion/data/migrations/098_drop_legacy_identity_tables.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + drop_table(:identity_audit_log) if table_exists?(:identity_audit_log) + drop_table(:identity_group_memberships) if table_exists?(:identity_group_memberships) + drop_table(:identity_groups) if table_exists?(:identity_groups) + drop_table(:identities) if 
table_exists?(:identities) + + alter_table(:nodes) { drop_column :principal_id } if table_exists?(:nodes) && schema(:nodes).any? { |col, _| col == :principal_id } + + drop_table(:principals) if table_exists?(:principals) + drop_table(:identity_providers) if table_exists?(:identity_providers) + end + + down do + nil + end +end diff --git a/lib/legion/data/migrations/099_rename_portable_identity_tables.rb b/lib/legion/data/migrations/099_rename_portable_identity_tables.rb new file mode 100644 index 0000000..38e83d7 --- /dev/null +++ b/lib/legion/data/migrations/099_rename_portable_identity_tables.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + rename_table(:portable_identity_provider_capabilities, :identity_provider_capabilities) + rename_table(:portable_identity_audit_log, :identity_audit_log) + rename_table(:portable_identity_group_memberships, :identity_group_memberships) + rename_table(:portable_identity_groups, :identity_groups) + rename_table(:portable_identities, :identities) + rename_table(:portable_identity_principals, :identity_principals) + rename_table(:portable_identity_providers, :identity_providers) + end + + down do + rename_table(:identity_providers, :portable_identity_providers) + rename_table(:identity_principals, :portable_identity_principals) + rename_table(:identities, :portable_identities) + rename_table(:identity_groups, :portable_identity_groups) + rename_table(:identity_group_memberships, :portable_identity_group_memberships) + rename_table(:identity_audit_log, :portable_identity_audit_log) + rename_table(:identity_provider_capabilities, :portable_identity_provider_capabilities) + end +end diff --git a/lib/legion/data/migrations/100_create_apollo_entries_sqlite.rb b/lib/legion/data/migrations/100_create_apollo_entries_sqlite.rb new file mode 100644 index 0000000..16d940f --- /dev/null +++ b/lib/legion/data/migrations/100_create_apollo_entries_sqlite.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + 
+Sequel.migration do + up do + next if adapter_scheme == :postgres + + create_table(:apollo_entries) do + primary_key :id + String :content, text: true, null: false + String :content_type, null: false, size: 50 + Float :confidence, default: 0.5 + String :source_agent, null: false, size: 255 + String :source_context, text: true, default: '{}' + String :tags, text: true, default: '{}' + String :status, null: false, size: 20, default: 'candidate' + Integer :access_count, default: 0 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :confirmed_at + String :source_provider, size: 255 + String :source_channel, size: 100 + String :knowledge_domain, size: 255, default: 'general' + String :submitted_by, size: 255 + String :submitted_from, size: 255 + String :content_hash, fixed: true, size: 64 + String :summary_l0, size: 500 + String :summary_l1, text: true + String :knowledge_tier, null: false, size: 4, default: 'L2' + String :parent_entry_id, size: 36 + DateTime :l0_generated_at + DateTime :l1_generated_at + String :parent_knowledge_id, size: 36 + TrueClass :is_latest, null: false, default: true + String :supersession_type, size: 20 + DateTime :expires_at + String :forget_reason, size: 255 + TrueClass :is_inference, null: false, default: false + end + + create_table(:apollo_entries_archive) do + primary_key :id + String :content, text: true, null: false + String :content_type, null: false, size: 50 + Float :confidence, default: 0.5 + String :source_agent, null: false, size: 255 + String :source_context, text: true, default: '{}' + String :tags, text: true, default: '{}' + String :status, null: false, size: 20, default: 'candidate' + Integer :access_count, default: 0 + DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP + DateTime :confirmed_at + String :source_provider, size: 255 + String :source_channel, size: 100 + String 
:knowledge_domain, size: 255, default: 'general' + String :submitted_by, size: 255 + String :submitted_from, size: 255 + String :content_hash, fixed: true, size: 64 + String :summary_l0, size: 500 + String :summary_l1, text: true + String :knowledge_tier, null: false, size: 4, default: 'L2' + String :parent_entry_id, size: 36 + DateTime :l0_generated_at + DateTime :l1_generated_at + String :parent_knowledge_id, size: 36 + TrueClass :is_latest, null: false, default: true + String :supersession_type, size: 20 + DateTime :expires_at + String :forget_reason, size: 255 + TrueClass :is_inference, null: false, default: false + DateTime :archived_at, default: Sequel::CURRENT_TIMESTAMP + String :archive_reason, text: true + end + end + + down do + next if adapter_scheme == :postgres + + drop_table(:apollo_entries_archive) if table_exists?(:apollo_entries_archive) + drop_table(:apollo_entries) if table_exists?(:apollo_entries) + end +end diff --git a/lib/legion/data/migrations/101_add_apollo_identity_and_access_scope.rb b/lib/legion/data/migrations/101_add_apollo_identity_and_access_scope.rb new file mode 100644 index 0000000..340e20d --- /dev/null +++ b/lib/legion/data/migrations/101_add_apollo_identity_and_access_scope.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:apollo_entries) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, String, size: 255, null: true + end + + alter_table(:apollo_entries_archive) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, String, size: 255, null: true + end + end + + down do + alter_table(:apollo_entries) do + drop_column :access_scope + drop_column 
:identity_principal_id + drop_column :identity_id + drop_column :identity_canonical_name + end + + alter_table(:apollo_entries_archive) do + drop_column :access_scope + drop_column :identity_principal_id + drop_column :identity_id + drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/102_add_apollo_access_scope_and_identity_indexes.rb b/lib/legion/data/migrations/102_add_apollo_access_scope_and_identity_indexes.rb new file mode 100644 index 0000000..1289dae --- /dev/null +++ b/lib/legion/data/migrations/102_add_apollo_access_scope_and_identity_indexes.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + run 'CREATE INDEX IF NOT EXISTS idx_apollo_access_scope ON apollo_entries (access_scope)' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_identity_principal_id ON apollo_entries (identity_principal_id) WHERE identity_principal_id IS NOT NULL' + run 'CREATE INDEX IF NOT EXISTS idx_apollo_identity_id ON apollo_entries (identity_id) WHERE identity_id IS NOT NULL' + end + + down do + run 'DROP INDEX IF EXISTS idx_apollo_access_scope' + run 'DROP INDEX IF EXISTS idx_apollo_identity_principal_id' + run 'DROP INDEX IF EXISTS idx_apollo_identity_id' + end +end diff --git a/lib/legion/data/migrations/103_add_llm_conversations_identity_columns.rb b/lib/legion/data/migrations/103_add_llm_conversations_identity_columns.rb new file mode 100644 index 0000000..2122ff1 --- /dev/null +++ b/lib/legion/data/migrations/103_add_llm_conversations_identity_columns.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +# llm_conversations already has principal_id and identity_id (077). +# Add only the two missing standardized columns: access_scope and identity_canonical_name. +# Existing columns are NOT renamed — they are in active use by lex-llm-ledger. 
+
+Sequel.migration do
+  up do
+    alter_table(:llm_conversations) do
+      add_column :access_scope, String, size: 20, null: false, default: 'global' # NOT NULL, defaults to 'global'
+      add_column :identity_canonical_name, String, size: 255, null: true
+      add_index :access_scope, name: :idx_conversations_access_scope # explicitly named so down can drop it by name
+    end
+  end
+
+  down do
+    alter_table(:llm_conversations) do
+      drop_index :access_scope, name: :idx_conversations_access_scope # index is dropped before the column it covers
+      drop_column :access_scope
+      drop_column :identity_canonical_name
+    end
+  end
+end
diff --git a/lib/legion/data/migrations/104_add_llm_messages_identity_columns.rb b/lib/legion/data/migrations/104_add_llm_messages_identity_columns.rb
new file mode 100644
index 0000000..f01fb4b
--- /dev/null
+++ b/lib/legion/data/migrations/104_add_llm_messages_identity_columns.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+Sequel.migration do
+  up do
+    alter_table(:llm_messages) do
+      add_column :access_scope, String, size: 20, null: false, default: 'global' # NOT NULL, defaults to 'global'
+      add_column :identity_principal_id, Integer, null: true # plain nullable Integer; no foreign-key constraint is declared
+      add_column :identity_id, Integer, null: true # plain nullable Integer; no foreign-key constraint is declared
+      add_column :identity_canonical_name, String, size: 255, null: true
+      add_index :access_scope, name: :idx_messages_access_scope
+      add_index :identity_principal_id, name: :idx_messages_identity_principal_id,
+                                        where: Sequel.negate(identity_principal_id: nil) # partial index: only rows where identity_principal_id IS NOT NULL
+    end
+  end
+
+  down do
+    alter_table(:llm_messages) do
+      drop_index :identity_principal_id, name: :idx_messages_identity_principal_id # both indexes are dropped before the columns they cover
+      drop_index :access_scope, name: :idx_messages_access_scope
+      drop_column :access_scope
+      drop_column :identity_principal_id
+      drop_column :identity_id
+      drop_column :identity_canonical_name
+    end
+  end
+end
diff --git a/lib/legion/data/migrations/105_add_llm_message_inference_requests_identity_columns.rb b/lib/legion/data/migrations/105_add_llm_message_inference_requests_identity_columns.rb
new file mode 100644
index 0000000..47d5d43
--- /dev/null
+++
b/lib/legion/data/migrations/105_add_llm_message_inference_requests_identity_columns.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +# llm_message_inference_requests already has caller_principal_id and caller_identity_id (079). +# Add only the two missing standardized columns: access_scope and identity_canonical_name. +# Existing columns are NOT renamed — they are in active use by lex-llm-ledger. + +Sequel.migration do + up do + alter_table(:llm_message_inference_requests) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_canonical_name, String, size: 255, null: true + add_index :access_scope, name: :idx_inference_requests_access_scope + end + end + + down do + alter_table(:llm_message_inference_requests) do + drop_index :access_scope, name: :idx_inference_requests_access_scope + drop_column :access_scope + drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/106_add_llm_message_inference_responses_identity_columns.rb b/lib/legion/data/migrations/106_add_llm_message_inference_responses_identity_columns.rb new file mode 100644 index 0000000..b71d7fc --- /dev/null +++ b/lib/legion/data/migrations/106_add_llm_message_inference_responses_identity_columns.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_message_inference_responses) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, String, size: 255, null: true + add_index :access_scope, name: :idx_message_inference_responses_access_scope + add_index :identity_principal_id, name: :idx_message_inference_responses_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil) + end + end + + down do + alter_table(:llm_message_inference_responses) do + drop_index :identity_principal_id, name: 
:idx_message_inference_responses_identity_principal_id + drop_index :access_scope, name: :idx_message_inference_responses_access_scope + drop_column :access_scope + drop_column :identity_principal_id + drop_column :identity_id + drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/107_add_llm_route_attempts_identity_columns.rb b/lib/legion/data/migrations/107_add_llm_route_attempts_identity_columns.rb new file mode 100644 index 0000000..0d8da26 --- /dev/null +++ b/lib/legion/data/migrations/107_add_llm_route_attempts_identity_columns.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_route_attempts) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, String, size: 255, null: true + add_index :access_scope, name: :idx_route_attempts_access_scope + add_index :identity_principal_id, name: :idx_route_attempts_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil) + end + end + + down do + alter_table(:llm_route_attempts) do + drop_index :identity_principal_id, name: :idx_route_attempts_identity_principal_id + drop_index :access_scope, name: :idx_route_attempts_access_scope + drop_column :access_scope + drop_column :identity_principal_id + drop_column :identity_id + drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/108_add_llm_message_inference_metrics_identity_columns.rb b/lib/legion/data/migrations/108_add_llm_message_inference_metrics_identity_columns.rb new file mode 100644 index 0000000..2f51139 --- /dev/null +++ b/lib/legion/data/migrations/108_add_llm_message_inference_metrics_identity_columns.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_message_inference_metrics) do + add_column 
:access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, String, size: 255, null: true + add_index :access_scope, name: :idx_message_inference_metrics_access_scope + add_index :identity_principal_id, name: :idx_message_inference_metrics_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil) + end + end + + down do + alter_table(:llm_message_inference_metrics) do + drop_index :identity_principal_id, name: :idx_message_inference_metrics_identity_principal_id + drop_index :access_scope, name: :idx_message_inference_metrics_access_scope + drop_column :access_scope + drop_column :identity_principal_id + drop_column :identity_id + drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/109_add_llm_tool_calls_identity_columns.rb b/lib/legion/data/migrations/109_add_llm_tool_calls_identity_columns.rb new file mode 100644 index 0000000..55ba53d --- /dev/null +++ b/lib/legion/data/migrations/109_add_llm_tool_calls_identity_columns.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_tool_calls) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, String, size: 255, null: true + add_index :access_scope, name: :idx_tool_calls_access_scope + add_index :identity_principal_id, name: :idx_tool_calls_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil) + end + end + + down do + alter_table(:llm_tool_calls) do + drop_index :identity_principal_id, name: :idx_tool_calls_identity_principal_id + drop_index :access_scope, name: :idx_tool_calls_access_scope + drop_column :access_scope + drop_column :identity_principal_id + drop_column :identity_id + 
drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/110_add_llm_tool_call_attempts_identity_columns.rb b/lib/legion/data/migrations/110_add_llm_tool_call_attempts_identity_columns.rb new file mode 100644 index 0000000..924909a --- /dev/null +++ b/lib/legion/data/migrations/110_add_llm_tool_call_attempts_identity_columns.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_tool_call_attempts) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, String, size: 255, null: true + add_index :access_scope, name: :idx_tool_call_attempts_access_scope + add_index :identity_principal_id, name: :idx_tool_call_attempts_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil) + end + end + + down do + alter_table(:llm_tool_call_attempts) do + drop_index :identity_principal_id, name: :idx_tool_call_attempts_identity_principal_id + drop_index :access_scope, name: :idx_tool_call_attempts_access_scope + drop_column :access_scope + drop_column :identity_principal_id + drop_column :identity_id + drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/111_add_llm_conversation_compactions_identity_columns.rb b/lib/legion/data/migrations/111_add_llm_conversation_compactions_identity_columns.rb new file mode 100644 index 0000000..7c4925e --- /dev/null +++ b/lib/legion/data/migrations/111_add_llm_conversation_compactions_identity_columns.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_conversation_compactions) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, 
String, size: 255, null: true + add_index :access_scope, name: :idx_conversation_compactions_access_scope + add_index :identity_principal_id, name: :idx_conversation_compactions_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil) + end + end + + down do + alter_table(:llm_conversation_compactions) do + drop_index :identity_principal_id, name: :idx_conversation_compactions_identity_principal_id + drop_index :access_scope, name: :idx_conversation_compactions_access_scope + drop_column :access_scope + drop_column :identity_principal_id + drop_column :identity_id + drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/112_add_llm_policy_evaluations_identity_columns.rb b/lib/legion/data/migrations/112_add_llm_policy_evaluations_identity_columns.rb new file mode 100644 index 0000000..e5f7229 --- /dev/null +++ b/lib/legion/data/migrations/112_add_llm_policy_evaluations_identity_columns.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_policy_evaluations) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, String, size: 255, null: true + add_index :access_scope, name: :idx_policy_evaluations_access_scope + add_index :identity_principal_id, name: :idx_policy_evaluations_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil) + end + end + + down do + alter_table(:llm_policy_evaluations) do + drop_index :identity_principal_id, name: :idx_policy_evaluations_identity_principal_id + drop_index :access_scope, name: :idx_policy_evaluations_access_scope + drop_column :access_scope + drop_column :identity_principal_id + drop_column :identity_id + drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/113_add_llm_security_events_identity_columns.rb 
b/lib/legion/data/migrations/113_add_llm_security_events_identity_columns.rb new file mode 100644 index 0000000..98ca94e --- /dev/null +++ b/lib/legion/data/migrations/113_add_llm_security_events_identity_columns.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_security_events) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, String, size: 255, null: true + add_index :access_scope, name: :idx_security_events_access_scope + add_index :identity_principal_id, name: :idx_security_events_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil) + end + end + + down do + alter_table(:llm_security_events) do + drop_index :identity_principal_id, name: :idx_security_events_identity_principal_id + drop_index :access_scope, name: :idx_security_events_access_scope + drop_column :access_scope + drop_column :identity_principal_id + drop_column :identity_id + drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/114_add_llm_registry_events_identity_columns.rb b/lib/legion/data/migrations/114_add_llm_registry_events_identity_columns.rb new file mode 100644 index 0000000..d7b4bea --- /dev/null +++ b/lib/legion/data/migrations/114_add_llm_registry_events_identity_columns.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_registry_events) do + add_column :access_scope, String, size: 20, null: false, default: 'global' + add_column :identity_principal_id, Integer, null: true + add_column :identity_id, Integer, null: true + add_column :identity_canonical_name, String, size: 255, null: true + add_index :access_scope, name: :idx_registry_events_access_scope + add_index :identity_principal_id, name: :idx_registry_events_identity_principal_id, + where: 
Sequel.negate(identity_principal_id: nil) + end + end + + down do + alter_table(:llm_registry_events) do + drop_index :identity_principal_id, name: :idx_registry_events_identity_principal_id + drop_index :access_scope, name: :idx_registry_events_access_scope + drop_column :access_scope + drop_column :identity_principal_id + drop_column :identity_id + drop_column :identity_canonical_name + end + end +end diff --git a/lib/legion/data/migrations/115_add_runtime_caller_columns.rb b/lib/legion/data/migrations/115_add_runtime_caller_columns.rb new file mode 100644 index 0000000..c2469ef --- /dev/null +++ b/lib/legion/data/migrations/115_add_runtime_caller_columns.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_message_inference_requests) do + add_column :runtime_caller_class, String, size: 255, null: true, index: true + add_column :runtime_caller_client, String, size: 255, null: true + end + end + + down do + alter_table(:llm_message_inference_requests) do + drop_index :runtime_caller_class + drop_column :runtime_caller_class + drop_column :runtime_caller_client + end + end +end diff --git a/lib/legion/data/migrations/116_make_tool_calls_response_id_nullable.rb b/lib/legion/data/migrations/116_make_tool_calls_response_id_nullable.rb new file mode 100644 index 0000000..3950ac6 --- /dev/null +++ b/lib/legion/data/migrations/116_make_tool_calls_response_id_nullable.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_tool_calls) do + drop_index :identity_principal_id, name: :idx_tool_calls_identity_principal_id, if_exists: true + set_column_allow_null :message_inference_response_id + add_index :identity_principal_id, name: :idx_tool_calls_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil) + end + end + + down do + alter_table(:llm_tool_calls) do + drop_index :identity_principal_id, name: :idx_tool_calls_identity_principal_id, if_exists: true + 
set_column_not_null :message_inference_response_id + add_index :identity_principal_id, name: :idx_tool_calls_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil) + end + end +end diff --git a/lib/legion/data/migrations/117_add_conversation_id_to_llm_tool_calls.rb b/lib/legion/data/migrations/117_add_conversation_id_to_llm_tool_calls.rb new file mode 100644 index 0000000..38edfb1 --- /dev/null +++ b/lib/legion/data/migrations/117_add_conversation_id_to_llm_tool_calls.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_tool_calls) do + add_foreign_key :conversation_id, :llm_conversations, null: true, on_delete: :set_null, on_update: :cascade + add_index :conversation_id + end + end + + down do + alter_table(:llm_tool_calls) do + drop_column :conversation_id + # On SQLite, drop_column triggers table recreation which silently destroys + # partial indexes. Recreate the one from migration 109. + add_index :identity_principal_id, + name: :idx_tool_calls_identity_principal_id, + where: Sequel.negate(identity_principal_id: nil), + if_not_exists: true + end + end +end diff --git a/lib/legion/data/migrations/118_add_entity_type_to_audit_records.rb b/lib/legion/data/migrations/118_add_entity_type_to_audit_records.rb new file mode 100644 index 0000000..ab52ad4 --- /dev/null +++ b/lib/legion/data/migrations/118_add_entity_type_to_audit_records.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +# The Great Convergence (part 1 of 2): add entity_type column to audit_records on all adapters. +# Migration 068 added this column on PostgreSQL only. +# Production is already at 117+, so this migration only runs on SQLite/MySQL +# deployments that missed it due to the postgres-only guard in migration 068. 
+
+Sequel.migration do
+  up do
+    next unless table_exists?(:audit_records) # nothing to converge when the table is absent
+
+    existing = schema(:audit_records).map(&:first)
+    next if existing.include?(:entity_type) # column already present, so this run is a no-op
+
+    alter_table(:audit_records) do
+      add_column :entity_type, String, size: 100, null: true
+    end
+
+    add_index :audit_records, :entity_type, name: :idx_audit_records_entity_type, if_not_exists: true
+  end
+
+  down do
+    next unless table_exists?(:audit_records)
+    # schema is a Database-level call; inside alter_table the block runs against the generator, which lacks it.
+    next unless schema(:audit_records).any? { |col, _| col == :entity_type }
+
+    alter_table(:audit_records) { drop_column :entity_type }
+  end
+end
diff --git a/lib/legion/data/migrations/119_create_missing_apollo_tables.rb b/lib/legion/data/migrations/119_create_missing_apollo_tables.rb
new file mode 100644
index 0000000..e62354d
--- /dev/null
+++ b/lib/legion/data/migrations/119_create_missing_apollo_tables.rb
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+# The Great Convergence (part 2 of 2): create apollo_relations, apollo_expertise,
+# apollo_access_log, and apollo_operations on all adapters.
+#
+# Migration 012 (postgres-only) created apollo_relations, apollo_expertise,
+# and apollo_access_log.
+# Migration 047 (postgres-only) created apollo_operations.
+# These tables were never created on SQLite/MySQL deployments.
+
+Sequel.migration do
+  up do
+    # apollo_relations
+    unless table_exists?(:apollo_relations)
+      create_table(:apollo_relations) do
+        primary_key :id
+        String :from_entry_id, size: 36, null: false
+        String :to_entry_id, size: 36, null: false
+        String :relation_type, null: false, size: 50
+        Float :weight, default: 1.0
+        String :source_agent, size: 255
+        DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
+
+        index :from_entry_id, name: :idx_apollo_rel_from
+        index :to_entry_id, name: :idx_apollo_rel_to
+        index :relation_type, name: :idx_apollo_rel_type
+        index %i[from_entry_id relation_type], name: :idx_apollo_rel_composite
+      end
+    end
+
+    # apollo_expertise
+    unless table_exists?(:apollo_expertise)
+      create_table(:apollo_expertise) do
+        primary_key :id
+        String :agent_id, null: false, size: 255, index: { name: :idx_apollo_exp_agent }
+        String :domain, null: false, size: 255, index: { name: :idx_apollo_exp_domain }
+        Float :proficiency, default: 0.0
+        Integer :entry_count, default: 0
+        DateTime :last_active_at, default: Sequel::CURRENT_TIMESTAMP
+
+        index %i[agent_id domain], name: :idx_apollo_exp_composite
+      end
+    end
+
+    # apollo_access_log
+    unless table_exists?(:apollo_access_log)
+      create_table(:apollo_access_log) do
+        primary_key :id
+        String :entry_id, size: 36, index: { name: :idx_apollo_access_entry }
+        String :agent_id, null: false, size: 255
+        String :action, null: false, size: 20
+        DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
+      end
+    end
+
+    # apollo_operations
+    unless table_exists?(:apollo_operations)
+      create_table(:apollo_operations) do
+        primary_key :id
+        String :operation, size: 50, null: false
+        String :actor, size: 255, null: false
+        String :target_type, size: 50
+        String :target_ids, text: true # serialized array; PG uses INTEGER[]
+        String :summary, text: true
+        String :detail, text: true, default: '{}' # serialized json; PG uses JSONB
+        String :old_state, text: true
+        String :new_state, text: true
+        String :reason, text: true
+        String :principal_id, size: 255
+        DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP
+
+        index :created_at, name: :idx_apollo_ops_created
+        index :operation, name: :idx_apollo_ops_operation
+        index :actor, name: :idx_apollo_ops_actor
+      end
+    end
+  end
+
+  down do
+    # On PostgreSQL these tables come from migrations 012/047 (up is a no-op there), so
+    # rolling back must not drop them; other adapters drop in reverse creation order.
+    next if database_type == :postgres
+    %i[apollo_operations apollo_access_log apollo_expertise apollo_relations].each { |t| drop_table(t) if table_exists?(t) }
+  end
+end
diff --git a/lib/legion/data/migrations/120_add_missing_apollo_indexes.rb b/lib/legion/data/migrations/120_add_missing_apollo_indexes.rb new file mode 100644 index 0000000..f36abb2 --- /dev/null +++ b/lib/legion/data/migrations/120_add_missing_apollo_indexes.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +# The Great Convergence (part 3): add missing indexes on apollo_* tables. +# +# Migration 047 (postgres-only) created dozens of indexes on apollo_entries, +# apollo_relations, apollo_expertise, apollo_operations, and +# apollo_entries_archive. These were never created on SQLite/MySQL. +# +# Vector indexes (hnsw) and GIN indexes are postgres-specific and skipped. +# +# NOTE: Uses raw CREATE INDEX IF NOT EXISTS SQL because Sequel's add_index +# inside alter_table does not honor if_not_exists on SQLite (it triggers +# table recreation which fails if the index already exists).
+
+Sequel.migration do
+  up do
+    if table_exists?(:apollo_entries)
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_submitted_by ON apollo_entries (submitted_by)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_submitted_from ON apollo_entries (submitted_from)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_status ON apollo_entries (status)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_confidence ON apollo_entries (confidence)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_created ON apollo_entries (created_at)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_updated ON apollo_entries (updated_at)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_domain ON apollo_entries (knowledge_domain)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_source_agent ON apollo_entries (source_agent)'
+      run "CREATE UNIQUE INDEX IF NOT EXISTS idx_apollo_content_hash ON apollo_entries (content_hash) WHERE status != 'archived'"
+      run "CREATE INDEX IF NOT EXISTS idx_apollo_active ON apollo_entries (id) WHERE status IN ('candidate', 'confirmed', 'disputed')"
+      run "CREATE INDEX IF NOT EXISTS idx_apollo_decay_target ON apollo_entries (updated_at) WHERE status != 'archived'"
+      run "CREATE INDEX IF NOT EXISTS idx_apollo_candidates ON apollo_entries (status, source_provider, source_channel) WHERE status = 'candidate'"
+    end
+    # Every table is checked on its own: one missing table must not skip the indexes of the tables after it.
+    if table_exists?(:apollo_entries_archive)
+      run 'CREATE INDEX IF NOT EXISTS idx_archive_content_hash ON apollo_entries_archive (content_hash)'
+      run 'CREATE INDEX IF NOT EXISTS idx_archive_source_agent ON apollo_entries_archive (source_agent)'
+      run 'CREATE INDEX IF NOT EXISTS idx_archive_archived_at ON apollo_entries_archive (archived_at)'
+    end
+
+    if table_exists?(:apollo_relations)
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_rel_from ON apollo_relations (from_entry_id)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_rel_to ON apollo_relations (to_entry_id)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_rel_type ON apollo_relations (relation_type)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_rel_composite ON apollo_relations (from_entry_id, relation_type)'
+    end
+
+    if table_exists?(:apollo_expertise)
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_exp_agent ON apollo_expertise (agent_id)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_exp_domain ON apollo_expertise (domain)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_exp_composite ON apollo_expertise (agent_id, domain)'
+    end
+
+    if table_exists?(:apollo_operations)
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_ops_created ON apollo_operations (created_at)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_ops_operation ON apollo_operations (operation)'
+      run 'CREATE INDEX IF NOT EXISTS idx_apollo_ops_actor ON apollo_operations (actor)'
+    end
+  end
+
+  down do
+    %w[
+      idx_apollo_submitted_by idx_apollo_submitted_from idx_apollo_status
+      idx_apollo_confidence idx_apollo_created idx_apollo_updated
+      idx_apollo_domain idx_apollo_source_agent idx_apollo_content_hash
+      idx_apollo_active idx_apollo_decay_target idx_apollo_candidates
+      idx_archive_content_hash idx_archive_source_agent idx_archive_archived_at
+      idx_apollo_rel_from idx_apollo_rel_to idx_apollo_rel_type
+      idx_apollo_rel_composite
+      idx_apollo_exp_agent idx_apollo_exp_domain idx_apollo_exp_composite
+      idx_apollo_ops_created idx_apollo_ops_operation idx_apollo_ops_actor
+    ].each do |name|
+      run "DROP INDEX IF EXISTS #{name}"
+    end
+  end
+end
diff --git a/lib/legion/data/migrations/121_add_cache_token_metrics.rb b/lib/legion/data/migrations/121_add_cache_token_metrics.rb new file mode 100644 index 0000000..cb797a2 --- /dev/null +++ b/lib/legion/data/migrations/121_add_cache_token_metrics.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:llm_message_inference_metrics) + + existing = schema(:llm_message_inference_metrics).map(&:first) + + alter_table(:llm_message_inference_metrics) do + add_column :cached_input_tokens, Integer, null: false, default: 0 unless
existing.include?(:cached_input_tokens) + add_column :cache_creation_tokens, Integer, null: false, default: 0 unless existing.include?(:cache_creation_tokens) + end + end + + down do + next unless table_exists?(:llm_message_inference_metrics) + + alter_table(:llm_message_inference_metrics) do + drop_column :cache_creation_tokens + drop_column :cached_input_tokens + end + end +end diff --git a/lib/legion/data/migrations/122_ensure_runtime_caller_columns.rb b/lib/legion/data/migrations/122_ensure_runtime_caller_columns.rb new file mode 100644 index 0000000..b308ced --- /dev/null +++ b/lib/legion/data/migrations/122_ensure_runtime_caller_columns.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +# Migration 115 had a bug: it guarded the up block on the :definition column +# (added by migration 055) instead of :runtime_caller_class. This means on +# deployments where :definition existed but :runtime_caller_class did not, +# the columns were added correctly. But the guard was checking the wrong +# thing, and the down block has no guard at all. +# +# This migration ensures the columns exist on any deployment that might have +# skipped them due to the 115 bug. 
+ +Sequel.migration do + up do + if table_exists?(:llm_message_inference_requests) + cols = schema(:llm_message_inference_requests).map(&:first) + unless cols.include?(:runtime_caller_class) && cols.include?(:runtime_caller_client) + alter_table(:llm_message_inference_requests) do + add_column :runtime_caller_class, String, size: 255, null: true, index: true unless cols.include?(:runtime_caller_class) + add_column :runtime_caller_client, String, size: 255, null: true unless cols.include?(:runtime_caller_client) + end + end + end + end + + down do + if table_exists?(:llm_message_inference_requests) + cols = schema(:llm_message_inference_requests).map(&:first) + if cols.include?(:runtime_caller_class) || cols.include?(:runtime_caller_client) + alter_table(:llm_message_inference_requests) do + drop_column :runtime_caller_client if cols.include?(:runtime_caller_client) + drop_column :runtime_caller_class if cols.include?(:runtime_caller_class) + end + end + end + end +end diff --git a/lib/legion/data/migrations/123_add_llm_tool_calls_audit_columns.rb b/lib/legion/data/migrations/123_add_llm_tool_calls_audit_columns.rb new file mode 100644 index 0000000..184b7d6 --- /dev/null +++ b/lib/legion/data/migrations/123_add_llm_tool_calls_audit_columns.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:llm_tool_calls) + + existing = schema(:llm_tool_calls).map(&:first) + + alter_table(:llm_tool_calls) do + add_column :tool_arguments_json, :text, null: true unless existing.include?(:tool_arguments_json) + add_column :tool_result_json, :text, null: true unless existing.include?(:tool_result_json) + add_column :tool_category, String, size: 64, null: true unless existing.include?(:tool_category) + add_column :data_handling_classification, String, size: 32, null: true unless existing.include?(:data_handling_classification) + add_column :policy_decision, String, size: 32, null: true unless existing.include?(:policy_decision) 
+ add_column :requires_human_approval, TrueClass, null: true unless existing.include?(:requires_human_approval) + end + + add_index :llm_tool_calls, :tool_category, name: :idx_tool_calls_tool_category, if_not_exists: true + add_index :llm_tool_calls, :data_handling_classification, name: :idx_tool_calls_data_handling_classification, if_not_exists: true + add_index :llm_tool_calls, :policy_decision, name: :idx_tool_calls_policy_decision, if_not_exists: true + end + + down do + next unless table_exists?(:llm_tool_calls) + + alter_table(:llm_tool_calls) do + drop_column :requires_human_approval + drop_column :policy_decision + drop_column :data_handling_classification + drop_column :tool_category + drop_column :tool_result_json + drop_column :tool_arguments_json + end + end +end diff --git a/lib/legion/data/migrations/124_add_llm_tool_call_attempts_audit_columns.rb b/lib/legion/data/migrations/124_add_llm_tool_call_attempts_audit_columns.rb new file mode 100644 index 0000000..5ea5130 --- /dev/null +++ b/lib/legion/data/migrations/124_add_llm_tool_call_attempts_audit_columns.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:llm_tool_call_attempts) + + existing = schema(:llm_tool_call_attempts).map(&:first) + + alter_table(:llm_tool_call_attempts) do + add_column :attempt_input_json, :text, null: true unless existing.include?(:attempt_input_json) + add_column :attempt_output_json, :text, null: true unless existing.include?(:attempt_output_json) + add_column :error_details_json, :text, null: true unless existing.include?(:error_details_json) + end + end + + down do + next unless table_exists?(:llm_tool_call_attempts) + + alter_table(:llm_tool_call_attempts) do + drop_column :error_details_json + drop_column :attempt_output_json + drop_column :attempt_input_json + end + end +end diff --git a/lib/legion/data/migrations/125_add_llm_escalation_events_audit_columns.rb 
b/lib/legion/data/migrations/125_add_llm_escalation_events_audit_columns.rb
new file mode 100644
index 0000000..90ced82
--- /dev/null
+++ b/lib/legion/data/migrations/125_add_llm_escalation_events_audit_columns.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+Sequel.migration do
+  up do
+    next unless table_exists?(:llm_escalation_events)
+
+    # Skip whatever already exists so a re-run (or a partially applied run) does
+    # not fail, matching the neighbouring audit-column migrations.
+    existing = schema(:llm_escalation_events).map(&:first)
+
+    alter_table(:llm_escalation_events) do
+      add_column :history_json, :text, null: true unless existing.include?(:history_json)
+      add_column :outcome, String, size: 32, null: true unless existing.include?(:outcome)
+      add_column :total_attempts, Integer, null: true unless existing.include?(:total_attempts)
+    end
+
+    add_index :llm_escalation_events, :outcome, name: :idx_escalation_events_outcome, if_not_exists: true
+  end
+
+  down do
+    next unless table_exists?(:llm_escalation_events)
+
+    alter_table(:llm_escalation_events) do
+      drop_index :outcome, name: :idx_escalation_events_outcome
+      drop_column :total_attempts
+      drop_column :outcome
+      drop_column :history_json
+    end
+  end
+end
diff --git a/lib/legion/data/migrations/126_add_llm_message_inference_responses_audit_columns.rb b/lib/legion/data/migrations/126_add_llm_message_inference_responses_audit_columns.rb new file mode 100644 index 0000000..fd29b74 --- /dev/null +++ b/lib/legion/data/migrations/126_add_llm_message_inference_responses_audit_columns.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:llm_message_inference_responses) + + existing = schema(:llm_message_inference_responses).map(&:first) + + alter_table(:llm_message_inference_responses) do + add_column :route_attempts, Integer, null: true, default: 0 unless existing.include?(:route_attempts) + add_column :escalation_chain_ref, String, size: 128, null: true unless existing.include?(:escalation_chain_ref) + end + + add_index :llm_message_inference_responses, :escalation_chain_ref, + name: :idx_inference_responses_escalation_chain_ref, if_not_exists: true + end + + down do + next unless table_exists?(:llm_message_inference_responses) + + alter_table(:llm_message_inference_responses) do + drop_column
:escalation_chain_ref + drop_column :route_attempts + end + end +end diff --git a/lib/legion/data/migrations/127_add_llm_message_inference_requests_audit_columns.rb b/lib/legion/data/migrations/127_add_llm_message_inference_requests_audit_columns.rb new file mode 100644 index 0000000..206a614 --- /dev/null +++ b/lib/legion/data/migrations/127_add_llm_message_inference_requests_audit_columns.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:llm_message_inference_requests) + + existing = schema(:llm_message_inference_requests).map(&:first) + next if existing.include?(:parent_request_id) + + alter_table(:llm_message_inference_requests) do + add_foreign_key :parent_request_id, :llm_message_inference_requests, null: true, on_delete: :set_null + end + end + + down do + next unless table_exists?(:llm_message_inference_requests) + + alter_table(:llm_message_inference_requests) do + drop_foreign_key :parent_request_id + end + end +end diff --git a/lib/legion/data/migrations/128_add_identity_columns_to_shared_tables.rb b/lib/legion/data/migrations/128_add_identity_columns_to_shared_tables.rb new file mode 100644 index 0000000..666f66c --- /dev/null +++ b/lib/legion/data/migrations/128_add_identity_columns_to_shared_tables.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + %i[apollo_access_log memory_traces memory_associations audit_log audit_records].each do |table| + next unless table_exists?(table) + + cols = schema(table).map(&:first) + alter_table(table) do + add_column :identity_principal_id, Integer, null: true unless cols.include?(:identity_principal_id) + add_column :identity_id, Integer, null: true unless cols.include?(:identity_id) + add_column :identity_canonical_name, String, size: 255, null: true unless cols.include?(:identity_canonical_name) + end + end + end + + down do + %i[apollo_access_log memory_traces memory_associations audit_log audit_records].each do |table| + 
next unless table_exists?(table) + + cols = schema(table).map(&:first) + alter_table(table) do + drop_column :identity_canonical_name if cols.include?(:identity_canonical_name) + drop_column :identity_id if cols.include?(:identity_id) + drop_column :identity_principal_id if cols.include?(:identity_principal_id) + end + end + end +end diff --git a/lib/legion/data/migrations/129_create_llm_skill_events.rb b/lib/legion/data/migrations/129_create_llm_skill_events.rb new file mode 100644 index 0000000..109b034 --- /dev/null +++ b/lib/legion/data/migrations/129_create_llm_skill_events.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next if table_exists?(:llm_skill_events) + + create_table(:llm_skill_events) do + primary_key :id + + String :uuid, null: false, unique: true, size: 36 + Integer :conversation_id, index: true + String :request_ref, index: true + String :skill_name, null: false, index: true + String :skill_version + String :trigger + String :status, null: false, default: 'completed' + Integer :duration_ms, default: 0 + String :identity_canonical_name, index: true + Integer :identity_principal_id + Integer :identity_id + Integer :schema_version, null: false, default: 15 + DateTime :recorded_at, null: false, index: true + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP, index: true + end + end + + down do + drop_table(:llm_skill_events) if table_exists?(:llm_skill_events) + end +end diff --git a/lib/legion/data/migrations/130_add_llm_conversations_compliance_columns.rb b/lib/legion/data/migrations/130_add_llm_conversations_compliance_columns.rb new file mode 100644 index 0000000..a3cf7be --- /dev/null +++ b/lib/legion/data/migrations/130_add_llm_conversations_compliance_columns.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + next unless table_exists?(:llm_conversations) + + existing = schema(:llm_conversations).map(&:first) + + alter_table(:llm_conversations) do + 
add_column :pii_types_json, :text, null: true unless existing.include?(:pii_types_json) + add_column :jurisdictions_json, :text, null: true unless existing.include?(:jurisdictions_json) + end + end + + down do + next unless table_exists?(:llm_conversations) + + alter_table(:llm_conversations) do + drop_column :jurisdictions_json + drop_column :pii_types_json + end + end +end diff --git a/lib/legion/data/migrations/131_add_llm_tool_calls_schema_version.rb b/lib/legion/data/migrations/131_add_llm_tool_calls_schema_version.rb new file mode 100644 index 0000000..5395703 --- /dev/null +++ b/lib/legion/data/migrations/131_add_llm_tool_calls_schema_version.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + existing = schema(:llm_tool_calls).map(&:first) + next if existing.include?(:schema_version) + + alter_table(:llm_tool_calls) do + add_column :schema_version, Integer, null: false, default: 15 + end + end + + down do + alter_table(:llm_tool_calls) do + drop_column :schema_version + end + end +end diff --git a/lib/legion/data/migrations/132_drop_schema_version_from_llm_tool_calls.rb b/lib/legion/data/migrations/132_drop_schema_version_from_llm_tool_calls.rb new file mode 100644 index 0000000..92c62ad --- /dev/null +++ b/lib/legion/data/migrations/132_drop_schema_version_from_llm_tool_calls.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + existing = schema(:llm_tool_calls).map(&:first) + next unless existing.include?(:schema_version) + + alter_table(:llm_tool_calls) do + drop_column :schema_version + end + end + + down do + alter_table(:llm_tool_calls) do + add_column :schema_version, Integer, null: false, default: 15 + end + end +end diff --git a/lib/legion/data/migrations/133_allow_null_context_tokens.rb b/lib/legion/data/migrations/133_allow_null_context_tokens.rb new file mode 100644 index 0000000..bbaf380 --- /dev/null +++ b/lib/legion/data/migrations/133_allow_null_context_tokens.rb @@ -0,0 +1,15 @@ 
+# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_message_inference_requests) do + set_column_allow_null :context_tokens + end + end + + down do + alter_table(:llm_message_inference_requests) do + set_column_not_null :context_tokens + end + end +end diff --git a/lib/legion/data/migrations/134_add_route_attempt_columns.rb b/lib/legion/data/migrations/134_add_route_attempt_columns.rb new file mode 100644 index 0000000..3f0d97f --- /dev/null +++ b/lib/legion/data/migrations/134_add_route_attempt_columns.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_route_attempts) do + add_column :operation, String, size: 64, null: true + add_column :dispatch_path, String, size: 32, null: true + add_column :idempotency_key, String, size: 128, null: true + add_index :operation, name: :idx_route_attempts_operation + add_index :idempotency_key, name: :idx_route_attempts_idempotency_key + end + end + + down do + alter_table(:llm_route_attempts) do + drop_index :operation, name: :idx_route_attempts_operation + drop_index :idempotency_key, name: :idx_route_attempts_idempotency_key + drop_column :operation + drop_column :dispatch_path + drop_column :idempotency_key + end + end +end diff --git a/lib/legion/data/migrations/135_add_llm_context_token_accounting.rb b/lib/legion/data/migrations/135_add_llm_context_token_accounting.rb new file mode 100644 index 0000000..6637a40 --- /dev/null +++ b/lib/legion/data/migrations/135_add_llm_context_token_accounting.rb @@ -0,0 +1,89 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:llm_message_inference_metrics) do + add_column :request_message_estimated_tokens, Integer, null: false, default: 0 + add_column :loaded_history_estimated_tokens, Integer, null: false, default: 0 + add_column :curated_history_estimated_tokens, Integer, null: false, default: 0 + add_column :curation_saved_estimated_tokens, Integer, null: false, default: 0 + 
add_column :stripped_thinking_estimated_tokens, Integer, null: false, default: 0 + add_column :archived_history_estimated_tokens, Integer, null: false, default: 0 + add_column :archive_saved_estimated_tokens, Integer, null: false, default: 0 + add_column :context_window_saved_estimated_tokens, Integer, null: false, default: 0 + add_column :rag_injected_estimated_tokens, Integer, null: false, default: 0 + add_column :system_prompt_estimated_tokens, Integer, null: false, default: 0 + add_column :baseline_system_estimated_tokens, Integer, null: false, default: 0 + add_column :tool_definition_estimated_tokens, Integer, null: false, default: 0 + add_column :final_context_estimated_tokens, Integer, null: false, default: 0 + add_column :loaded_history_message_count, Integer, null: false, default: 0 + add_column :curated_history_message_count, Integer, null: false, default: 0 + add_column :archived_history_message_count, Integer, null: false, default: 0 + add_column :stripped_thinking_message_count, Integer, null: false, default: 0 + add_column :context_window_message_count_before, Integer, null: false, default: 0 + add_column :context_window_message_count_after, Integer, null: false, default: 0 + add_column :rag_entry_count, Integer, null: false, default: 0 + add_column :tool_definition_count, Integer, null: false, default: 0 + add_column :context_accounting_status, String, size: 64, null: false, default: 'missing' + add_column :context_accounting_json, String, text: true + end + + create_table(:llm_context_accounting_events) do + primary_key :id + String :uuid, size: 36, null: false, unique: true + foreign_key :message_inference_request_id, :llm_message_inference_requests, null: false, on_delete: :cascade + foreign_key :message_inference_response_id, :llm_message_inference_responses, null: true, on_delete: :set_null + foreign_key :message_inference_metric_id, :llm_message_inference_metrics, null: true, on_delete: :set_null + String :conversation_ref, size: 128 + String 
:request_ref, size: 128, null: false + String :event_type, size: 64, null: false + String :component, size: 64, null: false + Integer :estimated_tokens_before, null: false, default: 0 + Integer :estimated_tokens_after, null: false, default: 0 + Integer :estimated_tokens_delta, null: false, default: 0 + Integer :message_count_before, null: false, default: 0 + Integer :message_count_after, null: false, default: 0 + String :metadata_json, text: true + DateTime :recorded_at + DateTime :inserted_at, null: false, default: Sequel::CURRENT_TIMESTAMP + + index :message_inference_request_id + index :message_inference_response_id + index :message_inference_metric_id + index :request_ref + index :conversation_ref + index %i[event_type component] + index :recorded_at + end + end + + down do + drop_table(:llm_context_accounting_events) + + alter_table(:llm_message_inference_metrics) do + drop_column :context_accounting_json + drop_column :context_accounting_status + drop_column :tool_definition_count + drop_column :rag_entry_count + drop_column :context_window_message_count_after + drop_column :context_window_message_count_before + drop_column :stripped_thinking_message_count + drop_column :archived_history_message_count + drop_column :curated_history_message_count + drop_column :loaded_history_message_count + drop_column :final_context_estimated_tokens + drop_column :tool_definition_estimated_tokens + drop_column :baseline_system_estimated_tokens + drop_column :system_prompt_estimated_tokens + drop_column :rag_injected_estimated_tokens + drop_column :context_window_saved_estimated_tokens + drop_column :archive_saved_estimated_tokens + drop_column :archived_history_estimated_tokens + drop_column :stripped_thinking_estimated_tokens + drop_column :curation_saved_estimated_tokens + drop_column :curated_history_estimated_tokens + drop_column :loaded_history_estimated_tokens + drop_column :request_message_estimated_tokens + end + end +end diff --git a/lib/legion/data/model.rb 
b/lib/legion/data/model.rb index 8cf742c..1bc3dc6 100755 --- a/lib/legion/data/model.rb +++ b/lib/legion/data/model.rb @@ -1,15 +1,34 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' + module Legion module Data module Models class << self + include Legion::Logging::Helper + attr_reader :loaded_models def models - %w[extension function task runner node setting] + %w[extension function relationship chain task runner node setting digital_worker + apollo_entry apollo_relation apollo_expertise apollo_access_log audit_log + audit_record extract_step_timing + identity/identity identity/principal identity/providers identity/group + identity/group_memberships identity/audit_log + apollo/entries apollo/relation apollo/access_log apollo/expertise + apollo/operation + rbac/role_assignments rbac/runner_grants rbac/cross_team_grants + llm/conversation llm/message llm/message_inference_request + llm/message_inference_response llm/route_attempt + llm/message_inference_metric llm/context_accounting_event + llm/tool_call llm/tool_call_attempt + llm/conversation_compaction llm/policy_evaluation + llm/security_event llm/registry_event] end def load - Legion::Logging.info 'Loading Legion::Data::Models' + log.info 'Loading Legion::Data::Models' @loaded_models ||= [] require_sequel_models(models) Legion::Settings[:data][:models][:loaded] = true @@ -21,14 +40,14 @@ def require_sequel_models(files = models) end def load_sequel_model(model) - Legion::Logging.debug("Trying to load #{model}.rb") + log.debug("Trying to load #{model}.rb") require_relative "models/#{model}" @loaded_models << model - Legion::Logging.debug("Successfully loaded #{model}") + log.debug("Successfully loaded #{model}") model rescue LoadError => e - Legion::Logging.fatal("Failed to load #{model}") - raise e unless Legion::Settings[:data][:models][:continue_on_fail] + handle_exception(e, level: :fatal, operation: :load_sequel_model, model: model) + raise e unless 
Legion::Settings[:data][:models][:continue_on_load_fail] end end end diff --git a/lib/legion/data/models/apollo/access_log.rb b/lib/legion/data/models/apollo/access_log.rb new file mode 100644 index 0000000..213fd7a --- /dev/null +++ b/lib/legion/data/models/apollo/access_log.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +return unless Legion::Data::Model::Apollo::ModelHelpers.table_available?(:apollo_access_log) + +module Legion + module Data + module Model + module Apollo + class AccessLog < Sequel::Model(:apollo_access_log) + many_to_one :entry, class: 'Legion::Data::Model::Apollo::Entry', key: :entry_id + end + end + end + end +end diff --git a/lib/legion/data/models/apollo/entries.rb b/lib/legion/data/models/apollo/entries.rb new file mode 100644 index 0000000..c5e8b8b --- /dev/null +++ b/lib/legion/data/models/apollo/entries.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +return unless Legion::Data::Model::Apollo::ModelHelpers.table_available?(:apollo_entries) + +module Legion + module Data + module Model + module Apollo + class Entry < Sequel::Model(:apollo_entries) + one_to_many :outgoing_relations, class: 'Legion::Data::Model::Apollo::Relation', + key: :from_entry_id + one_to_many :incoming_relations, class: 'Legion::Data::Model::Apollo::Relation', + key: :to_entry_id + one_to_many :access_logs, class: 'Legion::Data::Model::Apollo::AccessLog', + key: :entry_id + end + end + end + end +end diff --git a/lib/legion/data/models/apollo/expertise.rb b/lib/legion/data/models/apollo/expertise.rb new file mode 100644 index 0000000..fae81b8 --- /dev/null +++ b/lib/legion/data/models/apollo/expertise.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +return unless Legion::Data::Model::Apollo::ModelHelpers.table_available?(:apollo_expertise) + +module Legion + module Data + module Model + module Apollo + class Expertise < 
Sequel::Model(:apollo_expertise) + end + end + end + end +end
diff --git a/lib/legion/data/models/apollo/model_helpers.rb b/lib/legion/data/models/apollo/model_helpers.rb
new file mode 100644
index 0000000..ee7206a
--- /dev/null
+++ b/lib/legion/data/models/apollo/model_helpers.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+require 'legion/logging/helper'
+
+module Legion
+  module Data
+    module Model
+      module Apollo
+        # Module-level guard for checking that a table exists before a model is defined on it.
+        module ModelHelpers
+          # Supplies `log` at module level; the rescue in table_available? depends on it.
+          extend Legion::Logging::Helper
+
+          # Reports whether +table_name+ exists on the current Sequel connection.
+          #
+          # @param table_name [Symbol] table to look up
+          # @return [Boolean, nil] result of table_exists?; nil when no connection is
+          #   available; false (after logging the error) when the lookup raises
+          def self.table_available?(table_name)
+            Legion::Data::Connection.sequel&.table_exists?(table_name)
+          rescue StandardError => e
+            log.error("table availability check failed for #{table_name}: #{e.message}")
+            false
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/legion/data/models/apollo/operation.rb b/lib/legion/data/models/apollo/operation.rb new file mode 100644 index 0000000..c3feca8 --- /dev/null +++ b/lib/legion/data/models/apollo/operation.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +return unless Legion::Data::Model::Apollo::ModelHelpers.table_available?(:apollo_operations) + +module Legion + module Data + module Model + module Apollo + class Operation < Sequel::Model(:apollo_operations) + end + end + end + end +end diff --git a/lib/legion/data/models/apollo/relation.rb b/lib/legion/data/models/apollo/relation.rb new file mode 100644 index 0000000..09ceba2 --- /dev/null +++ b/lib/legion/data/models/apollo/relation.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +return unless Legion::Data::Model::Apollo::ModelHelpers.table_available?(:apollo_relations) + +module Legion + module Data + module Model + module Apollo + class Relation < Sequel::Model(:apollo_relations) + many_to_one :from_entry, class: 'Legion::Data::Model::Apollo::Entry', key: :from_entry_id + many_to_one :to_entry, class: 'Legion::Data::Model::Apollo::Entry', key: :to_entry_id + end + end + end + end +end diff --git a/lib/legion/data/models/apollo_access_log.rb
b/lib/legion/data/models/apollo_access_log.rb new file mode 100644 index 0000000..ebc1d02 --- /dev/null +++ b/lib/legion/data/models/apollo_access_log.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +return unless Legion::Data::Connection.adapter == :postgres + +module Legion + module Data + module Model + class ApolloAccessLog < Sequel::Model(:apollo_access_log) + many_to_one :entry, class: 'Legion::Data::Model::ApolloEntry', key: :entry_id + end + end + end +end diff --git a/lib/legion/data/models/apollo_entry.rb b/lib/legion/data/models/apollo_entry.rb new file mode 100644 index 0000000..42bcacc --- /dev/null +++ b/lib/legion/data/models/apollo_entry.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +return unless Legion::Data::Connection.adapter == :postgres + +module Legion + module Data + module Model + class ApolloEntry < Sequel::Model(:apollo_entries) + one_to_many :outgoing_relations, class: 'Legion::Data::Model::ApolloRelation', + key: :from_entry_id + one_to_many :incoming_relations, class: 'Legion::Data::Model::ApolloRelation', + key: :to_entry_id + one_to_many :access_logs, class: 'Legion::Data::Model::ApolloAccessLog', + key: :entry_id + end + end + end +end diff --git a/lib/legion/data/models/apollo_expertise.rb b/lib/legion/data/models/apollo_expertise.rb new file mode 100644 index 0000000..40a8f82 --- /dev/null +++ b/lib/legion/data/models/apollo_expertise.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +return unless Legion::Data::Connection.adapter == :postgres + +module Legion + module Data + module Model + class ApolloExpertise < Sequel::Model(:apollo_expertise) + end + end + end +end diff --git a/lib/legion/data/models/apollo_relation.rb b/lib/legion/data/models/apollo_relation.rb new file mode 100644 index 0000000..35c0a6e --- /dev/null +++ b/lib/legion/data/models/apollo_relation.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +return unless Legion::Data::Connection.adapter == :postgres + +module Legion + module Data + 
module Model + class ApolloRelation < Sequel::Model(:apollo_relations) + many_to_one :from_entry, class: 'Legion::Data::Model::ApolloEntry', key: :from_entry_id + many_to_one :to_entry, class: 'Legion::Data::Model::ApolloEntry', key: :to_entry_id + end + end + end +end diff --git a/lib/legion/data/models/audit_log.rb b/lib/legion/data/models/audit_log.rb new file mode 100644 index 0000000..f1f58f4 --- /dev/null +++ b/lib/legion/data/models/audit_log.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' +require 'legion/data/audit_log_hash_chain' + +module Legion + module Data + module Model + class AuditLog < Sequel::Model(:audit_log) + include Legion::Logging::Helper + + VALID_EVENT_TYPES = %w[runner_execution lifecycle_transition].freeze + VALID_STATUSES = %w[success failure denied].freeze + + def validate + super + errors.add(:event_type, 'invalid') unless VALID_EVENT_TYPES.include?(event_type) + errors.add(:status, 'invalid') unless VALID_STATUSES.include?(status) + end + + def parsed_detail + return nil unless detail + + Legion::JSON.load(detail) + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :parsed_detail, id: self[:id]) + nil + end + + def before_update + raise 'audit_log records are immutable and cannot be updated' + end + + def before_destroy + raise 'audit_log records are immutable and cannot be deleted' + end + + def self.compute_hash(record) + Legion::Data::AuditLogHashChain.compute_hash(record) + end + + def self.verify_chain(records = order(:created_at, :id).all) + Legion::Data::AuditLogHashChain.verify(records) + end + end + end + end +end diff --git a/lib/legion/data/models/audit_record.rb b/lib/legion/data/models/audit_record.rb new file mode 100644 index 0000000..39f0c14 --- /dev/null +++ b/lib/legion/data/models/audit_record.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' + +module Legion + module Data + module Model + class 
AuditRecord < Sequel::Model(:audit_records) + include Legion::Logging::Helper + + # Enforce append-only semantics at the application layer. + # PostgreSQL enforces this at the DB layer via rules (migration 058); + # the application guard covers SQLite and MySQL. + + def before_update + raise 'audit_records are immutable and cannot be updated' + end + + def before_destroy + raise 'audit_records are immutable and cannot be deleted' + end + + def parsed_metadata + return {} unless metadata + + Legion::JSON.load(metadata) + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :parsed_metadata, id: self[:id]) + {} + end + end + end + end +end diff --git a/lib/legion/data/models/chain.rb b/lib/legion/data/models/chain.rb new file mode 100644 index 0000000..9f577fc --- /dev/null +++ b/lib/legion/data/models/chain.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Legion + module Data + module Model + class Chain < Sequel::Model + one_to_many :relationships, key: :chain_id + end + end + end +end diff --git a/lib/legion/data/models/digital_worker.rb b/lib/legion/data/models/digital_worker.rb new file mode 100644 index 0000000..43f39e3 --- /dev/null +++ b/lib/legion/data/models/digital_worker.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Legion + module Data + module Model + class DigitalWorker < Sequel::Model + one_to_many :tasks, key: :worker_id, primary_key: :worker_id + + LIFECYCLE_STATES = %w[bootstrap active paused retired terminated].freeze + CONSENT_TIERS = %w[supervised consult notify autonomous].freeze + RISK_TIERS = %w[low medium high critical].freeze + HEALTH_STATUSES = %w[online offline unknown].freeze + + def validate + super + errors.add(:lifecycle_state, 'invalid') unless LIFECYCLE_STATES.include?(lifecycle_state) + errors.add(:consent_tier, 'invalid') unless CONSENT_TIERS.include?(consent_tier) + errors.add(:risk_tier, 'invalid') if risk_tier && !RISK_TIERS.include?(risk_tier) + 
errors.add(:health_status, 'invalid') if health_status && !HEALTH_STATUSES.include?(health_status) + end + + def active? + lifecycle_state == 'active' + end + + def terminated? + lifecycle_state == 'terminated' + end + + def paused? + lifecycle_state == 'paused' + end + + def online? + health_status == 'online' + end + + def offline? + health_status == 'offline' + end + end + end + end +end diff --git a/lib/legion/data/models/extract_step_timing.rb b/lib/legion/data/models/extract_step_timing.rb new file mode 100644 index 0000000..d906fb3 --- /dev/null +++ b/lib/legion/data/models/extract_step_timing.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +module Legion + module Data + module Model + class ExtractStepTiming < Sequel::Model(:extract_step_timings) + end + end + end +end diff --git a/lib/legion/data/models/function.rb b/lib/legion/data/models/function.rb index c8a7e4d..da35b62 100755 --- a/lib/legion/data/models/function.rb +++ b/lib/legion/data/models/function.rb @@ -1,12 +1,30 @@ # frozen_string_literal: true +require 'legion/logging/helper' + module Legion module Data module Model class Function < Sequel::Model + include Legion::Logging::Helper + many_to_one :runner - # one_to_many :trigger_relationships, class: 'Legion::Data::Model::Relationship', key: :trigger_id - # one_to_many :action_relationships, class: 'Legion::Data::Model::Relationship', key: :action_id + one_to_many :trigger_relationships, class: 'Legion::Data::Model::Relationship', key: :trigger_id + one_to_many :action_relationships, class: 'Legion::Data::Model::Relationship', key: :action_id + one_to_many :tasks + + def embedding_vector + return nil unless embedding + + ::JSON.parse(embedding) + rescue ::JSON::ParserError => e + handle_exception(e, level: :debug, handled: true, operation: :embedding_vector, id: self[:id]) + nil + end + + def embedding_vector=(vec) + self.embedding = vec&.to_json + end end end end diff --git a/lib/legion/data/models/identity/audit_log.rb 
b/lib/legion/data/models/identity/audit_log.rb new file mode 100644 index 0000000..2b70379 --- /dev/null +++ b/lib/legion/data/models/identity/audit_log.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Model + class Identity + class AuditLog < Sequel::Model(:identity_audit_log) + include ModelHelpers + + many_to_one :principal, class: 'Legion::Data::Model::Identity::Principal' + many_to_one :identity, class: 'Legion::Data::Model::Identity::Identity' + end + end + end + end +end diff --git a/lib/legion/data/models/identity/group.rb b/lib/legion/data/models/identity/group.rb new file mode 100644 index 0000000..e298670 --- /dev/null +++ b/lib/legion/data/models/identity/group.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Model + class Identity + class Group < Sequel::Model(:identity_groups) + include ModelHelpers + + one_to_many :memberships, class: 'Legion::Data::Model::Identity::GroupMembership', key: :group_id + many_to_many :principals, + class: 'Legion::Data::Model::Identity::Principal', + join_table: :identity_group_memberships, + left_key: :group_id, + right_key: :principal_id + + def self.lookup_columns + %i[id uuid name] + end + end + end + end + end +end diff --git a/lib/legion/data/models/identity/group_memberships.rb b/lib/legion/data/models/identity/group_memberships.rb new file mode 100644 index 0000000..b87778c --- /dev/null +++ b/lib/legion/data/models/identity/group_memberships.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Model + class Identity + class GroupMembership < Sequel::Model(:identity_group_memberships) + include ModelHelpers + + many_to_one :principal, class: 'Legion::Data::Model::Identity::Principal' + many_to_one :group, class: 'Legion::Data::Model::Identity::Group' + + def expired? 
+ status == 'expired' || (expires_at && Time.now >= expires_at) + end + + def stale? + status == 'stale' + end + end + end + end + end +end diff --git a/lib/legion/data/models/identity/identity.rb b/lib/legion/data/models/identity/identity.rb new file mode 100644 index 0000000..7b37cb2 --- /dev/null +++ b/lib/legion/data/models/identity/identity.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Model + class Identity + class Identity < Sequel::Model(:identities) + include ModelHelpers + + many_to_one :principal, class: 'Legion::Data::Model::Identity::Principal' + many_to_one :provider, class: 'Legion::Data::Model::Identity::Provider', key: :provider_id + + def self.lookup_columns + %i[id uuid provider_identity_key] + end + end + end + end + end +end diff --git a/lib/legion/data/models/identity/model_helpers.rb b/lib/legion/data/models/identity/model_helpers.rb new file mode 100644 index 0000000..8e677b4 --- /dev/null +++ b/lib/legion/data/models/identity/model_helpers.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require 'securerandom' + +module Legion + module Data + module Model + class Identity + module ModelHelpers + def self.included(model) + model.extend(ClassMethods) + end + + module ClassMethods + def lookup(value) + lookup_by_columns(value, lookup_columns) + end + + def lookup_by_columns(value, lookup_columns) + normalized = normalize_lookup_value(value) + return if normalized.nil? + + lookup_columns.each do |column| + next unless columns.include?(column) + + query_value = lookup_query_value(column, normalized) + next if query_value == :skip + + record = where(column => query_value).first + return record if record + end + + nil + end + + private + + def lookup_columns + %i[id uuid name] + end + + def normalize_lookup_value(value) + normalized = value.is_a?(String) ? value.strip : value + return if normalized.respond_to?(:empty?) && normalized.empty? 
+ + normalized + end + + def lookup_query_value(column, value) + case column + when :id + return value.to_i if integer_lookup_value?(value) + return value.to_s if uuid_lookup_value?(value) && !columns.include?(:uuid) + + :skip + when :uuid + uuid_lookup_value?(value) ? value.to_s : :skip + else + value.to_s + end + end + + def integer_lookup_value?(value) + value.is_a?(Integer) || value.to_s.match?(/\A\d+\z/) + end + + def uuid_lookup_value?(value) + value.to_s.match?(/\A[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\z/i) + end + end + + def before_create + self[:uuid] ||= SecureRandom.uuid if self.class.columns.include?(:uuid) + super + end + end + end + end + end +end diff --git a/lib/legion/data/models/identity/principal.rb b/lib/legion/data/models/identity/principal.rb new file mode 100644 index 0000000..a58680c --- /dev/null +++ b/lib/legion/data/models/identity/principal.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Model + class Identity + class Principal < Sequel::Model(:identity_principals) + include ModelHelpers + + one_to_many :identities, class: 'Legion::Data::Model::Identity::Identity' + one_to_many :group_memberships, class: 'Legion::Data::Model::Identity::GroupMembership' + many_to_many :groups, + class: 'Legion::Data::Model::Identity::Group', + join_table: :identity_group_memberships, + left_key: :principal_id, + right_key: :group_id + + def self.lookup_columns + %i[id uuid canonical_name employee_key] + end + + def active_groups + group_memberships_dataset + .where(status: 'active') + .eager(:group) + .all + .map(&:group) + end + end + end + end + end +end diff --git a/lib/legion/data/models/identity/providers.rb b/lib/legion/data/models/identity/providers.rb new file mode 100644 index 0000000..fc0aca8 --- /dev/null +++ b/lib/legion/data/models/identity/providers.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require_relative 
'model_helpers' + +module Legion + module Data + module Model + class Identity + class Provider < Sequel::Model(:identity_providers) + include ModelHelpers + + one_to_many :identities, class: 'Legion::Data::Model::Identity::Identity', key: :provider_id + one_to_many :capabilities, + class: 'Legion::Data::Model::Identity::ProviderCapability', + key: :provider_id + + def self.lookup_columns + %i[id uuid name] + end + + def parsed_capabilities + capabilities_dataset.select_map(:capability_key) + end + end + + class ProviderCapability < Sequel::Model(:identity_provider_capabilities) + many_to_one :provider, class: 'Legion::Data::Model::Identity::Provider' + end + end + end + end +end diff --git a/lib/legion/data/models/llm/context_accounting_event.rb b/lib/legion/data/models/llm/context_accounting_event.rb new file mode 100644 index 0000000..9d01573 --- /dev/null +++ b/lib/legion/data/models/llm/context_accounting_event.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class ContextAccountingEvent < Sequel::Model(:llm_context_accounting_events) + include ModelHelpers + + many_to_one :message_inference_request + many_to_one :message_inference_response + many_to_one :message_inference_metric + end + end + end + end +end diff --git a/lib/legion/data/models/llm/conversation.rb b/lib/legion/data/models/llm/conversation.rb new file mode 100644 index 0000000..4d81368 --- /dev/null +++ b/lib/legion/data/models/llm/conversation.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class Conversation < Sequel::Model(:llm_conversations) + include ModelHelpers + + one_to_many :messages + one_to_many :message_inference_requests + one_to_many :conversation_compactions + one_to_many :policy_evaluations + one_to_many :security_events + + def security_incident_lineage + 
SecurityEvent.lineage_for_conversation(self) + end + end + end + end + end +end diff --git a/lib/legion/data/models/llm/conversation_compaction.rb b/lib/legion/data/models/llm/conversation_compaction.rb new file mode 100644 index 0000000..5d8f552 --- /dev/null +++ b/lib/legion/data/models/llm/conversation_compaction.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class ConversationCompaction < Sequel::Model(:llm_conversation_compactions) + include ModelHelpers + + many_to_one :conversation + many_to_one :triggered_by_message_inference_request, + class: 'Legion::Data::Models::LLM::MessageInferenceRequest', + key: :triggered_by_message_inference_request_id + many_to_one :replaces_message_from, class: 'Legion::Data::Models::LLM::Message', key: :replaces_message_from_id + many_to_one :replaces_message_to, class: 'Legion::Data::Models::LLM::Message', key: :replaces_message_to_id + end + end + end + end +end diff --git a/lib/legion/data/models/llm/message.rb b/lib/legion/data/models/llm/message.rb new file mode 100644 index 0000000..2678434 --- /dev/null +++ b/lib/legion/data/models/llm/message.rb @@ -0,0 +1,105 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class Message < Sequel::Model(:llm_messages) + include ModelHelpers + + many_to_one :conversation + many_to_one :parent_message, class: 'Legion::Data::Models::LLM::Message', key: :parent_message_id + many_to_one :message_inference_request + many_to_one :message_inference_response + many_to_one :tool_call + + one_to_many :child_messages, class: 'Legion::Data::Models::LLM::Message', key: :parent_message_id + one_to_many :triggered_message_inference_requests, + class: 'Legion::Data::Models::LLM::MessageInferenceRequest', + key: :latest_message_id + one_to_many :message_inference_responses, + class: 
'Legion::Data::Models::LLM::MessageInferenceResponse', + key: :response_message_id + one_to_many :requested_tool_calls, class: 'Legion::Data::Models::LLM::ToolCall', + key: :requested_by_message_id + one_to_many :result_tool_calls, class: 'Legion::Data::Models::LLM::ToolCall', + key: :result_message_id + one_to_many :compactions_from, class: 'Legion::Data::Models::LLM::ConversationCompaction', + key: :replaces_message_from_id + one_to_many :compactions_to, class: 'Legion::Data::Models::LLM::ConversationCompaction', + key: :replaces_message_to_id + + class << self + def incident_flow_from(message_or_id) + message = message_or_id.is_a?(self) ? message_or_id : self[message_or_id] + message&.incident_flow + end + end + + def incident_flow + requests = incident_flow_requests + responses = incident_flow_responses(requests) + route_attempts = RouteAttempt.where(message_inference_request_id: requests.map(&:id)) + .order(:message_inference_request_id, :attempt_no, :id) + .all + tool_calls = incident_flow_tool_calls(responses) + tool_call_attempts = ToolCallAttempt.where(tool_call_id: tool_calls.map(&:id)) + .order(:tool_call_id, :attempt_no, :id) + .all + + { + message: self, + conversation: conversation, + requests: requests, + route_attempts: route_attempts, + responses: responses, + response_messages: responses.filter_map(&:response_message), + tool_calls: tool_calls, + tool_call_attempts: tool_call_attempts, + result_messages: incident_flow_result_messages(responses, tool_calls) + } + end + + private + + def incident_flow_requests + request_ids = [] + request_ids << message_inference_request_id if message_inference_request_id + request_ids.concat(MessageInferenceRequest.where(latest_message_id: id).select_map(:id)) + if message_inference_response_id && (linked_response = MessageInferenceResponse[message_inference_response_id]) + request_ids << linked_response.message_inference_request_id + end + if tool_call_id && (linked_tool_call = ToolCall[tool_call_id]) + 
request_ids << linked_tool_call.message_inference_response.message_inference_request_id + end + + MessageInferenceRequest.where(id: request_ids.uniq).order(:id).all + end + + def incident_flow_responses(requests) + request_ids = requests.map(&:id) + response_scope = MessageInferenceResponse.where(message_inference_request_id: request_ids) + response_scope = response_scope.or(id: message_inference_response_id) if message_inference_response_id + response_scope.order(:id).all + end + + def incident_flow_tool_calls(responses) + response_ids = responses.map(&:id) + scope = ToolCall.where(message_inference_response_id: response_ids) + scope = scope.or(requested_by_message_id: id).or(result_message_id: id) + scope.order(:message_inference_response_id, :tool_call_index, :id).all + end + + def incident_flow_result_messages(responses, tool_calls) + message_ids = responses.filter_map(&:response_message_id) + tool_calls.filter_map(&:result_message_id) + scope = Message.where(id: message_ids.uniq) + scope = scope.or(tool_call_id: tool_calls.map(&:id)) unless tool_calls.empty? 
+ scope.order(:seq, :id).all + end + end + end + end + end +end diff --git a/lib/legion/data/models/llm/message_inference_metric.rb b/lib/legion/data/models/llm/message_inference_metric.rb new file mode 100644 index 0000000..851425d --- /dev/null +++ b/lib/legion/data/models/llm/message_inference_metric.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class MessageInferenceMetric < Sequel::Model(:llm_message_inference_metrics) + include ModelHelpers + + many_to_one :message_inference_request + many_to_one :message_inference_response + one_to_many :context_accounting_events + + class << self + def finance_usage_by_cost_center_model_day(cost_center: nil, model_key: nil, from: nil, to: nil) + usage_day = Sequel.function(:date, :recorded_at) + scope = dataset + scope = scope.where(cost_center: cost_center) unless cost_center.nil? + scope = scope.where(model_key: model_key) unless model_key.nil? + scope = scope.where { recorded_at >= from } unless from.nil? + scope = scope.where { recorded_at < to } unless to.nil? 
+ + scope + .select( + :cost_center, + :model_key, + usage_day.as(:usage_day), + Sequel.function(:sum, :input_tokens).as(:input_tokens), + Sequel.function(:sum, :output_tokens).as(:output_tokens), + Sequel.function(:sum, :thinking_tokens).as(:thinking_tokens), + Sequel.function(:sum, :total_tokens).as(:total_tokens), + Sequel.function(:sum, :cost_usd).as(:cost_usd), + Sequel.function(:sum, :latency_ms).as(:latency_ms), + Sequel.function(:sum, :wall_clock_ms).as(:wall_clock_ms) + ) + .group(:cost_center, :model_key, usage_day) + .order(:cost_center, :model_key, usage_day) + .map(&:values) + end + end + end + end + end + end +end diff --git a/lib/legion/data/models/llm/message_inference_request.rb b/lib/legion/data/models/llm/message_inference_request.rb new file mode 100644 index 0000000..192d497 --- /dev/null +++ b/lib/legion/data/models/llm/message_inference_request.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class MessageInferenceRequest < Sequel::Model(:llm_message_inference_requests) + include ModelHelpers + + many_to_one :conversation + many_to_one :latest_message, class: 'Legion::Data::Models::LLM::Message', key: :latest_message_id + one_to_many :message_inference_responses + one_to_many :route_attempts + one_to_many :message_inference_metrics + one_to_many :conversation_compactions, key: :triggered_by_message_inference_request_id + one_to_many :policy_evaluations + one_to_many :security_events + + class << self + def lookup(reference) + return reference if reference.is_a?(self) + + value = reference.to_s + scope = where(uuid: value).or(request_ref: value) + scope = scope.or(id: value.to_i) if value.match?(/\A\d+\z/) + scope.first + end + + def audit_lineage_for(reference) + lookup(reference)&.audit_lineage + end + end + + def audit_lineage + responses = message_inference_responses_dataset.order(:id).all + response_ids = responses.map(&:id) + tool_calls = 
ToolCall.where(message_inference_response_id: response_ids).order(:tool_call_index, :id).all + tool_call_ids = tool_calls.map(&:id) + + { + request: self, + request_id: id, + request_ref: request_ref, + conversation: conversation, + latest_message: latest_message, + caller_principal: caller_principal, + caller_identity: caller_identity, + route_attempts: route_attempts_dataset.order(:attempt_no, :id).all, + responses: responses, + response_messages: responses.filter_map(&:response_message), + metrics: message_inference_metrics_dataset.order(:recorded_at, :id).all, + policy_evaluations: policy_evaluations_dataset.order(:evaluated_at, :id).all, + security_events: security_events_dataset.order(:detected_at, :id).all, + tool_calls: tool_calls, + tool_call_attempts: ToolCallAttempt.where(tool_call_id: tool_call_ids).order(:tool_call_id, :attempt_no, :id).all + } + end + + def request + self + end + + def caller_principal + return nil unless caller_principal_id && defined?(Legion::Data::Model::Identity::Principal) + + Legion::Data::Model::Identity::Principal.first(id: caller_principal_id) + end + + def caller_identity + return nil unless caller_identity_id && defined?(Legion::Data::Model::Identity::Identity) + + Legion::Data::Model::Identity::Identity.first(id: caller_identity_id) + end + end + end + end + end +end diff --git a/lib/legion/data/models/llm/message_inference_response.rb b/lib/legion/data/models/llm/message_inference_response.rb new file mode 100644 index 0000000..987d244 --- /dev/null +++ b/lib/legion/data/models/llm/message_inference_response.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class MessageInferenceResponse < Sequel::Model(:llm_message_inference_responses) + include ModelHelpers + + many_to_one :message_inference_request + many_to_one :response_message, class: 'Legion::Data::Models::LLM::Message', key: :response_message_id + one_to_many 
:route_attempts + one_to_many :message_inference_metrics + one_to_many :tool_calls + one_to_many :policy_evaluations + one_to_many :security_events + end + end + end + end +end diff --git a/lib/legion/data/models/llm/model_helpers.rb b/lib/legion/data/models/llm/model_helpers.rb new file mode 100644 index 0000000..a68ea7a --- /dev/null +++ b/lib/legion/data/models/llm/model_helpers.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require 'securerandom' + +module Legion + module Data + module Models + module LLM + module ModelHelpers + def before_create + self[:uuid] ||= SecureRandom.uuid if columns.include?(:uuid) + super + end + end + end + end + end +end diff --git a/lib/legion/data/models/llm/policy_evaluation.rb b/lib/legion/data/models/llm/policy_evaluation.rb new file mode 100644 index 0000000..3ad39b1 --- /dev/null +++ b/lib/legion/data/models/llm/policy_evaluation.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class PolicyEvaluation < Sequel::Model(:llm_policy_evaluations) + include ModelHelpers + + many_to_one :conversation + many_to_one :message_inference_request + many_to_one :message_inference_response + one_to_many :security_events + end + end + end + end +end diff --git a/lib/legion/data/models/llm/registry_event.rb b/lib/legion/data/models/llm/registry_event.rb new file mode 100644 index 0000000..730d2c6 --- /dev/null +++ b/lib/legion/data/models/llm/registry_event.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class RegistryEvent < Sequel::Model(:llm_registry_events) + include ModelHelpers + end + end + end + end +end diff --git a/lib/legion/data/models/llm/route_attempt.rb b/lib/legion/data/models/llm/route_attempt.rb new file mode 100644 index 0000000..dded1ee --- /dev/null +++ b/lib/legion/data/models/llm/route_attempt.rb @@ -0,0 +1,18 @@ +# 
frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class RouteAttempt < Sequel::Model(:llm_route_attempts) + include ModelHelpers + + many_to_one :message_inference_request + many_to_one :message_inference_response + end + end + end + end +end diff --git a/lib/legion/data/models/llm/security_event.rb b/lib/legion/data/models/llm/security_event.rb new file mode 100644 index 0000000..6e545d9 --- /dev/null +++ b/lib/legion/data/models/llm/security_event.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class SecurityEvent < Sequel::Model(:llm_security_events) + include ModelHelpers + + many_to_one :conversation + many_to_one :message_inference_request + many_to_one :message_inference_response + many_to_one :tool_call + many_to_one :tool_call_attempt + many_to_one :policy_evaluation + + class << self + def lineage_for_conversation(conversation_or_id) + conversation_id = conversation_or_id.respond_to?(:id) ? 
conversation_or_id.id : conversation_or_id + requests = MessageInferenceRequest.where(conversation_id: conversation_id).order(:id).all + request_ids = requests.map(&:id) + responses = MessageInferenceResponse.where(message_inference_request_id: request_ids).order(:id).all + response_ids = responses.map(&:id) + tool_calls = ToolCall.where(message_inference_response_id: response_ids).order(:tool_call_index, :id).all + tool_call_ids = tool_calls.map(&:id) + + { + conversation: Conversation[conversation_id], + messages: Message.where(conversation_id: conversation_id).order(:seq, :id).all, + requests: requests, + route_attempts: RouteAttempt.where(message_inference_request_id: request_ids).order(:message_inference_request_id, :attempt_no, + :id).all, + responses: responses, + request_payload_hashes: requests.filter_map(&:request_content_hash), + response_payload_hashes: responses.filter_map(&:response_content_hash), + policy_evaluations: policy_evaluations_for(conversation_id, request_ids, response_ids), + security_events: security_events_for(conversation_id, request_ids, response_ids, tool_call_ids), + tool_calls: tool_calls, + tool_call_attempts: ToolCallAttempt.where(tool_call_id: tool_call_ids).order(:tool_call_id, :attempt_no, :id).all + } + end + + private + + def policy_evaluations_for(conversation_id, request_ids, response_ids) + scope = PolicyEvaluation.where(conversation_id: conversation_id) + scope = scope.or(message_inference_request_id: request_ids) unless request_ids.empty? + scope = scope.or(message_inference_response_id: response_ids) unless response_ids.empty? + scope.order(:evaluated_at, :id).all + end + + def security_events_for(conversation_id, request_ids, response_ids, tool_call_ids) + scope = where(conversation_id: conversation_id) + scope = scope.or(message_inference_request_id: request_ids) unless request_ids.empty? + scope = scope.or(message_inference_response_id: response_ids) unless response_ids.empty? 
+ scope = scope.or(tool_call_id: tool_call_ids) unless tool_call_ids.empty? + scope.order(:detected_at, :id).all + end + end + end + end + end + end +end diff --git a/lib/legion/data/models/llm/tool_call.rb b/lib/legion/data/models/llm/tool_call.rb new file mode 100644 index 0000000..0eb1f03 --- /dev/null +++ b/lib/legion/data/models/llm/tool_call.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class ToolCall < Sequel::Model(:llm_tool_calls) + include ModelHelpers + + many_to_one :message_inference_response + many_to_one :conversation + many_to_one :requested_by_message, class: 'Legion::Data::Models::LLM::Message', key: :requested_by_message_id + many_to_one :result_message, class: 'Legion::Data::Models::LLM::Message', key: :result_message_id + one_to_many :tool_call_attempts + one_to_many :security_events + end + end + end + end +end diff --git a/lib/legion/data/models/llm/tool_call_attempt.rb b/lib/legion/data/models/llm/tool_call_attempt.rb new file mode 100644 index 0000000..3241e93 --- /dev/null +++ b/lib/legion/data/models/llm/tool_call_attempt.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Models + module LLM + class ToolCallAttempt < Sequel::Model(:llm_tool_call_attempts) + include ModelHelpers + + many_to_one :tool_call + one_to_many :security_events + end + end + end + end +end diff --git a/lib/legion/data/models/node.rb b/lib/legion/data/models/node.rb index 605a993..20093f4 100755 --- a/lib/legion/data/models/node.rb +++ b/lib/legion/data/models/node.rb @@ -1,10 +1,34 @@ # frozen_string_literal: true +require 'legion/logging/helper' + module Legion module Data module Model class Node < Sequel::Model - # one_to_many :task_log + include Legion::Logging::Helper + + one_to_many :task_log + one_to_many :task_logs, class: 'Legion::Data::Model::TaskLog' + many_to_one :principal, 
class: 'Legion::Data::Model::Principal' + + def parsed_metrics + return nil unless metrics + + Legion::JSON.load(metrics) + rescue StandardError => e + handle_exception(e, level: :debug, handled: true, operation: :parsed_metrics, id: self[:id]) + nil + end + + def parsed_hosted_worker_ids + return [] unless hosted_worker_ids + + Legion::JSON.load(hosted_worker_ids) + rescue StandardError => e + handle_exception(e, level: :debug, handled: true, operation: :parsed_hosted_worker_ids, id: self[:id]) + [] + end end end end diff --git a/lib/legion/data/models/rbac/cross_team_grants.rb b/lib/legion/data/models/rbac/cross_team_grants.rb new file mode 100644 index 0000000..1060d1e --- /dev/null +++ b/lib/legion/data/models/rbac/cross_team_grants.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require_relative 'model_helpers' + +module Legion + module Data + module Model + module RBAC + class CrossTeamGrant < Sequel::Model(:rbac_cross_team_grants) + include ModelHelpers + + def validate + super + errors.add(:source_team, 'cannot be empty') if source_team.nil? || source_team.empty? + errors.add(:target_team, 'cannot be empty') if target_team.nil? || target_team.empty? + errors.add(:source_team, 'cannot equal target_team') if source_team == target_team + errors.add(:runner_pattern, 'cannot be empty') if runner_pattern.nil? || runner_pattern.empty? + errors.add(:actions, 'cannot be empty') if actions.nil? || actions.empty? + errors.add(:granted_by, 'cannot be empty') if granted_by.nil? || granted_by.empty? + end + end + end + end + end +end diff --git a/lib/legion/data/models/rbac/model_helpers.rb b/lib/legion/data/models/rbac/model_helpers.rb new file mode 100644 index 0000000..f03a975 --- /dev/null +++ b/lib/legion/data/models/rbac/model_helpers.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Legion + module Data + module Model + module RBAC + module ModelHelpers + def expired? + return false if expires_at.nil? 
# Splits the comma-separated +actions+ value into individual action names.
#
# Each entry is stripped of surrounding whitespace. Blank entries (for
# example from "read,,write" or "read, ,write") are dropped so callers never
# see an empty action name.
#
# @return [Array<String>] action names; empty when +actions+ is nil or blank
def actions_list
  (actions || '').split(',').map(&:strip).reject(&:empty?)
end
# Sequel model describing a grant from one team to another.
#
# Adds presence validation for the grant's fields plus helpers for expiry
# and for reading the comma-separated +actions+ value.
class RbacCrossTeamGrant < Sequel::Model
  # Validates that all required fields are present and that the grant does
  # not point a team at itself.
  #
  # The "cannot equal target_team" error is only reported when source_team is
  # actually present; otherwise two missing teams would also be flagged as
  # "equal", which hides the real problem.
  def validate
    super
    errors.add(:source_team, 'cannot be empty') if empty_value?(source_team)
    errors.add(:target_team, 'cannot be empty') if empty_value?(target_team)
    errors.add(:source_team, 'cannot equal target_team') if !empty_value?(source_team) && source_team == target_team
    errors.add(:runner_pattern, 'cannot be empty') if empty_value?(runner_pattern)
    errors.add(:actions, 'cannot be empty') if empty_value?(actions)
    errors.add(:granted_by, 'cannot be empty') if empty_value?(granted_by)
  end

  # @return [Boolean] true when +expires_at+ is set and lies in the past
  def expired?
    return false if expires_at.nil?

    expires_at < Time.now
  end

  # @return [Boolean] the negation of {#expired?}
  def active?
    !expired?
  end

  # Splits the comma-separated +actions+ value into stripped action names,
  # dropping blank entries.
  #
  # @return [Array<String>]
  def actions_list
    (actions || '').split(',').map(&:strip).reject(&:empty?)
  end

  private

  # @return [Boolean] true when +value+ is nil or responds true to +empty?+
  def empty_value?(value)
    value.nil? || value.empty?
  end
end
# Splits the comma-separated +actions+ value into individual action names.
#
# Entries are stripped of surrounding whitespace and blank entries (e.g. from
# a doubled comma) are removed.
#
# @return [Array<String>] action names; empty when +actions+ is nil or blank
def actions_list
  (actions || '').split(',').map(&:strip).reject(&:empty?)
end
Legion::Data::Model::Task.where(function_id: functions_dataset.select(:id)) + end + + def relationships_dataset + function_ids = functions_dataset.select(:id) + + Legion::Data::Model::Relationship + .where(trigger_id: function_ids) + .or(action_id: function_ids) + end end end end diff --git a/lib/legion/data/models/task.rb b/lib/legion/data/models/task.rb index 661c030..475ad1e 100755 --- a/lib/legion/data/models/task.rb +++ b/lib/legion/data/models/task.rb @@ -1,15 +1,73 @@ # frozen_string_literal: true +require 'digest' +require 'legion/json' +require 'time' + module Legion module Data module Model class Task < Sequel::Model + TERMINAL_STATUSES = %w[ + completed complete failed error cancelled canceled timeout timed_out + ].freeze + + many_to_one :function many_to_one :relationship one_to_many :task_log + one_to_many :task_logs, class: 'Legion::Data::Model::TaskLog' many_to_one :parent, class: self one_to_many :children, key: :parent_id, class: self many_to_one :master, class: self one_to_many :slave, key: :master_id, class: self + one_to_many :slaves, key: :master_id, class: self + many_to_one :digital_worker, key: :worker_id, primary_key: :worker_id + + def self.idempotency_key_for(payload) + Digest::SHA256.hexdigest(Legion::JSON.dump(canonical_payload(payload))) + end + + def self.find_active_by_idempotency_key(key, now: Time.now) + return nil if key.to_s.empty? + return nil unless columns.include?(:idempotency_key) + + where(idempotency_key: key) + .exclude(status: TERMINAL_STATUSES) + .where { (idempotency_expires_at =~ nil) | (idempotency_expires_at > now) } + .reverse_order(:created, :id) + .first + end + + def self.create_idempotent(values, payload: nil, idempotency_key: nil, ttl: nil) + key = idempotency_key || idempotency_key_for(payload || values) + existing = find_active_by_idempotency_key(key) + return existing if existing + + expires_at = ttl ? 
# Recursively converts +value+ into a canonical, order-independent form so
# that equal payloads serialize identically (used when deriving idempotency
# keys).
#
# * Hash  - keys are stringified and sorted; values are canonicalized.
#           Keys of any type are supported (not only String/Symbol). When a
#           String key and a non-String key stringify to the same name, the
#           String key's value is kept.
# * Array - each element is canonicalized, order preserved.
# * Time / DateTime - rendered as UTC ISO 8601 with microsecond precision.
# * anything else is returned unchanged.
#
# @param value [Object] payload or payload fragment
# @return [Object] canonicalized copy of +value+
def self.canonical_payload(value)
  case value
  when Hash
    normalized = value.each_with_object({}) do |(key, item), acc|
      name = key.to_s
      # A String key always wins over another key with the same string form.
      next if acc.key?(name) && !key.is_a?(String)

      acc[name] = canonical_payload(item)
    end
    normalized.sort_by(&:first).to_h
  when Array
    value.map { |item| canonical_payload(item) }
  when Time
    value.utc.iso8601(6)
  when DateTime
    value.to_time.utc.iso8601(6)
  else
    value
  end
end
+ + created = [] + existing = [] + base = Date.today + + months_ahead.times do |i| + target = advance_months(base, i) + partition = partition_name(table, target) + from_str = target.strftime('%Y-%m-%d') + to_str = advance_months(target, 1).strftime('%Y-%m-%d') + + ddl = "CREATE TABLE IF NOT EXISTS #{partition} " \ + "PARTITION OF #{table} " \ + "FOR VALUES FROM ('#{from_str}') TO ('#{to_str}')" + + before_count = partition_names_for(table).size + Legion::Data.connection.run(ddl) + after_count = partition_names_for(table).size + + if after_count > before_count + log.info("Created partition #{partition}") + created << partition + else + existing << partition + end + end + + log.info "PartitionManager ensure_partitions table=#{table} created=#{created.size} existing=#{existing.size}" + { created: created, existing: existing } + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :ensure_partitions, table: table, months_ahead: months_ahead) + { created: [], existing: [], error: e.message } + end + + def drop_old_partitions(table:, retention_months: 24) + return NOT_POSTGRES unless postgres? + + cutoff = advance_months(Date.today, -retention_months) + dropped = [] + retained = [] + + partition_names_for(table).each do |part| + part_date = parse_partition_date(part) + next unless part_date + + if part_date < cutoff + Legion::Data.connection.run("DROP TABLE #{part}") + log.info("Dropped partition #{part}") + dropped << part + else + retained << part + end + end + + log.info "PartitionManager drop_old_partitions table=#{table} dropped=#{dropped.size} retained=#{retained.size}" + { dropped: dropped, retained: retained } + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :drop_old_partitions, table: table, retention_months: retention_months) + { dropped: [], retained: [], error: e.message } + end + + def list_partitions(table:) + return NOT_POSTGRES unless postgres? 
# Extracts the month a partition covers from its name.
#
# The name must end in a "yYYYYmMM" suffix (the format produced by
# partition_name). Names without that suffix, or whose month is not a real
# calendar month, yield nil instead of raising, so one oddly named child
# table cannot abort an entire retention pass.
#
# @param partition_name [String] partition table name
# @return [Date, nil] first day of the partition's month, or nil when the
#   name does not carry a valid suffix
def parse_partition_date(partition_name)
  # \z (not $) so a trailing newline cannot satisfy the suffix match.
  match = partition_name.match(/y(\d{4})m(\d{2})\z/)
  return nil unless match

  year = match[1].to_i
  month = match[2].to_i
  return nil unless Date.valid_date?(year, month, 1)

  Date.new(year, month, 1)
end
b/lib/legion/data/retention.rb @@ -0,0 +1,132 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' +require_relative 'archival/policy' + +module Legion + module Data + module Retention + DEFAULT_RETENTION_YEARS = 7 + DEFAULT_ARCHIVE_AFTER_DAYS = 90 + + class << self + include Legion::Logging::Helper + + def archive_old_records(table:, date_column: nil, archive_after_days: DEFAULT_ARCHIVE_AFTER_DAYS) + db = Legion::Data.connection + return { archived: 0, table: table } unless db + + date_column = resolve_date_column(table, date_column) + cutoff = Time.now - (archive_after_days * 86_400) + archive_table = archive_table_name(table) + + ensure_archive_table!(db, table, archive_table) + + count = 0 + db.transaction do + records = db[table].where(Sequel.identifier(date_column) < cutoff) + count = records.count + if count.positive? + db[archive_table].multi_insert(records.all) + records.delete + end + end + + log.info "Archived #{count} row(s) from #{table}" if count.positive? + { archived: count, table: table } + rescue StandardError => e + handle_exception( + e, + level: :error, + handled: false, + operation: :archive_old_records, + table: table, + date_column: date_column, + archive_after_days: archive_after_days + ) + raise + end + + def purge_expired_records(table:, date_column: nil, retention_years: DEFAULT_RETENTION_YEARS) + db = Legion::Data.connection + archive_table = archive_table_name(table) + return { purged: 0, table: table } unless db&.table_exists?(archive_table) + + date_column = resolve_date_column(table, date_column) + cutoff = Time.now - (retention_years * 365 * 86_400) + expired = db[archive_table].where(Sequel.identifier(date_column) < cutoff) + count = expired.count + expired.delete if count.positive? + log.info "Purged #{count} expired row(s) from #{archive_table}" if count.positive? 
# Picks the date column used to age records of +table+.
#
# An explicitly supplied +date_column+ always wins. Otherwise the per-table
# override from Archival::Policy::DATE_COLUMN_OVERRIDES is used when that
# constant is loaded and has an entry for the table; the fallback is
# :created_at.
#
# @param table [Symbol, String] source table name
# @param date_column [Symbol, nil] caller-supplied column, if any
# @return [Symbol] column to compare against the cutoff
def resolve_date_column(table, date_column)
  return date_column if date_column
  return :created_at unless defined?(Legion::Data::Archival::Policy::DATE_COLUMN_OVERRIDES)

  override = Legion::Data::Archival::Policy::DATE_COLUMN_OVERRIDES[table.to_s]
  override || :created_at
end
--git a/lib/legion/data/rls.rb b/lib/legion/data/rls.rb new file mode 100644 index 0000000..e3cae4a --- /dev/null +++ b/lib/legion/data/rls.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' + +module Legion + module Data + module Rls + extend Legion::Logging::Helper + + RLS_TABLES = %i[ + tasks digital_workers audit_log memory_traces extensions + functions runners nodes settings value_metrics + ].freeze + + module_function + + def rls_enabled? + return false unless Legion::Settings[:data][:connected] + + Legion::Data.connection.adapter_scheme == :postgres + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :rls_enabled) + false + end + + def assign_tenant(tenant_id) + return unless rls_enabled? + + Legion::Data.connection.run( + Sequel.lit('SET app.current_tenant = ?', tenant_id.to_s) + ) + end + + def current_tenant + return nil unless rls_enabled? + + Legion::Data.connection.fetch('SHOW app.current_tenant').first&.values&.first + rescue Sequel::DatabaseError => e + handle_exception(e, level: :warn, handled: true, operation: :current_tenant) + nil + end + + def reset_tenant + return unless rls_enabled? + + Legion::Data.connection.run('RESET app.current_tenant') + end + + def with_tenant(tenant_id) + previous = current_tenant + assign_tenant(tenant_id) + yield + ensure + previous ? 
assign_tenant(previous) : reset_tenant + end + end + end +end diff --git a/lib/legion/data/settings.rb b/lib/legion/data/settings.rb index c40b65d..8795428 100755 --- a/lib/legion/data/settings.rb +++ b/lib/legion/data/settings.rb @@ -1,60 +1,135 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' + module Legion module Data module Settings + extend Legion::Logging::Helper + + CREDS = { + sqlite: { + database: 'legionio.db' + }, + mysql2: { + username: 'legion', + password: 'legion', + database: 'legionio', + host: '127.0.0.1', + port: 3306 + }, + postgres: { + user: 'legion', + password: 'legion', + database: 'legionio', + host: '127.0.0.1', + port: 5432 + } + }.freeze + def self.default { - connected: false, - cache: cache, - connection: connection, - creds: creds, - migrations: migrations, - models: models, - connect_on_start: true + adapter: 'sqlite', + connected: false, + + # Connection pool + max_connections: 25, + pool_timeout: 5, + preconnect: false, + single_threaded: false, + test: true, + name: nil, + + # Logging + log: false, + query_log: false, + log_connection_info: false, + log_warn_duration: 1, + sql_log_level: 'debug', + + # Connection health (network adapters only, ignored for sqlite) + # Validation is disabled by default: the connection_validator extension issues a + # SELECT NULL on every checkout/checkin and before real queries, which kills + # throughput. Connection errors are already rescued and reconnected at query time. + # When enabled, connection_validation_timeout: -1 validates on every checkout + # (catches stale connections from VPN/sleep/network changes immediately). 
+ connection_validation: false, + connection_validation_timeout: -1, + connection_expiration: true, + connection_expiration_timeout: 14_400, + + # Adapter-specific (nil = use adapter built-in default) + connect_timeout: nil, + read_timeout: nil, + write_timeout: nil, + encoding: nil, + sql_mode: nil, + sslmode: nil, + sslrootcert: nil, + search_path: nil, + timeout: nil, + readonly: nil, + disable_dqs: nil, + + # Grouped settings + creds: creds, + cache: cache, + migrations: migrations, + models: models, + local: local, + dev_mode: false, + dev_fallback: true, + connect_on_start: true, + read_replica_url: nil, + replicas: [], + archival: archival + } + end + + def self.local + { + enabled: true, + database: 'legionio_local.db', + query_log: false, + migrations: { auto_migrate: true } } end def self.models { continue_on_load_fail: false, - autoload: true + autoload: true } end def self.migrations { continue_on_fail: false, - auto_migrate: true, - ran: false, - version: nil + auto_migrate: true, + ran: false, + version: nil } end - def self.connection - { - log: false, - log_connection_info: false, - log_warn_duration: 1, - sql_log_level: 'debug', - max_connections: 10, - preconnect: false - } + def self.creds(adapter = nil) + adapter = (adapter || :sqlite).to_sym + CREDS.fetch(adapter, CREDS[:sqlite]).dup end - def self.creds + def self.archival { - username: 'legion', - password: 'legion', - database: 'legionio', - host: '127.0.0.1', - port: 3306 + retention_days: 90, + batch_size: 1000, + storage_backend: nil } end def self.cache { - connected: false, - auto_enable: Legion::Settings[:cache][:connected], - ttl: 60 + connected: false, + auto_enable: Legion::Settings[:cache][:connected], + static_cache: false, + ttl: 60 } end end @@ -62,7 +137,7 @@ def self.cache end begin - Legion::Settings.merge_settings('data', Legion::Data::Settings.default) if Legion.const_defined?('Settings') + Legion::Settings.merge_settings('data', Legion::Data::Settings.default) if 
# Maps a module to its relative spool directory.
#
# The module name has its "Legion::Extensions::" prefix removed (or, failing
# that, its "Legion::" prefix), the remaining "::" separators become "/",
# and the result is downcased.
#
# @param extension_module [#name] module whose spool directory is wanted
# @return [String] relative path such as "foo/bar"
# @raise [ArgumentError] when the name is nil or outside the Legion:: namespace
def extension_path(extension_module)
  name = extension_module.name
  # Order matters: the longer extensions prefix must be tried first.
  prefix = [EXTENSION_PREFIX, LEGION_PREFIX].find { |candidate| name&.start_with?(candidate) }
  raise ArgumentError, "#{name} is not under the Legion:: namespace" unless prefix

  name.delete_prefix(prefix).gsub('::', '/').downcase
end
# Drains the spool for +sub_namespace+: yields each readable event in file
# order and deletes its file once the block returns.
#
# Files for which load_event_file returns nil are skipped and not counted.
# If the block (or any file operation) raises, the error is reported through
# handle_exception together with the path being processed, then re-raised;
# the failing file is left on disk.
#
# @param sub_namespace [#to_s] spool sub-directory to drain
# @yieldparam event [Object] the value returned by load_event_file
# @return [Integer] number of events yielded and deleted
def flush(sub_namespace)
  count = 0
  path = nil
  sorted_files(sub_namespace).each do |file|
    # Track the current file in the method-level variable. A block parameter
    # named `path` would shadow it and leave the rescue below reporting nil.
    path = file
    event = load_event_file(file, sub_namespace)
    next unless event

    yield event
    File.delete(file)
    count += 1
  end
  log.info "Spool drained #{count} item(s) from #{sub_namespace}" if count.positive?
  count
rescue StandardError => e
  handle_exception(e, level: :error, handled: false, operation: :spool_flush, sub_namespace: sub_namespace, path: path)
  raise
end
File.join(sub_dir(sub_namespace), 'quarantine') + FileUtils.mkdir_p(quarantine_dir) + quarantine_path = unique_quarantine_path(quarantine_dir, File.basename(path)) + File.rename(path, quarantine_path) + handle_exception( + error, + level: :warn, + handled: true, + operation: :spool_quarantine, + sub_namespace: sub_namespace, + path: path, + quarantine_path: quarantine_path + ) + end + + def unique_quarantine_path(quarantine_dir, basename) + path = File.join(quarantine_dir, "#{basename}.corrupt") + return path unless File.exist?(path) + + File.join(quarantine_dir, "#{basename}.#{SecureRandom.uuid}.corrupt") + end + + def temp_path_for(dir, filename) + File.join(dir, ".#{filename}.tmp-#{SecureRandom.uuid}") + end + end + end + end +end diff --git a/lib/legion/data/storage_tiers.rb b/lib/legion/data/storage_tiers.rb new file mode 100644 index 0000000..fd03173 --- /dev/null +++ b/lib/legion/data/storage_tiers.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' + +module Legion + module Data + module StorageTiers + TIERS = { hot: 0, warm: 1, cold: 2 }.freeze + + class << self + include Legion::Logging::Helper + + def archive_to_warm(table:, age_days: 90, batch_size: 1000) + return { archived: 0, reason: 'no_connection' } unless Legion::Data.connection + return { archived: 0, reason: 'no_archive_table' } unless Legion::Data.connection.table_exists?(:data_archive) + + cutoff = Time.now - (age_days * 86_400) + records = Legion::Data.connection[table].where { created_at < cutoff }.limit(batch_size).all + return { archived: 0 } if records.empty? 
+ + Legion::Data.connection.transaction do + records.each do |record| + Legion::Data.connection[:data_archive].insert( + source_table: table.to_s, source_id: record[:id], + data: Legion::JSON.dump(record), + tier: TIERS[:warm], + archived_at: Time.now.utc + ) + end + + ids = records.map { |r| r[:id] } + Legion::Data.connection[table].where(id: ids).delete + end + + log.info "Archived #{records.size} row(s) from #{table} to warm tier" + { archived: records.size, table: table.to_s } + rescue StandardError => e + handle_exception(e, level: :error, handled: false, operation: :archive_to_warm, table: table, age_days: age_days, batch_size: batch_size) + raise + end + + def export_to_cold(age_days: 365, batch_size: 5000) + return { exported: 0 } unless Legion::Data.connection&.table_exists?(:data_archive) + + cutoff = Time.now - (age_days * 86_400) + records = Legion::Data.connection[:data_archive] + .where(tier: TIERS[:warm]) + .where { archived_at < cutoff } + .limit(batch_size).all + return { exported: 0 } if records.empty? 
+ + ids = records.map { |r| r[:id] } + Legion::Data.connection[:data_archive].where(id: ids).update(tier: TIERS[:cold]) + log.info "Exported #{records.size} row(s) to cold tier" + { exported: records.size, data: records } + rescue StandardError => e + handle_exception(e, level: :error, handled: false, operation: :export_to_cold, age_days: age_days, batch_size: batch_size) + raise + end + + def stats + return {} unless Legion::Data.connection&.table_exists?(:data_archive) + + { warm: count_tier(:warm), cold: count_tier(:cold) } + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :storage_tiers_stats) + {} + end + + private + + def count_tier(tier) + Legion::Data.connection[:data_archive].where(tier: TIERS[tier]).count + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :storage_tiers_count, tier: tier) + 0 + end + end + end + end +end diff --git a/lib/legion/data/vector.rb b/lib/legion/data/vector.rb new file mode 100644 index 0000000..71f67aa --- /dev/null +++ b/lib/legion/data/vector.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +require 'legion/logging/helper' + +module Legion + module Data + module Vector + class << self + include Legion::Logging::Helper + + def available? + return false unless Legion::Data.connection + return false unless Legion::Data.connection.adapter_scheme == :postgres + + Legion::Data.connection.fetch("SELECT 1 FROM pg_extension WHERE extname = 'vector'").any? + rescue StandardError => e + handle_exception(e, level: :warn, handled: true, operation: :vector_available?) + false + end + + def ensure_extension! 
# Renders +query_vector+ in bracketed, comma-separated text form, e.g.
# "[1,2.5,3]".
#
# Every component must be a finite number (or a string that parses as one).
# Without this check a nil component produced a malformed literal such as
# "[1,,2]" and nested arrays were silently flattened by +join+. Valid
# components are emitted exactly as given.
#
# @param query_vector [Array<Numeric, String>] vector components
# @return [String] the vector literal
# @raise [ArgumentError] when a component is not a finite number
def vector_literal(query_vector)
  components = query_vector.map do |component|
    number = Float(component, exception: false)
    raise ArgumentError, "invalid vector component: #{component.inspect}" unless number&.finite?

    component
  end
  "[#{components.join(',')}]"
end
Ruby files. +set -uo pipefail + +FILES=("$@") + +if command -v rubocop >/dev/null 2>&1; then + exec rubocop -A --force-exclusion "${FILES[@]}" +fi + +if bundle exec rubocop -v >/dev/null 2>&1; then + exec bundle exec rubocop -A --force-exclusion "${FILES[@]}" +fi + +echo "RuboCop is not available locally; CI will enforce RuboCop." +exit 0 diff --git a/sonar-project.properties b/sonar-project.properties deleted file mode 100644 index 7b3c6ef..0000000 --- a/sonar-project.properties +++ /dev/null @@ -1,12 +0,0 @@ -sonar.projectKey=legion-io_legion-data -sonar.organization=legion-io -sonar.projectName=Legion::Data -sonar.sources=. -sonar.exclusions=vendor/** -sonar.coverage.exclusions=spec/** -sonar.ruby.coverage.reportPath=coverage/.resultset.json -sonar.ruby.file.suffixes=rb,ruby -sonar.ruby.coverage.framework=RSpec -sonar.ruby.rubocopConfig=.rubocop.yml -sonar.ruby.rubocop.reportPath=rubocop-result.json -sonar.ruby.rubocop.filePath=. \ No newline at end of file diff --git a/sourcehawk.yml b/sourcehawk.yml deleted file mode 100644 index a228e9b..0000000 --- a/sourcehawk.yml +++ /dev/null @@ -1,4 +0,0 @@ - -config-locations: - - https://raw.githubusercontent.com/optum/.github/main/sourcehawk.yml - diff --git a/spec/archival/scheduled_archival_spec.rb b/spec/archival/scheduled_archival_spec.rb new file mode 100644 index 0000000..2ac0097 --- /dev/null +++ b/spec/archival/scheduled_archival_spec.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/archival' +require 'legion/data/retention' + +RSpec.describe Legion::Data::Archival do + let(:db) { Legion::Data.connection } + + describe '.archive_completed_tasks' do + let(:cutoff_time) { Time.now - (100 * 86_400) } + + before do + # Clean up any leftover test rows + db[:tasks].where(status: %w[completed failed running]).delete rescue nil # rubocop:disable Style/RescueModifier + end + + after do + db[:tasks].where(status: %w[completed failed running]).delete rescue nil # 
rubocop:disable Style/RescueModifier + db[:tasks_archive].where(archive_reason: 'completed_task_archival').delete rescue nil # rubocop:disable Style/RescueModifier + end + + it 'returns a hash with :archived and :cutoff keys' do + result = described_class.archive_completed_tasks(days_old: 90) + expect(result).to have_key(:archived) + expect(result).to have_key(:cutoff) + end + + it 'moves old completed/failed tasks to tasks_archive' do + db[:tasks].insert(status: 'completed', created: cutoff_time - 1) + db[:tasks].insert(status: 'failed', created: cutoff_time - 1) + result = described_class.archive_completed_tasks(days_old: 90) + expect(result[:archived]).to be >= 2 + end + + it 'leaves recent completed tasks in the tasks table' do + id = db[:tasks].insert(status: 'completed', created: Time.now) + described_class.archive_completed_tasks(days_old: 90) + expect(db[:tasks].where(id: id).count).to eq(1) + end + + it 'leaves non-completed/failed tasks in place regardless of age' do + id = db[:tasks].insert(status: 'running', created: cutoff_time - 1) + described_class.archive_completed_tasks(days_old: 90) + expect(db[:tasks].where(id: id).count).to eq(1) + end + + it 'returns archived: 0 when tasks table does not exist' do + allow(db).to receive(:table_exists?).with(:tasks).and_return(false) + allow(db).to receive(:table_exists?).with(:tasks_archive).and_return(true) + result = described_class.archive_completed_tasks + expect(result[:archived]).to eq(0) + end + + it 'returns archived: 0 when tasks_archive table does not exist' do + allow(db).to receive(:table_exists?).with(:tasks).and_return(true) + allow(db).to receive(:table_exists?).with(:tasks_archive).and_return(false) + result = described_class.archive_completed_tasks + expect(result[:archived]).to eq(0) + end + + it 'cutoff is an ISO8601 string' do + result = described_class.archive_completed_tasks(days_old: 90) + expect(result[:cutoff]).to match(/\d{4}-\d{2}-\d{2}/) + end + end + + describe 
'.run_scheduled_archival' do + it 'returns a hash with :tasks key' do + result = described_class.run_scheduled_archival + expect(result).to have_key(:tasks) + end + + it 'delegates to archive_completed_tasks' do + allow(described_class).to receive(:archive_completed_tasks).and_return({ archived: 5, cutoff: '2026-01-01' }) + result = described_class.run_scheduled_archival + expect(result[:tasks][:archived]).to eq(5) + end + + it 'includes metering key when metering_records table exists' do + allow(described_class).to receive(:archive_completed_tasks).and_return({ archived: 0, cutoff: Time.now.iso8601 }) + allow(db).to receive(:table_exists?).with(:metering_records).and_return(true) + allow(Legion::Data::Retention).to receive(:archive_old_records).and_return({ archived: 3, table: :metering_records }) + result = described_class.run_scheduled_archival + expect(result).to have_key(:metering) + end + + it 'omits metering key when metering_records table does not exist' do + allow(described_class).to receive(:archive_completed_tasks).and_return({ archived: 0, cutoff: Time.now.iso8601 }) + allow(db).to receive(:table_exists?).with(:metering_records).and_return(false) + result = described_class.run_scheduled_archival + expect(result).not_to have_key(:metering) + end + end +end diff --git a/spec/legion/data/archival/policy_spec.rb b/spec/legion/data/archival/policy_spec.rb new file mode 100644 index 0000000..3e75e95 --- /dev/null +++ b/spec/legion/data/archival/policy_spec.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/archival/policy' + +RSpec.describe Legion::Data::Archival::Policy do + describe '.new' do + it 'uses defaults' do + policy = described_class.new + expect(policy.warm_after_days).to eq(7) + expect(policy.cold_after_days).to eq(90) + expect(policy.batch_size).to eq(1000) + end + + it 'accepts overrides' do + policy = described_class.new(warm_after_days: 14, cold_after_days: 180) + 
expect(policy.warm_after_days).to eq(14) + expect(policy.cold_after_days).to eq(180) + end + end + + describe '#warm_cutoff' do + it 'returns time warm_after_days ago' do + policy = described_class.new(warm_after_days: 7) + expect(policy.warm_cutoff).to be_within(2).of(Time.now - 604_800) + end + end + + describe '#cold_cutoff' do + it 'returns time cold_after_days ago' do + policy = described_class.new(cold_after_days: 90) + expect(policy.cold_cutoff).to be_within(2).of(Time.now - (90 * 86_400)) + end + end +end diff --git a/spec/legion/data/archival_spec.rb b/spec/legion/data/archival_spec.rb new file mode 100644 index 0000000..95cc3d4 --- /dev/null +++ b/spec/legion/data/archival_spec.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/archival' + +RSpec.describe Legion::Data::Archival do + describe 'ARCHIVE_TABLE_MAP' do + it 'maps source tables to archive tables' do + expect(described_class::ARCHIVE_TABLE_MAP[:tasks]).to eq(:tasks_archive) + expect(described_class::ARCHIVE_TABLE_MAP[:metering_records]).to eq(:metering_records_archive) + end + end + + describe '.archive!' do + it 'returns empty hash when db unavailable' do + allow(described_class).to receive(:db_ready?).and_return(false) + result = described_class.archive! 
+ expect(result).to be_empty + end + end + + describe '.search' do + it 'returns empty array when db unavailable' do + allow(described_class).to receive(:db_ready?).and_return(false) + result = described_class.search(table: :tasks) + expect(result).to eq([]) + end + end + + describe '.restore' do + it 'returns 0 when db unavailable' do + allow(described_class).to receive(:db_ready?).and_return(false) + expect(described_class.restore(table: :tasks, ids: [1])).to eq(0) + end + end +end diff --git a/spec/legion/data/archiver_spec.rb b/spec/legion/data/archiver_spec.rb new file mode 100644 index 0000000..dd18e50 --- /dev/null +++ b/spec/legion/data/archiver_spec.rb @@ -0,0 +1,395 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'digest' +require 'json' +require 'stringio' +require 'tmpdir' +require 'zlib' +require 'legion/data/archiver' + +RSpec.describe Legion::Data::Archiver do + let(:conn) { Legion::Data.connection } + + before(:each) do + allow(Legion::Data::Connection).to receive(:adapter).and_return(:postgres) + allow(Legion::Data).to receive(:connection).and_return(conn) + end + + # --- non-postgres guard --- + + describe '.archive_table non-postgres' do + it 'returns skipped true with reason not_postgres on sqlite' do + allow(Legion::Data::Connection).to receive(:adapter).and_return(:sqlite) + result = described_class.archive_table(table: :tasks) + expect(result).to eq({ skipped: true, reason: 'not_postgres' }) + end + + it 'returns skipped true with reason not_postgres on mysql2' do + allow(Legion::Data::Connection).to receive(:adapter).and_return(:mysql2) + result = described_class.archive_table(table: :tasks) + expect(result).to eq({ skipped: true, reason: 'not_postgres' }) + end + end + + # --- empty table --- + + describe '.archive_table with empty/no old rows' do + let(:table) { :archiver_test_empty } + + before(:each) do + conn.drop_table?(table) + conn.create_table(table) do + primary_key :id + String :name + DateTime :created_at + 
end + end + + after(:each) do + conn.drop_table?(table) + end + + it 'returns zero batches when no rows are old enough' do + conn[table].insert(name: 'fresh', created_at: Time.now - (5 * 86_400)) + result = described_class.archive_table(table: table, retention_days: 90, storage_backend: nil) + expect(result[:batches]).to eq(0) + expect(result[:total_rows]).to eq(0) + expect(result[:paths]).to eq([]) + end + + it 'returns zero batches for an empty table' do + result = described_class.archive_table(table: table, retention_days: 90, storage_backend: nil) + expect(result[:batches]).to eq(0) + expect(result[:total_rows]).to eq(0) + end + end + + # --- single batch --- + + describe '.archive_table single batch' do + let(:table) { :archiver_test_single } + + before(:each) do + conn.drop_table?(:archive_manifest) + conn.drop_table?(table) + + conn.create_table(table) do + primary_key :id + String :name + DateTime :created_at + end + + conn.create_table(:archive_manifest) do + primary_key :id + String :batch_id, null: false, unique: true + String :source_table, null: false + Integer :row_count, null: false + String :checksum, null: false + String :storage_path, null: false + DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP + String :metadata + end + end + + after(:each) do + conn.drop_table?(:archive_manifest) + conn.drop_table?(table) + end + + def insert_old(name) + conn[table].insert(name: name, created_at: Time.now - (100 * 86_400)) + end + + it 'JSONL structure is correct: each line is valid JSON with original fields' do + insert_old('alpha') + insert_old('beta') + + result = described_class.archive_table(table: table, retention_days: 90, storage_backend: nil) + expect(result[:total_rows]).to eq(2) + + path = result[:paths].first.sub('file://', '') + compressed = File.binread(path) + jsonl = Zlib::GzipReader.new(StringIO.new(compressed)).read + lines = jsonl.split("\n").reject(&:empty?) 
+ expect(lines.size).to eq(2) + parsed = lines.map { |l| JSON.parse(l) } + names = parsed.map { |p| p['name'] } + expect(names).to contain_exactly('alpha', 'beta') + end + + it 'gzip decompresses correctly' do + insert_old('gamma') + + result = described_class.archive_table(table: table, retention_days: 90, storage_backend: nil) + path = result[:paths].first.sub('file://', '') + compressed = File.binread(path) + + decompressed = Zlib::GzipReader.new(StringIO.new(compressed)).read + expect(decompressed).not_to be_empty + expect { JSON.parse(decompressed) }.not_to raise_error + end + + it 'SHA-256 checksum in manifest matches compressed file data' do + insert_old('delta') + + described_class.archive_table(table: table, retention_days: 90, storage_backend: nil) + + manifest_row = conn[:archive_manifest].first + path = manifest_row[:storage_path].sub('file://', '') + compressed = File.binread(path) + expect(manifest_row[:checksum]).to eq(Digest::SHA256.hexdigest(compressed)) + end + + it 'deletes rows from source table after archiving' do + 3.times { |i| insert_old("row#{i}") } + conn[table].insert(name: 'fresh', created_at: Time.now - (5 * 86_400)) + + described_class.archive_table(table: table, retention_days: 90, storage_backend: nil) + + expect(conn[table].count).to eq(1) + expect(conn[table].first[:name]).to eq('fresh') + end + + it 'batch_id in manifest is UUID format' do + insert_old('epsilon') + + described_class.archive_table(table: table, retention_days: 90, storage_backend: nil) + + batch_id = conn[:archive_manifest].first[:batch_id] + uuid_pattern = /\A[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\z/i + expect(batch_id).to match(uuid_pattern) + end + + it 'retention_days boundary: rows exactly at cutoff are included' do + boundary = Time.now - (90 * 86_400) - 1 + conn[table].insert(name: 'boundary_old', created_at: boundary) + conn[table].insert(name: 'boundary_fresh', created_at: Time.now - (89 * 86_400)) + + result = 
described_class.archive_table(table: table, retention_days: 90, storage_backend: nil) + + expect(result[:total_rows]).to eq(1) + expect(conn[table].first[:name]).to eq('boundary_fresh') + end + end + + # --- batch_size respected --- + + describe '.archive_table batch_size' do + let(:table) { :archiver_test_batches } + + before(:each) do + conn.drop_table?(:archive_manifest) + conn.drop_table?(table) + + conn.create_table(table) do + primary_key :id + String :name + DateTime :created_at + end + + conn.create_table(:archive_manifest) do + primary_key :id + String :batch_id, null: false, unique: true + String :source_table, null: false + Integer :row_count, null: false + String :checksum, null: false + String :storage_path, null: false + DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP + String :metadata + end + + 5.times { |i| conn[table].insert(name: "old#{i}", created_at: Time.now - (100 * 86_400)) } + end + + after(:each) do + conn.drop_table?(:archive_manifest) + conn.drop_table?(table) + end + + it 'iterates multiple batches when batch_size < total rows' do + result = described_class.archive_table(table: table, retention_days: 90, batch_size: 2, storage_backend: nil) + expect(result[:batches]).to eq(3) + expect(result[:total_rows]).to eq(5) + expect(conn[table].count).to eq(0) + end + + it 'produces one batch when batch_size >= total rows' do + result = described_class.archive_table(table: table, retention_days: 90, batch_size: 10, storage_backend: nil) + expect(result[:batches]).to eq(1) + expect(result[:total_rows]).to eq(5) + end + end + + # --- transaction rollback --- + + describe '.archive_table transaction rollback' do + let(:table) { :archiver_test_rollback } + + before(:each) do + conn.drop_table?(table) + conn.create_table(table) do + primary_key :id + String :name + DateTime :created_at + end + conn[table].insert(name: 'old', created_at: Time.now - (100 * 86_400)) + end + + after(:each) do + conn.drop_table?(table) + end + + it 
'rolls back row deletion when manifest insert fails' do + allow(conn).to receive(:[]).and_call_original + mock_manifest = double('manifest_dataset') + allow(conn).to receive(:[]).with(:archive_manifest).and_return(mock_manifest) + allow(mock_manifest).to receive(:insert).and_raise(StandardError, 'manifest insert failure') + + expect do + described_class.archive_table(table: table, retention_days: 90, storage_backend: nil) + end.to raise_error(StandardError, /manifest insert failure/) + + expect(conn[table].count).to eq(1) + end + end + + # --- upload backends --- + + describe '.upload_batch' do + let(:compressed_data) { Zlib::Deflate.deflate('test data') } + + it 'nil backend writes to tmpdir and returns file:// path' do + path = described_class.upload_batch( + data: compressed_data, table: 'tasks', year: 2026, month: 3, batch_n: 1, backend: nil + ) + expect(path).to start_with('file://') + expect(path).to include('legion-archive') + expect(File.exist?(path.sub('file://', ''))).to be true + end + + it 's3 backend routes to S3 runner when defined' do + stub_const('Legion::Extensions::S3::Runners::Put', Class.new) + allow(Legion::Extensions::S3::Runners::Put).to receive(:run).and_return(nil) + + path = described_class.upload_batch( + data: compressed_data, table: 'tasks', year: 2026, month: 3, batch_n: 1, backend: :s3 + ) + expect(path).to start_with('s3://') + expect(Legion::Extensions::S3::Runners::Put).to have_received(:run) + end + + it 'azure backend routes to AzureStorage runner when defined' do + stub_const('Legion::Extensions::AzureStorage::Runners::Upload', Class.new) + allow(Legion::Extensions::AzureStorage::Runners::Upload).to receive(:run).and_return(nil) + + path = described_class.upload_batch( + data: compressed_data, table: 'tasks', year: 2026, month: 3, batch_n: 1, backend: :azure + ) + expect(path).to start_with('azure://') + expect(Legion::Extensions::AzureStorage::Runners::Upload).to have_received(:run) + end + + it 'raises UploadError when s3 
runner not defined' do + hide_const('Legion::Extensions::S3::Runners::Put') if defined?(Legion::Extensions::S3::Runners::Put) + expect do + described_class.upload_batch( + data: compressed_data, table: 'tasks', year: 2026, month: 3, batch_n: 1, backend: :s3 + ) + end.to raise_error(Legion::Data::Archiver::UploadError) + end + + it 'raises UploadError when azure runner not defined' do + hide_const('Legion::Extensions::AzureStorage::Runners::Upload') if defined?(Legion::Extensions::AzureStorage::Runners::Upload) + expect do + described_class.upload_batch( + data: compressed_data, table: 'tasks', year: 2026, month: 3, batch_n: 1, backend: :azure + ) + end.to raise_error(Legion::Data::Archiver::UploadError) + end + + it 'raises UploadError when s3 runner raises' do + stub_const('Legion::Extensions::S3::Runners::Put', Class.new) + allow(Legion::Extensions::S3::Runners::Put).to receive(:run).and_raise(StandardError, 'connection refused') + + expect do + described_class.upload_batch( + data: compressed_data, table: 'tasks', year: 2026, month: 3, batch_n: 1, backend: :s3 + ) + end.to raise_error(Legion::Data::Archiver::UploadError, /connection refused/) + end + end + + # --- manifest_stats --- + + describe '.manifest_stats' do + before(:each) do + conn.drop_table?(:archive_manifest) + end + + after(:each) do + conn.drop_table?(:archive_manifest) + end + + it 'returns empty hash when archive_manifest table does not exist' do + result = described_class.manifest_stats + expect(result).to eq({}) + end + + it 'returns empty hash when no manifest rows exist' do + conn.create_table(:archive_manifest) do + primary_key :id + String :batch_id, null: false, unique: true + String :source_table, null: false + Integer :row_count, null: false + String :checksum, null: false + String :storage_path, null: false + DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP + String :metadata + end + + result = described_class.manifest_stats + expect(result).to eq({}) + end + + it 
'returns aggregated stats per source_table' do + conn.create_table(:archive_manifest) do + primary_key :id + String :batch_id, null: false, unique: true + String :source_table, null: false + Integer :row_count, null: false + String :checksum, null: false + String :storage_path, null: false + DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP + String :metadata + end + + now = Time.now.utc + conn[:archive_manifest].insert( + batch_id: SecureRandom.uuid, source_table: 'tasks', + row_count: 500, checksum: 'abc', storage_path: 'file:///tmp/1', archived_at: now - 86_400 + ) + conn[:archive_manifest].insert( + batch_id: SecureRandom.uuid, source_table: 'tasks', + row_count: 300, checksum: 'def', storage_path: 'file:///tmp/2', archived_at: now + ) + conn[:archive_manifest].insert( + batch_id: SecureRandom.uuid, source_table: 'audit_log', + row_count: 100, checksum: 'ghi', storage_path: 'file:///tmp/3', archived_at: now + ) + + result = described_class.manifest_stats + expect(result.keys).to contain_exactly('tasks', 'audit_log') + expect(result['tasks'][:batches]).to eq(2) + expect(result['tasks'][:total_rows]).to eq(800) + expect(result['audit_log'][:batches]).to eq(1) + expect(result['audit_log'][:total_rows]).to eq(100) + end + + it 'returns skipped hash on non-postgres' do + allow(Legion::Data::Connection).to receive(:adapter).and_return(:sqlite) + result = described_class.manifest_stats + expect(result).to eq({}) + end + end +end diff --git a/spec/legion/data/audit_log_hash_chain_spec.rb b/spec/legion/data/audit_log_hash_chain_spec.rb new file mode 100644 index 0000000..d838a0c --- /dev/null +++ b/spec/legion/data/audit_log_hash_chain_spec.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/audit_log_hash_chain' + +RSpec.describe Legion::Data::AuditLogHashChain do + let(:created_at) { Time.utc(2026, 4, 27, 12, 0, 0) } + let(:record) do + { + id: 1, + principal_id: 'worker-1', + action: 'execute', + 
resource: 'runner#call', + source: 'amqp', + status: 'success', + detail: '{"task_id":1}', + created_at: created_at, + previous_hash: described_class::GENESIS_HASH + } + end + + it 'computes deterministic canonical hashes' do + expect(described_class.compute_hash(record)).to eq(described_class.compute_hash(record.dup)) + expect(described_class.compute_hash(record)).to match(/\A[0-9a-f]{64}\z/) + end + + it 'verifies a valid chain' do + first = record.merge(record_hash: described_class.compute_hash(record)) + second_base = record.merge(id: 2, action: 'finish', previous_hash: first[:record_hash]) + second = second_base.merge(record_hash: described_class.compute_hash(second_base)) + + expect(described_class.verify([first, second])).to eq({ valid: true, length: 2 }) + end + + it 'detects parent mismatch' do + bad = record.merge(previous_hash: 'a' * 64, record_hash: 'b' * 64) + result = described_class.verify([bad]) + + expect(result[:valid]).to be false + expect(result[:reason]).to eq(:parent_mismatch) + end + + it 'detects hash mismatch' do + bad = record.merge(record_hash: 'b' * 64) + result = described_class.verify([bad]) + + expect(result[:valid]).to be false + expect(result[:reason]).to eq(:hash_mismatch) + end +end diff --git a/spec/legion/data/audit_record_spec.rb b/spec/legion/data/audit_record_spec.rb new file mode 100644 index 0000000..d49c4f2 --- /dev/null +++ b/spec/legion/data/audit_record_spec.rb @@ -0,0 +1,346 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/audit_record' + +Legion::Data::Connection.setup unless Legion::Data.connected? 
+Legion::Data::Migration.migrate(Legion::Data::Connection.sequel, + File.expand_path('../../../lib/legion/data/migrations', __dir__)) +require 'legion/data/models/audit_record' + +RSpec.describe Legion::Data::AuditRecord do + let(:chain_id) { "test-chain-#{SecureRandom.hex(4)}" } + let(:content_type) { 'test.event' } + let(:content_hash) { Digest::SHA256.hexdigest('hello world') } + + # ------------------------------------------------------------------------- + # GENESIS_HASH constant + # ------------------------------------------------------------------------- + describe 'GENESIS_HASH' do + it 'is 64 zero characters' do + expect(described_class::GENESIS_HASH).to eq('0' * 64) + end + end + + # ------------------------------------------------------------------------- + # .compute_chain_hash (via public module_function) + # ------------------------------------------------------------------------- + describe '.compute_chain_hash' do + it 'returns a 64-character hex string' do + ts = Time.now + result = described_class.compute_chain_hash('0' * 64, content_hash, ts, content_type) + expect(result).to match(/\A[0-9a-f]{64}\z/) + end + + it 'produces different hashes for different parent_hashes' do + ts = Time.now + h1 = described_class.compute_chain_hash('a' * 64, content_hash, ts, content_type) + h2 = described_class.compute_chain_hash('b' * 64, content_hash, ts, content_type) + expect(h1).not_to eq(h2) + end + + it 'produces different hashes for different content_hashes' do + ts = Time.now + h1 = described_class.compute_chain_hash('0' * 64, 'aaa', ts, content_type) + h2 = described_class.compute_chain_hash('0' * 64, 'bbb', ts, content_type) + expect(h1).not_to eq(h2) + end + + it 'produces different hashes for different content_types' do + ts = Time.now + h1 = described_class.compute_chain_hash('0' * 64, content_hash, ts, 'type.a') + h2 = described_class.compute_chain_hash('0' * 64, content_hash, ts, 'type.b') + expect(h1).not_to eq(h2) + end + + it 'is deterministic 
for the same inputs' do + ts = Time.utc(2026, 1, 1, 0, 0, 0) + h1 = described_class.compute_chain_hash('0' * 64, content_hash, ts, content_type) + h2 = described_class.compute_chain_hash('0' * 64, content_hash, ts, content_type) + expect(h1).to eq(h2) + end + end + + # ------------------------------------------------------------------------- + # DB-unavailable guard + # ------------------------------------------------------------------------- + describe '.append when db unavailable' do + before { allow(described_class).to receive(:db_ready?).and_return(false) } + + it 'returns an error hash' do + result = described_class.append(chain_id: chain_id, content_type: content_type, content_hash: content_hash) + expect(result[:error]).to include('db unavailable') + end + end + + describe '.verify when db unavailable' do + before { allow(described_class).to receive(:db_ready?).and_return(false) } + + it 'returns valid: false with error' do + result = described_class.verify(chain_id: chain_id) + expect(result[:valid]).to be false + expect(result[:error]).to include('db unavailable') + end + end + + describe '.walk when db unavailable' do + before { allow(described_class).to receive(:db_ready?).and_return(false) } + + it 'returns an empty array' do + expect(described_class.walk(chain_id: chain_id)).to eq([]) + end + end + + describe '.query_by_type when db unavailable' do + before { allow(described_class).to receive(:db_ready?).and_return(false) } + + it 'returns an empty array' do + expect(described_class.query_by_type(content_type: content_type)).to eq([]) + end + end + + # ------------------------------------------------------------------------- + # Integration — live SQLite database + # ------------------------------------------------------------------------- + # Temporarily disables the PG NO UPDATE/NO DELETE rules so tamper specs can + # directly corrupt rows. On non-PG adapters the rules don't exist and the + # block just yields directly. 
+ def with_audit_records_writable(conn) + if Legion::Data::Connection.adapter == :postgres + conn.run('ALTER TABLE audit_records DISABLE RULE no_update_audit_records') + conn.run('ALTER TABLE audit_records DISABLE RULE no_delete_audit_records') + end + yield + ensure + if Legion::Data::Connection.adapter == :postgres + conn.run('ALTER TABLE audit_records ENABLE RULE no_update_audit_records') + conn.run('ALTER TABLE audit_records ENABLE RULE no_delete_audit_records') + end + end + + context 'with a live database', :aggregate_failures do + before do + Legion::Data::Connection.setup unless Legion::Data.connected? + end + + describe '.append' do + it 'inserts a record and returns chain metadata' do + result = described_class.append( + chain_id: chain_id, + content_type: content_type, + content_hash: content_hash + ) + expect(result[:id]).to be_a(Integer) + expect(result[:chain_id]).to eq(chain_id) + expect(result[:chain_hash]).to match(/\A[0-9a-f]{64}\z/) + expect(result[:parent_hash]).to eq(described_class::GENESIS_HASH) + end + + it 'links the second record to the first via parent_hash' do + r1 = described_class.append(chain_id: chain_id, content_type: content_type, content_hash: content_hash) + r2 = described_class.append( + chain_id: chain_id, + content_type: content_type, + content_hash: Digest::SHA256.hexdigest('record 2') + ) + expect(r2[:parent_hash]).to eq(r1[:chain_hash]) + end + + it 'stores optional metadata as JSON' do + described_class.append( + chain_id: chain_id, + content_type: content_type, + content_hash: content_hash, + metadata: { actor: 'system', env: 'test' } + ) + row = Legion::Data.connection[:audit_records].where(chain_id: chain_id).first + parsed = Legion::JSON.load(row[:metadata]) + expect(parsed[:actor]).to eq('system') + end + + it 'uses nil metadata when the hash is empty' do + described_class.append(chain_id: chain_id, content_type: content_type, content_hash: content_hash) + row = Legion::Data.connection[:audit_records].where(chain_id: 
chain_id).first + expect(row[:metadata]).to be_nil + end + + it 'keeps chains independent from each other' do + other_chain = "other-#{SecureRandom.hex(4)}" + r1 = described_class.append(chain_id: chain_id, content_type: 'a', content_hash: Digest::SHA256.hexdigest('c1')) + r2 = described_class.append(chain_id: other_chain, content_type: 'a', content_hash: Digest::SHA256.hexdigest('c2')) + expect(r1[:parent_hash]).to eq(described_class::GENESIS_HASH) + expect(r2[:parent_hash]).to eq(described_class::GENESIS_HASH) + end + end + + describe '.verify' do + it 'returns valid: true, length: 0 for an empty chain' do + result = described_class.verify(chain_id: "empty-#{SecureRandom.hex(4)}") + expect(result).to eq({ valid: true, length: 0 }) + end + + it 'returns valid: true for a correctly chained sequence' do + 3.times do |i| + described_class.append( + chain_id: chain_id, + content_type: content_type, + content_hash: Digest::SHA256.hexdigest("record #{i}") + ) + end + result = described_class.verify(chain_id: chain_id) + expect(result[:valid]).to be true + expect(result[:length]).to eq(3) + end + + it 'detects a tampered chain_hash' do + described_class.append(chain_id: chain_id, content_type: content_type, content_hash: content_hash) + described_class.append(chain_id: chain_id, content_type: content_type, + content_hash: Digest::SHA256.hexdigest('r2')) + + tampered_hash = Digest::SHA256.hexdigest("tamper-#{chain_id}") + first = Legion::Data.connection[:audit_records] + .where(chain_id: chain_id) + .order(:created_at, :id) + .first + + # PG NO UPDATE rule silently ignores Sequel updates; bypass via raw SQL. 
+ with_audit_records_writable(Legion::Data.connection) do + Legion::Data.connection[:audit_records] + .where(id: first[:id]) + .update(chain_hash: tampered_hash) + end + + result = described_class.verify(chain_id: chain_id) + expect(result[:valid]).to be false + expect(result[:broken_at]).not_to be_nil + end + + it 'detects a tampered parent_hash' do + described_class.append(chain_id: chain_id, content_type: content_type, content_hash: content_hash) + r2 = described_class.append(chain_id: chain_id, content_type: content_type, + content_hash: Digest::SHA256.hexdigest('r2')) + + with_audit_records_writable(Legion::Data.connection) do + Legion::Data.connection[:audit_records] + .where(id: r2[:id]) + .update(parent_hash: Digest::SHA256.hexdigest("tamper-parent-#{chain_id}")) + end + + result = described_class.verify(chain_id: chain_id) + expect(result[:valid]).to be false + expect(result[:reason]).to eq(:parent_mismatch) + end + end + + describe '.walk' do + it 'returns records in chronological order' do + 3.times do |i| + described_class.append( + chain_id: chain_id, + content_type: content_type, + content_hash: Digest::SHA256.hexdigest("walk #{i}") + ) + end + records = described_class.walk(chain_id: chain_id) + expect(records.size).to eq(3) + expect(records.map { |r| r[:chain_id] }.uniq).to eq([chain_id]) + end + + it 'accepts a since: filter' do + described_class.append(chain_id: chain_id, content_type: content_type, content_hash: content_hash) + described_class.append(chain_id: chain_id, content_type: content_type, + content_hash: Digest::SHA256.hexdigest('r2')) + + # A future cutoff should exclude all records already written + future = Time.now + 3600 + records = described_class.walk(chain_id: chain_id, since: future) + expect(records).to be_empty + end + + it 'respects the limit: parameter' do + 5.times do |i| + described_class.append( + chain_id: chain_id, + content_type: content_type, + content_hash: Digest::SHA256.hexdigest("lim #{i}") + ) + end + records = 
described_class.walk(chain_id: chain_id, limit: 3) + expect(records.size).to eq(3) + end + + it 'returns deserialized hashes with expected keys' do + described_class.append( + chain_id: chain_id, + content_type: content_type, + content_hash: content_hash, + metadata: { source: 'spec' } + ) + record = described_class.walk(chain_id: chain_id).first + expect(record.keys).to include(:id, :chain_id, :content_type, :content_hash, + :parent_hash, :chain_hash, :signature, :metadata, :created_at) + expect(record[:metadata][:source]).to eq('spec') + end + end + + describe '.query_by_type' do + it 'returns records matching the content_type across chains' do + ctype = "spec.type.#{SecureRandom.hex(4)}" + 2.times do |i| + described_class.append( + chain_id: "chain-#{i}-#{SecureRandom.hex(4)}", + content_type: ctype, + content_hash: Digest::SHA256.hexdigest("qbt #{i}") + ) + end + results = described_class.query_by_type(content_type: ctype) + expect(results.size).to eq(2) + expect(results.map { |r| r[:content_type] }.uniq).to eq([ctype]) + end + + it 'accepts a since: filter' do + ctype = "spec.since.#{SecureRandom.hex(4)}" + described_class.append(chain_id: chain_id, content_type: ctype, content_hash: content_hash) + future = Time.now + 3600 + results = described_class.query_by_type(content_type: ctype, since: future) + expect(results).to be_empty + end + end + end + + # ------------------------------------------------------------------------- + # Model: AuditRecord (immutability guards) + # ------------------------------------------------------------------------- + describe Legion::Data::Model::AuditRecord do + before { skip 'No DB connection' unless Legion::Data.connected? 
} + + it 'raises on update attempt' do + Legion::Data::AuditRecord.append(chain_id: chain_id, content_type: content_type, content_hash: content_hash) + record = Legion::Data::Model::AuditRecord.first(chain_id: chain_id) + expect { record.update(content_type: 'mutated') }.to raise_error(RuntimeError, /immutable/) + end + + it 'raises on destroy attempt' do + Legion::Data::AuditRecord.append(chain_id: chain_id, content_type: content_type, content_hash: content_hash) + record = Legion::Data::Model::AuditRecord.first(chain_id: chain_id) + expect { record.destroy }.to raise_error(RuntimeError, /immutable/) + end + + it 'parses metadata via parsed_metadata' do + Legion::Data::AuditRecord.append( + chain_id: chain_id, + content_type: content_type, + content_hash: content_hash, + metadata: { key: 'value' } + ) + record = Legion::Data::Model::AuditRecord.first(chain_id: chain_id) + expect(record.parsed_metadata[:key]).to eq('value') + end + + it 'returns empty hash for nil metadata' do + Legion::Data::AuditRecord.append(chain_id: chain_id, content_type: content_type, content_hash: content_hash) + record = Legion::Data::Model::AuditRecord.first(chain_id: chain_id) + expect(record.parsed_metadata).to eq({}) + end + end +end diff --git a/spec/legion/data/connection_fallback_spec.rb b/spec/legion/data/connection_fallback_spec.rb new file mode 100644 index 0000000..fa394cc --- /dev/null +++ b/spec/legion/data/connection_fallback_spec.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'fileutils' + +RSpec.describe Legion::Data::Connection do + describe 'dev mode fallback' do + let(:test_db) { 'legionio_fallback_test.db' } + + before(:each) do + @saved_adapter = Legion::Settings[:data][:adapter] + @saved_creds = Legion::Settings[:data][:creds].dup + @saved_dev_mode = Legion::Settings[:data][:dev_mode] + @saved_dev_fallback = Legion::Settings[:data][:dev_fallback] + @saved_connected = Legion::Settings[:data][:connected] + @saved_ivar_adapter = 
described_class.instance_variable_get(:@adapter) + @saved_ivar_sequel = described_class.instance_variable_get(:@sequel) + @saved_ivar_fallback_active = described_class.instance_variable_get(:@fallback_active) + + described_class.instance_variable_set(:@adapter, nil) + described_class.instance_variable_set(:@sequel, nil) + described_class.instance_variable_set(:@fallback_active, false) + Legion::Settings[:data][:connected] = false + end + + after(:each) do + begin + described_class.shutdown + rescue StandardError + nil + end + described_class.instance_variable_set(:@adapter, @saved_ivar_adapter) + described_class.instance_variable_set(:@sequel, @saved_ivar_sequel) + described_class.instance_variable_set(:@fallback_active, @saved_ivar_fallback_active) + Legion::Settings[:data][:adapter] = @saved_adapter + Legion::Settings[:data][:creds] = @saved_creds + Legion::Settings[:data][:dev_mode] = @saved_dev_mode + Legion::Settings[:data][:dev_fallback] = @saved_dev_fallback + Legion::Settings[:data][:connected] = @saved_connected + FileUtils.rm_f(test_db) + end + + context 'when dev_mode is true and network DB unreachable' do + before do + Legion::Settings[:data][:adapter] = 'mysql2' + Legion::Settings[:data][:dev_mode] = true + Legion::Settings[:data][:dev_fallback] = true + Legion::Settings[:data][:creds] = { database: test_db } + allow(Sequel).to receive(:connect).and_wrap_original do |original, *args, **kwargs| + raise Sequel::DatabaseConnectionError, 'connection refused' if kwargs[:adapter] == :mysql2 + + original.call(*args, **kwargs) + end + end + + it 'falls back to SQLite' do + described_class.setup + expect(described_class.adapter).to eq(:sqlite) + expect(described_class.sequel).to be_a(Sequel::SQLite::Database) + end + + it 'reports fallback diagnostics with strict boolean liveness' do + described_class.setup + + expect(described_class.fallback_active?).to be(true) + expect(described_class.connection_info).to include( + adapter: :sqlite, + connected: true, + 
fallback_active: true, + configured_adapter: :mysql2, + sequel_alive: true + ) + end + + it 'disables preconnect on the initial network connection attempt' do + captured_opts = nil + allow(Sequel).to receive(:connect).and_wrap_original do |original, *args, **kwargs| + options = kwargs.empty? ? args.last : kwargs + captured_opts = options if options[:adapter] == :mysql2 + raise Sequel::DatabaseConnectionError, 'connection refused' if options[:adapter] == :mysql2 + + original.call(*args, **kwargs) + end + + described_class.setup + + expect(captured_opts[:preconnect]).to be(false) + end + end + + context 'when setup completes without fallback' do + before do + Legion::Settings[:data][:adapter] = 'sqlite' + Legion::Settings[:data][:creds] = { database: test_db } + described_class.instance_variable_set(:@fallback_active, true) + end + + it 'resets fallback diagnostics' do + described_class.setup + + expect(described_class.fallback_active?).to be(false) + expect(described_class.connection_info).to include( + adapter: :sqlite, + connected: true, + fallback_active: false, + configured_adapter: :sqlite, + sequel_alive: true + ) + end + end + + context 'when dev_mode is false and network DB unreachable' do + before do + Legion::Settings[:data][:adapter] = 'mysql2' + Legion::Settings[:data][:dev_mode] = false + Legion::Settings[:data][:creds] = { database: test_db } + allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError, 'connection refused') + end + + it 'raises the connection error' do + expect { described_class.setup }.to raise_error(Sequel::DatabaseConnectionError) + end + end + + context 'when dev_fallback is explicitly disabled' do + before do + Legion::Settings[:data][:adapter] = 'mysql2' + Legion::Settings[:data][:dev_mode] = true + Legion::Settings[:data][:dev_fallback] = false + Legion::Settings[:data][:creds] = { database: test_db } + allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError, 'connection refused') + end + 
+ it 'raises the connection error' do + expect { described_class.setup }.to raise_error(Sequel::DatabaseConnectionError) + end + end + end +end diff --git a/spec/legion/data/connection_info_spec.rb b/spec/legion/data/connection_info_spec.rb new file mode 100644 index 0000000..13a808c --- /dev/null +++ b/spec/legion/data/connection_info_spec.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'fileutils' + +RSpec.describe Legion::Data::Connection do + let(:test_db) { 'legionio_connection_info_test.db' } + + before(:each) do + @mutated_connection = false + @saved_adapter = Legion::Settings[:data][:adapter] + @saved_creds = Legion::Settings[:data][:creds].dup + @saved_dev_mode = Legion::Settings[:data][:dev_mode] + @saved_dev_fallback = Legion::Settings[:data][:dev_fallback] + @saved_connected = Legion::Settings[:data][:connected] + @saved_ivar_adapter = described_class.instance_variable_get(:@adapter) + @saved_ivar_sequel = described_class.instance_variable_get(:@sequel) + @saved_ivar_fallback_active = described_class.instance_variable_get(:@fallback_active) + end + + after(:each) do + if @mutated_connection + begin + described_class.shutdown + rescue StandardError + nil + end + end + + described_class.instance_variable_set(:@adapter, @saved_ivar_adapter) + described_class.instance_variable_set(:@sequel, @saved_ivar_sequel) + described_class.instance_variable_set(:@fallback_active, @saved_ivar_fallback_active) + Legion::Settings[:data][:adapter] = @saved_adapter + Legion::Settings[:data][:creds] = @saved_creds + Legion::Settings[:data][:dev_mode] = @saved_dev_mode + Legion::Settings[:data][:dev_fallback] = @saved_dev_fallback + Legion::Settings[:data][:connected] = @saved_connected + FileUtils.rm_f(test_db) + end + + describe '.connection_info' do + it 'returns a hash with adapter and connection state' do + info = Legion::Data::Connection.connection_info + expect(info).to be_a(Hash) + expect(info).to have_key(:adapter) + 
expect(info).to have_key(:connected) + expect(info).to have_key(:fallback_active) + end + + it 'reports the current adapter' do + info = Legion::Data::Connection.connection_info + expect(%i[sqlite postgres mysql2]).to include(info[:adapter]) + end + + it 'reports consistent fallback state' do + info = Legion::Data::Connection.connection_info + expect(info[:fallback_active]).to eq(Legion::Data::Connection.fallback_active?) + end + end + + describe '.fallback_active?' do + it 'returns a boolean' do + expect(Legion::Data::Connection.fallback_active?).to be(true).or be(false) + end + + it 'returns true after a deterministic network adapter fallback' do + @mutated_connection = true + described_class.instance_variable_set(:@adapter, nil) + described_class.instance_variable_set(:@sequel, nil) + described_class.instance_variable_set(:@fallback_active, false) + Legion::Settings[:data][:adapter] = 'postgres' + Legion::Settings[:data][:dev_mode] = true + Legion::Settings[:data][:dev_fallback] = true + Legion::Settings[:data][:creds] = { database: test_db } + + allow(Sequel).to receive(:connect).and_wrap_original do |original, *args, **kwargs| + options = kwargs.empty? ? 
args.last : kwargs + raise Sequel::DatabaseConnectionError, 'connection failed' if options[:adapter] == :postgres + + original.call(*args, **kwargs) + end + + described_class.setup + info = described_class.connection_info + + expect(described_class.fallback_active?).to be(true) + expect(info[:adapter]).to eq(:sqlite) + expect(info[:configured_adapter]).to eq(:postgres) + expect(info[:fallback_active]).to be(true) + end + end +end diff --git a/spec/legion/data/connection_reconnect_spec.rb b/spec/legion/data/connection_reconnect_spec.rb new file mode 100644 index 0000000..d669d87 --- /dev/null +++ b/spec/legion/data/connection_reconnect_spec.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'Legion::Data::Connection.reconnect_with_fresh_creds' do + after(:each) do + Legion::Data::Connection.shutdown + end + + context 'when adapter is sqlite' do + let(:mock_sequel) { instance_double(Sequel::SQLite::Database, opts: {}) } + + it 'returns false (no-op for sqlite)' do + Legion::Data::Connection.instance_variable_set(:@sequel, mock_sequel) + Legion::Data::Connection.instance_variable_set(:@adapter, :sqlite) + expect(Legion::Data::Connection.reconnect_with_fresh_creds).to be false + end + + after do + Legion::Data::Connection.instance_variable_set(:@adapter, nil) + Legion::Data::Connection.instance_variable_set(:@sequel, nil) + end + end + + context 'when sequel is nil' do + it 'returns false' do + Legion::Data::Connection.instance_variable_set(:@sequel, nil) + expect(Legion::Data::Connection.reconnect_with_fresh_creds).to be false + end + end + + context 'with a postgres adapter (mocked)' do + let(:mock_sequel) do + instance_double(Sequel::Database, opts: { user: 'old-vault-user', password: 'old-pass' }) + end + + before do + Legion::Data::Connection.instance_variable_set(:@sequel, mock_sequel) + Legion::Data::Connection.instance_variable_set(:@adapter, :postgres) + end + + after do + 
Legion::Data::Connection.instance_variable_set(:@adapter, nil) + Legion::Data::Connection.instance_variable_set(:@sequel, nil) + end + + it 'updates sequel opts and reconnects with fresh creds' do + Legion::Settings[:data][:creds] = { user: 'new-vault-user', password: 'new-pass', host: '127.0.0.1', port: 5432 } + + allow(mock_sequel).to receive(:disconnect) + allow(mock_sequel).to receive(:test_connection).and_return(true) + + result = Legion::Data::Connection.reconnect_with_fresh_creds + + expect(result).to be true + expect(mock_sequel.opts[:user]).to eq('new-vault-user') + expect(mock_sequel.opts[:password]).to eq('new-pass') + expect(mock_sequel).to have_received(:disconnect) + expect(mock_sequel).to have_received(:test_connection) + end + + it 'handles :username key as fallback' do + Legion::Settings[:data][:creds] = { username: 'alt-user', password: 'alt-pass' } + + allow(mock_sequel).to receive(:disconnect) + allow(mock_sequel).to receive(:test_connection).and_return(true) + + result = Legion::Data::Connection.reconnect_with_fresh_creds + + expect(result).to be true + expect(mock_sequel.opts[:user]).to eq('alt-user') + end + + it 'returns false when creds lack user/password' do + Legion::Settings[:data][:creds] = { host: '127.0.0.1' } + + expect(Legion::Data::Connection.reconnect_with_fresh_creds).to be false + end + + it 'returns false when creds is not a hash' do + Legion::Settings[:data][:creds] = nil + + expect(Legion::Data::Connection.reconnect_with_fresh_creds).to be false + end + + it 'returns false and handles exception when test_connection fails' do + Legion::Settings[:data][:creds] = { user: 'new-user', password: 'new-pass' } + + allow(mock_sequel).to receive(:disconnect) + allow(mock_sequel).to receive(:test_connection).and_raise(Sequel::DatabaseConnectionError.new('connection refused')) + + result = Legion::Data::Connection.reconnect_with_fresh_creds + + expect(result).to be false + end + end +end diff --git 
a/spec/legion/data/connection_replicas_spec.rb b/spec/legion/data/connection_replicas_spec.rb new file mode 100644 index 0000000..3a9a007 --- /dev/null +++ b/spec/legion/data/connection_replicas_spec.rb @@ -0,0 +1,219 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Legion::Data::Connection do + # Save and restore all touched state around each example + before(:each) do + @saved_adapter = Legion::Settings[:data][:adapter] + @saved_replica_url = Legion::Settings[:data][:read_replica_url] + @saved_replicas = Legion::Settings[:data][:replicas] + @saved_connected = Legion::Settings[:data][:connected] + @saved_ivar_adapter = described_class.instance_variable_get(:@adapter) + @saved_ivar_sequel = described_class.instance_variable_get(:@sequel) + @saved_ivar_replicas = described_class.instance_variable_get(:@replica_servers) + + # Reset mutable state before each example + described_class.instance_variable_set(:@adapter, nil) + described_class.instance_variable_set(:@replica_servers, nil) + Legion::Settings[:data][:connected] = false + end + + after(:each) do + described_class.instance_variable_set(:@adapter, @saved_ivar_adapter) + described_class.instance_variable_set(:@sequel, @saved_ivar_sequel) + described_class.instance_variable_set(:@replica_servers, @saved_ivar_replicas) + Legion::Settings[:data][:adapter] = @saved_adapter + Legion::Settings[:data][:read_replica_url] = @saved_replica_url + Legion::Settings[:data][:replicas] = @saved_replicas + Legion::Settings[:data][:connected] = @saved_connected + end + + # Build a minimal Sequel::Database double with the methods we call. 
+ def fake_sequel_db(**_opts) + db = instance_double(Sequel::Database) + allow(db).to receive(:extension) + allow(db).to receive(:add_servers) + allow(db).to receive(:disconnect) + allow(db).to receive(:loggers).and_return([]) + allow(db).to receive(:logger=) + allow(db).to receive(:sql_log_level=) + allow(db).to receive(:log_warn_duration=) + db + end + + describe '#connect_with_replicas' do + context 'when adapter is sqlite' do + it 'is a no-op and does not call extension' do + Legion::Settings[:data][:adapter] = 'sqlite' + Legion::Settings[:data][:read_replica_url] = 'postgres://replica/db' + Legion::Settings[:data][:replicas] = [] + + db = fake_sequel_db + described_class.instance_variable_set(:@sequel, db) + described_class.instance_variable_set(:@adapter, :sqlite) + + expect(db).not_to receive(:extension) + expect(db).not_to receive(:add_servers) + + described_class.connect_with_replicas + expect(described_class.replica_servers).to be_empty + end + end + + context 'when adapter is postgres but no replicas configured' do + it 'is a no-op when both read_replica_url and replicas are empty' do + Legion::Settings[:data][:read_replica_url] = nil + Legion::Settings[:data][:replicas] = [] + + db = fake_sequel_db + described_class.instance_variable_set(:@sequel, db) + described_class.instance_variable_set(:@adapter, :postgres) + + expect(db).not_to receive(:extension) + expect(db).not_to receive(:add_servers) + + described_class.connect_with_replicas + expect(described_class.replica_servers).to be_empty + end + + it 'is a no-op when read_replica_url is empty string and replicas is empty' do + Legion::Settings[:data][:read_replica_url] = '' + Legion::Settings[:data][:replicas] = [] + + db = fake_sequel_db + described_class.instance_variable_set(:@sequel, db) + described_class.instance_variable_set(:@adapter, :postgres) + + expect(db).not_to receive(:extension) + described_class.connect_with_replicas + expect(described_class.replica_servers).to be_empty + end + end + + 
context 'when adapter is postgres with a single read_replica_url' do + it 'loads server_block extension and adds :read_0 server' do + url = 'postgres://replica-host/db' + Legion::Settings[:data][:read_replica_url] = url + Legion::Settings[:data][:replicas] = [] + + db = fake_sequel_db + described_class.instance_variable_set(:@sequel, db) + described_class.instance_variable_set(:@adapter, :postgres) + + expect(db).to receive(:extension).with(:server_block) + expect(db).to receive(:add_servers).with(read_0: url) + + described_class.connect_with_replicas + expect(described_class.replica_servers).to eq([:read_0]) + end + end + + context 'when adapter is postgres with multiple replicas in the array' do + it 'adds :read_0 and :read_1 servers' do + url0 = 'postgres://replica-0/db' + url1 = 'postgres://replica-1/db' + Legion::Settings[:data][:read_replica_url] = nil + Legion::Settings[:data][:replicas] = [url0, url1] + + db = fake_sequel_db + described_class.instance_variable_set(:@sequel, db) + described_class.instance_variable_set(:@adapter, :postgres) + + expect(db).to receive(:extension).with(:server_block) + expect(db).to receive(:add_servers).with(read_0: url0) + expect(db).to receive(:add_servers).with(read_1: url1) + + described_class.connect_with_replicas + expect(described_class.replica_servers).to eq(%i[read_0 read_1]) + end + end + + context 'deduplication when read_replica_url is also in replicas array' do + it 'registers the URL only once as :read_0' do + url = 'postgres://replica/db' + Legion::Settings[:data][:read_replica_url] = url + Legion::Settings[:data][:replicas] = [url] + + db = fake_sequel_db + described_class.instance_variable_set(:@sequel, db) + described_class.instance_variable_set(:@adapter, :postgres) + + expect(db).to receive(:extension).with(:server_block) + expect(db).to receive(:add_servers).with(read_0: url).once + + described_class.connect_with_replicas + expect(described_class.replica_servers).to eq([:read_0]) + end + end + + context 
'server_block extension loading' do + it 'calls @sequel.extension(:server_block) when replicas are present' do + url = 'postgres://replica/db' + Legion::Settings[:data][:read_replica_url] = url + Legion::Settings[:data][:replicas] = [] + + db = fake_sequel_db + described_class.instance_variable_set(:@sequel, db) + described_class.instance_variable_set(:@adapter, :postgres) + + expect(db).to receive(:extension).with(:server_block) + described_class.connect_with_replicas + end + end + end + + describe '#read_server' do + it 'returns :read_0 when replicas are configured' do + described_class.instance_variable_set(:@replica_servers, [:read_0]) + expect(described_class.read_server).to eq(:read_0) + end + + it 'returns :default when no replicas are configured' do + described_class.instance_variable_set(:@replica_servers, nil) + expect(described_class.read_server).to eq(:default) + end + + it 'returns :default when replica_servers is an empty array' do + described_class.instance_variable_set(:@replica_servers, []) + expect(described_class.read_server).to eq(:default) + end + end + + describe '#replica_servers' do + it 'returns empty array before any replica wiring' do + described_class.instance_variable_set(:@replica_servers, nil) + expect(described_class.replica_servers).to eq([]) + end + + it 'returns the registered server names after wiring' do + url0 = 'postgres://r0/db' + url1 = 'postgres://r1/db' + Legion::Settings[:data][:read_replica_url] = nil + Legion::Settings[:data][:replicas] = [url0, url1] + + db = fake_sequel_db + described_class.instance_variable_set(:@sequel, db) + described_class.instance_variable_set(:@adapter, :postgres) + + described_class.connect_with_replicas + expect(described_class.replica_servers).to eq(%i[read_0 read_1]) + end + end + + describe 'settings flag to disable replicas' do + it 'does not wire replicas when replicas array is empty and read_replica_url is nil' do + Legion::Settings[:data][:read_replica_url] = nil + 
Legion::Settings[:data][:replicas] = [] + + db = fake_sequel_db + described_class.instance_variable_set(:@sequel, db) + described_class.instance_variable_set(:@adapter, :postgres) + + described_class.connect_with_replicas + + expect(described_class.replica_servers).to be_empty + expect(described_class.read_server).to eq(:default) + end + end +end diff --git a/spec/legion/data/connection_spec.rb b/spec/legion/data/connection_spec.rb index c284ddc..b55bec2 100644 --- a/spec/legion/data/connection_spec.rb +++ b/spec/legion/data/connection_spec.rb @@ -1,4 +1,7 @@ +# frozen_string_literal: true + require 'spec_helper' +require 'tmpdir' RSpec.describe 'Legion::Data::Connection' do after(:each) do @@ -6,33 +9,28 @@ end it 'can setup' do - expect { Legion::Data::Connection.setup }.not_to raise_exception + expect { Legion::Data::Connection.setup }.not_to raise_error # expect(Legion::Data::Connection.adapter).to eq :mysql2 expect(Legion::Settings[:data][:connected]).to eq true end it 'can shutdown' do - expect { Legion::Data::Connection.shutdown }.not_to raise_exception + expect { Legion::Data::Connection.shutdown }.not_to raise_error expect(Legion::Settings[:data][:connected]).to eq false end - it 'has default_creds' do - expect(Legion::Data::Connection.default_creds).to be_a Hash - expect(Legion::Data::Connection.default_creds[:host]).to eq '127.0.0.1' - expect(Legion::Data::Connection.default_creds[:port]).to eq 3306 - expect(Legion::Data::Connection.default_creds[:username]).to eq 'legion' - expect(Legion::Data::Connection.default_creds[:password]).to eq 'legion' - expect(Legion::Data::Connection.default_creds[:database]).to eq 'legion' - expect(Legion::Data::Connection.default_creds[:preconnect]).to eq nil - expect(Legion::Data::Connection.default_creds[:max_connections]).to eq 4 + it 'has creds_builder' do + creds = Legion::Data::Connection.creds_builder + expect(creds).to be_a Hash + expect(creds[:database]).to eq 'legionio.db' end it 'can setup with logger' do - 
Legion::Settings[:data][:connection][:log] = true - Legion::Settings[:data][:connection][:sql_log_level] = 'debug' - Legion::Settings[:data][:connection][:log_warn_duration] = 42 + Legion::Settings[:data][:log] = true + Legion::Settings[:data][:sql_log_level] = 'debug' + Legion::Settings[:data][:log_warn_duration] = 42 Legion::Data::Connection.setup - expect(Legion::Data::Connection.sequel.sql_log_level).to eq 'debug' + expect(Legion::Data::Connection.sequel.sql_log_level).to eq :debug expect(Legion::Data::Connection.sequel.log_warn_duration).to eq 42 end @@ -40,18 +38,60 @@ expect(Legion::Data::Connection.creds_builder).to be_a Hash end - it 'using the Legion::Logging logger' do + it 'using a tagged SlowQueryLogger' do Legion::Data::Connection.setup expect(Legion::Data::Connection.sequel.loggers).to be_a Array expect(Legion::Data::Connection.sequel.loggers.count).to be > 0 - expect(Legion::Data::Connection.sequel.loggers).to include Legion::Logging + expect(Legion::Data::Connection.sequel.loggers.first).to be_a Legion::Data::Connection::SlowQueryLogger + expect(Legion::Data::Connection.sequel.loggers.first.tagged.segments).to eq(%w[data connection]) end it 'uses other things' do Legion::Data::Connection.setup expect(Legion::Settings[:data][:connected]).to eq true expect(Legion::Data::Connection.sequel.log_warn_duration) - .to eq Legion::Settings[:data][:connection][:log_warn_duration] - expect(Legion::Data::Connection.sequel.sql_log_level).to eq Legion::Settings[:data][:connection][:sql_log_level] + .to eq Legion::Settings[:data][:log_warn_duration] + expect(Legion::Data::Connection.sequel.sql_log_level).to eq Legion::Settings[:data][:sql_log_level].to_sym + end + + describe 'connection_validation default' do + it 'defaults to false — the validator pings SELECT NULL on every checkout/checkin and kills throughput' do + expect(Legion::Data::Settings.default[:connection_validation]).to eq(false) + end + end + + describe 'connection_validation_timeout default' do + it 
'defaults to -1 so every checkout validates liveness when validation is enabled' do + expect(Legion::Data::Settings.default[:connection_validation_timeout]).to eq(-1) + end + end + + describe 'preconnect default' do + it 'defaults to false to avoid background thread noise on failed network connects' do + expect(Legion::Data::Settings.default[:preconnect]).to eq(false) + end + end + + describe Legion::Data::Connection::QueryFileLogger do + around do |example| + Dir.mktmpdir('legion-data-query-log') do |dir| + @query_log_path = File.join(dir, 'query.log') + example.run + end + end + + it 'ignores debug writes after close without warning' do + logger = described_class.new(@query_log_path) + logger.close + + expect(logger).not_to receive(:handle_exception) + expect { logger.debug('SELECT 1') }.not_to raise_error + end + + it 'allows repeated close calls' do + logger = described_class.new(@query_log_path) + + expect { 2.times { logger.close } }.not_to raise_error + end end end diff --git a/spec/legion/data/connection_tls_spec.rb b/spec/legion/data/connection_tls_spec.rb new file mode 100644 index 0000000..d69d096 --- /dev/null +++ b/spec/legion/data/connection_tls_spec.rb @@ -0,0 +1,99 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'Legion::Data::Connection TLS' do + before do + stub_const('Legion::Crypt::TLS', Module.new) + end + + describe '.merge_tls_creds' do + context 'with postgres adapter and TLS enabled' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ tls: { enabled: true } }) + end + + it 'adds sslmode and sslrootcert' do + allow(Legion::Crypt::TLS).to receive(:resolve).and_return( + { enabled: true, verify: :peer, ca: '/ca.crt', cert: nil, key: nil, auto_detected: false } + ) + creds = {} + result = Legion::Data::Connection.merge_tls_creds(creds, adapter: :postgres, port: 5432) + expect(result[:sslmode]).to eq 'verify-full' + expect(result[:sslrootcert]).to eq '/ca.crt' + end + + it 'uses sslmode 
require for verify none' do + allow(Legion::Crypt::TLS).to receive(:resolve).and_return( + { enabled: true, verify: :none, ca: nil, cert: nil, key: nil, auto_detected: false } + ) + creds = {} + result = Legion::Data::Connection.merge_tls_creds(creds, adapter: :postgres, port: 5432) + expect(result[:sslmode]).to eq 'require' + end + + it 'includes sslcert and sslkey for mutual TLS' do + allow(Legion::Crypt::TLS).to receive(:resolve).and_return( + { enabled: true, verify: :mutual, ca: '/ca.crt', cert: '/c.crt', key: '/c.key', auto_detected: false } + ) + creds = {} + result = Legion::Data::Connection.merge_tls_creds(creds, adapter: :postgres, port: 5432) + expect(result[:sslcert]).to eq '/c.crt' + expect(result[:sslkey]).to eq '/c.key' + end + end + + context 'with mysql2 adapter and TLS enabled' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ tls: { enabled: true } }) + end + + it 'adds ssl_mode and sslca' do + allow(Legion::Crypt::TLS).to receive(:resolve).and_return( + { enabled: true, verify: :peer, ca: '/ca.crt', cert: nil, key: nil, auto_detected: false } + ) + creds = {} + result = Legion::Data::Connection.merge_tls_creds(creds, adapter: :mysql2, port: 3306) + expect(result[:ssl_mode]).to eq 'verify_identity' + expect(result[:sslca]).to eq '/ca.crt' + end + + it 'uses ssl_mode required for verify none' do + allow(Legion::Crypt::TLS).to receive(:resolve).and_return( + { enabled: true, verify: :none, ca: nil, cert: nil, key: nil, auto_detected: false } + ) + creds = {} + result = Legion::Data::Connection.merge_tls_creds(creds, adapter: :mysql2, port: 3306) + expect(result[:ssl_mode]).to eq 'required' + end + end + + context 'when TLS is disabled' do + it 'returns creds unchanged' do + allow(Legion::Crypt::TLS).to receive(:resolve).and_return( + { enabled: false, verify: :peer, ca: nil, cert: nil, key: nil, auto_detected: false } + ) + creds = { host: 'db.example.com' } + result = Legion::Data::Connection.merge_tls_creds(creds, 
adapter: :postgres, port: 5432) + expect(result).to eq({ host: 'db.example.com' }) + end + end + + context 'when sqlite adapter' do + it 'skips TLS entirely' do + creds = { database: 'test.db' } + result = Legion::Data::Connection.merge_tls_creds(creds, adapter: :sqlite, port: nil) + expect(result).to eq({ database: 'test.db' }) + end + end + + context 'when Legion::Crypt::TLS is not defined' do + it 'returns creds unchanged' do + hide_const('Legion::Crypt::TLS') + creds = { host: 'db.example.com' } + result = Legion::Data::Connection.merge_tls_creds(creds, adapter: :postgres, port: 5432) + expect(result).to eq({ host: 'db.example.com' }) + end + end + end +end diff --git a/spec/legion/data/encryption/cipher_spec.rb b/spec/legion/data/encryption/cipher_spec.rb new file mode 100644 index 0000000..e597bdd --- /dev/null +++ b/spec/legion/data/encryption/cipher_spec.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/encryption/cipher' + +RSpec.describe Legion::Data::Encryption::Cipher do + let(:key) { OpenSSL::Random.random_bytes(32) } + let(:plaintext) { 'sensitive data here' } + let(:aad) { 'tasks:1:payload' } + + describe '.encrypt / .decrypt' do + it 'round-trips plaintext' do + blob = described_class.encrypt(plaintext, key: key, aad: aad) + result = described_class.decrypt(blob, key: key, aad: aad) + expect(result).to eq(plaintext) + end + + it 'produces different ciphertext each time (random IV)' do + blob1 = described_class.encrypt(plaintext, key: key) + blob2 = described_class.encrypt(plaintext, key: key) + expect(blob1).not_to eq(blob2) + end + + it 'fails with wrong key' do + blob = described_class.encrypt(plaintext, key: key, aad: aad) + wrong_key = OpenSSL::Random.random_bytes(32) + expect { described_class.decrypt(blob, key: wrong_key, aad: aad) }.to raise_error(OpenSSL::Cipher::CipherError) + end + + it 'fails with wrong AAD' do + blob = described_class.encrypt(plaintext, key: key, aad: aad) + expect { 
described_class.decrypt(blob, key: key, aad: 'wrong') }.to raise_error(OpenSSL::Cipher::CipherError) + end + + it 'raises on truncated data' do + expect { described_class.decrypt('short', key: key) }.to raise_error(ArgumentError, /too short/) + end + end +end diff --git a/spec/legion/data/encryption/key_provider_spec.rb b/spec/legion/data/encryption/key_provider_spec.rb new file mode 100644 index 0000000..f030dfa --- /dev/null +++ b/spec/legion/data/encryption/key_provider_spec.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/encryption/key_provider' + +RSpec.describe Legion::Data::Encryption::KeyProvider do + let(:provider) { described_class.new } + + describe '#key_for' do + it 'returns 32-byte key for default' do + key = provider.key_for + expect(key.bytesize).to eq(32) + end + + it 'caches keys' do + key1 = provider.key_for + key2 = provider.key_for + expect(key1).to equal(key2) + end + + it 'returns different cache entries for different tenants' do + key1 = provider.key_for(tenant_id: nil) + key2 = provider.key_for(tenant_id: 'tenant-a') + expect(key1).not_to eq(key2) if defined?(Legion::Crypt::PartitionKeys) + end + end + + describe '#clear_cache!' do + it 'empties the key cache' do + provider.key_for + provider.clear_cache! 
+ expect(provider.instance_variable_get(:@key_cache)).to be_empty + end + end +end diff --git a/spec/legion/data/encryption/sequel_plugin_spec.rb b/spec/legion/data/encryption/sequel_plugin_spec.rb new file mode 100644 index 0000000..a046234 --- /dev/null +++ b/spec/legion/data/encryption/sequel_plugin_spec.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/encryption/sequel_plugin' + +RSpec.describe Legion::Data::Encryption::SequelPlugin do + describe 'ClassMethods' do + let(:klass) do + Class.new do + extend Legion::Data::Encryption::SequelPlugin::ClassMethods + end + end + + it 'tracks encrypted columns' do + expect(klass.encrypted_columns).to be_a(Hash) + end + + it 'provides key provider' do + expect(klass.encryption_key_provider).to be_a(Legion::Data::Encryption::KeyProvider) + end + end + + describe 'integration' do + let(:db) do + Sequel.sqlite.tap do |database| + database.create_table(:encrypted_records) do + primary_key :id + String :tenant_id + column :secret, 'BLOB' + column :tenant_secret, 'BLOB' + end + end + end + + let(:model_class) do + Class.new(Sequel::Model(db[:encrypted_records])) do + plugin Legion::Data::Encryption::SequelPlugin + encrypted_column :secret + encrypted_column :tenant_secret, key_scope: :tenant + end + end + + after do + db.disconnect + end + + it 'decrypts a newly-created persisted row' do + record = model_class.create(secret: 'hello') + + expect(model_class[record.id].secret).to eq('hello') + end + + it 're-encrypts newly-created rows with their persisted primary key' do + record = model_class.create(secret: 'hello') + blob = db[:encrypted_records].where(id: record.id).get(:secret) + key = model_class.encryption_key_provider.key_for + + expect( + Legion::Data::Encryption::Cipher.decrypt( + blob, + key: key, + aad: Legion::Data::Encryption::SequelPlugin.aad_for( + table_name: :encrypted_records, + primary_key: record.id, + column: :secret + ) + ) + ).to eq('hello') + + expect do + 
Legion::Data::Encryption::Cipher.decrypt( + blob, + key: key, + aad: Legion::Data::Encryption::SequelPlugin.aad_for( + table_name: :encrypted_records, + primary_key: 0, + column: :secret + ) + ) + end.to raise_error(OpenSSL::Cipher::CipherError) + end + + it 'still reads rows encrypted with the legacy pre-persist AAD' do + key = model_class.encryption_key_provider.key_for + blob = Legion::Data::Encryption::Cipher.encrypt( + 'hello', + key: key, + aad: Legion::Data::Encryption::SequelPlugin.aad_for( + table_name: :encrypted_records, + primary_key: 0, + column: :secret + ) + ) + id = db[:encrypted_records].insert(secret: Sequel.blob(blob)) + + expect(model_class[id].secret).to eq('hello') + end + + it 'decrypts updates on already-persisted rows' do + record = model_class.create(secret: 'hello') + + record.update(secret: 'world') + + expect(model_class[record.id].secret).to eq('world') + end + + it 'preserves nil encrypted columns' do + record = model_class.create(secret: nil, tenant_secret: nil) + reloaded = model_class[record.id] + + expect(reloaded.secret).to be_nil + expect(reloaded.tenant_secret).to be_nil + end + + it 'decrypts tenant-scoped columns after persistence' do + provider = instance_double(Legion::Data::Encryption::KeyProvider) + allow(provider).to receive(:key_for) do |tenant_id: nil| + OpenSSL::Digest.digest('SHA256', "tenant:#{tenant_id}") + end + model_class.instance_variable_set(:@encryption_key_provider, provider) + + record = model_class.create(tenant_id: 'tenant-a', tenant_secret: 'hello') + reloaded = model_class[record.id] + + expect(reloaded.tenant_secret).to eq('hello') + expect(provider).to have_received(:key_for).with(tenant_id: 'tenant-a').at_least(:once) + end + end +end diff --git a/spec/legion/data/event_store/projection_spec.rb b/spec/legion/data/event_store/projection_spec.rb new file mode 100644 index 0000000..9eb9a19 --- /dev/null +++ b/spec/legion/data/event_store/projection_spec.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true 
+ +require 'spec_helper' +require 'legion/data/event_store' +require 'legion/data/event_store/projection' + +RSpec.describe Legion::Data::EventStore::Projection do + describe '#apply' do + it 'raises NotImplementedError' do + expect { described_class.new.apply({}) }.to raise_error(NotImplementedError) + end + end +end + +RSpec.describe Legion::Data::EventStore::ConsentState do + let(:projection) { described_class.new } + + it 'tracks granted consents' do + projection.apply({ type: 'consent.granted', data: { scope: 'llm', tier: 'full' } }) + expect(projection.state['llm']).to eq('full') + end + + it 'removes revoked consents' do + projection.apply({ type: 'consent.granted', data: { scope: 'llm', tier: 'full' } }) + projection.apply({ type: 'consent.revoked', data: { scope: 'llm' } }) + expect(projection.state).not_to have_key('llm') + end + + it 'updates modified consents' do + projection.apply({ type: 'consent.granted', data: { scope: 'llm', tier: 'full' } }) + projection.apply({ type: 'consent.modified', data: { scope: 'llm', tier: 'limited' } }) + expect(projection.state['llm']).to eq('limited') + end +end + +RSpec.describe Legion::Data::EventStore::GovernanceTimeline do + let(:projection) { described_class.new } + + it 'appends events to timeline' do + projection.apply({ type: 'extinction.triggered', stream: 'sys', created_at: Time.now, data: {} }) + expect(projection.state.size).to eq(1) + end +end diff --git a/spec/legion/data/event_store_spec.rb b/spec/legion/data/event_store_spec.rb new file mode 100644 index 0000000..f7749dc --- /dev/null +++ b/spec/legion/data/event_store_spec.rb @@ -0,0 +1,137 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/event_store' + +RSpec.describe Legion::Data::EventStore do + let(:db) do + Sequel.sqlite.tap do |database| + database.create_table(:governance_events) do + primary_key :id + String :stream_id, null: false + String :event_type, null: false + Integer :sequence_number, null: false + column 
:data_json, :text + column :metadata_json, :text + String :event_hash, size: 64 + String :previous_hash, size: 64 + DateTime :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP + end + end + end + + after do + db.disconnect if defined?(db) && db + end + + describe 'GOVERNANCE_EVENT_TYPES' do + it 'includes consent and extinction events' do + expect(described_class::GOVERNANCE_EVENT_TYPES).to include('consent.granted', 'extinction.triggered') + end + end + + describe '.append' do + it 'returns error when db unavailable' do + allow(described_class).to receive(:db_ready?).and_return(false) + result = described_class.append(stream: 'test', type: 'consent.granted') + expect(result[:error]).to include('db unavailable') + end + end + + describe '.read_stream' do + it 'returns empty array when db unavailable' do + allow(described_class).to receive(:db_ready?).and_return(false) + expect(described_class.read_stream('test')).to eq([]) + end + end + + describe '.verify_chain' do + it 'returns invalid when db unavailable' do + allow(described_class).to receive(:db_ready?).and_return(false) + result = described_class.verify_chain('test') + expect(result[:valid]).to be false + end + end + + context 'with a live database' do + before do + allow(Legion::Data).to receive(:connection).and_return(db) + allow(described_class).to receive(:db_ready?).and_return(true) + end + + it 'round-trips data and metadata through append and read_stream' do + described_class.append( + stream: 'stream-1', + type: 'consent.granted', + data: { granted: true }, + metadata: { request_id: 'req-1', actor: 'worker-1' } + ) + + events = described_class.read_stream('stream-1') + + expect(events.size).to eq(1) + expect(events.first[:data]).to eq({ granted: true }) + expect(events.first[:metadata]).to eq({ request_id: 'req-1', actor: 'worker-1' }) + end + + it 'verifies a multi-event chain when metadata is unchanged' do + described_class.append( + stream: 'stream-2', + type: 'consent.granted', + data: { 
step: 1 }, + metadata: { request_id: 'req-1' } + ) + described_class.append( + stream: 'stream-2', + type: 'consent.modified', + data: { step: 2 }, + metadata: { request_id: 'req-2' } + ) + + result = described_class.verify_chain('stream-2') + + expect(result).to eq(valid: true, length: 2) + end + + it 'detects metadata tampering for newly-written rows' do + described_class.append( + stream: 'stream-3', + type: 'consent.granted', + data: { granted: true }, + metadata: { request_id: 'req-1' } + ) + + db[:governance_events] + .where(stream_id: 'stream-3', sequence_number: 1) + .update(metadata_json: Legion::JSON.dump(request_id: 'tampered')) + + result = described_class.verify_chain('stream-3') + + expect(result).to eq(valid: false, broken_at: 1) + end + + it 'continues to verify legacy rows hashed without metadata_json' do + stream = 'legacy-stream' + type = 'consent.granted' + data_json = Legion::JSON.dump(granted: true) + metadata_json = Legion::JSON.dump(request_id: 'req-1') + previous_hash = '0' * 64 + legacy_hash = Digest::SHA256.hexdigest("#{stream}:1:#{type}:#{data_json}:#{previous_hash}") + + db[:governance_events].insert( + stream_id: stream, + event_type: type, + sequence_number: 1, + data_json: data_json, + metadata_json: metadata_json, + event_hash: legacy_hash, + previous_hash: previous_hash, + created_at: Time.now + ) + + result = described_class.verify_chain(stream) + + expect(result).to eq(valid: true, length: 1, legacy_hashes: 1) + end + end +end diff --git a/spec/legion/data/extract/handlers/builtin_spec.rb b/spec/legion/data/extract/handlers/builtin_spec.rb new file mode 100644 index 0000000..53adb68 --- /dev/null +++ b/spec/legion/data/extract/handlers/builtin_spec.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require 'legion/data/extract' +require 'legion/data/extract/handlers/text' +require 'legion/data/extract/handlers/markdown' +require 'legion/data/extract/handlers/csv' +require 'legion/data/extract/handlers/json' +require 
'legion/data/extract/handlers/jsonl' +require 'tempfile' + +RSpec.describe 'Built-in Extract Handlers' do + describe Legion::Data::Extract::Handlers::Text do + it 'extracts text from a file' do + f = Tempfile.new(['test', '.txt']) + f.write('hello world') + f.flush + result = described_class.extract(f.path) + expect(result[:text]).to eq('hello world') + expect(result[:metadata][:bytes]).to eq(11) + ensure + f&.close! + end + + it 'extracts from IO' do + io = StringIO.new('from io') + result = described_class.extract(io) + expect(result[:text]).to eq('from io') + end + end + + describe Legion::Data::Extract::Handlers::Markdown do + it 'strips YAML frontmatter' do + f = Tempfile.new(['test', '.md']) + f.write("---\ntitle: Test\n---\n# Hello\nWorld") + f.flush + result = described_class.extract(f.path) + expect(result[:text]).to eq("# Hello\nWorld") + expect(result[:metadata][:has_frontmatter]).to be true + ensure + f&.close! + end + + it 'passes through markdown without frontmatter' do + f = Tempfile.new(['test', '.md']) + f.write('# Just Markdown') + f.flush + result = described_class.extract(f.path) + expect(result[:text]).to eq('# Just Markdown') + ensure + f&.close! + end + end + + describe Legion::Data::Extract::Handlers::Csv do + it 'extracts CSV as key-value text' do + f = Tempfile.new(['test', '.csv']) + f.write("name,age\nAlice,30\nBob,25") + f.flush + result = described_class.extract(f.path) + expect(result[:text]).to include('name: Alice') + expect(result[:metadata][:rows]).to eq(2) + expect(result[:metadata][:columns]).to eq(2) + ensure + f&.close! + end + end + + describe Legion::Data::Extract::Handlers::Json do + it 'pretty-prints JSON' do + f = Tempfile.new(['test', '.json']) + f.write('{"key":"value"}') + f.flush + result = described_class.extract(f.path) + expect(result[:text]).to include('"key"') + expect(result[:metadata][:keys]).to eq(['key']) + ensure + f&.close! 
+ end + end + + describe Legion::Data::Extract::Handlers::Jsonl do + it 'extracts JSONL lines' do + f = Tempfile.new(['test', '.jsonl']) + f.write("{\"a\":1}\n{\"b\":2}") + f.flush + result = described_class.extract(f.path) + expect(result[:text]).to include('"a"') + expect(result[:metadata][:lines]).to eq(2) + ensure + f&.close! + end + end +end diff --git a/spec/legion/data/extract/handlers/optional_spec.rb b/spec/legion/data/extract/handlers/optional_spec.rb new file mode 100644 index 0000000..39563cb --- /dev/null +++ b/spec/legion/data/extract/handlers/optional_spec.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +require 'legion/data/extract' +require 'legion/data/extract/handlers/pdf' +require 'legion/data/extract/handlers/docx' +require 'legion/data/extract/handlers/pptx' +require 'legion/data/extract/handlers/xlsx' +require 'legion/data/extract/handlers/html' +require 'tempfile' + +RSpec.describe 'Optional Extract Handlers' do + describe Legion::Data::Extract::Handlers::Pdf do + it 'is registered for :pdf type' do + expect(Legion::Data::Extract::Handlers::Base.for_type(:pdf)).to eq(described_class) + end + + it 'declares pdf-reader gem dependency' do + expect(described_class.gem_name).to eq('pdf-reader') + end + end + + describe Legion::Data::Extract::Handlers::Docx do + it 'is registered for :docx type' do + expect(Legion::Data::Extract::Handlers::Base.for_type(:docx)).to eq(described_class) + end + + it 'declares docx gem dependency' do + expect(described_class.gem_name).to eq('docx') + end + end + + describe Legion::Data::Extract::Handlers::Pptx do + it 'is registered for :pptx type' do + expect(Legion::Data::Extract::Handlers::Base.for_type(:pptx)).to eq(described_class) + end + + it 'declares rubyzip gem dependency' do + expect(described_class.gem_name).to eq('rubyzip') + end + end + + describe Legion::Data::Extract::Handlers::Xlsx do + it 'is registered for :xlsx type' do + expect(Legion::Data::Extract::Handlers::Base.for_type(:xlsx)).to 
eq(described_class) + end + + it 'declares rubyXL gem dependency' do + expect(described_class.gem_name).to eq('rubyXL') + end + end + + describe Legion::Data::Extract::Handlers::Html do + it 'is registered for :html type' do + expect(Legion::Data::Extract::Handlers::Base.for_type(:html)).to eq(described_class) + end + + it 'declares nokogiri gem dependency' do + expect(described_class.gem_name).to eq('nokogiri') + end + + context 'when nokogiri is available' do + it 'extracts text from HTML string' do + f = Tempfile.new(['test', '.html']) + f.write('<html><head><title>Test</title></head><body><script>var x=1</script><p>Hello World</p></body></html>') + f.flush + result = described_class.extract(f.path) + if result[:text] + expect(result[:text]).to include('Hello World') + expect(result[:text]).not_to include('var x=1') + expect(result[:metadata][:title]).to eq('Test') + else + expect(result[:error]).to eq(:gem_not_installed) + end + ensure + f&.close! + end + end + end +end diff --git a/spec/legion/data/extract/handlers/vtt_spec.rb b/spec/legion/data/extract/handlers/vtt_spec.rb new file mode 100644 index 0000000..8ae0d6b --- /dev/null +++ b/spec/legion/data/extract/handlers/vtt_spec.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/extract/handlers/vtt' + +RSpec.describe Legion::Data::Extract::Handlers::Vtt do + describe '.extract' do + let(:vtt_content) do + <<~VTT + WEBVTT + + 00:00:01.000 --> 00:00:05.000 + <v Alice>Hello everyone, let's get started. + + 00:00:05.500 --> 00:00:10.000 + <v Bob>Thanks Alice. I have the Q3 numbers ready. + + 00:00:10.500 --> 00:00:15.000 + <v Alice>Great, please share them with the group.
+ VTT + end + + it 'extracts text from VTT content' do + result = described_class.extract(vtt_content) + expect(result[:text]).to include('Hello everyone') + expect(result[:text]).to include('Q3 numbers') + end + + it 'preserves speaker attribution by default' do + result = described_class.extract(vtt_content) + expect(result[:text]).to include('Alice:') + expect(result[:text]).to include('Bob:') + end + + it 'strips speaker tags when preserve_speakers is false' do + result = described_class.extract(vtt_content, preserve_speakers: false) + expect(result[:text]).not_to include('Alice:') + expect(result[:text]).to include('Hello everyone') + end + + it 'strips WebVTT timestamps from output' do + result = described_class.extract(vtt_content) + expect(result[:text]).not_to match(/\d{2}:\d{2}:\d{2}\.\d{3} -->/) + end + + it 'handles input via file path' do + require 'tempfile' + Tempfile.create(['test', '.vtt']) do |f| + f.write(vtt_content) + f.flush + result = described_class.extract(f.path) + expect(result[:text]).to include('Hello everyone') + end + end + + it 'returns error hash on failure' do + result = described_class.extract('/nonexistent/path.vtt') + expect(result[:text]).to be_nil + expect(result[:error]).to be_a(String) + end + end + + describe '.type' do + it 'returns :vtt' do + expect(described_class.type).to eq(:vtt) + end + end + + describe '.extensions' do + it 'includes .vtt' do + expect(described_class.extensions).to include('.vtt') + end + end +end diff --git a/spec/legion/data/extract/type_detector_spec.rb b/spec/legion/data/extract/type_detector_spec.rb new file mode 100644 index 0000000..a533d6d --- /dev/null +++ b/spec/legion/data/extract/type_detector_spec.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +require 'legion/data/extract/type_detector' + +RSpec.describe Legion::Data::Extract::TypeDetector do + describe '.detect_from_path' do + it 'detects PDF' do + expect(described_class.detect_from_path('/tmp/doc.pdf')).to eq(:pdf) + end + + it
'detects Markdown' do + expect(described_class.detect_from_path('/tmp/readme.md')).to eq(:markdown) + end + + it 'detects HTML variants' do + expect(described_class.detect_from_path('/tmp/page.htm')).to eq(:html) + expect(described_class.detect_from_path('/tmp/page.html')).to eq(:html) + end + + it 'returns nil for unknown extensions' do + expect(described_class.detect_from_path('/tmp/file.xyz')).to be_nil + end + + it 'is case insensitive' do + expect(described_class.detect_from_path('/tmp/FILE.PDF')).to eq(:pdf) + end + end +end diff --git a/spec/legion/data/extract_spec.rb b/spec/legion/data/extract_spec.rb new file mode 100644 index 0000000..15f839d --- /dev/null +++ b/spec/legion/data/extract_spec.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/extract' +require 'legion/data/extract/handlers/text' +require 'legion/data/extract/handlers/markdown' +require 'legion/data/extract/handlers/csv' +require 'legion/data/extract/handlers/json' +require 'legion/data/extract/handlers/jsonl' +require 'tempfile' + +RSpec.describe Legion::Data::Extract do + describe '.extract' do + context 'with unknown type' do + it 'returns error' do + result = described_class.extract('test string', type: :auto) + expect(result[:success]).to be false + expect(result[:error]).to eq(:unknown_type) + expect(result[:extract_id]).to match(/\A[0-9a-f-]{36}\z/) + expect(result[:step_timings].map { |step| step[:name] }).to include('detect_type') + end + end + + context 'with explicit unknown type' do + it 'returns no_handler error' do + result = described_class.extract('test', type: :foobar) + expect(result[:success]).to be false + expect(result[:error]).to eq(:no_handler) + end + end + end + + describe '.supported_types' do + it 'returns an array of symbols' do + types = described_class.supported_types + expect(types).to be_an(Array) + types.each { |t| expect(t).to be_a(Symbol) } + end + end + + describe '.can_extract?' 
do + it 'returns false for unregistered types' do + expect(described_class.can_extract?(:foobar)).to be false + end + end + + describe '.register_handler' do + it 'registers a custom handler' do + custom = Class.new(Legion::Data::Extract::Handlers::Base) do + def self.type = :custom_test + def self.extract(source) = { text: source.to_s, metadata: {} } + end + described_class.register_handler(:custom_test, custom) + expect(described_class.can_extract?(:custom_test)).to be true + end + end + + describe '.extract with builtin handlers' do + it 'extracts a text file by path' do + f = Tempfile.new(['test', '.txt']) + f.write('integration test') + f.flush + result = described_class.extract(f.path) + expect(result[:success]).to be true + expect(result[:text]).to eq('integration test') + expect(result[:type]).to eq(:text) + expect(result[:step_timings].map { |step| step[:name] }).to include( + 'detect_type', 'resolve_handler', 'check_availability', 'handler_extract' + ) + ensure + f&.close! + end + + # it 'persists per-step timing metadata when the timing table is available' do + # f = Tempfile.new(['test', '.txt']) + # f.write('timed extraction') + # f.flush + # result = described_class.extract(f.path) + # rows = Legion::Data.connection[:extract_step_timings].where(extract_id: result[:extract_id]).all + # expect(rows.map { |row| row[:name] }).to include('handler_extract') + # expect(rows.all? { |row| row[:status] == 'success' }).to be true + # ensure + # f&.close! + # end + + it 'extracts with explicit type override' do + f = Tempfile.new(['test', '.unknown']) + f.write('forced text') + f.flush + result = described_class.extract(f.path, type: :text) + expect(result[:success]).to be true + expect(result[:text]).to eq('forced text') + ensure + f&.close! 
+ end + end +end diff --git a/spec/legion/data/helper_spec.rb b/spec/legion/data/helper_spec.rb new file mode 100644 index 0000000..c7a7cbf --- /dev/null +++ b/spec/legion/data/helper_spec.rb @@ -0,0 +1,253 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Legion::Data::Helper do + describe '#data_connected?' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'returns true when data is connected' do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: true }) + expect(instance.data_connected?).to be true + end + + it 'returns false when data is not connected' do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: false }) + expect(instance.data_connected?).to be false + end + end + + describe '#data_connection' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'delegates to Legion::Data::Connection.sequel' do + expect(instance.data_connection).to eq(Legion::Data::Connection.sequel) + end + end + + describe '#data_path' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + + def full_path + '/opt/legion/extensions/lex-test' + end + end + end + let(:instance) { test_class.new } + + it 'returns the data subdirectory path' do + expect(instance.data_path).to eq('/opt/legion/extensions/lex-test/data') + end + + it 'memoizes the result' do + first = instance.data_path + expect(instance.data_path).to equal(first) + end + end + + describe '#local_data_connected?' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'delegates to Legion::Data::Local.connected?' 
do + allow(Legion::Data::Local).to receive(:connected?).and_return(true) + expect(instance.local_data_connected?).to be true + end + end + + describe '#local_data_connection' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'delegates to Legion::Data::Local.connection' do + conn = double('local_connection') + allow(Legion::Data::Local).to receive(:connection).and_return(conn) + expect(instance.local_data_connection).to eq(conn) + end + end + + describe '#local_data_model' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'delegates to Legion::Data::Local.model with table name' do + model = double('model') + allow(Legion::Data::Local).to receive(:model).with(:tasks).and_return(model) + expect(instance.local_data_model(:tasks)).to eq(model) + end + end + + describe '#data_adapter' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'delegates to Legion::Data::Connection.adapter' do + allow(Legion::Data::Connection).to receive(:adapter).and_return(:sqlite) + expect(instance.data_adapter).to eq(:sqlite) + end + + it 'returns :unknown when an error is raised' do + allow(Legion::Data::Connection).to receive(:adapter).and_raise(StandardError) + expect(instance.data_adapter).to eq(:unknown) + end + end + + describe '#data_pool_stats' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'returns {} when not connected' do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: false }) + expect(instance.data_pool_stats).to eq({}) + end + + it 'delegates to Legion::Data::Connection.pool_stats when connected' do + stats = { size: 5, available: 3, in_use: 2 } + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: true }) + 
allow(Legion::Data::Connection).to receive(:pool_stats).and_return(stats) + expect(instance.data_pool_stats).to eq(stats) + end + + it 'returns {} when an error is raised' do + allow(Legion::Settings).to receive(:[]).and_return({}) + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: true }) + allow(Legion::Data::Connection).to receive(:pool_stats).and_raise(StandardError) + expect(instance.data_pool_stats).to eq({}) + end + end + + describe '#data_stats' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'returns {} when not connected' do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: false }) + expect(instance.data_stats).to eq({}) + end + + it 'delegates to Legion::Data.stats when connected' do + stats = { shared: { adapter: 'sqlite' }, local: {} } + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: true }) + allow(Legion::Data).to receive(:stats).and_return(stats) + expect(instance.data_stats).to eq(stats) + end + + it 'returns {} when an error is raised' do + allow(Legion::Settings).to receive(:[]).and_return({}) + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: true }) + allow(Legion::Data).to receive(:stats).and_raise(StandardError) + expect(instance.data_stats).to eq({}) + end + end + + describe '#local_data_stats' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'returns {} when local is not connected' do + allow(Legion::Data::Local).to receive(:connected?).and_return(false) + expect(instance.local_data_stats).to eq({}) + end + + it 'delegates to Legion::Data::Local.stats when connected' do + stats = { tables: 3, size_bytes: 4096 } + allow(Legion::Data::Local).to receive(:connected?).and_return(true) + allow(Legion::Data::Local).to receive(:stats).and_return(stats) + 
expect(instance.local_data_stats).to eq(stats) + end + + it 'returns {} when an error is raised' do + allow(Legion::Data::Local).to receive(:connected?).and_return(true) + allow(Legion::Data::Local).to receive(:stats).and_raise(StandardError) + expect(instance.local_data_stats).to eq({}) + end + end + + describe '#data_can_read?' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'delegates to Legion::Data.can_read?' do + allow(Legion::Data).to receive(:can_read?).with(:tasks).and_return(true) + expect(instance.data_can_read?(:tasks)).to be true + end + + it 'returns false when Legion::Data.can_read? returns false' do + allow(Legion::Data).to receive(:can_read?).with(:tasks).and_return(false) + expect(instance.data_can_read?(:tasks)).to be false + end + + it 'returns false when an error is raised' do + allow(Legion::Data).to receive(:can_read?).and_raise(StandardError) + expect(instance.data_can_read?(:tasks)).to be false + end + end + + describe '#data_can_write?' do + let(:test_class) do + Class.new do + include Legion::Data::Helper + end + end + let(:instance) { test_class.new } + + it 'delegates to Legion::Data.can_write?' do + allow(Legion::Data).to receive(:can_write?).with(:tasks).and_return(true) + expect(instance.data_can_write?(:tasks)).to be true + end + + it 'returns false when Legion::Data.can_write? 
returns false' do + allow(Legion::Data).to receive(:can_write?).with(:tasks).and_return(false) + expect(instance.data_can_write?(:tasks)).to be false + end + + it 'returns false when an error is raised' do + allow(Legion::Data).to receive(:can_write?).and_raise(StandardError) + expect(instance.data_can_write?(:tasks)).to be false + end + end +end diff --git a/spec/legion/data/local_spec.rb b/spec/legion/data/local_spec.rb new file mode 100644 index 0000000..3cf2253 --- /dev/null +++ b/spec/legion/data/local_spec.rb @@ -0,0 +1,126 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'fileutils' + +RSpec.describe Legion::Data::Local do + let(:test_db) { 'legionio_local_test.db' } + let(:resolved_db) { File.join(File.expand_path('~/.legionio'), test_db) } + + before(:each) do + described_class.reset! + end + + after(:each) do + begin + described_class.shutdown + rescue StandardError + nil + end + FileUtils.rm_f(resolved_db) + FileUtils.rm_f(test_db) # cleanup any stale relative path files + end + + describe '.setup' do + it 'creates a SQLite connection' do + described_class.setup(database: test_db) + expect(described_class.connection).to be_a(Sequel::SQLite::Database) + end + + it 'uses a local tagged Sequel logger' do + described_class.setup(database: test_db) + logger = described_class.connection.loggers.first + expect(logger).to be_a(Legion::Data::Connection::SlowQueryLogger) + expect(logger.tagged.segments).to eq(%w[data local]) + end + + it 'sets connected to true' do + described_class.setup(database: test_db) + expect(described_class.connected?).to be true + end + + it 'is idempotent' do + described_class.setup(database: test_db) + conn1 = described_class.connection + described_class.setup(database: test_db) + expect(described_class.connection).to equal(conn1) + end + end + + describe '.shutdown' do + it 'disconnects and clears state' do + described_class.setup(database: test_db) + described_class.shutdown + expect(described_class.connected?).to 
be false + expect(described_class.connection).to be_nil + end + end + + describe '.db_path' do + it 'returns the configured database path' do + described_class.setup(database: test_db) + expect(described_class.db_path).to eq(resolved_db) + end + end + + describe '.register_migrations' do + it 'accumulates migration directories' do + described_class.register_migrations(name: :memory, path: '/fake/path') + described_class.register_migrations(name: :trust, path: '/other/path') + expect(described_class.registered_migrations.size).to eq(2) + end + + it 'prevents duplicate registration by name' do + described_class.register_migrations(name: :memory, path: '/fake/path') + described_class.register_migrations(name: :memory, path: '/fake/path') + expect(described_class.registered_migrations.size).to eq(1) + end + end + + describe '.model' do + it 'creates a Sequel::Model bound to local connection' do + described_class.setup(database: test_db) + described_class.connection.create_table(:test_items) do + primary_key :id + String :name + end + + model_class = described_class.model(:test_items) + model_class.create(name: 'hello') + expect(model_class.count).to eq(1) + expect(model_class.first.name).to eq('hello') + end + + it 'raises when not connected' do + expect { described_class.model(:anything) }.to raise_error(RuntimeError, /not connected/) + end + end + + describe 'migration registration and execution' do + let(:migrations_dir) { File.join(__dir__, 'local', 'test_migrations') } + + before(:each) do + FileUtils.mkdir_p(migrations_dir) + File.write(File.join(migrations_dir, '20260316000001_create_test_table.rb'), <<~RUBY) + Sequel.migration do + change do + create_table(:local_test_table) do + primary_key :id + String :value + end + end + end + RUBY + end + + after(:each) do + FileUtils.rm_rf(migrations_dir) + end + + it 'runs registered migrations on setup' do + described_class.register_migrations(name: :test, path: migrations_dir) + described_class.setup(database: test_db) 
+ expect(described_class.connection.table_exists?(:local_test_table)).to be true + end + end +end diff --git a/spec/legion/data/migration_spec.rb b/spec/legion/data/migration_spec.rb index 6f21d34..05e1b7f 100644 --- a/spec/legion/data/migration_spec.rb +++ b/spec/legion/data/migration_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'spec_helper' RSpec.describe Legion::Data::Migration do diff --git a/spec/legion/data/migrations_spec.rb b/spec/legion/data/migrations_spec.rb new file mode 100644 index 0000000..c0c12df --- /dev/null +++ b/spec/legion/data/migrations_spec.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'Migrations' do + # By the time spec_helper runs, Legion::Data.setup has auto-migrated to the latest version. + # This spec verifies that all migrations applied cleanly and the final schema is coherent. + + let(:db) { Legion::Data::Connection.sequel } + let(:migration_path) { File.expand_path('../../../lib/legion/data/migrations', __dir__) } + + before do + skip 'no global database connection configured' if db.nil? + end + + it 'has run all migrations to the latest version' do + max_migration = Dir.glob(File.join(migration_path, '*.rb')) + .filter_map { |f| File.basename(f, '.rb')[/\A(\d+)/, 1]&.to_i } + .max + raise 'no migrations found' unless max_migration + + # Sequel default is schema_migrations, but try common variants + version_table = %i[schema_migrations schema_info sequel_migrations].find { |t| db.table_exists?(t) } + skip "no migration version table found (#{db.adapter_scheme})" unless version_table + + applied = db[version_table].select_map(:version).map(&:to_i).sort + expect(applied.last).to eq(max_migration) + end + + it 'has all expected tables' do + # Authoritative list of all tables that should exist after all migrations run. + # Derived from the actual production schema, not from scanning migration files + # (which can't track renames and drops correctly). 
+ expected_tables = %i[ + apollo_access_log apollo_entries apollo_entries_archive + apollo_expertise apollo_operations apollo_relations + audit_log audit_records chains + conversations + llm_conversation_compactions llm_conversations + llm_message_inference_metrics llm_message_inference_requests + llm_message_inference_responses llm_messages llm_policy_evaluations + llm_registry_events llm_route_attempts + llm_security_events llm_tool_call_attempts llm_tool_calls + digital_workers extensions extensions_registry functions + identities identity_audit_log identity_group_memberships + identity_groups identity_principals identity_provider_capabilities + identity_providers + memory_associations memory_traces + metering_hourly_rollup metering_records_archive + rbac_cross_team_grants rbac_role_assignments rbac_runner_grants + nodes relationships runners schema_info settings + tasks tasks_archive tenants + webhooks + ] + + expected_tables.each do |table| + exists = db.table_exists?(table) + raise "expected table #{table} to exist" unless exists + end + end + + it 'has critical indexes on key tables' do + critical_indexes = { + llm_tool_calls: ['idx_tool_calls_identity_principal_id'], + functions: ['idx_functions_component_type'] + } + + critical_indexes.each do |table, index_names| + indexes = if db.adapter_scheme == :postgres + db.indexes(table).keys.map(&:to_s) + else + db[:sqlite_master].where(type: 'index', tbl_name: table.to_s).select_map(:name) + end + + index_names.each do |name| + expect(indexes).to include(name), "expected index #{name} on #{table}" + end + end + end +end diff --git a/spec/legion/data/model_spec.rb b/spec/legion/data/model_spec.rb index 77c2fb2..dac63b1 100644 --- a/spec/legion/data/model_spec.rb +++ b/spec/legion/data/model_spec.rb @@ -1,24 +1,46 @@ +# frozen_string_literal: true + require 'spec_helper' RSpec.describe Legion::Data::Models do after(:each) do Legion::Data::Connection.shutdown + Legion::Settings[:data][:models][:autoload] = true + 
Legion::Settings[:data][:models][:continue_on_load_fail] = false end it 'can load' do - expect { Legion::Data::Models.load }.not_to raise_exception + expect { Legion::Data::Models.load }.not_to raise_error expect(Legion::Settings[:data][:models][:loaded]).to eq true end it '.require_sequel_models' do expect(Legion::Data::Models.require_sequel_models).to be_a Array expect(Legion::Data::Models.require_sequel_models([])).to eq [] - expect { Legion::Data::Models.require_sequel_models(['bad_model']) }.to raise_exception(LoadError) + expect { Legion::Data::Models.require_sequel_models(['bad_model']) }.to raise_error(LoadError) end it '.load_sequel_model' do expect(Legion::Data::Models.load_sequel_model('task')).to eq 'task' - expect { Legion::Data::Models.load_sequel_model('bad_model') }.to raise_exception LoadError + expect { Legion::Data::Models.load_sequel_model('bad_model') }.to raise_error(LoadError) + end + + describe 'settings-driven behaviour' do + it 'respects autoload: false by skipping model loading' do + Legion::Settings[:data][:models][:autoload] = false + result = Legion::Data.load_models + expect(result).to be_nil + end + + it 'uses continue_on_load_fail to swallow LoadError' do + Legion::Settings[:data][:models][:continue_on_load_fail] = true + expect { Legion::Data::Models.load_sequel_model('does_not_exist') }.not_to raise_error + end + + it 'raises LoadError when continue_on_load_fail is false' do + Legion::Settings[:data][:models][:continue_on_load_fail] = false + expect { Legion::Data::Models.load_sequel_model('does_not_exist') }.to raise_error(LoadError) + end end it '.models' do diff --git a/spec/legion/data/models/audit_log_spec.rb b/spec/legion/data/models/audit_log_spec.rb new file mode 100644 index 0000000..ed5a15e --- /dev/null +++ b/spec/legion/data/models/audit_log_spec.rb @@ -0,0 +1,126 @@ +# frozen_string_literal: true + +require 'spec_helper' +Legion::Data::Connection.setup +Legion::Data::Models.load + +RSpec.describe 
Legion::Data::Model::AuditLog do + after(:all) do + Legion::Data::Connection.shutdown + end + + let(:valid_attrs) do + { + event_type: 'runner_execution', + principal_id: 'worker-123', + principal_type: 'system', + action: 'execute', + resource: 'MyRunner/my_function', + source: 'amqp', + node: 'node-01', + status: 'success', + duration_ms: 42, + detail: '{"task_id":1}', + record_hash: 'a' * 64, + previous_hash: '0' * 64, + created_at: Time.now.utc + } + end + + it { should be_a Sequel::Model } + + describe 'creation' do + it 'creates a record with all required fields' do + record = described_class.create(**valid_attrs) + expect(record.id).not_to be_nil + expect(record.event_type).to eq('runner_execution') + expect(record.record_hash).to eq('a' * 64) + begin + record.delete + rescue StandardError + nil + end + described_class.where(id: record.id).delete + end + end + + describe 'validation' do + it 'accepts runner_execution event_type' do + record = described_class.new(**valid_attrs) + expect(record.valid?).to be true + end + + it 'accepts lifecycle_transition event_type' do + record = described_class.new(**valid_attrs, event_type: 'lifecycle_transition') + expect(record.valid?).to be true + end + + it 'rejects invalid event_type' do + record = described_class.new(**valid_attrs, event_type: 'bad') + expect(record.valid?).to be false + expect(record.errors[:event_type]).to include('invalid') + end + + %w[success failure denied].each do |status| + it "accepts #{status} status" do + record = described_class.new(**valid_attrs, status: status) + expect(record.valid?).to be true + end + end + + it 'rejects invalid status' do + record = described_class.new(**valid_attrs, status: 'bad') + expect(record.valid?).to be false + expect(record.errors[:status]).to include('invalid') + end + end + + describe '#parsed_detail' do + it 'deserializes JSON detail' do + record = described_class.new(**valid_attrs, detail: '{"key":"value"}') + expect(record.parsed_detail).to eq({ key: 
'value' }) + end + + it 'returns nil when detail is nil' do + record = described_class.new(**valid_attrs, detail: nil) + expect(record.parsed_detail).to be_nil + end + + it 'returns nil when detail is invalid JSON' do + record = described_class.new(**valid_attrs, detail: 'not-json{{{') + expect(record.parsed_detail).to be_nil + end + end + + describe '.compute_hash' do + it 'delegates to the canonical audit log hash chain' do + record = valid_attrs.merge(previous_hash: Legion::Data::AuditLogHashChain::GENESIS_HASH) + expect(described_class.compute_hash(record)).to eq(Legion::Data::AuditLogHashChain.compute_hash(record)) + end + end + + describe '.verify_chain' do + it 'verifies records with the canonical hash chain' do + first_base = valid_attrs.merge(id: 1, previous_hash: Legion::Data::AuditLogHashChain::GENESIS_HASH) + first = first_base.merge(record_hash: described_class.compute_hash(first_base)) + second_base = valid_attrs.merge(id: 2, action: 'archive', previous_hash: first[:record_hash]) + second = second_base.merge(record_hash: described_class.compute_hash(second_base)) + + expect(described_class.verify_chain([first, second])).to eq({ valid: true, length: 2 }) + end + end + + describe 'immutability' do + it 'raises on update' do + record = described_class.create(**valid_attrs) + expect { record.update(status: 'failure') }.to raise_error(RuntimeError, /immutable.*cannot be updated/) + described_class.where(id: record.id).delete + end + + it 'raises on destroy' do + record = described_class.create(**valid_attrs) + expect { record.destroy }.to raise_error(RuntimeError, /immutable.*cannot be deleted/) + described_class.where(id: record.id).delete + end + end +end diff --git a/spec/legion/data/models/chain_spec.rb b/spec/legion/data/models/chain_spec.rb new file mode 100644 index 0000000..391fee4 --- /dev/null +++ b/spec/legion/data/models/chain_spec.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'spec_helper' +Legion::Data::Models.load + 
+RSpec.describe Legion::Data::Model::Chain do + subject(:model) { described_class } + + before(:all) do + Legion::Data::Migration.migrate + end + + describe '.insert' do + it 'creates a chain with a name' do + id = model.insert(name: 'test-workflow') + expect(id).to be_a(Integer) + row = model[id] + expect(row.values[:name]).to eq('test-workflow') + expect(row.values[:active]).to be true + row.delete + end + end + + describe '#relationships' do + it 'returns associated relationships' do + id = model.insert(name: 'chain-with-rels') + chain = model[id] + expect(chain.relationships).to be_an(Array) + chain.delete + end + end +end diff --git a/spec/legion/data/models/digital_worker_spec.rb b/spec/legion/data/models/digital_worker_spec.rb new file mode 100644 index 0000000..42a135d --- /dev/null +++ b/spec/legion/data/models/digital_worker_spec.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require 'securerandom' +require 'spec_helper' +Legion::Data::Connection.setup +Legion::Data::Models.load + +RSpec.describe Legion::Data::Model::DigitalWorker do + after(:all) do + Legion::Data::Connection.shutdown + end + + let(:valid_attrs) do + { + worker_id: SecureRandom.uuid, + name: 'test-worker', + entra_app_id: SecureRandom.uuid, + owner_msid: 'MS123', + extension_name: 'lex-test', + lifecycle_state: 'active', + consent_tier: 'supervised', + trust_score: 0.5 + } + end + + describe 'health_status validation' do + it 'accepts online as a valid health_status' do + worker = described_class.new(valid_attrs.merge(health_status: 'online')) + expect(worker.valid?).to be(true) + end + + it 'accepts offline as a valid health_status' do + worker = described_class.new(valid_attrs.merge(health_status: 'offline')) + expect(worker.valid?).to be(true) + end + + it 'accepts unknown as a valid health_status' do + worker = described_class.new(valid_attrs.merge(health_status: 'unknown')) + expect(worker.valid?).to be(true) + end + + it 'rejects invalid health_status values' do + worker = 
described_class.new(valid_attrs.merge(health_status: 'bad')) + expect(worker.valid?).to be(false) + expect(worker.errors[:health_status]).to include('invalid') + end + end + + describe '#online?' do + it 'returns true when health_status is online' do + worker = described_class.new(valid_attrs.merge(health_status: 'online')) + expect(worker.online?).to be(true) + end + + it 'returns false when health_status is offline' do + worker = described_class.new(valid_attrs.merge(health_status: 'offline')) + expect(worker.online?).to be(false) + end + end + + describe '#offline?' do + it 'returns true when health_status is offline' do + worker = described_class.new(valid_attrs.merge(health_status: 'offline')) + expect(worker.offline?).to be(true) + end + + it 'returns false when health_status is online' do + worker = described_class.new(valid_attrs.merge(health_status: 'online')) + expect(worker.offline?).to be(false) + end + end + + describe 'default health_status' do + it 'defaults health_status to unknown' do + worker = described_class.create(valid_attrs) + expect(worker.health_status).to eq('unknown') + worker.delete + end + end +end diff --git a/spec/legion/data/models/extension_spec.rb b/spec/legion/data/models/extension_spec.rb index 253c934..9a73266 100644 --- a/spec/legion/data/models/extension_spec.rb +++ b/spec/legion/data/models/extension_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'spec_helper' # Legion::Data::Connection.setup Legion::Data::Models.load diff --git a/spec/legion/data/models/function_spec.rb b/spec/legion/data/models/function_spec.rb index 60f6c04..beff7b1 100644 --- a/spec/legion/data/models/function_spec.rb +++ b/spec/legion/data/models/function_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'spec_helper' Legion::Data::Connection.setup Legion::Data::Models.load diff --git a/spec/legion/data/models/identity_lookup_spec.rb b/spec/legion/data/models/identity_lookup_spec.rb new file mode 100644 index 
0000000..3cb068b --- /dev/null +++ b/spec/legion/data/models/identity_lookup_spec.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +require 'securerandom' +require 'spec_helper' + +RSpec.describe 'identity model lookups' do + let(:suffix) { SecureRandom.hex(4) } + let(:provider_uuid) { SecureRandom.uuid } + let(:principal_uuid) { SecureRandom.uuid } + let(:identity_uuid) { SecureRandom.uuid } + let(:group_uuid) { SecureRandom.uuid } + + let!(:provider) do + Legion::Data::Model::Identity::Provider.create( + uuid: provider_uuid, + name: "lookup-provider-#{suffix}", + provider_type: 'authenticate', + facing: 'both' + ) + end + + let!(:principal) do + Legion::Data::Model::Identity::Principal.create( + uuid: principal_uuid, + canonical_name: "lookup-principal-#{suffix}", + kind: 'human', + employee_key: "employee-#{suffix}" + ) + end + + let!(:identity) do + Legion::Data::Model::Identity::Identity.create( + uuid: identity_uuid, + principal_id: principal.id, + provider_id: provider.id, + provider_identity_key: "provider-identity-#{suffix}" + ) + end + + let!(:group) do + Legion::Data::Model::Identity::Group.create( + uuid: group_uuid, + name: "lookup-group-#{suffix}", + source: 'manual' + ) + end + + it 'looks up providers by id, uuid, and name' do + expect(Legion::Data::Model::Identity::Provider.lookup(provider.id)).to eq(provider) + expect(Legion::Data::Model::Identity::Provider.lookup(provider_uuid)).to eq(provider) + expect(Legion::Data::Model::Identity::Provider.lookup(provider.name)).to eq(provider) + end + + it 'looks up principals by id, uuid, canonical name, and employee key' do + expect(Legion::Data::Model::Identity::Principal.lookup(principal.id)).to eq(principal) + expect(Legion::Data::Model::Identity::Principal.lookup(principal_uuid)).to eq(principal) + expect(Legion::Data::Model::Identity::Principal.lookup(principal.canonical_name)).to eq(principal) + expect(Legion::Data::Model::Identity::Principal.lookup(principal.employee_key)).to eq(principal) + end 
+ + it 'looks up identities by id, uuid, and provider identity key' do + expect(Legion::Data::Model::Identity::Identity.lookup(identity.id)).to eq(identity) + expect(Legion::Data::Model::Identity::Identity.lookup(identity_uuid)).to eq(identity) + expect(Legion::Data::Model::Identity::Identity.lookup(identity.provider_identity_key)).to eq(identity) + end + + it 'looks up groups by id, uuid, and name' do + expect(Legion::Data::Model::Identity::Group.lookup(group.id)).to eq(group) + expect(Legion::Data::Model::Identity::Group.lookup(group_uuid)).to eq(group) + expect(Legion::Data::Model::Identity::Group.lookup(group.name)).to eq(group) + end +end diff --git a/spec/legion/data/models/llm_namespace_spec.rb b/spec/legion/data/models/llm_namespace_spec.rb new file mode 100644 index 0000000..9361a48 --- /dev/null +++ b/spec/legion/data/models/llm_namespace_spec.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +require 'spec_helper' + +Legion::Data::Connection.setup unless Legion::Data.connected? 
+Legion::Data::Migration.migrate(Legion::Data::Connection.sequel, File.expand_path('../../../../lib/legion/data/migrations', __dir__)) +Legion::Data::Models.load + +RSpec.describe 'LLM model namespace' do + let(:conversation_model) { Legion::Data::Models::LLM::Conversation } + let(:message_model) { Legion::Data::Models::LLM::Message } + let(:request_model) { Legion::Data::Models::LLM::MessageInferenceRequest } + let(:response_model) { Legion::Data::Models::LLM::MessageInferenceResponse } + + before do + %i[ + llm_message_inference_responses + llm_message_inference_requests + llm_messages + llm_conversations + ].each { |table| Legion::Data::Connection.sequel[table].delete } + end + + it 'creates the conversation to request to response association graph from official constants' do + conversation = conversation_model.create(principal_id: 101, identity_id: 202, title: 'fleet response') + message = message_model.create(conversation_id: conversation.id, seq: 1, role: 'user', content: 'hello') + request = request_model.create( + conversation_id: conversation.id, + latest_message_id: message.id, + operation: 'chat', + request_type: 'chat', + correlation_id: 'corr-123', + idempotency_key: 'idem-123', + request_capture_mode: 'full', + request_json: '{"messages":[]}' + ) + response = response_model.create( + message_inference_request_id: request.id, + provider: 'vllm', + provider_instance: 'apollo', + model_key: 'qwen3.6-27b', + dispatch_path: 'fleet', + response_capture_mode: 'full', + response_json: '{"content":"hello"}', + response_thinking_json: '{"content":"thinking"}' + ) + + expect(conversation.messages).to contain_exactly(message) + expect(message.triggered_message_inference_requests).to contain_exactly(request) + expect(request.latest_message).to eq(message) + expect(request.message_inference_responses).to contain_exactly(response) + expect(response.message_inference_request).to eq(request) + end +end diff --git 
a/spec/legion/data/models/llm_reconstruction_queries_spec.rb b/spec/legion/data/models/llm_reconstruction_queries_spec.rb new file mode 100644 index 0000000..7b76be1 --- /dev/null +++ b/spec/legion/data/models/llm_reconstruction_queries_spec.rb @@ -0,0 +1,285 @@ +# frozen_string_literal: true + +require 'spec_helper' + +Legion::Data::Connection.setup unless Legion::Data.connected? +Legion::Data::Migration.migrate(Legion::Data::Connection.sequel, File.expand_path('../../../../lib/legion/data/migrations', __dir__)) +Legion::Data::Models.load + +RSpec.describe 'LLM reconstruction query helpers' do + let(:conversation_model) { Legion::Data::Models::LLM::Conversation } + let(:message_model) { Legion::Data::Models::LLM::Message } + let(:request_model) { Legion::Data::Models::LLM::MessageInferenceRequest } + let(:response_model) { Legion::Data::Models::LLM::MessageInferenceResponse } + let(:route_attempt_model) { Legion::Data::Models::LLM::RouteAttempt } + let(:metric_model) { Legion::Data::Models::LLM::MessageInferenceMetric } + let(:tool_call_model) { Legion::Data::Models::LLM::ToolCall } + let(:tool_call_attempt_model) { Legion::Data::Models::LLM::ToolCallAttempt } + let(:policy_evaluation_model) { Legion::Data::Models::LLM::PolicyEvaluation } + let(:security_event_model) { Legion::Data::Models::LLM::SecurityEvent } + + let(:unique_ref) { "req-#{SecureRandom.hex(6)}" } + + before(:each) do + clear_llm_tables + end + + it 'reconstructs audit lineage by request_ref and internal id' do + fixture = create_llm_lifecycle(request_ref: unique_ref) + + by_ref = request_model.audit_lineage_for(unique_ref) + by_id = request_model.audit_lineage_for(fixture[:request].id) + + expect(by_ref[:request]).to eq(fixture[:request]) + expect(by_ref[:request_ref]).to eq(unique_ref) + expect(by_ref[:conversation]).to eq(fixture[:conversation]) + expect(by_ref[:latest_message]).to eq(fixture[:user_message]) + expect(by_ref[:responses]).to contain_exactly(fixture[:response]) + 
expect(by_ref[:route_attempts].map(&:status)).to eq(%w[timeout success]) + expect(by_ref[:tool_calls]).to contain_exactly(fixture[:tool_call]) + expect(by_ref[:tool_call_attempts]).to contain_exactly(fixture[:failed_tool_attempt], fixture[:successful_tool_attempt]) + expect(by_id[:request]).to eq(fixture[:request]) + end + + it 'aggregates finance usage by cost center, model, and recorded day from inference metrics' do + create_llm_lifecycle(request_ref: unique_ref) + second = create_llm_lifecycle(request_ref: "#{unique_ref}-456", cost_center: 'finance-ops', model_key: 'gpt-4.1', + recorded_at: Time.utc(2026, 5, 5, 3, 0, 0), cost_usd: 0.75) + metric_model.create( + message_inference_request_id: second[:request].id, + message_inference_response_id: second[:response].id, + provider: 'openai', + model_key: 'gpt-4.1', + tier: 'standard', + input_tokens: 10, + output_tokens: 20, + thinking_tokens: 5, + total_tokens: 35, + latency_ms: 50, + wall_clock_ms: 60, + cost_usd: 0.25, + currency: 'USD', + cost_center: 'finance-ops', + budget_key: 'budget-a', + recorded_at: Time.utc(2026, 5, 5, 8, 0, 0) + ) + + rollups = metric_model.finance_usage_by_cost_center_model_day + + finance_rollup = rollups.find do |row| + row[:cost_center] == 'finance-ops' && row[:model_key] == 'gpt-4.1' && row[:usage_day].to_s == '2026-05-05' + end + expect(finance_rollup[:input_tokens]).to eq(20) + expect(finance_rollup[:output_tokens]).to eq(40) + expect(finance_rollup[:thinking_tokens]).to eq(10) + expect(finance_rollup[:total_tokens]).to eq(70) + expect(finance_rollup[:cost_usd].to_f).to eq(1.0) + end + + it 'reconstructs security incident lineage for a conversation' do + fixture = create_llm_lifecycle(request_ref: unique_ref) + + lineage = security_event_model.lineage_for_conversation(fixture[:conversation]) + + expect(lineage[:conversation]).to eq(fixture[:conversation]) + expect(lineage[:messages]).to include(fixture[:user_message], fixture[:assistant_message], fixture[:tool_result_message]) + 
expect(lineage[:requests]).to contain_exactly(fixture[:request]) + expect(lineage[:route_attempts].map(&:failure_reason)).to include('runner timeout') + expect(lineage[:request_payload_hashes]).to contain_exactly('request-hash') + expect(lineage[:response_payload_hashes]).to contain_exactly('response-hash') + expect(lineage[:policy_evaluations]).to contain_exactly(fixture[:policy_evaluation]) + expect(lineage[:security_events]).to contain_exactly(fixture[:security_event]) + expect(lineage[:tool_calls]).to contain_exactly(fixture[:tool_call]) + expect(lineage[:tool_call_attempts]).to contain_exactly(fixture[:failed_tool_attempt], fixture[:successful_tool_attempt]) + end + + it 'reconstructs incident flow from message to request, response, tool calls, and attempts' do + fixture = create_llm_lifecycle(request_ref: unique_ref) + + flow = fixture[:user_message].incident_flow + + expect(flow[:message]).to eq(fixture[:user_message]) + expect(flow[:conversation]).to eq(fixture[:conversation]) + expect(flow[:requests]).to contain_exactly(fixture[:request]) + expect(flow[:responses]).to contain_exactly(fixture[:response]) + expect(flow[:response_messages]).to contain_exactly(fixture[:assistant_message]) + expect(flow[:tool_calls]).to contain_exactly(fixture[:tool_call]) + expect(flow[:tool_call_attempts]).to contain_exactly(fixture[:failed_tool_attempt], fixture[:successful_tool_attempt]) + expect(flow[:result_messages]).to include(fixture[:assistant_message], fixture[:tool_result_message]) + end + + def clear_llm_tables + db = Legion::Data::Connection.sequel + db.run('PRAGMA foreign_keys = OFF') if Legion::Data::Connection.adapter == :sqlite + %i[ + llm_security_events + llm_policy_evaluations + llm_tool_call_attempts + llm_tool_calls + llm_message_inference_metrics + llm_route_attempts + llm_message_inference_responses + llm_message_inference_requests + llm_messages + llm_conversations + ].each { |table| db[table].delete } + db.run('PRAGMA foreign_keys = ON') if 
Legion::Data::Connection.adapter == :sqlite + end + + def create_llm_lifecycle(request_ref: 'req-123', cost_center: 'finance-ops', model_key: 'gpt-4.1', + recorded_at: Time.utc(2026, 5, 4, 12, 0, 0), cost_usd: 0.42) + conversation = create_fixture_conversation(recorded_at) + user_message = create_fixture_user_message(conversation) + request = create_fixture_request(conversation, user_message, request_ref, cost_center, recorded_at) + response = create_fixture_response(request, model_key, recorded_at) + assistant_message = create_fixture_assistant_message(conversation, request, response) + route_attempts_for(request, response, model_key, recorded_at) + metric_for(request, response, model_key, cost_center, recorded_at, cost_usd) + tool_fixture = create_tool_fixture(conversation, request, response, assistant_message, recorded_at) + policy_evaluation = create_policy_evaluation(conversation, request, response, recorded_at) + security_event = create_security_event(conversation, request, response, tool_fixture, policy_evaluation, recorded_at) + + { + conversation: conversation, + user_message: user_message, + request: request, + response: response, + assistant_message: assistant_message, + tool_call: tool_fixture.fetch(:tool_call), + failed_tool_attempt: tool_fixture.fetch(:failed_tool_attempt), + successful_tool_attempt: tool_fixture.fetch(:successful_tool_attempt), + tool_result_message: tool_fixture.fetch(:tool_result_message), + policy_evaluation: policy_evaluation, + security_event: security_event + } + end + + def create_fixture_conversation(recorded_at) + conversation_model.create(principal_id: 101, identity_id: 202, title: 'incident review', + classification_level: 'internal', recorded_at: recorded_at) + end + + def create_fixture_user_message(conversation) + message_model.create(conversation_id: conversation.id, seq: 1, role: 'user', + content: 'please fetch account details') + end + + def create_fixture_request(conversation, user_message, request_ref, 
cost_center, recorded_at) + request_model.create(conversation_id: conversation.id, latest_message_id: user_message.id, + caller_principal_id: 101, caller_identity_id: 202, + runtime_caller_type: 'user', request_ref: request_ref, + correlation_ref: 'corr-123', exchange_ref: 'exchange-123', + status: 'responded', cost_center: cost_center, + budget_key: 'budget-a', requested_at: recorded_at, + request_content_hash: 'request-hash') + end + + def create_fixture_response(request, model_key, recorded_at) + response_model.create(message_inference_request_id: request.id, provider: 'openai', + model_key: model_key, tier: 'standard', status: 'success', + finish_reason: 'tool_calls', response_content_hash: 'response-hash', + responded_at: recorded_at + 1) + end + + def create_fixture_assistant_message(conversation, request, response) + message = message_model.create(conversation_id: conversation.id, + message_inference_request_id: request.id, + message_inference_response_id: response.id, + seq: 2, role: 'assistant', content: 'calling tool') + response.update(response_message_id: message.id) + message + end + + def route_attempts_for(request, response, model_key, recorded_at) + route_attempt_model.create(message_inference_request_id: request.id, attempt_no: 1, provider: 'vllm', + model_key: model_key, tier: 'standard', route_target: 'runner-a', + status: 'timeout', failure_reason: 'runner timeout', latency_ms: 1_000, + started_at: recorded_at, ended_at: recorded_at + 1) + route_attempt_model.create(message_inference_request_id: request.id, message_inference_response_id: response.id, + attempt_no: 2, provider: 'openai', model_key: model_key, tier: 'standard', + route_target: 'provider-c', status: 'success', latency_ms: 500, + started_at: recorded_at + 1, ended_at: recorded_at + 2) + end + + def metric_for(request, response, model_key, cost_center, recorded_at, cost_usd) + metric_model.create(message_inference_request_id: request.id, message_inference_response_id: response.id, + 
provider: 'openai', model_key: model_key, tier: 'standard', + input_tokens: 10, output_tokens: 20, thinking_tokens: 5, total_tokens: 35, + latency_ms: 500, wall_clock_ms: 550, cost_usd: cost_usd, currency: 'USD', + cost_center: cost_center, budget_key: 'budget-a', recorded_at: recorded_at) + end + + def create_tool_fixture(conversation, request, response, assistant_message, recorded_at) + tool_call = create_tool_call(response, assistant_message, recorded_at) + failed_tool_attempt = create_failed_tool_attempt(tool_call, recorded_at) + successful_tool_attempt = create_successful_tool_attempt(tool_call, recorded_at) + tool_result_message = create_tool_result_message(conversation, request, tool_call) + tool_call.update(result_message_id: tool_result_message.id) + + { + tool_call: tool_call, + failed_tool_attempt: failed_tool_attempt, + successful_tool_attempt: successful_tool_attempt, + tool_result_message: tool_result_message + } + end + + def create_tool_call(response, assistant_message, recorded_at) + tool_call_model.create(message_inference_response_id: response.id, + requested_by_message_id: assistant_message.id, + tool_call_index: 0, provider_tool_call_ref: 'tooluse-123', + tool_name: 'fetch_account', tool_source_type: 'mcp', + tool_source_server: 'accounts', status: 'succeeded', + requested_at: recorded_at + 2, completed_at: recorded_at + 4) + end + + def create_failed_tool_attempt(tool_call, recorded_at) + tool_call_attempt_model.create(tool_call_id: tool_call.id, attempt_no: 1, + runner_ref: 'runner-a', status: 'failed', + error_category: 'network', error_code: 'timeout', + error_message: 'timed out', duration_ms: 100, + arguments_ref: 'args-hash', started_at: recorded_at + 2, + ended_at: recorded_at + 3) + end + + def create_successful_tool_attempt(tool_call, recorded_at) + tool_call_attempt_model.create(tool_call_id: tool_call.id, attempt_no: 2, + runner_ref: 'runner-b', status: 'succeeded', + duration_ms: 75, arguments_ref: 'args-hash', + result_ref: 
'result-hash', + started_at: recorded_at + 3, + ended_at: recorded_at + 4) + end + + def create_tool_result_message(conversation, request, tool_call) + message_model.create(conversation_id: conversation.id, + message_inference_request_id: request.id, + tool_call_id: tool_call.id, seq: 3, role: 'tool', + content: 'account details') + end + + def create_policy_evaluation(conversation, request, response, recorded_at) + policy_evaluation_model.create(conversation_id: conversation.id, + message_inference_request_id: request.id, + message_inference_response_id: response.id, + policy_key: 'phi-redaction', policy_version: '1', + evaluation_type: 'classification', decision: 'allow', + enforcement_action: 'audit', classification_level: 'internal', + contains_phi: true, contains_pii: true, + reason_code: 'allowed-with-audit', + evaluated_at: recorded_at + 1) + end + + def create_security_event(conversation, request, response, tool_fixture, policy_evaluation, recorded_at) + security_event_model.create(conversation_id: conversation.id, + message_inference_request_id: request.id, + message_inference_response_id: response.id, + tool_call_id: tool_fixture.fetch(:tool_call).id, + tool_call_attempt_id: tool_fixture.fetch(:failed_tool_attempt).id, + policy_evaluation_id: policy_evaluation.id, + event_type: 'tool_retry_after_timeout', + severity: 'warn', status: 'resolved', + description: 'tool retry succeeded', + detected_at: recorded_at + 3) + end +end diff --git a/spec/legion/data/models/node_spec.rb b/spec/legion/data/models/node_spec.rb index 81ab43b..5f97772 100644 --- a/spec/legion/data/models/node_spec.rb +++ b/spec/legion/data/models/node_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'spec_helper' Legion::Data::Connection.setup Legion::Data::Models.load @@ -11,4 +13,39 @@ it { should respond_to? :dataceter } it { should respond_to? 
:task_log } it { should be_a Sequel::Model } + + describe '#parsed_metrics' do + it 'returns deserialized hash when metrics is valid JSON' do + node = described_class.new(metrics: Legion::JSON.dump({ memory_rss_mb: 142 })) + expect(node.parsed_metrics).to be_a(Hash) + expect(node.parsed_metrics[:memory_rss_mb]).to eq(142) + end + + it 'returns nil when metrics is nil' do + node = described_class.new(metrics: nil) + expect(node.parsed_metrics).to be_nil + end + + it 'returns nil when metrics is invalid JSON' do + node = described_class.new(metrics: 'not-json{{{') + expect(node.parsed_metrics).to be_nil + end + end + + describe '#parsed_hosted_worker_ids' do + it 'returns deserialized array when hosted_worker_ids is valid JSON' do + node = described_class.new(hosted_worker_ids: Legion::JSON.dump(%w[w1 w2])) + expect(node.parsed_hosted_worker_ids).to eq(%w[w1 w2]) + end + + it 'returns empty array when hosted_worker_ids is nil' do + node = described_class.new(hosted_worker_ids: nil) + expect(node.parsed_hosted_worker_ids).to eq([]) + end + + it 'returns empty array when hosted_worker_ids is invalid JSON' do + node = described_class.new(hosted_worker_ids: 'bad-json') + expect(node.parsed_hosted_worker_ids).to eq([]) + end + end end diff --git a/spec/legion/data/models/runner_spec.rb b/spec/legion/data/models/runner_spec.rb index ddb8c54..c30c7c8 100644 --- a/spec/legion/data/models/runner_spec.rb +++ b/spec/legion/data/models/runner_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'spec_helper' Legion::Data::Connection.setup Legion::Data::Models.load diff --git a/spec/legion/data/models/setting_spec.rb b/spec/legion/data/models/setting_spec.rb index 774cd1a..8e9b9ce 100644 --- a/spec/legion/data/models/setting_spec.rb +++ b/spec/legion/data/models/setting_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'spec_helper' Legion::Data::Connection.setup Legion::Data::Models.load diff --git a/spec/legion/data/models/tasks_spec.rb 
b/spec/legion/data/models/tasks_spec.rb index edd78b7..66ba107 100644 --- a/spec/legion/data/models/tasks_spec.rb +++ b/spec/legion/data/models/tasks_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'spec_helper' Legion::Data::Connection.setup Legion::Data::Models.load @@ -16,4 +18,33 @@ it { should respond_to? :user_owner } it { should respond_to? :group_owner } it { should be_a Sequel::Model } + + describe '.idempotency_key_for' do + it 'returns the same SHA-256 key for hash payloads with different key order' do + left = described_class.idempotency_key_for({ b: 2, a: 1 }) + right = described_class.idempotency_key_for({ a: 1, b: 2 }) + + expect(left).to eq(right) + expect(left).to match(/\A[0-9a-f]{64}\z/) + end + end + + describe '.create_idempotent' do + it 'returns an existing active task for duplicate payloads' do + attrs = { status: 'pending', payload: '{"a":1}' } + first = described_class.create_idempotent(attrs, payload: { a: 1 }) + second = described_class.create_idempotent(attrs, payload: { a: 1 }) + + expect(second.id).to eq(first.id) + end + + it 'creates a new task after the prior idempotency key reaches terminal status' do + attrs = { status: 'pending', payload: '{"a":2}' } + first = described_class.create_idempotent(attrs, payload: { a: 2 }) + first.update(status: 'completed') + second = described_class.create_idempotent(attrs, payload: { a: 2 }) + + expect(second.id).not_to eq(first.id) + end + end end diff --git a/spec/legion/data/partition_manager_spec.rb b/spec/legion/data/partition_manager_spec.rb new file mode 100644 index 0000000..f8763d7 --- /dev/null +++ b/spec/legion/data/partition_manager_spec.rb @@ -0,0 +1,294 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/partition_manager' + +RSpec.describe Legion::Data::PartitionManager do + # --------------------------------------------------------------------------- + # Shared mock DB + # 
--------------------------------------------------------------------------- + let(:executed_sql) { [] } + let(:mock_db) do + db = double('Sequel::Database') + allow(db).to receive(:run) { |sql| executed_sql << sql } + allow(db).to receive(:fetch).and_return([]) + db + end + + before(:each) do + allow(Legion::Data).to receive(:connection).and_return(mock_db) + end + + # --------------------------------------------------------------------------- + # Helper: freeze the adapter response + # --------------------------------------------------------------------------- + def with_adapter(adapter) + allow(Legion::Data::Connection).to receive(:adapter).and_return(adapter) + end + + # --------------------------------------------------------------------------- + # 1. Non-postgres guard + # --------------------------------------------------------------------------- + describe 'non-postgres guard' do + %i[sqlite mysql2].each do |adapter| + context "when adapter is #{adapter}" do + before { with_adapter(adapter) } + + it 'ensure_partitions returns skipped' do + result = described_class.ensure_partitions(table: :events) + expect(result).to eq({ skipped: true, reason: 'not_postgres' }) + end + + it 'drop_old_partitions returns skipped' do + result = described_class.drop_old_partitions(table: :events) + expect(result).to eq({ skipped: true, reason: 'not_postgres' }) + end + + it 'list_partitions returns skipped' do + result = described_class.list_partitions(table: :events) + expect(result).to eq({ skipped: true, reason: 'not_postgres' }) + end + end + end + end + + # --------------------------------------------------------------------------- + # 2 & 3. 
ensure_partitions: DDL content and idempotency + # --------------------------------------------------------------------------- + describe '.ensure_partitions' do + before { with_adapter(:postgres) } + + # Return empty fetch (partition didn't exist before) for all calls + before do + allow(mock_db).to receive(:fetch).and_return([]) + end + + it 'generates CREATE TABLE IF NOT EXISTS DDL for each month' do + travel_to = Date.new(2025, 11, 15) + allow(Date).to receive(:today).and_return(travel_to) + + described_class.ensure_partitions(table: :events, months_ahead: 3) + + expect(executed_sql.size).to eq(3) + expect(executed_sql[0]).to include('CREATE TABLE IF NOT EXISTS events_y2025m11') + expect(executed_sql[1]).to include('CREATE TABLE IF NOT EXISTS events_y2025m12') + expect(executed_sql[2]).to include('CREATE TABLE IF NOT EXISTS events_y2026m01') + end + + it 'uses IF NOT EXISTS (idempotent DDL)' do + allow(Date).to receive(:today).and_return(Date.new(2025, 6, 1)) + + described_class.ensure_partitions(table: :events, months_ahead: 1) + + expect(executed_sql.first).to include('IF NOT EXISTS') + end + + it 'sets correct FROM/TO boundaries' do + allow(Date).to receive(:today).and_return(Date.new(2025, 3, 1)) + + described_class.ensure_partitions(table: :events, months_ahead: 1) + + ddl = executed_sql.first + expect(ddl).to include("FROM ('2025-03-01')") + expect(ddl).to include("TO ('2025-04-01')") + end + + it 'includes table name in DDL' do + allow(Date).to receive(:today).and_return(Date.new(2025, 1, 1)) + + described_class.ensure_partitions(table: :my_events, months_ahead: 1) + + expect(executed_sql.first).to include('PARTITION OF my_events') + end + + it 'returns created and existing arrays' do + allow(Date).to receive(:today).and_return(Date.new(2025, 1, 1)) + result = described_class.ensure_partitions(table: :events, months_ahead: 2) + expect(result).to have_key(:created) + expect(result).to have_key(:existing) + expect((result[:created] + 
result[:existing]).size).to eq(2) + end + end + + # --------------------------------------------------------------------------- + # 4. Year-boundary month wrapping + # --------------------------------------------------------------------------- + describe '.ensure_partitions year-boundary math' do + before { with_adapter(:postgres) } + + before do + allow(mock_db).to receive(:fetch).and_return([]) + end + + it 'wraps December -> January correctly' do + allow(Date).to receive(:today).and_return(Date.new(2025, 12, 1)) + described_class.ensure_partitions(table: :events, months_ahead: 2) + + expect(executed_sql[0]).to include('events_y2025m12') + expect(executed_sql[1]).to include('events_y2026m01') + end + + it 'correctly advances across a year boundary for FROM/TO' do + allow(Date).to receive(:today).and_return(Date.new(2025, 12, 1)) + described_class.ensure_partitions(table: :events, months_ahead: 2) + + dec_ddl = executed_sql[0] + expect(dec_ddl).to include("FROM ('2025-12-01')") + expect(dec_ddl).to include("TO ('2026-01-01')") + + jan_ddl = executed_sql[1] + expect(jan_ddl).to include("FROM ('2026-01-01')") + expect(jan_ddl).to include("TO ('2026-02-01')") + end + end + + # --------------------------------------------------------------------------- + # 5. 
drop_old_partitions: only drops outside retention window + # --------------------------------------------------------------------------- + describe '.drop_old_partitions' do + before { with_adapter(:postgres) } + + let(:today) { Date.new(2025, 6, 1) } + + before { allow(Date).to receive(:today).and_return(today) } + + def stub_partitions(names) + rows = names.map { |n| { name: n } } + allow(mock_db).to receive(:fetch).and_return(rows) + end + + it 'drops partitions older than retention window' do + # 24 months ago from 2025-06: cutoff is 2023-06 + # 2022-01 is older → drop; 2024-01 is within → retain + stub_partitions(%w[events_y2022m01 events_y2024m01]) + + result = described_class.drop_old_partitions(table: :events, retention_months: 24) + + expect(result[:dropped]).to eq(['events_y2022m01']) + expect(result[:retained]).to eq(['events_y2024m01']) + expect(executed_sql).to include('DROP TABLE events_y2022m01') + expect(executed_sql).not_to include('DROP TABLE events_y2024m01') + end + + it 'drops nothing when all partitions are within retention' do + stub_partitions(%w[events_y2024m01 events_y2025m01]) + + result = described_class.drop_old_partitions(table: :events, retention_months: 24) + + expect(result[:dropped]).to be_empty + expect(result[:retained].size).to eq(2) + expect(executed_sql).to be_empty + end + + it 'handles a partition exactly at the cutoff boundary (not dropped)' do + # cutoff = 2023-06-01 — a partition named y2023m06 equals cutoff, not older + stub_partitions(['events_y2023m06']) + + result = described_class.drop_old_partitions(table: :events, retention_months: 24) + + expect(result[:dropped]).to be_empty + expect(result[:retained]).to eq(['events_y2023m06']) + end + + it 'skips partitions with unparseable names' do + stub_partitions(%w[events_custom_name events_y2022m01]) + + result = described_class.drop_old_partitions(table: :events, retention_months: 24) + + expect(result[:dropped]).to eq(['events_y2022m01']) + end + end + + # 
--------------------------------------------------------------------------- + # 7. list_partitions with empty result + # --------------------------------------------------------------------------- + describe '.list_partitions with empty result' do + before { with_adapter(:postgres) } + + it 'returns empty array when no partitions exist' do + allow(mock_db).to receive(:fetch).and_return([]) + + result = described_class.list_partitions(table: :events) + expect(result).to eq([]) + end + end + + # --------------------------------------------------------------------------- + # 8. list_partitions with populated result + # --------------------------------------------------------------------------- + describe '.list_partitions with populated result' do + before { with_adapter(:postgres) } + + it 'returns array of hashes with name, from, to' do + rows = [ + { name: 'events_y2025m01', bound: "FOR VALUES FROM ('2025-01-01') TO ('2025-02-01')" }, + { name: 'events_y2025m02', bound: "FOR VALUES FROM ('2025-02-01') TO ('2025-03-01')" } + ] + allow(mock_db).to receive(:fetch).and_return(rows) + + result = described_class.list_partitions(table: :events) + + expect(result.size).to eq(2) + expect(result[0]).to eq({ name: 'events_y2025m01', from: '2025-01-01', to: '2025-02-01' }) + expect(result[1]).to eq({ name: 'events_y2025m02', from: '2025-02-01', to: '2025-03-01' }) + end + + it 'handles rows with a nil bound gracefully' do + rows = [{ name: 'events_y2025m01', bound: nil }] + allow(mock_db).to receive(:fetch).and_return(rows) + + result = described_class.list_partitions(table: :events) + expect(result.size).to eq(1) + expect(result[0][:from]).to be_nil + expect(result[0][:to]).to be_nil + end + end + + # --------------------------------------------------------------------------- + # 9. 
Logging when Legion::Logging is available + # --------------------------------------------------------------------------- + describe 'logging when Legion::Logging is present' do + before { with_adapter(:postgres) } + + before do + allow(Date).to receive(:today).and_return(Date.new(2025, 1, 1)) + end + + it 'calls Legion::Logging.info for created partitions' do + # First fetch (before run) returns empty — partition doesn't exist yet. + # Second fetch (after run) returns the new row — partition was created. + fetch_calls = 0 + allow(mock_db).to receive(:fetch) do + fetch_calls += 1 + fetch_calls == 1 ? [] : [{ name: 'events_y2025m01' }] + end + + logger = instance_double('Legion::Logging::TaggedLogger', info: nil) + allow(described_class).to receive(:log).and_return(logger) + + described_class.ensure_partitions(table: :events, months_ahead: 1) + + expect(logger).to have_received(:info).at_least(:once) + end + end + + # --------------------------------------------------------------------------- + # 10. 
Graceful when Legion::Logging is absent + # --------------------------------------------------------------------------- + describe 'graceful when Legion::Logging is absent' do + before { with_adapter(:postgres) } + + before do + allow(Date).to receive(:today).and_return(Date.new(2025, 1, 1)) + allow(mock_db).to receive(:fetch).and_return([]) + end + + it 'does not raise when log helper is available' do + allow(described_class).to receive(:log).and_return(instance_double('Legion::Logging::TaggedLogger', info: nil)) + + expect { described_class.ensure_partitions(table: :events, months_ahead: 1) }.not_to raise_error + expect { described_class.drop_old_partitions(table: :events, retention_months: 24) }.not_to raise_error + expect { described_class.list_partitions(table: :events) }.not_to raise_error + end + end +end diff --git a/spec/legion/data/privilege_spec.rb b/spec/legion/data/privilege_spec.rb new file mode 100644 index 0000000..ef8a8be --- /dev/null +++ b/spec/legion/data/privilege_spec.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'Legion::Data privilege checks' do + before do + Legion::Data.instance_variable_set(:@write_privileges, nil) + Legion::Data.instance_variable_set(:@read_privileges, nil) + end + + describe '.can_write?' 
do + context 'when not connected' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: false }) + end + + it 'returns false' do + expect(Legion::Data.can_write?(:apollo_entries)).to be false + end + end + + context 'when connected with sqlite adapter' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: true, adapter: 'sqlite' }) + end + + it 'returns true (sqlite has no privilege system)' do + expect(Legion::Data.can_write?(:apollo_entries)).to be true + end + end + + context 'when result is cached' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: true, adapter: 'sqlite' }) + Legion::Data.can_write?(:apollo_entries) + end + + it 'returns cached value without re-checking' do + expect(Legion::Data.can_write?(:apollo_entries)).to be true + end + end + end + + describe '.can_read?' do + context 'when not connected' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: false }) + end + + it 'returns false' do + expect(Legion::Data.can_read?(:apollo_entries)).to be false + end + end + + context 'when connected with sqlite adapter' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: true, adapter: 'sqlite' }) + end + + it 'returns true' do + expect(Legion::Data.can_read?(:apollo_entries)).to be true + end + end + end + + describe '.connected?' 
do + it 'returns true when data is connected' do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: true }) + expect(Legion::Data.connected?).to be true + end + + it 'returns false when data is not connected' do + allow(Legion::Settings).to receive(:[]).with(:data).and_return({ connected: false }) + expect(Legion::Data.connected?).to be false + end + + it 'returns false on error' do + allow(Legion::Settings).to receive(:[]).and_return({}) + allow(Legion::Settings).to receive(:[]).with(:data).and_raise(StandardError) + expect(Legion::Data.connected?).to be false + end + end + + describe '.reset_privileges!' do + it 'clears cached values' do + Legion::Data.instance_variable_set(:@write_privileges, { foo: true }) + Legion::Data.reset_privileges! + expect(Legion::Data.instance_variable_get(:@write_privileges)).to be_nil + end + end +end diff --git a/spec/legion/data/retention_spec.rb b/spec/legion/data/retention_spec.rb new file mode 100644 index 0000000..dcdeb25 --- /dev/null +++ b/spec/legion/data/retention_spec.rb @@ -0,0 +1,245 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/retention' + +RSpec.describe Legion::Data::Retention do + let(:db) { Legion::Data.connection } + let(:table) { :retention_test_records } + let(:archive_table) { :retention_test_records_archive } + + before(:each) do + db.drop_table?(table) + db.drop_table?(archive_table) + + db.create_table(table) do + primary_key :id + String :name + DateTime :created_at + end + end + + after(:each) do + db.drop_table?(archive_table) + db.drop_table?(table) + end + + def insert_record(name:, created_at:) + db[table].insert(name: name, created_at: created_at) + end + + describe '.archive_table_name' do + it 'appends _archive suffix as symbol' do + expect(described_class.archive_table_name(:tasks)).to eq(:tasks_archive) + end + + it 'works with string input' do + expect(described_class.archive_table_name('events')).to eq(:events_archive) + end + end + + 
describe '.archive_old_records' do + it 'moves records older than archive_after_days to archive table' do + insert_record(name: 'old', created_at: Time.now - (100 * 86_400)) + insert_record(name: 'recent', created_at: Time.now - (10 * 86_400)) + + described_class.archive_old_records(table: table, archive_after_days: 90) + + expect(db[table].count).to eq(1) + expect(db[table].first[:name]).to eq('recent') + expect(db[archive_table].count).to eq(1) + expect(db[archive_table].first[:name]).to eq('old') + end + + it 'returns the correct archived count' do + insert_record(name: 'old1', created_at: Time.now - (200 * 86_400)) + insert_record(name: 'old2', created_at: Time.now - (150 * 86_400)) + insert_record(name: 'new', created_at: Time.now) + + result = described_class.archive_old_records(table: table, archive_after_days: 90) + + expect(result[:archived]).to eq(2) + expect(result[:table]).to eq(table) + end + + it 'returns zero archived when no records are old enough' do + insert_record(name: 'fresh', created_at: Time.now - (5 * 86_400)) + + result = described_class.archive_old_records(table: table, archive_after_days: 90) + + expect(result[:archived]).to eq(0) + end + + it 'returns zero when no connection' do + allow(Legion::Data).to receive(:connection).and_return(nil) + result = described_class.archive_old_records(table: table) + expect(result[:archived]).to eq(0) + end + + it 'handles an empty table gracefully' do + result = described_class.archive_old_records(table: table, archive_after_days: 90) + expect(result[:archived]).to eq(0) + end + + it 'creates the archive table automatically if it does not exist' do + insert_record(name: 'old', created_at: Time.now - (100 * 86_400)) + + expect(db.table_exists?(archive_table)).to be false + described_class.archive_old_records(table: table, archive_after_days: 90) + expect(db.table_exists?(archive_table)).to be true + end + + it 'works with a custom date_column' do + db.drop_table?(table) + db.create_table(table) do + 
primary_key :id + String :name + DateTime :recorded_at + end + + db[table].insert(name: 'old', recorded_at: Time.now - (100 * 86_400)) + db[table].insert(name: 'new', recorded_at: Time.now) + + result = described_class.archive_old_records( + table: table, + date_column: :recorded_at, + archive_after_days: 90 + ) + + expect(result[:archived]).to eq(1) + expect(db[archive_table].first[:name]).to eq('old') + end + end + + describe '.purge_expired_records' do + before(:each) do + db.create_table(archive_table) do + primary_key :id + String :name + DateTime :created_at + DateTime :archived_at + end + end + + it 'deletes records from archive older than retention_years' do + db[archive_table].insert(name: 'ancient', created_at: Time.now - (8 * 365 * 86_400)) + db[archive_table].insert(name: 'recent_archive', created_at: Time.now - (2 * 365 * 86_400)) + + result = described_class.purge_expired_records(table: table, retention_years: 7) + + expect(result[:purged]).to eq(1) + expect(db[archive_table].count).to eq(1) + expect(db[archive_table].first[:name]).to eq('recent_archive') + end + + it 'returns the correct purged count' do + db[archive_table].insert(name: 'old1', created_at: Time.now - (10 * 365 * 86_400)) + db[archive_table].insert(name: 'old2', created_at: Time.now - (9 * 365 * 86_400)) + + result = described_class.purge_expired_records(table: table, retention_years: 7) + + expect(result[:purged]).to eq(2) + expect(result[:table]).to eq(table) + end + + it 'returns zero when archive table does not exist' do + db.drop_table?(archive_table) + result = described_class.purge_expired_records(table: table, retention_years: 7) + expect(result[:purged]).to eq(0) + end + + it 'handles an empty archive table gracefully' do + result = described_class.purge_expired_records(table: table, retention_years: 7) + expect(result[:purged]).to eq(0) + end + + it 'works with a custom date_column' do + db.drop_table?(archive_table) + db.create_table(archive_table) do + primary_key :id + 
String :name + DateTime :recorded_at + DateTime :archived_at + end + + db[archive_table].insert(name: 'ancient', recorded_at: Time.now - (8 * 365 * 86_400)) + db[archive_table].insert(name: 'recent', recorded_at: Time.now - (1 * 365 * 86_400)) + + result = described_class.purge_expired_records( + table: table, + date_column: :recorded_at, + retention_years: 7 + ) + + expect(result[:purged]).to eq(1) + expect(db[archive_table].first[:name]).to eq('recent') + end + end + + describe '.retention_status' do + it 'reports correct active and archived counts' do + db.create_table(archive_table) do + primary_key :id + String :name + DateTime :created_at + DateTime :archived_at + end + + insert_record(name: 'active1', created_at: Time.now) + insert_record(name: 'active2', created_at: Time.now) + db[archive_table].insert(name: 'arch1', created_at: Time.now - (200 * 86_400)) + + status = described_class.retention_status(table: table) + + expect(status[:table]).to eq(table) + expect(status[:active_count]).to eq(2) + expect(status[:archived_count]).to eq(1) + end + + it 'reports oldest_active timestamp' do + older = Time.now - (60 * 86_400) + insert_record(name: 'older', created_at: older) + insert_record(name: 'newer', created_at: Time.now) + + status = described_class.retention_status(table: table) + + expect(status[:oldest_active]).not_to be_nil + end + + it 'reports oldest_archived timestamp when archive exists' do + db.create_table(archive_table) do + primary_key :id + String :name + DateTime :created_at + DateTime :archived_at + end + + db[archive_table].insert(name: 'old', created_at: Time.now - (500 * 86_400)) + + status = described_class.retention_status(table: table) + + expect(status[:oldest_archived]).not_to be_nil + end + + it 'returns nil for oldest_active when table is empty' do + status = described_class.retention_status(table: table) + expect(status[:oldest_active]).to be_nil + end + + it 'returns nil for oldest_archived when archive table does not exist' do + 
status = described_class.retention_status(table: table) + expect(status[:archived_count]).to eq(0) + expect(status[:oldest_archived]).to be_nil + end + end + + describe 'constants' do + it 'defines DEFAULT_RETENTION_YEARS as 7' do + expect(described_class::DEFAULT_RETENTION_YEARS).to eq(7) + end + + it 'defines DEFAULT_ARCHIVE_AFTER_DAYS as 90' do + expect(described_class::DEFAULT_ARCHIVE_AFTER_DAYS).to eq(90) + end + end +end diff --git a/spec/legion/data/spool_spec.rb b/spec/legion/data/spool_spec.rb new file mode 100644 index 0000000..972e4b3 --- /dev/null +++ b/spec/legion/data/spool_spec.rb @@ -0,0 +1,276 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'tmpdir' +require 'fileutils' + +# Stub extension modules for testing +module Legion + module Extensions + module LLM + module Gateway; end + end + + module Metering; end + module Audit; end + end + + module LLM; end +end + +RSpec.describe Legion::Data::Spool do + let(:tmpdir) { Dir.mktmpdir('legion_spool_spec') } + + before do + described_class.root = tmpdir + end + + after do + described_class.instance_variable_set(:@root, nil) + FileUtils.rm_rf(tmpdir) + end + + describe '.root' do + it 'returns the configured root' do + expect(described_class.root).to eq(tmpdir) + end + + it 'defaults to ~/.legionio/data/spool when not set' do + described_class.instance_variable_set(:@root, nil) + expect(described_class.root).to eq(File.expand_path('~/.legionio/data/spool')) + end + end + + describe '.for' do + it 'returns a ScopedSpool' do + spool = described_class.for(Legion::Extensions::Metering) + expect(spool).to be_a(Legion::Data::Spool::ScopedSpool) + end + + it 'rejects modules not under the Legion namespace' do + expect { described_class.for(String) }.to raise_error(ArgumentError, /not under the Legion:: namespace/) + end + + it 'accepts core gem modules under Legion::' do + spool = described_class.for(Legion::LLM) + spool.write(:metering, { test: true }) + expect(Dir.exist?(File.join(tmpdir, 
'llm/metering'))).to be true + end + + it 'derives path from module name' do + spool = described_class.for(Legion::Extensions::LLM::Gateway) + spool.write(:metering, { test: true }) + expect(Dir.exist?(File.join(tmpdir, 'llm/gateway/metering'))).to be true + end + + it 'derives path for single-level extensions' do + spool = described_class.for(Legion::Extensions::Metering) + spool.write(:events, { test: true }) + expect(Dir.exist?(File.join(tmpdir, 'metering/events'))).to be true + end + end +end + +RSpec.describe Legion::Data::Spool::ScopedSpool do + let(:tmpdir) { Dir.mktmpdir('legion_spool_spec') } + let(:spool) { Legion::Data::Spool::ScopedSpool.new(Legion::Extensions::LLM::Gateway, tmpdir) } + let(:sub_ns) { :metering } + let(:subdir) { File.join(tmpdir, 'llm/gateway/metering') } + let(:quarantine_dir) { File.join(subdir, 'quarantine') } + + after do + FileUtils.rm_rf(tmpdir) + end + + describe '#write' do + it 'creates the sub-namespace directory if it does not exist' do + spool.write(sub_ns, foo: 'bar') + expect(Dir.exist?(File.join(tmpdir, 'llm/gateway/metering'))).to be true + end + + it 'creates a JSON file in the scoped directory' do + spool.write(sub_ns, foo: 'bar') + files = Dir[File.join(tmpdir, 'llm/gateway/metering', '*.json')] + expect(files.size).to eq(1) + end + + it 'returns the file path' do + path = spool.write(sub_ns, foo: 'bar') + expect(File.exist?(path)).to be true + end + + it 'writes valid JSON content' do + spool.write(sub_ns, key: 'value') + files = Dir[File.join(tmpdir, 'llm/gateway/metering', '*.json')] + content = JSON.parse(File.read(files.first), symbolize_names: true) + expect(content).to eq({ key: 'value' }) + end + + it 'does not leave temporary files behind' do + spool.write(sub_ns, key: 'value') + + expect(Dir[File.join(subdir, '.*.tmp-*')]).to be_empty + end + + it 'names files with timestamp-uuid pattern' do + path = spool.write(sub_ns, x: 1) + filename = File.basename(path, '.json') + expect(filename).to 
match(/\A\d{10,}-[0-9a-f-]{36}\z/) + end + + it 'isolates from other extensions' do + other_spool = Legion::Data::Spool::ScopedSpool.new(Legion::Extensions::Audit, tmpdir) + spool.write(sub_ns, from: 'gateway') + other_spool.write(sub_ns, from: 'audit') + expect(spool.count(sub_ns)).to eq(1) + expect(other_spool.count(sub_ns)).to eq(1) + end + end + + describe '#read' do + it 'returns an empty array for a missing sub-namespace' do + expect(spool.read(:nonexistent)).to eq([]) + end + + it 'returns parsed hashes with symbol keys' do + spool.write(sub_ns, foo: 'bar') + events = spool.read(sub_ns) + expect(events.first).to include(foo: 'bar') + end + + it 'returns events in FIFO order' do + spool.write(sub_ns, order: 1) + sleep 0.01 + spool.write(sub_ns, order: 2) + sleep 0.01 + spool.write(sub_ns, order: 3) + events = spool.read(sub_ns) + expect(events.map { |e| e[:order] }).to eq([1, 2, 3]) + end + + it 'sorts files by filename before reading' do + FileUtils.mkdir_p(subdir) + File.binwrite(File.join(subdir, '200.json'), JSON.generate(order: 2)) + File.binwrite(File.join(subdir, '100.json'), JSON.generate(order: 1)) + File.binwrite(File.join(subdir, '300.json'), JSON.generate(order: 3)) + + events = spool.read(sub_ns) + + expect(events.map { |e| e[:order] }).to eq([1, 2, 3]) + end + + it 'quarantines corrupt files and continues reading valid ones' do + FileUtils.mkdir_p(subdir) + File.binwrite(File.join(subdir, '100.json'), JSON.generate(order: 1)) + File.binwrite(File.join(subdir, '200.json'), '{"order":') + File.binwrite(File.join(subdir, '300.json'), JSON.generate(order: 2)) + + events = spool.read(sub_ns) + + expect(events.map { |e| e[:order] }).to eq([1, 2]) + expect(Dir[File.join(quarantine_dir, '*.corrupt')].size).to eq(1) + expect(spool.count(sub_ns)).to eq(2) + end + + it 'does not delete files' do + spool.write(sub_ns, x: 1) + spool.read(sub_ns) + expect(spool.count(sub_ns)).to eq(1) + end + end + + describe '#flush' do + it 'yields each event' do + 
spool.write(sub_ns, a: 1) + spool.write(sub_ns, a: 2) + yielded = [] + spool.flush(sub_ns) { |e| yielded << e } + expect(yielded.size).to eq(2) + end + + it 'deletes files after successful block execution' do + spool.write(sub_ns, a: 1) + spool.flush(sub_ns) { |_e| nil } + expect(spool.count(sub_ns)).to eq(0) + end + + it 'keeps the file when the block raises' do + spool.write(sub_ns, a: 1) + begin + spool.flush(sub_ns) { |_e| raise 'oops' } + rescue RuntimeError + nil + end + expect(spool.count(sub_ns)).to eq(1) + end + + it 'returns the number of successfully processed events' do + spool.write(sub_ns, a: 1) + spool.write(sub_ns, a: 2) + result = spool.flush(sub_ns) { |_e| nil } + expect(result).to eq(2) + end + + it 'processes events in FIFO order' do + spool.write(sub_ns, order: 1) + sleep 0.01 + spool.write(sub_ns, order: 2) + seen = [] + spool.flush(sub_ns) { |e| seen << e[:order] } + expect(seen).to eq([1, 2]) + end + + it 'quarantines corrupt files and continues draining valid ones' do + FileUtils.mkdir_p(subdir) + File.binwrite(File.join(subdir, '100.json'), JSON.generate(order: 1)) + File.binwrite(File.join(subdir, '200.json'), '{"order":') + File.binwrite(File.join(subdir, '300.json'), JSON.generate(order: 2)) + + seen = [] + result = spool.flush(sub_ns) { |e| seen << e[:order] } + + expect(seen).to eq([1, 2]) + expect(result).to eq(2) + expect(spool.count(sub_ns)).to eq(0) + expect(Dir[File.join(quarantine_dir, '*.corrupt')].size).to eq(1) + end + end + + describe '#count' do + it 'returns 0 for a missing sub-namespace' do + expect(spool.count(:nonexistent)).to eq(0) + end + + it 'returns the number of pending JSON files' do + spool.write(sub_ns, a: 1) + spool.write(sub_ns, a: 2) + expect(spool.count(sub_ns)).to eq(2) + end + + it 'decrements after flush' do + spool.write(sub_ns, a: 1) + spool.write(sub_ns, a: 2) + spool.flush(sub_ns) { |_e| nil } + expect(spool.count(sub_ns)).to eq(0) + end + end + + describe '#clear' do + it 'removes all JSON files in 
the sub-namespace' do + spool.write(sub_ns, a: 1) + spool.write(sub_ns, a: 2) + spool.clear(sub_ns) + expect(spool.count(sub_ns)).to eq(0) + end + + it 'does not raise for missing sub-namespace' do + expect { spool.clear(:nonexistent) }.not_to raise_error + end + + it 'leaves the directory in place after clearing' do + spool.write(sub_ns, a: 1) + spool.clear(sub_ns) + expect(Dir.exist?(File.join(tmpdir, 'llm/gateway/metering'))).to be true + end + end +end diff --git a/spec/legion/data/storage_tiers_spec.rb b/spec/legion/data/storage_tiers_spec.rb new file mode 100644 index 0000000..e6b4067 --- /dev/null +++ b/spec/legion/data/storage_tiers_spec.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/storage_tiers' + +RSpec.describe Legion::Data::StorageTiers do + describe '.archive_to_warm' do + it 'returns zero when no connection' do + allow(Legion::Data).to receive(:connection).and_return(nil) + result = described_class.archive_to_warm(table: :tasks) + expect(result[:archived]).to eq(0) + expect(result[:reason]).to eq('no_connection') + end + + it 'returns zero when no archive table' do + conn = Legion::Data.connection + allow(conn).to receive(:table_exists?).with(:data_archive).and_return(false) + result = described_class.archive_to_warm(table: :tasks) + expect(result[:archived]).to eq(0) + expect(result[:reason]).to eq('no_archive_table') + end + end + + describe '.export_to_cold' do + it 'returns zero when no archive table' do + conn = Legion::Data.connection + allow(conn).to receive(:table_exists?).with(:data_archive).and_return(false) + result = described_class.export_to_cold + expect(result[:exported]).to eq(0) + end + end + + describe 'TIERS' do + it 'defines three tiers' do + expect(described_class::TIERS.keys).to contain_exactly(:hot, :warm, :cold) + end + + it 'assigns ascending numeric values' do + expect(described_class::TIERS[:hot]).to eq(0) + expect(described_class::TIERS[:warm]).to eq(1) + 
expect(described_class::TIERS[:cold]).to eq(2) + end + end +end diff --git a/spec/legion/data/tls_spec.rb b/spec/legion/data/tls_spec.rb new file mode 100644 index 0000000..df8c64e --- /dev/null +++ b/spec/legion/data/tls_spec.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/connection' + +RSpec.describe Legion::Data::Connection do + describe '#merge_tls_creds' do + let(:base_creds) { { host: '127.0.0.1', port: 5432, user: 'legion', password: 'secret' } } + + before do + stub_const('Legion::Crypt::TLS', Module.new do + def self.resolve(config, **_opts) + if config[:enabled] + { enabled: true, verify: :peer, ca: '/etc/ssl/ca.pem', cert: nil, key: nil } + else + { enabled: false } + end + end + end) + end + + context 'when adapter is sqlite' do + it 'returns creds unchanged' do + result = described_class.merge_tls_creds(base_creds, adapter: :sqlite, port: nil) + expect(result).to eq(base_creds) + end + end + + context 'when data.tls.enabled is false (default)' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return( + { tls: { enabled: false } } + ) + end + + it 'returns creds unchanged for postgres' do + result = described_class.merge_tls_creds(base_creds.dup, adapter: :postgres, port: 5432) + expect(result[:sslmode]).to be_nil + end + + it 'returns creds unchanged for mysql2' do + result = described_class.merge_tls_creds(base_creds.dup, adapter: :mysql2, port: 3306) + expect(result[:ssl_mode]).to be_nil + end + end + + context 'when data.tls.enabled is true for postgres' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return( + { tls: { enabled: true, verify: 'peer' } } + ) + end + + it 'sets sslmode to verify-full' do + result = described_class.merge_tls_creds(base_creds.dup, adapter: :postgres, port: 5432) + expect(result[:sslmode]).to eq('verify-full') + end + + it 'sets sslrootcert when ca is present' do + result = described_class.merge_tls_creds(base_creds.dup, 
adapter: :postgres, port: 5432) + expect(result[:sslrootcert]).to eq('/etc/ssl/ca.pem') + end + end + + context 'when data.tls.enabled is true with verify none for postgres' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return( + { tls: { enabled: true, verify: 'none' } } + ) + + stub_const('Legion::Crypt::TLS', Module.new do + def self.resolve(_config, **_opts) + { enabled: true, verify: :none, ca: nil, cert: nil, key: nil } + end + end) + end + + it 'sets sslmode to require (not verify-full)' do + result = described_class.merge_tls_creds(base_creds.dup, adapter: :postgres, port: 5432) + expect(result[:sslmode]).to eq('require') + end + end + + context 'when data.tls.enabled is true for mysql2' do + before do + allow(Legion::Settings).to receive(:[]).with(:data).and_return( + { tls: { enabled: true, verify: 'peer' } } + ) + end + + it 'sets ssl_mode to verify_identity' do + result = described_class.merge_tls_creds(base_creds.dup, adapter: :mysql2, port: 3306) + expect(result[:ssl_mode]).to eq('verify_identity') + end + end + + context 'when Crypt::TLS is not defined' do + before do + hide_const('Legion::Crypt::TLS') + allow(Legion::Settings).to receive(:[]).with(:data).and_return( + { tls: { enabled: true } } + ) + end + + it 'returns creds unchanged' do + result = described_class.merge_tls_creds(base_creds.dup, adapter: :postgres, port: 5432) + expect(result[:sslmode]).to be_nil + end + end + end +end diff --git a/spec/legion/data/vector_spec.rb b/spec/legion/data/vector_spec.rb new file mode 100644 index 0000000..7e08013 --- /dev/null +++ b/spec/legion/data/vector_spec.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'legion/data/vector' + +RSpec.describe Legion::Data::Vector do + describe '.available?' 
do + it 'returns false when no connection' do + allow(Legion::Data).to receive(:connection).and_return(nil) + expect(described_class.available?).to be false + end + + it 'returns false for non-postgres adapter' do + conn = double(adapter_scheme: :sqlite) + allow(Legion::Data).to receive(:connection).and_return(conn) + expect(described_class.available?).to be false + end + end + + describe '.ensure_extension!' do + it 'returns false for non-postgres' do + conn = double(adapter_scheme: :sqlite) + allow(Legion::Data).to receive(:connection).and_return(conn) + expect(described_class.ensure_extension!).to be false + end + + it 'returns false when no connection' do + allow(Legion::Data).to receive(:connection).and_return(nil) + expect(described_class.ensure_extension!).to be false + end + end + + describe '.cosine_search' do + it 'returns empty when pgvector not available' do + allow(described_class).to receive(:available?).and_return(false) + result = described_class.cosine_search(table: :memory_traces, column: :embedding, query_vector: [0.1, 0.2]) + expect(result).to eq([]) + end + end + + describe '.l2_search' do + it 'returns empty when pgvector not available' do + allow(described_class).to receive(:available?).and_return(false) + result = described_class.l2_search(table: :memory_traces, column: :embedding, query_vector: [0.1, 0.2]) + expect(result).to eq([]) + end + end +end diff --git a/spec/legion/data_spec.rb b/spec/legion/data_spec.rb index 570879f..eb1b4a1 100644 --- a/spec/legion/data_spec.rb +++ b/spec/legion/data_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'spec_helper' RSpec.describe Legion::Data do diff --git a/spec/rls_spec.rb b/spec/rls_spec.rb new file mode 100644 index 0000000..272cfd1 --- /dev/null +++ b/spec/rls_spec.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +RSpec.describe Legion::Data::Rls do + describe '.rls_enabled?' 
do + it 'returns false for SQLite adapter' do + expect(described_class.rls_enabled?).to be(false) + end + end + + describe '.assign_tenant' do + it 'is a no-op on non-postgres' do + expect { described_class.assign_tenant('test') }.not_to raise_error + end + end + + describe '.current_tenant' do + it 'returns nil on non-postgres' do + expect(described_class.current_tenant).to be_nil + end + end + + describe '.reset_tenant' do + it 'is a no-op on non-postgres' do + expect { described_class.reset_tenant }.not_to raise_error + end + end + + describe '.with_tenant' do + it 'yields the block and returns its value' do + result = described_class.with_tenant('test') { 42 } + expect(result).to eq(42) + end + end + + describe '::RLS_TABLES' do + it 'lists all tables with tenant_id' do + expect(described_class::RLS_TABLES).to include(:tasks, :extensions, :memory_traces) + end + + it 'contains 10 tables' do + expect(described_class::RLS_TABLES.size).to eq(10) + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 6bdbee7..cbb6689 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + # begin # require 'simplecov' # SimpleCov.start do @@ -17,7 +19,16 @@ Legion::Settings.load require 'legion/data' +Legion::Settings[:data][:dev_mode] = true +Legion::Settings[:data][:creds] ||= {} +Legion::Settings[:data][:creds][:database] = 'legion_test.db' + +db_path = File.expand_path('~/.legionio/data/legion_test.db') +FileUtils.rm_f(db_path) + Legion::Data.setup +Legion::Data::Migration.migrate(Legion::Data::Connection.sequel, + File.expand_path('../lib/legion/data/migrations', __dir__)) RSpec.configure do |config| config.example_status_persistence_file_path = '.rspec_status'