From 41a201d785a964a10ab3981f089940f268d9be91 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Wed, 8 Apr 2026 17:59:13 +0300 Subject: [PATCH 01/13] back to basics --- .codespellignore | 2 - .credo.exs | 187 +++ .formatter.exs | 2 +- .github/workflows/bench.yml | 54 - .github/workflows/{test.yml => mix.yml} | 33 +- .github/workflows/spellcheck.yml | 22 - .typos.toml | 1 - README.md | 294 +--- bench/compress.exs | 19 + bench/support/compress.ex | 16 + lib/ch.ex | 148 +- lib/ch/connection.ex | 527 ------- lib/ch/pool.ex | 369 +++++ lib/ch/query.ex | 420 ------ lib/ch/result.ex | 28 - lib/ch/row_binary.ex | 7 +- lib/ch/stream.ex | 43 - lib/ch/telemetry.ex | 23 + lib/ch/types.ex | 5 +- mix.exs | 76 +- mix.lock | 18 +- test/ch/aggregation_test.exs | 214 --- test/ch/connect_test.exs | 22 - test/ch/connection_test.exs | 1830 ----------------------- test/ch/dynamic_test.exs | 439 ------ test/ch/faults_test.exs | 551 ------- test/ch/headers_test.exs | 77 - test/ch/http_test.exs | 66 - test/ch/json_test.exs | 362 ----- test/ch/pool_test.exs | 79 + test/ch/query_string_test.exs | 34 - test/ch/query_test.exs | 530 ------- test/ch/row_binary_test.exs | 6 +- test/ch/settings_test.exs | 26 - test/ch/stream_test.exs | 84 -- test/ch/variant_test.exs | 114 -- test/support/help.ex | 3 + test/support/test.ex | 123 -- test/test_helper.exs | 55 +- 39 files changed, 807 insertions(+), 6102 deletions(-) delete mode 100644 .codespellignore create mode 100644 .credo.exs delete mode 100644 .github/workflows/bench.yml rename .github/workflows/{test.yml => mix.yml} (71%) delete mode 100644 .github/workflows/spellcheck.yml create mode 100644 bench/compress.exs create mode 100644 bench/support/compress.ex delete mode 100644 lib/ch/connection.ex create mode 100644 lib/ch/pool.ex delete mode 100644 lib/ch/query.ex delete mode 100644 lib/ch/result.ex delete mode 100644 lib/ch/stream.ex create mode 100644 lib/ch/telemetry.ex delete mode 100644 test/ch/aggregation_test.exs delete mode 100644 
test/ch/connect_test.exs delete mode 100644 test/ch/connection_test.exs delete mode 100644 test/ch/dynamic_test.exs delete mode 100644 test/ch/faults_test.exs delete mode 100644 test/ch/headers_test.exs delete mode 100644 test/ch/http_test.exs delete mode 100644 test/ch/json_test.exs create mode 100644 test/ch/pool_test.exs delete mode 100644 test/ch/query_string_test.exs delete mode 100644 test/ch/query_test.exs delete mode 100644 test/ch/settings_test.exs delete mode 100644 test/ch/stream_test.exs delete mode 100644 test/ch/variant_test.exs create mode 100644 test/support/help.ex delete mode 100644 test/support/test.ex diff --git a/.codespellignore b/.codespellignore deleted file mode 100644 index 7ece9f26..00000000 --- a/.codespellignore +++ /dev/null @@ -1,2 +0,0 @@ -som -ECT diff --git a/.credo.exs b/.credo.exs new file mode 100644 index 00000000..15b8c357 --- /dev/null +++ b/.credo.exs @@ -0,0 +1,187 @@ +# This file contains the configuration for Credo and you are probably reading +# this after creating it with `mix credo.gen.config`. +# +# If you find anything wrong or unclear in this file, please report an +# issue on GitHub: https://github.com/rrrene/credo/issues +# +%{ + # + # You can have as many configs as you like in the `configs:` field. + configs: [ + %{ + # + # Run any config using `mix credo -C `. If no config name is given + # "default" is used. + # + name: "default", + # + # These are the files included in the analysis: + files: %{ + # + # You can give explicit globs or simply directories. + # In the latter case `**/*.{ex,exs}` will be used. + # + included: ["lib/", "test/"], + excluded: [~r"/_build/", ~r"/deps/"] + }, + # + # Load and configure plugins here: + # + plugins: [], + # + # If you create your own checks, you must specify the source files for + # them here, so they can be loaded by Credo before running the analysis. 
+ # + requires: [], + # + # If you want to enforce a style guide and need a more traditional linting + # experience, you can change `strict` to `true` below: + # + strict: false, + # + # To modify the timeout for parsing files, change this value: + # + parse_timeout: to_timeout(second: 5), + # + # If you want to use uncolored output by default, you can change `color` + # to `false` below: + # + color: true, + # + # You can customize the parameters of any check by adding a second element + # to the tuple. + # + # To disable a check put `false` as second element: + # + # {Credo.Check.Design.DuplicatedCode, false} + # + checks: %{ + enabled: [ + # + ## Consistency Checks + # + {Credo.Check.Consistency.ExceptionNames, []}, + {Credo.Check.Consistency.LineEndings, []}, + {Credo.Check.Consistency.MultiAliasImportRequireUse, []}, + {Credo.Check.Consistency.ParameterPatternMatching, []}, + {Credo.Check.Consistency.SpaceAroundOperators, []}, + {Credo.Check.Consistency.SpaceInParentheses, []}, + {Credo.Check.Consistency.TabsOrSpaces, []}, + + # + ## Design Checks + # + {Credo.Check.Design.DuplicatedCode, files: %{included: "lib/"}}, + {Credo.Check.Design.SkipTestWithoutComment, []}, + {Credo.Check.Design.TagFIXME, []}, + {Credo.Check.Design.TagTODO, exit_status: 2}, + + # + ## Readability Checks + # + {Credo.Check.Readability.AliasOrder, []}, + {Credo.Check.Readability.BlockPipe, []}, + {Credo.Check.Readability.FunctionNames, []}, + {Credo.Check.Readability.ImplTrue, []}, + {Credo.Check.Readability.MaxLineLength, priority: :low, max_length: 120}, + {Credo.Check.Readability.ModuleAttributeNames, []}, + {Credo.Check.Readability.ModuleDoc, []}, + {Credo.Check.Readability.ModuleNames, []}, + {Credo.Check.Readability.NestedFunctionCalls, min_pipeline_length: 3}, + {Credo.Check.Readability.OneArityFunctionInPipe, []}, + {Credo.Check.Readability.ParenthesesInCondition, []}, + {Credo.Check.Readability.ParenthesesOnZeroArityDefs, []}, + 
{Credo.Check.Readability.PipeIntoAnonymousFunctions, []}, + {Credo.Check.Readability.PredicateFunctionNames, []}, + {Credo.Check.Readability.PreferImplicitTry, []}, + {Credo.Check.Readability.RedundantBlankLines, []}, + {Credo.Check.Readability.Semicolons, []}, + {Credo.Check.Readability.SeparateAliasRequire, []}, + {Credo.Check.Readability.SingleFunctionToBlockPipe, []}, + {Credo.Check.Readability.SinglePipe, []}, + {Credo.Check.Readability.SpaceAfterCommas, []}, + {Credo.Check.Readability.StringSigils, []}, + {Credo.Check.Readability.TrailingBlankLine, []}, + {Credo.Check.Readability.TrailingWhiteSpace, []}, + {Credo.Check.Readability.UnnecessaryAliasExpansion, []}, + {Credo.Check.Readability.VariableNames, []}, + {Credo.Check.Readability.WithCustomTaggedTuple, []}, + {Credo.Check.Readability.WithSingleClause, []}, + + # + ## Refactoring Opportunities + # + {Credo.Check.Refactor.AppendSingleItem, []}, + {Credo.Check.Refactor.Apply, []}, + {Credo.Check.Refactor.CondStatements, []}, + {Credo.Check.Refactor.DoubleBooleanNegation, []}, + {Credo.Check.Refactor.FilterCount, []}, + {Credo.Check.Refactor.FilterFilter, []}, + {Credo.Check.Refactor.FilterReject, []}, + {Credo.Check.Refactor.FunctionArity, []}, + {Credo.Check.Refactor.IoPuts, []}, + {Credo.Check.Refactor.LongQuoteBlocks, []}, + {Credo.Check.Refactor.MapJoin, []}, + {Credo.Check.Refactor.MapMap, []}, + {Credo.Check.Refactor.MatchInCondition, []}, + {Credo.Check.Refactor.NegatedConditionsInUnless, []}, + {Credo.Check.Refactor.NegatedConditionsWithElse, []}, + {Credo.Check.Refactor.NegatedIsNil, []}, + {Credo.Check.Refactor.PassAsyncInTestCases, []}, + {Credo.Check.Refactor.RedundantWithClauseResult, []}, + {Credo.Check.Refactor.RejectFilter, []}, + {Credo.Check.Refactor.RejectReject, []}, + {Credo.Check.Refactor.UnlessWithElse, []}, + {Credo.Check.Refactor.UtcNowTruncate, []}, + {Credo.Check.Refactor.WithClauses, []}, + + # + ## Warnings + # + {Credo.Check.Warning.ApplicationConfigInModuleAttribute, []}, + 
{Credo.Check.Warning.BoolOperationOnSameValues, []}, + {Credo.Check.Warning.Dbg, []}, + {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []}, + {Credo.Check.Warning.IExPry, []}, + {Credo.Check.Warning.IoInspect, []}, + {Credo.Check.Warning.LeakyEnvironment, []}, + {Credo.Check.Warning.MapGetUnsafePass, []}, + {Credo.Check.Warning.MissedMetadataKeyInLoggerConfig, []}, + {Credo.Check.Warning.MixEnv, []}, + {Credo.Check.Warning.OperationOnSameValues, []}, + {Credo.Check.Warning.OperationWithConstantResult, []}, + {Credo.Check.Warning.RaiseInsideRescue, []}, + {Credo.Check.Warning.SpecWithStruct, []}, + {Credo.Check.Warning.UnsafeExec, []}, + {Credo.Check.Warning.UnusedEnumOperation, []}, + {Credo.Check.Warning.UnusedFileOperation, []}, + {Credo.Check.Warning.UnusedKeywordOperation, []}, + {Credo.Check.Warning.UnusedListOperation, []}, + {Credo.Check.Warning.UnusedPathOperation, []}, + {Credo.Check.Warning.UnusedRegexOperation, []}, + {Credo.Check.Warning.UnusedStringOperation, []}, + {Credo.Check.Warning.UnusedTupleOperation, []}, + {Credo.Check.Warning.WrongTestFileExtension, []} + ], + disabled: [ + {Credo.Check.Warning.LazyLogging, []}, + {Credo.Check.Warning.UnsafeToAtom, []}, + {Credo.Check.Readability.OnePipePerLine, []}, + {Credo.Check.Readability.MultiAlias, []}, + {Credo.Check.Readability.StrictModuleLayout, []}, + {Credo.Check.Refactor.ABCSize, []}, + {Credo.Check.Refactor.CyclomaticComplexity, []}, + {Credo.Check.Refactor.MapInto, []}, + {Credo.Check.Refactor.Nesting, []}, + {Credo.Check.Refactor.VariableRebinding, []}, + {Credo.Check.Refactor.PipeChainStart, []}, + {Credo.Check.Readability.LargeNumbers, []}, + {Credo.Check.Consistency.UnusedVariableNames, []}, + {Credo.Check.Readability.Specs, []}, + {Credo.Check.Readability.AliasAs, []}, + {Credo.Check.Refactor.ModuleDependencies, []} + ] + } + } + ] +} diff --git a/.formatter.exs b/.formatter.exs index 8eecf500..24cc1ec1 100644 --- a/.formatter.exs +++ b/.formatter.exs @@ -1,4 +1,4 @@ # Used by "mix format" 
[ - inputs: ["{mix,.formatter}.exs", "{config,lib,test,bench}/**/*.{ex,exs}"] + inputs: ["{mix,.credo,.formatter}.exs", "{config,lib,test,bench}/**/*.{ex,exs}"] ] diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml deleted file mode 100644 index f488ba22..00000000 --- a/.github/workflows/bench.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: bench - -on: - workflow_dispatch: - schedule: - - cron: "42 9 * * *" - -jobs: - benchee: - runs-on: ubuntu-latest - - env: - MIX_ENV: bench - - services: - clickhouse: - image: clickhouse/clickhouse-server:latest - ports: - - 8123:8123 - env: - # https://github.com/ClickHouse/ClickHouse/issues/75494 - CLICKHOUSE_SKIP_USER_SETUP: 1 - options: >- - --health-cmd nc -zw3 localhost 8124 - --health-interval 10s - --health-timeout 5s - --health-retries 5 - - steps: - - uses: actions/checkout@v6 - - - id: beam - uses: erlef/setup-beam@v1 - with: - elixir-version: 1.18 - otp-version: 28 - - - uses: actions/cache@v5 - with: - path: | - deps - _build - key: bench-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-${{ github.head_ref || github.ref }}-${{ hashFiles('**/mix.lock') }} - restore-keys: | - bench-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-${{ github.head_ref || github.ref }}- - bench-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-refs/heads/master- - - - run: mix deps.get --only $MIX_ENV - - run: mix compile --warnings-as-errors - - # - run: mix run bench/cast.exs - - run: mix run bench/encode.exs - - run: mix run bench/insert.exs - - run: mix run bench/stream.exs diff --git a/.github/workflows/test.yml b/.github/workflows/mix.yml similarity index 71% rename from .github/workflows/test.yml rename to .github/workflows/mix.yml index 9754efa1..048ae8eb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/mix.yml @@ -10,7 +10,7 @@ on: jobs: test: - name: "test (Elixir ${{ matrix.elixir }} / OTP ${{ matrix.otp }} / 
ClickHouse ${{ matrix.clickhouse || 'latest' }} / TZ ${{ matrix.timezone || 'UTC' }}${{ matrix.dialyzer && ' / Dialyzer' || '' }}${{ matrix.lint && ' / Lint' || '' }})" + name: "test (Elixir ${{ matrix.elixir }} / OTP ${{ matrix.otp }} / ClickHouse ${{ matrix.clickhouse || 'latest' }}${{ matrix.dialyzer && ' / Dialyzer' || '' }}${{ matrix.lint && ' / Lint' || '' }})" runs-on: ubuntu-latest env: @@ -19,41 +19,18 @@ jobs: strategy: matrix: include: - # some old elixir/erlang version - - elixir: 1.15 - otp: 25 - - # some recent version and non-UTC timezone - - elixir: 1.18 - otp: 27 - timezone: Europe/Berlin - # the latest elixir/erlang version with all static checks - elixir: 1.19 otp: 28 dialyzer: true lint: true - # Plausible versions - # - https://github.com/plausible/analytics/blob/master/.tool-versions - # - https://github.com/plausible/analytics/blob/master/.github/workflows/elixir.yml - - elixir: 1.19.4 - otp: 27.3.4.6 - clickhouse: 25.11.5.8 - - # some older pre-JSON ClickHouse version - # https://github.com/plausible/ch/issues/273 - - elixir: 1.18 - otp: 28 - clickhouse: 24.5.4.49 - services: clickhouse: image: clickhouse/clickhouse-server:${{ matrix.clickhouse || 'latest' }} ports: - 8123:8123 env: - TZ: ${{ matrix.timezone || 'UTC' }} # https://github.com/ClickHouse/ClickHouse/issues/75494 CLICKHOUSE_SKIP_USER_SETUP: 1 options: >- @@ -96,7 +73,10 @@ jobs: - run: mix format --check-formatted if: ${{ matrix.lint }} - - run: mix test --include slow + - run: mix credo --strict + if: ${{ matrix.lint }} + + - run: mix test - name: Restore PLTs cache if: ${{ matrix.dialyzer }} @@ -109,3 +89,6 @@ jobs: - run: mix dialyzer --format github if: ${{ matrix.dialyzer }} + + - uses: crate-ci/typos@master + if: ${{ matrix.lint }} diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml deleted file mode 100644 index 1bcf2f24..00000000 --- a/.github/workflows/spellcheck.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: spellcheck - -on: - pull_request: - 
push: - branches: [master] - -jobs: - codespell: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - uses: codespell-project/actions-codespell@v2 - with: - check_filenames: true - ignore_words_file: .codespellignore - - typos: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - uses: crate-ci/typos@master diff --git a/.typos.toml b/.typos.toml index 1b41395f..39a1b3e5 100644 --- a/.typos.toml +++ b/.typos.toml @@ -1,4 +1,3 @@ [default.extend-words] "som" = "som" # ./test/ch/ecto_type_test.exs -"ECT" = "ECT" # ./test/ch/query_test.exs "Evn" = "Evn" # ./CHANGELOG.md diff --git a/README.md b/README.md index 8f146d44..c0b33c89 100644 --- a/README.md +++ b/README.md @@ -7,309 +7,19 @@ Minimal HTTP [ClickHouse](https://clickhouse.com) client for Elixir. Used in [Ecto ClickHouse adapter.](https://github.com/plausible/ecto_ch) -### Key features - -- RowBinary -- Native query parameters -- Per query settings -- Minimal API - -Your ideas are welcome [here.](https://github.com/plausible/ch/issues/82) - ## Installation ```elixir defp deps do [ - {:ch, "~> 0.7.0"} + {:ch, "~> 0.9.0"} ] end ``` ## Usage -#### Start [DBConnection](https://github.com/elixir-ecto/db_connection) pool - -```elixir -defaults = [ - scheme: "http", - hostname: "localhost", - port: 8123, - database: "default", - settings: [], - pool_size: 1, - timeout: :timer.seconds(15) -] - -# note that starting in ClickHouse 25.1.3.23 `default` user doesn't have -# network access by default in the official Docker images -# see https://github.com/ClickHouse/ClickHouse/pull/75259 -{:ok, pid} = Ch.start_link(defaults) -``` - -#### Select rows - -```elixir -{:ok, pid} = Ch.start_link() - -{:ok, %Ch.Result{rows: [[0], [1], [2]]}} = - Ch.query(pid, "SELECT * FROM system.numbers LIMIT 3") - -{:ok, %Ch.Result{rows: [[0], [1], [2]]}} = - Ch.query(pid, "SELECT * FROM system.numbers LIMIT {$0:UInt8}", [3]) - -{:ok, %Ch.Result{rows: [[0], [1], [2]]}} = - Ch.query(pid, "SELECT * FROM system.numbers 
LIMIT {limit:UInt8}", %{"limit" => 3}) -``` - -Note on datetime encoding in query parameters: - -- `%NaiveDateTime{}` is encoded as text to make it assume the column's or ClickHouse server's timezone -- `%DateTime{}` is encoded as unix timestamp and is treated as UTC timestamp by ClickHouse - -#### Select rows (lots of params, reverse proxy) - -> [!NOTE] -> -> Support for multipart requests was added in `v0.6.2` - -For queries with many parameters the resulting URL can become too long for some reverse proxies, resulting in a `414 Request-URI Too Large` error. - -To avoid this, you can use the `multipart: true` option to send the query and parameters in the request body. - -```elixir -{:ok, pid} = Ch.start_link() - -# Moves parameters from the URL to a multipart/form-data body -%Ch.Result{rows: [[[1, 2, 3 | _rest]]]} = - Ch.query!(pid, "SELECT {ids:Array(UInt64)}", %{"ids" => Enum.to_list(1..10_000)}, multipart: true) -``` - -> [!NOTE] -> -> `multipart: true` is currently required on each individual query. Support for pool-wide configuration is planned for a future release. 
- -#### Insert rows - -```elixir -{:ok, pid} = Ch.start_link() - -Ch.query!(pid, "CREATE TABLE IF NOT EXISTS ch_demo(id UInt64) ENGINE Null") - -%Ch.Result{num_rows: 2} = - Ch.query!(pid, "INSERT INTO ch_demo(id) VALUES (0), (1)") - -%Ch.Result{num_rows: 2} = - Ch.query!(pid, "INSERT INTO ch_demo(id) VALUES ({$0:UInt8}), ({$1:UInt32})", [0, 1]) - -%Ch.Result{num_rows: 2} = - Ch.query!(pid, "INSERT INTO ch_demo(id) VALUES ({a:UInt16}), ({b:UInt64})", %{"a" => 0, "b" => 1}) - -%Ch.Result{num_rows: 2} = - Ch.query!(pid, "INSERT INTO ch_demo(id) SELECT number FROM system.numbers LIMIT {limit:UInt8}", %{"limit" => 2}) -``` - -#### Insert rows as [RowBinary](https://clickhouse.com/docs/en/interfaces/formats/RowBinary) (efficient) - -```elixir -{:ok, pid} = Ch.start_link() - -Ch.query!(pid, "CREATE TABLE IF NOT EXISTS ch_demo(id UInt64) ENGINE Null") - -types = ["UInt64"] -# or -types = [Ch.Types.u64()] -# or -types = [:u64] - -%Ch.Result{num_rows: 2} = - Ch.query!(pid, "INSERT INTO ch_demo(id) FORMAT RowBinary", [[0], [1]], types: types) -``` - -Note that RowBinary format encoding requires `:types` option to be provided. - -Similarly, you can use [RowBinaryWithNamesAndTypes](https://clickhouse.com/docs/en/interfaces/formats/RowBinaryWithNamesAndTypes) which would additionally do something like a type check. 
- -```elixir -sql = "INSERT INTO ch_demo FORMAT RowBinaryWithNamesAndTypes" -opts = [names: ["id"], types: ["UInt64"]] -rows = [[0], [1]] - -%Ch.Result{num_rows: 2} = Ch.query!(pid, sql, rows, opts) -``` - -#### Insert rows in custom [format](https://clickhouse.com/docs/en/interfaces/formats) - -```elixir -{:ok, pid} = Ch.start_link() - -Ch.query!(pid, "CREATE TABLE IF NOT EXISTS ch_demo(id UInt64) ENGINE Null") - -csv = [0, 1] |> Enum.map(&to_string/1) |> Enum.intersperse(?\n) - -%Ch.Result{num_rows: 2} = - Ch.query!(pid, "INSERT INTO ch_demo(id) FORMAT CSV", csv, encode: false) -``` - -#### Insert rows as chunked RowBinary stream - -```elixir -{:ok, pid} = Ch.start_link() - -Ch.query!(pid, "CREATE TABLE IF NOT EXISTS ch_demo(id UInt64) ENGINE Null") - -stream = Stream.repeatedly(fn -> [:rand.uniform(100)] end) -chunked = Stream.chunk_every(stream, 100) -encoded = Stream.map(chunked, fn chunk -> Ch.RowBinary.encode_rows(chunk, _types = ["UInt64"]) end) -ten_encoded_chunks = Stream.take(encoded, 10) - -%Ch.Result{num_rows: 1000} = - Ch.query(pid, "INSERT INTO ch_demo(id) FORMAT RowBinary", ten_encoded_chunks, encode: false) -``` - -This query makes a [`transfer-encoding: chunked`](https://en.wikipedia.org/wiki/Chunked_transfer_encoding) HTTP request while unfolding the stream resulting in lower memory usage. - -#### Query with custom [settings](https://clickhouse.com/docs/en/operations/settings/settings) - -```elixir -{:ok, pid} = Ch.start_link() - -settings = [async_insert: 1] - -%Ch.Result{rows: [["async_insert", "Bool", "0"]]} = - Ch.query!(pid, "SHOW SETTINGS LIKE 'async_insert'") - -%Ch.Result{rows: [["async_insert", "Bool", "1"]]} = - Ch.query!(pid, "SHOW SETTINGS LIKE 'async_insert'", [], settings: settings) -``` - -## Caveats - -#### NULL in RowBinary - -It's the same as in [ch-go](https://clickhouse.com/docs/en/integrations/go#nullable) - -> At insert time, Nil can be passed for both the normal and Nullable version of a column. 
For the former, the default value for the type will be persisted, e.g., an empty string for string. For the nullable version, a NULL value will be stored in ClickHouse. - -```elixir -{:ok, pid} = Ch.start_link() - -Ch.query!(pid, """ -CREATE TABLE ch_nulls ( - a UInt8 NULL, - b UInt8 DEFAULT 10, - c UInt8 NOT NULL -) ENGINE Memory -""") - -types = ["Nullable(UInt8)", "UInt8", "UInt8"] -inserted_rows = [[nil, nil, nil]] -selected_rows = [[nil, 0, 0]] - -%Ch.Result{num_rows: 1} = - Ch.query!(pid, "INSERT INTO ch_nulls(a, b, c) FORMAT RowBinary", inserted_rows, types: types) - -%Ch.Result{rows: ^selected_rows} = - Ch.query!(pid, "SELECT * FROM ch_nulls") -``` - -Note that in this example `DEFAULT 10` is ignored and `0` (the default value for `UInt8`) is persisted instead. - -However, [`input()`](https://clickhouse.com/docs/en/sql-reference/table-functions/input) can be used as a workaround: - -```elixir -sql = """ -INSERT INTO ch_nulls - SELECT * FROM input('a Nullable(UInt8), b Nullable(UInt8), c UInt8') - FORMAT RowBinary\ -""" - -Ch.query!(pid, sql, inserted_rows, types: ["Nullable(UInt8)", "Nullable(UInt8)", "UInt8"]) - -%Ch.Result{rows: [[0], [10]]} = - Ch.query!(pid, "SELECT b FROM ch_nulls ORDER BY b") -``` - -#### UTF-8 in RowBinary - -When decoding [`String`](https://clickhouse.com/docs/en/sql-reference/data-types/string) columns non UTF-8 characters are replaced with `�` (U+FFFD). 
This behaviour is similar to [`toValidUTF8`](https://clickhouse.com/docs/en/sql-reference/functions/string-functions#tovalidutf8) and [JSON format.](https://clickhouse.com/docs/en/interfaces/formats#json) - -```elixir -{:ok, pid} = Ch.start_link() - -Ch.query!(pid, "CREATE TABLE ch_utf8(str String) ENGINE Memory") - -bin = "\x61\xF0\x80\x80\x80b" -utf8 = "a�b" - -%Ch.Result{num_rows: 1} = - Ch.query!(pid, "INSERT INTO ch_utf8(str) FORMAT RowBinary", [[bin]], types: ["String"]) - -%Ch.Result{rows: [[^utf8]]} = - Ch.query!(pid, "SELECT * FROM ch_utf8") - -%Ch.Result{rows: %{"data" => [[^utf8]]}} = - pid |> Ch.query!("SELECT * FROM ch_utf8 FORMAT JSONCompact") |> Map.update!(:rows, &Jason.decode!/1) -``` - -To get raw binary from `String` columns use `:binary` type that skips UTF-8 checks. - -```elixir -%Ch.Result{rows: [[^bin]]} = - Ch.query!(pid, "SELECT * FROM ch_utf8", [], types: [:binary]) -``` - -#### Timezones in RowBinary - -Decoding non-UTC datetimes like `DateTime('Asia/Taipei')` requires a [timezone database.](https://hexdocs.pm/elixir/DateTime.html#module-time-zone-database) - -```elixir -Mix.install([:ch, :tz]) - -:ok = Calendar.put_time_zone_database(Tz.TimeZoneDatabase) - -{:ok, pid} = Ch.start_link() - -%Ch.Result{rows: [[~N[2023-04-25 17:45:09]]]} = - Ch.query!(pid, "SELECT CAST(now() as DateTime)") - -%Ch.Result{rows: [[~U[2023-04-25 17:45:11Z]]]} = - Ch.query!(pid, "SELECT CAST(now() as DateTime('UTC'))") - -%Ch.Result{rows: [[%DateTime{time_zone: "Asia/Taipei"} = taipei]]} = - Ch.query!(pid, "SELECT CAST(now() as DateTime('Asia/Taipei'))") - -"2023-04-26 01:45:12+08:00 CST Asia/Taipei" = to_string(taipei) -``` - -Encoding non-UTC datetimes works but might be slow due to timezone conversion: - -```elixir -Mix.install([:ch, :tz]) - -:ok = Calendar.put_time_zone_database(Tz.TimeZoneDatabase) - -{:ok, pid} = Ch.start_link() - -Ch.query!(pid, "CREATE TABLE ch_datetimes(name String, datetime DateTime) ENGINE Memory") - -naive = NaiveDateTime.utc_now() 
-utc = DateTime.utc_now() -taipei = DateTime.shift_zone!(utc, "Asia/Taipei") - -rows = [["naive", naive], ["utc", utc], ["taipei", taipei]] - -Ch.query!(pid, "INSERT INTO ch_datetimes(name, datetime) FORMAT RowBinary", rows, types: ["String", "DateTime"]) - -%Ch.Result{ - rows: [ - ["naive", ~U[2024-12-21 05:24:40Z]], - ["utc", ~U[2024-12-21 05:24:40Z]], - ["taipei", ~U[2024-12-21 05:24:40Z]] - ] -} = - Ch.query!(pid, "SELECT name, CAST(datetime as DateTime('UTC')) FROM ch_datetimes") -``` +See guides and tests for examples. ## [Benchmarks](./bench) diff --git a/bench/compress.exs b/bench/compress.exs new file mode 100644 index 00000000..78f93eae --- /dev/null +++ b/bench/compress.exs @@ -0,0 +1,19 @@ +rowbinary = fn count -> + Enum.map(1..count, fn i -> + row = [i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], DateTime.utc_now()] + Ch.RowBinary.encode_row(row, ["UInt64", "String", "Array(UInt8)", "DateTime"]) + end) +end + +Benchee.run( + %{ + "zstd once" => fn input -> :zstd.compress(input) end, + "zstd stream" => fn input -> Compress.zstd_stream(input) end, + "nimble_lz4 once" => fn input -> NimbleLZ4.compress(input) end + }, + inputs: %{ + "1 rows" => rowbinary.(1), + "1000 rows" => rowbinary.(1000), + "100,000 rows" => rowbinary.(100_000) + } +) diff --git a/bench/support/compress.ex b/bench/support/compress.ex new file mode 100644 index 00000000..4de5be75 --- /dev/null +++ b/bench/support/compress.ex @@ -0,0 +1,16 @@ +defmodule Compress do + def zstd_stream(input) when is_list(input) do + {:ok, ctx} = :zstd.context(:compress) + zstd_stream_continue(input, ctx) + end + + defp zstd_stream_continue([value | rest], ctx) do + {:continue, c} = :zstd.stream(ctx, value) + [c | zstd_stream_continue(rest, ctx)] + end + + defp zstd_stream_continue([], ctx) do + {:done, c} = :zstd.finish(ctx, []) + c + end +end diff --git a/lib/ch.ex b/lib/ch.ex index 6f2f567a..f0f74b86 100644 --- a/lib/ch.ex +++ b/lib/ch.ex @@ -1,133 +1,13 @@ defmodule Ch do @moduledoc 
"Minimal HTTP ClickHouse client." - alias Ch.{Connection, Query, Result} - - @typedoc """ - Options shared by both connection startup and query execution. - - * `:database` - Database, defaults to `"default"` - * `:username` - Username - * `:password` - User password - * `:settings` - Keyword list of ClickHouse settings - * `:timeout` - HTTP request/receive timeout in milliseconds - """ - @type common_option :: - {:database, String.t()} - | {:username, String.t()} - | {:password, String.t()} - | {:settings, Keyword.t()} - | {:timeout, timeout} - - @typedoc """ - Options for starting the connection pool. - - Includes all keys from `t:common_option/0` and `t:DBConnection.start_option/0` plus: - - * `:scheme` - HTTP scheme, defaults to `"http"` - * `:hostname` - server hostname, defaults to `"localhost"` - * `:port` - HTTP port, defaults to `8123` - * `:transport_opts` - options to be given to the transport being used. See `Mint.HTTP1.connect/4` for more info - """ - @type start_option :: - common_option - | {:scheme, String.t()} - | {:hostname, String.t()} - | {:port, :inet.port_number()} - | {:transport_opts, [:gen_tcp.connect_option() | :ssl.tls_client_option()]} - | DBConnection.start_option() - - @doc """ - Start the connection pool process. - - See `t:start_option/0` for available options. - """ - @spec start_link([start_option]) :: GenServer.on_start() - def start_link(opts \\ []) do - DBConnection.start_link(Connection, opts) - end - - @doc """ - Returns a supervisor child specification for a connection pool. - - See `t:start_option/0` for supported options. - """ - @spec child_spec([start_option]) :: :supervisor.child_spec() - def child_spec(opts) do - DBConnection.child_spec(Connection, opts) - end - - @typedoc """ - Options for executing a query. 
- - Includes all keys from `t:common_option/0` and `t:DBConnection.connection_option/0` plus: - - * `:command` - Command tag for the query - * `:headers` - Custom HTTP headers for the request - * `:format` - Custom response format for the request - * `:decode` - Whether to automatically decode the response - * `:multipart` - Whether to send the query as multipart/form-data - """ - @type query_option :: - common_option - | {:command, Ch.Query.command()} - | {:headers, [{String.t(), String.t()}]} - | {:format, String.t()} - | {:types, [String.t() | atom | tuple]} - # TODO remove - | {:encode, boolean} - | {:decode, boolean} - | {:multipart, boolean} - | DBConnection.connection_option() - - @doc """ - Runs a query and returns the result as `{:ok, %Ch.Result{}}` or - `{:error, Exception.t()}` if there was a database error. - - See `t:query_option/0` for available options. - """ - @spec query(DBConnection.conn(), iodata, params, [query_option]) :: - {:ok, Result.t()} | {:error, Exception.t()} - when params: map | [term] | [row :: [term]] | iodata | Enumerable.t() - def query(conn, statement, params \\ [], opts \\ []) do - query = Query.build(statement, opts) - - with {:ok, _query, result} <- DBConnection.execute(conn, query, params, opts) do - {:ok, result} - end - end - - @doc """ - Runs a query and returns the result or raises `Ch.Error` if - there was an error. See `query/4`. 
- """ - @spec query!(DBConnection.conn(), iodata, params, [query_option]) :: Result.t() - when params: map | [term] | [row :: [term]] | iodata | Enumerable.t() - def query!(conn, statement, params \\ [], opts \\ []) do - query = Query.build(statement, opts) - DBConnection.execute!(conn, query, params, opts) - end - - @doc false - @spec stream(DBConnection.t(), iodata, map | [term], [query_option]) :: Ch.Stream.t() - def stream(conn, statement, params \\ [], opts \\ []) do - query = Query.build(statement, opts) - %Ch.Stream{conn: conn, query: query, params: params, opts: opts} - end - - # TODO drop - @doc false - @spec run(DBConnection.conn(), (DBConnection.t() -> any), Keyword.t()) :: any - def run(conn, f, opts \\ []) when is_function(f, 1) do - DBConnection.run(conn, f, opts) - end if Code.ensure_loaded?(Ecto.ParameterizedType) do @behaviour Ecto.ParameterizedType - @impl true + @impl Ecto.ParameterizedType def type(params), do: {:parameterized, {Ch, params}} - @impl true + @impl Ecto.ParameterizedType def init(opts) do clickhouse_type = opts[:raw] || opts[:type] || @@ -136,13 +16,13 @@ defmodule Ch do Ch.Types.decode(clickhouse_type) end - @impl true + @impl Ecto.ParameterizedType def load(value, _loader, _params), do: {:ok, value} - @impl true + @impl Ecto.ParameterizedType def dump(value, _dumper, _params), do: {:ok, value} - @impl true + @impl Ecto.ParameterizedType def cast(value, :string = type), do: Ecto.Type.cast(type, value) def cast(value, :boolean = type), do: Ecto.Type.cast(type, value) def cast(value, :uuid), do: Ecto.Type.cast(Ecto.UUID, value) @@ -206,7 +86,10 @@ defmodule Ch do {:ok, value} _ when is_binary(value) -> - with {:error = e, _reason} <- :inet.parse_ipv4_address(to_charlist(value)), do: e + with {:error = e, _reason} <- + value |> String.to_charlist() |> :inet.parse_ipv4_address() do + e + end _ when is_list(value) -> with {:error = e, _reason} <- :inet.parse_ipv4_address(value), do: e @@ -224,7 +107,10 @@ defmodule Ch do {:ok, value} _ 
when is_binary(value) -> - with {:error = e, _reason} <- :inet.parse_ipv6_address(to_charlist(value)), do: e + with {:error = e, _reason} <- + value |> String.to_charlist() |> :inet.parse_ipv6_address() do + e + end _ when is_list(value) -> with {:error = e, _reason} <- :inet.parse_ipv6_address(value), do: e @@ -262,7 +148,7 @@ defmodule Ch do end end - defp cast_tuple([], [], acc), do: {:ok, List.to_tuple(:lists.reverse(acc))} + defp cast_tuple([], [], acc), do: {:ok, acc |> :lists.reverse() |> List.to_tuple()} defp cast_tuple(_types, _values, _acc), do: :error defp cast_map(value, key_type, value_type) when is_map(value) do @@ -294,13 +180,13 @@ defmodule Ch do defp cast_variant([], _value), do: :error - @impl true + @impl Ecto.ParameterizedType def embed_as(_, _), do: :self - @impl true + @impl Ecto.ParameterizedType def equal?(a, b, _), do: a == b - @impl true + @impl Ecto.ParameterizedType def format(params) do "#Ch<#{Ch.Types.encode(params)}>" end diff --git a/lib/ch/connection.ex b/lib/ch/connection.ex deleted file mode 100644 index b53394ab..00000000 --- a/lib/ch/connection.ex +++ /dev/null @@ -1,527 +0,0 @@ -defmodule Ch.Connection do - @moduledoc false - use DBConnection - require Logger - alias Ch.{Error, Query, Result, RowBinary} - alias Mint.HTTP1, as: HTTP - - @user_agent "ch/" <> Mix.Project.config()[:version] - - @typep conn :: HTTP.t() - - @impl true - @spec connect([Ch.start_option()]) :: {:ok, conn} | {:error, Error.t() | Mint.Types.error()} - def connect(opts) do - scheme = String.to_existing_atom(opts[:scheme] || "http") - address = opts[:hostname] || "localhost" - port = opts[:port] || 8123 - mint_opts = [mode: :passive] ++ Keyword.take(opts, [:hostname, :transport_opts]) - - with {:ok, conn} <- HTTP.connect(scheme, address, port, mint_opts) do - conn = - conn - |> HTTP.put_private(:timeout, opts[:timeout] || :timer.seconds(15)) - |> maybe_put_private(:database, opts[:database]) - |> maybe_put_private(:username, opts[:username]) - |> 
maybe_put_private(:password, opts[:password]) - |> maybe_put_private(:settings, opts[:settings]) - - handshake = Query.build("select 1, version()") - params = DBConnection.Query.encode(handshake, _params = [], _opts = []) - - case handle_execute(handshake, params, _opts = [], conn) do - {:ok, handshake, responses, conn} -> - case DBConnection.Query.decode(handshake, responses, _opts = []) do - %Result{rows: [[1, version]]} -> - conn = - if parse_version(version) >= parse_version("24.10") do - settings = - HTTP.get_private(conn, :settings, []) - |> Keyword.put_new(:input_format_binary_read_json_as_string, 1) - |> Keyword.put_new(:output_format_binary_write_json_as_string, 1) - - HTTP.put_private(conn, :settings, settings) - else - conn - end - - {:ok, conn} - - result -> - {:ok, _conn} = HTTP.close(conn) - reason = Error.exception("unexpected result for '#{handshake}': #{inspect(result)}") - {:error, reason} - end - - {:error, reason, conn} -> - {:ok, _conn} = HTTP.close(conn) - {:error, reason} - - {disconnect, reason, conn} when disconnect in [:disconnect, :disconnect_and_retry] -> - {:ok, _conn} = HTTP.close(conn) - {:error, reason} - end - end - catch - _kind, reason -> {:error, reason} - end - - defp parse_version(version) do - version - |> String.split(".") - |> Enum.flat_map(fn segment -> - case Integer.parse(segment) do - {int, _rest} -> [int] - :error -> [] - end - end) - end - - @impl true - @spec ping(conn) :: {:ok, conn} | {:disconnect, Mint.Types.error() | Error.t(), conn} - def ping(conn) do - headers = [{"user-agent", @user_agent}] - - case request(conn, "GET", "/ping", headers, _body = "", _opts = []) do - {:ok, conn, _response} -> {:ok, conn} - {:error, error, conn} -> {:disconnect, error, conn} - {:disconnect, _error, _conn} = disconnect -> disconnect - end - end - - @impl true - @spec checkout(conn) :: {:ok, conn} - def checkout(conn), do: {:ok, conn} - - # we "support" these four tx callbacks for Repo.checkout - # even though ClickHouse doesn't 
support txs - - @impl true - def handle_begin(_opts, conn), do: {:ok, %{}, conn} - @impl true - def handle_commit(_opts, conn), do: {:ok, %{}, conn} - @impl true - def handle_rollback(_opts, conn), do: {:ok, %{}, conn} - @impl true - def handle_status(_opts, conn), do: {:idle, conn} - - @impl true - def handle_prepare(_query, _opts, conn) do - {:error, Error.exception("prepared statements are not supported"), conn} - end - - @impl true - def handle_close(_query, _opts, conn) do - {:error, Error.exception("prepared statements are not supported"), conn} - end - - @impl true - def handle_declare(query, params, opts, conn) do - %Query{command: command, decode: decode} = query - {query_params, extra_headers, body} = params - - path = path(conn, query_params, opts) - headers = headers(conn, extra_headers, opts) - timeout = timeout(conn, opts) - - with {:ok, conn, _ref} <- send_request(conn, "POST", path, headers, body), - {:ok, conn, columns, headers, reader} <- recv_declare(conn, decode, timeout) do - result = %Result{ - command: command, - columns: columns, - rows: [], - num_rows: 0, - headers: headers, - data: [] - } - - {:ok, query, result, {conn, reader}} - else - {:error, _reason, _conn} = client_error -> client_error - {:disconnect, reason, conn} -> {:disconnect_and_retry, reason, conn} - end - end - - defp recv_declare(conn, decode, timeout) do - acc = %{decode: decode, step: :status, buffer: [], headers: []} - recv_declare_continue(conn, acc, timeout) - end - - defp recv_declare_continue(conn, acc, timeout) do - case HTTP.recv(conn, 0, timeout) do - {:ok, conn, responses} -> - case handle_recv_declare(responses, acc) do - {:ok, columns, headers, reader} -> - {:ok, conn, columns, headers, reader} - - {:more, acc} -> - recv_declare_continue(conn, acc, timeout) - - :error -> - all_responses_result = - case handle_all_responses(responses, []) do - {:ok, responses} -> {:ok, conn, responses} - {:more, acc} -> recv_all(conn, acc, timeout) - end - - with {:ok, conn, 
responses} <- all_responses_result do - [_status, headers | data] = responses - message = IO.iodata_to_binary(data) - - code = - if code = get_header(headers, "x-clickhouse-exception-code") do - String.to_integer(code) - end - - {:error, Error.exception(code: code, message: message), conn} - end - end - - {:error, conn, error, _responses} -> - {:disconnect, error, conn} - end - end - - defp handle_recv_declare([{:status, _ref, status} | responses], %{step: :status} = acc) do - case status do - 200 -> handle_recv_declare(responses, %{acc | step: :headers}) - _other -> :error - end - end - - defp handle_recv_declare([{:headers, _ref, headers} | responses], %{step: :headers} = acc) do - with %{decode: true} <- acc, - "RowBinaryWithNamesAndTypes" <- get_header(headers, "x-clickhouse-format") do - handle_recv_declare(responses, %{acc | headers: headers, step: :columns}) - else - _ -> - reader = %{decode: false, responses: responses} - {:ok, _columns = nil, headers, reader} - end - end - - defp handle_recv_declare([{:data, _ref, data} | responses], %{step: :columns} = acc) do - buffer = maybe_concat_buffer(acc.buffer, data) - - case RowBinary.decode_header(buffer) do - {:ok, names, types, buffer} -> - reader = %{buffer: buffer, types: types, state: nil, responses: responses} - {:ok, names, acc.headers, reader} - - :more -> - handle_recv_declare(responses, %{acc | buffer: buffer}) - end - end - - defp handle_recv_declare([], acc), do: {:more, acc} - - @compile inline: [maybe_concat_buffer: 2] - defp maybe_concat_buffer("", data), do: data - defp maybe_concat_buffer(buffer, data) when is_binary(buffer), do: buffer <> data - defp maybe_concat_buffer([], data), do: data - - @impl true - def handle_fetch(query, %Result{} = result, opts, {conn, reader}) do - case reader do - %{responses: []} -> - handle_fetch_recv(query, result, opts, conn, reader) - - %{decode: false, responses: responses} -> - case responses do - [{:data, _ref, data} | responses] -> - result = %Result{result 
| data: data} - reader = %{reader | responses: responses} - {:cont, result, {conn, reader}} - - [{:done, _ref}] -> - reader = %{reader | responses: []} - {:halt, result, {conn, reader}} - end - - %{buffer: buffer, types: types, state: state, responses: responses} -> - case responses do - [{:data, _ref, data} | responses] -> - buffer = maybe_concat_buffer(buffer, data) - {rows, buffer, state} = RowBinary.decode_rows_continue(buffer, types, state) - result = %Result{result | data: data, rows: rows, num_rows: length(rows)} - reader = %{reader | buffer: buffer, state: state, responses: responses} - {:cont, result, {conn, reader}} - - [{:done, _ref}] -> - reader = %{reader | responses: []} - {:halt, result, {conn, reader}} - end - end - end - - defp handle_fetch_recv(query, result, opts, conn, reader) do - timeout = timeout(conn, opts) - - case HTTP.recv(conn, 0, timeout) do - {:ok, conn, responses} -> - reader = %{reader | responses: responses} - handle_fetch(query, result, opts, {conn, reader}) - - {:error, conn, reason, _responses} -> - {:disconnect, reason, conn} - end - end - - @impl true - def handle_deallocate(_query, %Result{} = result, _opts, {conn, _reader}) do - case HTTP.open_request_count(conn) do - 0 -> - {:ok, %{result | data: []}, conn} - - 1 -> - error = - Error.exception("stopping stream before receiving full response by closing connection") - - {:disconnect, error, conn} - end - end - - @impl true - def handle_execute(%Query{} = query, {:stream, params}, opts, conn) do - {query_params, extra_headers, body} = params - - path = path(conn, query_params, opts) - headers = headers(conn, extra_headers, opts) - - with {:ok, conn, ref} <- send_request(conn, "POST", path, headers, :stream) do - case HTTP.stream_request_body(conn, ref, body) do - {:ok, conn} -> {:ok, query, ref, conn} - {:error, conn, reason} -> {:disconnect_and_retry, reason, conn} - end - end - end - - def handle_execute(%Query{} = query, {:stream, ref, body}, opts, conn) do - case 
HTTP.stream_request_body(conn, ref, body) do - {:ok, conn} -> - case body do - :eof -> - with {:ok, conn, responses} <- receive_full_response(conn, timeout(conn, opts)) do - {:ok, query, responses, conn} - end - - _other -> - {:ok, query, ref, conn} - end - - {:error, conn, reason} -> - {:disconnect_and_retry, reason, conn} - end - end - - def handle_execute(%Query{command: :insert} = query, params, opts, conn) do - {query_params, extra_headers, body} = params - - path = path(conn, query_params, opts) - headers = headers(conn, extra_headers, opts) - - result = - if is_function(body, 2) do - request_chunked(conn, "POST", path, headers, body, opts) - else - request(conn, "POST", path, headers, body, opts) - end - - case result do - {:ok, conn, responses} -> {:ok, query, responses, conn} - {:error, _reason, _conn} = client_error -> client_error - {:disconnect, reason, conn} -> {:disconnect_and_retry, reason, conn} - end - end - - def handle_execute(query, params, opts, conn) do - {query_params, extra_headers, body} = params - - path = path(conn, query_params, opts) - headers = headers(conn, extra_headers, opts) - - case request(conn, "POST", path, headers, body, opts) do - {:ok, conn, responses} -> {:ok, query, responses, conn} - {:error, _reason, _conn} = client_error -> client_error - {:disconnect, reason, conn} -> {:disconnect_and_retry, reason, conn} - end - end - - @impl true - def disconnect(error, {conn, _reader}) do - disconnect(error, conn) - end - - def disconnect(_error, conn) do - {:ok = ok, _conn} = HTTP.close(conn) - ok - end - - @typep response :: Mint.Types.status() | Mint.Types.headers() | binary - - @spec request(conn, binary, binary, Mint.Types.headers(), iodata, [Ch.query_option()]) :: - {:ok, conn, [response]} - | {:error, Error.t(), conn} - | {:disconnect, Mint.Types.error(), conn} - defp request(conn, method, path, headers, body, opts) do - with {:ok, conn, _ref} <- send_request(conn, method, path, headers, body) do - receive_full_response(conn, 
timeout(conn, opts)) - end - end - - @spec request_chunked(conn, binary, binary, Mint.Types.headers(), Enumerable.t(), Keyword.t()) :: - {:ok, conn, [response]} - | {:error, Error.t(), conn} - | {:disconnect, Mint.Types.error(), conn} - def request_chunked(conn, method, path, headers, stream, opts) do - with {:ok, conn, ref} <- send_request(conn, method, path, headers, :stream), - {:ok, conn} <- stream_body(conn, ref, stream), - do: receive_full_response(conn, timeout(conn, opts)) - end - - @spec stream_body(conn, Mint.Types.request_ref(), Enumerable.t()) :: - {:ok, conn} | {:disconnect, Mint.Types.error(), conn} - defp stream_body(conn, ref, stream) do - result = - stream - |> Stream.concat([:eof]) - |> Enum.reduce_while({:ok, conn}, fn - chunk, {:ok, conn} -> {:cont, HTTP.stream_request_body(conn, ref, chunk)} - _chunk, {:error, _conn, _reason} = error -> {:halt, error} - end) - - case result do - {:ok, _conn} = ok -> ok - {:error, conn, reason} -> {:disconnect, reason, conn} - end - end - - # stacktrace is a bit cleaner with this function inlined - @compile inline: [send_request: 5] - defp send_request(conn, method, path, headers, body) do - case HTTP.request(conn, method, path, headers, body) do - {:ok, _conn, _ref} = ok -> ok - {:error, conn, reason} -> {:disconnect, reason, conn} - end - end - - @spec receive_full_response(conn, timeout) :: - {:ok, conn, [response]} - | {:error, Error.t(), conn} - | {:disconnect, Mint.Types.error(), conn} - defp receive_full_response(conn, timeout) do - with {:ok, conn, responses} <- recv_all(conn, [], timeout) do - case responses do - [200, headers | _rest] -> - conn = ensure_same_server(conn, headers) - {:ok, conn, responses} - - [_status, headers | data] -> - message = IO.iodata_to_binary(data) - - code = - if code = get_header(headers, "x-clickhouse-exception-code") do - String.to_integer(code) - end - - {:error, Error.exception(code: code, message: message), conn} - end - end - end - - @spec recv_all(conn, [response], 
timeout()) :: - {:ok, conn, [response]} | {:disconnect, Mint.Types.error(), conn} - defp recv_all(conn, acc, timeout) do - case HTTP.recv(conn, 0, timeout) do - {:ok, conn, responses} -> - case handle_all_responses(responses, acc) do - {:ok, responses} -> {:ok, conn, responses} - {:more, acc} -> recv_all(conn, acc, timeout) - end - - {:error, conn, reason, _responses} -> - {:disconnect, reason, conn} - end - end - - for tag <- [:data, :status, :headers] do - defp handle_all_responses([{unquote(tag), _ref, data} | rest], acc) do - handle_all_responses(rest, [data | acc]) - end - end - - defp handle_all_responses([{:done, _ref}], acc), do: {:ok, :lists.reverse(acc)} - defp handle_all_responses([], acc), do: {:more, acc} - - defp maybe_put_private(conn, _k, nil), do: conn - defp maybe_put_private(conn, k, v), do: HTTP.put_private(conn, k, v) - - defp timeout(conn), do: HTTP.get_private(conn, :timeout) - defp timeout(conn, opts), do: Keyword.get(opts, :timeout) || timeout(conn) - - defp settings(conn, opts) do - default_settings = HTTP.get_private(conn, :settings, []) - opts_settings = Keyword.get(opts, :settings, []) - Keyword.merge(default_settings, opts_settings) - end - - defp headers(conn, extra_headers, opts) do - extra_headers - |> maybe_put_new_header("x-clickhouse-user", get_opts_or_private(conn, opts, :username)) - |> maybe_put_new_header("x-clickhouse-key", get_opts_or_private(conn, opts, :password)) - |> maybe_put_new_header("x-clickhouse-database", get_opts_or_private(conn, opts, :database)) - |> maybe_put_new_header("user-agent", @user_agent) - end - - defp get_opts_or_private(conn, opts, key) do - Keyword.get(opts, key) || HTTP.get_private(conn, key) - end - - defp maybe_put_new_header(headers, _name, _no_value = nil), do: headers - - defp maybe_put_new_header(headers, name, value) do - if List.keymember?(headers, name, 0) do - headers - else - [{name, value} | headers] - end - end - - defp get_header(headers, key) do - case List.keyfind(headers, key, 0) 
do - {_, value} -> value - nil = not_found -> not_found - end - end - - defp path(conn, query_params, opts) do - settings = settings(conn, opts) - "/?" <> URI.encode_query(settings ++ query_params) - end - - @server_display_name_key :server_display_name - - @spec ensure_same_server(conn, Mint.Types.headers()) :: conn - defp ensure_same_server(conn, headers) do - expected_name = HTTP.get_private(conn, @server_display_name_key) - actual_name = get_header(headers, "x-clickhouse-server-display-name") - - cond do - expected_name && actual_name -> - unless actual_name == expected_name do - Logger.warning( - "Server mismatch detected. Expected #{inspect(expected_name)} but got #{inspect(actual_name)}!" <> - " Connection pooling might be unstable." - ) - end - - conn - - actual_name -> - HTTP.put_private(conn, @server_display_name_key, actual_name) - - true -> - conn - end - end -end diff --git a/lib/ch/pool.ex b/lib/ch/pool.ex new file mode 100644 index 00000000..1f3f0753 --- /dev/null +++ b/lib/ch/pool.ex @@ -0,0 +1,369 @@ +defmodule Ch.Pool do + @moduledoc """ + TODO + """ + + @behaviour NimblePool + + @type statement :: iodata + @type params :: %{String.t() => term} + + @pool_size 10 + @worker_idle_timeout to_timeout(second: 5) + @query_timeout to_timeout(second: 30) + + # TODO + @type query_result :: term + @type query_error :: Ch.Error.t() | Mint.Types.error() + + # TODO nimble options, todo can pass settings + @spec start_link(keyword) :: GenServer.on_start() + def start_link(options) do + {name, options} = Keyword.pop(options, :name) + {pool_size, options} = Keyword.pop(options, :pool_size, @pool_size) + + {worker_idle_timeout, options} = + Keyword.pop(options, :worker_idle_timeout, @worker_idle_timeout) + + NimblePool.start_link( + worker: {__MODULE__, options}, + pool_size: pool_size, + worker_idle_timeout: worker_idle_timeout, + lazy: true, + name: name + ) + end + + @spec child_spec(keyword) :: Supervisor.child_spec() + def child_spec(options) do + options + |> 
Keyword.put(:worker, {__MODULE__, options}) + |> NimblePool.child_spec() + end + + @spec query(NimblePool.pool(), statement, params, keyword) :: + {:ok, query_result} | {:error, query_error} + def query(pool, statement, params \\ %{}, options \\ []) do + request = encode_request("POST", statement, params, options) + + {timeout, options} = Keyword.pop(options, :timeout, @query_timeout) + deadline = deadline_from_timeout(timeout) + + # TODO retry on closed + result = + NimblePool.checkout!( + pool, + :request, + fn {pid, _ref}, conn -> + # TODO what if caller dies? does nimble pool terminate the worker? probably + # TODO retry transient closed/etc. errors? + with {:ok, conn} <- ensure_connected(conn, pid, deadline), + {:ok, conn, response} <- request(conn, request, deadline) do + {{:ok, response}, checkin(conn)} + else + {:error, reason} = error -> {error, {:remove, reason}} + end + end, + timeout_from_deadline(deadline) + ) + + with {:ok, response} <- result do + decode_response(response, options) + end + end + + @spec query!(NimblePool.pool(), statement, params, keyword) :: query_result + def query!(pool, statement, params \\ %{}, options \\ []) do + case query(pool, statement, params, options) do + {:ok, result} -> result + {:error, error} -> raise error + end + end + + @spec stop(NimblePool.pool(), reason :: term, timeout) :: :ok + def stop(pool, reason \\ :normal, timeout \\ :infinity) do + NimblePool.stop(pool, reason, timeout) + end + + @impl NimblePool + def init_pool(options) do + scheme = Keyword.fetch!(options, :scheme) + host = Keyword.fetch!(options, :host) + port = Keyword.fetch!(options, :port) + + transport_options = + options + |> Keyword.get(:transport_options, []) + |> Keyword.put(:mode, :passive) + + config = %{ + scheme: scheme, + host: host, + port: port, + transport_options: transport_options + } + + {:ok, config} + end + + @impl NimblePool + def init_worker(config) do + %{scheme: scheme, host: host, port: port, transport_options: options} = 
config + {:ok, {:idle, scheme, host, port, options}, config} + end + + @impl NimblePool + def handle_checkout(:request, _from, conn, config) do + {:ok, conn, conn, config} + end + + @impl NimblePool + def handle_checkin({:ok, conn}, _from, _conn, config) do + {:ok, {:connected, conn}, config} + end + + def handle_checkin({:remove, reason}, _from, _conn, config) do + {:remove, reason, config} + end + + @impl NimblePool + def handle_ping(_conn, _config) do + {:remove, :idle_timeout} + end + + # TODO handle_info + + @impl NimblePool + def terminate_worker(_reason, conn, config) do + with {:connected, conn} <- conn, do: Mint.HTTP1.close(conn) + {:ok, config} + end + + defp deadline_from_timeout(:infinity = inf), do: inf + + defp deadline_from_timeout(timeout) do + System.monotonic_time(:millisecond) + timeout + end + + defp timeout_from_deadline(:infinity = inf), do: inf + + defp timeout_from_deadline(deadline) do + max(0, deadline - System.monotonic_time(:millisecond)) + end + + defp ensure_connected({:idle, scheme, host, port, options}, owner, deadline) do + timeout = timeout_from_deadline(deadline) + options = Keyword.put(options, :timeout, timeout) + + case Mint.HTTP1.connect(scheme, host, port, options) do + {:ok, conn} -> + case Mint.HTTP1.controlling_process(conn, owner) do + {:ok, _conn} = ok -> + ok + + {:error, _reason} = error -> + Mint.HTTP1.close(conn) + error + end + + {:error, _reason} = error -> + error + end + end + + defp ensure_connected({:connected, conn}, _owner, _deadline), do: {:ok, conn} + + defp encode_request(method, statement, params, options) do + settings = Keyword.get(options, :settings, []) + + headers = + options + |> Keyword.get(:headers, []) + |> put_new_header("x-clickhouse-format", "RowBinaryWithNamesAndTypes") + + path = "/?" 
<> URI.encode_query(settings ++ encode_params(params)) + %{method: method, path: path, headers: headers, body: statement} + end + + defp request(conn, request, deadline) do + %{method: method, path: path, headers: headers, body: body} = request + + case Mint.HTTP1.request(conn, method, path, headers, body) do + {:ok, conn, _ref} -> + receive_response(conn, [], deadline) + + {:error, conn, reason} -> + _todo = Mint.HTTP1.close(conn) + {:error, reason} + end + end + + defp receive_response(conn, acc, deadline) do + timeout = timeout_from_deadline(deadline) + + case Mint.HTTP1.recv(conn, 0, timeout) do + {:ok, conn, fragments} -> + case handle_response_fragments(fragments, acc) do + {:ok, response} -> {:ok, conn, response} + {:more, acc} -> receive_response(conn, acc, deadline) + end + + {:error, conn, reason, _fragments} -> + _todo = Mint.HTTP1.close(conn) + {:error, reason} + end + end + + for tag <- [:data, :status, :headers] do + defp handle_response_fragments([{unquote(tag), _ref, data} | rest], acc) do + handle_response_fragments(rest, [data | acc]) + end + end + + defp handle_response_fragments([{:done, _ref}], acc), do: {:ok, :lists.reverse(acc)} + defp handle_response_fragments([], acc), do: {:more, acc} + + defp checkin(conn) do + if Mint.HTTP1.open?(conn) do + {:ok, conn} + else + {:remove, Mint.TransportError.exception(reason: :closed)} + end + end + + defp decode_response(response, _options) do + case response do + [200, headers | data] -> + result = + case get_header(headers, "x-clickhouse-format") do + "RowBinaryWithNamesAndTypes" -> + [names | rows] = + data + |> IO.iodata_to_binary() + |> Ch.RowBinary.decode_names_and_rows() + + %{columns: names, rows: rows} + + _other -> + %{data: data} + end + + {:ok, result} + + [_status, headers | data] -> + message = IO.iodata_to_binary(data) + + code = + if code = get_header(headers, "x-clickhouse-exception-code") do + String.to_integer(code) + end + + {:error, Ch.Error.exception(code: code, message: message)} + 
end + end + + defp put_new_header(headers, name, value) do + if List.keymember?(headers, name, 0) do + headers + else + [{name, value} | headers] + end + end + + defp get_header(headers, key) do + case List.keyfind(headers, key, 0) do + {_, value} -> value + nil = not_found -> not_found + end + end + + defp encode_params(params) when is_map(params) do + Enum.map(params, fn {k, v} -> {"param_#{k}", encode_param(v)} end) + end + + defp encode_param(n) when is_integer(n), do: Integer.to_string(n) + defp encode_param(f) when is_float(f), do: Float.to_string(f) + + defp encode_param(b) when is_binary(b) do + escape_param([{"\\", "\\\\"}, {"\t", "\\\t"}, {"\n", "\\\n"}], b) + end + + defp encode_param(b) when is_boolean(b), do: Atom.to_string(b) + defp encode_param(nil), do: "\\N" + defp encode_param(%Decimal{} = d), do: Decimal.to_string(d, :normal) + defp encode_param(%Date{} = date), do: Date.to_iso8601(date) + defp encode_param(%NaiveDateTime{} = naive), do: NaiveDateTime.to_iso8601(naive) + defp encode_param(%Time{} = time), do: Time.to_iso8601(time) + + defp encode_param(%DateTime{microsecond: microsecond} = dt) do + dt = DateTime.shift_zone!(dt, "Etc/UTC") + + case microsecond do + {val, precision} when val > 0 and precision > 0 -> + size = round(:math.pow(10, precision)) + unix = DateTime.to_unix(dt, size) + seconds = div(unix, size) + fractional = rem(unix, size) + + IO.iodata_to_binary([ + Integer.to_string(seconds), + ?., + String.pad_leading(Integer.to_string(fractional), precision, "0") + ]) + + _ -> + dt |> DateTime.to_unix(:second) |> Integer.to_string() + end + end + + defp encode_param(tuple) when is_tuple(tuple) do + IO.iodata_to_binary([?(, encode_array_params(Tuple.to_list(tuple)), ?)]) + end + + defp encode_param(a) when is_list(a) do + IO.iodata_to_binary([?[, encode_array_params(a), ?]]) + end + + defp encode_param(m) when is_map(m) do + IO.iodata_to_binary([?{, encode_map_params(Map.to_list(m)), ?}]) + end + + defp encode_array_params([last]), do: 
encode_array_param(last) + + defp encode_array_params([s | rest]) do + [encode_array_param(s), ?, | encode_array_params(rest)] + end + + defp encode_array_params([] = empty), do: empty + + defp encode_map_params([last]), do: encode_map_param(last) + + defp encode_map_params([kv | rest]) do + [encode_map_param(kv), ?, | encode_map_params(rest)] + end + + defp encode_map_params([] = empty), do: empty + + defp encode_array_param(s) when is_binary(s) do + [?', escape_param([{"'", "''"}, {"\\", "\\\\"}], s), ?'] + end + + defp encode_array_param(nil), do: "null" + + defp encode_array_param(%s{} = param) when s in [Date, NaiveDateTime] do + [?', encode_param(param), ?'] + end + + defp encode_array_param(v), do: encode_param(v) + + defp encode_map_param({k, v}) do + [encode_array_param(k), ?:, encode_array_param(v)] + end + + defp escape_param([{pattern, replacement} | escapes], param) do + param = String.replace(param, pattern, replacement) + escape_param(escapes, param) + end + + defp escape_param([], param), do: param +end diff --git a/lib/ch/query.ex b/lib/ch/query.ex deleted file mode 100644 index 37e05966..00000000 --- a/lib/ch/query.ex +++ /dev/null @@ -1,420 +0,0 @@ -defmodule Ch.Query do - @moduledoc "Query struct wrapping the SQL statement." - defstruct [:statement, :command, :encode, :decode, :multipart] - - @typedoc """ - The Query struct. - - ## Fields - - * `:statement` - The SQL statement to be executed (as `t:iodata/0`). - * `:command` - The detected or enforced SQL command type (e.g., `:select`, `:insert`). - * `:encode` - Whether to encode parameters (defaults to `true`). - * `:decode` - Whether to decode the response (defaults to `true`). - * `:multipart` - Whether to use `multipart/form-data` for the request (defaults to `false`). 
- """ - @type t :: %__MODULE__{ - statement: iodata, - command: command, - encode: boolean, - decode: boolean, - multipart: boolean - } - - @doc false - @spec build(iodata, [Ch.query_option()]) :: t - def build(statement, opts \\ []) do - command = Keyword.get(opts, :command) || extract_command(statement) - encode = Keyword.get(opts, :encode, true) - decode = Keyword.get(opts, :decode, true) - multipart = Keyword.get(opts, :multipart, false) - - %__MODULE__{ - statement: statement, - command: command, - encode: encode, - decode: decode, - multipart: multipart - } - end - - statements = [ - {"SELECT", :select}, - {"INSERT", :insert}, - {"CREATE", :create}, - {"ALTER", :alter}, - {"DELETE", :delete}, - {"SYSTEM", :system}, - {"SHOW", :show}, - # as of ClickHouse 24.11, WITH is only allowed in SELECT - # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ - {"WITH", :select}, - {"GRANT", :grant}, - {"EXPLAIN", :explain}, - {"REVOKE", :revoke}, - {"UPDATE", :update}, - {"ATTACH", :attach}, - {"CHECK", :check}, - {"DESCRIBE", :describe}, - {"DETACH", :detach}, - {"DROP", :drop}, - {"EXISTS", :exists}, - {"KILL", :kill}, - {"OPTIMIZE", :optimize}, - {"RENAME", :rename}, - {"EXCHANGE", :exchange}, - {"SET", :set}, - {"TRUNCATE", :truncate}, - {"USE", :use}, - {"WATCH", :watch}, - {"MOVE", :move}, - {"UNDROP", :undrop} - ] - - command_union = - statements - |> Enum.map(fn {_, command} -> command end) - |> Enum.reduce(&{:|, [], [&1, &2]}) - - @typedoc """ - Atom representing the type of SQL command. - - Derived automatically from the start of the SQL statement (e.g., `"SELECT ..."` -> `:select`), - or provided explicitly via options. 
- """ - @type command :: unquote(command_union) - - defp extract_command(statement) - - for {statement, command} <- statements do - defp extract_command(unquote(statement) <> _), do: unquote(command) - defp extract_command(unquote(String.downcase(statement)) <> _), do: unquote(command) - end - - defp extract_command(<>) when whitespace in [?\s, ?\t, ?\n] do - extract_command(rest) - end - - defp extract_command([first_segment | _] = statement) do - extract_command(first_segment) || extract_command(IO.iodata_to_binary(statement)) - end - - defp extract_command(_other), do: nil -end - -defimpl DBConnection.Query, for: Ch.Query do - @dialyzer :no_improper_lists - alias Ch.{Query, Result, RowBinary} - - @spec parse(Query.t(), [Ch.query_option()]) :: Query.t() - def parse(query, _opts), do: query - - @spec describe(Query.t(), [Ch.query_option()]) :: Query.t() - def describe(query, _opts), do: query - - # stream: insert init - @spec encode(Query.t(), {:stream, term}, [Ch.query_option()]) :: - {:stream, {[{String.t(), String.t()}], Mint.Types.headers(), iodata}} - def encode(query, {:stream, params}, opts) do - {:stream, encode(query, params, opts)} - end - - # stream: insert data chunk - @spec encode(Query.t(), {:stream, Mint.Types.request_ref(), iodata | :eof}, [Ch.query_option()]) :: - {:stream, Mint.Types.request_ref(), iodata | :eof} - def encode(_query, {:stream, ref, data}, _opts) do - {:stream, ref, data} - end - - @spec encode(Query.t(), params, [Ch.query_option()]) :: - {query_params, Mint.Types.headers(), body} - when params: map | [term] | [row :: [term]] | iodata | Enumerable.t(), - query_params: [{String.t(), String.t()}], - body: iodata | Enumerable.t() - - def encode(%Query{command: :insert, encode: false, statement: statement}, data, opts) do - body = - case data do - _ when is_list(data) or is_binary(data) -> [statement, ?\n | data] - _ -> Stream.concat([[statement, ?\n]], data) - end - - {_query_params = [], headers(opts), body} - end - - def 
encode(%Query{command: :insert, statement: statement}, params, opts) do - cond do - names = Keyword.get(opts, :names) -> - types = Keyword.fetch!(opts, :types) - header = RowBinary.encode_names_and_types(names, types) - data = RowBinary.encode_rows(params, types) - {_query_params = [], headers(opts), [statement, ?\n, header | data]} - - format_row_binary?(statement) -> - types = Keyword.fetch!(opts, :types) - data = RowBinary.encode_rows(params, types) - {_query_params = [], headers(opts), [statement, ?\n | data]} - - true -> - {query_params(params), headers(opts), statement} - end - end - - def encode(%Query{multipart: true, statement: statement}, params, opts) do - types = Keyword.get(opts, :types) - default_format = if types, do: "RowBinary", else: "RowBinaryWithNamesAndTypes" - format = Keyword.get(opts, :format) || default_format - - boundary = "ChFormBoundary" <> Base.url_encode64(:crypto.strong_rand_bytes(24)) - content_type = "multipart/form-data; boundary=\"#{boundary}\"" - enc_boundary = "--#{boundary}\r\n" - multipart = multipart_params(params, enc_boundary) - multipart = add_multipart_part(multipart, "query", statement, enc_boundary) - multipart = [multipart | "--#{boundary}--\r\n"] - - {_no_query_params = [], - [{"x-clickhouse-format", format}, {"content-type", content_type} | headers(opts)], multipart} - end - - def encode(%Query{statement: statement}, params, opts) do - types = Keyword.get(opts, :types) - default_format = if types, do: "RowBinary", else: "RowBinaryWithNamesAndTypes" - format = Keyword.get(opts, :format) || default_format - {query_params(params), [{"x-clickhouse-format", format} | headers(opts)], statement} - end - - defp multipart_params(params, boundary) when is_map(params) do - multipart_named_params(Map.to_list(params), boundary, []) - end - - defp multipart_params(params, boundary) when is_list(params) do - multipart_positional_params(params, 0, boundary, []) - end - - defp multipart_named_params([{name, value} | params], 
boundary, acc) do - acc = - add_multipart_part( - acc, - "param_" <> URI.encode_www_form(name), - encode_param(value), - boundary - ) - - multipart_named_params(params, boundary, acc) - end - - defp multipart_named_params([], _boundary, acc), do: acc - - defp multipart_positional_params([value | params], idx, boundary, acc) do - acc = - add_multipart_part( - acc, - "param_$" <> Integer.to_string(idx), - encode_param(value), - boundary - ) - - multipart_positional_params(params, idx + 1, boundary, acc) - end - - defp multipart_positional_params([], _idx, _boundary, acc), do: acc - - @compile inline: [add_multipart_part: 4] - defp add_multipart_part(multipart, name, value, boundary) do - part = [ - boundary, - "content-disposition: form-data; name=\"", - name, - "\"\r\n\r\n", - value, - "\r\n" - ] - - case multipart do - [] -> part - _ -> [multipart | part] - end - end - - defp format_row_binary?(statement) when is_binary(statement) do - statement |> String.trim_trailing() |> String.ends_with?("RowBinary") - end - - defp format_row_binary?(statement) when is_list(statement) do - statement - |> IO.iodata_to_binary() - |> format_row_binary?() - end - - # stream: select result - @spec decode(Query.t(), result, [Ch.query_option()]) :: result when result: Result.t() - def decode(_query, %Result{} = result, _opts), do: result - # stream: insert result - @spec decode(Query.t(), ref, [Ch.query_option()]) :: ref when ref: Mint.Types.request_ref() - def decode(_query, ref, _opts) when is_reference(ref), do: ref - - @spec decode(Query.t(), [response], [Ch.query_option()]) :: Result.t() - when response: Mint.Types.status() | Mint.Types.headers() | binary - def decode(%Query{command: :insert}, responses, _opts) do - [_status, headers | _data] = responses - - num_rows = - if summary = get_header(headers, "x-clickhouse-summary") do - summary = Jason.decode!(summary) - - if written_rows = Map.get(summary, "written_rows") do - String.to_integer(written_rows) - end - end - - 
%Result{num_rows: num_rows, rows: nil, command: :insert, headers: headers} - end - - def decode(%Query{decode: false, command: command}, responses, _opts) when is_list(responses) do - # TODO potentially fails on x-progress-headers - [_status, headers | data] = responses - %Result{rows: data, data: data, command: command, headers: headers} - end - - def decode(%Query{command: command}, responses, opts) when is_list(responses) do - # TODO potentially fails on x-progress-headers - [_status, headers | data] = responses - - case get_header(headers, "x-clickhouse-format") do - "RowBinary" -> - types = Keyword.fetch!(opts, :types) - rows = data |> IO.iodata_to_binary() |> RowBinary.decode_rows(types) - %Result{num_rows: length(rows), rows: rows, command: command, headers: headers} - - "RowBinaryWithNamesAndTypes" -> - [names | rows] = data |> IO.iodata_to_binary() |> RowBinary.decode_names_and_rows() - - %Result{ - num_rows: length(rows), - columns: names, - rows: rows, - command: command, - headers: headers - } - - _other -> - %Result{rows: data, data: data, command: command, headers: headers} - end - end - - defp get_header(headers, key) do - case List.keyfind(headers, key, 0) do - {_, value} -> value - nil = not_found -> not_found - end - end - - defp query_params(params) when is_map(params) do - Enum.map(params, fn {k, v} -> {"param_#{k}", encode_param(v)} end) - end - - defp query_params(params) when is_list(params) do - params - |> Enum.with_index() - |> Enum.map(fn {v, idx} -> {"param_$#{idx}", encode_param(v)} end) - end - - defp encode_param(n) when is_integer(n), do: Integer.to_string(n) - defp encode_param(f) when is_float(f), do: Float.to_string(f) - - # TODO possibly speed up - # For more info see - # https://clickhouse.com/docs/en/interfaces/http#tabs-in-url-parameters - # "escaped" format is the same as https://clickhouse.com/docs/en/interfaces/formats#tabseparated-data-formatting - defp encode_param(b) when is_binary(b) do - escape_param([{"\\", "\\\\"}, 
{"\t", "\\\t"}, {"\n", "\\\n"}], b) - end - - defp encode_param(b) when is_boolean(b), do: Atom.to_string(b) - defp encode_param(nil), do: "\\N" - defp encode_param(%Decimal{} = d), do: Decimal.to_string(d, :normal) - defp encode_param(%Date{} = date), do: Date.to_iso8601(date) - defp encode_param(%NaiveDateTime{} = naive), do: NaiveDateTime.to_iso8601(naive) - defp encode_param(%Time{} = time), do: Time.to_iso8601(time) - - defp encode_param(%DateTime{microsecond: microsecond} = dt) do - dt = DateTime.shift_zone!(dt, "Etc/UTC") - - case microsecond do - {val, precision} when val > 0 and precision > 0 -> - size = round(:math.pow(10, precision)) - unix = DateTime.to_unix(dt, size) - seconds = div(unix, size) - fractional = rem(unix, size) - - IO.iodata_to_binary([ - Integer.to_string(seconds), - ?., - String.pad_leading(Integer.to_string(fractional), precision, "0") - ]) - - _ -> - dt |> DateTime.to_unix(:second) |> Integer.to_string() - end - end - - defp encode_param(tuple) when is_tuple(tuple) do - IO.iodata_to_binary([?(, encode_array_params(Tuple.to_list(tuple)), ?)]) - end - - defp encode_param(a) when is_list(a) do - IO.iodata_to_binary([?[, encode_array_params(a), ?]]) - end - - defp encode_param(m) when is_map(m) do - IO.iodata_to_binary([?{, encode_map_params(Map.to_list(m)), ?}]) - end - - defp encode_array_params([last]), do: encode_array_param(last) - - defp encode_array_params([s | rest]) do - [encode_array_param(s), ?, | encode_array_params(rest)] - end - - defp encode_array_params([] = empty), do: empty - - defp encode_map_params([last]), do: encode_map_param(last) - - defp encode_map_params([kv | rest]) do - [encode_map_param(kv), ?, | encode_map_params(rest)] - end - - defp encode_map_params([] = empty), do: empty - - defp encode_array_param(s) when is_binary(s) do - [?', escape_param([{"'", "''"}, {"\\", "\\\\"}], s), ?'] - end - - defp encode_array_param(nil), do: "null" - - defp encode_array_param(%s{} = param) when s in [Date, NaiveDateTime] do 
- [?', encode_param(param), ?'] - end - - defp encode_array_param(v), do: encode_param(v) - - defp encode_map_param({k, v}) do - [encode_array_param(k), ?:, encode_array_param(v)] - end - - defp escape_param([{pattern, replacement} | escapes], param) do - param = String.replace(param, pattern, replacement) - escape_param(escapes, param) - end - - defp escape_param([], param), do: param - - @spec headers(Keyword.t()) :: Mint.Types.headers() - defp headers(opts), do: Keyword.get(opts, :headers, []) -end - -defimpl String.Chars, for: Ch.Query do - def to_string(%{statement: statement}) do - IO.iodata_to_binary(statement) - end -end diff --git a/lib/ch/result.ex b/lib/ch/result.ex deleted file mode 100644 index 8d4f0868..00000000 --- a/lib/ch/result.ex +++ /dev/null @@ -1,28 +0,0 @@ -defmodule Ch.Result do - @moduledoc """ - Result struct returned from any successful query. - """ - - defstruct [:command, :num_rows, :columns, :rows, :headers, :data] - - @typedoc """ - The Result struct. - - ## Fields - - * `:command` - An atom of the query command, for example: `:select`, `:insert` - * `:columns` - A list of column names - * `:rows` - A list of lists (each inner list corresponding to a row, each element in the inner list corresponds to a column) - * `:num_rows` - The number of fetched or affected rows - * `:headers` - The HTTP response headers - * `:data` - The raw iodata from the response - """ - @type t :: %__MODULE__{ - command: Ch.Query.command() | nil, - num_rows: non_neg_integer | nil, - columns: [String.t()] | nil, - rows: [[term]] | iodata | nil, - headers: Mint.Types.headers(), - data: iodata - } -end diff --git a/lib/ch/row_binary.ex b/lib/ch/row_binary.ex index ae6c5346..31a19d0b 100644 --- a/lib/ch/row_binary.ex +++ b/lib/ch/row_binary.ex @@ -197,7 +197,7 @@ defmodule Ch.RowBinary do # assuming it can be sent as text and not "native" binary JSON # i.e. 
assumes `settings: [input_format_binary_read_json_as_string: 1]` # TODO - encode(:string, Jason.encode_to_iodata!(json)) + encode(:string, JSON.encode_to_iodata!(json)) end def encode({:fixed_string, size}, str) when byte_size(str) == size do @@ -886,7 +886,7 @@ defmodule Ch.RowBinary do rows, types ) do - decode_rows(types_rest, bin, [Jason.decode!(s) | row], rows, types) + decode_rows(types_rest, bin, [JSON.decode!(s) | row], rows, types) end end @@ -1461,7 +1461,8 @@ defmodule Ch.RowBinary do dt = case timezone do nil -> - NaiveDateTime.add(@epoch_naive_datetime, s, time_unit) + @epoch_naive_datetime + |> NaiveDateTime.add(s, time_unit) |> truncate(time_unit) "UTC" -> diff --git a/lib/ch/stream.ex b/lib/ch/stream.ex deleted file mode 100644 index 9ec8b5fd..00000000 --- a/lib/ch/stream.ex +++ /dev/null @@ -1,43 +0,0 @@ -defmodule Ch.Stream do - @moduledoc false - - @derive {Inspect, only: []} - defstruct [:conn, :ref, :query, :params, :opts] - - @type t :: %__MODULE__{ - conn: DBConnection.conn(), - ref: Mint.Types.request_ref() | nil, - query: Ch.Query.t(), - params: term, - opts: [Ch.query_option()] - } - - defimpl Enumerable do - def reduce(stream, acc, fun) do - %Ch.Stream{conn: conn, query: query, params: params, opts: opts} = stream - stream = %DBConnection.Stream{conn: conn, query: query, params: params, opts: opts} - DBConnection.reduce(stream, acc, fun) - end - - def member?(_, _), do: {:error, __MODULE__} - def count(_), do: {:error, __MODULE__} - def slice(_), do: {:error, __MODULE__} - end - - defimpl Collectable do - def into(stream) do - %Ch.Stream{conn: conn, query: query, params: params, opts: opts} = stream - ref = DBConnection.execute!(conn, query, {:stream, params}, opts) - {%{stream | ref: ref}, &collect/2} - end - - defp collect(%{conn: conn, query: query, ref: ref} = stream, {:cont, data}) do - ^ref = DBConnection.execute!(conn, query, {:stream, ref, data}) - stream - end - - defp collect(%{conn: conn, query: query, ref: ref}, eof) when eof 
in [:halt, :done] do - DBConnection.execute!(conn, query, {:stream, ref, :eof}) - end - end -end diff --git a/lib/ch/telemetry.ex b/lib/ch/telemetry.ex new file mode 100644 index 00000000..db78f92f --- /dev/null +++ b/lib/ch/telemetry.ex @@ -0,0 +1,23 @@ +defmodule Ch.Telemetry do + @moduledoc """ + TODO + """ + + @default_handler_id "ch-default-handler" + + # def attach_default_handler do + # :telemetry.attach_many(@default_handler_id, [[]], &__MODULE__.handle_event/4, _no_config = []) + # end + + def detach_default_handler do + :telemetry.detach(@default_handler_id) + end + + @doc false + def handle_event([:ch | event], _measurements, metadata, _config) do + case {event, metadata} do + {[:connect, _stop_or_exception], %{kind: _, reason: _}} -> + :ok + end + end +end diff --git a/lib/ch/types.ex b/lib/ch/types.ex index 0c0c2503..8fc7f3b5 100644 --- a/lib/ch/types.ex +++ b/lib/ch/types.ex @@ -4,7 +4,7 @@ defmodule Ch.Types do """ types = - [ + List.flatten([ {_encoded = "String", _decoded = :string, _args = []}, {"Bool", :boolean, []}, for size <- [8, 16, 32, 64, 128, 256] do @@ -48,8 +48,7 @@ defmodule Ch.Types do {"Polygon", :polygon, []}, {"MultiPolygon", :multipolygon, []}, {"Nothing", :nothing, []} - ] - |> List.flatten() + ]) for {encoded, name, []} <- types do @doc """ diff --git a/mix.exs b/mix.exs index 92a19e1c..194d588b 100644 --- a/mix.exs +++ b/mix.exs @@ -2,68 +2,78 @@ defmodule Ch.MixProject do use Mix.Project @source_url "https://github.com/plausible/ch" - @version "0.7.1" + @version "0.9.0" def project do [ app: :ch, version: @version, - elixir: "~> 1.15", + elixir: "~> 1.18", elixirc_paths: elixirc_paths(Mix.env()), + description: "HTTP ClickHouse driver for Elixir", deps: deps(), + + # Test coverage + test_coverage: [ + ignore_modules: [ + Help + ] + ], + + # Dialyzer + dialyzer: [ + plt_local_path: "plts", + plt_core_path: "plts" + ], + + # Docs name: "Ch", - description: "HTTP ClickHouse driver for Elixir", - docs: docs(), - package: package(), 
- source_url: @source_url, - dialyzer: [plt_local_path: "plts", plt_core_path: "plts"] + docs: [ + main: "readme", + source_url: @source_url, + source_ref: "v#{@version}", + extras: ["README.md", "CHANGELOG.md"], + skip_undefined_reference_warnings_on: ["CHANGELOG.md"] + ], + + # Hex + package: [ + licenses: ["MIT"], + # TODO add org=plausible, and link to plausible.io? + links: %{"GitHub" => @source_url} + ] ] end # Run "mix help compile.app" to learn about applications. def application do [ - extra_applications: [:logger | extra_applications(Mix.env())] + extra_applications: [:logger] ] end # Specifies which paths to compile per environment. defp elixirc_paths(:test), do: ["lib", "test/support"] + defp elixirc_paths(:bench), do: ["lib", "bench/support"] defp elixirc_paths(_env), do: ["lib"] - defp extra_applications(:test), do: [:inets, :tools] - defp extra_applications(:dev), do: [:tools] - defp extra_applications(_env), do: [] - # Run "mix help deps" to learn about dependencies. defp deps do [ {:mint, "~> 1.0"}, - {:db_connection, "~> 2.9.0"}, - {:jason, "~> 1.0"}, + {:nimble_pool, "~> 1.1"}, + {:nimble_options, "~> 1.1"}, + {:telemetry, "~> 1.4"}, + {:telemetry_docs, "~> 0.1.0"}, {:decimal, "~> 2.0"}, {:ecto, "~> 3.13.0", optional: true}, - {:benchee, "~> 1.0", only: [:bench]}, + {:benchee, "~> 1.0", only: :bench}, {:dialyxir, "~> 1.0", only: [:dev, :test], runtime: false}, {:ex_doc, ">= 0.0.0", only: :docs}, - {:tz, "~> 0.28.1", only: [:test]} - ] - end - - defp docs do - [ - source_url: @source_url, - source_ref: "v#{@version}", - main: "readme", - extras: ["README.md", "CHANGELOG.md"], - skip_undefined_reference_warnings_on: ["CHANGELOG.md"] - ] - end - - defp package do - [ - licenses: ["MIT"], - links: %{"GitHub" => @source_url} + {:tz, "~> 0.28.1", only: :test}, + {:nimble_lz4, "~> 1.1", only: [:dev, :test, :bench]}, + {:stream_data, "~> 1.3", only: :test}, + {:credo, "~> 1.7", only: [:dev, :test]} ] end end diff --git a/mix.lock b/mix.lock index 
92957b86..4db0fba9 100644 --- a/mix.lock +++ b/mix.lock @@ -1,6 +1,8 @@ %{ "benchee": {:hex, :benchee, "1.5.0", "4d812c31d54b0ec0167e91278e7de3f596324a78a096fd3d0bea68bb0c513b10", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}, {:statistex, "~> 1.1", [hex: :statistex, repo: "hexpm", optional: false]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "5b075393aea81b8ae74eadd1c28b1d87e8a63696c649d8293db7c4df3eb67535"}, - "db_connection": {:hex, :db_connection, "2.9.0", "a6a97c5c958a2d7091a58a9be40caf41ab496b0701d21e1d1abff3fa27a7f371", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "17d502eacaf61829db98facf6f20808ed33da6ccf495354a41e64fe42f9c509c"}, + "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, + "castore": {:hex, :castore, "1.0.18", "5e43ef0ec7d31195dfa5a65a86e6131db999d074179d2ba5a8de11fe14570f55", [:mix], [], "hexpm", "f393e4fe6317829b158fb74d86eb681f737d2fe326aa61ccf6293c4104957e34"}, + "credo": {:hex, :credo, "1.7.18", "5c5596bf7aedf9c8c227f13272ac499fe8eae6237bd326f2f07dfc173786f042", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "a189d164685fd945809e862fe76a7420c4398fa288d76257662aecb909d6b3e5"}, "decimal": {:hex, :decimal, "2.3.0", "3ad6255aa77b4a3c4f818171b12d237500e63525c2fd056699967a3e7ea20f62", [:mix], [], "hexpm", "a4d66355cb29cb47c3cf30e71329e58361cfcb37c34235ef3bf1d7bf3773aeac"}, "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"}, 
"dialyxir": {:hex, :dialyxir, "1.4.7", "dda948fcee52962e4b6c5b4b16b2d8fa7d50d8645bbae8b8685c3f9ecb7f5f4d", [:mix], [{:erlex, ">= 0.2.8", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "b34527202e6eb8cee198efec110996c25c5898f43a4094df157f8d28f27d9efe"}, @@ -8,14 +10,26 @@ "ecto": {:hex, :ecto, "3.13.5", "9d4a69700183f33bf97208294768e561f5c7f1ecf417e0fa1006e4a91713a834", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "df9efebf70cf94142739ba357499661ef5dbb559ef902b68ea1f3c1fabce36de"}, "erlex": {:hex, :erlex, "0.2.8", "cd8116f20f3c0afe376d1e8d1f0ae2452337729f68be016ea544a72f767d9c12", [:mix], [], "hexpm", "9d66ff9fedf69e49dc3fd12831e12a8a37b76f8651dd21cd45fcf5561a8a7590"}, "ex_doc": {:hex, :ex_doc, "0.40.1", "67542e4b6dde74811cfd580e2c0149b78010fd13001fda7cfeb2b2c2ffb1344d", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "bcef0e2d360d93ac19f01a85d58f91752d930c0a30e2681145feea6bd3516e00"}, + "file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", "7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"}, + "finch": {:hex, :finch, "0.21.0", "b1c3b2d48af02d0c66d2a9ebfb5622be5c5ecd62937cf79a88a7f98d48a8290c", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 
0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "87dc6e169794cb2570f75841a19da99cfde834249568f2a5b121b809588a4377"}, "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, "makeup_erlang": {:hex, :makeup_erlang, "1.0.3", "4252d5d4098da7415c390e847c814bad3764c94a814a0b4245176215615e1035", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "953297c02582a33411ac6208f2c6e55f0e870df7f80da724ed613f10e6706afd"}, + "mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"}, "mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], 
[{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"}, + "nimble_lz4": {:hex, :nimble_lz4, "1.1.0", "53b87e37f1efc79fda6433ab35563788a628c7d33aef45d16f31a86a399a3cc5", [:mix], [{:rustler, "~> 0.34.0", [hex: :rustler, repo: "hexpm", optional: false]}, {:rustler_precompiled, "~> 0.7.2", [hex: :rustler_precompiled, repo: "hexpm", optional: false]}], "hexpm", "2c1d46eee76c5bbba8d6d3d23c75210dcb509f6698f0a01fb95015bf95f1b6d3"}, + "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, + "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, + "req": {:hex, :req, "0.5.17", "0096ddd5b0ed6f576a03dde4b158a0c727215b15d2795e59e0916c6971066ede", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "0b8bc6ffdfebbc07968e59d3ff96d52f2202d0536f10fef4dc11dc02a2a43e39"}, + "rustler": {:hex, :rustler, "0.34.0", 
"e9a73ee419fc296a10e49b415a2eb87a88c9217aa0275ec9f383d37eed290c1c", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:req, "~> 0.5", [hex: :req, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "1d0c7449482b459513003230c0e2422b0252245776fe6fd6e41cb2b11bd8e628"}, + "rustler_precompiled": {:hex, :rustler_precompiled, "0.7.3", "42cb9449785cd86c87453e39afdd27a0bdfa5c77a4ec5dc5ce45112e06b9f89b", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "cbc4b3777682e5f6f43ed39b0e0b4a42dccde8053aba91b4514e8f5ff9a5ac6d"}, "statistex": {:hex, :statistex, "1.1.0", "7fec1eb2f580a0d2c1a05ed27396a084ab064a40cfc84246dbfb0c72a5c761e5", [:mix], [], "hexpm", "f5950ea26ad43246ba2cce54324ac394a4e7408fdcf98b8e230f503a0cba9cf5"}, - "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, + "stream_data": {:hex, :stream_data, "1.3.0", "bde37905530aff386dea1ddd86ecbf00e6642dc074ceffc10b7d4e41dfd6aac9", [:mix], [], "hexpm", "3cc552e286e817dca43c98044c706eec9318083a1480c52ae2688b08e2936e3c"}, + "telemetry": {:hex, :telemetry, "1.4.1", "ab6de178e2b29b58e8256b92b382ea3f590a47152ca3651ea857a6cae05ac423", [:rebar3], [], "hexpm", "2172e05a27531d3d31dd9782841065c50dd5c3c7699d95266b2edd54c2dafa1c"}, + "telemetry_docs": {:hex, :telemetry_docs, "0.1.0", "9c95cdfcf34960b6533ff19d56a2ba2f1980aad7f2c3191d3e7ec91a85e6f74c", [:mix], [{:nimble_options, "~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}], "hexpm", "e59ac534d29437d75680a1af9a0d7c67569ffe146c35e47809b9fdabd5e04dc1"}, + "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", 
"0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"}, "tz": {:hex, :tz, "0.28.1", "717f5ffddfd1e475e2a233e221dc0b4b76c35c4b3650b060c8e3ba29dd6632e9", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:mint, "~> 1.6", [hex: :mint, repo: "hexpm", optional: true]}], "hexpm", "bfdca1aa1902643c6c43b77c1fb0cb3d744fd2f09a8a98405468afdee0848c8a"}, } diff --git a/test/ch/aggregation_test.exs b/test/ch/aggregation_test.exs deleted file mode 100644 index 651622c1..00000000 --- a/test/ch/aggregation_test.exs +++ /dev/null @@ -1,214 +0,0 @@ -defmodule Ch.AggregationTest do - use ExUnit.Case, async: true - - setup do - conn = start_supervised!({Ch, database: Ch.Test.database()}) - {:ok, conn: conn} - end - - test "select SimpleAggregateFunction types", %{conn: conn} do - Ch.query!(conn, """ - CREATE TABLE candle_fragments ( - ticker LowCardinality(String), - time DateTime('UTC') CODEC(Delta, Default), - high Float64 CODEC(Delta, Default), - open Float64 CODEC(Delta, Default), - close Float64 CODEC(Delta, Default), - low Float64 CODEC(Delta, Default), - ) ENGINE = MergeTree() - ORDER BY (ticker, time) - """) - - Ch.query!(conn, """ - CREATE MATERIALIZED VIEW candles_one_hour_amt - ( - ticker LowCardinality(String), - time DateTime('UTC') CODEC(Delta, Default), - high SimpleAggregateFunction(max, Float64) CODEC(Delta, Default), - open AggregateFunction(argMin, Float64, DateTime('UTC')), - close AggregateFunction(argMax , Float64, DateTime('UTC')), - low SimpleAggregateFunction(min, Float64) CODEC(Delta, Default) - ) - ENGINE = AggregatingMergeTree() - ORDER BY (ticker, time) - AS - SELECT - t.ticker AS ticker, - toStartOfHour(t.time) AS time, - max(t.high) AS high, - argMinState(t.open, t.time) AS open, - argMaxState(t.close, t.time) AS close, - min(t.low) AS low - FROM candle_fragments t - GROUP BY ticker, time - """) - - Ch.query!(conn, """ - INSERT INTO candle_fragments(ticker, time, high, open, close, low) VALUES - 
('INTC', '2023-04-13 20:33:00', 32, 32, 32, 32), - ('INTC', '2023-04-13 20:34:00', 33, 33, 33, 33), - ('INTC', '2023-04-13 20:35:00', 32, 32, 31, 26), - ('INTC', '2023-04-13 20:36:00', 32, 27, 27, 27) - """) - - assert Ch.query!(conn, """ - SELECT - t.ticker AS ticker, - toStartOfHour(t.time) AS start_time, - toStartOfHour(t.time) + interval 1 hour AS end_time, - toStartOfHour(t.time)::DATE AS date, - max(t.high) AS high, - argMinMerge(t.open) AS open, - argMaxMerge(t.close) AS close, - min(t.low) AS low - FROM candles_one_hour_amt t - GROUP BY ticker, time - """).rows == [ - [ - "INTC", - ~U[2023-04-13 20:00:00Z], - ~U[2023-04-13 21:00:00Z], - ~D[2023-04-13], - 33.0, - 32.0, - 27.0, - 26.0 - ] - ] - end - - # based on https://github.com/ClickHouse/clickhouse-java/issues/1232 - test "insert AggregateFunction via input()", %{conn: conn} do - Ch.query!(conn, """ - CREATE TABLE test_insert_aggregate_function ( - uid Int16, - updated SimpleAggregateFunction(max, DateTime), - name AggregateFunction(argMax, String, DateTime) - ) ENGINE AggregatingMergeTree ORDER BY uid - """) - - rows = [ - [1, ~N[2020-01-02 00:00:00], "b"], - [1, ~N[2020-01-01 00:00:00], "a"] - ] - - assert %{num_rows: 2} = - Ch.query!( - conn, - """ - INSERT INTO test_insert_aggregate_function - SELECT uid, updated, arrayReduce('argMaxState', [name], [updated]) - FROM input('uid Int16, updated DateTime, name String') - FORMAT RowBinary\ - """, - rows, - types: ["Int16", "DateTime", "String"] - ) - - assert Ch.query!(conn, """ - SELECT uid, max(updated) AS updated, argMaxMerge(name) - FROM test_insert_aggregate_function - GROUP BY uid - """).rows == [[1, ~N[2020-01-02 00:00:00], "b"]] - end - - # https://kb.altinity.com/altinity-kb-schema-design/ingestion-aggregate-function/ - describe "altinity examples" do - test "ephemeral column", %{conn: conn} do - Ch.query!(conn, """ - CREATE TABLE test_users_ephemeral_column ( - uid Int16, - updated SimpleAggregateFunction(max, DateTime), - name_stub String 
Ephemeral, - name AggregateFunction(argMax, String, DateTime) DEFAULT arrayReduce('argMaxState', [name_stub], [updated]) - ) ENGINE AggregatingMergeTree ORDER BY uid - """) - - Ch.query!( - conn, - "INSERT INTO test_users_ephemeral_column(uid, updated, name_stub) FORMAT RowBinary", - _rows = [ - [1231, ~N[2020-01-02 00:00:00], "Jane"], - [1231, ~N[2020-01-01 00:00:00], "John"] - ], - types: ["Int16", "DateTime", "String"] - ) - - assert Ch.query!(conn, """ - SELECT uid, max(updated) AS updated, argMaxMerge(name) - FROM test_users_ephemeral_column - GROUP BY uid - """).rows == [[1231, ~N[2020-01-02 00:00:00], "Jane"]] - end - - test "input function", %{conn: conn} do - Ch.query!(conn, """ - CREATE TABLE test_users_input_function ( - uid Int16, - updated SimpleAggregateFunction(max, DateTime), - name AggregateFunction(argMax, String, DateTime) - ) ENGINE AggregatingMergeTree ORDER BY uid - """) - - Ch.query!( - conn, - """ - INSERT INTO test_users_input_function - SELECT uid, updated, arrayReduce('argMaxState', [name], [updated]) - FROM input('uid Int16, updated DateTime, name String') FORMAT RowBinary\ - """, - _rows = [ - [1231, ~N[2020-01-02 00:00:00], "Jane"], - [1231, ~N[2020-01-01 00:00:00], "John"] - ], - types: ["Int16", "DateTime", "String"] - ) - - assert Ch.query!(conn, """ - SELECT uid, max(updated) AS updated, argMaxMerge(name) - FROM test_users_input_function - GROUP BY uid - """).rows == [[1231, ~N[2020-01-02 00:00:00], "Jane"]] - end - - test "materialized view and null engine", %{conn: conn} do - Ch.query!(conn, """ - CREATE TABLE test_users_mv_ne ( - uid Int16, - updated SimpleAggregateFunction(max, DateTime), - name AggregateFunction(argMax, String, DateTime) - ) ENGINE AggregatingMergeTree ORDER BY uid - """) - - Ch.query!(conn, """ - CREATE TABLE test_users_ne ( - uid Int16, - updated DateTime, - name String - ) ENGINE Null - """) - - Ch.query!(conn, """ - CREATE MATERIALIZED VIEW test_users_mv TO test_users_mv_ne AS - SELECT uid, updated, 
arrayReduce('argMaxState', [name], [updated]) name - FROM test_users_ne - """) - - Ch.query!( - conn, - "INSERT INTO test_users_ne FORMAT RowBinary", - _rows = [ - [1231, ~N[2020-01-02 00:00:00], "Jane"], - [1231, ~N[2020-01-01 00:00:00], "John"] - ], - types: ["Int16", "DateTime", "String"] - ) - - assert Ch.query!(conn, """ - SELECT uid, max(updated) AS updated, argMaxMerge(name) - FROM test_users_mv_ne - GROUP BY uid - """).rows == [[1231, ~N[2020-01-02 00:00:00], "Jane"]] - end - end -end diff --git a/test/ch/connect_test.exs b/test/ch/connect_test.exs deleted file mode 100644 index d21edcb8..00000000 --- a/test/ch/connect_test.exs +++ /dev/null @@ -1,22 +0,0 @@ -defmodule Ch.ConnectTest do - use ExUnit.Case - import ExUnit.CaptureLog - - @tag :slow - test "retries to connect even with exceptions / exits / throws" do - # See https://github.com/plausible/ch/issues/208 - bad_transport_opts = [sndbuf: nil] - - logs = - capture_log(fn -> - {:ok, conn} = - Ch.start_link(database: Ch.Test.database(), transport_opts: bad_transport_opts) - - :timer.sleep(100) - - assert Process.alive?(conn) - end) - - assert logs =~ "failed to connect: ** (ArgumentError) argument error" - end -end diff --git a/test/ch/connection_test.exs b/test/ch/connection_test.exs deleted file mode 100644 index 57c48d31..00000000 --- a/test/ch/connection_test.exs +++ /dev/null @@ -1,1830 +0,0 @@ -defmodule Ch.ConnectionTest do - use ExUnit.Case, parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - - import Ch.Test, - only: [ - parameterize_query: 2, - parameterize_query: 3, - parameterize_query: 4, - parameterize_query!: 2, - parameterize_query!: 3, - parameterize_query!: 4 - ] - - alias Ch.RowBinary - - setup do - {:ok, conn: start_supervised!({Ch, database: Ch.Test.database()})} - end - - test "select without params", ctx do - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select 1") - end - - test "select with types", ctx do - assert {:ok, 
%{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select 1", [], types: ["UInt8"]) - end - - test "select with params", ctx do - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select {a:UInt8}", %{"a" => 1}) - - assert {:ok, %{num_rows: 1, rows: [[true]]}} = - parameterize_query(ctx, "select {b:Bool}", %{"b" => true}) - - assert {:ok, %{num_rows: 1, rows: [[false]]}} = - parameterize_query(ctx, "select {b:Bool}", %{"b" => false}) - - assert {:ok, %{num_rows: 1, rows: [[nil]]}} = - parameterize_query(ctx, "select {n:Nullable(Nothing)}", %{"n" => nil}) - - assert {:ok, %{num_rows: 1, rows: [[1.0]]}} = - parameterize_query(ctx, "select {a:Float32}", %{"a" => 1.0}) - - assert {:ok, %{num_rows: 1, rows: [["a&b=c"]]}} = - parameterize_query(ctx, "select {a:String}", %{"a" => "a&b=c"}) - - assert {:ok, %{num_rows: 1, rows: [["a\n"]]}} = - parameterize_query(ctx, "select {a:String}", %{"a" => "a\n"}) - - assert {:ok, %{num_rows: 1, rows: [["a\t"]]}} = - parameterize_query(ctx, "select {a:String}", %{"a" => "a\t"}) - - assert {:ok, %{num_rows: 1, rows: [[["a\tb"]]]}} = - parameterize_query(ctx, "select {a:Array(String)}", %{"a" => ["a\tb"]}) - - assert {:ok, %{num_rows: 1, rows: [[[true, false]]]}} = - parameterize_query(ctx, "select {a:Array(Bool)}", %{"a" => [true, false]}) - - assert {:ok, %{num_rows: 1, rows: [[["a", nil, "b"]]]}} = - parameterize_query(ctx, "select {a:Array(Nullable(String))}", %{ - "a" => ["a", nil, "b"] - }) - - assert {:ok, %{num_rows: 1, rows: [row]}} = - parameterize_query(ctx, "select {a:Decimal(9,4)}", %{"a" => Decimal.new("2000.333")}) - - assert row == [Decimal.new("2000.3330")] - - assert {:ok, %{num_rows: 1, rows: [[~D[2022-01-01]]]}} = - parameterize_query(ctx, "select {a:Date}", %{"a" => ~D[2022-01-01]}) - - assert {:ok, %{num_rows: 1, rows: [[~D[2022-01-01]]]}} = - parameterize_query(ctx, "select {a:Date32}", %{"a" => ~D[2022-01-01]}) - - naive_noon = ~N[2022-01-01 12:00:00] - - # datetimes in params are 
sent in text and ClickHouse translates them to UTC from server timezone by default - # see https://clickhouse.com/docs/en/sql-reference/data-types/datetime - # https://kb.altinity.com/altinity-kb-queries-and-syntax/time-zones/ - assert {:ok, %{num_rows: 1, rows: [[naive_datetime]], headers: headers}} = - parameterize_query(ctx, "select {naive:DateTime}", %{"naive" => naive_noon}) - - # to make this test pass for contributors with non UTC timezone we perform the same steps as ClickHouse - # i.e. we give server timezone to the naive datetime and shift it to UTC before comparing with the result - {_, timezone} = List.keyfind!(headers, "x-clickhouse-timezone", 0) - - assert naive_datetime == - naive_noon - |> DateTime.from_naive!(timezone) - |> DateTime.shift_zone!("Etc/UTC") - |> DateTime.to_naive() - - # when the timezone information is provided in the type, we don't need to rely on server timezone - assert {:ok, %{num_rows: 1, rows: [[bkk_datetime]]}} = - parameterize_query(ctx, "select {$0:DateTime('Asia/Bangkok')}", [naive_noon]) - - assert bkk_datetime == DateTime.from_naive!(naive_noon, "Asia/Bangkok") - - assert {:ok, %{num_rows: 1, rows: [[~U[2022-01-01 12:00:00Z]]]}} = - parameterize_query(ctx, "select {$0:DateTime('UTC')}", [naive_noon]) - - naive_noon_ms = ~N[2022-01-01 12:00:00.123] - - assert {:ok, %{num_rows: 1, rows: [[naive_datetime]]}} = - parameterize_query(ctx, "select {$0:DateTime64(3)}", [naive_noon_ms]) - - assert NaiveDateTime.compare( - naive_datetime, - naive_noon_ms - |> DateTime.from_naive!(timezone) - |> DateTime.shift_zone!("Etc/UTC") - |> DateTime.to_naive() - ) == :eq - - assert {:ok, %{num_rows: 1, rows: [[["a", "b'", "\\'c"]]]}} = - parameterize_query(ctx, "select {a:Array(String)}", %{"a" => ["a", "b'", "\\'c"]}) - - assert {:ok, %{num_rows: 1, rows: [[["a\n", "b\tc"]]]}} = - parameterize_query(ctx, "select {a:Array(String)}", %{"a" => ["a\n", "b\tc"]}) - - assert {:ok, %{num_rows: 1, rows: [[[1, 2, 3]]]}} = - parameterize_query(ctx, 
"select {a:Array(UInt8)}", %{"a" => [1, 2, 3]}) - - assert {:ok, %{num_rows: 1, rows: [[[[1], [2, 3], []]]]}} = - parameterize_query(ctx, "select {a:Array(Array(UInt8))}", %{"a" => [[1], [2, 3], []]}) - - uuid = "9B29BD20-924C-4DE5-BDB3-8C2AA1FCE1FC" - uuid_bin = uuid |> String.replace("-", "") |> Base.decode16!() - - assert {:ok, %{num_rows: 1, rows: [[^uuid_bin]]}} = - parameterize_query(ctx, "select {a:UUID}", %{"a" => uuid}) - - # TODO - # assert {:ok, %{num_rows: 1, rows: [[^uuid_bin]]}} = - # parameterize_query(ctx, "select {a:UUID}", %{"a" => uuid_bin}) - - # pseudo-positional bind - assert {:ok, %{num_rows: 1, rows: [[1]]}} = parameterize_query(ctx, "select {$0:UInt8}", [1]) - end - - test "utc datetime query param encoding", ctx do - utc = ~U[2021-01-01 12:00:00Z] - msk = DateTime.new!(~D[2021-01-01], ~T[15:00:00], "Europe/Moscow") - naive = utc |> DateTime.shift_zone!(Ch.Test.clickhouse_tz(ctx.conn)) |> DateTime.to_naive() - - assert parameterize_query!(ctx, "select {$0:DateTime} as d, toString(d)", [utc]).rows == - [[~N[2021-01-01 12:00:00], to_string(naive)]] - - assert parameterize_query!(ctx, "select {$0:DateTime('UTC')} as d, toString(d)", [utc]).rows == - [[utc, "2021-01-01 12:00:00"]] - - assert parameterize_query!(ctx, "select {$0:DateTime('Europe/Moscow')} as d, toString(d)", [ - utc - ]).rows == - [[msk, "2021-01-01 15:00:00"]] - end - - test "non-utc datetime query param encoding", ctx do - jp = DateTime.shift_zone!(~U[2021-01-01 12:34:56Z], "Asia/Tokyo") - assert inspect(jp) == "#DateTime<2021-01-01 21:34:56+09:00 JST Asia/Tokyo>" - - assert [[utc, jp]] = - parameterize_query!( - ctx, - "select {$0:DateTime('UTC')}, {$0:DateTime('Asia/Tokyo')}", - [jp] - ).rows - - assert inspect(utc) == "~U[2021-01-01 12:34:56Z]" - assert inspect(jp) == "#DateTime<2021-01-01 21:34:56+09:00 JST Asia/Tokyo>" - end - - test "non-utc datetime rowbinary encoding", ctx do - parameterize_query!( - ctx, - "create table ch_non_utc_datetimes(name String, datetime 
DateTime) engine Memory" - ) - - on_exit(fn -> Ch.Test.query("drop table ch_non_utc_datetimes") end) - - utc = ~U[2024-12-21 05:35:19.886393Z] - - taipei = DateTime.shift_zone!(utc, "Asia/Taipei") - tokyo = DateTime.shift_zone!(utc, "Asia/Tokyo") - vienna = DateTime.shift_zone!(utc, "Europe/Vienna") - - rows = [["taipei", taipei], ["tokyo", tokyo], ["vienna", vienna]] - - parameterize_query!( - ctx, - "insert into ch_non_utc_datetimes(name, datetime) format RowBinary", - rows, - types: ["String", "DateTime"] - ) - - result = - parameterize_query!( - ctx, - "select name, cast(datetime as DateTime('UTC')) from ch_non_utc_datetimes" - ) - |> Map.fetch!(:rows) - |> Map.new(fn [name, datetime] -> {name, datetime} end) - - assert result["taipei"] == ~U[2024-12-21 05:35:19Z] - assert result["tokyo"] == ~U[2024-12-21 05:35:19Z] - assert result["vienna"] == ~U[2024-12-21 05:35:19Z] - end - - test "utc datetime64 query param encoding", ctx do - utc = ~U[2021-01-01 12:00:00.123456Z] - msk = DateTime.new!(~D[2021-01-01], ~T[15:00:00.123456], "Europe/Moscow") - naive = utc |> DateTime.shift_zone!(Ch.Test.clickhouse_tz(ctx.conn)) |> DateTime.to_naive() - - assert parameterize_query!(ctx, "select {$0:DateTime64(6)} as d, toString(d)", [utc]).rows == - [[~N[2021-01-01 12:00:00.123456], to_string(naive)]] - - assert parameterize_query!(ctx, "select {$0:DateTime64(6, 'UTC')} as d, toString(d)", [utc]).rows == - [[utc, "2021-01-01 12:00:00.123456"]] - - assert parameterize_query!( - ctx, - "select {$0:DateTime64(6,'Europe/Moscow')} as d, toString(d)", - [utc] - ).rows == - [[msk, "2021-01-01 15:00:00.123456"]] - end - - test "utc datetime64 zero microseconds query param encoding", ctx do - # this test case guards against a previous bug where DateTimes with a microsecond value of 0 and precision > 0 would - # get encoded as a val like "1.6095024e9" which ClickHouse would be unable to parse to a DateTime. 
- utc = ~U[2021-01-01 12:00:00.000000Z] - naive = utc |> DateTime.shift_zone!(Ch.Test.clickhouse_tz(ctx.conn)) |> DateTime.to_naive() - - assert parameterize_query!(ctx, "select {$0:DateTime64(6)} as d, toString(d)", [utc]).rows == - [[~N[2021-01-01 12:00:00.000000], to_string(naive)]] - end - - test "utc datetime64 microseconds with more precision than digits", ctx do - # this test case guards against a previous bug where DateTimes with a microsecond value of with N digits - # and a precision > N would be encoded with a space like `234235234. 234123` - utc = ~U[2024-05-26 20:00:46.099856Z] - naive = utc |> DateTime.shift_zone!(Ch.Test.clickhouse_tz(ctx.conn)) |> DateTime.to_naive() - - assert parameterize_query!(ctx, "select {$0:DateTime64(6)} as d, toString(d)", [utc]).rows == - [[~N[2024-05-26 20:00:46.099856Z], to_string(naive)]] - end - - test "select with options", ctx do - assert {:ok, %{num_rows: 1, rows: [["async_insert", "Bool", "1"]]}} = - parameterize_query(ctx, "show settings like 'async_insert'", [], - settings: [async_insert: 1] - ) - - assert {:ok, %{num_rows: 1, rows: [["async_insert", "Bool", "0"]]}} = - parameterize_query(ctx, "show settings like 'async_insert'", [], - settings: [async_insert: 0] - ) - end - - test "create", ctx do - assert {:ok, %{command: :create, num_rows: nil, rows: [], data: []}} = - parameterize_query(ctx, "create table create_example(a UInt8) engine = Memory") - - on_exit(fn -> Ch.Test.query("drop table create_example") end) - end - - test "create with options", ctx do - assert {:error, %Ch.Error{code: 164, message: message}} = - parameterize_query(ctx, "create table create_example(a UInt8) engine = Memory", [], - settings: [readonly: 1] - ) - - assert message =~ ~r/Cannot execute query in readonly mode/ - end - - describe "insert" do - setup ctx do - table = "insert_t_#{System.unique_integer([:positive])}" - - parameterize_query!( - ctx, - "create table #{table}(a UInt8 default 1, b String) engine = Memory" - ) - - {:ok, 
table: table} - end - - test "values", %{table: table} = ctx do - parameterize_query( - ctx, - "insert into {table:Identifier} values (1, 'a'),(2,'b'), (null, null)", - %{"table" => table} - ) - - assert {:ok, %{rows: rows}} = - parameterize_query(ctx, "select * from {table:Identifier}", %{"table" => table}) - - assert rows == [[1, "a"], [2, "b"], [1, ""]] - - parameterize_query( - ctx, - "insert into {$0:Identifier}(a, b) values ({$1:UInt8},{$2:String}),({$3:UInt8},{$4:String})", - [table, 4, "d", 5, "e"] - ) - - assert {:ok, %{rows: rows}} = - parameterize_query(ctx, "select * from {table:Identifier} where a >= 4", %{ - "table" => table - }) - - assert rows == [[4, "d"], [5, "e"]] - end - - test "when readonly", %{table: table} = ctx do - settings = [readonly: 1] - - assert {:error, %Ch.Error{code: 164, message: message}} = - parameterize_query( - ctx, - "insert into {table:Identifier} values (1, 'a'), (2, 'b')", - %{"table" => table}, - settings: settings - ) - - assert message =~ "Cannot execute query in readonly mode." 
- end - - test "automatic RowBinary", %{table: table} = ctx do - stmt = "insert into #{table}(a, b) format RowBinary" - types = ["UInt8", "String"] - rows = [[1, "a"], [2, "b"]] - - parameterize_query!(ctx, stmt, rows, types: types) - - assert %{rows: rows} = - parameterize_query!(ctx, "select * from {table:Identifier}", %{"table" => table}) - - assert rows == [[1, "a"], [2, "b"]] - end - - test "manual RowBinary", %{table: table} = ctx do - stmt = "insert into #{table}(a, b) format RowBinary" - - types = ["UInt8", "String"] - rows = [[1, "a"], [2, "b"]] - data = RowBinary.encode_rows(rows, types) - - parameterize_query!(ctx, stmt, data, encode: false) - - assert %{rows: rows} = - parameterize_query!(ctx, "select * from {table:Identifier}", %{"table" => table}) - - assert rows == [[1, "a"], [2, "b"]] - end - - test "chunked", %{table: table} = ctx do - types = ["UInt8", "String"] - rows = [[1, "a"], [2, "b"], [3, "c"]] - - stream = - rows - |> Stream.chunk_every(2) - |> Stream.map(fn chunk -> RowBinary.encode_rows(chunk, types) end) - - parameterize_query( - ctx, - "insert into #{table}(a, b) format RowBinary", - stream, - encode: false - ) - - assert {:ok, %{rows: rows}} = - parameterize_query(ctx, "select * from {table:Identifier}", %{"table" => table}) - - assert rows == [[1, "a"], [2, "b"], [3, "c"]] - end - - test "select", %{table: table} = ctx do - parameterize_query( - ctx, - "insert into {table:Identifier} values (1, 'a'), (2, 'b'), (null, null)", - %{"table" => table} - ) - - parameterize_query( - ctx, - "insert into {table:Identifier}(a, b) select a, b from {table:Identifier}", - %{"table" => table} - ) - - assert {:ok, %{rows: rows}} = - parameterize_query(ctx, "select * from {table:Identifier}", %{"table" => table}) - - assert rows == [[1, "a"], [2, "b"], [1, ""], [1, "a"], [2, "b"], [1, ""]] - - assert {:ok, %{num_rows: 2}} = - parameterize_query( - ctx, - "insert into {$0:Identifier}(a, b) select a, b from {$0:Identifier} where a > {$1:UInt8}", - 
[table, 1] - ) - - assert {:ok, %{rows: new_rows}} = - parameterize_query(ctx, "select * from {table:Identifier}", %{"table" => table}) - - assert new_rows -- rows == [[2, "b"], [2, "b"]] - end - end - - test "delete", ctx do - parameterize_query!( - ctx, - "create table delete_t(a UInt8, b String) engine = MergeTree order by tuple()" - ) - - on_exit(fn -> Ch.Test.query("drop table delete_t") end) - - parameterize_query(ctx, "insert into delete_t values (1,'a'), (2,'b')") - - settings = [allow_experimental_lightweight_delete: 1] - - assert {:ok, %{rows: [], data: [], command: :delete}} = - parameterize_query(ctx, "delete from delete_t where 1", [], settings: settings) - end - - test "query!", ctx do - assert %{num_rows: 1, rows: [[1]]} = parameterize_query!(ctx, "select 1") - end - - describe "types" do - test "multiple types", ctx do - assert {:ok, %{num_rows: 1, rows: [[1, "a"]]}} = - parameterize_query(ctx, "select {a:Int8}, {b:String}", %{"a" => 1, "b" => "a"}) - end - - test "ints", ctx do - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select {a:Int8}", %{"a" => 1}) - - assert {:ok, %{num_rows: 1, rows: [[-1000]]}} = - parameterize_query(ctx, "select {a:Int16}", %{"a" => -1000}) - - assert {:ok, %{num_rows: 1, rows: [[100_000]]}} = - parameterize_query(ctx, "select {a:Int32}", %{"a" => 100_000}) - - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select {a:Int64}", %{"a" => 1}) - - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select {a:Int128}", %{"a" => 1}) - - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select {a:Int256}", %{"a" => 1}) - end - - test "uints", ctx do - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select {a:UInt8}", %{"a" => 1}) - - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select {a:UInt16}", %{"a" => 1}) - - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select 
{a:UInt32}", %{"a" => 1}) - - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select {a:UInt64}", %{"a" => 1}) - - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select {a:UInt128}", %{"a" => 1}) - - assert {:ok, %{num_rows: 1, rows: [[1]]}} = - parameterize_query(ctx, "select {a:UInt256}", %{"a" => 1}) - end - - test "fixed string", ctx do - assert {:ok, %{num_rows: 1, rows: [[<<0, 0>>]]}} = - parameterize_query(ctx, "select {a:FixedString(2)}", %{"a" => ""}) - - assert {:ok, %{num_rows: 1, rows: [["a" <> <<0>>]]}} = - parameterize_query(ctx, "select {a:FixedString(2)}", %{"a" => "a"}) - - assert {:ok, %{num_rows: 1, rows: [["aa"]]}} = - parameterize_query(ctx, "select {a:FixedString(2)}", %{"a" => "aa"}) - - assert {:ok, %{num_rows: 1, rows: [["aaaaa"]]}} = - parameterize_query(ctx, "select {a:FixedString(5)}", %{"a" => "aaaaa"}) - - parameterize_query!(ctx, "create table fixed_string_t(a FixedString(3)) engine = Memory") - on_exit(fn -> Ch.Test.query("drop table fixed_string_t") end) - - parameterize_query( - ctx, - "insert into fixed_string_t(a) format RowBinary", - [ - [""], - ["a"], - ["aa"], - ["aaa"] - ], - types: ["FixedString(3)"] - ) - - assert parameterize_query!(ctx, "select * from fixed_string_t").rows == [ - [<<0, 0, 0>>], - ["a" <> <<0, 0>>], - ["aa" <> <<0>>], - ["aaa"] - ] - end - - test "decimal", ctx do - assert {:ok, %{num_rows: 1, rows: [row]}} = - parameterize_query(ctx, "SELECT toDecimal32(2, 4) AS x, x / 3, toTypeName(x)") - - assert row == [Decimal.new("2.0000"), Decimal.new("0.6666"), "Decimal(9, 4)"] - - assert {:ok, %{num_rows: 1, rows: [row]}} = - parameterize_query(ctx, "SELECT toDecimal64(2, 4) AS x, x / 3, toTypeName(x)") - - assert row == [Decimal.new("2.0000"), Decimal.new("0.6666"), "Decimal(18, 4)"] - - assert {:ok, %{num_rows: 1, rows: [row]}} = - parameterize_query(ctx, "SELECT toDecimal128(2, 4) AS x, x / 3, toTypeName(x)") - - assert row == [Decimal.new("2.0000"), 
Decimal.new("0.6666"), "Decimal(38, 4)"] - - assert {:ok, %{num_rows: 1, rows: [row]}} = - parameterize_query(ctx, "SELECT toDecimal256(2, 4) AS x, x / 3, toTypeName(x)") - - assert row == [Decimal.new("2.0000"), Decimal.new("0.6666"), "Decimal(76, 4)"] - - parameterize_query!(ctx, "create table decimal_t(d Decimal32(4)) engine = Memory") - on_exit(fn -> Ch.Test.query("drop table decimal_t") end) - - parameterize_query!( - ctx, - "insert into decimal_t(d) format RowBinary", - _rows = [ - [Decimal.new("2.66")], - [Decimal.new("2.6666")], - [Decimal.new("2.66666")] - ], - types: ["Decimal32(4)"] - ) - - assert parameterize_query!(ctx, "select * from decimal_t").rows == [ - [Decimal.new("2.6600")], - [Decimal.new("2.6666")], - [Decimal.new("2.6667")] - ] - end - - test "boolean", ctx do - assert {:ok, %{num_rows: 1, rows: [[true, "Bool"]]}} = - parameterize_query(ctx, "select true as col, toTypeName(col)") - - assert {:ok, %{num_rows: 1, rows: [[1, "UInt8"]]}} = - parameterize_query(ctx, "select true == 1 as col, toTypeName(col)") - - assert {:ok, %{num_rows: 1, rows: [[true, false]]}} = - parameterize_query(ctx, "select true, false") - - parameterize_query!(ctx, "create table test_bool(A Int64, B Bool) engine = Memory") - on_exit(fn -> Ch.Test.query("drop table test_bool") end) - - parameterize_query!(ctx, "INSERT INTO test_bool VALUES (1, true),(2,0)") - - parameterize_query!( - ctx, - "insert into test_bool(A, B) format RowBinary", - _rows = [[3, true], [4, false]], - types: ["Int64", "Bool"] - ) - - # anything > 0 is `true`, here `2` is `true` - parameterize_query!(ctx, "insert into test_bool(A, B) values (5, 2)") - - assert %{ - rows: [ - [1, true, 1], - [2, false, 0], - [3, true, 3], - [4, false, 0], - [5, true, 5] - ] - } = parameterize_query!(ctx, "SELECT *, A * B FROM test_bool ORDER BY A") - end - - test "uuid", ctx do - assert {:ok, %{num_rows: 1, rows: [[<<_::16-bytes>>]]}} = - parameterize_query(ctx, "select generateUUIDv4()") - - assert {:ok, %{num_rows: 
1, rows: [[uuid, "417ddc5d-e556-4d27-95dd-a34d84e46a50"]]}} = - parameterize_query(ctx, "select {uuid:UUID} as u, toString(u)", %{ - "uuid" => "417ddc5d-e556-4d27-95dd-a34d84e46a50" - }) - - assert uuid == - "417ddc5d-e556-4d27-95dd-a34d84e46a50" - |> String.replace("-", "") - |> Base.decode16!(case: :lower) - - parameterize_query!(ctx, " CREATE TABLE t_uuid (x UUID, y String) ENGINE Memory") - on_exit(fn -> Ch.Test.query("drop table t_uuid") end) - - parameterize_query!(ctx, "INSERT INTO t_uuid SELECT generateUUIDv4(), 'Example 1'") - - assert {:ok, %{num_rows: 1, rows: [[<<_::16-bytes>>, "Example 1"]]}} = - parameterize_query(ctx, "SELECT * FROM t_uuid") - - parameterize_query!(ctx, "INSERT INTO t_uuid (y) VALUES ('Example 2')") - - parameterize_query!( - ctx, - "insert into t_uuid(x,y) format RowBinary", - _rows = [[uuid, "Example 3"]], - types: ["UUID", "String"] - ) - - assert {:ok, - %{ - num_rows: 3, - rows: [ - [<<_::16-bytes>>, "Example 1"], - [<<0::128>>, "Example 2"], - [^uuid, "Example 3"] - ] - }} = parameterize_query(ctx, "SELECT * FROM t_uuid ORDER BY y") - end - - @tag :skip - test "json", ctx do - settings = [allow_experimental_object_type: 1] - - parameterize_query!(ctx, "CREATE TABLE json(o JSON) ENGINE = Memory", [], - settings: settings - ) - - parameterize_query!( - ctx, - ~s|INSERT INTO json VALUES ('{"a": 1, "b": { "c": 2, "d": [1, 2, 3] }}')| - ) - - assert parameterize_query!(ctx, "SELECT o.a, o.b.c, o.b.d[3] FROM json").rows == [[1, 2, 3]] - - # named tuples are not supported yet - assert_raise ArgumentError, fn -> parameterize_query!(ctx, "SELECT o FROM json") end - end - - @tag :json - test "json as string", ctx do - # after v25 ClickHouse started rendering numbers in JSON as strings - [[version]] = parameterize_query!(ctx, "select version()").rows - - parse_version = fn version -> - version |> String.split(".") |> Enum.map(&String.to_integer/1) - end - - version = parse_version.(version) - numbers_as_strings? 
= version >= [25] and version <= [25, 8] - - [expected1, expected2] = - if numbers_as_strings? do - [ - [[~s|{"answer":"42"}|]], - [[~s|{"a":"42"}|], [~s|{"b":"10"}|]] - ] - else - [ - [[~s|{"answer":42}|]], - [[~s|{"a":42}|], [~s|{"b":10}|]] - ] - end - - assert parameterize_query!(ctx, ~s|select '{"answer":42}'::JSON::String|, [], - settings: [enable_json_type: 1] - ).rows == expected1 - - parameterize_query!(ctx, "CREATE TABLE test_json_as_string(json JSON) ENGINE = Memory", [], - settings: [enable_json_type: 1] - ) - - on_exit(fn -> Ch.Test.query("DROP TABLE test_json_as_string") end) - - parameterize_query!( - ctx, - "INSERT INTO test_json_as_string(json) FORMAT RowBinary", - _rows = [[Jason.encode_to_iodata!(%{"a" => 42})], [Jason.encode_to_iodata!(%{"b" => 10})]], - types: [:string], - settings: [ - enable_json_type: 1, - input_format_binary_read_json_as_string: 1 - ] - ) - - assert parameterize_query!(ctx, "select json::String from test_json_as_string", [], - settings: [enable_json_type: 1] - ).rows == expected2 - end - - # TODO enum16 - - test "enum8", ctx do - assert {:ok, %{num_rows: 1, rows: [["Enum8('a' = 1, 'b' = 2)"]]}} = - parameterize_query( - ctx, - "SELECT toTypeName(CAST('a', 'Enum(\\'a\\' = 1, \\'b\\' = 2)'))" - ) - - assert {:ok, %{num_rows: 1, rows: [["a"]]}} = - parameterize_query(ctx, "SELECT CAST('a', 'Enum(\\'a\\' = 1, \\'b\\' = 2)')") - - assert {:ok, %{num_rows: 1, rows: [["b"]]}} = - parameterize_query(ctx, "select {enum:Enum('a' = 1, 'b' = 2)}", %{"enum" => "b"}) - - assert {:ok, %{num_rows: 1, rows: [["b"]]}} = - parameterize_query(ctx, "select {enum:Enum('a' = 1, 'b' = 2)}", %{"enum" => 2}) - - assert {:ok, %{num_rows: 1, rows: [["b"]]}} = - parameterize_query(ctx, "select {enum:Enum16('a' = 1, 'b' = 2)}", %{"enum" => 2}) - - parameterize_query!( - ctx, - "CREATE TABLE t_enum(i UInt8, x Enum('hello' = 1, 'world' = 2)) ENGINE Memory" - ) - - on_exit(fn -> Ch.Test.query("DROP TABLE t_enum") end) - - parameterize_query!( - ctx, - 
"INSERT INTO t_enum VALUES (0, 'hello'), (1, 'world'), (2, 'hello')" - ) - - assert parameterize_query!(ctx, "SELECT *, CAST(x, 'Int8') FROM t_enum ORDER BY i").rows == - [ - [0, "hello", 1], - [1, "world", 2], - [2, "hello", 1] - ] - - parameterize_query!( - ctx, - "INSERT INTO t_enum(i, x) FORMAT RowBinary", - _rows = [[3, "hello"], [4, "world"], [5, 1], [6, 2]], - types: ["UInt8", "Enum8('hello' = 1, 'world' = 2)"] - ) - - assert parameterize_query!(ctx, "SELECT *, CAST(x, 'Int8') FROM t_enum ORDER BY i").rows == - [ - [0, "hello", 1], - [1, "world", 2], - [2, "hello", 1], - [3, "hello", 1], - [4, "world", 2], - [5, "hello", 1], - [6, "world", 2] - ] - - # TODO nil enum - end - - test "map", ctx do - assert parameterize_query!( - ctx, - "SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map" - ).rows == [[%{1 => "Ready", 2 => "Steady", 3 => "Go"}]] - - assert parameterize_query!(ctx, "select {map:Map(String, UInt8)}", %{ - "map" => %{"pg" => 13, "hello" => 100} - }).rows == [[%{"hello" => 100, "pg" => 13}]] - - parameterize_query!(ctx, "CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory") - on_exit(fn -> Ch.Test.query("DROP TABLE table_map") end) - - parameterize_query!( - ctx, - "INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30})" - ) - - assert parameterize_query!(ctx, "SELECT a['key2'] FROM table_map").rows == [ - [10], - [20], - [30] - ] - - assert parameterize_query!(ctx, "INSERT INTO table_map VALUES ({'key3':100}), ({})") - - assert parameterize_query!(ctx, "SELECT a['key3'] FROM table_map ORDER BY 1 DESC").rows == [ - [100], - [0], - [0], - [0], - [0] - ] - - assert parameterize_query!( - ctx, - "INSERT INTO table_map FORMAT RowBinary", - _rows = [ - [%{"key10" => 20, "key20" => 40}], - # empty map - [%{}], - # null map - [nil], - # empty proplist map - [[]], - [[{"key50", 100}]] - ], - types: ["Map(String, UInt64)"] - ) - - assert parameterize_query!(ctx, "SELECT * 
FROM table_map ORDER BY a ASC").rows == [ - [%{}], - [%{}], - [%{}], - [%{}], - [%{"key1" => 1, "key2" => 10}], - [%{"key1" => 2, "key2" => 20}], - [%{"key1" => 3, "key2" => 30}], - [%{"key10" => 20, "key20" => 40}], - [%{"key3" => 100}], - [%{"key50" => 100}] - ] - end - - test "tuple", ctx do - assert parameterize_query!(ctx, "SELECT tuple(1,'a') AS x, toTypeName(x)").rows == [ - [{1, "a"}, "Tuple(UInt8, String)"] - ] - - assert parameterize_query!(ctx, "SELECT {$0:Tuple(Int8, String)}", [{-1, "abs"}]).rows == [ - [{-1, "abs"}] - ] - - assert parameterize_query!(ctx, "SELECT tuple('a') AS x").rows == [[{"a"}]] - - assert parameterize_query!(ctx, "SELECT tuple(1, NULL) AS x, toTypeName(x)").rows == [ - [{1, nil}, "Tuple(UInt8, Nullable(Nothing))"] - ] - - # TODO named tuples - parameterize_query!(ctx, "CREATE TABLE tuples_t (`a` Tuple(String, Int64)) ENGINE = Memory") - on_exit(fn -> Ch.Test.query("DROP TABLE tuples_t") end) - - parameterize_query!(ctx, "INSERT INTO tuples_t VALUES (('y', 10)), (('x',-10))") - - parameterize_query!( - ctx, - "INSERT INTO tuples_t FORMAT RowBinary", - _rows = [[{"a", 20}], [{"b", 30}]], - types: ["Tuple(String, Int64)"] - ) - - assert parameterize_query!(ctx, "SELECT a FROM tuples_t ORDER BY a.1 ASC").rows == [ - [{"a", 20}], - [{"b", 30}], - [{"x", -10}], - [{"y", 10}] - ] - end - - test "datetime", ctx do - parameterize_query!( - ctx, - "CREATE TABLE dt(`timestamp` DateTime('Asia/Istanbul'), `event_id` UInt8) ENGINE = Memory" - ) - - on_exit(fn -> Ch.Test.query("DROP TABLE dt") end) - - parameterize_query!( - ctx, - "INSERT INTO dt Values (1546300800, 1), ('2019-01-01 00:00:00', 2)" - ) - - assert {:ok, %{num_rows: 2, rows: rows}} = - parameterize_query(ctx, "SELECT *, toString(timestamp) FROM dt") - - assert rows == [ - [ - DateTime.new!(~D[2019-01-01], ~T[03:00:00], "Asia/Istanbul"), - 1, - "2019-01-01 03:00:00" - ], - [ - DateTime.new!(~D[2019-01-01], ~T[00:00:00], "Asia/Istanbul"), - 2, - "2019-01-01 00:00:00" - ] - ] - - 
naive_noon = ~N[2022-12-12 12:00:00] - - # datetimes in params are sent in text and ClickHouse translates them to UTC from server timezone by default - # see https://clickhouse.com/docs/en/sql-reference/data-types/datetime - # https://kb.altinity.com/altinity-kb-queries-and-syntax/time-zones/ - assert {:ok, - %{num_rows: 1, rows: [[naive_datetime, "2022-12-12 12:00:00"]], headers: headers}} = - parameterize_query(ctx, "select {$0:DateTime} as d, toString(d)", [naive_noon]) - - # to make this test pass for contributors with non UTC timezone we perform the same steps as ClickHouse - # i.e. we give server timezone to the naive datetime and shift it to UTC before comparing with the result - {_, timezone} = List.keyfind!(headers, "x-clickhouse-timezone", 0) - - assert naive_datetime == - naive_noon - |> DateTime.from_naive!(timezone) - |> DateTime.shift_zone!("Etc/UTC") - |> DateTime.to_naive() - - assert {:ok, %{num_rows: 1, rows: [[~U[2022-12-12 12:00:00Z], "2022-12-12 12:00:00"]]}} = - parameterize_query(ctx, "select {$0:DateTime('UTC')} as d, toString(d)", [ - naive_noon - ]) - - assert {:ok, %{num_rows: 1, rows: rows}} = - parameterize_query(ctx, "select {$0:DateTime('Asia/Bangkok')} as d, toString(d)", [ - naive_noon - ]) - - assert rows == [ - [ - DateTime.new!(~D[2022-12-12], ~T[12:00:00], "Asia/Bangkok"), - "2022-12-12 12:00:00" - ] - ] - - # simulate unknown timezone - prev_tz_db = Calendar.get_time_zone_database() - Calendar.put_time_zone_database(Calendar.UTCOnlyTimeZoneDatabase) - on_exit(fn -> Calendar.put_time_zone_database(prev_tz_db) end) - - assert_raise ArgumentError, ~r/:utc_only_time_zone_database/, fn -> - parameterize_query(ctx, "select {$0:DateTime('Asia/Tokyo')}", [naive_noon]) - end - end - - # TODO are negatives correct? what's the range? 
- test "date32", ctx do - parameterize_query!( - ctx, - "CREATE TABLE new(`timestamp` Date32, `event_id` UInt8) ENGINE = Memory;" - ) - - on_exit(fn -> Ch.Test.query("DROP TABLE new") end) - - parameterize_query!(ctx, "INSERT INTO new VALUES (4102444800, 1), ('2100-01-01', 2)") - - assert {:ok, - %{ - num_rows: 2, - rows: [first_event, [~D[2100-01-01], 2, "2100-01-01"]] - }} = parameterize_query(ctx, "SELECT *, toString(timestamp) FROM new") - - # TODO use timezone info to be more exact - assert first_event in [ - [~D[2099-12-31], 1, "2099-12-31"], - [~D[2100-01-01], 1, "2100-01-01"] - ] - - assert {:ok, %{num_rows: 1, rows: [[~D[1900-01-01], "1900-01-01"]]}} = - parameterize_query(ctx, "select {$0:Date32} as d, toString(d)", [~D[1900-01-01]]) - - # max - assert {:ok, %{num_rows: 1, rows: [[~D[2299-12-31], "2299-12-31"]]}} = - parameterize_query(ctx, "select {$0:Date32} as d, toString(d)", [~D[2299-12-31]]) - - # min - assert {:ok, %{num_rows: 1, rows: [[~D[1900-01-01], "1900-01-01"]]}} = - parameterize_query(ctx, "select {$0:Date32} as d, toString(d)", [~D[1900-01-01]]) - - parameterize_query!( - ctx, - "insert into new(timestamp, event_id) format RowBinary", - _rows = [[~D[1960-01-01], 3]], - types: ["Date32", "UInt8"] - ) - - assert %{ - num_rows: 3, - rows: [ - first_event, - [~D[2100-01-01], 2, "2100-01-01"], - [~D[1960-01-01], 3, "1960-01-01"] - ] - } = - parameterize_query!( - ctx, - "SELECT *, toString(timestamp) FROM new ORDER BY event_id" - ) - - # TODO use timezone info to be more exact - assert first_event in [ - [~D[2099-12-31], 1, "2099-12-31"], - [~D[2100-01-01], 1, "2100-01-01"] - ] - - assert %{num_rows: 1, rows: [[3]]} = - parameterize_query!(ctx, "SELECT event_id FROM new WHERE timestamp = '1960-01-01'") - end - - # https://clickhouse.com/docs/sql-reference/data-types/time - @tag :time - test "time", ctx do - settings = [enable_time_time64_type: 1] - - parameterize_query!( - ctx, - "CREATE TABLE time_t(`time` Time, `event_id` UInt8) ENGINE = 
Memory", - [], - settings: settings - ) - - on_exit(fn -> - Ch.Test.query("DROP TABLE time_t", [], settings: settings) - end) - - parameterize_query!(ctx, "INSERT INTO time_t VALUES ('100:00:00', 1), (12453, 2)", [], - settings: settings - ) - - # ClickHouse supports Time values of [-999:59:59, 999:59:59] - # and Elixir's Time supports values of [00:00:00, 23:59:59] - # so we raise an error when ClickHouse's Time value is out of Elixir's Time range - - assert_raise ArgumentError, - "ClickHouse Time value 3.6e5 (seconds) is out of Elixir's Time range (00:00:00.000000 - 23:59:59.999999)", - fn -> - parameterize_query!(ctx, "select * from time_t", [], settings: settings) - end - - parameterize_query!( - ctx, - "INSERT INTO time_t(time, event_id) FORMAT RowBinary", - _rows = [ - [~T[00:00:00], 3], - [~T[12:34:56], 4], - [~T[23:59:59], 5] - ], - settings: settings, - types: ["Time", "UInt8"] - ) - - assert parameterize_query!( - ctx, - "select * from time_t where event_id > 1 order by event_id", - [], - settings: settings - ).rows == - [[~T[03:27:33], 2], [~T[00:00:00], 3], [~T[12:34:56], 4], [~T[23:59:59], 5]] - end - - # https://clickhouse.com/docs/sql-reference/data-types/time64 - @tag :time - test "Time64(3)", ctx do - settings = [enable_time_time64_type: 1] - - parameterize_query!( - ctx, - "CREATE TABLE time64_3_t(`time` Time64(3), `event_id` UInt8) ENGINE = Memory", - [], - settings: settings - ) - - on_exit(fn -> - Ch.Test.query("DROP TABLE time64_3_t", [], settings: settings) - end) - - parameterize_query!( - ctx, - "INSERT INTO time64_3_t VALUES (15463123, 1), (154600.123, 2), ('100:00:00', 3);", - [], - settings: settings - ) - - # ClickHouse supports Time64 values of [-999:59:59.999999999, 999:59:59.999999999] - # and Elixir's Time supports values of [00:00:00.000000, 23:59:59.999999] - # so we raise an error when ClickHouse's Time64 value is out of Elixir's Time range - - assert_raise ArgumentError, - "ClickHouse Time value 154600.123 (seconds) is out of 
Elixir's Time range (00:00:00.000000 - 23:59:59.999999)", - fn -> - parameterize_query!(ctx, "select * from time64_3_t", [], settings: settings) - end - - parameterize_query!( - ctx, - "INSERT INTO time64_3_t(time, event_id) FORMAT RowBinary", - _rows = [ - [~T[00:00:00.000000], 4], - [~T[12:34:56.012300], 5], - [~T[12:34:56.123456], 6], - [~T[12:34:56.120000], 7], - [~T[23:59:59.999999], 8] - ], - settings: settings, - types: ["Time64(3)", "UInt8"] - ) - - assert parameterize_query!( - ctx, - "select * from time64_3_t where time < {max_elixir_time:Time64(6)} order by event_id", - %{"max_elixir_time" => ~T[23:59:59.999999]}, - settings: settings - ).rows == - [ - [~T[04:17:43.123], 1], - [~T[00:00:00.000], 4], - [~T[12:34:56.012], 5], - [~T[12:34:56.123], 6], - [~T[12:34:56.120], 7], - [~T[23:59:59.999], 8] - ] - end - - @tag :time - test "Time64(6)", ctx do - settings = [enable_time_time64_type: 1] - - parameterize_query!( - ctx, - "CREATE TABLE time64_6_t(`time` Time64(6), `event_id` UInt8) ENGINE = Memory", - [], - settings: settings - ) - - on_exit(fn -> - Ch.Test.query("DROP TABLE time64_6_t", [], settings: settings) - end) - - parameterize_query!( - ctx, - "INSERT INTO time64_6_t(time, event_id) FORMAT RowBinary", - _rows = [ - [~T[00:00:00.000000], 1], - [~T[12:34:56.123456], 2], - [~T[12:34:56.123000], 3], - [~T[12:34:56.000123], 4], - [~T[23:59:59.999999], 5] - ], - settings: settings, - types: ["Time64(6)", "UInt8"] - ) - - assert parameterize_query!( - ctx, - "select * from time64_6_t order by event_id", - [], - settings: settings - ).rows == - [ - [~T[00:00:00.000000], 1], - [~T[12:34:56.123456], 2], - [~T[12:34:56.123000], 3], - [~T[12:34:56.000123], 4], - [~T[23:59:59.999999], 5] - ] - end - - @tag :time - test "Time64(9)", ctx do - settings = [enable_time_time64_type: 1] - - parameterize_query!( - ctx, - "CREATE TABLE time64_9_t(`time` Time64(9), `event_id` UInt8) ENGINE = Memory", - [], - settings: settings - ) - - on_exit(fn -> - 
Ch.Test.query("DROP TABLE time64_9_t", [], settings: settings) - end) - - parameterize_query!( - ctx, - "INSERT INTO time64_9_t(time, event_id) FORMAT RowBinary", - _rows = [ - [~T[00:00:00.000000], 1], - [~T[12:34:56.123456], 2], - [~T[12:34:56.123000], 3], - [~T[12:34:56.000123], 4], - [~T[23:59:59.999999], 5] - ], - settings: settings, - types: ["Time64(9)", "UInt8"] - ) - - assert parameterize_query!( - ctx, - "select * from time64_9_t order by event_id", - [], - settings: settings - ).rows == - [ - [~T[00:00:00.000000], 1], - [~T[12:34:56.123456], 2], - [~T[12:34:56.123000], 3], - [~T[12:34:56.000123], 4], - [~T[23:59:59.999999], 5] - ] - end - - test "datetime64", ctx do - parameterize_query!( - ctx, - "CREATE TABLE datetime64_t(`timestamp` DateTime64(3, 'Asia/Istanbul'), `event_id` UInt8) ENGINE = Memory" - ) - - on_exit(fn -> Ch.Test.query("DROP TABLE datetime64_t") end) - - parameterize_query!( - ctx, - "INSERT INTO datetime64_t Values (1546300800123, 1), (1546300800.123, 2), ('2019-01-01 00:00:00', 3)" - ) - - assert {:ok, %{num_rows: 3, rows: rows}} = - parameterize_query(ctx, "SELECT *, toString(timestamp) FROM datetime64_t") - - assert rows == [ - [ - DateTime.new!(~D[2019-01-01], ~T[03:00:00.123], "Asia/Istanbul"), - 1, - "2019-01-01 03:00:00.123" - ], - [ - DateTime.new!(~D[2019-01-01], ~T[03:00:00.123], "Asia/Istanbul"), - 2, - "2019-01-01 03:00:00.123" - ], - [ - DateTime.new!(~D[2019-01-01], ~T[00:00:00.000], "Asia/Istanbul"), - 3, - "2019-01-01 00:00:00.000" - ] - ] - - parameterize_query!( - ctx, - "insert into datetime64_t(event_id, timestamp) format RowBinary", - _rows = [ - [4, ~N[2021-01-01 12:00:00.123456]], - [5, ~N[2021-01-01 12:00:00]] - ], - types: ["UInt8", "DateTime64(3)"] - ) - - assert {:ok, %{num_rows: 2, rows: rows}} = - parameterize_query( - ctx, - "SELECT *, toString(timestamp) FROM datetime64_t WHERE timestamp > '2020-01-01'" - ) - - assert rows == [ - [ - DateTime.new!(~D[2021-01-01], ~T[15:00:00.123], "Asia/Istanbul"), - 4, - 
"2021-01-01 15:00:00.123" - ], - [ - DateTime.new!(~D[2021-01-01], ~T[15:00:00.000], "Asia/Istanbul"), - 5, - "2021-01-01 15:00:00.000" - ] - ] - - for precision <- 0..9 do - naive_noon = ~N[2022-01-01 12:00:00] - - # datetimes in params are sent in text and ClickHouse translates them to UTC from server timezone by default - # see https://clickhouse.com/docs/en/sql-reference/data-types/datetime - # https://kb.altinity.com/altinity-kb-queries-and-syntax/time-zones/ - assert {:ok, %{num_rows: 1, rows: [[naive_datetime]], headers: headers}} = - parameterize_query(ctx, "select {$0:DateTime64(#{precision})}", [naive_noon]) - - # to make this test pass for contributors with non UTC timezone we perform the same steps as ClickHouse - # i.e. we give server timezone to the naive datetime and shift it to UTC before comparing with the result - {_, timezone} = List.keyfind!(headers, "x-clickhouse-timezone", 0) - - expected = - naive_noon - |> DateTime.from_naive!(timezone) - |> DateTime.shift_zone!("Etc/UTC") - |> DateTime.to_naive() - - assert NaiveDateTime.compare(naive_datetime, expected) == :eq - end - - assert {:ok, - %{num_rows: 1, rows: [[~U[2022-01-01 12:00:00.123Z], "2022-01-01 12:00:00.123"]]}} = - parameterize_query(ctx, "select {dt:DateTime64(3,'UTC')} as d, toString(d)", %{ - "dt" => ~N[2022-01-01 12:00:00.123] - }) - - assert {:ok, - %{num_rows: 1, rows: [[~U[1900-01-01 12:00:00.123Z], "1900-01-01 12:00:00.123"]]}} = - parameterize_query(ctx, "select {dt:DateTime64(3,'UTC')} as d, toString(d)", %{ - "dt" => ~N[1900-01-01 12:00:00.123] - }) - - assert {:ok, %{num_rows: 1, rows: [row]}} = - parameterize_query( - ctx, - "select {dt:DateTime64(3,'Asia/Bangkok')} as d, toString(d)", - %{ - "dt" => ~N[2022-01-01 12:00:00.123] - } - ) - - assert row == [ - DateTime.new!(~D[2022-01-01], ~T[12:00:00.123], "Asia/Bangkok"), - "2022-01-01 12:00:00.123" - ] - end - - test "nullable", ctx do - parameterize_query!( - ctx, - "CREATE TABLE nullable (`n` Nullable(UInt32)) ENGINE = 
MergeTree ORDER BY tuple()" - ) - - on_exit(fn -> Ch.Test.query("DROP TABLE nullable") end) - - parameterize_query!(ctx, "INSERT INTO nullable VALUES (1) (NULL) (2) (NULL)") - - assert {:ok, %{num_rows: 4, rows: [[0], [1], [0], [1]]}} = - parameterize_query(ctx, "SELECT n.null FROM nullable") - - assert {:ok, %{num_rows: 4, rows: [[1], [nil], [2], [nil]]}} = - parameterize_query(ctx, "SELECT n FROM nullable") - - # weird thing about nullables is that, similar to bool, in binary format, any byte larger than 0 is `null` - parameterize_query( - ctx, - "insert into nullable format RowBinary", - <<1, 2, 3, 4, 5>>, - encode: false - ) - - assert %{num_rows: 1, rows: [[count]]} = - parameterize_query!(ctx, "select count(*) from nullable where n is null") - - assert count == 2 + 5 - end - - test "nullable + default", ctx do - parameterize_query!(ctx, """ - CREATE TABLE ch_nulls ( - a UInt8, - b UInt8 NULL, - c UInt8 DEFAULT 10, - d Nullable(UInt8) DEFAULT 10, - ) ENGINE Memory - """) - - on_exit(fn -> Ch.Test.query("DROP TABLE ch_nulls") end) - - parameterize_query!( - ctx, - "INSERT INTO ch_nulls(a, b, c, d) FORMAT RowBinary", - [[nil, nil, nil, nil]], - types: ["UInt8", "Nullable(UInt8)", "UInt8", "Nullable(UInt8)"] - ) - - # default is ignored... 
- assert parameterize_query!(ctx, "SELECT * FROM ch_nulls").rows == [[0, nil, 0, nil]] - end - - # based on https://github.com/ClickHouse/clickhouse-java/pull/1345/files - test "nullable + input() + default", ctx do - parameterize_query!(ctx, """ - CREATE TABLE test_insert_default_value( - n Int32, - s String DEFAULT 'secret' - ) ENGINE Memory - """) - - on_exit(fn -> Ch.Test.query("DROP TABLE test_insert_default_value") end) - - parameterize_query!( - ctx, - """ - INSERT INTO test_insert_default_value - SELECT id, name - FROM input('id UInt32, name Nullable(String)') - FORMAT RowBinary\ - """, - [[1, nil], [-1, nil]], - types: ["UInt32", "Nullable(String)"] - ) - - assert parameterize_query!(ctx, "SELECT * FROM test_insert_default_value ORDER BY n").rows == - [ - [-1, "secret"], - [1, "secret"] - ] - end - - test "can decode casted Point", ctx do - assert parameterize_query!(ctx, "select cast((0, 1) as Point)").rows == [ - _row = [_point = {0.0, 1.0}] - ] - end - - test "can encode and then decode Point in query params", ctx do - assert parameterize_query!(ctx, "select {$0:Point}", [{10, 10}]).rows == [ - _row = [_point = {10.0, 10.0}] - ] - end - - test "can insert and select Point", ctx do - parameterize_query!(ctx, "CREATE TABLE geo_point (p Point) ENGINE = Memory()") - on_exit(fn -> Ch.Test.query("DROP TABLE geo_point") end) - - parameterize_query!(ctx, "INSERT INTO geo_point VALUES((10, 10))") - - parameterize_query!(ctx, "INSERT INTO geo_point FORMAT RowBinary", [[{20, 20}]], - types: ["Point"] - ) - - assert parameterize_query!(ctx, "SELECT p, toTypeName(p) FROM geo_point ORDER BY p ASC").rows == - [ - [{10.0, 10.0}, "Point"], - [{20.0, 20.0}, "Point"] - ] - - # to make our RowBinary is not garbage in garbage out we also test a text format response - assert parameterize_query!( - ctx, - "SELECT p, toTypeName(p) FROM geo_point ORDER BY p ASC FORMAT JSONCompact" - ).rows - |> Jason.decode!() - |> Map.fetch!("data") == [ - [[10, 10], "Point"], - [[20, 20], 
"Point"] - ] - end - - test "can decode casted Ring", ctx do - ring = [{0.0, 1.0}, {10.0, 3.0}] - - assert parameterize_query!(ctx, "select cast([(0,1),(10,3)] as Ring)").rows == [ - _row = [ring] - ] - end - - test "can encode and then decode Ring in query params", ctx do - ring = [{0.0, 1.0}, {10.0, 3.0}] - assert parameterize_query!(ctx, "select {$0:Ring}", [ring]).rows == [_row = [ring]] - end - - test "can insert and select Ring", ctx do - parameterize_query!(ctx, "CREATE TABLE geo_ring (r Ring) ENGINE = Memory()") - on_exit(fn -> Ch.Test.query("DROP TABLE geo_ring") end) - - parameterize_query!( - ctx, - "INSERT INTO geo_ring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)])" - ) - - ring = [{20, 20}, {0, 0}, {0, 20}] - parameterize_query!(ctx, "INSERT INTO geo_ring FORMAT RowBinary", [[ring]], types: ["Ring"]) - - assert parameterize_query!(ctx, "SELECT r, toTypeName(r) FROM geo_ring ORDER BY r ASC").rows == - [ - [[{0.0, 0.0}, {10.0, 0.0}, {10.0, 10.0}, {0.0, 10.0}], "Ring"], - [[{20.0, 20.0}, {0.0, 0.0}, {0.0, 20.0}], "Ring"] - ] - - # to make our RowBinary is not garbage in garbage out we also test a text format response - assert parameterize_query!( - ctx, - "SELECT r, toTypeName(r) FROM geo_ring ORDER BY r ASC FORMAT JSONCompact" - ).rows - |> Jason.decode!() - |> Map.fetch!("data") == [ - [[[0, 0], [10, 0], [10, 10], [0, 10]], "Ring"], - [[[20, 20], [0, 0], [0, 20]], "Ring"] - ] - end - - test "can decode casted Polygon", ctx do - polygon = [[{0.0, 1.0}, {10.0, 3.0}], [], [{2, 2}]] - - assert parameterize_query!(ctx, "select cast([[(0,1),(10,3)],[],[(2,2)]] as Polygon)").rows == - [ - _row = [polygon] - ] - end - - test "can encode and then decode Polygon in query params", ctx do - polygon = [[{0.0, 1.0}, {10.0, 3.0}], [], [{2, 2}]] - assert parameterize_query!(ctx, "select {$0:Polygon}", [polygon]).rows == [_row = [polygon]] - end - - test "can insert and select Polygon", ctx do - parameterize_query!(ctx, "CREATE TABLE geo_polygon (pg Polygon) ENGINE = 
Memory()") - on_exit(fn -> Ch.Test.query("DROP TABLE geo_polygon") end) - - parameterize_query!( - ctx, - "INSERT INTO geo_polygon VALUES([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]])" - ) - - polygon = [[{0, 1.0}, {10, 3.2}], [], [{2, 2}]] - - parameterize_query!(ctx, "INSERT INTO geo_polygon FORMAT RowBinary", [[polygon]], - types: ["Polygon"] - ) - - assert parameterize_query!( - ctx, - "SELECT pg, toTypeName(pg) FROM geo_polygon ORDER BY pg ASC" - ).rows == - [ - [[[{0.0, 1.0}, {10.0, 3.2}], [], [{2.0, 2.0}]], "Polygon"], - [ - [ - [{20.0, 20.0}, {50.0, 20.0}, {50.0, 50.0}, {20.0, 50.0}], - [{30.0, 30.0}, {50.0, 50.0}, {50.0, 30.0}] - ], - "Polygon" - ] - ] - - # to make our RowBinary is not garbage in garbage out we also test a text format response - assert parameterize_query!( - ctx, - "SELECT pg, toTypeName(pg) FROM geo_polygon ORDER BY pg ASC FORMAT JSONCompact" - ).rows - |> Jason.decode!() - |> Map.fetch!("data") == [ - [[[[0, 1], [10, 3.2]], [], [[2, 2]]], "Polygon"], - [ - [[[20, 20], [50, 20], [50, 50], [20, 50]], [[30, 30], [50, 50], [50, 30]]], - "Polygon" - ] - ] - end - - test "can decode casted MultiPolygon", ctx do - multipolygon = [[[{0.0, 1.0}, {10.0, 3.0}], [], [{2, 2}]], [], [[{3, 3}]]] - - assert parameterize_query!( - ctx, - "select cast([[[(0,1),(10,3)],[],[(2,2)]],[],[[(3, 3)]]] as MultiPolygon)" - ).rows == [ - _row = [multipolygon] - ] - end - - test "can encode and then decode MultiPolygon in query params", ctx do - multipolygon = [[[{0.0, 1.0}, {10.0, 3.0}], [], [{2, 2}]], [], [[{3, 3}]]] - - assert parameterize_query!(ctx, "select {$0:MultiPolygon}", [multipolygon]).rows == [ - _row = [multipolygon] - ] - end - - test "can insert and select MultiPolygon", ctx do - parameterize_query!( - ctx, - "CREATE TABLE geo_multipolygon (mpg MultiPolygon) ENGINE = Memory()" - ) - - on_exit(fn -> Ch.Test.query("DROP TABLE geo_multipolygon") end) - - parameterize_query!( - ctx, - "INSERT INTO geo_multipolygon 
VALUES([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])" - ) - - multipolygon = [[[{0.0, 1.0}, {10.0, 3.0}], [], [{2, 2}]], [], [[{3, 3}]]] - - parameterize_query!(ctx, "INSERT INTO geo_multipolygon FORMAT RowBinary", [[multipolygon]], - types: ["MultiPolygon"] - ) - - assert parameterize_query!( - ctx, - "SELECT mpg, toTypeName(mpg) FROM geo_multipolygon ORDER BY mpg ASC" - ).rows == - [ - _row = [ - _multipolygon = [ - _polygon = [ - _ring = [{0.0, 0.0}, {10.0, 0.0}, {10.0, 10.0}, {0.0, 10.0}] - ], - [ - [{20.0, 20.0}, {50.0, 20.0}, {50.0, 50.0}, {20.0, 50.0}], - [{30.0, 30.0}, {50.0, 50.0}, {50.0, 30.0}] - ] - ], - "MultiPolygon" - ], - [ - [ - [ - [{0.0, 1.0}, {10.0, 3.0}], - [], - [{2.0, 2.0}] - ], - [], - [ - [{3.0, 3.0}] - ] - ], - "MultiPolygon" - ] - ] - - # to make our RowBinary is not garbage in garbage out we also test a text format response - assert parameterize_query!( - ctx, - "SELECT mpg, toTypeName(mpg) FROM geo_multipolygon ORDER BY mpg ASC FORMAT JSONCompact" - ).rows - |> Jason.decode!() - |> Map.fetch!("data") == [ - [ - [ - [[[0, 0], [10, 0], [10, 10], [0, 10]]], - [[[20, 20], [50, 20], [50, 50], [20, 50]], [[30, 30], [50, 50], [50, 30]]] - ], - "MultiPolygon" - ], - [[[[[0, 1], [10, 3]], [], [[2, 2]]], [], [[[3, 3]]]], "MultiPolygon"] - ] - end - end - - describe "options" do - # this test is flaky, sometimes it raises due to ownership timeout - @tag capture_log: true, skip: true - test "can provide custom timeout", ctx do - assert {:error, %Mint.TransportError{reason: :timeout} = error} = - parameterize_query(ctx, "select sleep(1)", _params = [], timeout: 100) - - assert Exception.message(error) == "timeout" - end - - test "errors on invalid creds", ctx do - assert {:error, %Ch.Error{code: 516} = error} = - parameterize_query(ctx, "select 1 + 1", _params = [], - username: "no-exists", - password: "wrong" - ) - - assert Exception.message(error) =~ - "Code: 516. 
DB::Exception: no-exists: Authentication failed: password is incorrect, or there is no user with such name. (AUTHENTICATION_FAILED)" - end - - test "errors on invalid database", ctx do - assert {:error, %Ch.Error{code: 81} = error} = - parameterize_query(ctx, "select 1 + 1", _params = [], database: "no-db") - - assert Exception.message(error) =~ "`no-db`" - assert Exception.message(error) =~ "UNKNOWN_DATABASE" - end - - test "can provide custom database", ctx do - assert {:ok, %{num_rows: 1, rows: [[2]]}} = - parameterize_query(ctx, "select 1 + 1", [], database: "default") - end - end - - describe "transactions" do - test "commit", ctx do - DBConnection.transaction(ctx.conn, fn conn -> - ctx = Map.put(ctx, :conn, conn) - parameterize_query!(ctx, "select 1 + 1") - end) - end - - test "rollback", ctx do - DBConnection.transaction(ctx.conn, fn conn -> - DBConnection.rollback(conn, :some_reason) - end) - end - - test "status", ctx do - assert DBConnection.status(ctx.conn) == :idle - end - end - - describe "stream" do - test "emits result structs containing raw data", ctx do - results = - DBConnection.run(ctx.conn, fn conn -> - conn - |> Ch.stream( - "select number from system.numbers limit {limit:UInt64}", - %{"limit" => 10_000}, - decode: false - ) - |> Enum.into([]) - end) - - assert length(results) >= 2 - - assert results - |> Enum.map(& &1.data) - |> IO.iodata_to_binary() - |> RowBinary.decode_rows() == Enum.map(0..9999, &[&1]) - end - - test "disconnects on early halt", ctx do - logs = - ExUnit.CaptureLog.capture_log(fn -> - Ch.run(ctx.conn, fn conn -> - conn |> Ch.stream("select number from system.numbers") |> Enum.take(1) - end) - - assert parameterize_query!(ctx, "select 1 + 1").rows == [[2]] - end) - - assert logs =~ - "disconnected: ** (Ch.Error) stopping stream before receiving full response by closing connection" - end - end - - describe "prepare" do - test "no-op", ctx do - query = Ch.Query.build("select 1 + 1") - - assert {:error, %Ch.Error{message: 
"prepared statements are not supported"}} = - DBConnection.prepare(ctx.conn, query) - end - end - - describe "start_link/1" do - test "can pass options to start_link/1", ctx do - db = "#{Ch.Test.database()}_#{System.unique_integer([:positive])}" - Ch.Test.query("CREATE DATABASE {db:Identifier}", %{"db" => db}) - on_exit(fn -> Ch.Test.query("DROP DATABASE {db:Identifier}", %{"db" => db}) end) - - {:ok, conn} = Ch.start_link(database: db) - ctx = Map.put(ctx, :conn, conn) - parameterize_query!(ctx, "create table example(a UInt8) engine=Memory") - assert {:ok, %{rows: [["example"]]}} = parameterize_query(ctx, "show tables") - end - - test "can start without options", ctx do - {:ok, conn} = Ch.start_link() - ctx = Map.put(ctx, :conn, conn) - assert {:ok, %{num_rows: 1, rows: [[2]]}} = parameterize_query(ctx, "select 1 + 1") - end - end - - describe "RowBinaryWithNamesAndTypes" do - setup ctx do - parameterize_query!(ctx, """ - create table if not exists row_binary_names_and_types_t ( - country_code FixedString(2), - rare_string LowCardinality(String), - maybe_int32 Nullable(Int32) - ) engine Memory - """) - - on_exit(fn -> Ch.Test.query("truncate row_binary_names_and_types_t") end) - end - - test "error on type mismatch", ctx do - stmt = "insert into row_binary_names_and_types_t format RowBinaryWithNamesAndTypes" - rows = [["AB", "rare", -42]] - names = ["country_code", "rare_string", "maybe_int32"] - - opts = [ - names: names, - types: [Ch.Types.fixed_string(2), Ch.Types.string(), Ch.Types.nullable(Ch.Types.u32())] - ] - - assert {:error, %Ch.Error{code: 117, message: message}} = - parameterize_query(ctx, stmt, rows, opts) - - assert message =~ "Type of 'rare_string' must be LowCardinality(String), not String" - - opts = [ - names: names, - types: [ - Ch.Types.fixed_string(2), - Ch.Types.low_cardinality(Ch.Types.string()), - Ch.Types.nullable(Ch.Types.u32()) - ] - ] - - assert {:error, %Ch.Error{code: 117, message: message}} = - parameterize_query(ctx, stmt, rows, 
opts) - - assert message =~ "Type of 'maybe_int32' must be Nullable(Int32), not Nullable(UInt32)" - end - - test "ok on valid types", ctx do - stmt = "insert into row_binary_names_and_types_t format RowBinaryWithNamesAndTypes" - rows = [["AB", "rare", -42]] - names = ["country_code", "rare_string", "maybe_int32"] - - opts = [ - names: names, - types: [ - Ch.Types.fixed_string(2), - Ch.Types.low_cardinality(Ch.Types.string()), - Ch.Types.nullable(Ch.Types.i32()) - ] - ] - - parameterize_query(ctx, stmt, rows, opts) - - assert parameterize_query!(ctx, "select * from row_binary_names_and_types_t").rows == [ - ["AB", "rare", -42] - ] - end - - test "select with lots of columns", ctx do - select = Enum.map_join(1..1000, ", ", fn i -> "#{i} as col_#{i}" end) - stmt = "select #{select} format RowBinaryWithNamesAndTypes" - - assert %Ch.Result{columns: columns, rows: [row]} = parameterize_query!(ctx, stmt) - - assert length(columns) == 1000 - assert List.first(columns) == "col_1" - assert List.last(columns) == "col_1000" - - assert length(row) == 1000 - assert List.first(row) == 1 - assert List.last(row) == 1000 - end - end -end diff --git a/test/ch/dynamic_test.exs b/test/ch/dynamic_test.exs deleted file mode 100644 index 7a916d69..00000000 --- a/test/ch/dynamic_test.exs +++ /dev/null @@ -1,439 +0,0 @@ -defmodule Ch.DynamicTest do - use ExUnit.Case, parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - import Ch.Test, only: [parameterize_query!: 2, parameterize_query!: 3, parameterize_query!: 4] - - @moduletag :dynamic - - setup do - {:ok, conn: start_supervised!({Ch, database: Ch.Test.database()})} - end - - test "it works", ctx do - select = fn literal -> - [row] = parameterize_query!(ctx, "select #{literal}::Dynamic as d, dynamicType(d)").rows - row - end - - parameterize_query!(ctx, "CREATE TABLE test (d Dynamic, id String) ENGINE = Memory;") - on_exit(fn -> Ch.Test.query("DROP TABLE test") end) - - insert = fn value -> - id = inspect(value) - - 
parameterize_query!(ctx, "insert into test(d, id) format RowBinary", [[value, id]], - types: ["Dynamic", "String"] - ).rows - - [[inserted]] = - parameterize_query!(ctx, "select d from test where id = {id:String}", %{"id" => id}).rows - - inserted - end - - # https://clickhouse.com/docs/sql-reference/data-types/data-types-binary-encoding - - # Nothing 0x00 - assert select.("[]::Array(Nothing)") == [[], "Array(Nothing)"] - # assert insert.([]) == [] - - # UInt8 0x01 - assert select.("0::UInt8") == [0, "UInt8"] - assert select.("255::UInt8") == [255, "UInt8"] - - # UInt16 0x02 - assert select.("12::UInt16") == [12, "UInt16"] - - # UInt32 0x03 - assert select.("123::UInt32") == [123, "UInt32"] - - # UInt64 0x04 - assert select.("1234::UInt64") == [1234, "UInt64"] - - assert insert.(0) == 0 - assert insert.(255) == 255 - - # UInt128 0x05 - assert select.("12345::UInt128") == [12345, "UInt128"] - - # UInt256 0x06 - assert select.("123456::UInt256") == [123_456, "UInt256"] - - # Int8 0x07 - assert select.("0::Int8") == [0, "Int8"] - assert select.("-23::Int8") == [-23, "Int8"] - - # Int16 0x08 - assert select.("-12::Int16") == [-12, "Int16"] - - # Int32 0x09 - assert select.("123::Int32") == [123, "Int32"] - - # Int64 0x0A - assert select.("-1234::Int64") == [-1234, "Int64"] - - assert insert.(-1234) == -1234 - - # Int128 0x0B - assert select.("12345::Int128") == [12345, "Int128"] - - # Int256 0x0C - assert select.("-123456::Int256") == [-123_456, "Int256"] - - # Float32 0x0D - assert select.("3.14::Float32") == [3.140000104904175, "Float32"] - - # Float64 0x0E - assert select.("-3.14159::Float64") == [-3.14159, "Float64"] - - assert insert.(-3.14159) == -3.14159 - - # Date 0x0F - assert select.("'2020-01-01'::Date") == [~D[2020-01-01], "Date"] - - assert insert.(~D[2020-01-01]) == ~D[2020-01-01] - - # Date32 0x10 - assert select.("'2020-01-01'::Date32") == [~D[2020-01-01], "Date32"] - - # DateTime 0x11 - assert select.("'2020-01-01 12:34:56'::DateTime") == [ - 
Ch.Test.to_clickhouse_naive(ctx.conn, ~N[2020-01-01 12:34:56]), - "DateTime" - ] - - assert insert.(~N[2020-01-01 12:34:56]) == ~N[2020-01-01 12:34:56] - - # DateTime(time_zone) 0x12 - assert [dt, "DateTime('Europe/Prague')"] = - select.("'2020-01-01 12:34:56'::DateTime('Europe/Prague')") - - assert inspect(dt) == "#DateTime<2020-01-01 12:34:56+01:00 CET Europe/Prague>" - - # DateTime64(P) 0x13 - assert select.("'2020-01-01 12:34:56.123456'::DateTime64(6)") == - [ - Ch.Test.to_clickhouse_naive(ctx.conn, ~N[2020-01-01 12:34:56.123456]), - "DateTime64(6)" - ] - - # DateTime64(P, time_zone) 0x14 - assert [dt64, "DateTime64(6, 'Europe/Prague')"] = - select.("'2020-01-01 12:34:56.123456'::DateTime64(6, 'Europe/Prague')") - - assert inspect(dt64) == "#DateTime<2020-01-01 12:34:56.123456+01:00 CET Europe/Prague>" - - # String 0x15 - assert select.("'Hello, World!'") == ["Hello, World!", "String"] - assert select.("0") == ["0", "String"] - - assert insert.("Hello, World!") == "Hello, World!" - - # FixedString(N) 0x16 - assert select.("'Hello'::FixedString(5)") == ["Hello", "FixedString(5)"] - assert select.("'Hell'::FixedString(5)") == ["Hell\0", "FixedString(5)"] - - # TODO - # Enum8 0x17... 
- assert_raise ArgumentError, "unsupported dynamic type Enum8", fn -> - select.("'a'::Enum8('a' = 1, 'b' = 2, 'c' = 3)") - end - - # TODO - # Enum16 0x18...> - assert_raise ArgumentError, "unsupported dynamic type Enum16", fn -> - select.("'a'::Enum16('a' = 1, 'b' = 2, 'c' = 3)") - end - - # Decimal32(P, S) 0x19 - assert select.("42.42::Decimal32(2)") == [Decimal.new("42.42"), "Decimal(9, 2)"] - - # Decimal64(P, S) 0x1A - assert select.("-42.42::Decimal64(2)") == [Decimal.new("-42.42"), "Decimal(18, 2)"] - - # Decimal128(P, S) 0x1B - assert select.("1234567890.123456789::Decimal128(9)") == - [Decimal.new("1234567890.123456789"), "Decimal(38, 9)"] - - # Decimal256(P, S) 0x1C - assert select.("-1234567890.123456789::Decimal256(9)") == - [Decimal.new("-1234567890.123456789"), "Decimal(76, 9)"] - - # UUID 0x1D - assert select.("'550e8400-e29b-41d4-a716-446655440000'::UUID") == - [Ecto.UUID.dump!("550e8400-e29b-41d4-a716-446655440000"), "UUID"] - - # Array(T) 0x1E - assert select.("[1, 2, 3]::Array(UInt8)") == [[1, 2, 3], "Array(UInt8)"] - assert select.("[1, 2, 3]::Array(Int64)") == [[1, 2, 3], "Array(Int64)"] - - assert select.("['hello', 'world', '!']::Array(String)") == [ - ["hello", "world", "!"], - "Array(String)" - ] - - assert select.("['hello', 'world', '!']::Array(LowCardinality(String))") == [ - ["hello", "world", "!"], - "Array(LowCardinality(String))" - ] - - assert select.("['hello', 'world', null, '!']::Array(Nullable(String))") == [ - ["hello", "world", nil, "!"], - "Array(Nullable(String))" - ] - - assert select.("[]::Array(Nothing)") == [[], "Array(Nothing)"] - - assert select.("[[1,2,3], [1,2], [3]]::Array(Array(UInt8))") == [ - [[1, 2, 3], [1, 2], [3]], - "Array(Array(UInt8))" - ] - - assert select.("[[[1],[],[2],[3,4,5]], [[1,2],[]], [[3]]]::Array(Array(Array(UInt8)))") == [ - [[[1], [], [2], [3, 4, 5]], [[1, 2], []], [[3]]], - "Array(Array(Array(UInt8)))" - ] - - assert select.("['2020-01-01', '2023-01-01']::Array(Date)") == [ - [~D[2020-01-01], 
~D[2023-01-01]], - "Array(Date)" - ] - - # TODO - # Tuple(T1, ..., TN) 0x1F... - assert_raise ArgumentError, "unsupported dynamic type Tuple", fn -> - select.("('a', 'b', 'c')::Tuple(String, String, String)") - end - - # TODO - # Tuple(name1 T1, ..., nameN TN) 0x20... - assert_raise ArgumentError, "unsupported dynamic type TupleWithNames", fn -> - select.("('a' = 'b', 'c' = 'd')::Tuple(a String, c String)") - end - - # TODO - # Set 0x21 - - # TODO - # Interval 0x22 (see interval kind binary encoding) - - # Nullable(T) 0x23 - assert select.("'Hello, World!'::Nullable(String)") == ["Hello, World!", "String"] - assert select.("null::Nullable(String)") == [nil, "None"] - - # TODO - # Function 0x24... - - # TODO - # AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) 0x25...... (see aggregate function parameter binary encoding) - - # LowCardinality(T) 0x26 - assert select.("'Hello, World!'::LowCardinality(String)") == [ - "Hello, World!", - "LowCardinality(String)" - ] - - # TODO - # Map(K, V) 0x27 - assert_raise ArgumentError, "unsupported dynamic type Map", fn -> - select.("map('key1', 'value1', 'key2', 'value2')::Map(String, String)") - end - - # IPv4 0x28 - assert select.("'1.1.1.1'::IPv4") == [{1, 1, 1, 1}, "IPv4"] - - # IPv6 0x29 - assert select.("'::1'::IPv6") == [{0, 0, 0, 0, 0, 0, 0, 1}, "IPv6"] - - # TODO - # Variant(T1, ..., TN) 0x2A... - assert_raise ArgumentError, "unsupported dynamic type Variant", fn -> - select.("['a', 1]::Array(Variant(String, UInt8))") - end - - # TODO - # Dynamic(max_types=N) 0x2B - - # TODO - # Custom type (Ring, Polygon, etc) 0x2C - assert_raise ArgumentError, "unsupported dynamic type CustomType", fn -> - select.("(0, 1)::Point") - end - - # Bool 0x2D - assert select.("true") == [true, "Bool"] - assert select.("false") == [false, "Bool"] - - # TODO - # SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) 0x2E...... 
(see aggregate function parameter binary encoding) - # Nested(name1 T1, ..., nameN TN) 0x2F... - # JSON(max_dynamic_paths=N, max_dynamic_types=M, path Type, SKIP skip_path, SKIP REGEXP skip_path_regexp) 0x30......... - end - - # https://clickhouse.com/docs/sql-reference/data-types/dynamic#creating-dynamic - test "creating dynamic", ctx do - # Using Dynamic type in table column definition: - parameterize_query!(ctx, "CREATE TABLE test (d Dynamic) ENGINE = Memory;") - on_exit(fn -> Ch.Test.query("DROP TABLE test") end) - - parameterize_query!( - ctx, - "INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);" - ) - - assert parameterize_query!(ctx, "SELECT d, dynamicType(d) FROM test;").rows == [ - [nil, "None"], - [42, "Int64"], - ["Hello, World!", "String"], - [[1, 2, 3], "Array(Int64)"] - ] - - # Using CAST from ordinary column: - assert parameterize_query!(ctx, "SELECT 'Hello, World!'::Dynamic AS d, dynamicType(d);").rows == - [ - ["Hello, World!", "String"] - ] - - # Using CAST from Variant column: - assert parameterize_query!( - ctx, - "SELECT multiIf((number % 3) = 0, number, (number % 3) = 1, range(number + 1), NULL)::Dynamic AS d, dynamicType(d) FROM numbers(3)", - [], - settings: [ - enable_variant_type: 1, - use_variant_as_common_type: 1 - ] - ).rows == [ - [0, "UInt64"], - [[0, 1], "Array(UInt64)"], - [nil, "None"] - ] - end - - # https://clickhouse.com/docs/sql-reference/data-types/dynamic#reading-dynamic-nested-types-as-subcolumns - test "reading dynamic nested types as subcolumns", ctx do - parameterize_query!(ctx, "CREATE TABLE test (d Dynamic) ENGINE = Memory;") - on_exit(fn -> Ch.Test.query("DROP TABLE test") end) - - parameterize_query!( - ctx, - "INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);" - ) - - assert parameterize_query!( - ctx, - "SELECT d, dynamicType(d), d.String, d.Int64, d.`Array(Int64)`, d.Date, d.`Array(String)` FROM test;" - ).rows == [ - [nil, "None", nil, nil, [], nil, []], - [42, "Int64", nil, 
42, [], nil, []], - ["Hello, World!", "String", "Hello, World!", nil, [], nil, []], - [[1, 2, 3], "Array(Int64)", nil, nil, [1, 2, 3], nil, []] - ] - - assert parameterize_query!( - ctx, - "SELECT toTypeName(d.String), toTypeName(d.Int64), toTypeName(d.`Array(Int64)`), toTypeName(d.Date), toTypeName(d.`Array(String)`) FROM test LIMIT 1;" - ).rows == [ - [ - "Nullable(String)", - "Nullable(Int64)", - "Array(Int64)", - "Nullable(Date)", - "Array(String)" - ] - ] - - assert parameterize_query!( - ctx, - "SELECT d, dynamicType(d), dynamicElement(d, 'String'), dynamicElement(d, 'Int64'), dynamicElement(d, 'Array(Int64)'), dynamicElement(d, 'Date'), dynamicElement(d, 'Array(String)') FROM test;" - ).rows == [ - [nil, "None", nil, nil, [], nil, []], - [42, "Int64", nil, 42, [], nil, []], - ["Hello, World!", "String", "Hello, World!", nil, [], nil, []], - [[1, 2, 3], "Array(Int64)", nil, nil, [1, 2, 3], nil, []] - ] - end - - # https://clickhouse.com/docs/sql-reference/data-types/dynamic#converting-a-string-column-to-a-dynamic-column-through-parsing - test "converting a string column to a dynamic column through parsing", ctx do - assert parameterize_query!( - ctx, - "SELECT CAST(materialize(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01')), 'Map(String, Dynamic)') as map_of_dynamic, mapApply((k, v) -> (k, dynamicType(v)), map_of_dynamic) as map_of_dynamic_types;", - [], - settings: [cast_string_to_dynamic_use_inference: 1] - ).rows == [ - [ - %{"key1" => 42, "key2" => true, "key3" => ~D[2020-01-01]}, - %{"key1" => "Int64", "key2" => "Bool", "key3" => "Date"} - ] - ] - end - - # https://clickhouse.com/docs/sql-reference/data-types/dynamic#converting-a-dynamic-column-to-an-ordinary-column - test "converting a dynamic column to an ordinary column", ctx do - parameterize_query!(ctx, "CREATE TABLE test (d Dynamic) ENGINE = Memory;") - on_exit(fn -> Ch.Test.query("DROP TABLE test") end) - parameterize_query!(ctx, "INSERT INTO test VALUES (NULL), (42), ('42.42'), (true), 
('e10');") - - assert parameterize_query!(ctx, "SELECT d::Nullable(Float64) FROM test;").rows == [ - [nil], - [42.0], - [42.42], - [1.0], - [0.0] - ] - end - - # https://clickhouse.com/docs/sql-reference/data-types/dynamic#converting-a-variant-column-to-dynamic-column - test "converting a variant column to dynamic column", ctx do - parameterize_query!( - ctx, - "CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;" - ) - - on_exit(fn -> Ch.Test.query("DROP TABLE test") end) - parameterize_query!(ctx, "INSERT INTO test VALUES (NULL), (42), ('String'), ([1, 2, 3]);") - - assert parameterize_query!(ctx, "SELECT v::Dynamic AS d, dynamicType(d) FROM test;").rows == [ - [nil, "None"], - [42, "UInt64"], - ["String", "String"], - [[1, 2, 3], "Array(UInt64)"] - ] - end - - # https://clickhouse.com/docs/sql-reference/data-types/dynamic#converting-a-dynamicmax_typesn-column-to-another-dynamicmax_typesk - test "converting a Dynamic(max_types=N) column to another Dynamic(max_types=K)", ctx do - parameterize_query!(ctx, "CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;") - on_exit(fn -> Ch.Test.query("DROP TABLE test") end) - - parameterize_query!( - ctx, - "INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);" - ) - - assert parameterize_query!( - ctx, - "SELECT d::Dynamic(max_types=5) as d2, dynamicType(d2) FROM test;" - ).rows == - [ - [nil, "None"], - [42, "Int64"], - [43, "Int64"], - ["42.42", "String"], - [true, "Bool"], - [[1, 2, 3], "Array(Int64)"] - ] - - assert parameterize_query!( - ctx, - "SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test;" - ).rows == [ - [nil, "None", nil, "None", false], - [42, "Int64", 42, "Int64", false], - [43, "Int64", 43, "Int64", false], - ["42.42", "String", "42.42", "String", false], - [true, "Bool", true, "Bool", true], - [[1, 2, 3], "Array(Int64)", [1, 2, 3], "Array(Int64)", true] - ] - end -end diff --git 
a/test/ch/faults_test.exs b/test/ch/faults_test.exs deleted file mode 100644 index bcc7457a..00000000 --- a/test/ch/faults_test.exs +++ /dev/null @@ -1,551 +0,0 @@ -defmodule Ch.FaultsTest do - alias Ch.Result - use ExUnit.Case, parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - import Ch.Test, only: [intercept_packets: 1] - - defp capture_async_log(f) do - ExUnit.CaptureLog.capture_log([async: true], f) - end - - @socket_opts [:binary, {:active, true}, {:packet, :raw}] - - setup do - # this setup makes the test act as MITM for clickhouse and ch's http conn (mint) - # allowing the test to intercept, slow down, and modify packets to cause failures - {:ok, clickhouse} = :gen_tcp.connect({127, 0, 0, 1}, 8123, @socket_opts) - {:ok, listen} = :gen_tcp.listen(0, @socket_opts) - {:ok, port} = :inet.port(listen) - {:ok, clickhouse: clickhouse, listen: listen, port: port} - end - - setup ctx do - {:ok, query_options: ctx[:query_options] || []} - end - - describe "connect/1" do - test "reconnects to eventually reachable server", ctx do - %{listen: listen, port: port, clickhouse: clickhouse, query_options: query_options} = ctx - - # make the server unreachable - :ok = :gen_tcp.close(listen) - test = self() - - {:ok, conn} = Ch.start_link(port: port, queue_interval: 100, backoff_min: 0) - - log = - capture_async_log(fn -> - assert {:error, %DBConnection.ConnectionError{reason: :queue_timeout}} = - Ch.query(conn, "select 1 + 1", [], query_options) - - # make the server reachable - {:ok, listen} = :gen_tcp.listen(port, @socket_opts) - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - spawn_link(fn -> - assert {:ok, %{num_rows: 1, rows: [[2]]}} = - Ch.query(conn, "select 1 + 1", [], query_options) - - send(test, :done) - end) - - # select 1 + 1 - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, 
intercept_packets(clickhouse)) - - assert_receive :done - refute_receive _anything - end) - - assert log =~ "failed to connect: ** (Mint.TransportError) connection refused" - end - end - - describe "connect/1 handshake" do - test "reconnects after timeout", %{port: port, listen: listen, clickhouse: clickhouse} do - log = - capture_async_log(fn -> - Ch.start_link(port: port, timeout: 100, backoff_min: 0) - - # connect - {:ok, mint} = :gen_tcp.accept(listen) - - # failed handshake - handshake = intercept_packets(mint) - assert handshake =~ "select 1, version()" - :ok = :gen_tcp.send(clickhouse, handshake) - :ok = :gen_tcp.send(mint, first_byte(intercept_packets(clickhouse))) - - # reconnect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - handshake = intercept_packets(mint) - assert handshake =~ "select 1, version()" - :ok = :gen_tcp.send(clickhouse, handshake) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - end) - - assert log =~ "failed to connect: ** (Mint.TransportError) timeout" - end - - test "reconnects after closed", %{port: port, listen: listen, clickhouse: clickhouse} do - log = - capture_async_log(fn -> - Ch.start_link(port: port, backoff_min: 0) - - # connect - {:ok, mint} = :gen_tcp.accept(listen) - - # failed handshake - handshake = intercept_packets(mint) - assert handshake =~ "select 1, version()" - :ok = :gen_tcp.send(clickhouse, handshake) - :ok = :gen_tcp.send(mint, first_byte(intercept_packets(clickhouse))) - :gen_tcp.close(mint) - - # reconnect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - handshake = intercept_packets(mint) - assert handshake =~ "select 1, version()" - :ok = :gen_tcp.send(clickhouse, handshake) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - end) - - assert log =~ "failed to connect: ** (Mint.TransportError) socket closed" - end - - test "reconnects after unexpected status code", ctx do - %{port: port, listen: listen, clickhouse: clickhouse} = ctx - - log = - capture_async_log(fn -> 
- Ch.start_link(port: port, backoff_min: 0) - - # connect - {:ok, mint1} = :gen_tcp.accept(listen) - - # failed handshake - handshake = intercept_packets(mint1) - assert handshake =~ "select 1, version()" - altered_handshake = String.replace(handshake, "select 1", "select x") - :ok = :gen_tcp.send(clickhouse, altered_handshake) - :ok = :gen_tcp.send(mint1, intercept_packets(clickhouse)) - - # reconnect - {:ok, mint2} = :gen_tcp.accept(listen) - - # handshake - handshake = intercept_packets(mint2) - assert handshake =~ "select 1, version()" - :ok = :gen_tcp.send(clickhouse, handshake) - :ok = :gen_tcp.send(mint2, intercept_packets(clickhouse)) - - # no socket leak - refute Port.info(mint1) - assert Port.info(mint2) - end) - - assert log =~ "UNKNOWN_IDENTIFIER" - end - - test "reconnects after incorrect query result", ctx do - %{port: port, listen: listen, clickhouse: clickhouse} = ctx - - log = - capture_async_log(fn -> - Ch.start_link(port: port, backoff_min: 0) - - # connect - {:ok, mint1} = :gen_tcp.accept(listen) - - # failed handshake - handshake = intercept_packets(mint1) - assert handshake =~ "select 1, version()" - - altered_handshake = - String.replace(handshake, "select 1, version()", "select 2, version()") - - :ok = :gen_tcp.send(clickhouse, altered_handshake) - :ok = :gen_tcp.send(mint1, intercept_packets(clickhouse)) - - # reconnect - {:ok, mint2} = :gen_tcp.accept(listen) - - # handshake - handshake = intercept_packets(mint2) - assert handshake =~ "select 1, version()" - :ok = :gen_tcp.send(clickhouse, handshake) - :ok = :gen_tcp.send(mint2, intercept_packets(clickhouse)) - - # no socket leak - refute Port.info(mint1) - assert Port.info(mint2) - end) - - assert log =~ "failed to connect: ** (Ch.Error) unexpected result for 'select 1, version()'" - end - end - - describe "ping/1" do - test "reconnects after timeout", %{port: port, listen: listen, clickhouse: clickhouse} do - log = - capture_async_log(fn -> - Ch.start_link(port: port, timeout: 100, 
idle_interval: 20) - - # connect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - # failed ping - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, first_byte(intercept_packets(clickhouse))) - - # reconnect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - # ping - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - end) - - assert log =~ "disconnected: ** (Mint.TransportError) timeout" - end - - test "reconnects after close", %{port: port, listen: listen, clickhouse: clickhouse} do - log = - capture_async_log(fn -> - Ch.start_link(port: port, idle_interval: 40) - - # connect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - # failed ping - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, first_byte(intercept_packets(clickhouse))) - :ok = :gen_tcp.close(mint) - - # reconnect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - # ping - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - end) - - assert log =~ "disconnected: ** (Mint.TransportError) socket closed" - end - end - - describe "query" do - test "reconnects after timeout", %{ - port: port, - listen: listen, - clickhouse: clickhouse, - query_options: query_options - } do - log = - capture_async_log(fn -> - {:ok, conn} = Ch.start_link(port: port, timeout: 100) - - # connect - {:ok, mint} = 
:gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - select = - Task.async(fn -> - Ch.query(conn, "select 1 + 1", [], query_options) - end) - - # failed select 1 + 1 - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, first_byte(intercept_packets(clickhouse))) - - # reconnect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - # select 1 + 1 - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - assert {:ok, %Ch.Result{rows: [[2]]}} = Task.await(select) - end) - - assert log =~ "disconnected: ** (Mint.TransportError) timeout" - end - - test "reconnects after closed on response", ctx do - %{port: port, listen: listen, clickhouse: clickhouse, query_options: query_options} = ctx - - log = - capture_async_log(fn -> - {:ok, conn} = Ch.start_link(port: port) - - # connect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - select = - Task.async(fn -> - Ch.query(conn, "select 1 + 1", [], query_options) - end) - - # failed select 1 + 1 - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, first_byte(intercept_packets(clickhouse))) - :ok = :gen_tcp.close(mint) - - # reconnect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - # select 1 + 1 - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - assert {:ok, %{rows: [[2]]}} = Task.await(select) - end) - - assert log =~ "disconnected: ** 
(Mint.TransportError) socket closed" - end - - test "reconnects after Connection: close response from server", ctx do - %{port: port, listen: listen, clickhouse: clickhouse, query_options: query_options} = ctx - - log = - capture_async_log(fn -> - {:ok, conn} = Ch.start_link(port: port) - - # connect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - select = - Task.async(fn -> - Ch.query(conn, "select 1 + 1", [], query_options) - end) - - # first select 1 + 1 - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - - response = - String.replace( - intercept_packets(clickhouse), - "Connection: Keep-Alive", - "Connection: Close" - ) - - assert response =~ "Connection: Close" - - :ok = :gen_tcp.send(mint, response) - :ok = :gen_tcp.close(mint) - - assert {:ok, %Ch.Result{rows: [[2]]}} = Task.await(select) - - # reconnect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - select = - Task.async(fn -> - Ch.query(conn, "select 2 + 2", [], query_options) - end) - - # select 2 + 2 - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - assert {:ok, %Ch.Result{rows: [[4]]}} = Task.await(select) - end) - - assert log =~ "disconnected: ** (Mint.HTTPError) the connection is closed" - end - - # TODO non-chunked request - - test "reconnects after closed before streaming request", ctx do - %{port: port, listen: listen, clickhouse: clickhouse, query_options: query_options} = ctx - - rows = [[1, 2], [3, 4]] - stream = Stream.map(rows, fn row -> Ch.RowBinary.encode_row(row, [:u8, :u8]) end) - - log = - capture_async_log(fn -> - {:ok, conn} = Ch.start_link(database: Ch.Test.database(), port: port) - - # connect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake 
- :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - # disconnect before insert - :ok = :gen_tcp.close(mint) - - insert = - Task.async(fn -> - Ch.query( - conn, - "insert into unknown_table(a,b) format RowBinary", - stream, - Keyword.merge(query_options, encode: false) - ) - end) - - # reconnect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - # insert - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - assert {:error, %Ch.Error{code: 60, message: message}} = Task.await(insert) - assert message =~ ~r/UNKNOWN_TABLE/ - end) - - assert log =~ "disconnected: ** (Mint.TransportError) socket closed" - end - - test "reconnects after closed while streaming request", ctx do - %{port: port, listen: listen, clickhouse: clickhouse, query_options: query_options} = ctx - - rows = [[1, 2], [3, 4]] - stream = Stream.map(rows, fn row -> Ch.RowBinary.encode_row(row, [:u8, :u8]) end) - - log = - capture_async_log(fn -> - {:ok, conn} = Ch.start_link(database: Ch.Test.database(), port: port) - - # connect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - insert = - Task.async(fn -> - Ch.query( - conn, - "insert into unknown_table(a,b) format RowBinary", - stream, - Keyword.merge(query_options, encode: false) - ) - end) - - # close after first packet from mint arrives - assert_receive {:tcp, ^mint, _packet} - :ok = :gen_tcp.close(mint) - - # reconnect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - # insert - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok 
= :gen_tcp.send(mint, intercept_packets(clickhouse)) - - assert {:error, %Ch.Error{code: 60, message: message}} = Task.await(insert) - assert message =~ ~r/UNKNOWN_TABLE/ - end) - - assert log =~ "disconnected: ** (Mint.TransportError) socket closed" - end - - test "warns on different server name", ctx do - %{port: port, listen: listen, clickhouse: clickhouse, query_options: query_options} = ctx - test = self() - - header = "X-ClickHouse-Server-Display-Name" - %Result{headers: headers} = Ch.Test.query("select 1") - {_, expected_name} = List.keyfind!(headers, String.downcase(header), 0) - - log = - capture_async_log(fn -> - {:ok, conn} = Ch.start_link(port: port) - - # connect - {:ok, mint} = :gen_tcp.accept(listen) - - # handshake - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - :ok = :gen_tcp.send(mint, intercept_packets(clickhouse)) - - spawn_link(fn -> - assert {:ok, %Result{rows: [[1]]}} = Ch.query(conn, "select 1", [], query_options) - send(test, :done) - end) - - # query - :ok = :gen_tcp.send(clickhouse, intercept_packets(mint)) - - response = - String.replace( - intercept_packets(clickhouse), - "#{header}: #{expected_name}", - "#{header}: not-#{expected_name}" - ) - - :ok = :gen_tcp.send(mint, response) - - assert_receive :done - end) - - assert log =~ - "[warning] Server mismatch detected." <> - " Expected \"#{expected_name}\" but got \"not-#{expected_name}\"!" <> - " Connection pooling might be unstable." 
- end - end - - defp first_byte(binary) do - :binary.part(binary, 0, 1) - end -end diff --git a/test/ch/headers_test.exs b/test/ch/headers_test.exs deleted file mode 100644 index 2d3da43b..00000000 --- a/test/ch/headers_test.exs +++ /dev/null @@ -1,77 +0,0 @@ -defmodule Ch.HeadersTest do - use ExUnit.Case, - async: true, - parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - - setup do - {:ok, conn} = Ch.start_link() - {:ok, conn: conn} - end - - setup ctx do - {:ok, query_options: ctx[:query_options] || []} - end - - test "can request gzipped response through headers", %{conn: conn, query_options: query_options} do - assert {:ok, %{rows: data, data: data, headers: headers}} = - Ch.query( - conn, - "select number from system.numbers limit 100", - [], - Keyword.merge(query_options, - decode: false, - settings: [enable_http_compression: 1], - headers: [{"accept-encoding", "gzip"}] - ) - ) - - assert :proplists.get_value("content-type", headers) == "application/octet-stream" - assert :proplists.get_value("content-encoding", headers) == "gzip" - assert :proplists.get_value("x-clickhouse-format", headers) == "RowBinaryWithNamesAndTypes" - - # https://en.wikipedia.org/wiki/Gzip - assert <<0x1F, 0x8B, _rest::bytes>> = IO.iodata_to_binary(data) - end - - test "can request lz4 response through headers", %{conn: conn, query_options: query_options} do - assert {:ok, %{rows: data, data: data, headers: headers}} = - Ch.query( - conn, - "select number from system.numbers limit 100", - [], - Keyword.merge(query_options, - decode: false, - settings: [enable_http_compression: 1], - headers: [{"accept-encoding", "lz4"}] - ) - ) - - assert :proplists.get_value("content-type", headers) == "application/octet-stream" - assert :proplists.get_value("content-encoding", headers) == "lz4" - assert :proplists.get_value("x-clickhouse-format", headers) == "RowBinaryWithNamesAndTypes" - - # https://en.wikipedia.org/wiki/LZ4_(compression_algorithm) - assert <<0x04, 0x22, 
0x4D, 0x18, _rest::bytes>> = IO.iodata_to_binary(data) - end - - test "can request zstd response through headers", %{conn: conn, query_options: query_options} do - assert {:ok, %{rows: data, data: data, headers: headers}} = - Ch.query( - conn, - "select number from system.numbers limit 100", - [], - Keyword.merge(query_options, - decode: false, - settings: [enable_http_compression: 1], - headers: [{"accept-encoding", "zstd"}] - ) - ) - - assert :proplists.get_value("content-type", headers) == "application/octet-stream" - assert :proplists.get_value("content-encoding", headers) == "zstd" - assert :proplists.get_value("x-clickhouse-format", headers) == "RowBinaryWithNamesAndTypes" - - # https://en.wikipedia.org/wiki/LZ4_(compression_algorithm) - assert <<0x28, 0xB5, 0x2F, 0xFD, _rest::bytes>> = IO.iodata_to_binary(data) - end -end diff --git a/test/ch/http_test.exs b/test/ch/http_test.exs deleted file mode 100644 index c0f802b4..00000000 --- a/test/ch/http_test.exs +++ /dev/null @@ -1,66 +0,0 @@ -defmodule Ch.HTTPTest do - use ExUnit.Case, - async: true, - parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - - @moduletag :slow - - setup ctx do - {:ok, query_options: ctx[:query_options] || []} - end - - describe "user-agent" do - setup do - {:ok, ch: start_supervised!(Ch)} - end - - test "sets user-agent to ch/ by default", %{ch: ch, query_options: query_options} do - %Ch.Result{rows: [[123]], headers: resp_header} = - Ch.query!(ch, "select 123", [], query_options) - - {"x-clickhouse-query-id", query_id} = List.keyfind!(resp_header, "x-clickhouse-query-id", 0) - - assert query_http_user_agent(ch, query_id, query_options) == - "ch/" <> Mix.Project.config()[:version] - end - - test "uses the provided user-agent", %{ch: ch, query_options: query_options} do - req_headers = [{"user-agent", "plausible/0.1.0"}] - - %Ch.Result{rows: [[123]], headers: resp_header} = - Ch.query!( - ch, - "select 123", - _params = [], - Keyword.merge(query_options, 
headers: req_headers) - ) - - {"x-clickhouse-query-id", query_id} = List.keyfind!(resp_header, "x-clickhouse-query-id", 0) - assert query_http_user_agent(ch, query_id, query_options) == "plausible/0.1.0" - end - end - - defp query_http_user_agent(ch, query_id, query_options) do - retry(fn -> - %Ch.Result{rows: [[user_agent]]} = - Ch.query!( - ch, - "select http_user_agent from system.query_log where query_id = {query_id:String} limit 1", - %{"query_id" => query_id}, - query_options - ) - - user_agent - end) - end - - defp retry(f) do - try do - f.() - catch - _, _ -> - :timer.sleep(100) - retry(f) - end - end -end diff --git a/test/ch/json_test.exs b/test/ch/json_test.exs deleted file mode 100644 index b41a82d0..00000000 --- a/test/ch/json_test.exs +++ /dev/null @@ -1,362 +0,0 @@ -defmodule Ch.JSONTest do - use ExUnit.Case, parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - - @moduletag :json - - setup ctx do - {:ok, query_options: ctx[:query_options] || []} - end - - setup do - on_exit(fn -> Ch.Test.query("DROP TABLE IF EXISTS json_test") end) - {:ok, conn: start_supervised!({Ch, database: Ch.Test.database()})} - end - - test "simple json", %{conn: conn, query_options: query_options} do - select = fn literal -> - [[value]] = Ch.query!(conn, "select '#{literal}'::json", [], query_options).rows - value - end - - assert select.(~s|{"a":"b","c":"d"}|) == %{"a" => "b", "c" => "d"} - - # note that 42 was a string in pre-25.0 and post-25.8 ClickHouse versions - - assert select.(~s|{"a":42}|) == %{"a" => 42} - - assert select.(~s|{}|) == %{} - - # null fields are removed? 
- assert select.(~s|{"a":null}|) == %{} - - assert select.(~s|{"a":3.14}|) == %{"a" => 3.14} - - assert select.(~s|{"a":true}|) == %{"a" => true} - - assert select.(~s|{"a":false}|) == %{"a" => false} - - assert select.(~s|{"a":{"b":"c"}}|) == %{"a" => %{"b" => "c"}} - - # numbers in arrays become strings - assert select.(~s|{"a":[1,2,3]}|) == %{"a" => [1, 2, 3]} - - # this is weird, fields with dots are treated as nested objects - assert select.(~s|{"a.b":"c"}|) == %{"a" => %{"b" => "c"}} - - assert select.(~s|{"a":[]}|) == %{"a" => []} - - assert select.(~s|{"a":[null]}|) == %{"a" => [nil]} - - # everything in an array gets converted to "lcd" type, aka string - assert select.(~s|{"a":[1,3.14,"hello",null]}|) == %{"a" => [1, 3.14, "hello", nil]} - - # but not if the array has nested objects, then the array becomes a tuple and can support mixed types - assert select.(~s|{"a":[1,2.13,"s",{"a":"b"}]}|) == %{"a" => [1, 2.13, "s", %{"a" => "b"}]} - end - - # https://clickhouse.com/docs/sql-reference/data-types/newjson#using-json-in-a-table-column-definition - test "basic", %{conn: conn, query_options: query_options} do - Ch.query!( - conn, - "CREATE TABLE json_test (json JSON, id UInt8) ENGINE = Memory", - [], - query_options - ) - - Ch.query!( - conn, - """ - INSERT INTO json_test VALUES - ('{"a" : {"b" : 42}, "c" : [1, 2, 3]}', 0), - ('{"f" : "Hello, World!"}', 1), - ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}', 2) - """, - [], - query_options - ) - - assert Ch.query!( - conn, - "SELECT json FROM json_test ORDER BY id", - [], - query_options - ).rows == [ - [%{"a" => %{"b" => 42}, "c" => [1, 2, 3]}], - [%{"f" => "Hello, World!"}], - [%{"a" => %{"b" => 43, "e" => 10}, "c" => [4, 5, 6]}] - ] - - Ch.query!( - conn, - "INSERT INTO json_test(json, id) FORMAT RowBinary", - [[%{"a" => %{"b" => 999}, "some other" => "json value", "from" => "rowbinary"}, 3]], - Keyword.merge(query_options, types: ["JSON", "UInt8"]) - ) - - assert Ch.query!( - conn, - "SELECT json FROM 
json_test where json.from = 'rowbinary'", - [], - query_options - ).rows == [ - [%{"from" => "rowbinary", "some other" => "json value", "a" => %{"b" => 999}}] - ] - - assert Ch.query!( - conn, - "select json.a.b, json.a.g, json.c, json.d from json_test order by id", - [], - query_options - ).rows == - [ - [42, nil, [1, 2, 3], nil], - [nil, nil, nil, nil], - [43, nil, [4, 5, 6], nil], - [999, nil, nil, nil] - ] - end - - # https://clickhouse.com/docs/sql-reference/data-types/newjson#using-json-in-a-table-column-definition - test "with skip (i.e. extra type options)", %{conn: conn, query_options: query_options} do - Ch.query!( - conn, - "CREATE TABLE json_test (json JSON(a.b UInt32, SKIP a.e)) ENGINE = Memory;", - [], - query_options - ) - - Ch.query!( - conn, - """ - INSERT INTO json_test VALUES - ('{"a" : {"b" : 42}, "c" : [1, 2, 3]}'), - ('{"f" : "Hello, World!"}'), - ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}'); - """, - [], - query_options - ) - - assert Ch.query!( - conn, - "SELECT json FROM json_test", - [], - query_options - ).rows == [ - [%{"a" => %{"b" => 42}, "c" => [1, 2, 3]}], - [%{"a" => %{"b" => 0}, "f" => "Hello, World!"}], - [%{"a" => %{"b" => 43}, "c" => [4, 5, 6]}] - ] - end - - # https://clickhouse.com/docs/sql-reference/data-types/newjson#reading-json-paths-as-sub-columns - test "reading json paths as subcolumns", %{conn: conn, query_options: query_options} do - Ch.query!( - conn, - "CREATE TABLE json_test (json JSON(a.b UInt32, SKIP a.e)) ENGINE = Memory", - [], - query_options - ) - - Ch.query!( - conn, - """ - INSERT INTO json_test VALUES - ('{"a" : {"b" : 42, "g" : 42.42}, "c" : [1, 2, 3], "d" : "2020-01-01"}'), - ('{"f" : "Hello, World!", "d" : "2020-01-02"}'), - ('{"a" : {"b" : 43, "e" : 10, "g" : 43.43}, "c" : [4, 5, 6]}'); - """, - [], - query_options - ) - - assert Ch.query!( - conn, - "SELECT json FROM json_test", - [], - query_options - ).rows == [ - [%{"a" => %{"b" => 42, "g" => 42.42}, "c" => [1, 2, 3], "d" => "2020-01-01"}], - 
[%{"a" => %{"b" => 0}, "d" => "2020-01-02", "f" => "Hello, World!"}], - [%{"a" => %{"b" => 43, "g" => 43.43}, "c" => [4, 5, 6]}] - ] - - assert Ch.query!( - conn, - "SELECT json.a.b, json.a.g, json.c, json.d FROM json_test", - [], - query_options - ).rows == [ - [42, 42.42, [1, 2, 3], ~D[2020-01-01]], - [0, nil, nil, ~D[2020-01-02]], - [43, 43.43, [4, 5, 6], nil] - ] - - assert Ch.query!(conn, "SELECT json.non.existing.path FROM json_test", [], query_options).rows == - [ - [nil], - [nil], - [nil] - ] - - assert Ch.query!( - conn, - "SELECT toTypeName(json.a.b), toTypeName(json.a.g), toTypeName(json.c), toTypeName(json.d) FROM json_test;", - [], - query_options - ).rows == [ - ["UInt32", "Dynamic", "Dynamic", "Dynamic"], - ["UInt32", "Dynamic", "Dynamic", "Dynamic"], - ["UInt32", "Dynamic", "Dynamic", "Dynamic"] - ] - - assert Ch.query!( - conn, - """ - SELECT - json.a.g.:Float64, - dynamicType(json.a.g), - json.d.:Date, - dynamicType(json.d) - FROM json_test - """, - [], - query_options - ).rows == [ - [42.42, "Float64", ~D[2020-01-01], "Date"], - [nil, "None", ~D[2020-01-02], "Date"], - [43.43, "Float64", nil, "None"] - ] - - assert Ch.query!( - conn, - """ - SELECT json.a.g::UInt64 AS uint - FROM json_test; - """, - [], - query_options - ).rows == [ - [42], - [0], - [43] - ] - - assert_raise Ch.Error, ~r/Conversion between numeric types and UUID is not supported/, fn -> - Ch.query!(conn, "SELECT json.a.g::UUID AS float FROM json_test;", [], query_options) - end - end - - # https://clickhouse.com/docs/sql-reference/data-types/newjson#reading-json-sub-objects-as-sub-columns - test "reading json subobjects as subcolumns", %{conn: conn, query_options: query_options} do - Ch.query!(conn, "CREATE TABLE json_test (json JSON) ENGINE = Memory;", [], query_options) - - Ch.query!( - conn, - """ - INSERT INTO json_test VALUES - ('{"a" : {"b" : {"c" : 42, "g" : 42.42}}, "c" : [1, 2, 3], "d" : {"e" : {"f" : {"g" : "Hello, World", "h" : [1, 2, 3]}}}}'), - ('{"f" : "Hello, 
World!", "d" : {"e" : {"f" : {"h" : [4, 5, 6]}}}}'), - ('{"a" : {"b" : {"c" : 43, "e" : 10, "g" : 43.43}}, "c" : [4, 5, 6]}'); - """, - [], - query_options - ) - - assert Ch.query!(conn, "SELECT json FROM json_test;", [], query_options).rows == [ - [ - %{ - "a" => %{"b" => %{"c" => 42, "g" => 42.42}}, - "c" => [1, 2, 3], - "d" => %{"e" => %{"f" => %{"g" => "Hello, World", "h" => [1, 2, 3]}}} - } - ], - [%{"d" => %{"e" => %{"f" => %{"h" => [4, 5, 6]}}}, "f" => "Hello, World!"}], - [ - %{ - "a" => %{"b" => %{"c" => 43, "e" => 10, "g" => 43.43}}, - "c" => [4, 5, 6] - } - ] - ] - - assert Ch.query!(conn, "SELECT json.^a.b, json.^d.e.f FROM json_test;", [], query_options).rows == - [ - [%{"c" => 42, "g" => 42.42}, %{"g" => "Hello, World", "h" => [1, 2, 3]}], - [%{}, %{"h" => [4, 5, 6]}], - [%{"c" => 43, "e" => 10, "g" => 43.43}, %{}] - ] - end - - # TODO - # https://clickhouse.com/docs/sql-reference/data-types/newjson#handling-arrays-of-json-objects - test "handling arrays of json objects", %{conn: conn, query_options: query_options} do - Ch.query!(conn, "CREATE TABLE json_test (json JSON) ENGINE = Memory;", [], query_options) - - Ch.query!( - conn, - """ - INSERT INTO json_test VALUES - ('{"a" : {"b" : [{"c" : 42, "d" : "Hello", "f" : [[{"g" : 42.42}]], "k" : {"j" : 1000}}, {"c" : 43}, {"e" : [1, 2, 3], "d" : "My", "f" : [[{"g" : 43.43, "h" : "2020-01-01"}]], "k" : {"j" : 2000}}]}}'), - ('{"a" : {"b" : [1, 2, 3]}}'), - ('{"a" : {"b" : [{"c" : 44, "f" : [[{"h" : "2020-01-02"}]]}, {"e" : [4, 5, 6], "d" : "World", "f" : [[{"g" : 44.44}]], "k" : {"j" : 3000}}]}}'); - """, - [], - query_options - ) - - assert Ch.query!(conn, "SELECT json FROM json_test;", [], query_options).rows == [ - [ - %{ - "a" => %{ - "b" => [ - %{ - "c" => 42, - "d" => "Hello", - "f" => [[%{"g" => 42.42}]], - "k" => %{"j" => 1000} - }, - %{"c" => 43}, - %{ - "d" => "My", - "e" => [1, 2, 3], - "f" => [[%{"g" => 43.43, "h" => "2020-01-01"}]], - "k" => %{"j" => 2000} - } - ] - } - } - ], - [%{"a" => 
%{"b" => [1, 2, 3]}}], - [ - %{ - "a" => %{ - "b" => [ - %{"c" => 44, "f" => [[%{"h" => "2020-01-02"}]]}, - %{ - "d" => "World", - "e" => [4, 5, 6], - "f" => [[%{"g" => 44.44}]], - "k" => %{"j" => 3000} - } - ] - } - } - ] - ] - - # TODO - assert_raise ArgumentError, "unsupported dynamic type JSON", fn -> - Ch.query!(conn, "SELECT json.a.b, dynamicType(json.a.b) FROM json_test;", [], query_options) - end - - assert_raise ArgumentError, "unsupported dynamic type JSON", fn -> - Ch.query!( - conn, - "SELECT json.a.b.:`Array(JSON)`.c, json.a.b.:`Array(JSON)`.f, json.a.b.:`Array(JSON)`.d FROM json_test;", - [], - query_options - ) - end - end -end diff --git a/test/ch/pool_test.exs b/test/ch/pool_test.exs new file mode 100644 index 00000000..5793cb7c --- /dev/null +++ b/test/ch/pool_test.exs @@ -0,0 +1,79 @@ +defmodule Ch.PoolTest do + use ExUnit.Case, async: true + + setup do + {:ok, pool: start_supervised!({Ch.Pool, scheme: :http, host: "localhost", port: 8123})} + end + + test "select", %{pool: pool} do + assert Ch.Pool.query!(pool, "select 1").rows == [[1]] + + uuid = "9B29BD20-924C-4DE5-BDB3-8C2AA1FCE1FC" + uuid_bin = uuid |> String.replace("-", "") |> Base.decode16!() + + params = [ + {"UInt8", 1}, + {"Bool", true}, + {"Bool", false}, + {"Nullable(Nothing)", nil}, + {"Float32", 1.0}, + {"Float64", 1.0}, + {"String", "a&b=c"}, + {"String", "a\n"}, + {"String", "a\t"}, + {"Array(String)", ["a\tb"]}, + {"Array(Bool)", [true, false]}, + {"Array(Nullable(String))", ["a", nil, "b"]}, + {"Decimal(9,4)", Decimal.new("2000.3330")}, + {"Decimal(9,4)", Decimal.new("2000.333"), Decimal.new("2000.3330")}, + {"Date", ~D[2022-01-01]}, + {"Array(Date)", [~D[2022-01-01], ~D[2022-01-02]]}, + {"Date32", ~D[2022-01-01]}, + {"Array(String)", ["a", "b'", "\\'c"]}, + {"Array(String)", ["a\n", "b\tc"]}, + {"Array(UInt8)", [1, 2, 3]}, + {"Array(Array(UInt8))", [[1], [2, 3], []]}, + {"UUID", uuid, uuid_bin} + ] + + Enum.each(params, fn param -> + {type, value, expected} = + case param do + 
{type, value} -> {type, value, value} + {_type, _value, _expected} -> param + end + + assert Ch.Pool.query!(pool, "select {a:#{type}}", %{"a" => value}).rows == [[expected]] + end) + end + + test "insert", %{pool: pool} do + settings = [session_id: "test_insert_#{System.unique_integer()}"] + + session_query = fn statement -> + Ch.Pool.query!(pool, statement, %{}, settings: settings) + end + + assert session_query.("create temporary table test_insert(a UInt8, b String) engine Memory") + + assert session_query.("insert into test_insert values (1, 'hello')") + + types = ["UInt8", "String"] + + rowbinary = [ + Ch.RowBinary.encode_names_and_types(["a", "b"], types) + | Ch.RowBinary.encode_rows([[2, "world"], [3, "foo"], [4, "bar"]], types) + ] + + assert session_query.([ + "insert into test_insert format RowBinaryWithNamesAndTypes\n" | rowbinary + ]) + + assert session_query.("select * from test_insert order by a asc").rows == [ + [1, "hello"], + [2, "world"], + [3, "foo"], + [4, "bar"] + ] + end +end diff --git a/test/ch/query_string_test.exs b/test/ch/query_string_test.exs deleted file mode 100644 index 398ab3e5..00000000 --- a/test/ch/query_string_test.exs +++ /dev/null @@ -1,34 +0,0 @@ -defmodule Ch.QueryStringTest do - use ExUnit.Case, - async: true, - parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - - setup ctx do - {:ok, query_options: ctx[:query_options] || []} - end - - setup do - {:ok, conn: start_supervised!(Ch)} - end - - # For more info see - # https://clickhouse.com/docs/en/interfaces/http#tabs-in-url-parameters - # "escaped" format is the same as https://clickhouse.com/docs/en/interfaces/formats#tabseparated-data-formatting - test "binaries are escaped properly", %{conn: conn, query_options: query_options} do - for s <- ["\t", "\n", "\\", "'", "\b", "\f", "\r", "\0"] do - assert Ch.query!(conn, "select {s:String}", %{"s" => s}, query_options).rows == [[s]] - end - - # example from 
https://clickhouse.com/docs/en/interfaces/http#tabs-in-url-parameters - assert Ch.query!(conn, "select splitByChar('\t', 'abc\t123')", [], query_options).rows == - [[["abc", "123"]]] - - assert Ch.query!( - conn, - "select splitByChar('\t', {arg1:String})", - %{"arg1" => "abc\t123"}, - query_options - ).rows == - [[["abc", "123"]]] - end -end diff --git a/test/ch/query_test.exs b/test/ch/query_test.exs deleted file mode 100644 index 056cb0cf..00000000 --- a/test/ch/query_test.exs +++ /dev/null @@ -1,530 +0,0 @@ -defmodule Ch.QueryTest do - use ExUnit.Case, - async: true, - parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - - alias Ch.Query - - setup ctx do - {:ok, query_options: ctx[:query_options] || []} - end - - test "to_string" do - query = Query.build(["select ", 1 + ?0, ?+, 2 + ?0]) - assert to_string(query) == "select 1+2" - end - - describe "command" do - test "without command provided" do - assert Query.build("select 1+2").command == :select - assert Query.build("SELECT 1+2").command == :select - assert Query.build(" select 1+2").command == :select - assert Query.build("\t\n\t\nSELECT 1+2").command == :select - - assert Query.build(""" - - select 1+2 - """).command == :select - - assert Query.build(["select 1+2"]).command == :select - assert Query.build([?S, ?E, ?L | "ECT 1"]).command == :select - - assert Query.build("with insert as (select 1) select * from insert").command == :select - assert Query.build("insert into table(a, b) values(1, 2)").command == :insert - - assert Query.build("insert into table(a, b) select b, c from table2 where b = 'update'").command == - :insert - end - - test "with nil command provided" do - assert Query.build("select 1+2", command: nil).command == :select - end - - test "with command provided" do - assert Query.build("select 1+2", command: :custom).command == :custom - end - - @tag skip: true - test "TODO" do - assert Query.build("Select 1+2").command == :select - end - end - - # adapted from 
https://github.com/elixir-ecto/postgrex/blob/master/test/query_test.exs - describe "query" do - setup do - {:ok, conn: start_supervised!({Ch, database: Ch.Test.database()})} - end - - test "iodata", %{conn: conn, query_options: query_options} do - assert [[123]] = - Ch.query!(conn, ["S", ?E, ["LEC" | "T"], " ", ~c"123"], [], query_options).rows - end - - test "decode basic types", %{conn: conn, query_options: query_options} do - assert [[nil]] = Ch.query!(conn, "SELECT NULL", [], query_options).rows - assert [[true, false]] = Ch.query!(conn, "SELECT true, false", [], query_options).rows - assert [["e"]] = Ch.query!(conn, "SELECT 'e'::char", [], query_options).rows - assert [["ẽ"]] = Ch.query!(conn, "SELECT 'ẽ'::char", [], query_options).rows - assert [[42]] = Ch.query!(conn, "SELECT 42", [], query_options).rows - assert [[42.0]] = Ch.query!(conn, "SELECT 42::float", [], query_options).rows - assert [[42.0]] = Ch.query!(conn, "SELECT 42.0", [], query_options).rows - # TODO [[:NaN]] ? - assert [[nil]] = Ch.query!(conn, "SELECT 'NaN'::float", [], query_options).rows - # TODO [[:int]] ? - assert [[nil]] = Ch.query!(conn, "SELECT 'inf'::float", [], query_options).rows - # TODO [[:"-inf"]] ? 
- assert [[nil]] = Ch.query!(conn, "SELECT '-inf'::float", [], query_options).rows - assert [["ẽric"]] = Ch.query!(conn, "SELECT 'ẽric'", [], query_options).rows - assert [["ẽric"]] = Ch.query!(conn, "SELECT 'ẽric'::varchar", [], query_options).rows - # TODO - # assert [[<<1, 2, 3>>]] = Ch.query!(conn, "SELECT '\\001\\002\\003'::bytea").rows - end - - test "decode numeric", %{conn: conn, query_options: query_options} do - assert [[Decimal.new("42.0000000000")]] == - Ch.query!(conn, "SELECT 42::numeric(10,10)", [], query_options).rows - end - - @tag skip: true - test "decode json/jsonb", %{conn: conn, query_options: query_options} do - assert_raise ArgumentError, "Object('json') type is not supported", fn -> - assert [[%{"foo" => 42}]] == - Ch.query!(conn, "SELECT '{\"foo\": 42}'::json", [], query_options).rows - end - end - - test "decode uuid", %{conn: conn, query_options: query_options} do - uuid = <<160, 238, 188, 153, 156, 11, 78, 248, 187, 109, 107, 185, 189, 56, 10, 17>> - - assert [[^uuid]] = - Ch.query!( - conn, - "SELECT 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'::UUID", - [], - query_options - ).rows - end - - # https://clickhouse.com/docs/sql-reference/data-types/time - @tag :time - test "decode time", %{conn: conn, query_options: query_options} do - settings = [enable_time_time64_type: 1] - - times = [ - %{value: "00:00:00", expected: ~T[00:00:00]}, - %{value: "12:34:56", expected: ~T[12:34:56]}, - %{value: "23:59:59", expected: ~T[23:59:59]} - ] - - for time <- times do - %{value: value, expected: expected} = time - - assert Ch.query!( - conn, - "SELECT '#{value}'::time", - [], - Keyword.merge(query_options, settings: settings) - ).rows == - [[expected]] - - assert Ch.query!( - conn, - "SELECT {time:Time}", - %{"time" => expected}, - Keyword.merge(query_options, settings: settings) - ).rows == - [[expected]] - end - - # ClickHouse supports Time values of [-999:59:59, 999:59:59] - # and Elixir's Time supports values of [00:00:00, 23:59:59] - # so we raise 
an error when ClickHouse's Time value is out of Elixir's Time range - - assert_raise ArgumentError, - "ClickHouse Time value -1.0 (seconds) is out of Elixir's Time range (00:00:00.000000 - 23:59:59.999999)", - fn -> - Ch.query!( - conn, - "SELECT '-00:00:01'::time", - [], - Keyword.merge(query_options, settings: settings) - ) - end - - assert_raise ArgumentError, - "ClickHouse Time value 3599999.0 (seconds) is out of Elixir's Time range (00:00:00.000000 - 23:59:59.999999)", - fn -> - Ch.query!( - conn, - "SELECT '999:59:59'::time", - [], - Keyword.merge(query_options, settings: settings) - ) - end - - assert_raise ArgumentError, - "ClickHouse Time value -3599999.0 (seconds) is out of Elixir's Time range (00:00:00.000000 - 23:59:59.999999)", - fn -> - Ch.query!( - conn, - "SELECT '-999:59:59'::time", - [], - Keyword.merge(query_options, settings: settings) - ) - end - - # ** (Ch.Error) Code: 457. DB::Exception: Value 12:34:56.123456 cannot be parsed as Time for query parameter 'time' - # because it isn't parsed completely: only 8 of 15 bytes was parsed: 12:34:56. 
(BAD_QUERY_PARAMETER) - # (version 25.6.3.116 (official build)) - assert_raise Ch.Error, ~r/only 8 of 15 bytes was parsed/, fn -> - Ch.query!( - conn, - "SELECT {time:Time}", - %{"time" => ~T[12:34:56.123456]}, - Keyword.merge(query_options, settings: settings) - ) - end - end - - # https://clickhouse.com/docs/sql-reference/data-types/time64 - @tag :time - test "decode time64", %{conn: conn, query_options: query_options} do - settings = [enable_time_time64_type: 1] - - times = [ - %{value: "00:00:00.000000000", precision: 0, expected: ~T[00:00:00]}, - %{value: "12:34:56.123456789", precision: 0, expected: ~T[12:34:56]}, - %{value: "23:59:59.999999999", precision: 0, expected: ~T[23:59:59]}, - %{value: "12:34:56.123456789", precision: 1, expected: ~T[12:34:56.1]}, - %{value: "23:59:59.999999999", precision: 1, expected: ~T[23:59:59.9]}, - %{value: "12:34:56.123456789", precision: 2, expected: ~T[12:34:56.12]}, - %{value: "23:59:59.999999999", precision: 2, expected: ~T[23:59:59.99]}, - %{value: "12:34:56.123456789", precision: 3, expected: ~T[12:34:56.123]}, - %{value: "23:59:59.999999999", precision: 3, expected: ~T[23:59:59.999]}, - %{value: "12:34:56.123456789", precision: 4, expected: ~T[12:34:56.1234]}, - %{value: "23:59:59.999999999", precision: 4, expected: ~T[23:59:59.9999]}, - %{value: "12:34:56.001200000", precision: 4, expected: ~T[12:34:56.0012]}, - %{value: "12:34:56.123456789", precision: 5, expected: ~T[12:34:56.12345]}, - %{value: "23:59:59.999999999", precision: 5, expected: ~T[23:59:59.99999]}, - %{value: "12:34:56.123456789", precision: 6, expected: ~T[12:34:56.123456]}, - %{value: "12:34:56.123000", precision: 6, expected: ~T[12:34:56.123000]}, - %{value: "12:34:56.000123000", precision: 6, expected: ~T[12:34:56.000123]}, - %{value: "00:00:00.000000000", precision: 6, expected: ~T[00:00:00.000000]}, - %{value: "12:34:56.123456789", precision: 6, expected: ~T[12:34:56.123456]}, - %{value: "00:00:00.123000", precision: 6, expected: 
~T[00:00:00.123000]}, - %{value: "00:00:00.000123000", precision: 6, expected: ~T[00:00:00.000123]}, - %{value: "23:59:59.999999999", precision: 6, expected: ~T[23:59:59.999999]}, - %{value: "12:34:56.123456789", precision: 7, expected: ~T[12:34:56.123456]}, - %{value: "12:34:56.123456789", precision: 8, expected: ~T[12:34:56.123456]}, - %{value: "12:34:56.123456789", precision: 9, expected: ~T[12:34:56.123456]}, - %{value: "23:59:59.999999999", precision: 9, expected: ~T[23:59:59.999999]} - ] - - for time <- times do - %{value: value, precision: precision, expected: expected} = time - - assert Ch.query!( - conn, - "SELECT '#{value}'::time64(#{precision})", - [], - Keyword.merge(query_options, settings: settings) - ).rows == - [[expected]] - - assert Ch.query!( - conn, - "SELECT {time:time64(#{precision})}", - %{"time" => expected}, - Keyword.merge(query_options, settings: settings) - ).rows == - [[expected]] - end - - # ClickHouse supports Time64 values of [-999:59:59.999999999, 999:59:59.999999999] - # and Elixir's Time supports values of [00:00:00.000000, 23:59:59.999999] - # so we raise an error when ClickHouse's Time64 value is out of Elixir's Time range - - assert_raise ArgumentError, - "ClickHouse Time value -1.0 (seconds) is out of Elixir's Time range (00:00:00.000000 - 23:59:59.999999)", - fn -> - Ch.query!( - conn, - "SELECT '-00:00:01.000'::time64(6)", - [], - Keyword.merge(query_options, settings: settings) - ) - end - - assert_raise ArgumentError, - "ClickHouse Time value 3599999.999999 (seconds) is out of Elixir's Time range (00:00:00.000000 - 23:59:59.999999)", - fn -> - Ch.query!( - conn, - "SELECT '999:59:59.999999999'::time64(6)", - [], - Keyword.merge(query_options, settings: settings) - ) - end - - assert_raise ArgumentError, - "ClickHouse Time value -3599999.999999 (seconds) is out of Elixir's Time range (00:00:00.000000 - 23:59:59.999999)", - fn -> - Ch.query!( - conn, - "SELECT '-999:59:59.999999999'::time64(6)", - [], - 
Keyword.merge(query_options, settings: settings) - ) - end - end - - test "decode arrays", %{conn: conn, query_options: query_options} do - assert [[[]]] = Ch.query!(conn, "SELECT []", [], query_options).rows - assert [[[1]]] = Ch.query!(conn, "SELECT [1]", [], query_options).rows - assert [[[1, 2]]] = Ch.query!(conn, "SELECT [1,2]", [], query_options).rows - assert [[[[0], [1]]]] = Ch.query!(conn, "SELECT [[0],[1]]", [], query_options).rows - assert [[[[0]]]] = Ch.query!(conn, "SELECT [[0]]", [], query_options).rows - end - - test "decode tuples", %{conn: conn, query_options: query_options} do - assert [[{"Hello", 123}]] = Ch.query!(conn, "select ('Hello', 123)", [], query_options).rows - - assert [[{"Hello", 123}]] = - Ch.query!(conn, "select ('Hello' as a, 123 as b)", [], query_options).rows - - assert [[{"Hello", 123}]] = - Ch.query!(conn, "select ('Hello' as a_, 123 as b)", [], query_options).rows - - # TODO - # assert [[{"Hello", 123}]] = Ch.query!(conn, "select ('Hello' as a$, 123 as b)", [], query_options).rows - end - - test "decode network types", %{conn: conn, query_options: query_options} do - assert [[{127, 0, 0, 1} = ipv4]] = - Ch.query!(conn, "SELECT '127.0.0.1'::inet4", [], query_options).rows - - assert :inet.ntoa(ipv4) == ~c"127.0.0.1" - - assert [[{0, 0, 0, 0, 0, 0, 0, 1} = ipv6]] = - Ch.query!(conn, "SELECT '::1'::inet6", [], query_options).rows - - assert :inet.ntoa(ipv6) == ~c"::1" - - assert [[ipv6]] = - Ch.query!(conn, "SELECT '2001:44c8:129:2632:33:0:252:2'::inet6", [], query_options).rows - - assert :inet.ntoa(ipv6) == ~c"2001:44c8:129:2632:33:0:252:2" - end - - test "decoded binaries copy behaviour", %{conn: conn, query_options: query_options} do - text = "hello world" - assert [[bin]] = Ch.query!(conn, "SELECT {$0:String}", [text], query_options).rows - assert :binary.referenced_byte_size(bin) == :binary.referenced_byte_size("hello world") - - # For OTP 20+ refc binaries up to 64 bytes might be copied during a GC - text = 
String.duplicate("hello world", 6) - assert [[bin]] = Ch.query!(conn, "SELECT {$0:String}", [text], query_options).rows - assert :binary.referenced_byte_size(bin) == byte_size(text) - end - - test "encode basic types", %{conn: conn, query_options: query_options} do - # TODO - # assert [[nil, nil]] = query("SELECT $1::text, $2::int", [nil, nil]) - assert [[true, false]] = - Ch.query!(conn, "SELECT {$0:bool}, {$1:Bool}", [true, false], query_options).rows - - assert [["ẽ"]] = Ch.query!(conn, "SELECT {$0:char}", ["ẽ"], query_options).rows - assert [[42]] = Ch.query!(conn, "SELECT {$0:int}", [42], query_options).rows - - assert [[42.0, 43.0]] = - Ch.query!(conn, "SELECT {$0:float}, {$1:float}", [42, 43.0], query_options).rows - - assert [[nil, nil]] = - Ch.query!(conn, "SELECT {$0:float}, {$1:float}", ["NaN", "nan"], query_options).rows - - assert [[nil]] = Ch.query!(conn, "SELECT {$0:float}", ["inf"], query_options).rows - assert [[nil]] = Ch.query!(conn, "SELECT {$0:float}", ["-inf"], query_options).rows - assert [["ẽric"]] = Ch.query!(conn, "SELECT {$0:varchar}", ["ẽric"], query_options).rows - - assert [[<<1, 2, 3>>]] = - Ch.query!(conn, "SELECT {$0:bytea}", [<<1, 2, 3>>], query_options).rows - end - - test "encode numeric", %{conn: conn, query_options: query_options} do - nums = [ - {"42", "numeric(2,0)"}, - {"0.4242", "numeric(4,4)"}, - {"42.4242", "numeric(6,4)"}, - {"1.001", "numeric(4,3)"}, - {"1.00123", "numeric(6,5)"}, - {"0.01", "numeric(3,2)"}, - {"0.00012345", "numeric(9,8)"}, - {"1000000000", "numeric(10,0)"}, - {"1000000000.0", "numeric(11,1)"}, - {"123456789123456789123456789", "numeric(27,0)"}, - {"123456789123456789123456789.123456789", "numeric(36,9)"}, - {"1.1234500000", "numeric(11,10)"}, - {"1.0000000000", "numeric(11,10)"}, - {"1.111101", "numeric(7,6)"}, - {"1.1111111101", "numeric(11,10)"}, - {"1.11110001", "numeric(9,8)"}, - # {"NaN", "numeric(1,0)"}, - {"-42", "numeric(2,0)"} - ] - - Enum.each(nums, fn {num, type} -> - dec = Decimal.new(num) 
- assert [[dec]] == Ch.query!(conn, "SELECT {$0:#{type}}", [dec], query_options).rows - end) - end - - test "encode integers and floats as numeric", %{conn: conn, query_options: query_options} do - dec = Decimal.new(1) - assert [[dec]] == Ch.query!(conn, "SELECT {$0:numeric(1,0)}", [1], query_options).rows - - dec = Decimal.from_float(1.0) - assert [[dec]] == Ch.query!(conn, "SELECT {$0:numeric(2,1)}", [1.0], query_options).rows - end - - @tag skip: true - test "encode json/jsonb", %{conn: conn, query_options: query_options} do - json = %{"foo" => 42} - assert [[json]] == Ch.query!(conn, "SELECT {$0::json}", [json], query_options).rows - end - - test "encode uuid", %{conn: conn, query_options: query_options} do - # TODO - uuid = <<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15>> - uuid_hex = "00010203-0405-0607-0809-0a0b0c0d0e0f" - assert [[^uuid]] = Ch.query!(conn, "SELECT {$0:UUID}", [uuid_hex], query_options).rows - end - - test "encode arrays", %{conn: conn, query_options: query_options} do - assert [[[]]] = Ch.query!(conn, "SELECT {$0:Array(integer)}", [[]], query_options).rows - assert [[[1]]] = Ch.query!(conn, "SELECT {$0:Array(integer)}", [[1]], query_options).rows - - assert [[[1, 2]]] = - Ch.query!(conn, "SELECT {$0:Array(integer)}", [[1, 2]], query_options).rows - - assert [[["1"]]] = Ch.query!(conn, "SELECT {$0:Array(String)}", [["1"]], query_options).rows - assert [[[true]]] = Ch.query!(conn, "SELECT {$0:Array(Bool)}", [[true]], query_options).rows - - assert [[[~D[2023-01-01]]]] = - Ch.query!(conn, "SELECT {$0:Array(Date)}", [[~D[2023-01-01]]], query_options).rows - - assert [[[Ch.Test.to_clickhouse_naive(conn, ~N[2023-01-01 12:00:00])]]] == - Ch.query!( - conn, - "SELECT {$0:Array(DateTime)}", - [[~N[2023-01-01 12:00:00]]], - query_options - ).rows - - assert [[[~U[2023-01-01 12:00:00Z]]]] == - Ch.query!( - conn, - "SELECT {$0:Array(DateTime('UTC'))}", - [[~N[2023-01-01 12:00:00]]], - query_options - ).rows - - assert [[[~N[2023-01-01 
12:00:00]]]] == - Ch.query!( - conn, - "SELECT {$0:Array(DateTime)}", - [[~U[2023-01-01 12:00:00Z]]], - query_options - ).rows - - assert [[[~U[2023-01-01 12:00:00Z]]]] == - Ch.query!( - conn, - "SELECT {$0:Array(DateTime('UTC'))}", - [[~U[2023-01-01 12:00:00Z]]], - query_options - ).rows - - assert [[[[0], [1]]]] = - Ch.query!(conn, "SELECT {$0:Array(Array(integer))}", [[[0], [1]]], query_options).rows - - assert [[[[0]]]] = - Ch.query!(conn, "SELECT {$0:Array(Array(integer))}", [[[0]]], query_options).rows - - # assert [[[1, nil, 3]]] = Ch.query!(conn, "SELECT {$0:Array(integer)}", [[1, nil, 3]], query_options).rows - end - - test "encode network types", %{conn: conn, query_options: query_options} do - # TODO, or wrap in custom struct like in postgrex - # assert [["127.0.0.1/32"]] = - # Ch.query!(conn, "SELECT {$0:inet4}::text", [{127, 0, 0, 1}], query_options).rows - - assert [[{127, 0, 0, 1}]] = - Ch.query!(conn, "SELECT {$0:text}::inet4", ["127.0.0.1"], query_options).rows - - assert [[{0, 0, 0, 0, 0, 0, 0, 1}]] = - Ch.query!(conn, "SELECT {$0:text}::inet6", ["::1"], query_options).rows - end - - test "result struct", %{conn: conn, query_options: query_options} do - assert {:ok, res} = Ch.query(conn, "SELECT 123 AS a, 456 AS b", [], query_options) - assert %Ch.Result{} = res - assert res.command == :select - assert res.columns == ["a", "b"] - assert res.num_rows == 1 - end - - test "empty result struct", %{conn: conn, query_options: query_options} do - assert %Ch.Result{} = - res = Ch.query!(conn, "select number, 'a' as b from numbers(0)", [], query_options) - - assert res.command == :select - assert res.columns == ["number", "b"] - assert res.rows == [] - assert res.num_rows == 0 - end - - test "error struct", %{conn: conn, query_options: query_options} do - assert {:error, %Ch.Error{}} = Ch.query(conn, "SELECT 123 + 'a'", [], query_options) - end - - test "error code", %{conn: conn, query_options: query_options} do - assert {:error, %Ch.Error{code: 62}} = 
Ch.query(conn, "wat", [], query_options) - end - - test "connection works after failure in execute", %{conn: conn, query_options: query_options} do - assert {:error, %Ch.Error{}} = Ch.query(conn, "wat", [], query_options) - assert [[42]] = Ch.query!(conn, "SELECT 42", [], query_options).rows - end - - test "async test", %{conn: conn, query_options: query_options} do - self_pid = self() - - Enum.each(1..10, fn _ -> - spawn_link(fn -> - send(self_pid, Ch.query!(conn, "SELECT sleep(0.05)", [], query_options).rows) - end) - end) - - assert [[42]] = Ch.query!(conn, "SELECT 42", [], query_options).rows - - Enum.each(1..10, fn _ -> - assert_receive [[0]], :timer.seconds(1) - end) - end - - test "query struct interpolates to statement" do - assert "#{%Ch.Query{statement: "SELECT 1"}}" == "SELECT 1" - end - end - - test "query before and after idle ping", %{query_options: query_options} do - opts = [backoff_type: :stop, idle_interval: 1] - {:ok, pid} = Ch.start_link(opts) - assert {:ok, _} = Ch.query(pid, "SELECT 42", [], query_options) - :timer.sleep(20) - assert {:ok, _} = Ch.query(pid, "SELECT 42", [], query_options) - :timer.sleep(20) - assert {:ok, _} = Ch.query(pid, "SELECT 42", [], query_options) - end -end diff --git a/test/ch/row_binary_test.exs b/test/ch/row_binary_test.exs index 3d5e453a..e485c00f 100644 --- a/test/ch/row_binary_test.exs +++ b/test/ch/row_binary_test.exs @@ -609,7 +609,8 @@ defmodule Ch.RowBinaryTest do encode({:datetime64, 1_000_000}, ~U[2022-01-01 12:00:00.123456Z]), encode( {:datetime64, 1_000_000_000}, - DateTime.new!(~D[2022-01-01], ~T[12:00:00.123456], "Asia/Tokyo") + ~D[2022-01-01] + |> DateTime.new!(~T[12:00:00.123456], "Asia/Tokyo") |> DateTime.shift_zone!("Etc/UTC") ) ], @@ -619,7 +620,8 @@ defmodule Ch.RowBinaryTest do encode({:datetime64, 1_000_000}, ~U[2042-12-31 23:59:59.987654Z]), encode( {:datetime64, 1_000_000_000}, - DateTime.new!(~D[2042-12-31], ~T[23:59:59.987654], "Asia/Tokyo") + ~D[2042-12-31] + |> 
DateTime.new!(~T[23:59:59.987654], "Asia/Tokyo") |> DateTime.shift_zone!("Etc/UTC") ) ] diff --git a/test/ch/settings_test.exs b/test/ch/settings_test.exs deleted file mode 100644 index 69024b91..00000000 --- a/test/ch/settings_test.exs +++ /dev/null @@ -1,26 +0,0 @@ -defmodule Ch.SettingsTest do - use ExUnit.Case, parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - - setup ctx do - {:ok, query_options: ctx[:query_options] || []} - end - - test "can pass default settings", %{query_options: query_options} do - assert {:ok, conn} = Ch.start_link(settings: [async_insert: 1]) - - assert {:ok, %{num_rows: 1, rows: [["async_insert", "Bool", "1"]]}} = - Ch.query(conn, "show settings like 'async_insert'", [], query_options) - end - - test "can overwrite default settings with options", %{query_options: query_options} do - assert {:ok, conn} = Ch.start_link(settings: [async_insert: 1]) - - assert {:ok, %{num_rows: 1, rows: [["async_insert", "Bool", "0"]]}} = - Ch.query( - conn, - "show settings like 'async_insert'", - [], - Keyword.merge(query_options, settings: [async_insert: 0]) - ) - end -end diff --git a/test/ch/stream_test.exs b/test/ch/stream_test.exs deleted file mode 100644 index 06d95de3..00000000 --- a/test/ch/stream_test.exs +++ /dev/null @@ -1,84 +0,0 @@ -defmodule Ch.StreamTest do - use ExUnit.Case, parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - alias Ch.{Result, RowBinary} - - setup ctx do - {:ok, query_options: ctx[:query_options] || []} - end - - setup do - {:ok, conn: start_supervised!({Ch, database: Ch.Test.database()})} - end - - describe "enumerable Ch.stream/4" do - test "emits %Ch.Result{}", %{conn: conn, query_options: query_options} do - results = - DBConnection.run(conn, fn conn -> - conn - |> Ch.stream( - "select * from numbers({count:UInt64})", - %{"count" => 1_000_000}, - query_options - ) - |> Enum.into([]) - end) - - assert results |> Enum.map(fn %Result{rows: rows} -> rows end) |> 
List.flatten() == - Enum.to_list(0..999_999) - end - - test "raises on error", %{conn: conn, query_options: query_options} do - assert_raise Ch.Error, - ~r/Code: 62. DB::Exception: Syntax error: failed at position 8/, - fn -> - DBConnection.run(conn, fn conn -> - conn - |> Ch.stream("select ", %{"count" => 1_000_000}, query_options) - |> Enum.into([]) - end) - end - end - - test "large strings", %{conn: conn, query_options: query_options} do - results = - DBConnection.run(conn, fn conn -> - conn - |> Ch.stream( - "select repeat('abc', 500000) from numbers({count:UInt64})", - %{"count" => 10}, - query_options - ) - |> Enum.into([]) - end) - - expected_string = String.duplicate("abc", 500_000) - - assert results |> Enum.map(fn %Result{rows: rows} -> rows end) |> List.flatten() == - List.duplicate(expected_string, 10) - end - end - - describe "collectable Ch.stream/4" do - test "inserts chunks", %{conn: conn, query_options: query_options} do - Ch.query!(conn, "create table collect_stream(i UInt64) engine Memory") - on_exit(fn -> Ch.Test.query("DROP TABLE collect_stream") end) - - DBConnection.run(conn, fn conn -> - Stream.repeatedly(fn -> [:rand.uniform(100)] end) - |> Stream.chunk_every(100_000) - |> Stream.map(fn chunk -> RowBinary.encode_rows(chunk, _types = ["UInt64"]) end) - |> Stream.take(10) - |> Enum.into( - Ch.stream( - conn, - "insert into collect_stream(i) format RowBinary", - _params = [], - Keyword.merge(query_options, encode: false) - ) - ) - end) - - assert Ch.query!(conn, "select count(*) from collect_stream").rows == [[1_000_000]] - end - end -end diff --git a/test/ch/variant_test.exs b/test/ch/variant_test.exs deleted file mode 100644 index fbd7144f..00000000 --- a/test/ch/variant_test.exs +++ /dev/null @@ -1,114 +0,0 @@ -defmodule Ch.VariantTest do - use ExUnit.Case, parameterize: [%{query_options: []}, %{query_options: [multipart: true]}] - import Ch.Test, only: [parameterize_query!: 2, parameterize_query!: 4] - - # 
https://clickhouse.com/docs/sql-reference/data-types/variant - - @moduletag :variant - - setup do - conn = start_supervised!({Ch, database: Ch.Test.database()}) - {:ok, conn: conn} - end - - test "basic", ctx do - assert parameterize_query!(ctx, "select null::Variant(UInt64, String, Array(UInt64))").rows == - [[nil]] - - assert parameterize_query!(ctx, "select [1]::Variant(UInt64, String, Array(UInt64))").rows == - [[[1]]] - - assert parameterize_query!(ctx, "select 0::Variant(UInt64, String, Array(UInt64))").rows == [ - [0] - ] - - assert parameterize_query!( - ctx, - "select 'Hello, World!'::Variant(UInt64, String, Array(UInt64))" - ).rows == - [["Hello, World!"]] - end - - # https://github.com/plausible/ch/issues/272 - test "ordering internal types", ctx do - test = %{ - "'hello'" => "hello", - "-10" => -10, - "true" => true, - "map('hello', null::Nullable(String))" => %{"hello" => nil}, - "map('hello', 'world'::Nullable(String))" => %{"hello" => "world"} - } - - for {value, expected} <- test do - assert parameterize_query!( - ctx, - "select #{value}::Variant(String, Int32, Bool, Map(String, Nullable(String)))" - ).rows == [[expected]] - end - end - - test "with a table", ctx do - # https://clickhouse.com/docs/sql-reference/data-types/variant#creating-variant - parameterize_query!(ctx, """ - CREATE TABLE variant_test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; - """) - - on_exit(fn -> Ch.Test.query("DROP TABLE variant_test") end) - - parameterize_query!( - ctx, - "INSERT INTO variant_test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);" - ) - - assert parameterize_query!(ctx, "SELECT v FROM variant_test").rows == [ - [nil], - [42], - ["Hello, World!"], - [[1, 2, 3]] - ] - - # https://clickhouse.com/docs/sql-reference/data-types/variant#reading-variant-nested-types-as-subcolumns - assert parameterize_query!( - ctx, - "SELECT v, v.String, v.UInt64, v.`Array(UInt64)` FROM variant_test;" - ).rows == - [ - [nil, nil, nil, []], - [42, nil, 42, 
[]], - ["Hello, World!", "Hello, World!", nil, []], - [[1, 2, 3], nil, nil, [1, 2, 3]] - ] - - assert parameterize_query!( - ctx, - "SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM variant_test;" - ).rows == [ - [nil, nil, nil, []], - [42, nil, 42, []], - ["Hello, World!", "Hello, World!", nil, []], - [[1, 2, 3], nil, nil, [1, 2, 3]] - ] - end - - test "rowbinary", ctx do - parameterize_query!(ctx, """ - CREATE TABLE variant_test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; - """) - - on_exit(fn -> Ch.Test.query("DROP TABLE variant_test") end) - - parameterize_query!( - ctx, - "INSERT INTO variant_test FORMAT RowBinary", - [[nil], [42], ["Hello, World!"], [[1, 2, 3]]], - types: ["Variant(UInt64, String, Array(UInt64))"] - ) - - assert parameterize_query!(ctx, "SELECT v FROM variant_test").rows == [ - [nil], - [42], - ["Hello, World!"], - [[1, 2, 3]] - ] - end -end diff --git a/test/support/help.ex b/test/support/help.ex new file mode 100644 index 00000000..07b26355 --- /dev/null +++ b/test/support/help.ex @@ -0,0 +1,3 @@ +defmodule Help do + @moduledoc false +end diff --git a/test/support/test.ex b/test/support/test.ex deleted file mode 100644 index 301e9ab1..00000000 --- a/test/support/test.ex +++ /dev/null @@ -1,123 +0,0 @@ -defmodule Ch.Test do - @moduledoc false - - def database, do: Application.fetch_env!(:ch, :database) - - # makes a query in a short lived process so that pool automatically exits once finished - def query(sql, params \\ [], opts \\ []) do - task = - Task.async(fn -> - {:ok, pid} = Ch.start_link(opts) - opts = Keyword.put_new_lazy(opts, :database, &database/0) - Ch.query!(pid, sql, params, opts) - end) - - Task.await(task) - end - - # helper for ExUnit.Case :parameterize - def parameterize_query_options(ctx, options \\ []) do - if default_options = ctx[:query_options] do - Keyword.merge(default_options, options) - else - options - end - end - - def 
parameterize_query(ctx, sql, params \\ [], options \\ []) do - Ch.query( - ctx.conn, - sql, - params, - parameterize_query_options(ctx, options) - ) - end - - def parameterize_query!(ctx, sql, params \\ [], options \\ []) do - Ch.query!( - ctx.conn, - sql, - params, - parameterize_query_options(ctx, options) - ) - end - - # TODO packet: :http? - def intercept_packets(socket, buffer \\ <<>>) do - receive do - {:tcp, ^socket, packet} -> - buffer = buffer <> packet - - if complete?(buffer) do - buffer - else - intercept_packets(socket, buffer) - end - end - end - - defp complete?(buffer) do - with {:ok, rest} <- eat_status(buffer), - {:ok, content_length, rest} <- eat_headers(rest) do - verify_body(content_length, rest) - else - _ -> false - end - end - - defp eat_status(buffer) do - case :erlang.decode_packet(:http_bin, buffer, []) do - {:ok, _, rest} -> {:ok, rest} - {:more, _} -> {:more, buffer} - end - end - - defp eat_headers(buffer, content_length \\ nil) do - case :erlang.decode_packet(:httph_bin, buffer, []) do - {:ok, {_, _, :"Content-Length", _, content_length}, rest} -> - eat_headers(rest, String.to_integer(content_length)) - - {:ok, {_, _, :"Transfer-Encoding", _, "chunked"}, rest} -> - eat_headers(rest, :chunked) - - {:ok, :http_eoh, rest} -> - {:ok, content_length, rest} - - {:ok, _, rest} -> - eat_headers(rest, content_length) - - {:more, _} -> - {:more, buffer} - end - end - - defp verify_body(:chunked, chunks) do - String.ends_with?(chunks, "\r\n0\r\n\r\n") - end - - defp verify_body(content_length, body) do - byte_size(body) == content_length - end - - # shifts naive datetimes for non-utc timezones into utc to match ClickHouse behaviour - # see https://clickhouse.com/docs/en/sql-reference/data-types/datetime#usage-remarks - def to_clickhouse_naive(conn, %NaiveDateTime{} = naive_datetime) do - case Ch.query!(conn, "select timezone()").rows do - [["UTC"]] -> - naive_datetime - - [[timezone]] -> - naive_datetime - |> DateTime.from_naive!(timezone) - |> 
DateTime.shift_zone!("Etc/UTC") - |> DateTime.to_naive() - end - end - - def clickhouse_tz(conn) do - case Ch.query!(conn, "select timezone()").rows do - [["UTC"]] -> "Etc/UTC" - [[timezone]] -> timezone - end - end -end diff --git a/test/test_helper.exs b/test/test_helper.exs index 97caedd8..991cdd3c 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -1,46 +1,19 @@ -clickhouse_available? = - case :httpc.request(:get, {~c"http://localhost:8123/ping", []}, [], []) do - {:ok, {{_version, _status = 200, _reason}, _headers, ~c"Ok.\n"}} -> - true +# TODO +# clickhouse_available? = +# case Help.http("http://localhost:8123/ping") do +# {200, _headers, "Ok.\n"} -> true +# {:error, :econnrefused} -> false +# end - {:error, {:failed_connect, [{:to_address, _to_address}, {:inet, [:inet], :econnrefused}]}} -> - false - end +# unless clickhouse_available? do +# Mix.shell().error(""" +# ClickHouse is not detected at localhost:8123! Please start the local container with the following command: -unless clickhouse_available? do - Mix.shell().error(""" - ClickHouse is not detected at localhost:8123! 
Please start the local container with the following command: +# docker compose up -d clickhouse +# """) - docker compose up -d clickhouse - """) - - System.halt(1) -end +# System.halt(1) +# end Calendar.put_time_zone_database(Tz.TimeZoneDatabase) -default_test_db = System.get_env("CH_DATABASE", "ch_elixir_test") -Application.put_env(:ch, :database, default_test_db) - -Ch.Test.query( - "DROP DATABASE IF EXISTS {db:Identifier}", - %{"db" => default_test_db}, - database: "default" -) - -Ch.Test.query( - "CREATE DATABASE {db:Identifier}", - %{"db" => default_test_db}, - database: "default" -) - -%{rows: [[ch_version]]} = Ch.Test.query("SELECT version()") - -extra_exclude = - if ch_version >= "25" do - [] - else - # Time, Variant, JSON, and Dynamic types are not supported in older ClickHouse versions we have in the CI - [:time, :variant, :json, :dynamic] - end - -ExUnit.start(exclude: [:slow | extra_exclude]) +ExUnit.start() From 0ac1ed5307d2f1abef8b8d463fcf085e03558199 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 13 Apr 2026 20:37:33 +0300 Subject: [PATCH 02/13] continue --- test/ch/pool_test.exs | 60 +++++++++++++++++++++++++++---------------- test/support/help.ex | 38 +++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 22 deletions(-) diff --git a/test/ch/pool_test.exs b/test/ch/pool_test.exs index 5793cb7c..7e4374aa 100644 --- a/test/ch/pool_test.exs +++ b/test/ch/pool_test.exs @@ -1,12 +1,12 @@ defmodule Ch.PoolTest do use ExUnit.Case, async: true - setup do - {:ok, pool: start_supervised!({Ch.Pool, scheme: :http, host: "localhost", port: 8123})} + setup ctx do + Help.setup_pool(ctx) end - test "select", %{pool: pool} do - assert Ch.Pool.query!(pool, "select 1").rows == [[1]] + test "select", ctx do + assert Help.query!(ctx, "select 1").rows == [[1]] uuid = "9B29BD20-924C-4DE5-BDB3-8C2AA1FCE1FC" uuid_bin = uuid |> String.replace("-", "") |> Base.decode16!() @@ -43,37 +43,53 @@ defmodule Ch.PoolTest do {_type, _value, _expected} -> param end - 
assert Ch.Pool.query!(pool, "select {a:#{type}}", %{"a" => value}).rows == [[expected]] + assert Help.query!(ctx, "select {a:#{type}}", %{"a" => value}).rows == [[expected]] end) end - test "insert", %{pool: pool} do - settings = [session_id: "test_insert_#{System.unique_integer()}"] + test "insert", ctx do + assert Help.query!(ctx, "create temporary table test_insert(a UInt8, b String) engine Memory") - session_query = fn statement -> - Ch.Pool.query!(pool, statement, %{}, settings: settings) - end - - assert session_query.("create temporary table test_insert(a UInt8, b String) engine Memory") - - assert session_query.("insert into test_insert values (1, 'hello')") + assert Help.query!( + ctx, + "insert into test_insert values (1, 'hello'), ({two:UInt8}, {world:String})", + %{"two" => "2", "world" => "world"} + ) types = ["UInt8", "String"] - rowbinary = [ - Ch.RowBinary.encode_names_and_types(["a", "b"], types) - | Ch.RowBinary.encode_rows([[2, "world"], [3, "foo"], [4, "bar"]], types) - ] + # rowbinary - assert session_query.([ - "insert into test_insert format RowBinaryWithNamesAndTypes\n" | rowbinary + assert Help.query!(ctx, [ + "insert into test_insert format RowBinaryWithNamesAndTypes\n", + Ch.RowBinary.encode_names_and_types(["a", "b"], types) + | Ch.RowBinary.encode_rows([[3, "foo"], [4, "bar"], [5, "baz"]], types) ]) - assert session_query.("select * from test_insert order by a asc").rows == [ + # compressed rowbinary + assert Help.query!( + ctx, + :zstd.compress([ + "insert into test_insert format RowBinaryWithNamesAndTypes\n", + Ch.RowBinary.encode_names_and_types(["a", "b"], types) + | Ch.RowBinary.encode_rows( + [[6, "clickhouse"], [7, "postgres"], [8, "sqlite"]], + types + ) + ]), + _params = %{}, + headers: [{"content-encoding", "zstd"}] + ) + + assert Help.query!(ctx, "select * from test_insert order by a asc").rows == [ [1, "hello"], [2, "world"], [3, "foo"], - [4, "bar"] + [4, "bar"], + [5, "baz"], + [6, "clickhouse"], + [7, "postgres"], + [8, 
"sqlite"] ] end end diff --git a/test/support/help.ex b/test/support/help.ex index 07b26355..21d72ce5 100644 --- a/test/support/help.ex +++ b/test/support/help.ex @@ -1,3 +1,41 @@ defmodule Help do @moduledoc false + + def start_pool!(_test_context) do + ExUnit.Callbacks.start_supervised!({Ch.Pool, scheme: :http, host: "localhost", port: 8123}) + end + + def setup_pool(%{pool: pool}) when is_pid(pool), do: :ok + + def setup_pool(test_context) do + {:ok, pool: start_pool!(test_context), session_id: session_id(test_context)} + end + + def query!(test_context, statement, params \\ %{}, options \\ []) do + %{pool: pool, session_id: session_id} = test_context + session_settings = [session_id: session_id] + + options = + Keyword.update(options, :settings, session_settings, fn settings -> + Keyword.merge(session_settings, settings) + end) + + Ch.Pool.query!(pool, statement, params, options) + end + + def session_id(test_context) do + %{module: module, test: test} = test_context + + rand = + Base.hex_encode32( + << + System.system_time(:nanosecond)::64, + :erlang.phash2({node(), self()}, 16_777_216)::24, + :erlang.unique_integer()::32 + >>, + case: :lower + ) + + "#{module}-#{test}-#{rand}" + end end From 9bbc61fa2ec94fe0a743d93373979c2438c97973 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 13 Apr 2026 20:42:34 +0300 Subject: [PATCH 03/13] continue --- test/ch/pool_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ch/pool_test.exs b/test/ch/pool_test.exs index 7e4374aa..cc88cc70 100644 --- a/test/ch/pool_test.exs +++ b/test/ch/pool_test.exs @@ -50,6 +50,7 @@ defmodule Ch.PoolTest do test "insert", ctx do assert Help.query!(ctx, "create temporary table test_insert(a UInt8, b String) engine Memory") + # params assert Help.query!( ctx, "insert into test_insert values (1, 'hello'), ({two:UInt8}, {world:String})", @@ -59,7 +60,6 @@ defmodule Ch.PoolTest do types = ["UInt8", "String"] # rowbinary - assert Help.query!(ctx, [ "insert into 
test_insert format RowBinaryWithNamesAndTypes\n", Ch.RowBinary.encode_names_and_types(["a", "b"], types) From 2a55f0ad54ce6485c6eb0a3b14015a4567b71da2 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 13 Apr 2026 20:52:11 +0300 Subject: [PATCH 04/13] continue --- lib/ch/http.ex | 203 ++++++++++++++++++++++++++++++++++++++++++++++ lib/ch/pool.ex | 212 ++----------------------------------------------- 2 files changed, 210 insertions(+), 205 deletions(-) create mode 100644 lib/ch/http.ex diff --git a/lib/ch/http.ex b/lib/ch/http.ex new file mode 100644 index 00000000..8c2093ed --- /dev/null +++ b/lib/ch/http.ex @@ -0,0 +1,203 @@ +defmodule Ch.HTTP do + @moduledoc """ + Helpers for `Mint.HTTP1` for working with ClickHouse. + """ + + def deadline_from_timeout(:infinity = inf), do: inf + + def deadline_from_timeout(timeout) do + System.monotonic_time(:millisecond) + timeout + end + + def timeout_from_deadline(:infinity = inf), do: inf + + def timeout_from_deadline(deadline) do + max(0, deadline - System.monotonic_time(:millisecond)) + end + + def encode_request(method, statement, params, options) do + settings = Keyword.get(options, :settings, []) + + headers = + options + |> Keyword.get(:headers, []) + |> put_new_header("x-clickhouse-format", "RowBinaryWithNamesAndTypes") + + path = "/?" 
<> URI.encode_query(settings ++ encode_params(params)) + %{method: method, path: path, headers: headers, body: statement} + end + + def request(conn, request, deadline) do + %{method: method, path: path, headers: headers, body: body} = request + + case Mint.HTTP1.request(conn, method, path, headers, body) do + {:ok, conn, _ref} -> + receive_response(conn, [], deadline) + + {:error, conn, reason} -> + _todo = Mint.HTTP1.close(conn) + {:error, reason} + end + end + + defp receive_response(conn, acc, deadline) do + timeout = timeout_from_deadline(deadline) + + case Mint.HTTP1.recv(conn, 0, timeout) do + {:ok, conn, fragments} -> + case handle_response_fragments(fragments, acc) do + {:ok, response} -> {:ok, conn, response} + {:more, acc} -> receive_response(conn, acc, deadline) + end + + {:error, conn, reason, _fragments} -> + _todo = Mint.HTTP1.close(conn) + {:error, reason} + end + end + + for tag <- [:data, :status, :headers] do + defp handle_response_fragments([{unquote(tag), _ref, data} | rest], acc) do + handle_response_fragments(rest, [data | acc]) + end + end + + defp handle_response_fragments([{:done, _ref}], acc), do: {:ok, :lists.reverse(acc)} + defp handle_response_fragments([], acc), do: {:more, acc} + + def decode_response(response, _options) do + case response do + [200, headers | data] -> + result = + case get_header(headers, "x-clickhouse-format") do + "RowBinaryWithNamesAndTypes" -> + [names | rows] = + data + |> IO.iodata_to_binary() + |> Ch.RowBinary.decode_names_and_rows() + + %{columns: names, rows: rows} + + _other -> + %{data: data} + end + + {:ok, result} + + [_status, headers | data] -> + message = IO.iodata_to_binary(data) + + code = + if code = get_header(headers, "x-clickhouse-exception-code") do + String.to_integer(code) + end + + {:error, Ch.Error.exception(code: code, message: message)} + end + end + + defp put_new_header(headers, name, value) do + if List.keymember?(headers, name, 0) do + headers + else + [{name, value} | headers] + end 
+ end + + defp get_header(headers, key) do + case List.keyfind(headers, key, 0) do + {_, value} -> value + nil = not_found -> not_found + end + end + + defp encode_params(params) when is_map(params) do + Enum.map(params, fn {k, v} -> {"param_#{k}", encode_param(v)} end) + end + + defp encode_param(n) when is_integer(n), do: Integer.to_string(n) + defp encode_param(f) when is_float(f), do: Float.to_string(f) + + defp encode_param(b) when is_binary(b) do + escape_param([{"\\", "\\\\"}, {"\t", "\\\t"}, {"\n", "\\\n"}], b) + end + + defp encode_param(b) when is_boolean(b), do: Atom.to_string(b) + defp encode_param(nil), do: "\\N" + defp encode_param(%Decimal{} = d), do: Decimal.to_string(d, :normal) + defp encode_param(%Date{} = date), do: Date.to_iso8601(date) + defp encode_param(%NaiveDateTime{} = naive), do: NaiveDateTime.to_iso8601(naive) + defp encode_param(%Time{} = time), do: Time.to_iso8601(time) + + defp encode_param(%DateTime{microsecond: microsecond} = dt) do + dt = DateTime.shift_zone!(dt, "Etc/UTC") + + case microsecond do + {val, precision} when val > 0 and precision > 0 -> + size = round(:math.pow(10, precision)) + unix = DateTime.to_unix(dt, size) + seconds = div(unix, size) + fractional = rem(unix, size) + + IO.iodata_to_binary([ + Integer.to_string(seconds), + ?., + String.pad_leading(Integer.to_string(fractional), precision, "0") + ]) + + _ -> + dt |> DateTime.to_unix(:second) |> Integer.to_string() + end + end + + defp encode_param(tuple) when is_tuple(tuple) do + IO.iodata_to_binary([?(, encode_array_params(Tuple.to_list(tuple)), ?)]) + end + + defp encode_param(a) when is_list(a) do + IO.iodata_to_binary([?[, encode_array_params(a), ?]]) + end + + defp encode_param(m) when is_map(m) do + IO.iodata_to_binary([?{, encode_map_params(Map.to_list(m)), ?}]) + end + + defp encode_array_params([last]), do: encode_array_param(last) + + defp encode_array_params([s | rest]) do + [encode_array_param(s), ?, | encode_array_params(rest)] + end + + defp 
encode_array_params([] = empty), do: empty + + defp encode_map_params([last]), do: encode_map_param(last) + + defp encode_map_params([kv | rest]) do + [encode_map_param(kv), ?, | encode_map_params(rest)] + end + + defp encode_map_params([] = empty), do: empty + + defp encode_array_param(s) when is_binary(s) do + [?', escape_param([{"'", "''"}, {"\\", "\\\\"}], s), ?'] + end + + defp encode_array_param(nil), do: "null" + + defp encode_array_param(%s{} = param) when s in [Date, NaiveDateTime] do + [?', encode_param(param), ?'] + end + + defp encode_array_param(v), do: encode_param(v) + + defp encode_map_param({k, v}) do + [encode_array_param(k), ?:, encode_array_param(v)] + end + + defp escape_param([{pattern, replacement} | escapes], param) do + param = String.replace(param, pattern, replacement) + escape_param(escapes, param) + end + + defp escape_param([], param), do: param +end diff --git a/lib/ch/pool.ex b/lib/ch/pool.ex index 1f3f0753..86fd21ee 100644 --- a/lib/ch/pool.ex +++ b/lib/ch/pool.ex @@ -44,10 +44,10 @@ defmodule Ch.Pool do @spec query(NimblePool.pool(), statement, params, keyword) :: {:ok, query_result} | {:error, query_error} def query(pool, statement, params \\ %{}, options \\ []) do - request = encode_request("POST", statement, params, options) + request = Ch.HTTP.encode_request("POST", statement, params, options) {timeout, options} = Keyword.pop(options, :timeout, @query_timeout) - deadline = deadline_from_timeout(timeout) + deadline = Ch.HTTP.deadline_from_timeout(timeout) # TODO retry on closed result = @@ -58,17 +58,17 @@ defmodule Ch.Pool do # TODO what if caller dies? does nimble pool terminate the worker? probably # TODO retry transient closed/etc. errors? 
with {:ok, conn} <- ensure_connected(conn, pid, deadline), - {:ok, conn, response} <- request(conn, request, deadline) do + {:ok, conn, response} <- Ch.HTTP.request(conn, request, deadline) do {{:ok, response}, checkin(conn)} else {:error, reason} = error -> {error, {:remove, reason}} end end, - timeout_from_deadline(deadline) + timeout ) with {:ok, response} <- result do - decode_response(response, options) + Ch.HTTP.decode_response(response, options) end end @@ -131,7 +131,7 @@ defmodule Ch.Pool do {:remove, :idle_timeout} end - # TODO handle_info + # TODO handle_info? @impl NimblePool def terminate_worker(_reason, conn, config) do @@ -139,20 +139,8 @@ defmodule Ch.Pool do {:ok, config} end - defp deadline_from_timeout(:infinity = inf), do: inf - - defp deadline_from_timeout(timeout) do - System.monotonic_time(:millisecond) + timeout - end - - defp timeout_from_deadline(:infinity = inf), do: inf - - defp timeout_from_deadline(deadline) do - max(0, deadline - System.monotonic_time(:millisecond)) - end - defp ensure_connected({:idle, scheme, host, port, options}, owner, deadline) do - timeout = timeout_from_deadline(deadline) + timeout = Ch.HTTP.timeout_from_deadline(deadline) options = Keyword.put(options, :timeout, timeout) case Mint.HTTP1.connect(scheme, host, port, options) do @@ -173,56 +161,6 @@ defmodule Ch.Pool do defp ensure_connected({:connected, conn}, _owner, _deadline), do: {:ok, conn} - defp encode_request(method, statement, params, options) do - settings = Keyword.get(options, :settings, []) - - headers = - options - |> Keyword.get(:headers, []) - |> put_new_header("x-clickhouse-format", "RowBinaryWithNamesAndTypes") - - path = "/?" 
<> URI.encode_query(settings ++ encode_params(params)) - %{method: method, path: path, headers: headers, body: statement} - end - - defp request(conn, request, deadline) do - %{method: method, path: path, headers: headers, body: body} = request - - case Mint.HTTP1.request(conn, method, path, headers, body) do - {:ok, conn, _ref} -> - receive_response(conn, [], deadline) - - {:error, conn, reason} -> - _todo = Mint.HTTP1.close(conn) - {:error, reason} - end - end - - defp receive_response(conn, acc, deadline) do - timeout = timeout_from_deadline(deadline) - - case Mint.HTTP1.recv(conn, 0, timeout) do - {:ok, conn, fragments} -> - case handle_response_fragments(fragments, acc) do - {:ok, response} -> {:ok, conn, response} - {:more, acc} -> receive_response(conn, acc, deadline) - end - - {:error, conn, reason, _fragments} -> - _todo = Mint.HTTP1.close(conn) - {:error, reason} - end - end - - for tag <- [:data, :status, :headers] do - defp handle_response_fragments([{unquote(tag), _ref, data} | rest], acc) do - handle_response_fragments(rest, [data | acc]) - end - end - - defp handle_response_fragments([{:done, _ref}], acc), do: {:ok, :lists.reverse(acc)} - defp handle_response_fragments([], acc), do: {:more, acc} - defp checkin(conn) do if Mint.HTTP1.open?(conn) do {:ok, conn} @@ -230,140 +168,4 @@ defmodule Ch.Pool do {:remove, Mint.TransportError.exception(reason: :closed)} end end - - defp decode_response(response, _options) do - case response do - [200, headers | data] -> - result = - case get_header(headers, "x-clickhouse-format") do - "RowBinaryWithNamesAndTypes" -> - [names | rows] = - data - |> IO.iodata_to_binary() - |> Ch.RowBinary.decode_names_and_rows() - - %{columns: names, rows: rows} - - _other -> - %{data: data} - end - - {:ok, result} - - [_status, headers | data] -> - message = IO.iodata_to_binary(data) - - code = - if code = get_header(headers, "x-clickhouse-exception-code") do - String.to_integer(code) - end - - {:error, Ch.Error.exception(code: 
code, message: message)} - end - end - - defp put_new_header(headers, name, value) do - if List.keymember?(headers, name, 0) do - headers - else - [{name, value} | headers] - end - end - - defp get_header(headers, key) do - case List.keyfind(headers, key, 0) do - {_, value} -> value - nil = not_found -> not_found - end - end - - defp encode_params(params) when is_map(params) do - Enum.map(params, fn {k, v} -> {"param_#{k}", encode_param(v)} end) - end - - defp encode_param(n) when is_integer(n), do: Integer.to_string(n) - defp encode_param(f) when is_float(f), do: Float.to_string(f) - - defp encode_param(b) when is_binary(b) do - escape_param([{"\\", "\\\\"}, {"\t", "\\\t"}, {"\n", "\\\n"}], b) - end - - defp encode_param(b) when is_boolean(b), do: Atom.to_string(b) - defp encode_param(nil), do: "\\N" - defp encode_param(%Decimal{} = d), do: Decimal.to_string(d, :normal) - defp encode_param(%Date{} = date), do: Date.to_iso8601(date) - defp encode_param(%NaiveDateTime{} = naive), do: NaiveDateTime.to_iso8601(naive) - defp encode_param(%Time{} = time), do: Time.to_iso8601(time) - - defp encode_param(%DateTime{microsecond: microsecond} = dt) do - dt = DateTime.shift_zone!(dt, "Etc/UTC") - - case microsecond do - {val, precision} when val > 0 and precision > 0 -> - size = round(:math.pow(10, precision)) - unix = DateTime.to_unix(dt, size) - seconds = div(unix, size) - fractional = rem(unix, size) - - IO.iodata_to_binary([ - Integer.to_string(seconds), - ?., - String.pad_leading(Integer.to_string(fractional), precision, "0") - ]) - - _ -> - dt |> DateTime.to_unix(:second) |> Integer.to_string() - end - end - - defp encode_param(tuple) when is_tuple(tuple) do - IO.iodata_to_binary([?(, encode_array_params(Tuple.to_list(tuple)), ?)]) - end - - defp encode_param(a) when is_list(a) do - IO.iodata_to_binary([?[, encode_array_params(a), ?]]) - end - - defp encode_param(m) when is_map(m) do - IO.iodata_to_binary([?{, encode_map_params(Map.to_list(m)), ?}]) - end - - defp 
encode_array_params([last]), do: encode_array_param(last) - - defp encode_array_params([s | rest]) do - [encode_array_param(s), ?, | encode_array_params(rest)] - end - - defp encode_array_params([] = empty), do: empty - - defp encode_map_params([last]), do: encode_map_param(last) - - defp encode_map_params([kv | rest]) do - [encode_map_param(kv), ?, | encode_map_params(rest)] - end - - defp encode_map_params([] = empty), do: empty - - defp encode_array_param(s) when is_binary(s) do - [?', escape_param([{"'", "''"}, {"\\", "\\\\"}], s), ?'] - end - - defp encode_array_param(nil), do: "null" - - defp encode_array_param(%s{} = param) when s in [Date, NaiveDateTime] do - [?', encode_param(param), ?'] - end - - defp encode_array_param(v), do: encode_param(v) - - defp encode_map_param({k, v}) do - [encode_array_param(k), ?:, encode_array_param(v)] - end - - defp escape_param([{pattern, replacement} | escapes], param) do - param = String.replace(param, pattern, replacement) - escape_param(escapes, param) - end - - defp escape_param([], param), do: param end From 6d47e1f6ace09a6e850018aa52d440f82f9194e5 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 13 Apr 2026 21:08:48 +0300 Subject: [PATCH 05/13] continue --- lib/ch/pool.ex | 24 ++++++++++++++---------- mix.exs | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/lib/ch/pool.ex b/lib/ch/pool.ex index 86fd21ee..92e34b40 100644 --- a/lib/ch/pool.ex +++ b/lib/ch/pool.ex @@ -108,18 +108,22 @@ defmodule Ch.Pool do @impl NimblePool def init_worker(config) do - %{scheme: scheme, host: host, port: port, transport_options: options} = config - {:ok, {:idle, scheme, host, port, options}, config} + {:ok, :template, config} end @impl NimblePool - def handle_checkout(:request, _from, conn, config) do - {:ok, conn, conn, config} + def handle_checkout(:request, _from, :template = template, config) do + %{scheme: scheme, host: host, port: port, transport_options: options} = config + {:ok, {template, scheme, host, 
port, options}, template, config} + end + + def handle_checkout(:request, _from, %Mint.HTTP1{} = conn, config) do + {:ok, {:ok, conn}, conn, config} end @impl NimblePool - def handle_checkin({:ok, conn}, _from, _conn, config) do - {:ok, {:connected, conn}, config} + def handle_checkin({:ok, %Mint.HTTP1{} = conn}, _from, _conn, config) do + {:ok, conn, config} end def handle_checkin({:remove, reason}, _from, _conn, config) do @@ -128,18 +132,18 @@ defmodule Ch.Pool do @impl NimblePool def handle_ping(_conn, _config) do - {:remove, :idle_timeout} + {:remove, :worker_idle_timeout} end # TODO handle_info? @impl NimblePool def terminate_worker(_reason, conn, config) do - with {:connected, conn} <- conn, do: Mint.HTTP1.close(conn) + with %Mint.HTTP1{} <- conn, do: Mint.HTTP1.close(conn) {:ok, config} end - defp ensure_connected({:idle, scheme, host, port, options}, owner, deadline) do + defp ensure_connected({:template, scheme, host, port, options}, owner, deadline) do timeout = Ch.HTTP.timeout_from_deadline(deadline) options = Keyword.put(options, :timeout, timeout) @@ -159,7 +163,7 @@ defmodule Ch.Pool do end end - defp ensure_connected({:connected, conn}, _owner, _deadline), do: {:ok, conn} + defp ensure_connected({:ok, %Mint.HTTP1{}} = ok, _owner, _deadline), do: ok defp checkin(conn) do if Mint.HTTP1.open?(conn) do diff --git a/mix.exs b/mix.exs index 194d588b..e11f0e0c 100644 --- a/mix.exs +++ b/mix.exs @@ -64,7 +64,7 @@ defmodule Ch.MixProject do {:nimble_pool, "~> 1.1"}, {:nimble_options, "~> 1.1"}, {:telemetry, "~> 1.4"}, - {:telemetry_docs, "~> 0.1.0"}, + {:telemetry_docs, "~> 0.1.0", only: :dev}, {:decimal, "~> 2.0"}, {:ecto, "~> 3.13.0", optional: true}, {:benchee, "~> 1.0", only: :bench}, From 3fdaa37a9fa123b0d72db3668c3e495679c045a0 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 13 Apr 2026 23:46:25 +0300 Subject: [PATCH 06/13] continue --- lib/ch/pool.ex | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/ch/pool.ex b/lib/ch/pool.ex index 
92e34b40..1e20ab23 100644 --- a/lib/ch/pool.ex +++ b/lib/ch/pool.ex @@ -55,7 +55,6 @@ defmodule Ch.Pool do pool, :request, fn {pid, _ref}, conn -> - # TODO what if caller dies? does nimble pool terminate the worker? probably # TODO retry transient closed/etc. errors? with {:ok, conn} <- ensure_connected(conn, pid, deadline), {:ok, conn, response} <- Ch.HTTP.request(conn, request, deadline) do From e5ab101892a73de22ad07cc29f9906ee3bcf8256 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Tue, 14 Apr 2026 05:50:54 +0300 Subject: [PATCH 07/13] continue --- .formatter.exs | 2 +- .gitignore | 3 + lib/ch/telemetry.ex | 9 ++- mix.exs | 44 ++++++++++++- pages/telemetry_events.exs | 128 +++++++++++++++++++++++++++++++++++++ 5 files changed, 182 insertions(+), 4 deletions(-) create mode 100644 pages/telemetry_events.exs diff --git a/.formatter.exs b/.formatter.exs index 24cc1ec1..53c59adb 100644 --- a/.formatter.exs +++ b/.formatter.exs @@ -1,4 +1,4 @@ # Used by "mix format" [ - inputs: ["{mix,.credo,.formatter}.exs", "{config,lib,test,bench}/**/*.{ex,exs}"] + inputs: ["{mix,.credo,.formatter}.exs", "{config,lib,test,bench,pages}/**/*.{ex,exs}"] ] diff --git a/.gitignore b/.gitignore index f4f7b2ed..75e0d7f9 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,6 @@ ch-*.tar # Dialyzer PLTs /plts/ + +# Generated docs. +pages/telemetry-events.md diff --git a/lib/ch/telemetry.ex b/lib/ch/telemetry.ex index db78f92f..f9224394 100644 --- a/lib/ch/telemetry.ex +++ b/lib/ch/telemetry.ex @@ -1,6 +1,13 @@ defmodule Ch.Telemetry do @moduledoc """ - TODO + Telemetry integration for event tracing, metrics, and logging. + + A complete list of emitted events is available in the [Telemetry Events](telemetry-events.md) reference. + + ### Default Logging Handler + + Xinesis provides a default Telemetry handler that logs connection and request events at appropriate log levels. + To enable this default logging, call `attach_default_handler/0`, to disable it, call `detach_default_handler/0`. 
""" @default_handler_id "ch-default-handler" diff --git a/mix.exs b/mix.exs index e11f0e0c..8f8247cf 100644 --- a/mix.exs +++ b/mix.exs @@ -12,6 +12,9 @@ defmodule Ch.MixProject do elixirc_paths: elixirc_paths(Mix.env()), description: "HTTP ClickHouse driver for Elixir", deps: deps(), + aliases: [ + docs: [&telemetry_docs/1, "docs"] + ], # Test coverage test_coverage: [ @@ -32,7 +35,7 @@ defmodule Ch.MixProject do main: "readme", source_url: @source_url, source_ref: "v#{@version}", - extras: ["README.md", "CHANGELOG.md"], + extras: ["README.md", "CHANGELOG.md", "pages/telemetry-events.md"], skip_undefined_reference_warnings_on: ["CHANGELOG.md"] ], @@ -69,11 +72,48 @@ defmodule Ch.MixProject do {:ecto, "~> 3.13.0", optional: true}, {:benchee, "~> 1.0", only: :bench}, {:dialyxir, "~> 1.0", only: [:dev, :test], runtime: false}, - {:ex_doc, ">= 0.0.0", only: :docs}, + {:ex_doc, ">= 0.0.0", only: :dev}, {:tz, "~> 0.28.1", only: :test}, {:nimble_lz4, "~> 1.1", only: [:dev, :test, :bench]}, {:stream_data, "~> 1.3", only: :test}, {:credo, "~> 1.7", only: [:dev, :test]} ] end + + defp telemetry_docs(_args) do + Mix.Task.run("loadpaths") + + {sections, _bindings} = Code.eval_file("pages/telemetry_events.exs") + sections_md = TelemetryDocs.sections_to_markdown(sections) + + summary_list = + sections + |> Enum.flat_map(&Keyword.fetch!(&1, :events)) + |> Enum.map_join("\n", fn {name, opts} -> + name = Atom.to_string(name) + + # Converts "[:ch, :query, :start]" to "ch-query-start" + anchor = + name + |> String.replace(["[", "]", ":"], "") + |> String.replace(", ", "-") + + "- [`#{name}`](##{anchor}) - #{Keyword.fetch!(opts, :doc)}" + end) + + preface = """ + # Telemetry Events + + Ch emits the following Telemetry events: + + #{summary_list} + + > #### Time Units {: .warning} + > + > All `:duration` and `:system_time` measurements are in the `:native` time unit. See `System.convert_time_unit/3` for how to convert it to "human" units. 
+ + """ + + File.write!("pages/telemetry-events.md", preface <> sections_md) + end end diff --git a/pages/telemetry_events.exs b/pages/telemetry_events.exs new file mode 100644 index 00000000..8434e9a3 --- /dev/null +++ b/pages/telemetry_events.exs @@ -0,0 +1,128 @@ +system_time = [ + system_time: [type: "`integer()`", doc: "System time in native time units."] +] + +duration = [ + duration: [type: "`integer()`", doc: "Duration in native time units."] +] + +exception_meta = [ + kind: [type: "`atom()`", doc: "One of `:throw`, `:error`, or `:exit`."], + reason: [type: "`term()`", doc: "The exception reason."], + stacktrace: [type: "`Exception.stacktrace()`", doc: "The exception stacktrace."] +] + +query_meta = [ + pool: [type: "`NimblePool.pool()`", doc: "The pool name or pid."], + statement: [type: "`iodata()`", doc: "The query statement."] +] + +pool_meta = [ + scheme: [type: "`atom()`", doc: "The connection scheme (e.g. `:http`)."], + host: [type: "`String.t()`", doc: "The host name."], + port: [type: "`:inet.port_number()`", doc: "The port number."] +] + +[ + [ + title: "Query Events", + doc: "Events emitted during query execution.", + events: [ + "[:ch, :query, :start]": [ + doc: "Emitted when a query execution starts.", + measurements: system_time, + metadata: query_meta + ], + "[:ch, :query, :stop]": [ + doc: "Emitted when a query completes successfully.", + measurements: [ + encode_time: [ + type: "`integer()`", + doc: "Time spent encoding the request in native units." + ], + queue_time: [ + type: "`integer()`", + doc: "Time spent waiting for a connection in native units." + ], + query_time: [ + type: "`integer()`", + doc: "Time spent executing the request over the network in native units." + ], + decode_time: [ + type: "`integer()`", + doc: "Time spent decoding the response in native units." + ], + total_time: [ + type: "`integer()`", + doc: "Total time from start to stop in native units." 
+ ], + idle_time: [ + type: "`integer()`", + doc: "Time the connection spent idle in the pool prior to this query." + ] + ], + metadata: + query_meta ++ + [result: [type: "`term()`", doc: "The query result."]] + ], + "[:ch, :query, :exception]": [ + doc: "Emitted when a query raises an exception.", + measurements: duration, + metadata: query_meta ++ exception_meta + ] + ] + ], + [ + title: "Pool Events", + doc: "Events emitted by the connection pool.", + events: [ + "[:ch, :pool, :connect, :start]": [ + doc: "Emitted when a TCP/TLS connection attempt starts.", + measurements: system_time, + metadata: pool_meta + ], + "[:ch, :pool, :connect, :stop]": [ + doc: "Emitted when a TCP/TLS connection attempt completes.", + measurements: duration, + metadata: + pool_meta ++ + [ + result: [ + type: "`{:ok, Mint.HTTP1.t()} | {:error, term()}`", + doc: "The result of the connection attempt." + ] + ] + ], + "[:ch, :pool, :connect, :exception]": [ + doc: "Emitted when a TCP/TLS connection attempt raises an exception.", + measurements: duration, + metadata: pool_meta ++ exception_meta + ], + "[:ch, :pool, :reused_connection]": [ + doc: "Emitted when an existing connection is successfully checked out of the pool.", + measurements: system_time, + metadata: pool_meta + ], + "[:ch, :pool, :disconnect]": [ + doc: "Emitted when a connection is closed and removed from the pool.", + measurements: system_time, + metadata: + pool_meta ++ + [ + reason: [type: "`term()`", doc: "The reason for disconnection."] + ] + ], + "[:ch, :pool, :connection_idle]": [ + doc: + "Emitted when a connection is checked out, tracking how long it sat idle in the pool.", + measurements: [ + idle_time: [ + type: "`integer()`", + doc: "Time the connection spent idle in the pool in native units." 
+ ] + ], + metadata: pool_meta + ] + ] + ] +] From 3419bbbdbc3f40ec3b2ac61cde81edb48fbb5394 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Tue, 14 Apr 2026 06:23:04 +0300 Subject: [PATCH 08/13] continue --- lib/ch/http.ex | 5 +++-- test/support/help.ex | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/ch/http.ex b/lib/ch/http.ex index 8c2093ed..96015c8b 100644 --- a/lib/ch/http.ex +++ b/lib/ch/http.ex @@ -6,13 +6,14 @@ defmodule Ch.HTTP do def deadline_from_timeout(:infinity = inf), do: inf def deadline_from_timeout(timeout) do - System.monotonic_time(:millisecond) + timeout + System.monotonic_time() + System.convert_time_unit(timeout, :millisecond, :native) end def timeout_from_deadline(:infinity = inf), do: inf def timeout_from_deadline(deadline) do - max(0, deadline - System.monotonic_time(:millisecond)) + timeout_native = max(0, deadline - System.monotonic_time()) + System.convert_time_unit(timeout_native, :native, :millisecond) end def encode_request(method, statement, params, options) do diff --git a/test/support/help.ex b/test/support/help.ex index 21d72ce5..1c4613b8 100644 --- a/test/support/help.ex +++ b/test/support/help.ex @@ -1,14 +1,14 @@ defmodule Help do @moduledoc false - def start_pool!(_test_context) do + def start_supervised_pool!(_test_context) do ExUnit.Callbacks.start_supervised!({Ch.Pool, scheme: :http, host: "localhost", port: 8123}) end def setup_pool(%{pool: pool}) when is_pid(pool), do: :ok def setup_pool(test_context) do - {:ok, pool: start_pool!(test_context), session_id: session_id(test_context)} + {:ok, pool: start_supervised_pool!(test_context), session_id: session_id(test_context)} end def query!(test_context, statement, params \\ %{}, options \\ []) do From e696405481a2f24d1c00c7430463ca3a75a234b2 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Tue, 14 Apr 2026 18:30:01 +0300 Subject: [PATCH 09/13] continue --- lib/ch/pool.ex | 107 ++++++++++++++++++++++++++++++------------- test/support/help.ex | 6 +-- 2 
files changed, 77 insertions(+), 36 deletions(-) diff --git a/lib/ch/pool.ex b/lib/ch/pool.ex index 1e20ab23..a1b558b5 100644 --- a/lib/ch/pool.ex +++ b/lib/ch/pool.ex @@ -8,25 +8,82 @@ defmodule Ch.Pool do @type statement :: iodata @type params :: %{String.t() => term} - @pool_size 10 - @worker_idle_timeout to_timeout(second: 5) @query_timeout to_timeout(second: 30) # TODO @type query_result :: term @type query_error :: Ch.Error.t() | Mint.Types.error() - # TODO nimble options, todo can pass settings + @start_options_schema [ + name: [ + type: :any, + doc: "Process name registration (e.g. `MyPool` or `{:via, Registry, :ch}`)." + ], + pool_size: [ + type: :pos_integer, + doc: "Maximum number of concurrent connections.", + default: 10 + ], + worker_idle_timeout: [ + type: :timeout, + doc: """ + Time a connection can stay idle before the pool closes it. + Should be lower than ClickHouse's `keep_alive_timeout`. + """, + default: to_timeout(second: 5) + ], + url: [ + type: :string, + doc: "The ClickHouse endpoint URL.", + default: "http://localhost:8123" + ], + connect_options: [ + type: :keyword_list, + default: [], + doc: "Options passed to `Mint.HTTP.connect/4` (e.g. `:timeout`, `:proxy`)." + ] + ] + + @typedoc """ + The options supported by `start_link/1`. + """ + @type start_option :: unquote(NimbleOptions.option_typespec(@start_options_schema)) + + @doc """ + Starts a new Ch pool process. 
+ + Supported options: + #{NimbleOptions.docs(@start_options_schema)} + """ @spec start_link(keyword) :: GenServer.on_start() def start_link(options) do - {name, options} = Keyword.pop(options, :name) - {pool_size, options} = Keyword.pop(options, :pool_size, @pool_size) + options = NimbleOptions.validate!(options, @start_options_schema) + + name = Keyword.get(options, :name) + pool_size = Keyword.fetch!(options, :pool_size) + worker_idle_timeout = Keyword.fetch!(options, :worker_idle_timeout) + url = Keyword.fetch!(options, :url) + + connect_options = + options + |> Keyword.get(:connect_options, []) + |> Keyword.put(:mode, :passive) + + %URI{scheme: scheme, host: host, port: port} = URI.parse(url) + + scheme = + case scheme do + "http" -> :http + "https" -> :https + _other -> raise ArgumentError, "unexpected HTTP scheme: #{inspect(scheme)}" + end - {worker_idle_timeout, options} = - Keyword.pop(options, :worker_idle_timeout, @worker_idle_timeout) + initial_pool_state = %{ + template: {:template, scheme, host, port, connect_options} + } NimblePool.start_link( - worker: {__MODULE__, options}, + worker: {__MODULE__, initial_pool_state}, pool_size: pool_size, worker_idle_timeout: worker_idle_timeout, lazy: true, @@ -34,11 +91,16 @@ defmodule Ch.Pool do ) end + @doc """ + Returns a child spec to allow Ch pool to be started under a supervisor. + + ## Options + + The options are exactly the same as for `start_link/1`. + """ @spec child_spec(keyword) :: Supervisor.child_spec() def child_spec(options) do - options - |> Keyword.put(:worker, {__MODULE__, options}) - |> NimblePool.child_spec() + %{id: __MODULE__, start: {__MODULE__, :start_link, [options]}} end @spec query(NimblePool.pool(), statement, params, keyword) :: @@ -49,7 +111,7 @@ defmodule Ch.Pool do {timeout, options} = Keyword.pop(options, :timeout, @query_timeout) deadline = Ch.HTTP.deadline_from_timeout(timeout) - # TODO retry on closed + # TODO retry on closed? backoff? 
result = NimblePool.checkout!( pool, @@ -85,23 +147,7 @@ defmodule Ch.Pool do end @impl NimblePool - def init_pool(options) do - scheme = Keyword.fetch!(options, :scheme) - host = Keyword.fetch!(options, :host) - port = Keyword.fetch!(options, :port) - - transport_options = - options - |> Keyword.get(:transport_options, []) - |> Keyword.put(:mode, :passive) - - config = %{ - scheme: scheme, - host: host, - port: port, - transport_options: transport_options - } - + def init_pool(config) do {:ok, config} end @@ -112,8 +158,7 @@ defmodule Ch.Pool do @impl NimblePool def handle_checkout(:request, _from, :template = template, config) do - %{scheme: scheme, host: host, port: port, transport_options: options} = config - {:ok, {template, scheme, host, port, options}, template, config} + {:ok, config.template, template, config} end def handle_checkout(:request, _from, %Mint.HTTP1{} = conn, config) do diff --git a/test/support/help.ex b/test/support/help.ex index 1c4613b8..b3d76b99 100644 --- a/test/support/help.ex +++ b/test/support/help.ex @@ -1,14 +1,10 @@ defmodule Help do @moduledoc false - def start_supervised_pool!(_test_context) do - ExUnit.Callbacks.start_supervised!({Ch.Pool, scheme: :http, host: "localhost", port: 8123}) - end - def setup_pool(%{pool: pool}) when is_pid(pool), do: :ok def setup_pool(test_context) do - {:ok, pool: start_supervised_pool!(test_context), session_id: session_id(test_context)} + {:ok, pool: ExUnit.Callbacks.start_supervised!(Ch.Pool), session_id: session_id(test_context)} end def query!(test_context, statement, params \\ %{}, options \\ []) do From a53d6a1ece8c67228c91a6e63ebe6c468d6c74bf Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Sat, 18 Apr 2026 17:49:06 +0300 Subject: [PATCH 10/13] improve error --- lib/ch/error.ex | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/ch/error.ex b/lib/ch/error.ex index 494a0f4c..a015f5d3 100644 --- a/lib/ch/error.ex +++ b/lib/ch/error.ex @@ -3,12 +3,15 @@ defmodule 
Ch.Error do defexception [:code, :message] @typedoc """ - The Error struct. + The Error struct. See [ErrorCodes.cpp](https://github.com/ClickHouse/ClickHouse/blob/5ce532e6f930c6f7fbdfa98b0327cc007df894b7/src/Common/ErrorCodes.cpp#) for possible errors. ## Fields * `:code` - The ClickHouse numeric error code * `:message` - The error message returned by the server """ - @type t :: %__MODULE__{code: pos_integer | nil, message: String.t()} + @type t :: %__MODULE__{ + code: non_neg_integer | nil, + message: String.t() + } end From cc9b46656ab4315ecce93658e076d0dcb215ce3f Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Sat, 18 Apr 2026 20:43:42 +0300 Subject: [PATCH 11/13] vibes --- .github/workflows/bench.yml | 63 ++++ README.md | 36 ++- bench/ci.exs | 29 ++ lib/ch/http.ex | 467 ++++++++++++++++++++++++---- lib/ch/row_binary.ex | 1 + mix.exs | 13 +- pages/compression.md | 119 +++++++ pages/defaults.md | 0 pages/inserts.md | 100 ++++++ pages/json.md | 74 +++++ pages/multihost.md | 0 pages/multipart.md | 0 pages/streaming.md | 119 +++++++ test/ch/guides/compression_test.exs | 110 +++++++ test/ch/guides/inserts_test.exs | 60 ++++ test/ch/guides/json_test.exs | 62 ++++ test/ch/guides/streaming_test.exs | 104 +++++++ test/ch/http_test.exs | 0 18 files changed, 1280 insertions(+), 77 deletions(-) create mode 100644 .github/workflows/bench.yml create mode 100644 bench/ci.exs create mode 100644 pages/compression.md create mode 100644 pages/defaults.md create mode 100644 pages/inserts.md create mode 100644 pages/json.md create mode 100644 pages/multihost.md create mode 100644 pages/multipart.md create mode 100644 pages/streaming.md create mode 100644 test/ch/guides/compression_test.exs create mode 100644 test/ch/guides/inserts_test.exs create mode 100644 test/ch/guides/json_test.exs create mode 100644 test/ch/guides/streaming_test.exs create mode 100644 test/ch/http_test.exs diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml new file mode 100644 index 
00000000..a70e54f1 --- /dev/null +++ b/.github/workflows/bench.yml @@ -0,0 +1,63 @@ +name: bench + +on: + push: + branches: [master] + schedule: + - cron: "42 10 * * *" + workflow_dispatch: + +# Write access is needed to push benchmark data to gh-pages. +# This workflow intentionally does NOT run on pull_request to avoid +# granting write permissions to untrusted forks. +permissions: + contents: write + +jobs: + bench: + name: Benchmarks + runs-on: ubuntu-latest + + env: + MIX_ENV: bench + + steps: + - uses: actions/checkout@v4 + + - id: beam + uses: erlef/setup-beam@v1 + with: + elixir-version: "1.19" + otp-version: "28" + + - name: Restore deps cache + uses: actions/cache@v4 + with: + path: | + deps + _build + key: bench-${{ steps.beam.outputs.elixir-version }}-${{ hashFiles('**/mix.lock') }} + restore-keys: | + bench-${{ steps.beam.outputs.elixir-version }}- + + - run: mix deps.get + + - run: mix benchee_github.clean + + - run: mix run bench/ci.exs + + - name: Store benchmark results + uses: benchmark-action/github-action-benchmark@v1 + with: + name: Ch RowBinary + tool: customSmallerIsBetter + output-file-path: bench_output.json + github-token: ${{ secrets.GITHUB_TOKEN }} + auto-push: true + gh-pages-branch: gh-pages + benchmark-data-dir-path: benchmarks + # Alert on >20% regression compared to previous run on master + alert-threshold: "120%" + comment-on-alert: true + fail-on-alert: false + summary-always: true diff --git a/README.md b/README.md index c0b33c89..dda10578 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,17 @@ [![Documentation badge](https://img.shields.io/badge/Documentation-ff69b4)](https://hexdocs.pm/ch) [![Hex.pm badge](https://img.shields.io/badge/Package%20on%20hex.pm-informational)](https://hex.pm/packages/ch) +[![Benchmarks badge](https://img.shields.io/badge/Benchmarks-orange)](https://plausible.github.io/ch/benchmarks/) Minimal HTTP [ClickHouse](https://clickhouse.com) client for Elixir. 
-Used in [Ecto ClickHouse adapter.](https://github.com/plausible/ecto_ch) +Three layers: + +- **`Ch.HTTP`** — stateless helpers for `Mint.HTTP1`: encode requests, receive and decode responses (single-shot or streaming), deadline propagation +- **`Ch.Pool`** — `NimblePool` of `Mint.HTTP1` connections tuned for ClickHouse (short keepalive, lazy connect) +- **`Ch.Buffer`** — data structure for accumulating rows as `RowBinaryWithNamesAndTypes` for `INSERT` + +Used in [Ecto ClickHouse adapter](https://github.com/plausible/ecto_ch). ## Installation @@ -19,8 +26,31 @@ end ## Usage -See guides and tests for examples. +```elixir +deadline = Ch.HTTP.to_deadline(to_timeout(second: 15)) + +{:ok, conn} = + Mint.HTTP1.connect(:http, "localhost", 8123, + mode: :passive, + timeout: Ch.HTTP.to_timeout(deadline) + ) + +try do + {path, headers, body} = Ch.HTTP.encode("SELECT 1") + + with {:ok, _ref, conn} <- Mint.HTTP1.request(conn, "POST", path, headers, body), + {:ok, {200, _headers, body}, conn} <- Ch.HTTP.recv_all(conn, deadline), + {:ok, _names, rows} <- Ch.HTTP.decode(200, _headers, body) do + rows + end +after + Mint.HTTP1.close(conn) +end +``` + +See [guides](./guides) and [tests](./test) for more examples. ## [Benchmarks](./bench) -See nightly [CI runs](https://github.com/plausible/ch/actions/workflows/bench.yml) for latest results. +Results tracked over time at [plausible.github.io/ch/benchmarks](https://plausible.github.io/ch/benchmarks/). +See [bench/](./bench) for local benchmark scripts. diff --git a/bench/ci.exs b/bench/ci.exs new file mode 100644 index 00000000..818b78bd --- /dev/null +++ b/bench/ci.exs @@ -0,0 +1,29 @@ +# CI benchmark suite — no external services required. +# Tracks RowBinary encode/decode performance over time via BencheeGithub + github-action-benchmark. 
+# Run with: MIX_ENV=bench mix run bench/ci.exs + +alias Ch.RowBinary + +types = ["UInt64", "String", "Array(UInt8)", "DateTime"] +names = ["id", "name", "tags", "created_at"] + +rows = + Enum.map(1..1_000, fn i -> + [i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], DateTime.utc_now()] + end) + +encoded = IO.iodata_to_binary(RowBinary.encode_rows(rows, types)) +encoded_with_header = + IO.iodata_to_binary([RowBinary.encode_names_and_types(names, types) | encoded]) + +Benchee.run( + %{ + "encode_rows/2 — 1_000 rows" => fn -> RowBinary.encode_rows(rows, types) end, + "decode_rows/2 — 1_000 rows" => fn -> RowBinary.decode_rows(encoded, types) end, + "decode_names_and_rows/1 — 1_000 rows" => fn -> + RowBinary.decode_names_and_rows(encoded_with_header) + end + }, + formatters: [Benchee.Formatters.Console, {BencheeGithub, output_path: "bench_output.json"}] +) + diff --git a/lib/ch/http.ex b/lib/ch/http.ex index 96015c8b..614e6d66 100644 --- a/lib/ch/http.ex +++ b/lib/ch/http.ex @@ -1,121 +1,452 @@ defmodule Ch.HTTP do @moduledoc """ - Helpers for `Mint.HTTP1` for working with ClickHouse. + Stateless helpers for `Mint.HTTP1` with ClickHouse-specific encoding and decoding. + + Provides three layers of functionality: + + 1. **Deadline / timeout helpers** — convert between relative millisecond timeouts + and absolute monotonic deadlines, so a single deadline propagates correctly + across multiple network calls. + + 2. **Request encoding** — build a `{path, headers, body}` triple ready for + `Mint.HTTP1.request/5`. Parameter binding is handled transparently. + + 3. **Response decoding** — single-shot (`decode/3`) or streaming + (`decode_start/1` + `decode_continue/2`) decoding of ClickHouse HTTP responses. + + The caller retains full control of the connection lifecycle and the HTTP method. + Body compression is the caller's responsibility: compress `body` manually and pass + `{"content-encoding", "gzip"}` in `opts[:headers]`. 
Responses with + `content-encoding: gzip` are decompressed automatically by `decode/3`. + + ## Single-shot usage + + deadline = Ch.HTTP.to_deadline(to_timeout(second: 15)) + + {:ok, conn} = + Mint.HTTP1.connect(:http, "localhost", 8123, + mode: :passive, + timeout: Ch.HTTP.to_timeout(deadline) + ) + + try do + {path, headers, body} = Ch.HTTP.encode("CREATE TABLE demo(a Int64) ENGINE Null") + + with {:ok, _ref, conn} <- Mint.HTTP1.request(conn, "POST", path, headers, body), + {:ok, {status, headers, body}, conn} <- Ch.HTTP.recv_all(conn, deadline), + :ok <- Ch.HTTP.decode(status, headers, body) do + :ok + end + after + Mint.HTTP1.close(conn) + end + + ## Streaming + + For large result sets, use `decode_start/1` + `decode_continue/2` to process rows + as Mint data chunks arrive, without buffering the entire response body. The caller + handles `:status` and `:headers` responses, then passes only data to the decoder: + + # active-mode receive loop (passive mode: same but with Mint.HTTP1.recv/3) + receive do + message -> + {:ok, conn, responses} = Mint.HTTP1.stream(conn, message) + + Enum.reduce(responses, state, fn + {:status, _ref, _status}, state -> + state + + {:headers, _ref, headers}, _state -> + Ch.HTTP.decode_start(headers) + + {:data, _ref, chunk}, state -> + case Ch.HTTP.decode_continue(chunk, state) do + {:rows, rows, names, state} -> process_rows(rows, names); state + {:more, state} -> state + end + + {:done, _ref}, state -> + {:ok, names, rows} = Ch.HTTP.decode_continue(:end_of_input, state) + done(names, rows) + end) + end """ - def deadline_from_timeout(:infinity = inf), do: inf + import Kernel, except: [to_timeout: 1] + + @typedoc """ + Represents a deadline for an operation. - def deadline_from_timeout(timeout) do - System.monotonic_time() + System.convert_time_unit(timeout, :millisecond, :native) + Either `:infinity` or `{:deadline, timestamp}` where `timestamp` is an absolute + time in milliseconds from `System.monotonic_time(:millisecond)`. 
+ """ + @type deadline :: {:deadline, integer} | :infinity + + @typedoc """ + Opaque streaming decoder state. + + Returned by `decode_start/1` and updated by each call to `decode_continue/2`. + """ + @opaque decode_state :: + {:awaiting_rb_header, buf :: binary} + | {:decoding_rows, names :: [String.t()], types :: [term], row_state :: term, + remainder :: binary} + | {:raw, acc :: iodata} + + @doc """ + Converts a relative timeout (milliseconds) or existing `t:deadline/0` to a `t:deadline/0`. + + Passing an already-converted `{:deadline, _}` tuple is a no-op, making this safe to + call at multiple layers of the call stack without double-adding the offset. + """ + @spec to_deadline(timeout | deadline) :: deadline + def to_deadline(:infinity), do: :infinity + def to_deadline({:deadline, _timestamp} = deadline), do: deadline + + def to_deadline(timeout) when is_integer(timeout) do + {:deadline, System.monotonic_time(:millisecond) + timeout} end - def timeout_from_deadline(:infinity = inf), do: inf + @doc """ + Returns the remaining milliseconds until `deadline`, suitable for passing to Mint. - def timeout_from_deadline(deadline) do - timeout_native = max(0, deadline - System.monotonic_time()) - System.convert_time_unit(timeout_native, :native, :millisecond) + Always returns `>= 0`; clamps to `0` if the deadline has already passed (Mint does + not accept negative timeouts). + """ + @spec to_timeout(timeout | deadline) :: timeout + def to_timeout(:infinity), do: :infinity + def to_timeout(timeout) when is_integer(timeout), do: timeout + + def to_timeout({:deadline, timestamp}) do + max(0, timestamp - System.monotonic_time(:millisecond)) end - def encode_request(method, statement, params, options) do - settings = Keyword.get(options, :settings, []) + @doc """ + Encodes a ClickHouse HTTP request with no parameters. 
- headers = - options - |> Keyword.get(:headers, []) - |> put_new_header("x-clickhouse-format", "RowBinaryWithNamesAndTypes") + Equivalent to `encode(statement, %{}, [])`. + """ + @spec encode(statement :: iodata) :: + {path :: String.t(), headers :: Mint.Types.headers(), body :: iodata} + def encode(statement) do + encode(statement, %{}, []) + end + + @doc """ + Encodes a ClickHouse HTTP request with parameters and options. + + Returns `{path, headers, body}` ready for `Mint.HTTP1.request/5`. The HTTP method + (`"POST"`) and connection lifecycle remain the caller's responsibility. + + ## Parameters + + Parameters are encoded using ClickHouse's + [escaped HTTP format](https://clickhouse.com/docs/en/interfaces/http#tabs-in-url-parameters), + which follows the same escaping rules as ClickHouse's TSV format: tab (`\\t`), + newline (`\\n`), and backslash (`\\`) are backslash-escaped. + + * **Named** — `%{"city" => "Prague"}` → `?param_city=Prague` + * **Positional** — `["Prague", 42]` → `?param_$0=Prague¶m_$1=42` + + ## Options + + * `:headers` — additional Mint-style headers forwarded verbatim, e.g. + `[{"x-clickhouse-user", "alice"}, {"x-clickhouse-key", "secret"}]`. + + ## Body + + The returned `body` is the `statement` iodata unchanged. No RowBinary encoding or + compression is applied — those are the caller's responsibility: + + compressed = :zlib.compress(IO.iodata_to_binary(statement)) + {path, headers, body} = + Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "gzip"}]) + """ + @spec encode( + statement :: iodata, + params :: %{String.t() => term} | [term], + opts :: keyword + ) :: + {path :: String.t(), headers :: Mint.Types.headers(), body :: iodata} + def encode(statement, params, opts) do + query_params = encode_params(params) + + path = + case query_params do + [] -> "/" + _ -> "/?" 
<> URI.encode_query(query_params) + end + + headers = Keyword.get(opts, :headers, []) + {path, headers, statement} + end + + @doc """ + Receives a complete HTTP response from a passive `Mint.HTTP1` connection. - path = "/?" <> URI.encode_query(settings ++ encode_params(params)) - %{method: method, path: path, headers: headers, body: statement} + Accumulates all Mint response messages until `{:done, ref}` and returns the + raw `{status, headers, body}` triple, which can be passed directly to `decode/3`. + + Accepts a plain timeout in milliseconds or a `t:deadline/0`. When given a deadline, + the remaining time is recomputed before each `Mint.HTTP1.recv/3` call. + """ + @spec recv_all(Mint.HTTP1.t(), timeout | deadline) :: + {:ok, {status :: non_neg_integer, Mint.Types.headers(), body :: binary}, Mint.HTTP1.t()} + | {:error, Mint.HTTP1.t(), Mint.Types.error()} + def recv_all(conn, timeout_or_deadline) do + deadline = to_deadline(timeout_or_deadline) + do_recv_all(conn, _status = nil, _headers = [], _data = [], deadline) end - def request(conn, request, deadline) do - %{method: method, path: path, headers: headers, body: body} = request + defp do_recv_all(conn, status, headers, data, deadline) do + case Mint.HTTP1.recv(conn, 0, to_timeout(deadline)) do + {:ok, conn, responses} -> + case handle_responses(responses, status, headers, data) do + {:ok, status, headers, body} -> + {:ok, {status, headers, body}, conn} - case Mint.HTTP1.request(conn, method, path, headers, body) do - {:ok, conn, _ref} -> - receive_response(conn, [], deadline) + {:more, status, headers, data} -> + do_recv_all(conn, status, headers, data, deadline) - {:error, conn, reason} -> - _todo = Mint.HTTP1.close(conn) - {:error, reason} + {:error, reason} -> + {:error, conn, reason} + end + + {:error, conn, reason, _responses} -> + {:error, conn, reason} end end - defp receive_response(conn, acc, deadline) do - timeout = timeout_from_deadline(deadline) + @dialyzer {:no_improper_lists, handle_responses: 
4} + defp handle_responses([{:status, _ref, status} | rest], _status, headers, data) do + handle_responses(rest, status, headers, data) + end - case Mint.HTTP1.recv(conn, 0, timeout) do - {:ok, conn, fragments} -> - case handle_response_fragments(fragments, acc) do - {:ok, response} -> {:ok, conn, response} - {:more, acc} -> receive_response(conn, acc, deadline) - end + defp handle_responses([{:headers, _ref, new_headers} | rest], status, prev_headers, data) do + handle_responses(rest, status, prev_headers ++ new_headers, data) + end + + defp handle_responses([{:data, _ref, new_data} | rest], status, headers, prev_data) do + handle_responses(rest, status, headers, [prev_data | new_data]) + end + + defp handle_responses([{:done, _ref} | _rest], status, headers, data) do + {:ok, status, headers, IO.iodata_to_binary(data)} + end + + defp handle_responses([{:error, _ref, reason} | _rest], _status, _headers, _data) do + {:error, reason} + end + + defp handle_responses([], status, headers, data) do + {:more, status, headers, data} + end + + @doc """ + Decodes a complete ClickHouse HTTP response. + + Accepts the `{status, headers, body}` triple returned by `recv_all/2`. + Handles errors, decompression, and `RowBinaryWithNamesAndTypes` decoding. - {:error, conn, reason, _fragments} -> - _todo = Mint.HTTP1.close(conn) - {:error, reason} + * Non-200 status → `{:error, Ch.Error.t()}` with code and message from ClickHouse. + * `content-encoding: gzip` → automatically decompressed before parsing. + * `x-clickhouse-format: RowBinaryWithNamesAndTypes` → `{:ok, names, rows}`. + * Empty body (DDL, INSERT without result) → `:ok`. + * Other or absent format → `{:ok, [], [body]}` with the raw binary. + + ## Example + + {:ok, {status, headers, body}, conn} = Ch.HTTP.recv_all(conn, deadline) + case Ch.HTTP.decode(status, headers, body) do + {:ok, names, rows} -> ... + :ok -> ... + {:error, error} -> ... 
+ end + """ + @spec decode( + status :: non_neg_integer, + headers :: Mint.Types.headers(), + body :: binary + ) :: + :ok + | {:ok, names :: [String.t()], rows :: [[term]]} + | {:error, Ch.Error.t()} + def decode(status, headers, body) + + def decode(status, headers, body) when status != 200 do + code = + case get_header(headers, "x-clickhouse-exception-code") do + nil -> nil + code -> String.to_integer(code) + end + + {:error, Ch.Error.exception(code: code, message: body)} + end + + def decode(200, headers, body) do + body = maybe_decompress(body, get_header(headers, "content-encoding")) + + case get_header(headers, "x-clickhouse-format") do + "RowBinaryWithNamesAndTypes" -> + [names | rows] = Ch.RowBinary.decode_names_and_rows(body) + {:ok, names, rows} + + _other -> + case body do + "" -> :ok + _ -> {:ok, [], [body]} + end end end - for tag <- [:data, :status, :headers] do - defp handle_response_fragments([{unquote(tag), _ref, data} | rest], acc) do - handle_response_fragments(rest, [data | acc]) + @doc """ + Initialises a streaming ClickHouse response decoder from response headers. + + Inspects `x-clickhouse-format` to determine how to decode incoming data chunks. + The returned `t:decode_state/0` is passed to `decode_continue/2` along with each + binary chunk extracted from `{:data, ref, chunk}` Mint responses. + + The caller is responsible for handling `{:status, _, _}` and `{:headers, _, _}` + responses before calling `decode_start/1`, and for passing `{:done, _}` as + `:end_of_input` to `decode_continue/2`. + + ## Example + + {:headers, _ref, headers} = ... # from Mint.HTTP1.stream/2 + state = Ch.HTTP.decode_start(headers) + + {:data, _ref, chunk} = ... + case Ch.HTTP.decode_continue(chunk, state) do + {:rows, rows, names, state} -> ... + {:more, state} -> ... + end + + {:done, _ref} = ... 
+ {:ok, names, []} = Ch.HTTP.decode_continue(:end_of_input, state) + """ + @spec decode_start(headers :: Mint.Types.headers()) :: decode_state + def decode_start(headers) do + case get_header(headers, "x-clickhouse-format") do + "RowBinaryWithNamesAndTypes" -> {:awaiting_rb_header, <<>>} + _other -> {:raw, []} end end - defp handle_response_fragments([{:done, _ref}], acc), do: {:ok, :lists.reverse(acc)} - defp handle_response_fragments([], acc), do: {:more, acc} + @doc """ + Feeds a binary chunk into a streaming decoder, advancing its state. - def decode_response(response, _options) do - case response do - [200, headers | data] -> - result = - case get_header(headers, "x-clickhouse-format") do - "RowBinaryWithNamesAndTypes" -> - [names | rows] = - data - |> IO.iodata_to_binary() - |> Ch.RowBinary.decode_names_and_rows() + Pass binary chunks extracted from `{:data, ref, chunk}` Mint response tuples. + When the response is complete (`:done` received from Mint), pass `:end_of_input` + to finalise and retrieve any remaining output. - %{columns: names, rows: rows} + ## Return values - _other -> - %{data: data} - end + * `{:rows, rows, names, state}` — one or more complete rows decoded. `names` is + the list of column names from the `RowBinaryWithNamesAndTypes` header. + Continue calling `decode_continue/2` with the next chunk. + * `{:more, state}` — chunk consumed, no complete rows yet (e.g. still accumulating + the RowBinary header). Continue with the next chunk. + * `{:ok, names, rows}` — stream complete. If rows were emitted incrementally via + `{:rows, ...}`, the final `rows` list here will be empty. + * `{:error, Ch.Error.t()}` — decoding failed. 
+ """ + @spec decode_continue(chunk :: binary | :end_of_input, decode_state) :: + {:rows, rows :: [[term]], names :: [String.t()], decode_state} + | {:more, decode_state} + | {:ok, names :: [String.t()], rows :: [[term]]} + | {:error, Ch.Error.t()} + def decode_continue(:end_of_input, state) do + flush_state(state) + end - {:ok, result} + def decode_continue(chunk, {:awaiting_rb_header, buf}) when is_binary(chunk) do + buf = buf <> chunk - [_status, headers | data] -> - message = IO.iodata_to_binary(data) + case Ch.RowBinary.decode_header(buf) do + :more -> + {:more, {:awaiting_rb_header, buf}} - code = - if code = get_header(headers, "x-clickhouse-exception-code") do - String.to_integer(code) - end + {:ok, names, types, rest} -> + {rows, remainder, row_state} = Ch.RowBinary.decode_rows_continue(rest, types, nil) + new_state = {:decoding_rows, names, types, row_state, remainder} - {:error, Ch.Error.exception(code: code, message: message)} + case rows do + [] -> {:more, new_state} + _ -> {:rows, rows, names, new_state} + end end end - defp put_new_header(headers, name, value) do - if List.keymember?(headers, name, 0) do - headers - else - [{name, value} | headers] + def decode_continue(chunk, {:decoding_rows, names, types, row_state, remainder}) + when is_binary(chunk) do + {rows, new_remainder, new_row_state} = + Ch.RowBinary.decode_rows_continue(remainder <> chunk, types, row_state) + + new_state = {:decoding_rows, names, types, new_row_state, new_remainder} + + case rows do + [] -> {:more, new_state} + _ -> {:rows, rows, names, new_state} + end + end + + def decode_continue(chunk, {:raw, acc}) when is_binary(chunk) do + {:more, {:raw, [acc | chunk]}} + end + + defp flush_state({:awaiting_rb_header, <<>>}) do + {:ok, [], []} + end + + defp flush_state({:awaiting_rb_header, _buf}) do + {:error, + Ch.Error.exception(code: nil, message: "incomplete RowBinaryWithNamesAndTypes header")} + end + + defp flush_state({:decoding_rows, names, _types, _row_state, _remainder}) 
do + # All rows already emitted via {:rows, ...} during streaming + {:ok, names, []} + end + + defp flush_state({:raw, acc}) do + case IO.iodata_to_binary(acc) do + "" -> {:ok, [], []} + body -> {:ok, [], [body]} end end + ## Private helpers + + defp maybe_decompress(body, "gzip"), do: :zlib.gunzip(body) + defp maybe_decompress(body, "zstd"), do: :zstd.decompress(body) + defp maybe_decompress(body, _encoding), do: body + + defp get_header(headers, key) do case List.keyfind(headers, key, 0) do {_, value} -> value - nil = not_found -> not_found + nil -> nil end end + # Encodes query parameters for ClickHouse HTTP URL binding. + # + # ClickHouse uses an "escaped" parameter format identical to its TSV format escaping + # (see https://clickhouse.com/docs/en/interfaces/http#tabs-in-url-parameters): + # tab (\t), newline (\n), and backslash (\) are backslash-escaped. + # + # Named params: %{"city" => "Prague"} → [{"param_city", "Prague"}] + # Positional params: ["Prague", 42] → [{"param_$0", "Prague"}, {"param_$1", "42"}] defp encode_params(params) when is_map(params) do Enum.map(params, fn {k, v} -> {"param_#{k}", encode_param(v)} end) end + defp encode_params(params) when is_list(params) do + params + |> Enum.with_index() + |> Enum.map(fn {v, idx} -> {"param_$#{idx}", encode_param(v)} end) + end + defp encode_param(n) when is_integer(n), do: Integer.to_string(n) defp encode_param(f) when is_float(f), do: Float.to_string(f) diff --git a/lib/ch/row_binary.ex b/lib/ch/row_binary.ex index 31a19d0b..7f001910 100644 --- a/lib/ch/row_binary.ex +++ b/lib/ch/row_binary.ex @@ -835,6 +835,7 @@ defmodule Ch.RowBinary do to_be_continued(rows, bin, [:string | types_rest], row) end + # TODO remove @doc false def to_utf8(str) do utf8 = to_utf8(str, 0, 0, str, []) diff --git a/mix.exs b/mix.exs index 8f8247cf..6068c7eb 100644 --- a/mix.exs +++ b/mix.exs @@ -64,19 +64,20 @@ defmodule Ch.MixProject do defp deps do [ {:mint, "~> 1.0"}, - {:nimble_pool, "~> 1.1"}, - {:nimble_options, "~> 
1.1"}, - {:telemetry, "~> 1.4"}, + {:nimble_pool, "~> 1.0"}, + {:nimble_options, "~> 1.0"}, + {:telemetry, "~> 1.0"}, {:telemetry_docs, "~> 0.1.0", only: :dev}, {:decimal, "~> 2.0"}, {:ecto, "~> 3.13.0", optional: true}, {:benchee, "~> 1.0", only: :bench}, + {:benchee_github, "~> 0.1", only: :bench}, {:dialyxir, "~> 1.0", only: [:dev, :test], runtime: false}, {:ex_doc, ">= 0.0.0", only: :dev}, {:tz, "~> 0.28.1", only: :test}, - {:nimble_lz4, "~> 1.1", only: [:dev, :test, :bench]}, - {:stream_data, "~> 1.3", only: :test}, - {:credo, "~> 1.7", only: [:dev, :test]} + {:nimble_lz4, "~> 1.0", only: [:dev, :test, :bench]}, + {:stream_data, "~> 1.0", only: :test}, + {:credo, "~> 1.0", only: [:dev, :test]} ] end diff --git a/pages/compression.md b/pages/compression.md new file mode 100644 index 00000000..f72f7d53 --- /dev/null +++ b/pages/compression.md @@ -0,0 +1,119 @@ +# Compression + +ClickHouse HTTP accepts compressed request bodies via the `content-encoding` header. +Compress the **entire request body** — including the SQL statement prefix for INSERTs — +before sending. `Ch.HTTP.decode/3` automatically decompresses `gzip` and `zstd` responses. 
+ +**Further reading:** +- [Compression in ClickHouse](https://clickhouse.com/docs/data-compression/compression-in-clickhouse) +- [Compression modes](https://clickhouse.com/docs/data-compression/compression-modes) +- [Optimizing with schemas and codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) +- [Input format matchup: which is fastest](https://clickhouse.com/blog/clickhouse-input-format-matchup-which-is-fastest-most-efficient) — [FastFormats benchmark](https://fastformats.clickhouse.com) +- [Supercharging large data loads](https://clickhouse.com/blog/supercharge-your-clickhouse-data-loads-part2) +- [What really matters for performance](https://clickhouse.com/blog/what-really-matters-for-performance-lessons-from-a-year-of-benchmarks) + +## zstd (preferred, stdlib) + +`:zstd` is part of OTP 28 stdlib. ZSTD is also ClickHouse Cloud's default column +compression codec. For HTTP transport, ZSTD achieves 30–50% smaller payloads than LZ4 at +the cost of more CPU on the client during compression. +**Use ZSTD when bandwidth costs money** (cross-region, CDN egress) or when your client +has idle CPU — the server decompresses quickly enough that the transfer saving usually wins. + +```elixir +rows = Ch.RowBinary.encode_rows([[1, "pageview", DateTime.utc_now()]], types) +body = IO.iodata_to_binary([ + "INSERT INTO events FORMAT RowBinaryWithNamesAndTypes\n", + Ch.RowBinary.encode_names_and_types(names, types), + rows +]) + +compressed = :zstd.compress(body) + +{path, headers, body} = + Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "zstd"}]) + +{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", path, headers, body) +``` + +> The SQL statement and RowBinary payload must be compressed together as one blob. +> Only compressing the data rows and leaving the statement uncompressed does not work. + +## gzip (stdlib, fallback) + +`:zlib.gzip/1` is available on all OTP versions. 
Lower compression ratio and slower than +ZSTD, but zero extra dependencies. Good choice if OTP < 28 or you need maximum compatibility. + +```elixir +compressed = :zlib.gzip(body) + +{path, headers, body} = + Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "gzip"}]) +``` + +`Ch.HTTP.decode/3` decompresses `gzip` responses automatically. + +## lz4 (nimble_lz4) + +LZ4 compresses and decompresses faster than both gzip and zstd, with a moderate ratio. +Per the [FastFormats benchmark](https://fastformats.clickhouse.com), LZ4 is a +"no-brainer" for same-region deployments: it cuts wire size roughly in half with +negligible CPU overhead on both client and server. + +```elixir +{:ok, compressed} = NimbleLz4.compress(body) + +{path, headers, body} = + Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "lz4"}]) +``` + +Add to deps: + +```elixir +{:nimble_lz4, "~> 1.1"} +``` + +## Response decompression + +`Ch.HTTP.decode/3` decompresses responses with `content-encoding: gzip` or +`content-encoding: zstd` automatically. For other encodings (e.g. `lz4`), +decompress the body before calling `decode/3`, or simply do not request compressed +responses (the default — ClickHouse sends uncompressed unless you add +`Accept-Encoding` to the request). + +## Which to use? + +| | gzip | zstd | lz4 | +|---|---|---|---| +| Dep | none (stdlib) | none (OTP ≥ 28) | `nimble_lz4` | +| Ratio | good | best | moderate | +| Client CPU | medium | high | low | +| Server CPU | medium | low | lowest | +| Best for | compatibility | bandwidth-sensitive | same-region throughput | + +ClickHouse's own benchmark ([FastFormats](https://fastformats.clickhouse.com)) shows +RowBinaryWithNamesAndTypes+LZ4 reduces payload to ~60% of uncompressed size with +minimal overhead. ZSTD takes it to ~30% of original. In same-region tests, those +extra CPU cycles slightly offset the bandwidth saving; in cross-region or metered +bandwidth scenarios, ZSTD wins overall. 
+ +## Column-level compression (on-disk, not HTTP) + +The compression you choose for HTTP transport is separate from ClickHouse's on-disk +column codec. ClickHouse Cloud defaults to `ZSTD(1)` for column storage. +Tune per-column codecs in your `CREATE TABLE` DDL: + +```sql +CREATE TABLE events ( + id UInt64, + name LowCardinality(String), + ts DateTime64(3, 'UTC') CODEC(Delta, ZSTD) +) ENGINE = MergeTree ORDER BY (name, ts) +``` + +See [Optimizing with schemas and codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) +for guidance on `Delta`, `DoubleDelta`, `Gorilla`, `T64`, and when each helps. + +## Tests + +See [`test/ch/guides/compression_test.exs`](../test/ch/guides/compression_test.exs). diff --git a/pages/defaults.md b/pages/defaults.md new file mode 100644 index 00000000..e69de29b diff --git a/pages/inserts.md b/pages/inserts.md new file mode 100644 index 00000000..1e98d520 --- /dev/null +++ b/pages/inserts.md @@ -0,0 +1,100 @@ +# Inserts + +ClickHouse is optimised for large batch INSERTs. Inserting rows one-by-one is an +antipattern — each INSERT triggers a merge on the server. Aim for **100k–1M rows per +batch** or at minimum flush every few seconds. + +## Format + +Use `RowBinaryWithNamesAndTypes`. 
The INSERT statement prefix declares the format: + +```elixir +statement = "INSERT INTO events (id, name, created_at) FORMAT RowBinaryWithNamesAndTypes\n" +types = ["UInt64", "String", "DateTime"] +names = ["id", "name", "created_at"] + +header = Ch.RowBinary.encode_names_and_types(names, types) +rows = Ch.RowBinary.encode_rows([[1, "pageview", DateTime.utc_now()]], types) + +body = IO.iodata_to_binary([statement, header | rows]) +{path, headers, _body} = Ch.HTTP.encode(body) +``` + +## Batching with a GenServer + +A simple GenServer accumulates rows and flushes on size or time threshold: + +```elixir +defmodule EventBuffer do + use GenServer + + @flush_interval :timer.seconds(5) + @max_rows 100_000 + + def start_link(opts), do: GenServer.start_link(__MODULE__, opts, name: __MODULE__) + + def insert(rows), do: GenServer.cast(__MODULE__, {:insert, rows}) + + def init(opts) do + schedule_flush() + {:ok, %{rows: [], count: 0, conn: nil, opts: opts}} + end + + def handle_cast({:insert, new_rows}, %{count: count} = state) do + state = %{state | rows: [state.rows | new_rows], count: count + length(new_rows)} + + if state.count >= @max_rows do + {:noreply, flush(state)} + else + {:noreply, state} + end + end + + def handle_info(:flush, state), do: {:noreply, flush(state)} + + defp flush(%{rows: []} = state), do: state + + defp flush(state) do + # build and send INSERT here, then reset + schedule_flush() + %{state | rows: [], count: 0} + end + + defp schedule_flush, do: Process.send_after(self(), :flush, @flush_interval) +end +``` + +## Batching with ETS for concurrent writers + +When multiple processes produce rows concurrently, use ETS as a lock-free accumulator: + +```elixir +# In application startup: +:ets.new(:event_buffer, [:bag, :public, :named_table]) + +# From any process: +:ets.insert(:event_buffer, {:row, [id, name, created_at]}) + +# In a periodic flusher: +rows = :ets.tab2list(:event_buffer) |> Enum.map(fn {:row, r} -> r end) 
+:ets.delete_all_objects(:event_buffer) +# INSERT rows ... +``` + +ETS gives you concurrent inserts without a GenServer bottleneck. Use `:ets.select_delete/2` +for atomic take-and-clear on busier tables. + +## x-clickhouse-summary + +A successful INSERT response includes an `x-clickhouse-summary` header: + +```json +{"written_rows": "150000", "written_bytes": "3145728", ...} +``` + +Extract it from the response headers — see `Ch.HTTP.decode/3` which returns it +parsed as a map in the result. + +## Tests + +See [`test/ch/guides/inserts_test.exs`](../test/ch/guides/inserts_test.exs). diff --git a/pages/json.md b/pages/json.md new file mode 100644 index 00000000..813bdcee --- /dev/null +++ b/pages/json.md @@ -0,0 +1,74 @@ +# JSON + +ClickHouse has two distinct JSON-related features: storing JSON in a `String` column, +and the native `JSON` type (ClickHouse ≥ 24.1). Both are supported via `RowBinaryWithNamesAndTypes`. + +## JSON stored in String columns + +Store serialised JSON as a `String`. Encode with `JSON.encode!/1` (Elixir ≥ 1.18 stdlib): + +```elixir +types = ["UInt64", "String"] +names = ["id", "metadata"] + +rows = [ + [1, JSON.encode!(%{source: "web", browser: "Firefox"})], + [2, JSON.encode!(%{source: "mobile", os: "iOS"})] +] + +body = [ + "INSERT INTO events FORMAT RowBinaryWithNamesAndTypes\n", + Ch.RowBinary.encode_names_and_types(names, types), + Ch.RowBinary.encode_rows(rows, types) +] +``` + +Query it back with ClickHouse JSON functions: + +```sql +SELECT id, JSONExtractString(metadata, 'source') FROM events +``` + +## Native JSON type (ClickHouse ≥ 24.1) + +The `JSON` column type stores semi-structured data with automatic column extraction. 
+In `RowBinaryWithNamesAndTypes`, `JSON` columns are encoded and decoded as Elixir +maps or lists — the same as any other term: + +```elixir +types = ["UInt64", "JSON"] +names = ["id", "data"] + +rows = [ + [1, %{"action" => "click", "element" => "button"}], + [2, %{"action" => "view", "page" => "/home"}] +] + +header = Ch.RowBinary.encode_names_and_types(names, types) +encoded = Ch.RowBinary.encode_rows(rows, types) +``` + +Decoding a response with `JSON` columns returns Elixir maps: + +```elixir +{:ok, names, rows} = Ch.HTTP.decode(status, headers, body) +# rows => [[1, %{"action" => "click", ...}], ...] +``` + +## Querying JSON sub-fields + +To extract JSON fields server-side (avoiding transferring full JSON blobs), use +ClickHouse's `JSON_VALUE`, `JSONExtract*`, or the `.field` accessor syntax for the +native `JSON` type: + +```sql +SELECT id, data.action FROM events +-- returns rows where data.action is a String +``` + +The resulting `RowBinaryWithNamesAndTypes` response will have the extracted field +as a `String` (or inferred type) column — decode normally. + +## Tests + +See [`test/ch/guides/json_test.exs`](../test/ch/guides/json_test.exs). diff --git a/pages/multihost.md b/pages/multihost.md new file mode 100644 index 00000000..e69de29b diff --git a/pages/multipart.md b/pages/multipart.md new file mode 100644 index 00000000..e69de29b diff --git a/pages/streaming.md b/pages/streaming.md new file mode 100644 index 00000000..e4290af9 --- /dev/null +++ b/pages/streaming.md @@ -0,0 +1,119 @@ +# Streaming + +For large `SELECT` results, use `decode_start/1` + `decode_continue/2` to process +rows as Mint chunks arrive rather than buffering the entire response body first. 
+ +## When to use streaming + +- Result sets large enough that buffering into memory is expensive (millions of rows) +- You want to pipe rows into a `Stream`, write to a file, or forward to another system +- You need first-row latency (start processing before the response is complete) + +## API + +`decode_start/1` initialises a decoder from response headers (inspects +`x-clickhouse-format`). `decode_continue/2` accepts raw binary chunks extracted +from `{:data, ref, chunk}` Mint responses, and returns rows incrementally. + +``` +{:rows, rows, names, state} -- rows decoded from this chunk; continue +{:more, state} -- no complete rows yet; continue +{:ok, names, []} -- done (all rows already emitted via :rows) +{:error, Ch.Error.t()} -- ClickHouse error +``` + +## Passive mode (recv loop) + +```elixir +{path, headers, body} = Ch.HTTP.encode("SELECT number FROM system.numbers LIMIT 10000000") +{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", path, headers, body) + +state = nil + +conn = + Stream.resource( + fn -> {conn, state} end, + fn {conn, state} -> + case Mint.HTTP1.recv(conn, 0, 5_000) do + {:ok, conn, responses} -> + {rows, state} = + Enum.reduce(responses, {[], state}, fn + {:status, _ref, _status}, acc -> + acc + + {:headers, _ref, headers}, {rows, _state} -> + {rows, Ch.HTTP.decode_start(headers)} + + {:data, _ref, chunk}, {rows, state} -> + case Ch.HTTP.decode_continue(chunk, state) do + {:rows, new_rows, _names, state} -> {rows ++ new_rows, state} + {:more, state} -> {rows, state} + end + + {:done, _ref}, acc -> + acc + end) + + {rows, {conn, state}} + + {:error, conn, _reason, _} -> + {:halt, {conn, state}} + end + end, + fn {conn, _state} -> Mint.HTTP1.close(conn) end + ) + |> Stream.each(fn row -> IO.inspect(row) end) + |> Stream.run() +``` + +## Active mode + +In active mode, responses arrive as messages. 
The decoder state carries across +`receive` iterations: + +```elixir +defp recv_loop(conn, state) do + receive do + message -> + case Mint.HTTP1.stream(conn, message) do + {:ok, conn, responses} -> + state = + Enum.reduce(responses, state, fn + {:status, _ref, _status}, state -> + state + + {:headers, _ref, headers}, _state -> + Ch.HTTP.decode_start(headers) + + {:data, _ref, chunk}, state -> + case Ch.HTTP.decode_continue(chunk, state) do + {:rows, rows, names, state} -> + handle_rows(rows, names) + state + + {:more, state} -> + state + end + + {:done, _ref}, state -> + {:done, state} + end) + + case state do + {:done, _} -> :ok + state -> recv_loop(conn, state) + end + end + end +end +``` + +## Chunk boundary handling + +`decode_continue/2` handles data arriving split across RowBinary structural boundaries — the +RowBinary names/types header may arrive across multiple chunks. This is tested exhaustively +byte-by-byte in `Ch.RowBinary` tests. + +## Tests + +See [`test/ch/guides/streaming_test.exs`](../test/ch/guides/streaming_test.exs). 
diff --git a/test/ch/guides/compression_test.exs b/test/ch/guides/compression_test.exs new file mode 100644 index 00000000..16006d2a --- /dev/null +++ b/test/ch/guides/compression_test.exs @@ -0,0 +1,110 @@ +defmodule Ch.Guides.CompressionTest do + # Tests from pages/compression.md + use ExUnit.Case, async: true + + @types ["UInt64", "String", "DateTime"] + @names ["id", "name", "created_at"] + + defp sample_body do + rows = [[1, "pageview", DateTime.utc_now()]] + header = Ch.RowBinary.encode_names_and_types(@names, @types) + encoded = Ch.RowBinary.encode_rows(rows, @types) + + IO.iodata_to_binary([ + "INSERT INTO events FORMAT RowBinaryWithNamesAndTypes\n", + header | encoded + ]) + end + + describe "zstd (OTP 28 stdlib)" do + test "encodes body as zstd and sets content-encoding header" do + body = sample_body() + compressed = :zstd.compress(body) + + {_path, headers, ^compressed} = + Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "zstd"}]) + + assert List.keyfind(headers, "content-encoding", 0) == {"content-encoding", "zstd"} + end + + test "zstd roundtrip: compressed body decompresses to original" do + body = sample_body() + compressed = :zstd.compress(body) + assert :zstd.decompress(compressed) == body + end + + test "decode/3 auto-decompresses zstd response" do + rows = [[1, "pageview", ~N[2024-01-01 00:00:00]]] + rb_body = IO.iodata_to_binary([ + Ch.RowBinary.encode_names_and_types(@names, @types), + Ch.RowBinary.encode_rows(rows, @types) + ]) + compressed = :zstd.compress(rb_body) + + headers = [ + {"x-clickhouse-format", "RowBinaryWithNamesAndTypes"}, + {"content-encoding", "zstd"} + ] + + assert {:ok, @names, ^rows} = Ch.HTTP.decode(200, headers, compressed) + end + end + + describe "gzip (stdlib)" do + test "encodes body as gzip and sets content-encoding header" do + body = sample_body() + compressed = :zlib.gzip(body) + + {_path, headers, ^compressed} = + Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "gzip"}]) + + assert 
List.keyfind(headers, "content-encoding", 0) == {"content-encoding", "gzip"} + end + + test "gzip roundtrip: compressed body decompresses to original" do + body = sample_body() + compressed = :zlib.gzip(body) + assert :zlib.gunzip(compressed) == body + end + + test "decode/3 auto-decompresses gzip response" do + rows = [[1, "pageview", ~N[2024-01-01 00:00:00]]] + rb_body = IO.iodata_to_binary([ + Ch.RowBinary.encode_names_and_types(@names, @types), + Ch.RowBinary.encode_rows(rows, @types) + ]) + gzipped = :zlib.gzip(rb_body) + + headers = [ + {"x-clickhouse-format", "RowBinaryWithNamesAndTypes"}, + {"content-encoding", "gzip"} + ] + + assert {:ok, @names, ^rows} = Ch.HTTP.decode(200, headers, gzipped) + end + end + + describe "lz4 (nimble_lz4)" do + @tag :lz4 + test "encode and decode lz4 compressed body" do + body = sample_body() + {:ok, compressed} = NimbleLz4.compress(body) + assert NimbleLz4.decompress(compressed) == {:ok, body} + + {_path, headers, ^compressed} = + Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "lz4"}]) + + assert List.keyfind(headers, "content-encoding", 0) == {"content-encoding", "lz4"} + end + end + + @tag :integration + describe "live ClickHouse" do + # Requires ClickHouse at localhost:8123 + # Run with: mix test --include integration + + test "INSERT with gzip compression succeeds" + test "INSERT with lz4 compression succeeds" + test "SELECT response with accept-encoding: gzip is decompressed automatically" + end +end diff --git a/test/ch/guides/inserts_test.exs b/test/ch/guides/inserts_test.exs new file mode 100644 index 00000000..15cde2c0 --- /dev/null +++ b/test/ch/guides/inserts_test.exs @@ -0,0 +1,60 @@ +defmodule Ch.Guides.InsertsTest do + # Tests from pages/inserts.md + use ExUnit.Case, async: true + import Ch.RowBinary + + @types ["UInt64", "String", "DateTime"] + @names ["id", "name", "created_at"] + @rows [[1, "pageview", ~N[2024-01-01 00:00:00]], [2, "click", ~N[2024-01-01 00:01:00]]] + + describe "INSERT body 
construction" do + test "builds correct RowBinaryWithNamesAndTypes body" do + header = encode_names_and_types(@names, @types) + rows_binary = encode_rows(@rows, @types) + + body = + IO.iodata_to_binary([ + "INSERT INTO events FORMAT RowBinaryWithNamesAndTypes\n", + header | rows_binary + ]) + + # The body must start with the SQL statement + assert String.starts_with?(body, "INSERT INTO events FORMAT RowBinaryWithNamesAndTypes\n") + + # The RowBinary header is intact — decode_names_and_rows can parse the data portion + data = binary_part(body, 52, byte_size(body) - 52) + [names | decoded] = decode_names_and_rows(data) + assert names == @names + assert decoded == @rows + end + + test "statement and RowBinary must be compressed together" do + header = encode_names_and_types(@names, @types) + rows_encoded = encode_rows(@rows, @types) + + body = + IO.iodata_to_binary([ + "INSERT INTO events FORMAT RowBinaryWithNamesAndTypes\n", + header | rows_encoded + ]) + + compressed = :zlib.gzip(body) + assert :zlib.gunzip(compressed) == body + + {_path, headers, ^compressed} = + Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "gzip"}]) + + assert {"content-encoding", "gzip"} in headers + end + end + + @tag :integration + describe "live ClickHouse" do + test "INSERT 1 row" + test "INSERT 100_000 rows in one batch" + test "INSERT with gzip compression" + test "response includes x-clickhouse-summary with written_rows" + test "GenServer buffer flushes on size threshold" + test "GenServer buffer flushes on time threshold" + end +end diff --git a/test/ch/guides/json_test.exs b/test/ch/guides/json_test.exs new file mode 100644 index 00000000..b7fa3b0c --- /dev/null +++ b/test/ch/guides/json_test.exs @@ -0,0 +1,62 @@ +defmodule Ch.Guides.JsonTest do + # Tests from pages/json.md + use ExUnit.Case, async: true + import Ch.RowBinary + + describe "JSON stored in String columns" do + test "encode and decode JSON-in-String roundtrip" do + types = ["UInt64", "String"] + names = 
["id", "metadata"] + + rows = [ + [1, JSON.encode!(%{"source" => "web", "browser" => "Firefox"})], + [2, JSON.encode!(%{"source" => "mobile", "os" => "iOS"})] + ] + + encoded = + IO.iodata_to_binary([ + encode_names_and_types(names, types), + encode_rows(rows, types) + ]) + + assert [^names | decoded_rows] = decode_names_and_rows(encoded) + assert decoded_rows == rows + end + + test "JSON values survive RowBinary encode/decode as strings" do + json = JSON.encode!(%{"nested" => %{"key" => [1, 2, 3]}}) + encoded = IO.iodata_to_binary(encode(:string, json)) + assert decode_rows(encoded, [:string]) == [[json]] + end + end + + describe "native JSON type" do + test "JSON map encodes and decodes as Elixir map" do + types = ["UInt64", "JSON"] + names = ["id", "data"] + + rows = [ + [1, %{"action" => "click", "element" => "button"}], + [2, %{"action" => "view", "page" => "/home"}] + ] + + encoded = IO.iodata_to_binary(encode_rows(rows, types)) + assert decode_rows(encoded, types) == rows + end + + test "JSON list encodes and decodes as Elixir list" do + types = ["JSON"] + rows = [[[1, 2, 3]], [nil]] + + encoded = IO.iodata_to_binary(encode_rows(rows, types)) + assert decode_rows(encoded, types) == rows + end + end + + @tag :integration + describe "live ClickHouse" do + test "INSERT and SELECT with String column containing JSON" + test "INSERT and SELECT with native JSON column type" + test "SELECT JSON sub-field with data.field accessor syntax" + end +end diff --git a/test/ch/guides/streaming_test.exs b/test/ch/guides/streaming_test.exs new file mode 100644 index 00000000..fc5cb0d6 --- /dev/null +++ b/test/ch/guides/streaming_test.exs @@ -0,0 +1,104 @@ +defmodule Ch.Guides.StreamingTest do + # Tests from pages/streaming.md + use ExUnit.Case, async: true + import Ch.RowBinary + + # Simulates a RowBinaryWithNamesAndTypes response split into N chunks, + # runs it through decode_start/decode_continue, collects all rows. 
+ defp stream_decode(binary, chunk_size) do + headers = [{"x-clickhouse-format", "RowBinaryWithNamesAndTypes"}] + state = Ch.HTTP.decode_start(headers) + + chunks = for <>, do: chunk + remainder_size = rem(byte_size(binary), chunk_size) + + chunks = + if remainder_size > 0 do + chunks ++ [binary_part(binary, byte_size(binary) - remainder_size, remainder_size)] + else + chunks + end + + {names, rows, state} = + Enum.reduce(chunks, {nil, [], state}, fn chunk, {names, rows_acc, state} -> + case Ch.HTTP.decode_continue(chunk, state) do + {:rows, new_rows, chunk_names, state} -> + {names || chunk_names, rows_acc ++ new_rows, state} + + {:more, state} -> + {names, rows_acc, state} + end + end) + + {:ok, final_names, final_rows} = Ch.HTTP.decode_continue(:end_of_input, state) + {names || final_names, rows ++ final_rows} + end + + describe "decode_start/decode_continue" do + test "single chunk — full response at once" do + types = ["UInt64", "String"] + names = ["id", "name"] + rows = [[1, "a"], [2, "b"], [3, "c"]] + + binary = + IO.iodata_to_binary([ + encode_names_and_types(names, types), + encode_rows(rows, types) + ]) + + assert stream_decode(binary, byte_size(binary)) == {names, rows} + end + + test "byte-by-byte chunks — header and rows split at every boundary" do + types = ["UInt64", "String"] + names = ["id", "name"] + rows = Enum.map(1..20, fn i -> [i, "row_#{i}"] end) + + binary = + IO.iodata_to_binary([ + encode_names_and_types(names, types), + encode_rows(rows, types) + ]) + + assert stream_decode(binary, 1) == {names, rows} + end + + test "various chunk sizes produce the same result" do + types = ["UInt32", "String", "Bool"] + names = ["n", "s", "b"] + rows = Enum.map(1..50, fn i -> [i, "val#{i}", rem(i, 2) == 0] end) + + binary = + IO.iodata_to_binary([ + encode_names_and_types(names, types), + encode_rows(rows, types) + ]) + + for chunk_size <- [1, 3, 7, 13, 64, 256, byte_size(binary)] do + assert stream_decode(binary, chunk_size) == {names, rows}, + "failed 
with chunk_size=#{chunk_size}" + end + end + + test "empty response" do + headers = [{"x-clickhouse-format", "RowBinaryWithNamesAndTypes"}] + state = Ch.HTTP.decode_start(headers) + assert {:ok, [], []} = Ch.HTTP.decode_continue(:end_of_input, state) + end + + test "non-RowBinary format accumulates raw body" do + headers = [{"x-clickhouse-format", "TabSeparated"}] + state = Ch.HTTP.decode_start(headers) + {:more, state} = Ch.HTTP.decode_continue("col1\tcol2\n", state) + {:more, state} = Ch.HTTP.decode_continue("val1\tval2\n", state) + assert {:ok, [], [_body]} = Ch.HTTP.decode_continue(:end_of_input, state) + end + end + + @tag :integration + describe "live ClickHouse — streaming SELECT" do + test "streams 1_000_000 rows from system.numbers" + test "handles connection close mid-stream" + test "works in passive mode receive loop" + end +end diff --git a/test/ch/http_test.exs b/test/ch/http_test.exs new file mode 100644 index 00000000..e69de29b From 2a47b261db7d6ccae4bac7b2c4d7b773c534c331 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Sun, 19 Apr 2026 12:43:12 +0300 Subject: [PATCH 12/13] eh --- README.md | 19 +- lib/ch/http.ex | 447 +++++++++------------------- lib/ch/pool.ex | 216 +++++++++----- lib/ch/row_binary.ex | 4 +- mix.exs | 3 +- pages/compression.md | 149 ++++++---- pages/inserts.md | 11 +- pages/json.md | 124 +++++--- pages/multihost.md | 165 ++++++++++ pages/streaming.md | 81 +++-- pages/telemetry.md | 0 test/ch/guides/compression_test.exs | 80 +++-- test/ch/guides/inserts_test.exs | 6 +- test/ch/guides/json_test.exs | 107 ++++--- test/ch/guides/streaming_test.exs | 60 ++-- 15 files changed, 833 insertions(+), 639 deletions(-) create mode 100644 pages/telemetry.md diff --git a/README.md b/README.md index dda10578..306135df 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,22 @@ deadline = Ch.HTTP.to_deadline(to_timeout(second: 15)) ) try do - {path, headers, body} = Ch.HTTP.encode("SELECT 1") + path = Ch.HTTP.path(%{}) + + with {:ok, _ref, conn} <- 
Mint.HTTP1.request(conn, "POST", path, [], "SELECT 1"), + {:ok, conn, responses} <- Mint.HTTP1.recv(conn, 0, Ch.HTTP.to_timeout(deadline)) do + state = Ch.HTTP.decode_start() + + {_state, rows} = + Enum.reduce(responses, {state, []}, fn response, {state, acc} -> + case Ch.HTTP.decode_continue(state, response) do + {:rows, rows, _names, state} -> {state, acc ++ rows} + {:cont, state} -> {state, acc} + {:ok, _names, rows} -> {state, acc ++ rows} + _ -> {state, acc} + end + end) - with {:ok, _ref, conn} <- Mint.HTTP1.request(conn, "POST", path, headers, body), - {:ok, {200, _headers, body}, conn} <- Ch.HTTP.recv_all(conn, deadline), - {:ok, _names, rows} <- Ch.HTTP.decode(200, _headers, body) do rows end after diff --git a/lib/ch/http.ex b/lib/ch/http.ex index 614e6d66..194ad473 100644 --- a/lib/ch/http.ex +++ b/lib/ch/http.ex @@ -1,75 +1,6 @@ defmodule Ch.HTTP do @moduledoc """ Stateless helpers for `Mint.HTTP1` with ClickHouse-specific encoding and decoding. - - Provides three layers of functionality: - - 1. **Deadline / timeout helpers** — convert between relative millisecond timeouts - and absolute monotonic deadlines, so a single deadline propagates correctly - across multiple network calls. - - 2. **Request encoding** — build a `{path, headers, body}` triple ready for - `Mint.HTTP1.request/5`. Parameter binding is handled transparently. - - 3. **Response decoding** — single-shot (`decode/3`) or streaming - (`decode_start/1` + `decode_continue/2`) decoding of ClickHouse HTTP responses. - - The caller retains full control of the connection lifecycle and the HTTP method. - Body compression is the caller's responsibility: compress `body` manually and pass - `{"content-encoding", "gzip"}` in `opts[:headers]`. Responses with - `content-encoding: gzip` are decompressed automatically by `decode/3`. 
- - ## Single-shot usage - - deadline = Ch.HTTP.to_deadline(to_timeout(second: 15)) - - {:ok, conn} = - Mint.HTTP1.connect(:http, "localhost", 8123, - mode: :passive, - timeout: Ch.HTTP.to_timeout(deadline) - ) - - try do - {path, headers, body} = Ch.HTTP.encode("CREATE TABLE demo(a Int64) ENGINE Null") - - with {:ok, _ref, conn} <- Mint.HTTP1.request(conn, "POST", path, headers, body), - {:ok, {status, headers, body}, conn} <- Ch.HTTP.recv_all(conn, deadline), - :ok <- Ch.HTTP.decode(status, headers, body) do - :ok - end - after - Mint.HTTP1.close(conn) - end - - ## Streaming - - For large result sets, use `decode_start/1` + `decode_continue/2` to process rows - as Mint data chunks arrive, without buffering the entire response body. The caller - handles `:status` and `:headers` responses, then passes only data to the decoder: - - # active-mode receive loop (passive mode: same but with Mint.HTTP1.recv/3) - receive do - message -> - {:ok, conn, responses} = Mint.HTTP1.stream(conn, message) - - Enum.reduce(responses, state, fn - {:status, _ref, _status}, state -> - state - - {:headers, _ref, headers}, _state -> - Ch.HTTP.decode_start(headers) - - {:data, _ref, chunk}, state -> - case Ch.HTTP.decode_continue(chunk, state) do - {:rows, rows, names, state} -> process_rows(rows, names); state - {:more, state} -> state - end - - {:done, _ref}, state -> - {:ok, names, rows} = Ch.HTTP.decode_continue(:end_of_input, state) - done(names, rows) - end) - end """ import Kernel, except: [to_timeout: 1] @@ -82,22 +13,8 @@ defmodule Ch.HTTP do """ @type deadline :: {:deadline, integer} | :infinity - @typedoc """ - Opaque streaming decoder state. - - Returned by `decode_start/1` and updated by each call to `decode_continue/2`. 
- """ - @opaque decode_state :: - {:awaiting_rb_header, buf :: binary} - | {:decoding_rows, names :: [String.t()], types :: [term], row_state :: term, - remainder :: binary} - | {:raw, acc :: iodata} - @doc """ - Converts a relative timeout (milliseconds) or existing `t:deadline/0` to a `t:deadline/0`. - - Passing an already-converted `{:deadline, _}` tuple is a no-op, making this safe to - call at multiple layers of the call stack without double-adding the offset. + Converts a relative timeout (milliseconds) to a `t:deadline/0`. """ @spec to_deadline(timeout | deadline) :: deadline def to_deadline(:infinity), do: :infinity @@ -108,10 +25,7 @@ defmodule Ch.HTTP do end @doc """ - Returns the remaining milliseconds until `deadline`, suitable for passing to Mint. - - Always returns `>= 0`; clamps to `0` if the deadline has already passed (Mint does - not accept negative timeouts). + Returns the remaining milliseconds until a `t:deadline/0`. """ @spec to_timeout(timeout | deadline) :: timeout def to_timeout(:infinity), do: :infinity @@ -122,306 +36,222 @@ defmodule Ch.HTTP do end @doc """ - Encodes a ClickHouse HTTP request with no parameters. - - Equivalent to `encode(statement, %{}, [])`. - """ - @spec encode(statement :: iodata) :: - {path :: String.t(), headers :: Mint.Types.headers(), body :: iodata} - def encode(statement) do - encode(statement, %{}, []) - end - - @doc """ - Encodes a ClickHouse HTTP request with parameters and options. - - Returns `{path, headers, body}` ready for `Mint.HTTP1.request/5`. The HTTP method - (`"POST"`) and connection lifecycle remain the caller's responsibility. + Builds the request path for a ClickHouse HTTP request. - ## Parameters + ### Examples - Parameters are encoded using ClickHouse's - [escaped HTTP format](https://clickhouse.com/docs/en/interfaces/http#tabs-in-url-parameters), - which follows the same escaping rules as ClickHouse's TSV format: tab (`\\t`), - newline (`\\n`), and backslash (`\\`) are backslash-escaped. 
+ iex> Ch.HTTP.path(%{}) + "/" - * **Named** — `%{"city" => "Prague"}` → `?param_city=Prague` - * **Positional** — `["Prague", 42]` → `?param_$0=Prague¶m_$1=42` + iex> Ch.HTTP.path(%{"city" => "Prague"}) + "/?param_city=Prague" - ## Options + iex> Ch.HTTP.path(%{}, output_format_binary_write_json_as_string: true) + "/?output_format_binary_write_json_as_string=true" - * `:headers` — additional Mint-style headers forwarded verbatim, e.g. - `[{"x-clickhouse-user", "alice"}, {"x-clickhouse-key", "secret"}]`. + iex> Ch.HTTP.path(%{"city" => "Prague"}, %{"query_id" => "550e8400"}) + "/?param_city=Prague&query_id=550e8400" - ## Body - - The returned `body` is the `statement` iodata unchanged. No RowBinary encoding or - compression is applied — those are the caller's responsibility: - - compressed = :zlib.compress(IO.iodata_to_binary(statement)) - {path, headers, body} = - Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "gzip"}]) """ - @spec encode( - statement :: iodata, - params :: %{String.t() => term} | [term], - opts :: keyword - ) :: - {path :: String.t(), headers :: Mint.Types.headers(), body :: iodata} - def encode(statement, params, opts) do - query_params = encode_params(params) - - path = - case query_params do - [] -> "/" - _ -> "/?" <> URI.encode_query(query_params) - end - - headers = Keyword.get(opts, :headers, []) - {path, headers, statement} + @spec path(%{String.t() => term}, Enumerable.t()) :: String.t() + def path(params, options \\ []) do + case encode_params(params) ++ options do + [] -> "/" + qp -> "/?" <> URI.encode_query(qp) + end end @doc """ - Receives a complete HTTP response from a passive `Mint.HTTP1` connection. + Initialises a streaming ClickHouse response decoder. - Accumulates all Mint response messages until `{:done, ref}` and returns the - raw `{status, headers, body}` triple, which can be passed directly to `decode/3`. + Accepts an optional `decoders` map, mapping from format name to a decoder function. 
- Accepts a plain timeout in milliseconds or a `t:deadline/0`. When given a deadline, - the remaining time is recomputed before each `Mint.HTTP1.recv/3` call. + Only `RowBinaryWithNamesAndTypes` format is supported by default. For all other formats, + the data is left as is. """ - @spec recv_all(Mint.HTTP1.t(), timeout | deadline) :: - {:ok, {status :: non_neg_integer, Mint.Types.headers(), body :: binary}, Mint.HTTP1.t()} - | {:error, Mint.HTTP1.t(), Mint.Types.error()} - def recv_all(conn, timeout_or_deadline) do - deadline = to_deadline(timeout_or_deadline) - do_recv_all(conn, _status = nil, _headers = [], _data = [], deadline) - end - - defp do_recv_all(conn, status, headers, data, deadline) do - case Mint.HTTP1.recv(conn, 0, to_timeout(deadline)) do - {:ok, conn, responses} -> - case handle_responses(responses, status, headers, data) do - {:ok, status, headers, body} -> - {:ok, {status, headers, body}, conn} - - {:more, status, headers, data} -> - do_recv_all(conn, status, headers, data, deadline) - - {:error, reason} -> - {:error, conn, reason} - end + def decode_start(opts \\ []) do + decoders = + Keyword.get(opts, :decoders, %{ + "RowBinaryWithNamesAndTypes" => &__MODULE__.decode_rowbinary_stream/2, + :_ => &__MODULE__.decode_raw_stream/2 + }) - {:error, conn, reason, _responses} -> - {:error, conn, reason} - end + {:init, decoders} end - @dialyzer {:no_improper_lists, handle_responses: 4} - defp handle_responses([{:status, _ref, status} | rest], _status, headers, data) do - handle_responses(rest, status, headers, data) + @doc false + def decode_rowbinary_stream(new_data, {:rows, names, types, prev_data, state}) do + data = prev_data <> new_data + {rows, rest, state} = Ch.RowBinary.decode_rows_continue(data, types, state) + {:more, %{names: names, rows: rows}, {:rows, names, types, rest, state}} end - defp handle_responses([{:headers, _ref, new_headers} | rest], status, prev_headers, data) do - handle_responses(rest, status, prev_headers ++ new_headers, 
data) - end + def decode_rowbinary_stream(new_data, state) do + data = + case state do + :init -> new_data + {:header, prev_data} -> prev_data <> new_data + end - defp handle_responses([{:data, _ref, new_data} | rest], status, headers, prev_data) do - handle_responses(rest, status, headers, [prev_data | new_data]) + case Ch.RowBinary.decode_header(data) do + :more -> {:more, [], {:header, data}} + {:ok, names, types, rest} -> decode_rowbinary_stream(rest, {:rows, names, types, rest, nil}) + end end - defp handle_responses([{:done, _ref} | _rest], status, headers, data) do - {:ok, status, headers, IO.iodata_to_binary(data)} + @doc false + def decode_raw_stream(data, state) do + {:more, data, state} end - defp handle_responses([{:error, _ref, reason} | _rest], _status, _headers, _data) do - {:error, reason} - end + @doc """ + Feeds a Mint response tuple into the streaming decoder. + + This function handles the entire Mint response lifecycle (`:status`, `:headers`, + `:data`, `:done`, `:error`) for a single request. + """ + @spec decode_continue(Mint.Types.response(), decoder) :: + :ok + | {:more, decoded, decoder} + | {:error, error} + | :done + when decoded: term, + decoder: term, + error: Mint.Types.error() | Ch.Error.t() + def decode_continue(response, decoder) - defp handle_responses([], status, headers, data) do - {:more, status, headers, data} + def decode_continue({:status, _ref, status}, {:init, decoders}) do + {:cont, {:status, status, decoders}} end - @doc """ - Decodes a complete ClickHouse HTTP response. + def decode_continue({:headers, _ref, headers}, {:status, 200, decoders}, ) do + format = get_header(headers, "x-clickhouse-format") - Accepts the `{status, headers, body}` triple returned by `recv_all/2`. - Handles errors, decompression, and `RowBinaryWithNamesAndTypes` decoding. + state = + cond do + format == "RowBinaryWithNamesAndTypes" -> + {:rowbinary, <<>>} - * Non-200 status → `{:error, Ch.Error.t()}` with code and message from ClickHouse. 
- * `content-encoding: gzip` → automatically decompressed before parsing. - * `x-clickhouse-format: RowBinaryWithNamesAndTypes` → `{:ok, names, rows}`. - * Empty body (DDL, INSERT without result) → `:ok`. - * Other or absent format → `{:ok, [], [body]}` with the raw binary. + format == nil -> + {:empty} - ## Example + decoder = decoders[format] -> + {:custom, decoder, decoder.decode_start(headers)} - {:ok, {status, headers, body}, conn} = Ch.HTTP.recv_all(conn, deadline) - case Ch.HTTP.decode(status, headers, body) do - {:ok, names, rows} -> ... - :ok -> ... - {:error, error} -> ... + true -> + {:unknown_format, format} end - """ - @spec decode( - status :: non_neg_integer, - headers :: Mint.Types.headers(), - body :: binary - ) :: - :ok - | {:ok, names :: [String.t()], rows :: [[term]]} - | {:error, Ch.Error.t()} - def decode(status, headers, body) - def decode(status, headers, body) when status != 200 do + {:cont, state} + end + + def decode_continue({:headers, _ref, headers}, {:status, _status, decoders}) do code = - case get_header(headers, "x-clickhouse-exception-code") do - nil -> nil - code -> String.to_integer(code) + if code = get_header(headers, "x-clickhouse-exception-code") do + String.to_integer(code) end - {:error, Ch.Error.exception(code: code, message: body)} + {:cont, {:error_body, status, code, []}} end - def decode(200, headers, body) do - body = maybe_decompress(body, get_header(headers, "content-encoding")) + def decode_continue({:data, _ref, chunk}, decoder) do + decode_continue_data(state, chunk) + end - case get_header(headers, "x-clickhouse-format") do - "RowBinaryWithNamesAndTypes" -> - [names | rows] = Ch.RowBinary.decode_names_and_rows(body) - {:ok, names, rows} + def decode_continue({:done, _ref}, decoder) do + decode_continue_data(state, :done) + end - _other -> - case body do - "" -> :ok - _ -> {:ok, [], [body]} - end - end + def decode_continue({:error, _ref, reason}, _decoder) do + {:error, reason} end - @doc """ - Initialises a 
# NOTE(review): reconstructed from patch hunk; deleted (`-`) lines dropped.
  # Dispatches a body chunk (or `:done`) to the decoder state produced by the
  # `:headers` clause of `decode_continue/2`.
  defp decode_continue_data(state, chunk_or_done)

  # Custom decoder: delegate, re-wrapping any returned state so the next call
  # routes back here.
  defp decode_continue_data({:custom, decoder, inner}, chunk_or_done) do
    case decoder.decode_continue(inner, chunk_or_done) do
      {:rows, rows, names, inner} -> {:rows, rows, names, {:custom, decoder, inner}}
      {:cont, inner} -> {:cont, {:custom, decoder, inner}}
      {:ok, names, rows} -> {:ok, names, rows}
      :ok -> :ok
      {:error, error} -> {:error, error}
    end
  end

  # --- :done (finalise) ---

  # Empty body before the RowBinary header — e.g. a DDL/INSERT response that
  # was sent with the RowBinary format header anyway.
  defp decode_continue_data({:rowbinary, <<>>}, :done), do: :ok

  defp decode_continue_data({:rowbinary, _buffer}, :done) do
    message = "incomplete RowBinaryWithNamesAndTypes header"
    {:error, Ch.Error.exception(code: nil, message: message)}
  end

  defp decode_continue_data({:decoding_rows, names, _types, _row_state, _rest}, :done) do
    # every decoded row has already been emitted via {:rows, ...} while streaming
    {:ok, names, []}
  end

  defp decode_continue_data({:empty}, :done), do: :ok

  defp decode_continue_data({:unknown_format, format}, :done) do
    {:error, {:unknown_format, format}}
  end

  defp decode_continue_data({:error_body, _status, code, chunks}, :done) do
    {:error, Ch.Error.exception(code: code, message: IO.iodata_to_binary(chunks))}
  end

  # --- binary chunks ---

  # Accumulate until the RowBinaryWithNamesAndTypes header parses, then switch
  # to row decoding, emitting any rows already complete in this chunk.
  defp decode_continue_data({:rowbinary, buffer}, chunk) when is_binary(chunk) do
    buffer = buffer <> chunk

    case Ch.RowBinary.decode_header(buffer) do
      :more ->
        {:cont, {:rowbinary, buffer}}

      {:ok, names, types, rest} ->
        {rows, remainder, row_state} = Ch.RowBinary.decode_rows_continue(rest, types, nil)
        state = {:decoding_rows, names, types, row_state, remainder}
        if rows == [], do: {:cont, state}, else: {:rows, rows, names, state}
    end
  end

  defp decode_continue_data({:decoding_rows, names, types, row_state, remainder}, chunk)
       when is_binary(chunk) do
    {rows, remainder, row_state} =
      Ch.RowBinary.decode_rows_continue(remainder <> chunk, types, row_state)

    state = {:decoding_rows, names, types, row_state, remainder}
    if rows == [], do: {:cont, state}, else: {:rows, rows, names, state}
  end

  defp decode_continue_data({:empty}, chunk) when is_binary(chunk) do
    # unexpected data on what should be an empty response; ignore
    {:cont, {:empty}}
  end

  defp decode_continue_data({:unknown_format, format}, chunk) when is_binary(chunk) do
    # discard chunks; the error is reported once :done arrives
    {:cont, {:unknown_format, format}}
  end

  defp decode_continue_data({:error_body, status, code, chunks}, chunk) when is_binary(chunk) do
    {:cont, {:error_body, status, code, [chunks | chunk]}}
  end

  ## Private helpers

  # Fetches a header value by name, `nil` when absent.
  # NOTE(review): only the head and the hit branch of this function are visible
  # in this hunk; the miss branch is reconstructed from `List.keyfind/3`
  # returning nil on a miss — confirm against the full file.
  defp get_header(headers, key) do
    case List.keyfind(headers, key, 0) do
      {_, value} -> value
      nil -> nil
    end
  end
- # - # Named params: %{"city" => "Prague"} → [{"param_city", "Prague"}] - # Positional params: ["Prague", 42] → [{"param_$0", "Prague"}, {"param_$1", "42"}] defp encode_params(params) when is_map(params) do Enum.map(params, fn {k, v} -> {"param_#{k}", encode_param(v)} end) end - defp encode_params(params) when is_list(params) do - params - |> Enum.with_index() - |> Enum.map(fn {v, idx} -> {"param_$#{idx}", encode_param(v)} end) - end - defp encode_param(n) when is_integer(n), do: Integer.to_string(n) defp encode_param(f) when is_float(f), do: Float.to_string(f) diff --git a/lib/ch/pool.ex b/lib/ch/pool.ex index a1b558b5..af1d2113 100644 --- a/lib/ch/pool.ex +++ b/lib/ch/pool.ex @@ -3,7 +3,7 @@ defmodule Ch.Pool do TODO """ - @behaviour NimblePool + use GenServer @type statement :: iodata @type params :: %{String.t() => term} @@ -24,23 +24,10 @@ defmodule Ch.Pool do doc: "Maximum number of concurrent connections.", default: 10 ], - worker_idle_timeout: [ - type: :timeout, - doc: """ - Time a connection can stay idle before the pool closes it. - Should be lower than ClickHouse's `keep_alive_timeout`. - """, - default: to_timeout(second: 5) - ], url: [ type: :string, doc: "The ClickHouse endpoint URL.", default: "http://localhost:8123" - ], - connect_options: [ - type: :keyword_list, - default: [], - doc: "Options passed to `Mint.HTTP.connect/4` (e.g. `:timeout`, `:proxy`)." 
] ] @@ -61,14 +48,8 @@ defmodule Ch.Pool do name = Keyword.get(options, :name) pool_size = Keyword.fetch!(options, :pool_size) - worker_idle_timeout = Keyword.fetch!(options, :worker_idle_timeout) url = Keyword.fetch!(options, :url) - connect_options = - options - |> Keyword.get(:connect_options, []) - |> Keyword.put(:mode, :passive) - %URI{scheme: scheme, host: host, port: port} = URI.parse(url) scheme = @@ -78,59 +59,42 @@ defmodule Ch.Pool do _other -> raise ArgumentError, "unexpected HTTP scheme: #{inspect(scheme)}" end - initial_pool_state = %{ - template: {:template, scheme, host, port, connect_options} - } - - NimblePool.start_link( - worker: {__MODULE__, initial_pool_state}, + config = [ pool_size: pool_size, - worker_idle_timeout: worker_idle_timeout, - lazy: true, - name: name - ) + template: {scheme, host, port} + ] + + GenServer.start_link(__MODULE__, config, name: name) end @doc """ - Returns a child spec to allow Ch pool to be started under a supervisor. + Stops the given `pool`. - ## Options - - The options are exactly the same as for `start_link/1`. + The pool exits with the given `reason`. The pool has `timeout` milliseconds to stop + before it's unilaterally killed by the runtime. """ - @spec child_spec(keyword) :: Supervisor.child_spec() - def child_spec(options) do - %{id: __MODULE__, start: {__MODULE__, :start_link, [options]}} + def stop(pool, reason \\ :normal, timeout \\ :infinity) do + GenServer.stop(pool, reason, timeout) end @spec query(NimblePool.pool(), statement, params, keyword) :: {:ok, query_result} | {:error, query_error} def query(pool, statement, params \\ %{}, options \\ []) do - request = Ch.HTTP.encode_request("POST", statement, params, options) - {timeout, options} = Keyword.pop(options, :timeout, @query_timeout) - deadline = Ch.HTTP.deadline_from_timeout(timeout) + + deadline = Ch.HTTP.to_deadline(timeout) + path = Ch.HTTP.path(params, options) # TODO retry on closed? backoff? 
# NOTE(review): reconstructed from patch hunk; the head of query/4 sits above
# this hunk, so this body picks up inside it.
    # TODO retry transient closed/etc. errors?
    # FIX(review): checkout/3 below now calls `fun.(request_ref, conn)`; the
    # original invoked the fun with arity 1 while passing an arity-2 fun here.
    checkout(pool, timeout, fn _ref, conn ->
      # TODO(review): request/4 and checkin/1 are not defined in this patch
      # yet — confirm against the full file.
      with {:ok, conn} <- ensure_connected(conn, pool, deadline),
           {:ok, conn, result} <- request(conn, path, statement, deadline) do
        {result, checkin(conn)}
      else
        {:error, reason} = error -> {error, {:remove, reason}}
      end
    end)
  end

  @spec query!(NimblePool.pool(), statement, params, keyword) :: query_result
  # NOTE(review): the body of query!/4 is unchanged context elided between
  # hunks; reconstructed as the conventional bang wrapper — confirm against
  # the full file.
  def query!(pool, statement, params \\ %{}, options \\ []) do
    case query(pool, statement, params, options) do
      {:ok, result} -> result
      {:error, error} -> raise error
    end
  end

  # Borrows a connection (or the connect template) from the pool for the
  # duration of `fun`. `fun` receives the pool-side request ref and the
  # connection and must return `{result, checkin_instruction}`; the
  # instruction is handed back to the pool in the `:in` cast.
  defp checkout(pool, timeout, fun) when is_function(fun, 2) do
    monitor_ref = Process.monitor(pool)

    # TODO noconnect?
    GenServer.cast(pool, {:out, self(), monitor_ref, timeout})

    receive do
      {^monitor_ref, conn, request_ref} ->
        Process.demonitor(monitor_ref, [:flush])
        # FIX(review): the original called `fun.(conn)` (arity 1) even though
        # every caller passes an arity-2 fun — pass the request ref through.
        {result, conn} = fun.(request_ref, conn)
        GenServer.cast(pool, {:in, conn, request_ref})
        result

      {^monitor_ref, :timeout} ->
        # NOTE(review): nothing in this patch sends this message yet — a
        # per-request timer is still TODO; confirm.
        Process.demonitor(monitor_ref, [:flush])
        {:error, :timeout}

      {:DOWN, ^monitor_ref, :process, _pid, reason} ->
        {:error, reason}
    end
  end

  @impl GenServer
  def init(config) do
    # trap exits so terminate/2 runs and can clean up pooled connections
    Process.flag(:trap_exit, true)

    pool_size = Keyword.fetch!(config, :pool_size)
    template = Keyword.fetch!(config, :template)

    state = %{
      # callers waiting for a connection, as {pid, request_ref}
      queue: :queue.new(),
      # request_ref => {pid, monitor_ref, :out, timeout}
      requests: %{},
      # monitor_ref => request_ref
      monitors: %{},
      # idle connections ready for checkout
      resources: :queue.new(),
      pool_size: pool_size,
      template: template
    }

    {:ok, state}
  end

  @impl GenServer
  def handle_cast({:out, pid, request_ref, timeout}, state) do
    monitor_ref = Process.monitor(pid)

    %{requests: requests, monitors: monitors} = state

    # FIX(review): store the timeout too — maybe_checkout/1 matches the
    # 4-tuple `{pid, monitor_ref, :out, deadline}`, but the original stored a
    # 3-tuple, silently dropping the caller's timeout.
    requests = Map.put(requests, request_ref, {pid, monitor_ref, :out, timeout})
    monitors = Map.put(monitors, monitor_ref, request_ref)
    state = %{state | requests: requests, monitors: monitors}
    # TODO(review): maybe_checkout/5 is not defined in this patch yet
    state = maybe_checkout(request_ref, monitor_ref, timeout, pid, state)

    {:noreply, state}
  end

  def handle_cast({:in, conn, monitor_ref}, state) do
    Process.demonitor(monitor_ref, [:flush])

    %{resources: resources} = state

    # NOTE(review): handle_checkin/1 is called here but the visible clauses
    # are arity-4 NimblePool leftovers; remove_worker/2 and remove_requests/2
    # are not defined in this patch yet — confirm against the full file.
    resources =
      case handle_checkin(conn) do
        {:ok, conn} ->
          :queue.in(conn, resources)

        {:remove, reason} ->
          remove_worker(reason, conn)
          resources
      end

    state = remove_requests(state, monitor_ref)
    state = maybe_checkout(%{state | resources: resources})
    {:noreply, state}
  end
# NOTE(review): reconstructed from patch hunk; the leading fragment on this
# line belongs to a deleted clause and is not part of the new code.
  # NOTE(review): the three callbacks below are unfinished stubs in this
  # patch. They now return valid GenServer values so an unexpected message
  # does not crash the pool with a bad return; the real logic is still TODO.
  @impl GenServer
  def handle_info({:DOWN, _monitor_ref, _, _, _} = _down, state) do
    # TODO drop the request/monitor bookkeeping for the dead caller
    {:noreply, state}
  end

  def handle_info({:ping, _worker}, state) do
    # TODO keep-alive / idle-connection handling
    {:noreply, state}
  end

  @impl GenServer
  def terminate(_reason, _state) do
    # TODO close checked-in connections
    :ok
  end

  # Pops waiting requests off the queue until one that is still alive can be
  # handed a connection; returns the (possibly updated) pool state.
  #
  # FIX(review): the original destructured the queue result as
  # `{{:value, {pid, ref}, queue}}`, which is malformed — `:queue.out/1`
  # returns `{{:value, item}, queue}` — and passed an unbound variable
  # `command` to the arity-5 checkout.
  defp maybe_checkout(%{queue: queue, requests: requests} = state) do
    case :queue.out(queue) do
      {{:value, {pid, ref}}, queue} ->
        case requests do
          # the request still exists, so we can checkout the resource
          %{^ref => {^pid, mon_ref, :out, deadline}} ->
            # TODO(review): maybe_checkout/5 is not defined in this patch
            # yet, and the :out cast must store this 4-tuple shape — confirm
            maybe_checkout(ref, mon_ref, deadline, pid, %{state | queue: queue})

          # it should never happen
          %{^ref => _} ->
            exit(:unexpected_checkout)

          # the request is no longer active, try the next one
          %{} ->
            maybe_checkout(%{state | queue: queue})
        end

      {:empty, _queue} ->
        state
    end
  end

  # NOTE(review): the clauses below keep their NimblePool-era arity-4 shape,
  # but the `:in` cast calls `handle_checkin(conn)` with arity 1 — this
  # mismatch is unresolved in the patch; confirm the intended contract.
  defp handle_checkin({:ok, %Mint.HTTP1{} = conn}, _from, _conn, config) do
    {:ok, conn, config}
  end

  defp handle_checkin({:remove, reason}, _from, _conn, config) do
    {:remove, reason, config}
  end

  # First checkout of a worker hands out the connect template so the borrower
  # can lazily connect; subsequent checkouts reuse the live connection.
  defp handle_checkout(:request, _from, :template = template, config) do
    {:ok, config.template, template, config}
  end

  defp handle_checkout(:request, _from, %Mint.HTTP1{} = conn, config) do
    {:ok, {:ok, conn}, conn, config}
  end

  # Idle workers are dropped rather than pinged.
  defp handle_ping(_conn, _config) do
    {:remove, :worker_idle_timeout}
  end
+ # TODO handle_info @impl NimblePool def terminate_worker(_reason, conn, config) do @@ -187,11 +240,10 @@ defmodule Ch.Pool do {:ok, config} end - defp ensure_connected({:template, scheme, host, port, options}, owner, deadline) do + defp ensure_connected({:template, scheme, host, port}, owner, deadline) do timeout = Ch.HTTP.timeout_from_deadline(deadline) - options = Keyword.put(options, :timeout, timeout) - case Mint.HTTP1.connect(scheme, host, port, options) do + case Mint.HTTP1.connect(scheme, host, port, mode: :passive, timeout: timeout) do {:ok, conn} -> case Mint.HTTP1.controlling_process(conn, owner) do {:ok, _conn} = ok -> diff --git a/lib/ch/row_binary.ex b/lib/ch/row_binary.ex index 7f001910..a0ebbde1 100644 --- a/lib/ch/row_binary.ex +++ b/lib/ch/row_binary.ex @@ -195,7 +195,7 @@ defmodule Ch.RowBinary do def encode(:json, json) do # assuming it can be sent as text and not "native" binary JSON - # i.e. assumes `settings: [input_format_binary_read_json_as_string: 1]` + # i.e. assumes `input_format_binary_read_json_as_string: 1` in Ch.HTTP.encode options # TODO encode(:string, JSON.encode_to_iodata!(json)) end @@ -1334,7 +1334,7 @@ defmodule Ch.RowBinary do :json -> # assuming it arrives as text and not "native" binary JSON - # i.e. assumes `settings: [output_format_binary_write_json_as_string: 1]` + # i.e. 
assumes `output_format_binary_write_json_as_string: 1` in Ch.HTTP.encode options # TODO decode_string_json_decode_rows(bin, types_rest, row, rows, types) diff --git a/mix.exs b/mix.exs index 6068c7eb..0e3bd361 100644 --- a/mix.exs +++ b/mix.exs @@ -71,7 +71,8 @@ defmodule Ch.MixProject do {:decimal, "~> 2.0"}, {:ecto, "~> 3.13.0", optional: true}, {:benchee, "~> 1.0", only: :bench}, - {:benchee_github, "~> 0.1", only: :bench}, + # {:benchee_github_action_benchmark, + # github: "ruslandoga/benchee_github_action_benchmark", only: :bench}, {:dialyxir, "~> 1.0", only: [:dev, :test], runtime: false}, {:ex_doc, ">= 0.0.0", only: :dev}, {:tz, "~> 0.28.1", only: :test}, diff --git a/pages/compression.md b/pages/compression.md index f72f7d53..288180c4 100644 --- a/pages/compression.md +++ b/pages/compression.md @@ -2,7 +2,7 @@ ClickHouse HTTP accepts compressed request bodies via the `content-encoding` header. Compress the **entire request body** — including the SQL statement prefix for INSERTs — -before sending. `Ch.HTTP.decode/3` automatically decompresses `gzip` and `zstd` responses. +before sending. **Further reading:** - [Compression in ClickHouse](https://clickhouse.com/docs/data-compression/compression-in-clickhouse) @@ -12,84 +12,129 @@ before sending. `Ch.HTTP.decode/3` automatically decompresses `gzip` and `zstd` - [Supercharging large data loads](https://clickhouse.com/blog/supercharge-your-clickhouse-data-loads-part2) - [What really matters for performance](https://clickhouse.com/blog/what-really-matters-for-performance-lessons-from-a-year-of-benchmarks) -## zstd (preferred, stdlib) +## ZSTD + +`:zstd` is part of OTP 28 stdlib. -`:zstd` is part of OTP 28 stdlib. ZSTD is also ClickHouse Cloud's default column -compression codec. For HTTP transport, ZSTD achieves 30–50% smaller payloads than LZ4 at -the cost of more CPU on the client during compression. 
**Use ZSTD when bandwidth costs money** (cross-region, CDN egress) or when your client has idle CPU — the server decompresses quickly enough that the transfer saving usually wins. ```elixir -rows = Ch.RowBinary.encode_rows([[1, "pageview", DateTime.utc_now()]], types) -body = IO.iodata_to_binary([ - "INSERT INTO events FORMAT RowBinaryWithNamesAndTypes\n", - Ch.RowBinary.encode_names_and_types(names, types), - rows -]) - -compressed = :zstd.compress(body) +statement = "INSERT INTO events FORMAT RowBinaryWithNamesAndTypes\n" -{path, headers, body} = - Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "zstd"}]) - -{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", path, headers, body) -``` +names = ["id", "name", "created_at"] +types = ["UInt64", "String", "DateTime"] -> The SQL statement and RowBinary payload must be compressed together as one blob. -> Only compressing the data rows and leaving the statement uncompressed does not work. +rows = [ + [1, "pageview", DateTime.utc_now()], + [2, "click", DateTime.utc_now()], + [3, "purchase", DateTime.utc_now()] +] -## gzip (stdlib, fallback) +row_binary_with_names_and_types = [ + Ch.RowBinary.encode_names_and_types(names, types) + | Ch.RowBinary.encode_rows(rows, types) +] -`:zlib.gzip/1` is available on all OTP versions. Lower compression ratio and slower than -ZSTD, but zero extra dependencies. Good choice if OTP < 28 or you need maximum compatibility. +body = :zstd.compress([statement | row_binary_with_names_and_types]) -```elixir -compressed = :zlib.gzip(body) - -{path, headers, body} = - Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "gzip"}]) +{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", "/", [{"content-encoding", "zstd"}], body) ``` -`Ch.HTTP.decode/3` decompresses `gzip` responses automatically. +## LZ4 -## lz4 (nimble_lz4) +LZ4 is not part of OTP stdlib but you can use [NimbleLZ4](https://github.com/whatyouhide/nimble_lz4). 
-LZ4 compresses and decompresses faster than both gzip and zstd, with a moderate ratio. Per the [FastFormats benchmark](https://fastformats.clickhouse.com), LZ4 is a "no-brainer" for same-region deployments: it cuts wire size roughly in half with negligible CPU overhead on both client and server. +However, due to NimbleLZ4 using dirty CPU schedulers for the NIF calls, the compression +speed is similar to `:zstd` for small payloads, so compare on your own data before making a decision. + ```elixir -{:ok, compressed} = NimbleLz4.compress(body) +statement = "INSERT INTO events FORMAT RowBinaryWithNamesAndTypes\n" -{path, headers, body} = - Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "lz4"}]) -``` +names = ["id", "name", "created_at"] +types = ["UInt64", "String", "DateTime"] -Add to deps: +rows = [ + [1, "pageview", DateTime.utc_now()], + [2, "click", DateTime.utc_now()], + [3, "purchase", DateTime.utc_now()] +] -```elixir -{:nimble_lz4, "~> 1.1"} +row_binary_with_names_and_types = [ + Ch.RowBinary.encode_names_and_types(names, types) + | Ch.RowBinary.encode_rows(rows, types) +] + +body = NimbleLZ4.compress([statement | row_binary_with_names_and_types]) + +{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", "/", [{"content-encoding", "lz4"}], body) ``` ## Response decompression -`Ch.HTTP.decode/3` decompresses responses with `content-encoding: gzip` or -`content-encoding: zstd` automatically. For other encodings (e.g. `lz4`), -decompress the body before calling `decode/3`, or simply do not request compressed -responses (the default — ClickHouse sends uncompressed unless you add -`Accept-Encoding` to the request). +`Ch.HTTP` does not decompress responses. Decompress the body yourself +before decoding. -## Which to use? +To receive a compressed response, add `accept-encoding` header to your request. +ClickHouse sends uncompressed responses by default. 
+ +```elixir +headers = [ + {"accept-encoding", "zstd"}, + {"x-clickhouse-format", "RowBinaryWithNamesAndTypes"} +] -| | gzip | zstd | lz4 | -|---|---|---|---| -| Dep | none (stdlib) | none (OTP ≥ 28) | `nimble_lz4` | -| Ratio | good | best | moderate | -| Client CPU | medium | high | low | -| Server CPU | medium | low | lowest | -| Best for | compatibility | bandwidth-sensitive | same-region throughput | +statement = "SELECT * FROM events WHERE name = {name:String}" + +params = %{"name" => "pageview"} +options = %{"query_id" => "123"} +path = Ch.HTTP.path(params, options) + +{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", path, headers, statement) + +deadline = Ch.HTTP.to_deadline(to_timeout(second: 5)) + +case Mint.HTTP1.recv(conn, 0, Ch.HTTP.to_timeout(deadline)) do + {:ok, _ref, responses} -> + case handle_responses(responses) do + {:ok, _ref, conn} -> + {:ok, _ref, conn} + + {:error, reason} -> + {:error, reason} + end + + {:error, reason} -> + {:error, reason} +end + +# 1. Accumulate Mint responses (manual recv loop omitted for brevity) +# {:ok, conn, responses} = Mint.HTTP1.recv(conn, 0, 5_000) + +# 2. Extract and decompress +resp_body = Enum.find_value(responses, fn {:data, _ref, data} -> data end) +resp_headers = Enum.find_value(responses, fn {:headers, _ref, h} -> h end) + +body = + case List.keyfind(resp_headers, "content-encoding", 0) do + {_, "zstd"} -> :zstd.decompress(resp_body) + {_, "gzip"} -> :zlib.gunzip(resp_body) + _ -> resp_body + end + +# 3. Decode +state = Ch.HTTP.decode_start() +# ... Feed responses (status, headers, body, done) to decode_continue ... +# (See README or Ch.HTTP for the full loop) +``` + +For INSERT and DDL, responses are always empty — no decompression needed. + +## Which to use? 
ClickHouse's own benchmark ([FastFormats](https://fastformats.clickhouse.com)) shows RowBinaryWithNamesAndTypes+LZ4 reduces payload to ~60% of uncompressed size with @@ -116,4 +161,4 @@ for guidance on `Delta`, `DoubleDelta`, `Gorilla`, `T64`, and when each helps. ## Tests -See [`test/ch/guides/compression_test.exs`](../test/ch/guides/compression_test.exs). +See [`test/ch/guides/compression_test.exs`](../test/ch/guides/compression_test.exs) for more examples. diff --git a/pages/inserts.md b/pages/inserts.md index 1e98d520..23e20f0a 100644 --- a/pages/inserts.md +++ b/pages/inserts.md @@ -16,8 +16,10 @@ names = ["id", "name", "created_at"] header = Ch.RowBinary.encode_names_and_types(names, types) rows = Ch.RowBinary.encode_rows([[1, "pageview", DateTime.utc_now()]], types) -body = IO.iodata_to_binary([statement, header | rows]) -{path, headers, _body} = Ch.HTTP.encode(body) +body = [header | rows] +path = Ch.HTTP.path(%{}) +headers = [{"x-clickhouse-format", "RowBinaryWithNamesAndTypes"}] +{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", path, headers, body) ``` ## Batching with a GenServer @@ -92,8 +94,9 @@ A successful INSERT response includes an `x-clickhouse-summary` header: {"written_rows": "150000", "written_bytes": "3145728", ...} ``` -Extract it from the response headers — see `Ch.HTTP.decode/3` which returns it -parsed as a map in the result. +Extract it from the response headers yourself whenever you receive an +`{:headers, ref, headers}` tuple from Mint. Use `Ch.HTTP.decode_continue/2` to +handle the rest of the response. ## Tests diff --git a/pages/json.md b/pages/json.md index 813bdcee..198f19ce 100644 --- a/pages/json.md +++ b/pages/json.md @@ -1,73 +1,117 @@ # JSON -ClickHouse has two distinct JSON-related features: storing JSON in a `String` column, -and the native `JSON` type (ClickHouse ≥ 24.1). Both are supported via `RowBinaryWithNamesAndTypes`. 
+ClickHouse 24.1+ has a native [`JSON` column type](https://clickhouse.com/docs/sql-reference/data-types/newjson) +that stores semi-structured data with automatic path inference and typed storage. +This guide covers how to use it with `RowBinaryWithNamesAndTypes` via the +[RowBinary format settings](https://clickhouse.com/docs/interfaces/formats/RowBinary#format-settings). -## JSON stored in String columns +## Required settings -Store serialised JSON as a `String`. Encode with `JSON.encode!/1` (Elixir ≥ 1.18 stdlib): +`Ch.RowBinary.encode(:json, value)` serializes JSON values as RowBinary strings containing +JSON text (using `JSON.encode_to_iodata!/1` internally). ClickHouse's default RowBinary +wire format for `JSON` columns is a complex internal binary encoding — you must opt into +the string representation on both sides: + +| Direction | Setting | +|---|---| +| INSERT | `input_format_binary_read_json_as_string: true` | +| SELECT | `output_format_binary_write_json_as_string: true` | + +Pass them via `Ch.HTTP.path/2`: ```elixir -types = ["UInt64", "String"] -names = ["id", "metadata"] +# SELECT +path = Ch.HTTP.path(%{}, output_format_binary_write_json_as_string: true) + +# INSERT +path = Ch.HTTP.path(%{}, input_format_binary_read_json_as_string: true) +``` +## INSERT + +Pass Elixir maps or lists directly — the library calls `JSON.encode_to_iodata!/1` +internally, no manual encoding needed: + +```elixir rows = [ - [1, JSON.encode!(%{source: "web", browser: "Firefox"})], - [2, JSON.encode!(%{source: "mobile", os: "iOS"})] + [1, %{"action" => "click", "element" => "button"}], + [2, %{"action" => "view", "page" => "/home"}] ] +types = ["UInt64", "JSON"] +names = ["id", "data"] + body = [ "INSERT INTO events FORMAT RowBinaryWithNamesAndTypes\n", Ch.RowBinary.encode_names_and_types(names, types), Ch.RowBinary.encode_rows(rows, types) ] -``` -Query it back with ClickHouse JSON functions: +path = Ch.HTTP.path(%{}, input_format_binary_read_json_as_string: true) -```sql 
-SELECT id, JSONExtractString(metadata, 'source') FROM events +{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", path, headers, body) ``` -## Native JSON type (ClickHouse ≥ 24.1) - -The `JSON` column type stores semi-structured data with automatic column extraction. -In `RowBinaryWithNamesAndTypes`, `JSON` columns are encoded and decoded as Elixir -maps or lists — the same as any other term: +## SELECT ```elixir -types = ["UInt64", "JSON"] -names = ["id", "data"] +path = Ch.HTTP.path(%{}, output_format_binary_write_json_as_string: true) + +{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", path, headers, body) +{:ok, conn, responses} = Mint.HTTP1.recv(conn, 0, 5_000) + +state = Ch.HTTP.decode_start() +{_state, rows} = Enum.reduce(responses, {state, []}, fn response, {state, acc} -> + case Ch.HTTP.decode_continue(state, response) do + {:rows, rows, _names, state} -> {state, acc ++ rows} + {:cont, state} -> {state, acc} + {:ok, _names, rows} -> {state, acc ++ rows} + _ -> {state, acc} + end +end) +# rows => [[1, %{"action" => "click", "element" => "button"}], ...] +``` -rows = [ - [1, %{"action" => "click", "element" => "button"}], - [2, %{"action" => "view", "page" => "/home"}] -] +`Ch.HTTP` streaming decoder feeds `Ch.RowBinary.decode_rows_continue/3`, which decodes RowBinary strings +back into Elixir maps using `JSON.decode!/1` for the `:json` type. -header = Ch.RowBinary.encode_names_and_types(names, types) -encoded = Ch.RowBinary.encode_rows(rows, types) -``` +## Typed paths -Decoding a response with `JSON` columns returns Elixir maps: +The `JSON` column type optionally accepts type hints for known paths, which ClickHouse +stores with fixed types rather than inferred ones: -```elixir -{:ok, names, rows} = Ch.HTTP.decode(status, headers, body) -# rows => [[1, %{"action" => "click", ...}], ...] 
+```sql +CREATE TABLE events ( + id UInt64, + data JSON( + action LowCardinality(String), + ts DateTime64(3, 'UTC') + ) +) ENGINE = MergeTree ORDER BY id ``` -## Querying JSON sub-fields +Typed paths don't change the wire format — the same string encoding applies. The values +for typed paths just need to match their declared types. -To extract JSON fields server-side (avoiding transferring full JSON blobs), use -ClickHouse's `JSON_VALUE`, `JSONExtract*`, or the `.field` accessor syntax for the -native `JSON` type: +## Performance note -```sql -SELECT id, data.action FROM events --- returns rows where data.action is a String -``` +`Ch.RowBinary.encode_rows/2` calls `encoding_types/1` on every invocation to decode +type strings (`"UInt64"`, `"JSON"`) into internal atoms. If you're inserting in a tight +loop across many batches, this is repeated work. A future `Ch.Buffer` struct would cache +the pre-decoded types alongside accumulated rows to avoid this overhead. + +Until then, you can call `Ch.RowBinary.encoding_types/1` once and reuse the result across +batches by using the internal `Ch.RowBinary._encode_rows/2`. + +## Format settings reference + +See [RowBinary format settings](https://clickhouse.com/docs/interfaces/formats/RowBinary#format-settings). -The resulting `RowBinaryWithNamesAndTypes` response will have the extracted field -as a `String` (or inferred type) column — decode normally. 
+| Setting | Default | Effect | +|---|---|---| +| `output_format_binary_write_json_as_string` | `false` | Write JSON columns as JSON strings in SELECT | +| `input_format_binary_read_json_as_string` | `false` | Read JSON columns from JSON strings in INSERT | +| `format_binary_max_string_size` | `1GiB` | Maximum string length in RowBinary | ## Tests diff --git a/pages/multihost.md b/pages/multihost.md index e69de29b..86ea01de 100644 --- a/pages/multihost.md +++ b/pages/multihost.md @@ -0,0 +1,165 @@ +# Multi-host + +> **Planned, not yet implemented.** + +Support for multiple ClickHouse endpoints — load balancing, read replicas, failover — +is planned for `Ch.Pool`. This page describes the intended design. + +## Comparison: clickhouse-go + +The [Go client](https://clickhouse.com/docs/integrations/language-clients/go/configuration#connecting-to-multiple-nodes) +is the closest reference point given it also targets the HTTP interface. + +```go +conn, err := clickhouse.Open(&clickhouse.Options{ + Addr: []string{"ch-1:8123", "ch-2:8123", "ch-3:8123"}, + ConnOpenStrategy: clickhouse.ConnOpenRoundRobin, + // ... +}) +``` + +Go exposes three strategies via `ConnOpenStrategy`: + +| Strategy | Behaviour | +|---|---| +| `ConnOpenInOrder` (default) | Try first address; fall back to later ones only on failure. Pure failover / primary-replica. | +| `ConnOpenRoundRobin` | Cycle through addresses in order. | +| `ConnOpenRandom` | Pick at random. | + +Notable points from the Go docs: +- Pool sizing is `MaxOpenConns` / `MaxIdleConns` (default 5/10). +- `ConnMaxLifetime` defaults to **1 hour**. Their own docs warn this causes load imbalance + when a failed node recovers — connections won't rebalance for up to an hour. They recommend + lowering it for heavy workloads. +- No down-host tracking. The `InOrder` strategy retries later addresses, but there's no + internal "mark this host bad" state. + +**Where ch already does better**: `worker_idle_timeout` defaults to 5 seconds. 
A worker +that has been idle for 5 s is removed, which means connections naturally cycle far more +aggressively than Go's 1-hour lifetime. Ch workers reconnect to a (potentially new) host +on the next checkout. No special `ConnMaxLifetime` concept needed. That said, if all +pool workers are always busy (high load), idle timeout never fires — the connection TTL +question from the previous section still applies. + +**Where Go does better**: Three configurable strategies including `InOrder` +(primary/replica failover) which ch does not plan to support initially. + +## Proposed API + +```elixir +Ch.Pool.start_link( + urls: [ + "http://ch-1.internal:8123", + "http://ch-2.internal:8123", + "http://ch-3.internal:8123" + ], + pool_size: 15, + worker_idle_timeout: to_timeout(second: 5), + # optional, default: :random + connect_strategy: :random # | :round_robin | :in_order +) +``` + +`urls:` replaces the current `url:` option. A single-element list or the original `url:` +scalar are both accepted for backward compatibility. + +## Connection strategies + +### `:random` (default) + +Pick uniformly at random from available (non-down) hosts. No shared counter, no +accumulated skew after worker churn. The correct default for symmetric replica sets. + +### `:round_robin` + +Increment a counter in pool state, take `rem(counter, length(hosts))`. Offers more +even initial distribution than random for small pools, but skews after disconnects just +as in Go. Suitable when you want deterministic spread and your pool is stable. + +### `:in_order` + +Try hosts in list order; only use later hosts if earlier ones are down. +Primary/replica failover: all traffic goes to `ch-1`, only spills to `ch-2` when +`ch-1` is unreachable. This is Go's `ConnOpenInOrder`. + +Host selection happens in `handle_checkout/4` which has access to full pool state +including the down-host map, so all three strategies can filter out known-down hosts +before selection. 
+ +## Down-host tracking + +The Go client has no equivalent of this — it's a ch addition. + +When `ensure_connected` returns `{:error, reason}`, the caller does +`{:remove, {:connect_error, host, port, reason}}`. `handle_checkin` catches this +and marks the host down in pool state: + +```elixir +%{ + hosts: [{:http, "ch-1.internal", 8123}, ...], + down: %{ + {"ch-2.internal", 8123} => down_until_monotonic_ms + } +} +``` + +**Connect error vs request error distinction**: if a connection was alive and the +request failed (timeout, CH error response), the host is fine — don't penalise it. +Only TCP-level connect failures mark a host down. + +**Fallback when all hosts are down**: pick randomly from all hosts anyway. Better to +attempt a potentially-recovered connection and get a fast TCP refusal than to stall +the caller indefinitely. + +## Re-enabling down hosts + +No active probe process is needed. Down entries have a `down_until` timestamp. +`handle_checkout` ignores any host where `now < down_until`. Expired entries are +naturally bypassed and a periodic `handle_info` GC removes stale map entries: + +```elixir +def init_pool(config) do + schedule_gc() + {:ok, config} +end + +def handle_info(:gc_down_hosts, state) do + now = System.monotonic_time(:millisecond) + down = Map.reject(state.down, fn {_host, until} -> until <= now end) + schedule_gc() + {:ok, %{state | down: down}} +end +``` + +Default `down_ttl: to_timeout(second: 30)`, configurable. + +## Connection TTL for always-busy pools + +`worker_idle_timeout` (default 5 s) handles the quiet case: idle workers are removed +and replaced with fresh connections on next checkout. This is already much better than +Go's 1-hour default. + +For high-load scenarios where all workers stay checked out continuously, idle timeout +never fires. 
A future `max_connection_age` option would forcibly recycle a connection +after a wall-clock TTL regardless of activity, checked on checkin: + +```elixir +defp checkin(conn, connected_at, max_age) do + if Mint.HTTP1.open?(conn) and max_age_ok?(connected_at, max_age) do + {:ok, conn} + else + {:remove, :ttl_expired} + end +end +``` + +Not planned for the initial implementation. + +## Open questions + +- Should `connect_strategy` default to `:random` or `:round_robin`? Random is simpler + and avoids skew; round-robin is more intuitive. +- Should `urls:` accept `host:port` pairs (like Go) in addition to full URLs? Full URLs + are more explicit about scheme and path; `host:port` is less typing. +- Telemetry event when all hosts are down and the fallback kicks in? +- `down_ttl` configurable per-pool or fixed? diff --git a/pages/streaming.md b/pages/streaming.md index e4290af9..e2f86928 100644 --- a/pages/streaming.md +++ b/pages/streaming.md @@ -11,24 +11,27 @@ rows as Mint chunks arrive rather than buffering the entire response body first. ## API -`decode_start/1` initialises a decoder from response headers (inspects -`x-clickhouse-format`). `decode_continue/2` accepts raw binary chunks extracted -from `{:data, ref, chunk}` Mint responses, and returns rows incrementally. +`decode_start/1` initialises the decoder configuration (e.g. custom `decoders`). +`decode_continue/2` accepts Mint response tuples directly (`:status`, `:headers`, +`:data`, `:done`, `:error`) and returns decoded rows or transition states. ``` {:rows, rows, names, state} -- rows decoded from this chunk; continue -{:more, state} -- no complete rows yet; continue -{:ok, names, []} -- done (all rows already emitted via :rows) -{:error, Ch.Error.t()} -- ClickHouse error +{:cont, state} -- tuple consumed, state advanced; continue +{:ok, names, rows} -- done (emits any final rows) +:ok -- done (no rows, e.g. 
DDL/INSERT) +{:error, Ch.Error.t()} -- ClickHouse server-side error +{:error, reason} -- Mint connection or transport error ``` ## Passive mode (recv loop) ```elixir -{path, headers, body} = Ch.HTTP.encode("SELECT number FROM system.numbers LIMIT 10000000") -{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", path, headers, body) +path = Ch.HTTP.path(%{}) +body = "SELECT number FROM system.numbers LIMIT 10000000" +{:ok, _ref, conn} = Mint.HTTP1.request(conn, "POST", path, [], body) -state = nil +state = Ch.HTTP.decode_start() conn = Stream.resource( @@ -36,22 +39,15 @@ conn = fn {conn, state} -> case Mint.HTTP1.recv(conn, 0, 5_000) do {:ok, conn, responses} -> + # Feed Mint responses directly to the decoder {rows, state} = - Enum.reduce(responses, {[], state}, fn - {:status, _ref, _status}, acc -> - acc - - {:headers, _ref, headers}, {rows, _state} -> - {rows, Ch.HTTP.decode_start(headers)} - - {:data, _ref, chunk}, {rows, state} -> - case Ch.HTTP.decode_continue(chunk, state) do - {:rows, new_rows, _names, state} -> {rows ++ new_rows, state} - {:more, state} -> {rows, state} - end - - {:done, _ref}, acc -> - acc + Enum.reduce(responses, {[], state}, fn resp, {rows, state} -> + case Ch.HTTP.decode_continue(state, resp) do + {:rows, new_rows, _names, state} -> {rows ++ new_rows, state} + {:cont, state} -> {rows, state} + {:ok, _names, new_rows} -> {rows ++ new_rows, state} + _ -> {rows, state} + end end) {rows, {conn, state}} @@ -78,31 +74,22 @@ defp recv_loop(conn, state) do case Mint.HTTP1.stream(conn, message) do {:ok, conn, responses} -> state = - Enum.reduce(responses, state, fn - {:status, _ref, _status}, state -> - state - - {:headers, _ref, headers}, _state -> - Ch.HTTP.decode_start(headers) - - {:data, _ref, chunk}, state -> - case Ch.HTTP.decode_continue(chunk, state) do - {:rows, rows, names, state} -> - handle_rows(rows, names) - state - - {:more, state} -> - state - end - - {:done, _ref}, state -> - {:done, state} + Enum.reduce(responses, state, fn 
resp, state -> + case Ch.HTTP.decode_continue(state, resp) do + {:rows, rows, names, state} -> + handle_rows(rows, names) + state + + {:cont, state} -> + state + + # Check for termination in a real app + _ -> + state + end end) - case state do - {:done, _} -> :ok - state -> recv_loop(conn, state) - end + recv_loop(conn, state) end end end diff --git a/pages/telemetry.md b/pages/telemetry.md new file mode 100644 index 00000000..e69de29b diff --git a/test/ch/guides/compression_test.exs b/test/ch/guides/compression_test.exs index 16006d2a..30ed82a5 100644 --- a/test/ch/guides/compression_test.exs +++ b/test/ch/guides/compression_test.exs @@ -17,14 +17,11 @@ defmodule Ch.Guides.CompressionTest do end describe "zstd (OTP 28 stdlib)" do - test "encodes body as zstd and sets content-encoding header" do - body = sample_body() - compressed = :zstd.compress(body) - - {_path, headers, ^compressed} = - Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "zstd"}]) + test "sets content-encoding header in path" do + _body = sample_body() + path = Ch.HTTP.path(%{}, [{"content-encoding", "zstd"}]) - assert List.keyfind(headers, "content-encoding", 0) == {"content-encoding", "zstd"} + assert path =~ "content-encoding=zstd" end test "zstd roundtrip: compressed body decompresses to original" do @@ -33,12 +30,15 @@ defmodule Ch.Guides.CompressionTest do assert :zstd.decompress(compressed) == body end - test "decode/3 auto-decompresses zstd response" do + test "streaming decode with manual zstd decompression" do rows = [[1, "pageview", ~N[2024-01-01 00:00:00]]] - rb_body = IO.iodata_to_binary([ - Ch.RowBinary.encode_names_and_types(@names, @types), - Ch.RowBinary.encode_rows(rows, @types) - ]) + + rb_body = + IO.iodata_to_binary([ + Ch.RowBinary.encode_names_and_types(@names, @types), + Ch.RowBinary.encode_rows(rows, @types) + ]) + compressed = :zstd.compress(rb_body) headers = [ @@ -46,19 +46,26 @@ defmodule Ch.Guides.CompressionTest do {"content-encoding", "zstd"} ] - 
assert {:ok, @names, ^rows} = Ch.HTTP.decode(200, headers, compressed) + # Manual decompression before decoding + body = + case List.keyfind(headers, "content-encoding", 0) do + {_, "zstd"} -> :zstd.decompress(compressed) + _ -> compressed + end + + state = Ch.HTTP.decode_start() + {:cont, state} = Ch.HTTP.decode_continue(state, {:status, nil, 200}) + {:cont, state} = Ch.HTTP.decode_continue(state, {:headers, nil, headers}) + assert {:rows, ^rows, @names, _state} = Ch.HTTP.decode_continue(state, {:data, nil, body}) end end describe "gzip (stdlib)" do - test "encodes body as gzip and sets content-encoding header" do - body = sample_body() - compressed = :zlib.gzip(body) - - {_path, headers, ^compressed} = - Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "gzip"}]) + test "sets content-encoding in path" do + _body = sample_body() + path = Ch.HTTP.path(%{}, [{"content-encoding", "gzip"}]) - assert List.keyfind(headers, "content-encoding", 0) == {"content-encoding", "gzip"} + assert path =~ "content-encoding=gzip" end test "gzip roundtrip: compressed body decompresses to original" do @@ -67,12 +74,15 @@ defmodule Ch.Guides.CompressionTest do assert :zlib.gunzip(compressed) == body end - test "decode/3 auto-decompresses gzip response" do + test "streaming decode with manual gzip decompression" do rows = [[1, "pageview", ~N[2024-01-01 00:00:00]]] - rb_body = IO.iodata_to_binary([ - Ch.RowBinary.encode_names_and_types(@names, @types), - Ch.RowBinary.encode_rows(rows, @types) - ]) + + rb_body = + IO.iodata_to_binary([ + Ch.RowBinary.encode_names_and_types(@names, @types), + Ch.RowBinary.encode_rows(rows, @types) + ]) + gzipped = :zlib.gzip(rb_body) headers = [ @@ -80,21 +90,29 @@ defmodule Ch.Guides.CompressionTest do {"content-encoding", "gzip"} ] - assert {:ok, @names, ^rows} = Ch.HTTP.decode(200, headers, gzipped) + # Manual decompression before decoding + body = + case List.keyfind(headers, "content-encoding", 0) do + {_, "gzip"} -> :zlib.gunzip(gzipped) 
+ _ -> gzipped + end + + state = Ch.HTTP.decode_start() + {:cont, state} = Ch.HTTP.decode_continue(state, {:status, nil, 200}) + {:cont, state} = Ch.HTTP.decode_continue(state, {:headers, nil, headers}) + assert {:rows, ^rows, @names, _state} = Ch.HTTP.decode_continue(state, {:data, nil, body}) end end describe "lz4 (nimble_lz4)" do @tag :lz4 - test "encode and decode lz4 compressed body" do + test "lz4 roundtrip and path setting" do body = sample_body() {:ok, compressed} = NimbleLz4.compress(body) assert NimbleLz4.decompress(compressed) == {:ok, body} - {_path, headers, ^compressed} = - Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "lz4"}]) - - assert List.keyfind(headers, "content-encoding", 0) == {"content-encoding", "lz4"} + path = Ch.HTTP.path(%{}, [{"content-encoding", "lz4"}]) + assert path =~ "content-encoding=lz4" end end diff --git a/test/ch/guides/inserts_test.exs b/test/ch/guides/inserts_test.exs index 15cde2c0..bc9aaa75 100644 --- a/test/ch/guides/inserts_test.exs +++ b/test/ch/guides/inserts_test.exs @@ -41,10 +41,8 @@ defmodule Ch.Guides.InsertsTest do compressed = :zlib.gzip(body) assert :zlib.gunzip(compressed) == body - {_path, headers, ^compressed} = - Ch.HTTP.encode(compressed, %{}, headers: [{"content-encoding", "gzip"}]) - - assert {"content-encoding", "gzip"} in headers + path = Ch.HTTP.path(%{}, [{"content-encoding", "gzip"}]) + assert path =~ "content-encoding=gzip" end end diff --git a/test/ch/guides/json_test.exs b/test/ch/guides/json_test.exs index b7fa3b0c..8fc3f778 100644 --- a/test/ch/guides/json_test.exs +++ b/test/ch/guides/json_test.exs @@ -3,60 +3,91 @@ defmodule Ch.Guides.JsonTest do use ExUnit.Case, async: true import Ch.RowBinary - describe "JSON stored in String columns" do - test "encode and decode JSON-in-String roundtrip" do - types = ["UInt64", "String"] - names = ["id", "metadata"] + @types ["UInt64", "JSON"] + @names ["id", "data"] - rows = [ - [1, JSON.encode!(%{"source" => "web", "browser" => 
"Firefox"})], - [2, JSON.encode!(%{"source" => "mobile", "os" => "iOS"})] - ] + @rows [ + [1, %{"action" => "click", "element" => "button"}], + [2, %{"action" => "view", "page" => "/home"}] + ] - encoded = - IO.iodata_to_binary([ - encode_names_and_types(names, types), - encode_rows(rows, types) - ]) + describe "encode(:json, value)" do + test "maps are encoded as RowBinary strings via JSON.encode_to_iodata!" do + encoded = IO.iodata_to_binary(encode(:json, %{"action" => "click"})) + # RowBinary string: 1-byte LEB128 length prefix + JSON text + <> = encoded + assert len == byte_size(rest) + assert Jason.decode!(rest) == %{"action" => "click"} + end - assert [^names | decoded_rows] = decode_names_and_rows(encoded) - assert decoded_rows == rows + test "lists are encoded as RowBinary strings" do + encoded = IO.iodata_to_binary(encode(:json, [1, 2, 3])) + <> = encoded + assert len == byte_size(rest) + assert Jason.decode!(rest) == [1, 2, 3] end - test "JSON values survive RowBinary encode/decode as strings" do - json = JSON.encode!(%{"nested" => %{"key" => [1, 2, 3]}}) - encoded = IO.iodata_to_binary(encode(:string, json)) - assert decode_rows(encoded, [:string]) == [[json]] + test "nil encodes as JSON null" do + # nil is encoded as the 4-byte JSON literal "null" in string mode + encoded = IO.iodata_to_binary(encode(:json, nil)) + <> = encoded + assert len == 4 # "null" end end - describe "native JSON type" do - test "JSON map encodes and decodes as Elixir map" do - types = ["UInt64", "JSON"] - names = ["id", "data"]
@names + assert decoded_rows == @rows + end - rows = [ - [1, %{"action" => "click", "element" => "button"}], - [2, %{"action" => "view", "page" => "/home"}] - ] + test "no manual JSON.encode! needed — maps pass directly" do + # Encoding maps/lists directly works; the library calls JSON.encode_to_iodata! internally + rows = [[1, %{"nested" => %{"key" => [1, 2, 3]}}]] + encoded = IO.iodata_to_binary(encode_rows(rows, @types)) + assert decode_rows(encoded, @types) == rows + end + end - encoded = IO.iodata_to_binary(encode_rows(rows, types)) - assert decode_rows(encoded, types) == rows + describe "query options encoding for JSON" do + test "input_format_binary_read_json_as_string goes into query string, not param_" do + path = Ch.HTTP.path(%{}, input_format_binary_read_json_as_string: true) + assert path =~ "input_format_binary_read_json_as_string=true" + refute path =~ "param_input_format" end - test "JSON list encodes and decodes as Elixir list" do - types = ["JSON"] - rows = [[[1, 2, 3]], [nil]] + test "output_format_binary_write_json_as_string goes into query string, not param_" do + path = Ch.HTTP.path(%{}, output_format_binary_write_json_as_string: true) + assert path =~ "output_format_binary_write_json_as_string=true" + refute path =~ "param_output_format" + end - encoded = IO.iodata_to_binary(encode_rows(rows, types)) - assert decode_rows(encoded, types) == rows + test "query options coexist with SQL params" do + path = Ch.HTTP.path( + %{"city" => "Prague"}, + output_format_binary_write_json_as_string: true + ) + assert path =~ "param_city=Prague" + assert path =~ "output_format_binary_write_json_as_string=true" end end @tag :integration - describe "live ClickHouse" do - test "INSERT and SELECT with String column containing JSON" - test "INSERT and SELECT with native JSON column type" - test "SELECT JSON sub-field with data.field accessor syntax" + describe "live ClickHouse — native JSON type" do + test "INSERT and SELECT with JSON column using both string-mode 
settings" + test "typed paths in JSON schema (action LowCardinality(String))" + test "nested JSON objects roundtrip" + test "nil JSON value roundtrip" end end diff --git a/test/ch/guides/streaming_test.exs b/test/ch/guides/streaming_test.exs index fc5cb0d6..711ad944 100644 --- a/test/ch/guides/streaming_test.exs +++ b/test/ch/guides/streaming_test.exs @@ -6,32 +6,48 @@ defmodule Ch.Guides.StreamingTest do # Simulates a RowBinaryWithNamesAndTypes response split into N chunks, # runs it through decode_start/decode_continue, collects all rows. defp stream_decode(binary, chunk_size) do + state = Ch.HTTP.decode_start() headers = [{"x-clickhouse-format", "RowBinaryWithNamesAndTypes"}] - state = Ch.HTTP.decode_start(headers) - chunks = for <>, do: chunk + responses = [ + {:status, nil, 200}, + {:headers, nil, headers} + ] + + responses = + responses ++ + for <>, do: {:data, nil, chunk} + remainder_size = rem(byte_size(binary), chunk_size) - chunks = + responses = if remainder_size > 0 do - chunks ++ [binary_part(binary, byte_size(binary) - remainder_size, remainder_size)] + responses ++ + [{:data, nil, binary_part(binary, byte_size(binary) - remainder_size, remainder_size)}] else - chunks + responses end + responses = responses ++ [{:done, nil}] + {names, rows, state} = - Enum.reduce(chunks, {nil, [], state}, fn chunk, {names, rows_acc, state} -> - case Ch.HTTP.decode_continue(chunk, state) do - {:rows, new_rows, chunk_names, state} -> - {names || chunk_names, rows_acc ++ new_rows, state} + Enum.reduce(responses, {nil, [], state}, fn resp, {names, rows_acc, state} -> + case Ch.HTTP.decode_continue(state, resp) do + {:rows, new_rows, chunk_names, new_state} -> + {names || chunk_names, rows_acc ++ new_rows, new_state} + + {:cont, new_state} -> + {names, rows_acc, new_state} + + {:ok, chunk_names, new_rows} -> + {names || chunk_names, rows_acc ++ new_rows, nil} - {:more, state} -> - {names, rows_acc, state} + :ok -> + {names, rows_acc, nil} end end) - {:ok, final_names, 
final_rows} = Ch.HTTP.decode_continue(:end_of_input, state) - {names || final_names, rows ++ final_rows} + {names, rows} end describe "decode_start/decode_continue" do @@ -81,17 +97,19 @@ defmodule Ch.Guides.StreamingTest do end test "empty response" do - headers = [{"x-clickhouse-format", "RowBinaryWithNamesAndTypes"}] - state = Ch.HTTP.decode_start(headers) - assert {:ok, [], []} = Ch.HTTP.decode_continue(:end_of_input, state) + state = Ch.HTTP.decode_start() + {:cont, state} = Ch.HTTP.decode_continue(state, {:status, nil, 200}) + {:cont, state} = Ch.HTTP.decode_continue(state, {:headers, nil, []}) + assert :ok = Ch.HTTP.decode_continue(state, {:done, nil}) end - test "non-RowBinary format accumulates raw body" do + test "unknown format accumulates error body" do + state = Ch.HTTP.decode_start() headers = [{"x-clickhouse-format", "TabSeparated"}] - state = Ch.HTTP.decode_start(headers) - {:more, state} = Ch.HTTP.decode_continue("col1\tcol2\n", state) - {:more, state} = Ch.HTTP.decode_continue("val1\tval2\n", state) - assert {:ok, [], [_body]} = Ch.HTTP.decode_continue(:end_of_input, state) + {:cont, state} = Ch.HTTP.decode_continue(state, {:status, nil, 200}) + {:cont, state} = Ch.HTTP.decode_continue(state, {:headers, nil, headers}) + {:cont, state} = Ch.HTTP.decode_continue(state, {:data, nil, "col1\tcol2\n"}) + assert {:error, {:unknown_format, "TabSeparated"}} = Ch.HTTP.decode_continue(state, {:done, nil}) end end From 08692c868b35533e7ed2a4daeb1878421e1bc3f6 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 20 Apr 2026 01:17:49 +0300 Subject: [PATCH 13/13] continue --- lib/ch/http.ex | 200 +------------------------- lib/ch/pool.ex | 372 ++++++++++++++++++++++++++++++------------------- mix.exs | 2 + 3 files changed, 232 insertions(+), 342 deletions(-) diff --git a/lib/ch/http.ex b/lib/ch/http.ex index 194ad473..b75ccc0b 100644 --- a/lib/ch/http.ex +++ b/lib/ch/http.ex @@ -1,6 +1,6 @@ defmodule Ch.HTTP do @moduledoc """ - Stateless helpers for `Mint.HTTP1` 
with ClickHouse-specific encoding and decoding. + Stateless helpers for `Mint.HTTP1` for ClickHouse. """ import Kernel, except: [to_timeout: 1] @@ -61,204 +61,6 @@ defmodule Ch.HTTP do end end - @doc """ - Initialises a streaming ClickHouse response decoder. - - Accepts an optional `decoders` map, mapping from format name to a decoder function. - - Only `RowBinaryWithNamesAndTypes` format is supported by default. For all other formats, - the data is left as is. - """ - def decode_start(opts \\ []) do - decoders = - Keyword.get(opts, :decoders, %{ - "RowBinaryWithNamesAndTypes" => &__MODULE__.decode_rowbinary_stream/2, - :_ => &__MODULE__.decode_raw_stream/2 - }) - - {:init, decoders} - end - - @doc false - def decode_rowbinary_stream(new_data, {:rows, names, types, prev_data, state}) do - data = prev_data <> new_data - {rows, rest, state} = Ch.RowBinary.decode_rows_continue(data, types, state) - {:more, %{names: names, rows: rows}, {:rows, names, types, rest, state}} - end - - def decode_rowbinary_stream(new_data, state) do - data = - case state do - :init -> new_data - {:header, prev_data} -> prev_data <> new_data - end - - case Ch.RowBinary.decode_header(data) do - :more -> {:more, [], {:header, data}} - {:ok, names, types, rest} -> decode_rowbinary_stream(rest, {:rows, names, types, rest, nil}) - end - end - - @doc false - def decode_raw_stream(data, state) do - {:more, data, state} - end - - @doc """ - Feeds a Mint response tuple into the streaming decoder. - - This function handles the entire Mint response lifecycle (`:status`, `:headers`, - `:data`, `:done`, `:error`) for a single request. 
- """ - @spec decode_continue(Mint.Types.response(), decoder) :: - :ok - | {:more, decoded, decoder} - | {:error, error} - | :done - when decoded: term, - decoder: term, - error: Mint.Types.error() | Ch.Error.t() - def decode_continue(response, decoder) - - def decode_continue({:status, _ref, status}, {:init, decoders}) do - {:cont, {:status, status, decoders}} - end - - def decode_continue({:headers, _ref, headers}, {:status, 200, decoders}, ) do - format = get_header(headers, "x-clickhouse-format") - - state = - cond do - format == "RowBinaryWithNamesAndTypes" -> - {:rowbinary, <<>>} - - format == nil -> - {:empty} - - decoder = decoders[format] -> - {:custom, decoder, decoder.decode_start(headers)} - - true -> - {:unknown_format, format} - end - - {:cont, state} - end - - def decode_continue({:headers, _ref, headers}, {:status, _status, decoders}) do - code = - if code = get_header(headers, "x-clickhouse-exception-code") do - String.to_integer(code) - end - - {:cont, {:error_body, status, code, []}} - end - - def decode_continue({:data, _ref, chunk}, decoder) do - decode_continue_data(state, chunk) - end - - def decode_continue({:done, _ref}, decoder) do - decode_continue_data(state, :done) - end - - def decode_continue({:error, _ref, reason}, _decoder) do - {:error, reason} - end - - defp decode_continue_data(state, chunk_or_done) - - defp decode_continue_data({:custom, decoder, state}, chunk_or_done) do - case decoder.decode_continue(state, chunk_or_done) do - {:rows, rows, names, new_state} -> {:rows, rows, names, {:custom, decoder, new_state}} - {:cont, new_state} -> {:cont, {:custom, decoder, new_state}} - {:ok, names, rows} -> {:ok, names, rows} - :ok -> :ok - {:error, error} -> {:error, error} - end - end - - # --- :done (finalise) --- - - # empty body before RowBinary header — DDL/INSERT sent with wrong format header? 
- defp decode_continue_data({:rowbinary, <<>>}, :done), do: :ok - - defp decode_continue_data({:rowbinary, _buf}, :done) do - {:error, - Ch.Error.exception(code: nil, message: "incomplete RowBinaryWithNamesAndTypes header")} - end - - defp decode_continue_data({:decoding_rows, names, _types, _row_state, _remainder}, :done) do - # all rows emitted via {:rows, ...} during streaming - {:ok, names, []} - end - - defp decode_continue_data({:empty}, :done), do: :ok - - defp decode_continue_data({:unknown_format, format}, :done) do - {:error, {:unknown_format, format}} - end - - defp decode_continue_data({:error_body, _status, code, acc}, :done) do - {:error, Ch.Error.exception(code: code, message: IO.iodata_to_binary(acc))} - end - - # --- binary chunks --- - - defp decode_continue_data({:rowbinary, buf}, chunk) when is_binary(chunk) do - buf = buf <> chunk - - case Ch.RowBinary.decode_header(buf) do - :more -> - {:cont, {:rowbinary, buf}} - - {:ok, names, types, rest} -> - {rows, remainder, row_state} = Ch.RowBinary.decode_rows_continue(rest, types, nil) - new_state = {:decoding_rows, names, types, row_state, remainder} - - case rows do - [] -> {:cont, new_state} - _ -> {:rows, rows, names, new_state} - end - end - end - - defp decode_continue_data({:decoding_rows, names, types, row_state, remainder}, chunk) - when is_binary(chunk) do - {rows, new_remainder, new_row_state} = - Ch.RowBinary.decode_rows_continue(remainder <> chunk, types, row_state) - - new_state = {:decoding_rows, names, types, new_row_state, new_remainder} - - case rows do - [] -> {:cont, new_state} - _ -> {:rows, rows, names, new_state} - end - end - - defp decode_continue_data({:empty}, chunk) when is_binary(chunk) do - # unexpected data on what should be an empty response; ignore - {:cont, {:empty}} - end - - defp decode_continue_data({:unknown_format, format}, chunk) when is_binary(chunk) do - # discard chunks; error reported at :done - {:cont, {:unknown_format, format}} - end - - defp 
decode_continue_data({:error_body, status, code, acc}, chunk) when is_binary(chunk) do - {:cont, {:error_body, status, code, [acc | chunk]}} - end - - ## Private helpers - - defp get_header(headers, key) do - case List.keyfind(headers, key, 0) do - {_, value} -> value - nil -> nil - end - end - # Encodes query parameters for ClickHouse HTTP URL binding. # # ClickHouse uses an "escaped" parameter format identical to its TSV format escaping diff --git a/lib/ch/pool.ex b/lib/ch/pool.ex index af1d2113..b0480601 100644 --- a/lib/ch/pool.ex +++ b/lib/ch/pool.ex @@ -1,17 +1,64 @@ defmodule Ch.Pool do @moduledoc """ - TODO - """ + Connection pool for ClickHouse HTTP requests using NimblePool and Mint. + + Connections are established lazily during checkout in the caller process. + Idle connections close after 5 seconds by default (ClickHouse default `keep_alive_timeout` is 10 seconds). - use GenServer + Queries default to the `RowBinaryWithNamesAndTypes` format. + """ - @type statement :: iodata - @type params :: %{String.t() => term} + @behaviour NimblePool @query_timeout to_timeout(second: 30) - # TODO - @type query_result :: term + @query_headers [ + {"x-clickhouse-format", "RowBinaryWithNamesAndTypes"}, + {"user-agent", "ch/#{Ch.MixProject.version()}"} + ] + + @typedoc """ + The query payload. + + This can be a standard SQL string or SQL appended with RowBinary data (`[sql, ?\n, rowbinary]`). + If providing compressed payloads, pass the appropriate `content-encoding` header. + """ + @type query_statement :: iodata + + @typedoc """ + Query parameters map mapped to ClickHouse parameters (e.g., `{a:UInt64}`). + + These are encoded directly into the URL query string and are subject to URL length limits. + """ + @type query_params :: %{String.t() => term} + + # TODO add :strings :copy | :auto | etc. + + @typedoc """ + Query execution options. + + * `:timeout` - Request timeout, defaults to 30 seconds. 
+ * `:query` - An enumerable (usually a map or a keyword list) added to the URL query string. Used for ClickHouse settings, `query_id`, etc. + * `:headers` - Headers passed directly to Mint. Defaults to "x-clickhouse-format" set to "RowBinaryWithNamesAndTypes" and "user-agent" set to "ch/VERSION". + """ + @type query_option :: + {:timeout, timeout} + | {:query, Enumerable.t()} + | {:headers, Mint.Types.headers()} + + @typedoc """ + The parsed query response. + + If the format is `RowBinaryWithNamesAndTypes`, it returns `%{names: [name], rows: [[value]]}`. + Otherwise, it returns the raw response body binary. + """ + @type query_result :: %{names: [String.t()], rows: [[term]]} | binary + + @typedoc """ + A query execution error. + + Returns `Ch.Error` for ClickHouse errors or Mint errors for network/HTTP failures. + """ @type query_error :: Ch.Error.t() | Mint.Types.error() @start_options_schema [ @@ -22,7 +69,15 @@ defmodule Ch.Pool do pool_size: [ type: :pos_integer, doc: "Maximum number of concurrent connections.", - default: 10 + default: 20 + ], + worker_idle_timeout: [ + type: :timeout, + doc: """ + Time a connection can stay idle before the pool closes it. + Should be lower than ClickHouse's `keep_alive_timeout`. 
+ """, + default: to_timeout(second: 5) ], url: [ type: :string, @@ -42,12 +97,13 @@ defmodule Ch.Pool do Supported options: #{NimbleOptions.docs(@start_options_schema)} """ - @spec start_link(keyword) :: GenServer.on_start() + @spec start_link([start_option]) :: GenServer.on_start() def start_link(options) do options = NimbleOptions.validate!(options, @start_options_schema) name = Keyword.get(options, :name) pool_size = Keyword.fetch!(options, :pool_size) + worker_idle_timeout = Keyword.fetch!(options, :worker_idle_timeout) url = Keyword.fetch!(options, :url) %URI{scheme: scheme, host: host, port: port} = URI.parse(url) @@ -59,12 +115,29 @@ defmodule Ch.Pool do _other -> raise ArgumentError, "unexpected HTTP scheme: #{inspect(scheme)}" end - config = [ + initial_pool_state = %{ + template: {:template, scheme, host, port} + } + + NimblePool.start_link( + worker: {__MODULE__, initial_pool_state}, pool_size: pool_size, - template: {scheme, host, port} - ] + worker_idle_timeout: worker_idle_timeout, + lazy: true, + name: name + ) + end - GenServer.start_link(__MODULE__, config, name: name) + @doc """ + Returns a child spec to allow Ch pool to be started under a supervisor. + + ## Options + + The options are exactly the same as for `start_link/1`. + """ + @spec child_spec([start_option]) :: Supervisor.child_spec() + def child_spec(options) do + %{id: __MODULE__, start: {__MODULE__, :start_link, [options]}} end @doc """ @@ -73,31 +146,54 @@ defmodule Ch.Pool do The pool exits with the given `reason`. The pool has `timeout` milliseconds to stop before it's unilaterally killed by the runtime. """ + @spec stop(NimblePool.pool(), reason :: term, timeout) :: :ok def stop(pool, reason \\ :normal, timeout \\ :infinity) do - GenServer.stop(pool, reason, timeout) + NimblePool.stop(pool, reason, timeout) end - @spec query(NimblePool.pool(), statement, params, keyword) :: + @doc """ + Executes a query on the given pool. 
+ + Returns `{:ok, query_result}` on success or `{:error, query_error}` on failure. + """ + @spec query(NimblePool.pool(), query_statement, query_params, [query_option]) :: {:ok, query_result} | {:error, query_error} def query(pool, statement, params \\ %{}, options \\ []) do {timeout, options} = Keyword.pop(options, :timeout, @query_timeout) + {query, options} = Keyword.pop(options, :query, []) + {headers, options} = Keyword.pop(options, :headers, @query_headers) deadline = Ch.HTTP.to_deadline(timeout) - path = Ch.HTTP.path(params, options) - - # TODO retry on closed? backoff? - # TODO retry transient closed/etc. errors? - checkout(pool, timeout, fn ref, conn -> - with {:ok, conn} <- ensure_connected(conn, pool, deadline), - {:ok, conn, result} <- request(conn, path, statement, deadline) do - {result, checkin(conn)} - else - {:error, reason} = error -> {error, {:remove, reason}} - end - end) + path = Ch.HTTP.path(params, query) + + result = + NimblePool.checkout!( + pool, + :request, + fn {pid, _ref}, conn_or_template -> + with {:ok, conn} <- connect(conn_or_template, pid, deadline), + {:ok, conn, status, headers, data} <- + request(conn, "POST", path, headers, statement, deadline) do + {{:ok, status, headers, data}, checkin(conn)} + else + {:error, reason} = error -> {error, {:remove, reason}} + end + end, + timeout + ) + + with {:ok, status, headers, data} <- result do + data = data |> maybe_decompress(headers) |> IO.iodata_to_binary() + decode_query_response(status, headers, data, options) + end end - @spec query!(NimblePool.pool(), statement, params, keyword) :: query_result + @doc """ + Executes a query on the given pool, raising on error. + + Returns the `query_result` directly. Raises an exception if the query fails. 
+ """ + @spec query!(NimblePool.pool(), query_statement, query_params, [query_option]) :: query_result def query!(pool, statement, params \\ %{}, options \\ []) do case query(pool, statement, params, options) do {:ok, result} -> result @@ -105,143 +201,47 @@ defmodule Ch.Pool do end end - defp checkout(pool, timeout, fun) when is_function(fun) do - monitor_ref = Process.monitor(pool) - - # TODO noconnect? - GenServer.cast(pool, {:out, self(), monitor_ref, timeout}) - - receive do - {^monitor_ref, conn, request_ref} -> - Process.demonitor(monitor_ref, [:flush]) - {result, conn} = fun.(conn) - GenServer.cast(pool, {:in, conn, request_ref}) - result - - {^monitor_ref, :timeout} -> - Process.demonitor(monitor_ref, [:flush]) - {:error, :timeout} - - {:DOWN, ^monitor_ref, :process, _pid, reason} -> - {:error, reason} - end - end - - @impl GenServer - def init(config) do - Process.flag(:trap_exit, true) - - pool_size = Keyword.fetch!(config, :pool_size) - template = Keyword.fetch!(config, :template) - - state = %{ - queue: :queue.new(), - requests: %{}, - monitors: %{}, - resources: :queue.new(), - pool_size: pool_size, - template: template - } - - {:ok, state} - end - - @impl GenServer - def handle_cast({:out, pid, request_ref, timeout}, state) do - monitor_ref = Process.monitor(pid) - - %{requests: requests, monitors: monitors} = state - requests = Map.put(requests, request_ref, {pid, monitor_ref, :out}) - monitors = Map.put(monitors, monitor_ref, request_ref) - state = %{state | requests: requests, monitors: monitors} - state = maybe_checkout(request_ref, monitor_ref, timeout, pid, state) - - {:noreply, state} - end - - def handle_cast({:in, conn, monitor_ref}, state) do - Process.demonitor(monitor_ref, [:flush]) - - %{requests: requests, resources: resources} = state - - resources = - case handle_checkin(conn) do - {:ok, conn} -> - :queue.in(conn, resources) - - {:remove, reason} -> - remove_worker(reason, conn) - resources - end - - state = remove_requests(state, 
monitor_ref) - state = maybe_checkout(%{state | resources: resources}) - {:noreply, state} - end - - @impl GenServer - def handle_info({:DOWN, monitor_ref, _, _, _} = down, state) do + @impl NimblePool + def init_pool(config) do + {:ok, config} end - def handle_info({:ping, worker}, state) do + @impl NimblePool + def init_worker(config) do + {:ok, :template, config} end - @impl GenServer - def terminate(reason, state) do + @impl NimblePool + def handle_checkout(:request, _from, :template = template, config) do + {:ok, config.template, template, config} end - defp maybe_checkout(%{queue: queue, requests: requests} = state) do - case :queue.out(queue) do - {{:value, {pid, ref}, queue}} -> - case requests do - # the request still exists, so we can checkout the resource - %{^ref => {^pid, mon_ref, :out, deadline}} -> - maybe_checkout(command, mon_ref, deadline, {pid, ref}, %{state | queue: queue}) - - # it should never happen - %{^ref => _} -> - exit(:unexpected_checkout) - - # the request is no longer active, try the next one - %{} -> - maybe_checkout(%{state | queue: queue}) - end - - {:empty, _queue} -> - state - end + def handle_checkout(:request, _from, %Mint.HTTP1{} = conn, config) do + {:ok, {:ok, conn}, conn, config} end - defp handle_checkin({:ok, %Mint.HTTP1{} = conn}, _from, _conn, config) do + @impl NimblePool + def handle_checkin({:ok, conn}, _from, _conn, config) do {:ok, conn, config} end - defp handle_checkin({:remove, reason}, _from, _conn, config) do + def handle_checkin({:remove, reason}, _from, _conn, config) do {:remove, reason, config} end - defp handle_checkout(:request, _from, :template = template, config) do - {:ok, config.template, template, config} - end - - defp handle_checkout(:request, _from, %Mint.HTTP1{} = conn, config) do - {:ok, {:ok, conn}, conn, config} - end - - defp handle_ping(_conn, _config) do + @impl NimblePool + def handle_ping(_conn, _config) do {:remove, :worker_idle_timeout} end - # TODO handle_info - @impl NimblePool def 
terminate_worker(_reason, conn, config) do with %Mint.HTTP1{} <- conn, do: Mint.HTTP1.close(conn) {:ok, config} end - defp ensure_connected({:template, scheme, host, port}, owner, deadline) do - timeout = Ch.HTTP.timeout_from_deadline(deadline) + defp connect({:template, scheme, host, port}, owner, deadline) do + timeout = Ch.HTTP.to_timeout(deadline) case Mint.HTTP1.connect(scheme, host, port, mode: :passive, timeout: timeout) do {:ok, conn} -> @@ -259,7 +259,59 @@ defmodule Ch.Pool do end end - defp ensure_connected({:ok, %Mint.HTTP1{}} = ok, _owner, _deadline), do: ok + defp connect({:ok, _conn} = ok, _owner, _deadline), do: ok + + defp request(conn, method, path, headers, body, deadline) do + result = + with {:ok, conn, _ref} <- Mint.HTTP1.request(conn, method, path, headers, body) do + recv_all(conn, nil, [], [], deadline) + end + + with {:error, conn, reason} <- result do + Mint.HTTP1.close(conn) + {:error, reason} + end + end + + defp recv_all(conn, status, headers, data, deadline) do + timeout = Ch.HTTP.to_timeout(deadline) + + case Mint.HTTP1.recv(conn, 0, timeout) do + {:ok, conn, responses} -> + case handle_responses(responses, status, headers, data) do + {:ok, status, headers, data} -> {:ok, conn, status, headers, data} + {:more, status, headers, data} -> recv_all(conn, status, headers, data, deadline) + {:error, reason} -> {:error, conn, reason} + end + + {:error, conn, reason, _responses} -> + {:error, conn, reason} + end + end + + defp handle_responses([{:status, _ref, status} | rest], _prev_status = nil, headers, data) do + handle_responses(rest, status, headers, data) + end + + defp handle_responses([{:headers, _ref, new_headers} | rest], status, prev_headers, data) do + handle_responses(rest, status, prev_headers ++ new_headers, data) + end + + defp handle_responses([{:data, _ref, new_data} | rest], status, headers, prev_data) do + handle_responses(rest, status, headers, [prev_data | new_data]) + end + + defp handle_responses([{:done, _ref}], 
status, headers, data) do + {:ok, status, headers, data} + end + + defp handle_responses([{:error, _ref, reason} | _rest], _status, _headers, _data) do + {:error, reason} + end + + defp handle_responses([], status, headers, data) do + {:more, status, headers, data} + end defp checkin(conn) do if Mint.HTTP1.open?(conn) do @@ -268,4 +320,38 @@ defmodule Ch.Pool do {:remove, Mint.TransportError.exception(reason: :closed)} end end + + defp maybe_decompress(data, headers) do + case List.keyfind(headers, "content-encoding", 0) do + {_, "gzip"} -> :zlib.gunzip(data) + {_, "zstd"} -> :zstd.decompress(data) + {_, other} -> raise "unsupported content encoding: #{inspect(other)}" + nil -> data + end + end + + defp decode_query_response(200, _headers, _no_body = "", _options) do + :ok + end + + defp decode_query_response(200, headers, body, _options) do + case List.keyfind(headers, "x-clickhouse-format", 0) do + {_, "RowBinaryWithNamesAndTypes"} -> + [name | rows] = Ch.RowBinary.decode_names_and_rows(body) + {:ok, %{names: name, rows: rows}} + + {_, _format} -> + {:ok, body} + end + end + + defp decode_query_response(_status, headers, body, _options) do + code = + case List.keyfind(headers, "x-clickhouse-error-code", 0) do + {_, code} -> String.to_integer(code) + nil -> nil + end + + {:error, %Ch.Error{code: code, message: body}} + end end diff --git a/mix.exs b/mix.exs index 0e3bd361..7606b00a 100644 --- a/mix.exs +++ b/mix.exs @@ -4,6 +4,8 @@ defmodule Ch.MixProject do @source_url "https://github.com/plausible/ch" @version "0.9.0" + def version, do: @version + def project do [ app: :ch,