diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a0a159fb..4ded1be0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ defaults: jobs: format: name: Check format - runs-on: [runner-amd64-large] + runs-on: [ runner-amd64-large ] continue-on-error: true steps: - name: Install stable toolchain @@ -45,7 +45,7 @@ jobs: clippy: name: Check clippy - runs-on: [runner-amd64-large] + runs-on: [ runner-amd64-large ] continue-on-error: true steps: - name: Install stable toolchain @@ -67,7 +67,7 @@ jobs: test: name: Check tests - runs-on: [runner-amd64-large] + runs-on: [ runner-amd64-large ] continue-on-error: true steps: - name: Install stable toolchain @@ -122,7 +122,7 @@ jobs: docker-build: name: Build Docker image - runs-on: [runner-amd64-large-private] + runs-on: [ runner-amd64-large-private ] needs: build continue-on-error: true steps: @@ -193,7 +193,7 @@ jobs: benchmark: name: Benchmark ${{ matrix.description }} needs: build - runs-on: [runner-amd64-large-private] + runs-on: [ runner-amd64-large-private ] continue-on-error: true strategy: fail-fast: false @@ -204,142 +204,169 @@ jobs: database: arangodb enabled: true description: ArangoDB + fulltext: false # Cassandra - name: cassandra database: cassandra enabled: false description: Cassandra + fulltext: false skipped: Cassandra benchmark not yet implemented # Dragonfly - name: dragonfly database: dragonfly enabled: true description: Dragonfly + fulltext: false # Dry - name: dry database: dry enabled: true description: Dry + fulltext: false # EchoDB - name: echodb database: echodb enabled: true description: EchoDB + fulltext: false # Fjall - name: Fjall database: fjall enabled: true description: Fjall + fulltext: false # KeyDB - name: keydb database: keydb enabled: true description: KeyDB + fulltext: false # LMDB - name: lmdb database: lmdb enabled: true description: LMDB + fulltext: false # Map - name: map database: map enabled: true description: Map + fulltext: 
false # MemoDB - name: memodb database: memodb enabled: true description: MemoDB + fulltext: false # MongoDB - name: mongodb database: mongodb enabled: true description: MongoDB + fulltext: false # MySQL - name: mysql database: mysql enabled: true description: MySQL + fulltext: false # Neo4j - name: neo4j database: neo4j enabled: true description: Neo4j + fulltext: false # Postgres - name: postgres database: postgres enabled: true description: Postgres + fulltext: false # Redb - name: redb database: redb enabled: false description: ReDB + fulltext: false skipped: ReDB benchmark skipped due to excessive benchmark time # Redis - name: redis database: redis enabled: true description: Redis + fulltext: false # RocksDB - name: rocksdb database: rocksdb enabled: true description: RocksDB + fulltext: false # Scylladb - name: scylladb database: scylladb enabled: false description: ScyllaDB + fulltext: false skipped: ScyllaDB benchmark not yet implemented # SQLite - name: sqlite database: sqlite enabled: true description: SQLite + fulltext: false # SurrealDB + Memory - name: surrealdb-memory database: surrealdb-memory enabled: true description: SurrealDB with in-memory storage + fulltext: true # SurrealDB + RocksDB - name: surrealdb-rocksdb database: surrealdb-rocksdb enabled: true description: SurrealDB with RocksDB storage + fulltext: true # SurrealDB + SurrealKV - name: surrealdb-surrealkv database: surrealdb-surrealkv enabled: true description: SurrealDB with SurrealKV storage + fulltext: true # SurrealDB Memory Engine - name: surrealdb-embedded-memory database: surrealdb enabled: true endpoint: -e memory description: SurrealDB embedded with in-memory storage + fulltext: true # SurrealDB RocksDB Engine - name: surrealdb-embedded-rocksdb database: surrealdb enabled: true endpoint: -e rocksdb:~/crud-bench description: SurrealDB embedded with RocksDB storage + fulltext: true # SurrealDB SurrealKV Engine - name: surrealdb-embedded-surrealkv database: surrealdb enabled: true 
# Full-text search benchmark. Runs only for matrix entries that set both
# `enabled` and `fulltext`, and still runs when an earlier step has failed.
# The step name mirrors the flags actually passed to crud-bench below
# (-s 1000 -c 10 -t 10).
- name: Run benchmarks (1,000 samples / 10 clients / 10 threads / key string26 / random / words)
  timeout-minutes: 30
  if: ${{ matrix.enabled && matrix.fulltext && (success() || failure()) }}
  run: |
    ${{ github.workspace }}/artifacts/crud-bench -d ${{ matrix.database }} ${{ matrix.endpoint || '' }} -s 1000 -c 10 -t 10 -k string26 -r -n string26-random-1k
    docker container kill crud-bench &>/dev/null || docker container prune --force &>/dev/null || docker volume prune --all --force &>/dev/null || true
  env:
    # Statements executed before the benchmark: an analyzer plus a
    # full-text index over the `words` field of the `record` table.
    CRUD_BENCH_PREPARE: 'DEFINE ANALYZER IF NOT EXISTS simple TOKENIZERS class FILTERS lowercase,ascii; DEFINE INDEX ft ON record COLUMNS words FULLTEXT ANALYZER simple BM25;'
    # Each record holds one `words` field generated from the listed dictionary.
    CRUD_BENCH_VALUE: '{"words": "words:50;how,are,you,a,an,foo,bar,hello,world"}'
    # A single scan whose condition matches the dictionary word "foo".
    CRUD_BENCH_SCANS: '[{ "name": "fulltext", "samples": 1000, "condition": "words @@ \"foo\"", "limit": 1000 }]'
database first. // We can drop the database initially // because the other clients will be // created subsequently, and will then // create the database as necessary. self.connection.drop_database("crud-bench").await?; + if prepare.is_some() { + bail!(NOT_SUPPORTED_ERROR); + } // Everything ok Ok(()) } diff --git a/src/benchmark.rs b/src/benchmark.rs index a4991b31..6f437f7c 100644 --- a/src/benchmark.rs +++ b/src/benchmark.rs @@ -42,6 +42,8 @@ pub(crate) struct Benchmark { pub(crate) sync: bool, /// Whether to enable disk persistence (specific to surrealkv for now) pub(crate) disk_persistence: bool, + /// Statement(s) to run before the benchmark + pub(crate) prepare: Option, } impl Benchmark { pub(crate) fn new(args: &Args) -> Self { @@ -56,6 +58,7 @@ impl Benchmark { sync: args.sync, pid: args.pid, disk_persistence: true, + prepare: args.prepare.to_owned(), } } /// Run the benchmark for the desired benchmark engine @@ -74,7 +77,7 @@ impl Benchmark { // Generate a value sample for the report let sample = vp.generate_value::(); // Setup the datastore - self.wait_for_client(&engine).await?.startup().await?; + self.wait_for_client(&engine).await?.startup(self.prepare.as_ref()).await?; // Setup the clients let clients = self.setup_clients(&engine).await?; // Run the "creates" benchmark diff --git a/src/engine.rs b/src/engine.rs index 5f2891ee..96f0b1d2 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -28,7 +28,10 @@ where /// running benchmark tests for a client or connection. 
pub(crate) trait BenchmarkClient: Sync + Send + 'static { /// Initialise the store at startup - async fn startup(&self) -> Result<()> { + async fn startup(&self, prepare: Option<&String>) -> Result<()> { + if prepare.is_some() { + bail!(NOT_SUPPORTED_ERROR); + } Ok(()) } diff --git a/src/main.rs b/src/main.rs index 96a49ce5..70c6d8ae 100644 --- a/src/main.rs +++ b/src/main.rs @@ -136,6 +136,10 @@ pub(crate) struct Args { ]"# )] pub(crate) scans: String, + + /// Statement(s) to execute before the benchmark + #[arg(long, env = "CRUD_BENCH_PREPARE")] + pub(crate) prepare: Option, } #[derive(Debug, ValueEnum, Clone, Copy)] @@ -340,6 +344,7 @@ mod test { scans: r#"[{"name": "limit", "start": 50, "limit": 100, "expect": 100}]"#.to_string(), show_sample: false, pid: None, + prepare: None, }) } diff --git a/src/mysql.rs b/src/mysql.rs index 9d66def5..e289793c 100644 --- a/src/mysql.rs +++ b/src/mysql.rs @@ -56,7 +56,7 @@ pub(crate) struct MysqlClient { } impl BenchmarkClient for MysqlClient { - async fn startup(&self) -> Result<()> { + async fn startup(&self, prepare: Option<&String>) -> Result<()> { let id_type = match self.kt { KeyType::Integer => "SERIAL", KeyType::String26 => "VARCHAR(26)", @@ -85,9 +85,13 @@ impl BenchmarkClient for MysqlClient { }) .collect::>() .join(", "); - let stm = + let mut stm = format!("DROP TABLE IF EXISTS record; CREATE TABLE record ( id {id_type} PRIMARY KEY, {fields}) ENGINE=InnoDB;"); + if let Some(prepare) = prepare { + stm.push_str(prepare); + } self.conn.lock().await.query_drop(&stm).await?; + Ok(()) } diff --git a/src/neo4j.rs b/src/neo4j.rs index e8db8365..e299ec10 100644 --- a/src/neo4j.rs +++ b/src/neo4j.rs @@ -68,9 +68,12 @@ pub(crate) struct Neo4jClient { } impl BenchmarkClient for Neo4jClient { - async fn startup(&self) -> Result<()> { - let stm = "CREATE INDEX FOR (r:Record) ON (r.id);"; - self.graph.execute(query(stm)).await?.next().await.ok(); + async fn startup(&self, prepare: Option<&String>) -> Result<()> { + let mut stm 
= "CREATE INDEX FOR (r:Record) ON (r.id);".to_string(); + if let Some(prepare) = prepare { + stm.push_str(prepare); + } + self.graph.execute(query(&stm)).await?.next().await.ok(); Ok(()) } diff --git a/src/postgres.rs b/src/postgres.rs index c6e1a711..89a1170a 100644 --- a/src/postgres.rs +++ b/src/postgres.rs @@ -61,7 +61,7 @@ pub(crate) struct PostgresClient { } impl BenchmarkClient for PostgresClient { - async fn startup(&self) -> Result<()> { + async fn startup(&self, prepare: Option<&String>) -> Result<()> { let id_type = match self.kt { KeyType::Integer => "SERIAL", KeyType::String26 => "VARCHAR(26)", @@ -90,7 +90,10 @@ impl BenchmarkClient for PostgresClient { }) .collect::>() .join(", "); - let stm = format!("DROP TABLE IF EXISTS record; CREATE TABLE record ( id {id_type} PRIMARY KEY, {fields});"); + let mut stm = format!("DROP TABLE IF EXISTS record; CREATE TABLE record ( id {id_type} PRIMARY KEY, {fields});"); + if let Some(prepare) = prepare { + stm.push_str(prepare); + } self.client.batch_execute(&stm).await?; Ok(()) } diff --git a/src/scylladb.rs b/src/scylladb.rs index cec3742b..dc040e91 100644 --- a/src/scylladb.rs +++ b/src/scylladb.rs @@ -55,7 +55,7 @@ pub(crate) struct ScylladbClient { } impl BenchmarkClient for ScylladbClient { - async fn startup(&self) -> Result<()> { + async fn startup(&self, prepare: Option<&String>) -> Result<()> { self.session .query_unpaged( " @@ -96,6 +96,9 @@ impl BenchmarkClient for ScylladbClient { (), ) .await?; + if let Some(prepare) = prepare { + self.session.query_unpaged(prepare.to_owned(), ()).await?; + } Ok(()) } async fn create_u32(&self, key: u32, val: Value) -> Result<()> { diff --git a/src/sqlite.rs b/src/sqlite.rs index 55e1f16c..b869d889 100644 --- a/src/sqlite.rs +++ b/src/sqlite.rs @@ -71,7 +71,7 @@ impl BenchmarkClient for SqliteClient { Ok(()) } - async fn startup(&self) -> Result<()> { + async fn startup(&self, prepare: Option<&String>) -> Result<()> { // Optimise SQLite let stmt = " PRAGMA synchronous 
= OFF; @@ -109,12 +109,15 @@ impl BenchmarkClient for SqliteClient { }) .collect::>() .join(","); - let stmt = format!( + let mut stmt = format!( " DROP TABLE IF EXISTS record; CREATE TABLE record ( id {id_type} PRIMARY KEY, {fields}); " ); + if let Some(prepare) = prepare { + stmt.push_str(prepare); + } self.execute_batch(Cow::Owned(stmt)).await?; Ok(()) } diff --git a/src/surrealdb.rs b/src/surrealdb.rs index 5df8202a..8711e668 100644 --- a/src/surrealdb.rs +++ b/src/surrealdb.rs @@ -6,7 +6,7 @@ use crate::engine::{BenchmarkClient, BenchmarkEngine}; use crate::valueprovider::Columns; use crate::{Benchmark, KeyType, Projection, Scan}; use anyhow::Result; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use serde_json::Value; use surrealdb::engine::any::{connect, Any}; use surrealdb::opt::auth::Root; @@ -116,8 +116,14 @@ struct SurrealRecord { id: RecordId, } +#[derive(Debug, Serialize)] +struct Bindings { + content: Value, + key: T, +} + impl BenchmarkClient for SurrealDBClient { - async fn startup(&self) -> Result<()> { + async fn startup(&self, prepare: Option<&String>) -> Result<()> { // Ensure the table exists. This wouldn't // normally be an issue, as SurrealDB is // schemaless. However, because we are testing @@ -131,54 +137,99 @@ impl BenchmarkClient for SurrealDBClient { DEFINE TABLE record; "; self.db.query(Raw::from(surql)).await?.check()?; + if let Some(prepare) = prepare { + self.db.query(Raw::from(prepare.to_owned())).await?.check()?; + } Ok(()) } async fn create_u32(&self, key: u32, val: Value) -> Result<()> { - let res = self.db.create(Resource::from(("record", key as i64))).content(val).await?; - assert!(res.into_inner().is_some()); + let res = self + .db + .query(Raw::from("CREATE type::thing('record', $key) CONTENT $content RETURN NULL")) + .bind(Bindings { + key, + content: val, + }) + .await? 
+ .take::(0)?; + assert!(!res.into_inner().is_none()); Ok(()) } async fn create_string(&self, key: String, val: Value) -> Result<()> { - let res = self.db.create(Resource::from(("record", key))).content(val).await?; - assert!(res.into_inner().is_some()); + let res = self + .db + .query(Raw::from("CREATE type::thing('record', $key) CONTENT $content RETURN NULL")) + .bind(Bindings { + key, + content: val, + }) + .await? + .take::(0)?; + assert!(!res.into_inner().is_none()); Ok(()) } async fn read_u32(&self, key: u32) -> Result<()> { let res = self.db.select(Resource::from(("record", key as i64))).await?; - assert!(res.into_inner().is_some()); + assert!(!res.into_inner().is_none()); Ok(()) } async fn read_string(&self, key: String) -> Result<()> { let res = self.db.select(Resource::from(("record", key))).await?; - assert!(res.into_inner().is_some()); + assert!(!res.into_inner().is_none()); Ok(()) } async fn update_u32(&self, key: u32, val: Value) -> Result<()> { - let res = self.db.update(Resource::from(("record", key as i64))).content(val).await?; - assert!(res.into_inner().is_some()); + let res = self + .db + .query(Raw::from("UPDATE type::thing('record', $key) CONTENT $content RETURN NULL")) + .bind(Bindings { + key, + content: val, + }) + .await? + .take::(0)?; + assert!(!res.into_inner().is_none()); Ok(()) } async fn update_string(&self, key: String, val: Value) -> Result<()> { - let res = self.db.update(Resource::from(("record", key))).content(val).await?; - assert!(res.into_inner().is_some()); + let res = self + .db + .query(Raw::from("UPDATE type::thing('record', $key) CONTENT $content RETURN NULL")) + .bind(Bindings { + key, + content: val, + }) + .await? 
+ .take::(0)?; + assert!(!res.into_inner().is_none()); Ok(()) } async fn delete_u32(&self, key: u32) -> Result<()> { - let res = self.db.delete(Resource::from(("record", key as i64))).await?; - assert!(res.into_inner().is_some()); + let res = self + .db + .query(Raw::from("DELETE type::thing('record', $key) RETURN NULL")) + .bind(("key", key)) + .await? + .take::(0)?; + assert!(!res.into_inner().is_none()); Ok(()) } async fn delete_string(&self, key: String) -> Result<()> { - let res = self.db.delete(Resource::from(("record", key))).await?; - assert!(res.into_inner().is_some()); + let res = self + .db + .query(Raw::from("DELETE type::thing('record', $key) RETURN NULL")) + .bind(("key", key)) + .await? + .take::(0)?; + assert!(!res.into_inner().is_none()); Ok(()) } @@ -203,7 +254,8 @@ impl SurrealDBClient { Projection::Id => { let sql = format!("SELECT id FROM record {c} {s} {l}"); let res: surrealdb::Value = self.db.query(Raw::from(sql)).await?.take(0)?; - let surrealdb::expr::Value::Array(arr) = res.into_inner() else { + let val = res.into_inner(); + let Some(arr) = val.as_array() else { panic!("Unexpected response type"); }; Ok(arr.len()) @@ -211,7 +263,8 @@ impl SurrealDBClient { Projection::Full => { let sql = format!("SELECT * FROM record {c} {s} {l}"); let res: surrealdb::Value = self.db.query(Raw::from(sql)).await?.take(0)?; - let surrealdb::expr::Value::Array(arr) = res.into_inner() else { + let val = res.into_inner(); + let Some(arr) = val.as_array() else { panic!("Unexpected response type"); }; Ok(arr.len()) diff --git a/src/valueprovider.rs b/src/valueprovider.rs index d7800f46..7d9bdd50 100644 --- a/src/valueprovider.rs +++ b/src/valueprovider.rs @@ -59,6 +59,7 @@ enum ValueGenerator { Bool, String(Length), Text(Length), + Words(Length, Vec), Integer, Float, DateTime, @@ -110,6 +111,26 @@ fn text_range(rng: &mut SmallRng, range: Range) -> String { text(rng, l) } +fn words(rng: &mut SmallRng, size: usize, dictionary: &[String]) -> String { + let mut l = 
0; + let mut words = Vec::with_capacity(size / 5); + let mut i = 0; + while l < size { + let w = dictionary[rng.gen_range(0..dictionary.len())].as_str(); + l += w.len(); + words.push(w); + l += i; + // We ignore the first whitespace, but not the following ones + i = 1; + } + words.join(" ") +} + +fn words_range(rng: &mut SmallRng, range: Range, dictionary: &[String]) -> String { + let l = rng.gen_range(range); + words(rng, l, dictionary) +} + impl ValueGenerator { fn new(value: Value) -> Result { match value { @@ -128,6 +149,14 @@ impl ValueGenerator { Self::String(Length::new(i)?) } else if let Some(i) = s.strip_prefix("text:") { Self::Text(Length::new(i)?) + } else if let Some(i) = s.strip_prefix("words:") { + let args: Vec<&str> = i.split(";").collect(); + if args.len() != 2 { + bail!("Words takes 2 arguments. Got: {}", args.len()); + } + let l = Length::new(args[0])?; + let dictionary = args[1].split(",").map(|s| s.to_string()).collect(); + Self::Words(l, dictionary) } else if let Some(i) = s.strip_prefix("int:") { if let Length::Range(r) = Length::new(i)? 
{ Self::IntegerRange(r) @@ -194,62 +223,69 @@ impl ValueGenerator { D: Dialect, { match self { - ValueGenerator::Bool => { + Self::Bool => { let v = rng.gen::(); Value::Bool(v) } - ValueGenerator::String(l) => { + Self::String(l) => { let val = match l { Length::Range(r) => string_range(rng, r.clone()), Length::Fixed(l) => string(rng, *l), }; Value::String(val) } - ValueGenerator::Text(l) => { + Self::Text(l) => { let val = match l { Length::Range(r) => text_range(rng, r.clone()), Length::Fixed(l) => text(rng, *l), }; Value::String(val) } - ValueGenerator::Integer => { + Self::Words(l, dictionary) => { + let val = match l { + Length::Range(r) => words_range(rng, r.clone(), dictionary), + Length::Fixed(l) => words(rng, *l, dictionary), + }; + Value::String(val) + } + Self::Integer => { let v = rng.gen::(); Value::Number(Number::from(v)) } - ValueGenerator::Float => { + Self::Float => { let v = rng.gen::(); Value::Number(Number::from_f64(v as f64).unwrap()) } - ValueGenerator::DateTime => { + Self::DateTime => { // Number of seconds from Epoch to 31/12/2030 let s = rng.gen_range(0..1_924_991_999); D::date_time(s) } - ValueGenerator::Uuid => { + Self::Uuid => { let uuid = Uuid::new_v4(); D::uuid(uuid) } - ValueGenerator::IntegerRange(r) => { + Self::IntegerRange(r) => { let v = rng.gen_range(r.start..r.end); Value::Number(v.into()) } - ValueGenerator::FloatRange(r) => { + Self::FloatRange(r) => { let v = rng.gen_range(r.start..r.end); Value::Number(Number::from_f64(v as f64).unwrap()) } - ValueGenerator::StringEnum(a) => { + Self::StringEnum(a) => { let i = rng.gen_range(0..a.len()); Value::String(a[i].to_string()) } - ValueGenerator::IntegerEnum(a) => { + Self::IntegerEnum(a) => { let i = rng.gen_range(0..a.len()); Value::Number(a[i].clone()) } - ValueGenerator::FloatEnum(a) => { + Self::FloatEnum(a) => { let i = rng.gen_range(0..a.len()); Value::Number(a[i].clone()) } - ValueGenerator::Array(a) => { + Self::Array(a) => { // Generate any array structure values let 
mut vec = Vec::with_capacity(a.len()); for v in a { @@ -257,7 +293,7 @@ impl ValueGenerator { } Value::Array(vec) } - ValueGenerator::Object(o) => { + Self::Object(o) => { // Generate any object structure values let mut map = Map::::new(); for (k, v) in o { @@ -338,9 +374,10 @@ impl ColumnType { fn new(v: &ValueGenerator) -> Result { let r = match v { ValueGenerator::Object(_) => ColumnType::Object, - ValueGenerator::StringEnum(_) | ValueGenerator::String(_) | ValueGenerator::Text(_) => { - ColumnType::String - } + ValueGenerator::StringEnum(_) + | ValueGenerator::String(_) + | ValueGenerator::Text(_) + | ValueGenerator::Words(_, _) => ColumnType::String, ValueGenerator::Integer | ValueGenerator::IntegerRange(_) | ValueGenerator::IntegerEnum(_) => ColumnType::Integer,