From 02d3abc8bd3bea12b6b7e0b9c13dddf53f852177 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 10 Apr 2025 10:21:44 +0200 Subject: [PATCH 1/4] Update playground.md with specifications --- docs/getting-started/playground.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/getting-started/playground.md b/docs/getting-started/playground.md index 6b43701d877..e785d6e8c6f 100644 --- a/docs/getting-started/playground.md +++ b/docs/getting-started/playground.md @@ -45,3 +45,14 @@ TCP endpoint example with [CLI](../interfaces/cli.md): ```bash clickhouse client --secure --host play.clickhouse.com --user explorer ``` + +## Playground specifications {#specifications} + +Our ClickHouse Playground is running with the following specifications: + +- Hosted on Google Cloud (GCE) in the US Central region (US-Central-1) +- 3-replica setup +- 256 GiB of storage and 59 virtual CPUs each. + + + From 3d351fcf5519730368575a2cb2aa40cab576ba5b Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 10 Apr 2025 10:26:11 +0200 Subject: [PATCH 2/4] Remove whitespace --- docs/getting-started/playground.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/getting-started/playground.md b/docs/getting-started/playground.md index e785d6e8c6f..60fe41cba74 100644 --- a/docs/getting-started/playground.md +++ b/docs/getting-started/playground.md @@ -53,6 +53,3 @@ Our ClickHouse Playground is running with the following specifications: - Hosted on Google Cloud (GCE) in the US Central region (US-Central-1) - 3-replica setup - 256 GiB of storage and 59 virtual CPUs each. 
- - - From 9f833146e3055f1047ad7d29fd6a560b2f212886 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 10 Apr 2025 10:49:19 +0200 Subject: [PATCH 3/4] Update amazon-reviews.md --- .../example-datasets/amazon-reviews.md | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/docs/getting-started/example-datasets/amazon-reviews.md b/docs/getting-started/example-datasets/amazon-reviews.md index f9fbf3889a0..2953a566919 100644 --- a/docs/getting-started/example-datasets/amazon-reviews.md +++ b/docs/getting-started/example-datasets/amazon-reviews.md @@ -8,7 +8,8 @@ title: 'Amazon Customer Review' This dataset contains over 150M customer reviews of Amazon products. The data is in snappy-compressed Parquet files in AWS S3 that total 49GB in size (compressed). Let's walk through the steps to insert it into ClickHouse. :::note -The queries below were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud). +The queries below were executed on a **Production** instance of ClickHouse Cloud. For more information, see +["Playground specifications"](/getting-started/playground#specifications). 
::: ## Loading the dataset {#loading-the-dataset} @@ -86,21 +87,26 @@ CREATE DATABASE amazon CREATE TABLE amazon.amazon_reviews ( - review_date Date, - marketplace LowCardinality(String), - customer_id UInt64, - review_id String, - product_id String, - product_parent UInt64, - product_title String, - product_category LowCardinality(String), - star_rating UInt8, - helpful_votes UInt32, - total_votes UInt32, - vine Bool, - verified_purchase Bool, - review_headline String, - review_body String + `review_date` Date, + `marketplace` LowCardinality(String), + `customer_id` UInt64, + `review_id` String, + `product_id` String, + `product_parent` UInt64, + `product_title` String, + `product_category` LowCardinality(String), + `star_rating` UInt8, + `helpful_votes` UInt32, + `total_votes` UInt32, + `vine` Bool, + `verified_purchase` Bool, + `review_headline` String, + `review_body` String, + PROJECTION helpful_votes + ( + SELECT * + ORDER BY helpful_votes + ) ) ENGINE = MergeTree ORDER BY (review_date, product_category) @@ -146,7 +152,7 @@ The original data was about 70G, but compressed in ClickHouse it takes up about ## Example queries {#example-queries} -7. Let's run some queries...here are the top 10 most-helpful reviews in the dataset: +7. Let's run some queries. Here are the top 10 most-helpful reviews in the dataset: ```sql runnable SELECT @@ -157,7 +163,9 @@ ORDER BY helpful_votes DESC LIMIT 10 ``` -Notice the query has to process all 151M rows in less than a second! +:::note +This query is using a projection to speed up performance. +::: 8. Here are the top 10 products in Amazon with the most reviews: @@ -214,7 +222,7 @@ ORDER BY count DESC LIMIT 50; ``` -The query only takes 4 seconds - which is impressive - and the results are a fun read: +Notice the query time for such a large amount of data. The results are also a fun read! 12. 
We can run the same query again, except this time we search for **awesome** in the reviews: From eb9784054911508a6c166720b787693a4612ef95 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 10 Apr 2025 10:57:07 +0200 Subject: [PATCH 4/4] Fix edit page for example datasets --- docusaurus.config.en.js | 1 - 1 file changed, 1 deletion(-) diff --git a/docusaurus.config.en.js b/docusaurus.config.en.js index f48eb936d8b..37d526ec4bf 100644 --- a/docusaurus.config.en.js +++ b/docusaurus.config.en.js @@ -138,7 +138,6 @@ const config = { if ( docPath.includes("development") || docPath.includes("engines") || - docPath.includes("getting-started") || docPath.includes("interfaces") || docPath.includes("operations") || docPath.includes("sql-reference")