
Commit 8da53c0

Copilot and WenyXu authored
docs: add PARALLELISM option for COPY DATABASE and CLI tools (#2219)
Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: WenyXu <[email protected]>
1 parent 8601a0f commit 8da53c0

File tree: 4 files changed, +40, -22 lines

  • docs/reference
  • i18n/zh/docusaurus-plugin-content-docs/current/reference


docs/reference/command-lines/utilities/data.md

Lines changed: 11 additions & 10 deletions
@@ -20,7 +20,8 @@ greptime cli data export [OPTIONS]
  | `--addr` | Yes | - | Server address to connect |
  | `--output-dir` | Yes | - | Directory to store exported data |
  | `--database` | No | all databases | Name of the database to export |
- | `--export-jobs`, `-j` | No | 1 | Number of parallel export jobs(multiple databases can be exported in parallel) |
+ | `--db-parallelism`, `-j` | No | 1 | Number of databases to export in parallel. For example, if there are 20 databases and `db-parallelism` is set to 4, then 4 databases will be exported concurrently. (alias: `--export-jobs`) |
+ | `--table-parallelism` | No | 4 | Number of tables to export in parallel within a single database. For example, if a database contains 30 tables and `table-parallelism` is set to 8, then 8 tables will be exported concurrently. |
  | `--max-retry` | No | 3 | Maximum retry attempts per job |
  | `--target`, `-t` | No | all | Export target (schema/data/all) |
  | `--start-time` | No | - | Start of time range for data export |
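To show how the renamed export flags combine, here is a brief usage sketch (not part of the diff). The server address, output directory, and parallelism values are illustrative placeholders; only options documented in the table above are used:

```bash
# Export up to 4 databases concurrently, and up to 8 tables per database
greptime cli data export \
  --addr 127.0.0.1:4000 \
  --output-dir /tmp/greptimedb-export \
  --db-parallelism 4 \
  --table-parallelism 8
```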
@@ -56,15 +57,15 @@ greptime cli data import [OPTIONS]
  ```
  
  ### Options
- | Option | Required | Default | Description |
- | ------------------- | -------- | ------------- | ------------------------------------------------------------------------------- |
- | `--addr` | Yes | - | Server address to connect |
- | `--input-dir` | Yes | - | Directory containing backup data |
- | `--database` | No | all databases | Name of the database to import |
- | `--import-jobs, -j` | No | 1 | Number of parallel import jobs (multiple databases can be imported in parallel) |
- | `--max-retry` | No | 3 | Maximum retry attempts per job |
- | `--target, -t` | No | all | Import target (schema/data/all) |
- | `--auth-basic` | No | - | Use the `<username>:<password>` format |
+ | Option | Required | Default | Description |
+ | ------------------------ | -------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+ | `--addr` | Yes | - | Server address to connect |
+ | `--input-dir` | Yes | - | Directory containing backup data |
+ | `--database` | No | all databases | Name of the database to import |
+ | `--db-parallelism`, `-j` | No | 1 | Number of databases to import in parallel. For example, if there are 20 databases and `db-parallelism` is set to 4, then 4 databases will be imported concurrently. (alias: `--import-jobs`) |
+ | `--max-retry` | No | 3 | Maximum retry attempts per job |
+ | `--target, -t` | No | all | Import target (schema/data/all) |
+ | `--auth-basic` | No | - | Use the `<username>:<password>` format |
  
  ### Import Targets
  - `schema`: Imports table schemas only
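Likewise, a minimal import sketch assuming a backup produced by the export command above; the address and input directory are placeholders, and only options documented in the table are used:

```bash
# Import up to 4 databases concurrently from the exported directory
greptime cli data import \
  --addr 127.0.0.1:4000 \
  --input-dir /tmp/greptimedb-export \
  --db-parallelism 4
```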

docs/reference/sql/copy.md

Lines changed: 9 additions & 1 deletion
@@ -188,7 +188,8 @@ COPY DATABASE <db_name>
  WITH (
    FORMAT = { 'CSV' | 'JSON' | 'PARQUET' },
    START_TIME = "<START TIMESTAMP>",
-   END_TIME = "<END TIMESTAMP>"
+   END_TIME = "<END TIMESTAMP>",
+   PARALLELISM = <number>
  )
  [CONNECTION(
    REGION = "<REGION NAME>",
@@ -203,6 +204,7 @@ COPY DATABASE <db_name>
  |---|---|---|
  | `FORMAT` | Export file format, available options: JSON, CSV, Parquet | **Required** |
  | `START_TIME`/`END_TIME`| The time range within which data should be exported. `START_TIME` is inclusive and `END_TIME` is exclusive. | Optional |
+ | `PARALLELISM` | Number of tables to process in parallel. For example, if a database contains 30 tables and `PARALLELISM` is set to 8, then 8 tables will be processed concurrently. Defaults to the total number of CPU cores, with a minimum value of 1. | Optional |
  
  > - When copying databases, `<PATH>` must end with `/`.
  > - `CONNECTION` parameters can also be used to copy databases to/from object storage services like AWS S3.
@@ -213,11 +215,17 @@ COPY DATABASE <db_name>
  -- Export all tables' data to /tmp/export/
  COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet');
  
+ -- Export all table data using 4 parallel operations
+ COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=4);
+ 
  -- Export all tables' data within time range 2022-04-11 08:00:00~2022-04-11 09:00:00 to /tmp/export/
  COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', START_TIME='2022-04-11 08:00:00', END_TIME='2022-04-11 09:00:00');
  
  -- Import files under /tmp/export/ directory to database named public.
  COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet');
+ 
+ -- Import files using 8 parallel operations
+ COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=8);
  ```
  
  ## Special reminder for Windows platforms

i18n/zh/docusaurus-plugin-content-docs/current/reference/command-lines/utilities/data.md

Lines changed: 11 additions & 10 deletions
@@ -20,7 +20,8 @@ greptime cli data export [OPTIONS]
  | `--addr` | Yes | - | Address of the GreptimeDB database to connect to |
  | `--output-dir` | Yes | - | Directory to store the exported data |
  | `--database` | No | all databases | Name of the database to export |
- | `--export-jobs, -j` | No | 1 | Number of parallel export jobs (multiple databases can be exported in parallel) |
+ | `--db-parallelism, -j` | No | 1 | Number of databases to export in parallel. For example, with 20 databases and `db-parallelism` set to 4, 4 databases are exported at the same time. (alias: `--export-jobs`) |
+ | `--table-parallelism` | No | 4 | Number of tables exported in parallel within a single database. For example, with 30 tables in a database and `table-parallelism` set to 8, 8 tables are exported at the same time. |
  | `--max-retry` | No | 3 | Maximum retry attempts per job |
  | `--target, -t` | No | all | Export target (schema/data/all) |
  | `--start-time` | No | - | Start of the time range for data export |
@@ -56,15 +57,15 @@ greptime cli data import [OPTIONS]
  ```
  
  ### Options
- | Option | Required | Default | Description |
- | --- | --- | --- | --- |
- | `--addr` | Yes | - | Address of the GreptimeDB database to connect to |
- | `--input-dir` | Yes | - | Directory containing the backup data |
- | `--database` | No | all databases | Name of the database to import |
- | `--import-jobs, -j` | No | 1 | Number of parallel import jobs (multiple databases can be imported in parallel) |
- | `--max-retry` | No | 3 | Maximum retry attempts per job |
- | `--target, -t` | No | all | Import target (schema/data/all) |
- | `--auth-basic` | No | - | Use the `<username>:<password>` format |
+ | Option | Required | Default | Description |
+ | --- | --- | --- | --- |
+ | `--addr` | Yes | - | Address of the GreptimeDB database to connect to |
+ | `--input-dir` | Yes | - | Directory containing the backup data |
+ | `--database` | No | all databases | Name of the database to import |
+ | `--db-parallelism, -j` | No | 1 | Number of databases to import in parallel. For example, with 20 databases and `db-parallelism` set to 4, 4 databases are imported at the same time. (alias: `--import-jobs`) |
+ | `--max-retry` | No | 3 | Maximum retry attempts per job |
+ | `--target, -t` | No | all | Import target (schema/data/all) |
+ | `--auth-basic` | No | - | Use the `<username>:<password>` format |
  
  ### Import Targets
  - `schema`: Imports table schemas only

i18n/zh/docusaurus-plugin-content-docs/current/reference/sql/copy.md

Lines changed: 9 additions & 1 deletion
@@ -181,7 +181,8 @@ COPY DATABASE <db_name>
  WITH (
    FORMAT = { 'CSV' | 'JSON' | 'PARQUET' }
    START_TIME = "<START TIMESTAMP>",
-   END_TIME = "<END TIMESTAMP>"
+   END_TIME = "<END TIMESTAMP>",
+   PARALLELISM = <number>
  )
  [CONNECTION(
    REGION = "<REGION NAME>",
@@ -196,6 +197,7 @@ COPY DATABASE <db_name>
  |---|---|---|
  | `FORMAT` | Target file format, for example JSON, CSV, Parquet | **Required** |
  | `START_TIME`/`END_TIME`| The time range within which data should be exported; the range is inclusive at the start and exclusive at the end | Optional |
+ | `PARALLELISM` | Number of tables to process in parallel. For example, with 30 tables in a database and `PARALLELISM` set to 8, 8 tables are processed at the same time. Defaults to the total number of CPU cores, with a minimum of 1. | Optional |
  
  > - When importing/exporting tables, the `<PATH>` argument must end with `/`;
  > - COPY DATABASE can likewise use the `CONNECTION` parameters to point the import/export path at object storage such as S3
@@ -207,11 +209,17 @@ COPY DATABASE <db_name>
  -- Export all data in the public database to the /tmp/export/ directory
  COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet');
  
+ -- Export all table data using 4 parallel operations
+ COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=4);
+ 
  -- Export data in the public database within the time range 2022-04-11 08:00:00 to 2022-04-11 09:00:00 to the /tmp/export/ directory
  COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', START_TIME='2022-04-11 08:00:00', END_TIME='2022-04-11 09:00:00');
  
  -- Restore the public database's data from the /tmp/export/ directory
  COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet');
+ 
+ -- Import data using 8 parallel operations
+ COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=8);
  ```
  
  ## Paths on Windows platforms
