Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions dumpling/export/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ go_library(
"task.go",
"util.go",
"writer.go",
"writer_parquet.go",
"writer_util.go",
],
importpath = "github.com/pingcap/tidb/dumpling/export",
Expand Down Expand Up @@ -65,6 +66,13 @@ go_library(
"@com_github_tikv_pd_client//:client",
"@com_github_tikv_pd_client//http",
"@com_github_tikv_pd_client//pkg/caller",
"@com_github_xitongsys_parquet_go//layout",
"@com_github_xitongsys_parquet_go//marshal",
"@com_github_xitongsys_parquet_go//parquet",
"@com_github_xitongsys_parquet_go//schema",
"@com_github_xitongsys_parquet_go//types",
"@com_github_xitongsys_parquet_go//writer",
"@com_github_xitongsys_parquet_go_source//buffer",
"@io_etcd_go_etcd_client_v3//:client",
"@org_golang_x_sync//errgroup",
"@org_uber_go_atomic//:atomic",
Expand All @@ -91,6 +99,7 @@ go_test(
"status_test.go",
"util_for_test.go",
"util_test.go",
"writer_parquet_test.go",
"writer_serial_test.go",
"writer_test.go",
],
Expand Down Expand Up @@ -118,6 +127,9 @@ go_test(
"@com_github_prometheus_client_golang//prometheus/collectors",
"@com_github_spf13_pflag//:pflag",
"@com_github_stretchr_testify//require",
"@com_github_xitongsys_parquet_go//reader",
"@com_github_xitongsys_parquet_go//types",
"@com_github_xitongsys_parquet_go_source//local",
"@com_github_tikv_pd_client//:client",
"@com_github_tikv_pd_client//clients/gc",
"@org_golang_x_sync//errgroup",
Expand Down
48 changes: 44 additions & 4 deletions dumpling/export/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ const (
flagClusterSSLCA = "cluster-ssl-ca"
flagClusterSSLCert = "cluster-ssl-cert"
flagClusterSSLKey = "cluster-ssl-key"
flagParquetCompress = "parquet-compress"
flagParquetPageSize = "parquet-page-size"
flagParquetRowGroupSize = "parquet-row-group-size"

// FlagHelp represents the help flag
FlagHelp = "help"
Expand Down Expand Up @@ -203,11 +206,27 @@ type Config struct {
PDAddr string
// ClusterSSLCA/ClusterSSLCert/ClusterSSLKey override Security.* when connecting
// to PD endpoints for GC control.
ClusterSSLCA string
ClusterSSLCert string
ClusterSSLKey string
ClusterSSLCA string
ClusterSSLCert string
ClusterSSLKey string
ParquetCompressType ParquetCompressType
ParquetPageSize int64
ParquetRowGroupSize int64
}

// ParquetCompressType is the compression codec applied inside generated
// parquet files, selected via --parquet-compress. It is independent of the
// file-level --compress option, which parquet output rejects. The constant
// values are also used as a filename-extension fragment for compressed
// parquet output (see tryToWriteTableData in writer.go).
type ParquetCompressType string

const (
	// NoCompression won't compress given bytes.
	NoCompression ParquetCompressType = "no-compression"
	// Gzip will compress given bytes in gzip format.
	Gzip ParquetCompressType = "gz"
	// Snappy will compress given bytes in snappy format.
	Snappy ParquetCompressType = "snappy"
	// Zstd will compress given bytes in zstd format.
	Zstd ParquetCompressType = "zst"
)

// ServerInfoUnknown is the unknown database type to dumpling
var ServerInfoUnknown = version.ServerInfo{
ServerType: version.ServerTypeUnknown,
Expand Down Expand Up @@ -352,7 +371,7 @@ func (*Config) DefineFlags(flags *pflag.FlagSet) {
"If not specified, dumpling will dump table without inner-concurrency which could be relatively slow. default unlimited")
flags.String(flagWhere, "", "Dump only selected records")
flags.Bool(flagEscapeBackslash, true, "use backslash to escape special characters")
flags.String(flagFiletype, "", "The type of export file (sql/csv)")
flags.String(flagFiletype, "", "The type of export file (sql/csv/parquet)")
flags.Bool(flagNoHeader, false, "whether not to dump CSV table header")
flags.BoolP(flagNoSchemas, "m", false, "Do not dump table schemas with the data")
flags.BoolP(flagNoData, "d", false, "Do not dump table data")
Expand Down Expand Up @@ -384,6 +403,9 @@ func (*Config) DefineFlags(flags *pflag.FlagSet) {
flags.String(flagClusterSSLCA, "", "CA certificate path for TLS connections to PD endpoints used by GC control; if empty, reuse --ca")
flags.String(flagClusterSSLCert, "", "Client certificate path for TLS connections to PD endpoints used by GC control; if empty, reuse --cert")
flags.String(flagClusterSSLKey, "", "Client private key path for TLS connections to PD endpoints used by GC control; if empty, reuse --key")
flags.String(flagParquetCompress, "snappy", "Compress algorithm for parquet file, support 'no-compression', 'snappy', 'gzip', 'zstd'")
flags.Int64(flagParquetPageSize, 1024*1024, "Parquet page size in bytes")
flags.Int64(flagParquetRowGroupSize, 16*1024*1024, "Parquet row group size in bytes")
}

// ParseFromFlags parses dumpling's export.Config from flags
Expand Down Expand Up @@ -625,6 +647,20 @@ func (conf *Config) ParseFromFlags(flags *pflag.FlagSet) error {
return errors.Errorf("%s is only supported when dumping whole table to csv, not compatible with %s", flagCsvOutputDialect, conf.FileType)
}
conf.CsvOutputDialect, err = ParseOutputDialect(dialect)

parquetCompressType, err := flags.GetString(flagParquetCompress)
if err != nil {
return errors.Trace(err)
}
conf.ParquetCompressType, err = ParseParquetCompressType(parquetCompressType)
if err != nil {
return errors.Trace(err)
}
conf.ParquetPageSize, err = flags.GetInt64(flagParquetPageSize)
if err != nil {
return errors.Trace(err)
}
conf.ParquetRowGroupSize, err = flags.GetInt64(flagParquetRowGroupSize)
if err != nil {
return errors.Trace(err)
}
Expand Down Expand Up @@ -818,6 +854,10 @@ func adjustFileFormat(conf *Config) error {
return errors.Errorf("unsupported config.FileType '%s' when we specify --sql, please unset --filetype or set it to 'csv'", conf.FileType)
}
case FileFormatCSVString:
case FileFormatParquetString:
if conf.CompressType != storage.NoCompression {
return errors.Errorf("parquet does not support --compress, please unset it or use --parquet-compress instead")
}
Comment on lines +857 to +860
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Confirm the correct NoCompression constant in compressedio (or whatever package owns CompressType).
rg -nP '\bNoCompression\b' -g '!**/dumpling/export/**'
rg -nP 'type\s+CompressType\b' -g 'pkg/objstore/**'

Repository: pingcap/tidb

Length of output: 1415


🏁 Script executed:

# Check imports at the top of the file
head -100 dumpling/export/config.go | grep -n "import\|^package"

# Check lines around 857-860
sed -n '850,865p' dumpling/export/config.go | cat -n

# Check line 221 for the ParquetCompressType constant
sed -n '215,230p' dumpling/export/config.go | cat -n

Repository: pingcap/tidb

Length of output: 1441


🏁 Script executed:

# Show full imports section of the file
sed -n '5,50p' dumpling/export/config.go | cat -n

# Show what CompressType is defined as - check the Config struct
grep -n "CompressType" dumpling/export/config.go | head -20

Repository: pingcap/tidb

Length of output: 2363


Compile error: storage is not imported.

Line 858 references storage.NoCompression, but this file does not import a storage package. It imports compressedio (line 20), and conf.CompressType is a compressedio.CompressType (line 146), so the check must use compressedio.NoCompression. Note that there is a distinct local NoCompression ParquetCompressType constant at line 221, but that's a different type.

Fix
 	case FileFormatParquetString:
-		if conf.CompressType != storage.NoCompression {
+		if conf.CompressType != compressedio.NoCompression {
 			return errors.Errorf("parquet does not support --compress, please unset it or use --parquet-compress instead")
 		}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
case FileFormatParquetString:
if conf.CompressType != storage.NoCompression {
return errors.Errorf("parquet does not support --compress, please unset it or use --parquet-compress instead")
}
case FileFormatParquetString:
if conf.CompressType != compressedio.NoCompression {
return errors.Errorf("parquet does not support --compress, please unset it or use --parquet-compress instead")
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@dumpling/export/config.go` around lines 857 - 860, The check in the
FileFormatParquetString case uses an undefined storage.NoCompression; change it
to use the correct type's constant compressedio.NoCompression (because
conf.CompressType is a compressedio.CompressType), i.e., replace
storage.NoCompression with compressedio.NoCompression in the if that compares
conf.CompressType; leave the existing ParquetCompressType.NoCompression constant
untouched.

default:
return errors.Errorf("unknown config.FileType '%s'", conf.FileType)
}
Expand Down
10 changes: 10 additions & 0 deletions dumpling/export/ir.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ type TableMeta interface {
ShowCreateView() string
AvgRowLength() uint64
HasImplicitRowID() bool
ColumnInfos() []*ColumnInfo
}

// ColumnInfo carries per-column metadata for writers that need a typed
// schema (e.g. the parquet writer): the column name, its database type
// name, nullability, and — for decimal columns — precision and scale.
type ColumnInfo struct {
	Name string
	// Type is the database type name as reported by the driver,
	// e.g. "VARCHAR" or "DECIMAL".
	Type     string
	Nullable bool
	// Precision and Scale are only meaningful for decimal-like types;
	// they are zero when the driver does not report a decimal size.
	Precision int64
	Scale     int64
}

// SQLRowIter is the iterator on a collection of sql.Row.
Expand All @@ -57,6 +66,7 @@ type RowReceiverStringer interface {
type Stringer interface {
WriteToBuffer(*bytes.Buffer, bool)
WriteToBufferInCsv(*bytes.Buffer, bool, *csvOption)
GetRawBytes() []sql.RawBytes
}

// RowReceiver is an interface which represents sql types that support bind address for *sql.Rows
Expand Down
19 changes: 19 additions & 0 deletions dumpling/export/ir_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,25 @@ type tableMeta struct {
hasImplicitRowID bool
}

// ColumnInfos returns one ColumnInfo per column of the table, derived from
// the driver-reported column types: name, database type name, nullability,
// and decimal precision/scale when the driver supplies them (zero otherwise).
func (tm *tableMeta) ColumnInfos() []*ColumnInfo {
	infos := make([]*ColumnInfo, len(tm.colTypes))
	for i, ct := range tm.colTypes {
		isNullable, _ := ct.Nullable()
		var prec, scale int64
		if p, s, ok := ct.DecimalSize(); ok {
			prec, scale = p, s
		}
		infos[i] = &ColumnInfo{
			Name:      ct.Name(),
			Type:      ct.DatabaseTypeName(),
			Nullable:  isNullable,
			Precision: prec,
			Scale:     scale,
		}
	}
	return infos
}

func (tm *tableMeta) ColumnTypes() []string {
colTypes := make([]string, len(tm.colTypes))
for i, ct := range tm.colTypes {
Expand Down
21 changes: 21 additions & 0 deletions dumpling/export/sql_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,15 @@ func (r *RowReceiverArr) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bo
}
}

// GetRawBytes implements Stringer.GetRawBytes by collecting the first
// raw-bytes element from each underlying receiver into a single slice,
// one entry per column. It uses a pointer receiver for consistency with
// the type's other methods (BindAddress, WriteToBuffer, WriteToBufferInCsv).
func (r *RowReceiverArr) GetRawBytes() []sql.RawBytes {
	rawBytes := make([]sql.RawBytes, len(r.receivers))
	for i, receiver := range r.receivers {
		// Each scalar receiver returns a single-element slice.
		rawBytes[i] = receiver.GetRawBytes()[0]
	}
	return rawBytes
}
Comment on lines +233 to +240
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Remove redundant GetRawBytes() call and use a pointer receiver.

Line 236 invokes receiver.GetRawBytes() and discards the result, then line 237 calls it again — the first call is dead code. Also, this method uses a value receiver while every other method on RowReceiverArr (BindAddress, WriteToBuffer, WriteToBufferInCsv) uses a pointer receiver; the inconsistency risks method-set surprises when satisfying interfaces through a pointer. Finally, the method is exported and lacks a doc comment.

🐛 Proposed fix
-func (r RowReceiverArr) GetRawBytes() []sql.RawBytes {
-	rawBytes := make([]sql.RawBytes, len(r.receivers))
-	for i, receiver := range r.receivers {
-		receiver.GetRawBytes()
-		rawBytes[i] = receiver.GetRawBytes()[0]
-	}
-	return rawBytes
-}
+// GetRawBytes implements Stringer.GetRawBytes by collecting the first raw-bytes
+// element from each underlying receiver.
+func (r *RowReceiverArr) GetRawBytes() []sql.RawBytes {
+	rawBytes := make([]sql.RawBytes, len(r.receivers))
+	for i, receiver := range r.receivers {
+		rawBytes[i] = receiver.GetRawBytes()[0]
+	}
+	return rawBytes
+}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
func (r RowReceiverArr) GetRawBytes() []sql.RawBytes {
rawBytes := make([]sql.RawBytes, len(r.receivers))
for i, receiver := range r.receivers {
receiver.GetRawBytes()
rawBytes[i] = receiver.GetRawBytes()[0]
}
return rawBytes
}
// GetRawBytes implements Stringer.GetRawBytes by collecting the first raw-bytes
// element from each underlying receiver.
func (r *RowReceiverArr) GetRawBytes() []sql.RawBytes {
rawBytes := make([]sql.RawBytes, len(r.receivers))
for i, receiver := range r.receivers {
rawBytes[i] = receiver.GetRawBytes()[0]
}
return rawBytes
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@dumpling/export/sql_type.go` around lines 233 - 240, The GetRawBytes method
on RowReceiverArr currently makes a redundant call and uses a value receiver;
change its signature to use a pointer receiver (func (r *RowReceiverArr)
GetRawBytes() []sql.RawBytes), remove the dead receiver.GetRawBytes() call so
you only call receiver.GetRawBytes()[0] once when populating rawBytes from
r.receivers, and add a short doc comment above the exported GetRawBytes method;
ensure consistency with the other methods BindAddress, WriteToBuffer, and
WriteToBufferInCsv which are pointer receivers.


// SQLTypeNumber implements RowReceiverStringer which represents numeric type columns in database
type SQLTypeNumber struct {
SQLTypeString
Expand All @@ -253,6 +262,10 @@ func (s SQLTypeNumber) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, opt *csvOpti
}
}

// GetRawBytes implements Stringer.GetRawBytes; it wraps the receiver's raw
// column value (promoted from the embedded SQLTypeString) in a
// single-element slice.
func (s *SQLTypeNumber) GetRawBytes() []sql.RawBytes {
	return []sql.RawBytes{s.RawBytes}
}

// SQLTypeString implements RowReceiverStringer which represents string type columns in database
type SQLTypeString struct {
sql.RawBytes
Expand Down Expand Up @@ -285,6 +298,10 @@ func (s *SQLTypeString) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash boo
}
}

// GetRawBytes implements Stringer.GetRawBytes; it wraps the receiver's raw
// column value in a single-element slice.
func (s *SQLTypeString) GetRawBytes() []sql.RawBytes {
	return []sql.RawBytes{s.RawBytes}
}

// SQLTypeBytes implements RowReceiverStringer which represents bytes type columns in database
type SQLTypeBytes struct {
sql.RawBytes
Expand Down Expand Up @@ -321,3 +338,7 @@ func (s *SQLTypeBytes) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool
bf.WriteString(opt.nullValue)
}
}

// GetRawBytes implements Stringer.GetRawBytes; it wraps the receiver's raw
// column value in a single-element slice.
func (s *SQLTypeBytes) GetRawBytes() []sql.RawBytes {
	return []sql.RawBytes{s.RawBytes}
}
23 changes: 23 additions & 0 deletions dumpling/export/util_for_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ type mockTableIR struct {
hasImplicitRowID bool
rowErr error
rows *sql.Rows
columnInfos []*ColumnInfo
SQLRowIter
}

Expand Down Expand Up @@ -256,6 +257,10 @@ func (m *mockTableIR) EscapeBackSlash() bool {
return m.escapeBackSlash
}

// ColumnInfos implements TableMeta.ColumnInfos, returning the column
// metadata configured on the mock (nil unless set via
// newMockTableIRWithColumnInfo or directly on the struct).
func (m *mockTableIR) ColumnInfos() []*ColumnInfo {
	return m.columnInfos
}

func newMockTableIR(databaseName, tableName string, data [][]driver.Value, specialComments, colTypes []string) *mockTableIR {
return &mockTableIR{
dbName: databaseName,
Expand All @@ -268,3 +273,21 @@ func newMockTableIR(databaseName, tableName string, data [][]driver.Value, speci
SQLRowIter: nil,
}
}

// newMockTableIRWithColumnInfo builds a mockTableIR for tests that need
// typed column metadata (e.g. parquet writer tests). The plain colTypes
// slice is derived from the Type field of each supplied ColumnInfo.
func newMockTableIRWithColumnInfo(databaseName, tableName string, data [][]driver.Value, specialComments []string, infos []*ColumnInfo) *mockTableIR {
	colTypes := make([]string, 0, len(infos))
	for _, info := range infos {
		colTypes = append(colTypes, info.Type)
	}
	return &mockTableIR{
		dbName:        databaseName,
		tblName:       tableName,
		data:          data,
		specCmt:       specialComments,
		selectedField: "*",
		selectedLen:   len(infos),
		colTypes:      colTypes,
		columnInfos:   infos,
		SQLRowIter:    nil,
	}
}
8 changes: 7 additions & 1 deletion dumpling/export/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ func NewWriter(
sw.fileFmt = FileFormatSQLText
case FileFormatCSVString:
sw.fileFmt = FileFormatCSV
case FileFormatParquetString:
sw.fileFmt = FileFormatParquet
}
return sw
}
Expand Down Expand Up @@ -232,7 +234,11 @@ func (w *Writer) WriteTableData(meta TableMeta, ir TableDataIR, currentChunk int
func (w *Writer) tryToWriteTableData(tctx *tcontext.Context, meta TableMeta, ir TableDataIR, curChkIdx int) error {
conf, format := w.conf, w.fileFmt
namer := newOutputFileNamer(meta, curChkIdx, conf.Rows != UnspecifiedSize, conf.FileSize != UnspecifiedSize)
fileName, err := namer.NextName(conf.OutputFileTemplate, w.fileFmt.Extension())
fileFmtExtension := format.Extension()
if format == FileFormatParquet && conf.ParquetCompressType != NoCompression {
fileFmtExtension = fmt.Sprintf("%s.%s", conf.ParquetCompressType, fileFmtExtension)
}
fileName, err := namer.NextName(conf.OutputFileTemplate, fileFmtExtension)
if err != nil {
return err
}
Expand Down
Loading
Loading