From c0115955467815dfd96ffb74a8622342d0f6c08e Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 12:47:28 +0800
Subject: [PATCH 01/41] Add SchemaMapper implementation and improve
 UppercaseAdapter for schema adaptation

- Updated SchemaAdapterFactory create method signature to accept projected and
  table schema refs.
- Implemented map_column_index and map_schema methods in UppercaseAdapter to
  support case-insensitive column name mapping and schema projection.
- Added UppercaseSchemaMapper to handle the mapping of RecordBatch columns and
  column statistics according to the projection.
- Refactored adapt and output_schema methods accordingly.
- This enables correct schema and data mapping for adapters that change column
  names (e.g., to uppercase) in integration tests.
---
 .../schema_adapter_integration_tests.rs       | 84 +++++++++++++++++--
 1 file changed, 75 insertions(+), 9 deletions(-)

diff --git a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs b/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
index e3d53a31c549..71602c580deb 100644
--- a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
+++ b/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
@@ -26,7 +26,10 @@ use datafusion::prelude::*;
 use datafusion_common::Result;
 use datafusion_datasource::file::FileSource;
 use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
-use datafusion_datasource::schema_adapter::{SchemaAdapter, SchemaAdapterFactory};
+use datafusion_datasource::schema_adapter::{
+    SchemaAdapter, SchemaAdapterFactory, SchemaMapper,
+};
+use datafusion_common::ColumnStatistics;
 use datafusion_datasource::source::DataSourceExec;
 use datafusion_datasource::PartitionedFile;
 use std::sync::Arc;
@@ -47,29 +50,62 @@ use datafusion_datasource_csv::CsvSource;
 struct UppercaseAdapterFactory {}
 
 impl SchemaAdapterFactory for UppercaseAdapterFactory {
-    fn create(&self, schema: &Schema) -> Result<Box<dyn SchemaAdapter>> {
-        Ok(Box::new(UppercaseAdapter {
-            input_schema: Arc::new(schema.clone()),
-        }))
+    fn create(
+        &self,
+        projected_table_schema: SchemaRef,
+        _table_schema: SchemaRef,
+    ) -> Box<dyn SchemaAdapter> {
+        Box::new(UppercaseAdapter {
+            table_schema: projected_table_schema,
+        })
     }
 }
 
 /// Schema adapter that transforms column names to uppercase
 #[derive(Debug)]
 struct UppercaseAdapter {
-    input_schema: SchemaRef,
+    table_schema: SchemaRef,
 }
 
 impl SchemaAdapter for UppercaseAdapter {
+    fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option<usize> {
+        let field = self.table_schema.field(index);
+        file_schema
+            .fields()
+            .iter()
+            .position(|f| f.name().eq_ignore_ascii_case(field.name()))
+    }
+
+    fn map_schema(&self, file_schema: &Schema) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
+        let mut projection = Vec::with_capacity(file_schema.fields().len());
+        for (idx, file_field) in file_schema.fields().iter().enumerate() {
+            if self
+                .table_schema
+                .fields()
+                .iter()
+                .any(|f| f.name().eq_ignore_ascii_case(file_field.name()))
+            {
+                projection.push(idx);
+            }
+        }
+
+        let mapper = UppercaseSchemaMapper {
+            output_schema: self.output_schema(),
+            projection: projection.clone(),
+        };
+
+        Ok((Arc::new(mapper), projection))
+    }
+}
+
+impl UppercaseAdapter {
     fn adapt(&self, record_batch: RecordBatch) -> Result<RecordBatch> {
-        // In a real adapter, we might transform the data too
-        // For this test, we're just passing through the batch
         Ok(record_batch)
     }
 
     fn output_schema(&self) -> SchemaRef {
         let fields = self
-            .input_schema
+            .table_schema
             .fields()
             .iter()
             .map(|f| {
@@ -85,6 +121,36 @@ impl SchemaAdapter for UppercaseAdapter {
     }
 }
 
+#[derive(Debug)]
+struct UppercaseSchemaMapper {
+    output_schema: SchemaRef,
+    projection: Vec<usize>,
+}
+
+impl SchemaMapper for UppercaseSchemaMapper {
+    fn map_batch(&self, batch: RecordBatch) -> Result<RecordBatch> {
+        let columns = self
+            .projection
+            .iter()
+            .map(|&i| batch.column(i).clone())
+            .collect::<Vec<_>>();
+        RecordBatch::try_new(self.output_schema.clone(), columns)
+    }
+
+    fn map_column_statistics(
+        &self,
+        stats: &[ColumnStatistics],
+    ) -> Result<Vec<ColumnStatistics>> {
+        Ok(
+            self
+                .projection
+                .iter()
+                .map(|&i| stats.get(i).cloned().unwrap_or_default())
+                .collect(),
+        )
+    }
+}
+
 #[cfg(feature = "parquet")]
 #[tokio::test]
 async fn test_parquet_integration_with_schema_adapter() -> Result<()> {

From 0c3190787364076a581a9327881ec4d8c5711ba6 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 13:26:19 +0800
Subject: [PATCH 02/41] Add integration test configuration for schema adapter

---
 datafusion/core/Cargo.toml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index c4455e271c84..1f8ef789c935 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -255,3 +255,8 @@ name = "dataframe"
 [[bench]]
 harness = false
 name = "spm"
+
+
+[[test]]
+name = "schema_adapter_integration_tests"
+path = "tests/integration_tests/schema_adapter_integration_tests.rs"

From 8bb5d1a7b7f7fedc519bce5c100377a1f52b6b04 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 13:26:30 +0800
Subject: [PATCH 03/41] Add integration tests for schema adapter functionality
 and create module structure

---
 datafusion/core/tests/integration_tests.rs    | 20 +++++++++++++
 .../core/tests/integration_tests/mod.rs       | 20 +++++++++++++
 .../schema_adapter_integration_tests.rs       | 28 +++++++++++++--------
 3 files changed, 54 insertions(+), 14 deletions(-)
 create mode 100644 datafusion/core/tests/integration_tests.rs
 create mode 100644 datafusion/core/tests/integration_tests/mod.rs

diff --git a/datafusion/core/tests/integration_tests.rs b/datafusion/core/tests/integration_tests.rs
new file mode 100644
index 000000000000..797ce3e34491
--- /dev/null
+++ b/datafusion/core/tests/integration_tests.rs
@@ -0,0 +1,20 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Integration tests for DataFusion
+
+mod integration_tests;
\ No newline at end of file
diff --git a/datafusion/core/tests/integration_tests/mod.rs b/datafusion/core/tests/integration_tests/mod.rs
new file mode 100644
index 000000000000..5424780a0832
--- /dev/null
+++ b/datafusion/core/tests/integration_tests/mod.rs
@@ -0,0 +1,20 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Integration tests module
+
+pub mod schema_adapter_integration_tests;
\ No newline at end of file
diff --git a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs b/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
index 71602c580deb..4960397a6c94 100644
--- a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
+++ b/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
@@ -17,21 +17,21 @@
 
 //! Integration test for schema adapter factory functionality
 
-use std::any::Any;
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use arrow::record_batch::RecordBatch;
 use datafusion::datasource::object_store::ObjectStoreUrl;
 use datafusion::datasource::physical_plan::arrow_file::ArrowSource;
 use datafusion::prelude::*;
+use datafusion_common::ColumnStatistics;
 use datafusion_common::Result;
 use datafusion_datasource::file::FileSource;
 use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
 use datafusion_datasource::schema_adapter::{
     SchemaAdapter, SchemaAdapterFactory, SchemaMapper,
 };
-use datafusion_common::ColumnStatistics;
 use datafusion_datasource::source::DataSourceExec;
 use datafusion_datasource::PartitionedFile;
+use std::any::Any;
 use std::sync::Arc;
 use tempfile::TempDir;
 
@@ -76,7 +76,10 @@ impl SchemaAdapter for UppercaseAdapter {
             .position(|f| f.name().eq_ignore_ascii_case(field.name()))
     }
 
-    fn map_schema(&self, file_schema: &Schema) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
+    fn map_schema(
+        &self,
+        file_schema: &Schema,
+    ) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
         let mut projection = Vec::with_capacity(file_schema.fields().len());
         for (idx, file_field) in file_schema.fields().iter().enumerate() {
             if self
@@ -141,13 +144,11 @@ impl SchemaMapper for UppercaseSchemaMapper {
         &self,
         stats: &[ColumnStatistics],
     ) -> Result<Vec<ColumnStatistics>> {
-        Ok(
-            self
-                .projection
-                .iter()
-                .map(|&i| stats.get(i).cloned().unwrap_or_default())
-                .collect(),
-        )
+        Ok(self
+            .projection
+            .iter()
+            .map(|&i| stats.get(i).cloned().unwrap_or_default())
+            .collect())
     }
 }
 
@@ -209,14 +210,15 @@
     // Verify the schema has uppercase column names
     let result_schema = batches[0].schema();
     assert_eq!(result_schema.field(0).name(), "ID");
-    assert_eq!(result_schema.field(1).name(), "NAME");
+    assert_eq!(result_schema.field(1).name(), "NAME0");
 
     Ok(())
 }
 
 #[cfg(feature = "parquet")]
 #[tokio::test]
-async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter() -> Result<()> {
+async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter(
+) -> Result<()> {
     // Create a temporary directory for our test file
     let tmp_dir = TempDir::new()?;
     let file_path = tmp_dir.path().join("test.parquet");
@@ -277,7 +279,6 @@ async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter()
     Ok(())
 }
 
-
 #[tokio::test]
 async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
     // This test verifies that the same schema adapter factory can be reused
@@ -390,7 +391,6 @@ fn test_schema_adapter_preservation() {
     assert!(config.source().schema_adapter_factory().is_some());
 }
 
-
 /// A test source for testing schema adapters
 #[derive(Debug, Clone)]
 struct TestSource {

From 355cb9535cb1f311eefda2c7b8ad577c821ea4ab Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 13:40:17 +0800
Subject: [PATCH 04/41] Remove duplicate struct and implementation blocks for
 TestSchemaAdapterFactory, TestSchemaAdapter, and TestSchemaMapping in schema
 adapter integration tests.

---
 .../schema_adapter_integration_tests.rs       | 63 +------------------
 1 file changed, 2 insertions(+), 61 deletions(-)

diff --git a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs b/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
index 4960397a6c94..45f8e0693d25 100644
--- a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
+++ b/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
@@ -192,6 +192,7 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
     let config = FileScanConfigBuilder::new(
         ObjectStoreUrl::parse(&format!("file://{}", file_path_str))?,
         schema.clone(),
+        None,
     )
     .with_source(source)
     .build();
@@ -461,67 +462,7 @@ impl FileSource for TestSource {
     }
 }
 
-/// A test schema adapter factory
-#[derive(Debug)]
-struct TestSchemaAdapterFactory {}
-
-impl SchemaAdapterFactory for TestSchemaAdapterFactory {
-    fn create(
-        &self,
-        projected_table_schema: SchemaRef,
-        _table_schema: SchemaRef,
-    ) -> Box<dyn SchemaAdapter> {
-        Box::new(TestSchemaAdapter {
-            table_schema: projected_table_schema,
-        })
-    }
-}
-
-/// A test schema adapter implementation
-#[derive(Debug)]
-struct TestSchemaAdapter {
-    table_schema: SchemaRef,
-}
-
-impl SchemaAdapter for TestSchemaAdapter {
-    fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option<usize> {
-        let field = self.table_schema.field(index);
-        file_schema.fields.find(field.name()).map(|(i, _)| i)
-    }
-
-    fn map_schema(
-        &self,
-        file_schema: &Schema,
-    ) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
-        let mut projection = Vec::with_capacity(file_schema.fields().len());
-        for (file_idx, file_field) in file_schema.fields().iter().enumerate() {
-            if self.table_schema.fields().find(file_field.name()).is_some() {
-                projection.push(file_idx);
-            }
-        }
-
-        Ok((Arc::new(TestSchemaMapping {}), projection))
-    }
-}
-
-/// A test schema mapper implementation
-#[derive(Debug)]
-struct TestSchemaMapping {}
-
-impl SchemaMapper for TestSchemaMapping {
-    fn map_batch(&self, batch: RecordBatch) -> Result<RecordBatch> {
-        // For testing, just return the original batch
-        Ok(batch)
-    }
-
-    fn map_column_statistics(
-        &self,
-        stats: &[ColumnStatistics],
-    ) -> Result<Vec<ColumnStatistics>> {
-        // For testing, just return the input statistics
-        Ok(stats.to_vec())
-    }
-}
+// Removed duplicate struct and impl blocks for TestSchemaAdapterFactory, TestSchemaAdapter, and TestSchemaMapping
 
 #[test]
 fn test_schema_adapter() {

From 4c41b0c35f2bc7bc7274a2dc7782b1c2fffe544a Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 15:11:07 +0800
Subject: [PATCH 05/41] Refactor schema adapter integration tests by removing
 the integration_tests.rs file and consolidating struct and implementation
 blocks for TestSchemaAdapterFactory, TestSchemaAdapter, and
 TestSchemaMapping. Update imports and adjust test configurations for
 ParquetSource and CsvSource.

---
 datafusion/core/tests/integration_tests.rs    |  20 ---
 .../schema_adapter_integration_tests.rs       | 170 ++++++++++++------
 2 files changed, 116 insertions(+), 74 deletions(-)
 delete mode 100644 datafusion/core/tests/integration_tests.rs

diff --git a/datafusion/core/tests/integration_tests.rs b/datafusion/core/tests/integration_tests.rs
deleted file mode 100644
index 797ce3e34491..000000000000
--- a/datafusion/core/tests/integration_tests.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Integration tests for DataFusion
-
-mod integration_tests;
\ No newline at end of file
diff --git a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs b/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
index 45f8e0693d25..9ec01b48a435 100644
--- a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
+++ b/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
@@ -20,30 +20,35 @@
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use arrow::record_batch::RecordBatch;
 use datafusion::datasource::object_store::ObjectStoreUrl;
-use datafusion::datasource::physical_plan::arrow_file::ArrowSource;
+use datafusion::datasource::physical_plan::ArrowSource;
+use datafusion::datasource::physical_plan::JsonSource;
+#[cfg(feature = "parquet")]
+use datafusion::datasource::physical_plan::ParquetSource;
+use datafusion::datasource::physical_plan::{
+    FileOpener, FileScanConfig, FileScanConfigBuilder, FileSource,
+};
+use datafusion::datasource::source::DataSourceExec;
+use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion::physical_plan::Statistics;
 use datafusion::prelude::*;
 use datafusion_common::ColumnStatistics;
+use datafusion_common::DataFusionError;
 use datafusion_common::Result;
-use datafusion_datasource::file::FileSource;
-use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
 use datafusion_datasource::schema_adapter::{
     SchemaAdapter, SchemaAdapterFactory, SchemaMapper,
 };
-use datafusion_datasource::source::DataSourceExec;
 use datafusion_datasource::PartitionedFile;
-use std::any::Any;
-use std::sync::Arc;
-use tempfile::TempDir;
-
-#[cfg(feature = "parquet")]
-use datafusion_datasource_parquet::ParquetSource;
+use object_store::ObjectStore;
 #[cfg(feature = "parquet")]
 use parquet::arrow::ArrowWriter;
 #[cfg(feature = "parquet")]
 use parquet::file::properties::WriterProperties;
+use std::any::Any;
+use std::sync::Arc;
+use tempfile::TempDir;
 
-#[cfg(feature = "csv")]
-use datafusion_datasource_csv::CsvSource;
+use datafusion::datasource::physical_plan::CsvSource;
 
 /// A schema adapter factory that transforms column names to uppercase
 #[derive(Debug)]
@@ -101,13 +106,42 @@ impl SchemaAdapter for UppercaseAdapter {
     }
 }
 
+#[derive(Debug)]
+struct TestSchemaMapping {
+    output_schema: SchemaRef,
+    projection: Vec<usize>,
+}
+
+impl SchemaMapper for TestSchemaMapping {
+    fn map_batch(&self, batch: RecordBatch) -> Result<RecordBatch> {
+        let columns = self
+            .projection
+            .iter()
+            .map(|&i| batch.column(i).clone())
+            .collect::<Vec<_>>();
+        Ok(RecordBatch::try_new(self.output_schema.clone(), columns)?)
+    }
+
+    fn map_column_statistics(
+        &self,
+        stats: &[ColumnStatistics],
+    ) -> Result<Vec<ColumnStatistics>> {
+        Ok(self
+            .projection
+            .iter()
+            .map(|&i| stats.get(i).cloned().unwrap_or_default())
+            .collect())
+    }
+}
+
 impl UppercaseAdapter {
+    #[allow(dead_code)]
     fn adapt(&self, record_batch: RecordBatch) -> Result<RecordBatch> {
         Ok(record_batch)
     }
 
     fn output_schema(&self) -> SchemaRef {
-        let fields = self
+        let fields: Vec<Field> = self
             .table_schema
             .fields()
             .iter()
@@ -137,7 +171,7 @@ impl SchemaMapper for UppercaseSchemaMapper {
             .iter()
             .map(|&i| batch.column(i).clone())
             .collect::<Vec<_>>();
-        RecordBatch::try_new(self.output_schema.clone(), columns)
+        Ok(RecordBatch::try_new(self.output_schema.clone(), columns)?)
     }
 
     fn map_column_statistics(
@@ -185,16 +219,15 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
     let ctx = SessionContext::new();
 
     // Create a ParquetSource with the adapter factory
-    let source = ParquetSource::default()
-        .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}));
+    let file_source = ParquetSource::default()
+        .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
 
-    // Create a scan config
     let config = FileScanConfigBuilder::new(
-        ObjectStoreUrl::parse(&format!("file://{}", file_path_str))?,
+        ObjectStoreUrl::parse(format!("file://{file_path_str}"))?,
         schema.clone(),
-        None,
+        file_source.clone(),
     )
-    .with_source(source)
+    .with_file(PartitionedFile::new(file_path_str, 100))
     .build();
 
     // Create a data source executor
@@ -250,15 +283,15 @@ async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter(
     let ctx = SessionContext::new();
 
     // Create a ParquetSource with the adapter factory
-    let source = ParquetSource::default()
-        .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}));
+    let file_source = ParquetSource::default()
+        .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
 
-    // Create a scan config
     let config = FileScanConfigBuilder::new(
-        ObjectStoreUrl::parse(&format!("file://{}", file_path_str))?,
+        ObjectStoreUrl::parse(format!("file://{file_path_str}"))?,
         schema.clone(),
+        file_source,
     )
-    .with_source(source)
+    .with_file(PartitionedFile::new(file_path_str, 100))
     .build();
 
     // Create a data source executor
@@ -292,15 +325,18 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
     let factory = Arc::new(UppercaseAdapterFactory {});
 
     // Apply the same adapter to different source types
-    let arrow_source =
-        ArrowSource::default().with_schema_adapter_factory(factory.clone());
+    let arrow_source = ArrowSource::default()
+        .with_schema_adapter_factory(factory.clone())
+        .unwrap();
 
     #[cfg(feature = "parquet")]
-    let parquet_source =
-        ParquetSource::default().with_schema_adapter_factory(factory.clone());
+    let parquet_source = ParquetSource::default()
+        .with_schema_adapter_factory(factory.clone())
+        .unwrap();
 
-    #[cfg(feature = "csv")]
-    let csv_source = CsvSource::default().with_schema_adapter_factory(factory.clone());
+    let csv_source = CsvSource::default()
+        .with_schema_adapter_factory(factory.clone())
+        .unwrap();
 
     // Verify adapters were properly set
     assert!(arrow_source.schema_adapter_factory().is_some());
@@ -308,7 +344,6 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
     #[cfg(feature = "parquet")]
     assert!(parquet_source.schema_adapter_factory().is_some());
 
-    #[cfg(feature = "csv")]
     assert!(csv_source.schema_adapter_factory().is_some());
 
     Ok(())
@@ -329,11 +364,9 @@ fn test_from_implementations() {
     #[cfg(feature = "parquet")]
     test_from_impl::<ParquetSource>("parquet");
 
-    #[cfg(feature = "csv")]
     test_from_impl::<CsvSource>("csv");
 
-    #[cfg(feature = "json")]
-    test_from_impl::<JsonSource>("json");
+    test_from_impl::<JsonSource>("json");
 }
 
 /// A simple test schema adapter factory that doesn't modify the schema
@@ -341,10 +374,14 @@
 struct TestSchemaAdapterFactory {}
 
 impl SchemaAdapterFactory for TestSchemaAdapterFactory {
-    fn create(&self, schema: &Schema) -> Result<Box<dyn SchemaAdapter>> {
-        Ok(Box::new(TestSchemaAdapter {
-            input_schema: Arc::new(schema.clone()),
-        }))
+    fn create(
+        &self,
+        projected_table_schema: SchemaRef,
+        _table_schema: SchemaRef,
+    ) -> Box<dyn SchemaAdapter> {
+        Box::new(TestSchemaAdapter {
+            input_schema: projected_table_schema,
+        })
     }
 }
 
@@ -355,13 +392,36 @@ struct TestSchemaAdapter {
 }
 
 impl SchemaAdapter for TestSchemaAdapter {
-    fn adapt(&self, record_batch: RecordBatch) -> Result<RecordBatch> {
-        // Just pass through the batch unmodified
-        Ok(record_batch)
+    fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option<usize> {
+        let field = self.input_schema.field(index);
+        file_schema
+            .fields()
+            .iter()
+            .position(|f| f.name() == field.name())
     }
 
-    fn output_schema(&self) -> SchemaRef {
-        self.input_schema.clone()
+    fn map_schema(
+        &self,
+        file_schema: &Schema,
+    ) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
+        let mut projection = Vec::with_capacity(file_schema.fields().len());
+        for (idx, file_field) in file_schema.fields().iter().enumerate() {
+            if self
+                .input_schema
+                .fields()
+                .iter()
+                .any(|f| f.name() == file_field.name())
+            {
+                projection.push(idx);
+            }
+        }
+
+        let mapper = TestSchemaMapping {
+            output_schema: Arc::clone(&self.input_schema),
+            projection: projection.clone(),
+        };
+
+        Ok((Arc::new(mapper), projection))
     }
 }
 
@@ -377,31 +437,34 @@ fn test_schema_adapter_preservation() {
     // Create source with schema adapter factory
     let source = ParquetSource::default();
     let factory = Arc::new(TestSchemaAdapterFactory {});
-    let file_source = source.with_schema_adapter_factory(factory);
+    let file_source = source.with_schema_adapter_factory(factory).unwrap();
 
     // Create a FileScanConfig with the source
-    let config_builder =
-        FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), schema.clone())
-            .with_source(file_source.clone())
-            // Add a file to make it valid
-            .with_file(PartitionedFile::new("test.parquet", 100));
+    let config_builder = FileScanConfigBuilder::new(
+        ObjectStoreUrl::local_filesystem(),
+        schema.clone(),
+        file_source.clone(),
+    )
+    .with_file(PartitionedFile::new("test.parquet", 100));
 
     let config = config_builder.build();
 
     // Verify the schema adapter factory is present in the file source
-    assert!(config.source().schema_adapter_factory().is_some());
+    assert!(config.file_source().schema_adapter_factory().is_some());
 }
 
 /// A test source for testing schema adapters
 #[derive(Debug, Clone)]
 struct TestSource {
     schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>,
+    metrics: ExecutionPlanMetricsSet,
 }
 
 impl TestSource {
     fn new() -> Self {
         Self {
             schema_adapter_factory: None,
+            metrics: ExecutionPlanMetricsSet::new(),
         }
     }
 }
@@ -441,7 +504,7 @@ impl FileSource for TestSource {
     }
 
     fn metrics(&self) -> &ExecutionPlanMetricsSet {
-        unimplemented!("Not needed for this test")
+        &self.metrics
     }
 
     fn statistics(&self) -> Result<Statistics> {
@@ -454,6 +517,7 @@
     ) -> Result<Arc<dyn FileSource>> {
         Ok(Arc::new(Self {
             schema_adapter_factory: Some(schema_adapter_factory),
+            metrics: ExecutionPlanMetricsSet::new(),
         }))
     }
 
@@ -462,8 +526,6 @@
     }
 }
 
-// Removed duplicate struct and impl blocks for TestSchemaAdapterFactory, TestSchemaAdapter, and TestSchemaMapping
-
 #[test]
 fn test_schema_adapter() {

From 2acf1e4dc035c8cc63bc63fab0ef04fc9ed72d67 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 15:51:54 +0800
Subject: [PATCH 06/41] Update schema adapter integration tests path in
 Cargo.toml to point to the directory instead of a specific file

---
 datafusion/core/Cargo.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 1f8ef789c935..cf5713007f3c 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -256,7 +256,6 @@ name = "dataframe"
 harness = false
 name = "spm"
 
-
 [[test]]
 name = "schema_adapter_integration_tests"
-path = "tests/integration_tests/schema_adapter_integration_tests.rs"
+path = "tests/integration_tests/"

From 4c41b0c35f2bc7bc7274a2dc7782b1c2fffe544a Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 16:08:24 +0800
Subject: [PATCH 07/41] Remove schema_adapter_integration_tests block from
 Cargo.toml in datafusion/core

---
 datafusion/core/Cargo.toml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index cf5713007f3c..c4455e271c84 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -255,7 +255,3 @@ name = "dataframe"
 [[bench]]
 harness = false
 name = "spm"
-
-[[test]]
-name = "schema_adapter_integration_tests"
-path = "tests/integration_tests/"

From 6cf9654bc4fa46d18a634415f63ac1f605602c98 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 16:36:25 +0800
Subject: [PATCH 08/41] rename integration_tests folder to schema_adaptation

---
 datafusion/core/tests/core_integration.rs                     | 2 +-
 .../core/tests/physical_optimizer/aggregate_statistics.rs     | 2 +-
 .../tests/physical_optimizer/combine_partial_final_agg.rs     | 2 +-
 .../core/tests/physical_optimizer/enforce_distribution.rs     | 4 ++--
 datafusion/core/tests/physical_optimizer/enforce_sorting.rs   | 2 +-
 datafusion/core/tests/physical_optimizer/limit_pushdown.rs    | 2 +-
 .../tests/physical_optimizer/limited_distinct_aggregation.rs  | 2 +-
 .../replace_with_order_preserving_variants.rs                 | 2 +-
 datafusion/core/tests/physical_optimizer/sanity_checker.rs    | 2 +-
 .../tests/{integration_tests => schema_adaptation}/mod.rs     | 0
 .../schema_adapter_integration_tests.rs                       | 0
 11 files changed, 10 insertions(+), 10 deletions(-)
 rename datafusion/core/tests/{integration_tests => schema_adaptation}/mod.rs (100%)
 rename datafusion/core/tests/{integration_tests => schema_adaptation}/schema_adapter_integration_tests.rs (100%)

diff --git a/datafusion/core/tests/core_integration.rs b/datafusion/core/tests/core_integration.rs
index 250538b13370..9a9d77a0c540 100644
--- a/datafusion/core/tests/core_integration.rs
+++ b/datafusion/core/tests/core_integration.rs
@@ -43,7 +43,7 @@ mod custom_sources_cases;
 mod optimizer;
 
 /// Run all tests that are found in the `physical_optimizer` directory
-mod physical_optimizer;
+mod physical_optimizer_test;
 
 /// Run all tests that are found in the `serde` directory
 mod serde;
diff --git a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs
index a79d743cb253..00eea3bfa38e 100644
--- a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs
+++ b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs
@@ -17,7 +17,7 @@
 
 use std::sync::Arc;
 
-use crate::physical_optimizer::test_utils::TestAggregate;
+use crate::physical_optimizer_test::test_utils::TestAggregate;
 
 use arrow::array::Int32Array;
 use arrow::datatypes::{DataType, Field, Schema};
diff --git a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
index 9c76f6ab6f58..94a18365cb80 100644
--- a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
+++ b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
@@ -23,7 +23,7 @@ use insta::assert_snapshot;
 
 use std::sync::Arc;
 
-use crate::physical_optimizer::test_utils::parquet_exec;
+use crate::physical_optimizer_test::test_utils::parquet_exec;
 
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use datafusion_common::config::ConfigOptions;
diff --git a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs
index fd847763124a..0d9a85ac8967 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs
@@ -19,7 +19,7 @@ use std::fmt::Debug;
 use std::ops::Deref;
 use std::sync::Arc;
 
-use crate::physical_optimizer::test_utils::{
+use crate::physical_optimizer_test::test_utils::{
     check_integrity, coalesce_partitions_exec, parquet_exec_with_sort,
     parquet_exec_with_stats, repartition_exec, schema, sort_exec,
     sort_exec_with_preserve_partitioning, sort_merge_join_exec,
@@ -300,7 +300,7 @@ fn hash_join_exec(
     join_on: &JoinOn,
     join_type: &JoinType,
 ) -> Arc<dyn ExecutionPlan> {
-    crate::physical_optimizer::test_utils::hash_join_exec(
+    crate::physical_optimizer_test::test_utils::hash_join_exec(
         left,
         right,
         join_on.clone(),
diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index e31a30cc0883..f7e538e8a170 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -17,7 +17,7 @@
 
 use std::sync::Arc;
 
-use crate::physical_optimizer::test_utils::{
+use crate::physical_optimizer_test::test_utils::{
     aggregate_exec, bounded_window_exec, bounded_window_exec_with_partition,
     check_integrity, coalesce_batches_exec, coalesce_partitions_exec, create_test_schema,
     create_test_schema2, create_test_schema3, filter_exec, global_limit_exec,
diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs
index 56d48901f284..63c6d1f1c6d6 100644
--- a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs
+++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs
@@ -17,7 +17,7 @@
 
 use std::sync::Arc;
 
-use crate::physical_optimizer::test_utils::{
+use crate::physical_optimizer_test::test_utils::{
     coalesce_batches_exec, coalesce_partitions_exec, global_limit_exec,
     local_limit_exec, sort_exec, sort_preserving_merge_exec, stream_exec,
 };
diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
index ad15d6803413..ba8ff5fd2387 100644
--- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
+++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
@@ -20,7 +20,7 @@ use insta::assert_snapshot;
 
 use std::sync::Arc;
 
-use crate::physical_optimizer::test_utils::{
+use crate::physical_optimizer_test::test_utils::{
     build_group_by, get_optimized_plan, mock_data, parquet_exec_with_sort, schema,
     TestAggregate,
 };
diff --git a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs
index c9baa9a932ae..e67f96b252a2 100644
--- a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs
+++ b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs
@@ -17,7 +17,7 @@
 
 use std::sync::Arc;
 
-use crate::physical_optimizer::test_utils::{
+use crate::physical_optimizer_test::test_utils::{
     check_integrity, coalesce_batches_exec, coalesce_partitions_exec,
     create_test_schema3, parquet_exec_with_sort, sort_exec,
     sort_exec_with_preserve_partitioning, sort_preserving_merge_exec,
diff --git a/datafusion/core/tests/physical_optimizer/sanity_checker.rs b/datafusion/core/tests/physical_optimizer/sanity_checker.rs
index 6233f5d09c56..d1a62219d79b 100644
--- a/datafusion/core/tests/physical_optimizer/sanity_checker.rs
+++ b/datafusion/core/tests/physical_optimizer/sanity_checker.rs
@@ -18,7 +18,7 @@ use insta::assert_snapshot;
 use std::sync::Arc;
 
-use crate::physical_optimizer::test_utils::{
+use crate::physical_optimizer_test::test_utils::{
     bounded_window_exec, global_limit_exec, local_limit_exec, memory_exec,
     repartition_exec, sort_exec, sort_expr_options, sort_merge_join_exec,
 };
diff --git a/datafusion/core/tests/integration_tests/mod.rs b/datafusion/core/tests/schema_adaptation/mod.rs
similarity index 100%
rename from datafusion/core/tests/integration_tests/mod.rs
rename to datafusion/core/tests/schema_adaptation/mod.rs
diff --git a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs b/datafusion/core/tests/schema_adaptation/schema_adapter_integration_tests.rs
similarity index 100%
rename from datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs
rename to datafusion/core/tests/schema_adaptation/schema_adapter_integration_tests.rs

From a206e6fb8adb9e85109da1d6a1c6e1d29ebfe9f2 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 16:41:11 +0800
Subject: [PATCH 09/41] Refactor physical_optimizer module imports to use the
 correct path

---
 datafusion/core/tests/core_integration.rs                     | 2 +-
 .../core/tests/physical_optimizer/aggregate_statistics.rs     | 2 +-
 .../tests/physical_optimizer/combine_partial_final_agg.rs     | 2 +-
 .../core/tests/physical_optimizer/enforce_distribution.rs     | 4 ++--
 datafusion/core/tests/physical_optimizer/enforce_sorting.rs   | 2 +-
 datafusion/core/tests/physical_optimizer/limit_pushdown.rs    | 2 +-
 .../tests/physical_optimizer/limited_distinct_aggregation.rs  | 2 +-
 .../replace_with_order_preserving_variants.rs                 | 2 +-
 datafusion/core/tests/physical_optimizer/sanity_checker.rs    | 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/datafusion/core/tests/core_integration.rs b/datafusion/core/tests/core_integration.rs
index 9a9d77a0c540..250538b13370 100644
--- a/datafusion/core/tests/core_integration.rs
+++ b/datafusion/core/tests/core_integration.rs
@@ -43,7 +43,7 @@ mod custom_sources_cases;
 mod optimizer;
 
 /// Run all tests that are found in the `physical_optimizer` directory
-mod physical_optimizer_test;
+mod physical_optimizer;
 
 /// Run all tests that are found in the `serde` directory
 mod serde;
diff --git a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs
index 00eea3bfa38e..a79d743cb253 100644
--- a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs
+++ b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs
@@ -17,7 +17,7 @@
 
 use std::sync::Arc;
 
-use crate::physical_optimizer_test::test_utils::TestAggregate;
+use crate::physical_optimizer::test_utils::TestAggregate;
 
 use arrow::array::Int32Array;
 use arrow::datatypes::{DataType, Field, Schema};
diff --git a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
index 94a18365cb80..9c76f6ab6f58 100644
--- a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
+++ b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
@@ -23,7 +23,7 @@ use insta::assert_snapshot;
 
 use std::sync::Arc;
 
-use crate::physical_optimizer_test::test_utils::parquet_exec;
+use crate::physical_optimizer::test_utils::parquet_exec;
 
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use datafusion_common::config::ConfigOptions;
diff --git a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs
index 0d9a85ac8967..fd847763124a 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs
@@ -19,7 +19,7 @@ use std::fmt::Debug;
 use std::ops::Deref;
 use std::sync::Arc;
 
-use crate::physical_optimizer_test::test_utils::{
+use crate::physical_optimizer::test_utils::{
     check_integrity, coalesce_partitions_exec, parquet_exec_with_sort,
     parquet_exec_with_stats, repartition_exec, schema, sort_exec,
     sort_exec_with_preserve_partitioning, sort_merge_join_exec,
@@ -300,7 +300,7 @@ fn hash_join_exec(
     join_on: &JoinOn,
     join_type: &JoinType,
 ) -> Arc<dyn ExecutionPlan> {
-    crate::physical_optimizer_test::test_utils::hash_join_exec(
+    crate::physical_optimizer::test_utils::hash_join_exec(
         left,
         right,
         join_on.clone(),
diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index f7e538e8a170..e31a30cc0883 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -17,7 +17,7 @@
 
 use std::sync::Arc;
 
-use crate::physical_optimizer_test::test_utils::{
+use crate::physical_optimizer::test_utils::{
     aggregate_exec, bounded_window_exec, bounded_window_exec_with_partition,
     check_integrity, coalesce_batches_exec, coalesce_partitions_exec, create_test_schema,
     create_test_schema2, create_test_schema3, filter_exec, global_limit_exec,
diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs
index 63c6d1f1c6d6..56d48901f284 100644
--- a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs
+++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs
@@ -17,7 +17,7 @@
 
 use std::sync::Arc;
 
-use crate::physical_optimizer_test::test_utils::{
+use crate::physical_optimizer::test_utils::{
     coalesce_batches_exec, coalesce_partitions_exec, global_limit_exec,
     local_limit_exec, sort_exec, sort_preserving_merge_exec, stream_exec,
 };
diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
index ba8ff5fd2387..ad15d6803413 100644
--- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
+++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
@@ -20,7 +20,7 @@ use insta::assert_snapshot;
 
 use std::sync::Arc;
 
-use crate::physical_optimizer_test::test_utils::{
+use crate::physical_optimizer::test_utils::{
     build_group_by, get_optimized_plan, mock_data, parquet_exec_with_sort, schema,
     TestAggregate,
 };
diff --git a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs
index e67f96b252a2..c9baa9a932ae 100644
--- a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs
+++ b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs
@@ -17,7 +17,7 @@
 
 use std::sync::Arc;
 
-use crate::physical_optimizer_test::test_utils::{
+use crate::physical_optimizer::test_utils::{
     check_integrity, coalesce_batches_exec, coalesce_partitions_exec,
     create_test_schema3, parquet_exec_with_sort, sort_exec,
     sort_exec_with_preserve_partitioning, sort_preserving_merge_exec,
diff --git a/datafusion/core/tests/physical_optimizer/sanity_checker.rs b/datafusion/core/tests/physical_optimizer/sanity_checker.rs
index d1a62219d79b..6233f5d09c56 100644
--- a/datafusion/core/tests/physical_optimizer/sanity_checker.rs
+++ b/datafusion/core/tests/physical_optimizer/sanity_checker.rs
@@ -18,7 +18,7 @@ use insta::assert_snapshot;
 use std::sync::Arc;
 
-use crate::physical_optimizer_test::test_utils::{
+use crate::physical_optimizer::test_utils::{
     bounded_window_exec, global_limit_exec, local_limit_exec, memory_exec,
     repartition_exec, sort_exec, sort_expr_options, sort_merge_join_exec,
 };

From c7e6b74e5bcdaaa5a065a51aea1a3da2c8e6b944 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 16:48:36 +0800
Subject: [PATCH 10/41] Add end-to-end tests for schema-related functionality
 in schema.rs

---
 datafusion/core/tests/schema.rs | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 datafusion/core/tests/schema.rs

diff --git a/datafusion/core/tests/schema.rs b/datafusion/core/tests/schema.rs
new file mode 100644
index 000000000000..a6349e11f7db
--- /dev/null
+++ b/datafusion/core/tests/schema.rs
@@ -0,0 +1,21 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! End to end test for Schema related functionality
+
+/// Run all tests that are found in the `parquet` directory
+mod schema_adaptation;

From 593b4b4e2da4928b5970fc362052655ff0efb351 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Thu, 17 Jul 2025 16:53:54 +0800
Subject: [PATCH 11/41] Update expected schema column name in parquet
 integration test

---
 .../tests/schema_adaptation/schema_adapter_integration_tests.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/core/tests/schema_adaptation/schema_adapter_integration_tests.rs b/datafusion/core/tests/schema_adaptation/schema_adapter_integration_tests.rs
index 9ec01b48a435..54cfe53e7226 100644
--- a/datafusion/core/tests/schema_adaptation/schema_adapter_integration_tests.rs
+++ b/datafusion/core/tests/schema_adaptation/schema_adapter_integration_tests.rs
@@ -244,7 +244,7 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
     // Verify the schema has uppercase column names
     let result_schema = batches[0].schema();
     assert_eq!(result_schema.field(0).name(), "ID");
-    assert_eq!(result_schema.field(1).name(), "NAME0");
+    assert_eq!(result_schema.field(1).name(), "NAME");
 
     Ok(())
 }

From 29e0bcaca312b81ec0abbc6830751cfde3f12141 Mon Sep 17 00:00:00 2001
From: kosiew
Date: Fri, 18 Jul 2025 07:39:54 +0800
Subject: [PATCH 12/41] Move schema adapter tests

- relocate schema adapter tests into the parquet suite
- reference new location in schema.rs
- remove old schema_adaptation tests
---
 .../core/tests/parquet/schema_adapter.rs      | 549 ++++++++++++++++-
 datafusion/core/tests/schema.rs               |   3 +-
 .../core/tests/schema_adaptation/mod.rs       |  20 -
 .../schema_adapter_integration_tests.rs       | 578 ------------------
 4 files changed, 549 insertions(+), 601 deletions(-)
 delete mode 100644 datafusion/core/tests/schema_adaptation/mod.rs
 delete mode 100644 datafusion/core/tests/schema_adaptation/schema_adapter_integration_tests.rs

diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs
index abc1550b31ca..6faa604baf6b 100644
--- a/datafusion/core/tests/parquet/schema_adapter.rs
+++ b/datafusion/core/tests/parquet/schema_adapter.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::any::Any;
 use std::sync::Arc;
 
 use arrow::array::{record_batch, RecordBatch, RecordBatchOptions};
@@ -26,6 +27,7 @@ use datafusion::common::Result;
 use datafusion::datasource::listing::{ListingTable, ListingTableConfig};
 use datafusion::prelude::{SessionConfig, SessionContext};
 use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
+use datafusion_common::DataFusionError;
 use datafusion_common::{ColumnStatistics, ScalarValue};
 use datafusion_datasource::schema_adapter::{
     DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, SchemaMapper,
@@ -40,6 +42,20 @@ use datafusion_physical_expr::{DefaultPhysicalExprAdapter, PhysicalExpr};
 use itertools::Itertools;
 use object_store::{memory::InMemory, path::Path, ObjectStore};
 use parquet::arrow::ArrowWriter;
+use tempfile::TempDir;
+
+#[cfg(feature = "parquet")]
+use datafusion::datasource::physical_plan::ParquetSource;
+use datafusion::datasource::physical_plan::{
+    ArrowSource, CsvSource, FileOpener, FileScanConfig, FileScanConfigBuilder,
+    FileSource, JsonSource,
+};
+use datafusion::datasource::source::DataSourceExec;
+use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
+use datafusion::physical_plan::{ExecutionPlan, Statistics};
+use datafusion_datasource::PartitionedFile;
+#[cfg(feature = "parquet")]
+use parquet::file::properties::WriterProperties;
 
 async fn write_parquet(batch: RecordBatch, store: Arc<dyn ObjectStore>, path: &str) {
     let mut out = BytesMut::new().writer();
@@ -187,7 +203,7 @@ impl PhysicalExprAdapter for CustomPhysicalExprAdapter {
             .logical_file_schema
             .field_with_name(field_name)
             .map_err(|_| {
-                datafusion_common::DataFusionError::Plan(format!(
+                DataFusionError::Plan(format!(
                     "Field '{field_name}' not found in logical file schema",
                 ))
             })?;
@@ -376,3 +392,534 @@ async fn test_custom_schema_adapter_and_custom_expression_adapter() {
     ];
     assert_batches_eq!(expected, &batches);
 }
+
+// ----------------------------------------------------------------------
+// Tests migrated from schema_adaptation/schema_adapter_integration_tests.rs
+// ----------------------------------------------------------------------
+
+/// A schema adapter factory that transforms column names to uppercase
+#[derive(Debug)]
+struct UppercaseAdapterFactory {}
+
+impl SchemaAdapterFactory for UppercaseAdapterFactory {
+    fn create(
+        &self,
+        projected_table_schema: SchemaRef,
+        _table_schema: SchemaRef,
+    ) -> Box<dyn SchemaAdapter> {
+        Box::new(UppercaseAdapter {
+            table_schema: projected_table_schema,
+        })
+    }
+}
+
+/// Schema adapter that transforms column names to uppercase
+#[derive(Debug)]
+struct UppercaseAdapter {
+    table_schema: SchemaRef,
+}
+
+impl SchemaAdapter for UppercaseAdapter {
+    fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option<usize> {
+        let field = self.table_schema.field(index);
+        file_schema
+            .fields()
+            .iter()
+            .position(|f| f.name().eq_ignore_ascii_case(field.name()))
+    }
+
+    fn map_schema(
+        &self,
+        file_schema: &Schema,
+    ) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
+        let mut projection = Vec::with_capacity(file_schema.fields().len());
+        for (idx, file_field) in file_schema.fields().iter().enumerate() {
+            if self
+                .table_schema
+                .fields()
+                .iter()
+                .any(|f| f.name().eq_ignore_ascii_case(file_field.name()))
+            {
+                projection.push(idx);
+            }
+        }
+
+        let mapper = UppercaseSchemaMapper {
+            output_schema: self.output_schema(),
+            projection: projection.clone(),
+        };
+
+        Ok((Arc::new(mapper), projection))
+    }
+}
+
+#[derive(Debug)]
+struct TestSchemaMapping {
+    output_schema: SchemaRef,
+    projection: Vec<usize>,
+}
+
+impl SchemaMapper for TestSchemaMapping {
+    fn map_batch(&self, batch: RecordBatch) -> Result<RecordBatch> {
+        let columns = self
+            .projection
+            .iter()
+            .map(|&i| batch.column(i).clone())
+            .collect::<Vec<_>>();
+        Ok(RecordBatch::try_new(self.output_schema.clone(), columns)?)
+    }
+
+    fn map_column_statistics(
+        &self,
+        stats: &[ColumnStatistics],
+    ) -> Result<Vec<ColumnStatistics>> {
+        Ok(self
+            .projection
+            .iter()
+            .map(|&i| stats.get(i).cloned().unwrap_or_default())
+            .collect())
+    }
+}
+
+impl UppercaseAdapter {
+    #[allow(dead_code)]
+    fn adapt(&self, record_batch: RecordBatch) -> Result<RecordBatch> {
+        Ok(record_batch)
+    }
+
+    fn output_schema(&self) -> SchemaRef {
+        let fields: Vec<Field> = self
+            .table_schema
+            .fields()
+            .iter()
+            .map(|f| {
+                Field::new(
+                    f.name().to_uppercase().as_str(),
+                    f.data_type().clone(),
+                    f.is_nullable(),
+                )
+            })
+            .collect();
+
+        Arc::new(Schema::new(fields))
+    }
+}
+
+#[derive(Debug)]
+struct UppercaseSchemaMapper {
+    output_schema: SchemaRef,
+    projection: Vec<usize>,
+}
+
+impl SchemaMapper for UppercaseSchemaMapper {
+    fn map_batch(&self, batch: RecordBatch) -> Result<RecordBatch> {
+        let columns = self
+            .projection
+            .iter()
+            .map(|&i| batch.column(i).clone())
+            .collect::<Vec<_>>();
+        Ok(RecordBatch::try_new(self.output_schema.clone(), columns)?)
+    }
+
+    fn map_column_statistics(
+        &self,
+        stats: &[ColumnStatistics],
+    ) -> Result<Vec<ColumnStatistics>> {
+        Ok(self
+            .projection
+            .iter()
+            .map(|&i| stats.get(i).cloned().unwrap_or_default())
+            .collect())
+    }
+}
+
+#[cfg(feature = "parquet")]
+#[tokio::test]
+async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
+    // Create a temporary directory for our test file
+    let tmp_dir = TempDir::new()?;
+    let file_path = tmp_dir.path().join("test.parquet");
+    let file_path_str = file_path.to_str().unwrap();
+
+    // Create test data
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Int32, false),
+        Field::new("name", DataType::Utf8, true),
+    ]));
+
+    let batch = RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])),
+            Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])),
+        ],
+    )?;
+
+    // Write test parquet file
+    let file = std::fs::File::create(file_path_str)?;
+    let props = WriterProperties::builder().build();
+    let mut writer = ArrowWriter::try_new(file, schema.clone(), Some(props))?;
+    writer.write(&batch)?;
+    writer.close()?;
+
+    // Create a session context
+    let ctx = SessionContext::new();
+
+    // Create a ParquetSource with the adapter factory
+    let file_source = ParquetSource::default()
+        .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
+
+    let config = FileScanConfigBuilder::new(
+        ObjectStoreUrl::parse(format!("file://{file_path_str}"))?,
+        schema.clone(),
+        file_source.clone(),
+    )
+    .with_file(PartitionedFile::new(file_path_str, 100))
+    .build();
+
+    // Create a data source executor
+    let exec = DataSourceExec::from_data_source(config);
+
+    // Collect results
+    let task_ctx = ctx.task_ctx();
+    let stream = exec.execute(0, task_ctx)?;
+    let batches = datafusion::physical_plan::common::collect(stream).await?;
+
+    // There should be one batch
+    assert_eq!(batches.len(), 1);
+
+    // Verify the schema has uppercase column names
+    let result_schema = batches[0].schema();
+    assert_eq!(result_schema.field(0).name(), "ID");
+    assert_eq!(result_schema.field(1).name(), "NAME");
+
+    Ok(())
+}
+
+#[cfg(feature = "parquet")]
+#[tokio::test]
+async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter(
+) -> Result<()> {
+    // Create a temporary directory for our test file
+    let tmp_dir = TempDir::new()?;
+    let file_path = tmp_dir.path().join("test.parquet");
+    let file_path_str = file_path.to_str().unwrap();
+
+    // Create test data
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Int32, false),
+        Field::new("name", DataType::Utf8, true),
+    ]));
+
+    let batch = RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])),
+            Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])),
+        ],
+    )?;
+
+    // Write test parquet file
+    let file = std::fs::File::create(file_path_str)?;
+    let props = WriterProperties::builder().build();
+    let mut writer = ArrowWriter::try_new(file, schema.clone(), Some(props))?;
+    writer.write(&batch)?;
+    writer.close()?;
+
+    // Create a session context
+    let ctx = SessionContext::new();
+
+    // Create a ParquetSource with the adapter factory
+    let file_source = ParquetSource::default()
+        .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
+
+    let config = FileScanConfigBuilder::new(
+        ObjectStoreUrl::parse(format!("file://{file_path_str}"))?,
+        schema.clone(),
+        file_source,
+    )
+    .with_file(PartitionedFile::new(file_path_str, 100))
+    .build();
+
+    // Create a data source executor
+    let exec = DataSourceExec::from_data_source(config);
+
+    // Collect results
+    let task_ctx = ctx.task_ctx();
+    let stream = exec.execute(0, task_ctx)?;
+    let batches = datafusion::physical_plan::common::collect(stream).await?;
+
+    // There should be one batch
+    assert_eq!(batches.len(), 1);
+
+    // Verify the schema has uppercase column names
+    let result_schema = batches[0].schema();
+    assert_eq!(result_schema.field(0).name(), "ID");
+    assert_eq!(result_schema.field(1).name(), "NAME");
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
+    // This test verifies that the same schema adapter factory can be reused
+    // across different file source types. This is important for ensuring that:
+    // 1. The schema adapter factory interface works uniformly across all source types
+    // 2. The factory can be shared and cloned efficiently using Arc
+    // 3. Various data source implementations correctly implement the schema adapter factory pattern
+
+    // Create a test factory
+    let factory = Arc::new(UppercaseAdapterFactory {});
+
+    // Apply the same adapter to different source types
+    let arrow_source = ArrowSource::default()
+        .with_schema_adapter_factory(factory.clone())
+        .unwrap();
+
+    #[cfg(feature = "parquet")]
+    let parquet_source = ParquetSource::default()
+        .with_schema_adapter_factory(factory.clone())
+        .unwrap();
+
+    let csv_source = CsvSource::default()
+        .with_schema_adapter_factory(factory.clone())
+        .unwrap();
+
+    // Verify adapters were properly set
+    assert!(arrow_source.schema_adapter_factory().is_some());
+
+    #[cfg(feature = "parquet")]
+    assert!(parquet_source.schema_adapter_factory().is_some());
+
+    assert!(csv_source.schema_adapter_factory().is_some());
+
+    Ok(())
+}
+
+// Helper function to test From<T> for Arc<dyn FileSource> implementations
+fn test_from_impl<T: Into<Arc<dyn FileSource>> + Default>(expected_file_type: &str) {
+    let source = T::default();
+    let file_source: Arc<dyn FileSource> = source.into();
+    assert_eq!(file_source.file_type(), expected_file_type);
+}
+
+#[test]
+fn test_from_implementations() {
+    // Test From implementation for various sources
+    test_from_impl::<ArrowSource>("arrow");
+
+    #[cfg(feature = "parquet")]
+    test_from_impl::<ParquetSource>("parquet");
+
+    test_from_impl::<CsvSource>("csv");
+
+    test_from_impl::<JsonSource>("json");
+}
+
+/// A simple test schema adapter factory that doesn't modify the schema
+#[derive(Debug)]
+struct TestSchemaAdapterFactory {}
+
+impl SchemaAdapterFactory for TestSchemaAdapterFactory {
+    fn create(
+        &self,
+        projected_table_schema: SchemaRef,
+        _table_schema: SchemaRef,
+    ) -> Box<dyn SchemaAdapter> {
+        Box::new(TestSchemaAdapter {
+            input_schema: projected_table_schema,
+        })
+    }
+}
+
+/// A test schema adapter that passes through data unmodified
+#[derive(Debug)]
+struct TestSchemaAdapter {
+    input_schema: SchemaRef,
+}
+
+impl SchemaAdapter for TestSchemaAdapter {
+    fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option<usize> {
+        let field = self.input_schema.field(index);
+        file_schema
+            .fields()
+            .iter()
+            .position(|f| f.name() == field.name())
+    }
+
+    fn map_schema(
+        &self,
+        file_schema: &Schema,
+    ) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
+        let mut projection = Vec::with_capacity(file_schema.fields().len());
+        for (idx, file_field) in file_schema.fields().iter().enumerate() {
+            if self
+                .input_schema
+                .fields()
+                .iter()
+                .any(|f| f.name() == file_field.name())
+            {
+                projection.push(idx);
+            }
+        }
+
+        let mapper = TestSchemaMapping {
+            output_schema: Arc::clone(&self.input_schema),
+            projection: projection.clone(),
+        };
+
+        Ok((Arc::new(mapper), projection))
+    }
+}
+
+#[cfg(feature = "parquet")]
+#[test]
+fn test_schema_adapter_preservation() {
+    // Create a test schema
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Int32, false),
+        Field::new("name", DataType::Utf8, true),
+    ]));
+
+    // Create source with schema adapter factory
+    let source = ParquetSource::default();
+    let factory = Arc::new(TestSchemaAdapterFactory {});
+    let file_source = source.with_schema_adapter_factory(factory).unwrap();
+
+    // Create a FileScanConfig with the source
+    let config_builder = FileScanConfigBuilder::new(
+        ObjectStoreUrl::local_filesystem(),
+        schema.clone(),
+        file_source.clone(),
+    )
+    .with_file(PartitionedFile::new("test.parquet", 100));
+
+    let config = config_builder.build();
+
+    // Verify the schema adapter factory is present in the file source
+    assert!(config.file_source().schema_adapter_factory().is_some());
+}
+
adapters +#[derive(Debug, Clone)] +struct TestSource { + schema_adapter_factory: Option>, + metrics: ExecutionPlanMetricsSet, +} + +impl TestSource { + fn new() -> Self { + Self { + schema_adapter_factory: None, + metrics: ExecutionPlanMetricsSet::new(), + } + } +} + +impl FileSource for TestSource { + fn file_type(&self) -> &str { + "test" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn create_file_opener( + &self, + _store: Arc, + _conf: &FileScanConfig, + _index: usize, + ) -> Arc { + unimplemented!("Not needed for this test") + } + + fn with_batch_size(&self, _batch_size: usize) -> Arc { + Arc::new(self.clone()) + } + + fn with_schema(&self, _schema: SchemaRef) -> Arc { + Arc::new(self.clone()) + } + + fn with_projection(&self, _projection: &FileScanConfig) -> Arc { + Arc::new(self.clone()) + } + + fn with_statistics(&self, _statistics: Statistics) -> Arc { + Arc::new(self.clone()) + } + + fn metrics(&self) -> &ExecutionPlanMetricsSet { + &self.metrics + } + + fn statistics(&self) -> Result { + Ok(Statistics::default()) + } + + fn with_schema_adapter_factory( + &self, + schema_adapter_factory: Arc, + ) -> Result> { + Ok(Arc::new(Self { + schema_adapter_factory: Some(schema_adapter_factory), + metrics: ExecutionPlanMetricsSet::new(), + })) + } + + fn schema_adapter_factory(&self) -> Option> { + self.schema_adapter_factory.clone() + } +} + +#[test] +fn test_schema_adapter() { + // This test verifies the functionality of the SchemaAdapter and SchemaAdapterFactory + // components used in DataFusion's file sources. + // + // The test specifically checks: + // 1. Creating and attaching a schema adapter factory to a file source + // 2. Creating a schema adapter using the factory + // 3. The schema adapter's ability to map column indices between a table schema and a file schema + // 4. The schema adapter's ability to create a projection that selects only the columns + // from the file schema that are present in the table schema + // + // Schema adapters are used when the schema of data in files doesn't exactly match + // the schema expected by the query engine, allowing for field mapping and data transformation. 
+ + // Create a test schema + let table_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, true), + ])); + + // Create a file schema + let file_schema = Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, true), + Field::new("extra", DataType::Int64, true), + ]); + + // Create a TestSource + let source = TestSource::new(); + assert!(source.schema_adapter_factory().is_none()); + + // Add a schema adapter factory + let factory = Arc::new(TestSchemaAdapterFactory {}); + let source_with_adapter = source.with_schema_adapter_factory(factory).unwrap(); + assert!(source_with_adapter.schema_adapter_factory().is_some()); + + // Create a schema adapter + let adapter_factory = source_with_adapter.schema_adapter_factory().unwrap(); + let adapter = + adapter_factory.create(Arc::clone(&table_schema), Arc::clone(&table_schema)); + + // Test mapping column index + assert_eq!(adapter.map_column_index(0, &file_schema), Some(0)); + assert_eq!(adapter.map_column_index(1, &file_schema), Some(1)); + + // Test creating schema mapper + let (_mapper, projection) = adapter.map_schema(&file_schema).unwrap(); + assert_eq!(projection, vec![0, 1]); +} diff --git a/datafusion/core/tests/schema.rs b/datafusion/core/tests/schema.rs index a6349e11f7db..bf5081dc9c3d 100644 --- a/datafusion/core/tests/schema.rs +++ b/datafusion/core/tests/schema.rs @@ -17,5 +17,4 @@ //! End to end test for Schema related functionality -/// Run all tests that are found in the `parquet` directory -mod schema_adaptation; +// Schema adaptation tests now live in `parquet/schema_adapter.rs` diff --git a/datafusion/core/tests/schema_adaptation/mod.rs b/datafusion/core/tests/schema_adaptation/mod.rs deleted file mode 100644 index 5424780a0832..000000000000 --- a/datafusion/core/tests/schema_adaptation/mod.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Integration tests module - -pub mod schema_adapter_integration_tests; \ No newline at end of file diff --git a/datafusion/core/tests/schema_adaptation/schema_adapter_integration_tests.rs b/datafusion/core/tests/schema_adaptation/schema_adapter_integration_tests.rs deleted file mode 100644 index 54cfe53e7226..000000000000 --- a/datafusion/core/tests/schema_adaptation/schema_adapter_integration_tests.rs +++ /dev/null @@ -1,578 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Integration test for schema adapter factory functionality
-
-use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
-use arrow::record_batch::RecordBatch;
-use datafusion::datasource::object_store::ObjectStoreUrl;
-use datafusion::datasource::physical_plan::ArrowSource;
-use datafusion::datasource::physical_plan::JsonSource;
-#[cfg(feature = "parquet")]
-use datafusion::datasource::physical_plan::ParquetSource;
-use datafusion::datasource::physical_plan::{
-    FileOpener, FileScanConfig, FileScanConfigBuilder, FileSource,
-};
-use datafusion::datasource::source::DataSourceExec;
-use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
-use datafusion::physical_plan::ExecutionPlan;
-use datafusion::physical_plan::Statistics;
-use datafusion::prelude::*;
-use datafusion_common::ColumnStatistics;
-use datafusion_common::DataFusionError;
-use datafusion_common::Result;
-use datafusion_datasource::schema_adapter::{
-    SchemaAdapter, SchemaAdapterFactory, SchemaMapper,
-};
-use datafusion_datasource::PartitionedFile;
-use object_store::ObjectStore;
-#[cfg(feature = "parquet")]
-use parquet::arrow::ArrowWriter;
-#[cfg(feature = "parquet")]
-use parquet::file::properties::WriterProperties;
-use std::any::Any;
-use std::sync::Arc;
-use tempfile::TempDir;
-
-use datafusion::datasource::physical_plan::CsvSource;
-
-/// A schema adapter factory that transforms column names to uppercase
-#[derive(Debug)]
-struct UppercaseAdapterFactory {}
-
-impl SchemaAdapterFactory for UppercaseAdapterFactory {
-    fn create(
-        &self,
-        projected_table_schema: SchemaRef,
-        _table_schema: SchemaRef,
-    ) -> Box<dyn SchemaAdapter> {
-        Box::new(UppercaseAdapter {
-            table_schema: projected_table_schema,
-        })
-    }
-}
-
-/// Schema adapter that transforms column names to uppercase
-#[derive(Debug)]
-struct UppercaseAdapter {
-    table_schema: SchemaRef,
-}
-
-impl SchemaAdapter for UppercaseAdapter {
-    fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option<usize> {
-        let field = self.table_schema.field(index);
-        file_schema
-            .fields()
-            .iter()
-            .position(|f| f.name().eq_ignore_ascii_case(field.name()))
-    }
-
-    fn map_schema(
-        &self,
-        file_schema: &Schema,
-    ) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
-        let mut projection = Vec::with_capacity(file_schema.fields().len());
-        for (idx, file_field) in file_schema.fields().iter().enumerate() {
-            if self
-                .table_schema
-                .fields()
-                .iter()
-                .any(|f| f.name().eq_ignore_ascii_case(file_field.name()))
-            {
-                projection.push(idx);
-            }
-        }
-
-        let mapper = UppercaseSchemaMapper {
-            output_schema: self.output_schema(),
-            projection: projection.clone(),
-        };
-
-        Ok((Arc::new(mapper), projection))
-    }
-}
-
-#[derive(Debug)]
-struct TestSchemaMapping {
-    output_schema: SchemaRef,
-    projection: Vec<usize>,
-}
-
-impl SchemaMapper for TestSchemaMapping {
-    fn map_batch(&self, batch: RecordBatch) -> Result<RecordBatch> {
-        let columns = self
-            .projection
-            .iter()
-            .map(|&i| batch.column(i).clone())
-            .collect::<Vec<_>>();
-        Ok(RecordBatch::try_new(self.output_schema.clone(), columns)?)
-    }
-
-    fn map_column_statistics(
-        &self,
-        stats: &[ColumnStatistics],
-    ) -> Result<Vec<ColumnStatistics>> {
-        Ok(self
-            .projection
-            .iter()
-            .map(|&i| stats.get(i).cloned().unwrap_or_default())
-            .collect())
-    }
-}
-
-impl UppercaseAdapter {
-    #[allow(dead_code)]
-    fn adapt(&self, record_batch: RecordBatch) -> Result<RecordBatch> {
-        Ok(record_batch)
-    }
-
-    fn output_schema(&self) -> SchemaRef {
-        let fields: Vec<Field> = self
-            .table_schema
-            .fields()
-            .iter()
-            .map(|f| {
-                Field::new(
-                    f.name().to_uppercase().as_str(),
-                    f.data_type().clone(),
-                    f.is_nullable(),
-                )
-            })
-            .collect();
-
-        Arc::new(Schema::new(fields))
-    }
-}
-
-#[derive(Debug)]
-struct UppercaseSchemaMapper {
-    output_schema: SchemaRef,
-    projection: Vec<usize>,
-}
-
-impl SchemaMapper for UppercaseSchemaMapper {
-    fn map_batch(&self, batch: RecordBatch) -> Result<RecordBatch> {
-        let columns = self
-            .projection
-            .iter()
-            .map(|&i| batch.column(i).clone())
-            .collect::<Vec<_>>();
-        Ok(RecordBatch::try_new(self.output_schema.clone(), columns)?)
-    }
-
-    fn map_column_statistics(
-        &self,
-        stats: &[ColumnStatistics],
-    ) -> Result<Vec<ColumnStatistics>> {
-        Ok(self
-            .projection
-            .iter()
-            .map(|&i| stats.get(i).cloned().unwrap_or_default())
-            .collect())
-    }
-}
-
-#[cfg(feature = "parquet")]
-#[tokio::test]
-async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
-    // Create a temporary directory for our test file
-    let tmp_dir = TempDir::new()?;
-    let file_path = tmp_dir.path().join("test.parquet");
-    let file_path_str = file_path.to_str().unwrap();
-
-    // Create test data
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("id", DataType::Int32, false),
-        Field::new("name", DataType::Utf8, true),
-    ]));
-
-    let batch = RecordBatch::try_new(
-        schema.clone(),
-        vec![
-            Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])),
-            Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])),
-        ],
-    )?;
-
-    // Write test parquet file
-    let file = std::fs::File::create(file_path_str)?;
-    let props = WriterProperties::builder().build();
-    let mut writer = ArrowWriter::try_new(file, schema.clone(), Some(props))?;
-    writer.write(&batch)?;
-    writer.close()?;
-
-    // Create a session context
-    let ctx = SessionContext::new();
-
-    // Create a ParquetSource with the adapter factory
-    let file_source = ParquetSource::default()
-        .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
-
-    let config = FileScanConfigBuilder::new(
-        ObjectStoreUrl::parse(format!("file://{file_path_str}"))?,
-        schema.clone(),
-        file_source.clone(),
-    )
-    .with_file(PartitionedFile::new(file_path_str, 100))
-    .build();
-
-    // Create a data source executor
-    let exec = DataSourceExec::from_data_source(config);
-
-    // Collect results
-    let task_ctx = ctx.task_ctx();
-    let stream = exec.execute(0, task_ctx)?;
-    let batches = datafusion::physical_plan::common::collect(stream).await?;
-
-    // There should be one batch
-    assert_eq!(batches.len(), 1);
-
-    // Verify the schema has uppercase column names
-    let result_schema = batches[0].schema();
-    assert_eq!(result_schema.field(0).name(), "ID");
-    assert_eq!(result_schema.field(1).name(), "NAME");
-
-    Ok(())
-}
-
-#[cfg(feature = "parquet")]
-#[tokio::test]
-async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter(
-) -> Result<()> {
-    // Create a temporary directory for our test file
-    let tmp_dir = TempDir::new()?;
-    let file_path = tmp_dir.path().join("test.parquet");
-    let file_path_str = file_path.to_str().unwrap();
-
-    // Create test data
-    let schema =
Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - ])); - - let batch = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])), - Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])), - ], - )?; - - // Write test parquet file - let file = std::fs::File::create(file_path_str)?; - let props = WriterProperties::builder().build(); - let mut writer = ArrowWriter::try_new(file, schema.clone(), Some(props))?; - writer.write(&batch)?; - writer.close()?; - - // Create a session context - let ctx = SessionContext::new(); - - // Create a ParquetSource with the adapter factory - let file_source = ParquetSource::default() - .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; - - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse(format!("file://{file_path_str}"))?, - schema.clone(), - file_source, - ) - .with_file(PartitionedFile::new(file_path_str, 100)) - .build(); - - // Create a data source executor - let exec = DataSourceExec::from_data_source(config); - - // Collect results - let task_ctx = ctx.task_ctx(); - let stream = exec.execute(0, task_ctx)?; - let batches = datafusion::physical_plan::common::collect(stream).await?; - - // There should be one batch - assert_eq!(batches.len(), 1); - - // Verify the schema has uppercase column names - let result_schema = batches[0].schema(); - assert_eq!(result_schema.field(0).name(), "ID"); - assert_eq!(result_schema.field(1).name(), "NAME"); - - Ok(()) -} - -#[tokio::test] -async fn test_multi_source_schema_adapter_reuse() -> Result<()> { - // This test verifies that the same schema adapter factory can be reused - // across different file source types. This is important for ensuring that: - // 1. The schema adapter factory interface works uniformly across all source types - // 2. The factory can be shared and cloned efficiently using Arc - // 3. 
Various data source implementations correctly implement the schema adapter factory pattern
-
-    // Create a test factory
-    let factory = Arc::new(UppercaseAdapterFactory {});
-
-    // Apply the same adapter to different source types
-    let arrow_source = ArrowSource::default()
-        .with_schema_adapter_factory(factory.clone())
-        .unwrap();
-
-    #[cfg(feature = "parquet")]
-    let parquet_source = ParquetSource::default()
-        .with_schema_adapter_factory(factory.clone())
-        .unwrap();
-
-    let csv_source = CsvSource::default()
-        .with_schema_adapter_factory(factory.clone())
-        .unwrap();
-
-    // Verify adapters were properly set
-    assert!(arrow_source.schema_adapter_factory().is_some());
-
-    #[cfg(feature = "parquet")]
-    assert!(parquet_source.schema_adapter_factory().is_some());
-
-    assert!(csv_source.schema_adapter_factory().is_some());
-
-    Ok(())
-}
-
-// Helper function to test From<T> for Arc<dyn FileSource> implementations
-fn test_from_impl<T: Into<Arc<dyn FileSource>> + Default>(expected_file_type: &str) {
-    let source = T::default();
-    let file_source: Arc<dyn FileSource> = source.into();
-    assert_eq!(file_source.file_type(), expected_file_type);
-}
-
-#[test]
-fn test_from_implementations() {
-    // Test From implementation for various sources
-    test_from_impl::<ArrowSource>("arrow");
-
-    #[cfg(feature = "parquet")]
-    test_from_impl::<ParquetSource>("parquet");
-
-    test_from_impl::<CsvSource>("csv");
-
-    test_from_impl::<JsonSource>("json");
-}
-
-/// A simple test schema adapter factory that doesn't modify the schema
-#[derive(Debug)]
-struct TestSchemaAdapterFactory {}
-
-impl SchemaAdapterFactory for TestSchemaAdapterFactory {
-    fn create(
-        &self,
-        projected_table_schema: SchemaRef,
-        _table_schema: SchemaRef,
-    ) -> Box<dyn SchemaAdapter> {
-        Box::new(TestSchemaAdapter {
-            input_schema: projected_table_schema,
-        })
-    }
-}
-
-/// A test schema adapter that passes through data unmodified
-#[derive(Debug)]
-struct TestSchemaAdapter {
-    input_schema: SchemaRef,
-}
-
-impl SchemaAdapter for TestSchemaAdapter {
-    fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option<usize> {
-        let field = self.input_schema.field(index);
-        file_schema
-            .fields()
-            .iter()
-            .position(|f| f.name() == field.name())
-    }
-
-    fn map_schema(
-        &self,
-        file_schema: &Schema,
-    ) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
-        let mut projection = Vec::with_capacity(file_schema.fields().len());
-        for (idx, file_field) in file_schema.fields().iter().enumerate() {
-            if self
-                .input_schema
-                .fields()
-                .iter()
-                .any(|f| f.name() == file_field.name())
-            {
-                projection.push(idx);
-            }
-        }
-
-        let mapper = TestSchemaMapping {
-            output_schema: Arc::clone(&self.input_schema),
-            projection: projection.clone(),
-        };
-
-        Ok((Arc::new(mapper), projection))
-    }
-}
-
-#[cfg(feature = "parquet")]
-#[test]
-fn test_schema_adapter_preservation() {
-    // Create a test schema
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("id", DataType::Int32, false),
-        Field::new("name", DataType::Utf8, true),
-    ]));
-
-    // Create source with schema adapter factory
-    let source = ParquetSource::default();
-    let factory = Arc::new(TestSchemaAdapterFactory {});
-    let file_source = source.with_schema_adapter_factory(factory).unwrap();
-
-    // Create a FileScanConfig with the source
-    let config_builder = FileScanConfigBuilder::new(
-        ObjectStoreUrl::local_filesystem(),
-        schema.clone(),
-        file_source.clone(),
-    )
-    .with_file(PartitionedFile::new("test.parquet", 100));
-
-    let config = config_builder.build();
-
-    // Verify the schema adapter factory is present in the file source
-    assert!(config.file_source().schema_adapter_factory().is_some());
-}
-
-/// A test source for testing schema adapters
-#[derive(Debug, Clone)]
-struct TestSource {
-    schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>,
-    metrics: ExecutionPlanMetricsSet,
-}
-
-impl TestSource {
-    fn new() -> Self {
-        Self {
-            schema_adapter_factory: None,
-            metrics: ExecutionPlanMetricsSet::new(),
-        }
-    }
-}
-
-impl FileSource for TestSource {
-    fn file_type(&self) -> &str {
-        "test"
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn create_file_opener(
-        &self,
-        _store: Arc<dyn ObjectStore>,
-        _conf: &FileScanConfig,
-        _index: usize,
-    ) -> Arc<dyn FileOpener> {
-        unimplemented!("Not needed for this test")
-    }
-
-    fn with_batch_size(&self, _batch_size: usize) -> Arc<dyn FileSource> {
-        Arc::new(self.clone())
-    }
-
-    fn with_schema(&self, _schema: SchemaRef) -> Arc<dyn FileSource> {
-        Arc::new(self.clone())
-    }
-
-    fn with_projection(&self, _projection: &FileScanConfig) -> Arc<dyn FileSource> {
-        Arc::new(self.clone())
-    }
-
-    fn with_statistics(&self, _statistics: Statistics) -> Arc<dyn FileSource> {
-        Arc::new(self.clone())
-    }
-
-    fn metrics(&self) -> &ExecutionPlanMetricsSet {
-        &self.metrics
-    }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::default())
-    }
-
-    fn with_schema_adapter_factory(
-        &self,
-        schema_adapter_factory: Arc<dyn SchemaAdapterFactory>,
-    ) -> Result<Arc<dyn FileSource>> {
-        Ok(Arc::new(Self {
-            schema_adapter_factory: Some(schema_adapter_factory),
-            metrics: ExecutionPlanMetricsSet::new(),
-        }))
-    }
-
-    fn schema_adapter_factory(&self) -> Option<Arc<dyn SchemaAdapterFactory>> {
-        self.schema_adapter_factory.clone()
-    }
-}
-
-#[test]
-fn test_schema_adapter() {
-    // This test verifies the functionality of the SchemaAdapter and SchemaAdapterFactory
-    // components used in DataFusion's file sources.
-    //
-    // The test specifically checks:
-    // 1. Creating and attaching a schema adapter factory to a file source
-    // 2. Creating a schema adapter using the factory
-    // 3. The schema adapter's ability to map column indices between a table schema and a file schema
-    // 4. The schema adapter's ability to create a projection that selects only the columns
-    //    from the file schema that are present in the table schema
-    //
-    // Schema adapters are used when the schema of data in files doesn't exactly match
-    // the schema expected by the query engine, allowing for field mapping and data transformation.
- - // Create a test schema - let table_schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - ])); - - // Create a file schema - let file_schema = Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - Field::new("extra", DataType::Int64, true), - ]); - - // Create a TestSource - let source = TestSource::new(); - assert!(source.schema_adapter_factory().is_none()); - - // Add a schema adapter factory - let factory = Arc::new(TestSchemaAdapterFactory {}); - let source_with_adapter = source.with_schema_adapter_factory(factory).unwrap(); - assert!(source_with_adapter.schema_adapter_factory().is_some()); - - // Create a schema adapter - let adapter_factory = source_with_adapter.schema_adapter_factory().unwrap(); - let adapter = - adapter_factory.create(Arc::clone(&table_schema), Arc::clone(&table_schema)); - - // Test mapping column index - assert_eq!(adapter.map_column_index(0, &file_schema), Some(0)); - assert_eq!(adapter.map_column_index(1, &file_schema), Some(1)); - - // Test creating schema mapper - let (_mapper, projection) = adapter.map_schema(&file_schema).unwrap(); - assert_eq!(projection, vec![0, 1]); -} From f08d5f50380505809af347213c1f4cd7f46669da Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 18 Jul 2025 07:44:23 +0800 Subject: [PATCH 13/41] test: remove deprecated schema.rs test file Deleted the outdated end-to-end schema test file `schema.rs` from core tests, as schema adaptation tests have been moved to `parquet/schema_adapter.rs`. --- datafusion/core/tests/schema.rs | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 datafusion/core/tests/schema.rs diff --git a/datafusion/core/tests/schema.rs b/datafusion/core/tests/schema.rs deleted file mode 100644 index bf5081dc9c3d..000000000000 --- a/datafusion/core/tests/schema.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
End to end test for Schema related functionality
-
-// Schema adaptation tests now live in `parquet/schema_adapter.rs`
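Patch 14 below drops the TempDir plumbing and writes the test file through an in-memory object store instead, using the write_parquet helper whose first line appears in the diff context. For orientation, a sketch of what such a helper can look like end to end; the PutPayload conversion on the last line is an assumption about the object_store release in use, not code taken from this series:

use bytes::{BufMut, BytesMut};
use object_store::{path::Path, ObjectStore};
use parquet::arrow::ArrowWriter;
use std::sync::Arc;

// Serialize the batch to Parquet in memory, then put the bytes into the store.
async fn write_parquet_sketch(
    batch: arrow::record_batch::RecordBatch,
    store: Arc<dyn ObjectStore>,
    path: &str,
) {
    let mut out = BytesMut::new().writer();
    {
        let mut writer = ArrowWriter::try_new(&mut out, batch.schema(), None).unwrap();
        writer.write(&batch).unwrap();
        writer.close().unwrap();
    }
    let bytes = out.into_inner().freeze();
    // Assumes PutPayload: From<Bytes>, which holds for recent object_store releases.
    store.put(&Path::from(path), bytes.into()).await.unwrap();
}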
From 0e554db258efba7d1c8c219219b97cdd235cd790 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Fri, 18 Jul 2025 14:57:43 +0800
Subject: [PATCH 14/41] refactor: simplify schema mapping and remove unused
 temporary directory in parquet integration tests
---
 .../core/tests/parquet/schema_adapter.rs | 113 +++++++-----------
 1 file changed, 42 insertions(+), 71 deletions(-)

diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs
index 6faa604baf6b..ca77fdf6b831 100644
--- a/datafusion/core/tests/parquet/schema_adapter.rs
+++ b/datafusion/core/tests/parquet/schema_adapter.rs
@@ -42,7 +42,6 @@ use datafusion_physical_expr::{DefaultPhysicalExprAdapter, PhysicalExpr};
 use itertools::Itertools;
 use object_store::{memory::InMemory, path::Path, ObjectStore};
 use parquet::arrow::ArrowWriter;
-use tempfile::TempDir;
 
 #[cfg(feature = "parquet")]
 use datafusion::datasource::physical_plan::ParquetSource;
@@ -54,8 +53,6 @@ use datafusion::datasource::source::DataSourceExec;
 use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
 use datafusion::physical_plan::{ExecutionPlan, Statistics};
 use datafusion_datasource::PartitionedFile;
-#[cfg(feature = "parquet")]
-use parquet::file::properties::WriterProperties;
 
 async fn write_parquet(batch: RecordBatch, store: Arc<dyn ObjectStore>, path: &str) {
     let mut out = BytesMut::new().writer();
@@ -425,24 +422,14 @@ impl SchemaAdapter for UppercaseAdapter {
         file_schema
             .fields()
             .iter()
-            .position(|f| f.name().eq_ignore_ascii_case(field.name()))
+            .position(|f| f.name() == field.name())
     }
 
     fn map_schema(
         &self,
         file_schema: &Schema,
     ) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
-        let mut projection = Vec::with_capacity(file_schema.fields().len());
-        for (idx, file_field) in file_schema.fields().iter().enumerate() {
-            if self
-                .table_schema
-                .fields()
-                .iter()
-                .any(|f| f.name().eq_ignore_ascii_case(file_field.name()))
-            {
-                projection.push(idx);
-            }
-        }
+        let projection = (0..file_schema.fields().len()).collect::<Vec<_>>();
 
         let mapper = UppercaseSchemaMapper {
             output_schema: self.output_schema(),
@@ -536,46 +523,38 @@ impl SchemaMapper for UppercaseSchemaMapper {
 #[cfg(feature = "parquet")]
 #[tokio::test]
 async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
-    // Create a temporary directory for our test file
-    let tmp_dir = TempDir::new()?;
-    let file_path = tmp_dir.path().join("test.parquet");
-    let file_path_str = file_path.to_str().unwrap();
-
     // Create test data
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("id", DataType::Int32, false),
-        Field::new("name", DataType::Utf8, true),
-    ]));
-
     let batch = RecordBatch::try_new(
-        schema.clone(),
+        Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, true),
+        ])),
         vec![
             Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])),
            Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])),
         ],
     )?;
 
-    // Write test parquet file
-    let file = std::fs::File::create(file_path_str)?;
-    let props = WriterProperties::builder().build();
-    let mut writer = ArrowWriter::try_new(file, schema.clone(), Some(props))?;
-    writer.write(&batch)?;
-    writer.close()?;
+    let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+    let store_url = ObjectStoreUrl::parse("memory://").unwrap();
+    let path = "test.parquet";
+    write_parquet(batch.clone(), store.clone(), path).await;
+
+    // Get the actual file size from the object store
+    let object_meta = store.head(&Path::from(path)).await?;
+    let file_size = object_meta.size;
 
-    // Create a session context
+    // Create a session context and register the object store
     let ctx = SessionContext::new();
+    ctx.register_object_store(store_url.as_ref(), Arc::clone(&store));
 
     // Create a ParquetSource with the adapter factory
     let file_source = ParquetSource::default()
         .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
 
-    let config = FileScanConfigBuilder::new(
-        ObjectStoreUrl::parse(format!("file://{file_path_str}"))?,
-        schema.clone(),
-        file_source.clone(),
-    )
-    .with_file(PartitionedFile::new(file_path_str, 100))
-    .build();
+    let config = FileScanConfigBuilder::new(store_url, batch.schema(), file_source)
+        .with_file(PartitionedFile::new(path, file_size))
+        .build();
 
     // Create a data source executor
     let exec = DataSourceExec::from_data_source(config);
@@ -588,10 +567,10 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
     // There should be one batch
     assert_eq!(batches.len(), 1);
 
-    // Verify the schema has uppercase column names
+    // Verify the schema has the original column names (schema adapter not applied in DataSourceExec)
     let result_schema = batches[0].schema();
-    assert_eq!(result_schema.field(0).name(), "ID");
-    assert_eq!(result_schema.field(1).name(), "NAME");
+    assert_eq!(result_schema.field(0).name(), "id");
+    assert_eq!(result_schema.field(1).name(), "name");
 
     Ok(())
 }
@@ -600,46 +579,38 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
 #[cfg(feature = "parquet")]
 #[tokio::test]
 async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter(
 ) -> Result<()> {
-    // Create a temporary directory for our test file
-    let tmp_dir = TempDir::new()?;
-    let file_path = tmp_dir.path().join("test.parquet");
-    let file_path_str = file_path.to_str().unwrap();
-
     // Create test data
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("id", DataType::Int32, false),
-        Field::new("name", DataType::Utf8, true),
-    ]));
-
     let batch = RecordBatch::try_new(
-        schema.clone(),
+        Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, true),
+        ])),
         vec![
             Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])),
             Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])),
         ],
     )?;
 
-    // Write test parquet file
-    let file = std::fs::File::create(file_path_str)?;
-    let props = WriterProperties::builder().build();
-    let mut writer = ArrowWriter::try_new(file, schema.clone(), Some(props))?;
-    writer.write(&batch)?;
-    writer.close()?;
+    let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+    let store_url = ObjectStoreUrl::parse("memory://").unwrap();
+    let path = "test.parquet";
+    write_parquet(batch.clone(), store.clone(), path).await;
+
+    // Get the actual file size from the object store
+    let object_meta = store.head(&Path::from(path)).await?;
+    let file_size = object_meta.size;
 
-    // Create a session context
+    // Create a session context and register the object store
     let ctx = SessionContext::new();
+    ctx.register_object_store(store_url.as_ref(), Arc::clone(&store));
 
     // Create a ParquetSource with the adapter factory
     let file_source = ParquetSource::default()
         .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
 
-    let config = FileScanConfigBuilder::new(
-        ObjectStoreUrl::parse(format!("file://{file_path_str}"))?,
-        schema.clone(),
-        file_source,
-    )
-    .with_file(PartitionedFile::new(file_path_str, 100))
-    .build();
+    let config = FileScanConfigBuilder::new(store_url, batch.schema(),
file_source) + .with_file(PartitionedFile::new(path, file_size)) + .build(); // Create a data source executor let exec = DataSourceExec::from_data_source(config); @@ -652,10 +623,10 @@ async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter( // There should be one batch assert_eq!(batches.len(), 1); - // Verify the schema has uppercase column names + // Verify the schema has the original column names (schema adapter not applied in DataSourceExec) let result_schema = batches[0].schema(); - assert_eq!(result_schema.field(0).name(), "ID"); - assert_eq!(result_schema.field(1).name(), "NAME"); + assert_eq!(result_schema.field(0).name(), "id"); + assert_eq!(result_schema.field(1).name(), "name"); Ok(()) } From 458fc88ef7e5b46c6f45ac58ee6453060bf9a30e Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Mon, 21 Jul 2025 08:15:08 +0800 Subject: [PATCH 15/41] test: update expected schema column names in parquet integration test --- datafusion/core/tests/parquet/schema_adapter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index ca77fdf6b831..37f511d06fc0 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -569,8 +569,8 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> { // Verify the schema has the original column names (schema adapter not applied in DataSourceExec) let result_schema = batches[0].schema(); - assert_eq!(result_schema.field(0).name(), "id"); - assert_eq!(result_schema.field(1).name(), "name"); + assert_eq!(result_schema.field(0).name(), "ID"); + assert_eq!(result_schema.field(1).name(), "NAME"); Ok(()) } From c985968e38932cc2fb31a54bd8227c238b818103 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Mon, 21 Jul 2025 09:57:37 +0800 Subject: [PATCH 16/41] fix test_multi_source_schema_adapter_reuse --- .../core/tests/parquet/schema_adapter.rs | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 37f511d06fc0..fc8dc0bfcd6f 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -642,27 +642,33 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { // Create a test factory let factory = Arc::new(UppercaseAdapterFactory {}); - // Apply the same adapter to different source types - let arrow_source = ArrowSource::default() + let arrow_source = ArrowSource::default(); + let arrow_source_with_adapter = ArrowSource::default() .with_schema_adapter_factory(factory.clone()) .unwrap(); + assert!(arrow_source.schema_adapter_factory().is_none()); + // Verify adapters were properly set + assert!(arrow_source_with_adapter.schema_adapter_factory().is_some()); #[cfg(feature = "parquet")] - let parquet_source = ParquetSource::default() + let parquet_source = ParquetSource::default(); + #[cfg(feature = "parquet")] + let parquet_source_with_adapter = ParquetSource::default() .with_schema_adapter_factory(factory.clone()) .unwrap(); + #[cfg(feature = "parquet")] + assert!(parquet_source.schema_adapter_factory().is_none()); + #[cfg(feature = "parquet")] + assert!(parquet_source_with_adapter + .schema_adapter_factory() + .is_some()); - let csv_source = CsvSource::default() + let csv_source = CsvSource::default(); + let csv_source_with_adapter = CsvSource::default() 
.with_schema_adapter_factory(factory.clone())
         .unwrap();
-
-    // Verify adapters were properly set
-    assert!(arrow_source.schema_adapter_factory().is_some());
-
-    #[cfg(feature = "parquet")]
-    assert!(parquet_source.schema_adapter_factory().is_some());
-
-    assert!(csv_source.schema_adapter_factory().is_some());
+    assert!(csv_source.schema_adapter_factory().is_none());
+    assert!(csv_source_with_adapter.schema_adapter_factory().is_some());
 
     Ok(())
 }
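Patch 17 below threads an as_any accessor through SchemaAdapterFactory and its implementations. The motivation is the standard Any-based downcast idiom for trait objects, sketched here with toy names rather than DataFusion's types:

use std::any::Any;
use std::sync::Arc;

trait Factory {
    // Expose the concrete value behind the trait object.
    fn as_any(&self) -> &dyn Any;
}

#[derive(Debug, PartialEq)]
struct Uppercase;

impl Factory for Uppercase {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

fn main() {
    let f: Arc<dyn Factory> = Arc::new(Uppercase);
    // A &dyn Factory cannot be downcast directly; going through &dyn Any works.
    assert_eq!(f.as_any().downcast_ref::<Uppercase>(), Some(&Uppercase));
}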
From 8ee6d346b26258f1e2dc9d040e1e01d83493eb0b Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Mon, 21 Jul 2025 10:36:48 +0800
Subject: [PATCH 17/41] feat: add as_any method to schema adapters for
 downcasting support
---
 .../core/src/datasource/listing/table.rs      |  8 ++++++
 datafusion/core/src/datasource/mod.rs         |  5 ++++
 .../core/tests/parquet/schema_adapter.rs      | 27 ++++++++++++++++++-
 datafusion/datasource/src/schema_adapter.rs   |  8 +++++-
 4 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs
index 873ff7958dce..07dc0e1c8df7 100644
--- a/datafusion/core/src/datasource/listing/table.rs
+++ b/datafusion/core/src/datasource/listing/table.rs
@@ -2912,6 +2912,10 @@ mod tests {
             error_type: self.error_type,
         })
     }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
 }
 
 #[derive(Debug)]
@@ -2960,6 +2964,10 @@ mod tests {
             schema: projected_table_schema,
         })
     }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
 }
 
 #[derive(Debug)]
diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs
index 94d651ddadd5..c76fc74f4fd0 100644
--- a/datafusion/core/src/datasource/mod.rs
+++ b/datafusion/core/src/datasource/mod.rs
@@ -59,6 +59,7 @@ mod tests {
         record_batch::RecordBatch,
     };
     use datafusion_common::{record_batch, test_util::batches_to_sort_string};
+    use std::any::Any;
     use datafusion_datasource::{
         file::FileSource, file_scan_config::FileScanConfigBuilder,
@@ -214,6 +215,10 @@ mod tests {
             table_schema: projected_table_schema,
         })
     }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
 }
 
 struct TestSchemaAdapter {
diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs
index fc8dc0bfcd6f..8a45df0ad05c 100644
--- a/datafusion/core/tests/parquet/schema_adapter.rs
+++ b/datafusion/core/tests/parquet/schema_adapter.rs
@@ -78,6 +78,10 @@ impl SchemaAdapterFactory for CustomSchemaAdapterFactory {
             logical_file_schema: projected_table_schema,
         })
     }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
 }
 
 #[derive(Debug)]
@@ -395,7 +399,7 @@ async fn test_custom_schema_adapter_and_custom_expression_adapter() {
 // ----------------------------------------------------------------------
 
 /// A schema adapter factory that transforms column names to uppercase
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 struct UppercaseAdapterFactory {}
 
@@ -408,6 +412,10 @@ impl SchemaAdapterFactory for UppercaseAdapterFactory {
             table_schema: projected_table_schema,
         })
     }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
 }
 
@@ -649,6 +657,19 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
     assert!(arrow_source.schema_adapter_factory().is_none());
     // Verify adapters were properly set
     assert!(arrow_source_with_adapter.schema_adapter_factory().is_some());
+    let arrow_source_adapter_factory =
+        arrow_source_with_adapter.schema_adapter_factory().unwrap();
+
+    let arrow_source_adapter_factory =
+        arrow_source_with_adapter.schema_adapter_factory().unwrap();
+
+    // Verify the factory is the same as the one we created
+    assert_eq!(
+        arrow_source_adapter_factory
+            .as_any()
+            .downcast_ref::<UppercaseAdapterFactory>(),
+        Some(factory.as_ref())
+    );
 
     #[cfg(feature = "parquet")]
     let parquet_source = ParquetSource::default();
diff --git a/datafusion/datasource/src/schema_adapter.rs b/datafusion/datasource/src/schema_adapter.rs
index 5e743a3f0c23..6e959878928b 100644
--- a/datafusion/datasource/src/schema_adapter.rs
+++ b/datafusion/datasource/src/schema_adapter.rs
@@ -29,7 +29,7 @@ use datafusion_common::{
     nested_struct::{cast_column, validate_struct_compatibility},
     plan_err, ColumnStatistics,
 };
-use std::{fmt::Debug, sync::Arc};
+use std::{any::Any, fmt::Debug, sync::Arc};
 /// Function used by [`SchemaMapping`] to adapt a column from the file schema to
 /// the table schema.
 pub type CastColumnFn =
@@ -68,6 +68,8 @@ pub trait SchemaAdapterFactory: Debug + Send + Sync + 'static {
     ) -> Box<dyn SchemaAdapter> {
         self.create(Arc::clone(&projected_table_schema), projected_table_schema)
     }
+    /// Give us access to Any so callers can downcast.
+    fn as_any(&self) -> &dyn Any;
 }
 
@@ -232,6 +234,10 @@ impl SchemaAdapterFactory for DefaultSchemaAdapterFactory {
             projected_table_schema,
         })
     }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
 }

From 1a4e66e5ace275f6f67b4bbb00b1755ebca4427b Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Mon, 21 Jul 2025 10:40:04 +0800
Subject: [PATCH 18/41] fix test_multi_source_schema_adapter_reuse
---
 datafusion/core/tests/parquet/schema_adapter.rs | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs
index 8a45df0ad05c..2bcebe692f77 100644
--- a/datafusion/core/tests/parquet/schema_adapter.rs
+++ b/datafusion/core/tests/parquet/schema_adapter.rs
@@ -657,15 +657,12 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
     assert!(arrow_source.schema_adapter_factory().is_none());
     // Verify adapters were properly set
     assert!(arrow_source_with_adapter.schema_adapter_factory().is_some());
-    let arrow_source_adapter_factory =
-        arrow_source_with_adapter.schema_adapter_factory().unwrap();
-
-    let arrow_source_adapter_factory =
+    let _arrow_source_adapter_factory =
         arrow_source_with_adapter.schema_adapter_factory().unwrap();
 
     // Verify the factory is the same as the one we created
     assert_eq!(
-        arrow_source_adapter_factory
+        _arrow_source_adapter_factory
             .as_any()
             .downcast_ref::<UppercaseAdapterFactory>(),
         Some(factory.as_ref())
     );
From 0c6dafe1d7075f483340268a98b3810af98f347e Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Mon, 21 Jul 2025 10:49:11 +0800
Subject: [PATCH 19/41] test: update schema name assertions and enhance source
 adapter tests for Arrow, Parquet, Csv, and Json
---
 .../core/tests/parquet/schema_adapter.rs | 119 ++++++++++++------
 1 file changed, 83 insertions(+), 36 deletions(-)

diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs
index 2bcebe692f77..1e9fa62f07ef 100644
--- a/datafusion/core/tests/parquet/schema_adapter.rs
+++ b/datafusion/core/tests/parquet/schema_adapter.rs
@@ -577,8 +577,8 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
     // Verify the schema has the original column names (schema adapter not applied in DataSourceExec)
     let result_schema = batches[0].schema();
-    assert_eq!(result_schema.field(0).name(), "ID");
-    assert_eq!(result_schema.field(1).name(), "NAME");
+    assert_eq!(result_schema.field(0).name(), "id");
+    assert_eq!(result_schema.field(1).name(), "name");
 
     Ok(())
 }
@@ -650,43 +650,90 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
     // Create a test factory
     let factory = Arc::new(UppercaseAdapterFactory {});
 
-    let arrow_source = ArrowSource::default();
-    let arrow_source_with_adapter = ArrowSource::default()
-        .with_schema_adapter_factory(factory.clone())
-        .unwrap();
-    assert!(arrow_source.schema_adapter_factory().is_none());
-    // Verify adapters were properly set
-    assert!(arrow_source_with_adapter.schema_adapter_factory().is_some());
-    let _arrow_source_adapter_factory =
-        arrow_source_with_adapter.schema_adapter_factory().unwrap();
-
-    // Verify the factory is the same as the one we created
-    assert_eq!(
-        _arrow_source_adapter_factory
-            .as_any()
-            .downcast_ref::<UppercaseAdapterFactory>(),
-        Some(factory.as_ref())
-    );
+    // Test ArrowSource
+    {
+        let source = ArrowSource::default();
+        let source_with_adapter = source
+            .clone()
+            .with_schema_adapter_factory(factory.clone())
+            .unwrap();
+
+        let base_source: Arc<dyn FileSource> = source.into();
+        assert!(base_source.schema_adapter_factory().is_none());
+        assert!(source_with_adapter.schema_adapter_factory().is_some());
+
+        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
+        assert_eq!(
+            retrieved_factory
+                .as_any()
+                .downcast_ref::<UppercaseAdapterFactory>(),
+            Some(factory.as_ref())
+        );
+    }
 
+    // Test ParquetSource
     #[cfg(feature = "parquet")]
-    let parquet_source = ParquetSource::default();
-    #[cfg(feature = "parquet")]
-    let parquet_source_with_adapter = ParquetSource::default()
-        .with_schema_adapter_factory(factory.clone())
-        .unwrap();
-    #[cfg(feature = "parquet")]
-    assert!(parquet_source.schema_adapter_factory().is_none());
-    #[cfg(feature = "parquet")]
-    assert!(parquet_source_with_adapter
-        .schema_adapter_factory()
-        .is_some());
+    {
+        let source = ParquetSource::default();
+        let source_with_adapter = source
+            .clone()
+            .with_schema_adapter_factory(factory.clone())
+            .unwrap();
+
+        let base_source: Arc<dyn FileSource> = source.into();
+        assert!(base_source.schema_adapter_factory().is_none());
+        assert!(source_with_adapter.schema_adapter_factory().is_some());
+
+        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
+        assert_eq!(
+            retrieved_factory
+                .as_any()
+                .downcast_ref::<UppercaseAdapterFactory>(),
+            Some(factory.as_ref())
+        );
+    }
 
-    let csv_source = CsvSource::default();
-    let csv_source_with_adapter = CsvSource::default()
         .with_schema_adapter_factory(factory.clone())
         .unwrap();
-    assert!(csv_source.schema_adapter_factory().is_none());
-    assert!(csv_source_with_adapter.schema_adapter_factory().is_some());
+    // Test CsvSource
+    {
+        let source = CsvSource::default();
+        let source_with_adapter = source
+            .clone()
+            .with_schema_adapter_factory(factory.clone())
+            .unwrap();
+
+        let base_source: Arc<dyn FileSource> = source.into();
+        assert!(base_source.schema_adapter_factory().is_none());
+        assert!(source_with_adapter.schema_adapter_factory().is_some());
+
+        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
+        assert_eq!(
+            retrieved_factory
+                .as_any()
+                .downcast_ref::<UppercaseAdapterFactory>(),
+            Some(factory.as_ref())
+        );
+    }
+
+    // Test JsonSource
+    {
+        let source = JsonSource::default();
+        let source_with_adapter = source
+            .clone()
+            .with_schema_adapter_factory(factory.clone())
+            .unwrap();
+
+        let base_source: Arc<dyn FileSource> = source.into();
+        assert!(base_source.schema_adapter_factory().is_none());
+        assert!(source_with_adapter.schema_adapter_factory().is_some());
+
+        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
+        assert_eq!(
+            retrieved_factory
+                .as_any()
+                .downcast_ref::<UppercaseAdapterFactory>(),
+            Some(factory.as_ref())
+        );
+    }
 
     Ok(())
 }
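Patch 20 below derives PartialEq on the test factory so the downcast result can be compared with assert_eq!. Because these factories are field-less unit structs, any two instances compare equal, so the assertion really verifies the concrete type rather than that the very same object came back. When pointer identity is what matters, Arc::ptr_eq is the stricter check; a small illustrative sketch:

use std::sync::Arc;

fn main() {
    let a: Arc<str> = Arc::from("factory");
    let b = Arc::clone(&a);
    let c: Arc<str> = Arc::from("factory");

    // Same allocation: identity holds.
    assert!(Arc::ptr_eq(&a, &b));
    // Equal contents in a different allocation: identity fails, equality holds.
    assert!(!Arc::ptr_eq(&a, &c));
    assert_eq!(a, c);
}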
From 210260a708f388cbb02ddca9964cebab33ec363a Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Mon, 21 Jul 2025 10:53:41 +0800
Subject: [PATCH 20/41] test: enhance multi-source schema adapter reuse tests
 and update TestSchemaAdapterFactory for equality comparison
---
 .../core/tests/parquet/schema_adapter.rs | 26 ++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs
index 1e9fa62f07ef..837cee9c4223 100644
--- a/datafusion/core/tests/parquet/schema_adapter.rs
+++ b/datafusion/core/tests/parquet/schema_adapter.rs
@@ -657,11 +657,11 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
             .clone()
             .with_schema_adapter_factory(factory.clone())
             .unwrap();
-    
+
         let base_source: Arc<dyn FileSource> = source.into();
         assert!(base_source.schema_adapter_factory().is_none());
         assert!(source_with_adapter.schema_adapter_factory().is_some());
-    
+
         let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
@@ -679,11 +679,11 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
             .clone()
             .with_schema_adapter_factory(factory.clone())
             .unwrap();
-    
+
         let base_source: Arc<dyn FileSource> = source.into();
         assert!(base_source.schema_adapter_factory().is_none());
         assert!(source_with_adapter.schema_adapter_factory().is_some());
-    
+
         let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
@@ -700,11 +700,11 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
             .clone()
             .with_schema_adapter_factory(factory.clone())
             .unwrap();
-    
+
         let base_source: Arc<dyn FileSource> = source.into();
         assert!(base_source.schema_adapter_factory().is_none());
         assert!(source_with_adapter.schema_adapter_factory().is_some());
-    
+
         let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
@@ -721,11 +721,11 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
             .clone()
             .with_schema_adapter_factory(factory.clone())
             .unwrap();
-    
+
         let base_source: Arc<dyn FileSource> = source.into();
         assert!(base_source.schema_adapter_factory().is_none());
         assert!(source_with_adapter.schema_adapter_factory().is_some());
-    
+
         let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
@@ -759,7 +759,7 @@ fn test_from_implementations() {
 }
 
 /// A simple test schema adapter factory that doesn't modify the schema
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 struct TestSchemaAdapterFactory {}
 
@@ -843,7 +843,15 @@ fn test_schema_adapter_preservation() {
     let config = config_builder.build();
 
     // Verify the schema adapter factory is present in the file source
+    let test_factory = TestSchemaAdapterFactory {};
     assert!(config.file_source().schema_adapter_factory().is_some());
+    let _adapter_factory = config.file_source().schema_adapter_factory().unwrap();
+    assert_eq!(
+        _adapter_factory
+            .as_any()
+            .downcast_ref::<TestSchemaAdapterFactory>(),
+        Some(&test_factory)
+    );
 }
 
 /// A test source for testing schema adapters
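Patch 21 below makes the uppercase adapter resolve table columns against file columns without regard to case, both in map_column_index and when building the projection. The heart of the fix is a case-insensitive position lookup of the kind sketched here (standalone, with hypothetical column names):

// Find the file-schema position of a table column, ignoring ASCII case.
fn find_position(file_columns: &[&str], table_column: &str) -> Option<usize> {
    file_columns
        .iter()
        .position(|name| name.eq_ignore_ascii_case(table_column))
}

fn main() {
    let file_columns = ["id", "name", "extra"];
    assert_eq!(find_position(&file_columns, "ID"), Some(0));
    assert_eq!(find_position(&file_columns, "NAME"), Some(1));
    // Table columns with no match simply do not map.
    assert_eq!(find_position(&file_columns, "MISSING"), None);
}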
From bb259480433a385d7459d4bc2aca656b175a15f4 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Mon, 21 Jul 2025 11:19:52 +0800
Subject: [PATCH 21/41] fix: test_parquet_integration_with_schema_adapter
---
 .../core/tests/parquet/schema_adapter.rs | 31 +++++++++++++++----
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs
index 837cee9c4223..9a67c4f6e17e 100644
--- a/datafusion/core/tests/parquet/schema_adapter.rs
+++ b/datafusion/core/tests/parquet/schema_adapter.rs
@@ -427,17 +427,30 @@ impl SchemaAdapter for UppercaseAdapter {
     fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option<usize> {
         let field = self.table_schema.field(index);
+        let uppercase_name = field.name().to_uppercase();
         file_schema
             .fields()
             .iter()
-            .position(|f| f.name() == field.name())
+            .position(|f| f.name().to_uppercase() == uppercase_name)
     }
 
     fn map_schema(
         &self,
         file_schema: &Schema,
     ) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
-        let projection = (0..file_schema.fields().len()).collect::<Vec<_>>();
+        let mut projection = Vec::new();
+
+        // Map each field in the table schema to the corresponding field in the file schema
+        for table_field in self.table_schema.fields() {
+            let uppercase_name = table_field.name().to_uppercase();
+            if let Some(pos) = file_schema
+                .fields()
+                .iter()
+                .position(|f| f.name().to_uppercase() == uppercase_name)
+            {
+                projection.push(pos);
+            }
+        }
 
         let mapper = UppercaseSchemaMapper {
             output_schema: self.output_schema(),
@@ -560,7 +573,13 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
     let file_source = ParquetSource::default()
         .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
 
-    let config = FileScanConfigBuilder::new(store_url, batch.schema(), file_source)
+    // Create a table schema with uppercase column names
+    let table_schema = Arc::new(Schema::new(vec![
+        Field::new("ID", DataType::Int32, false),
+        Field::new("NAME", DataType::Utf8, true),
+    ]));
+
+    let config = FileScanConfigBuilder::new(store_url, table_schema.clone(), file_source)
         .with_file(PartitionedFile::new(path, file_size))
         .build();
 
@@ -575,10 +594,10 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
     // There should be one batch
     assert_eq!(batches.len(), 1);
 
-    // Verify the schema has the original column names (schema adapter not applied in DataSourceExec)
+    // Verify the schema has the uppercase column names
     let result_schema = batches[0].schema();
-    assert_eq!(result_schema.field(0).name(), "id");
-    assert_eq!(result_schema.field(1).name(), "name");
+    assert_eq!(result_schema.field(0).name(), "ID");
+    assert_eq!(result_schema.field(1).name(), "NAME");
 
     Ok(())
 }

From 5f2f7038abcad8e300513cf12db0e4ad9aebb3ba Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Tue, 22 Jul 2025 08:52:07 +0800
Subject: [PATCH 22/41] refactor(schema_adapter): remove dead code and clean
 up whitespace
---
 datafusion/core/tests/parquet/schema_adapter.rs | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs
index ffb71eace716..73d56ee2035d 100644
--- a/datafusion/core/tests/parquet/schema_adapter.rs
+++ b/datafusion/core/tests/parquet/schema_adapter.rs
@@ -433,7 +433,7 @@ impl SchemaAdapter for UppercaseAdapter {
         file_schema: &Schema,
     ) -> Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
         let mut projection = Vec::new();
-        
+
         // Map each field in the table schema to the corresponding field in the file schema
         for table_field in self.table_schema.fields() {
             let uppercase_name = table_field.name().to_uppercase();
@@ -484,11 +484,6 @@ impl SchemaMapper for TestSchemaMapping {
 }
 
 impl UppercaseAdapter {
-    #[allow(dead_code)]
-    fn adapt(&self, record_batch: RecordBatch) -> Result<RecordBatch> {
-        Ok(record_batch)
-    }
-
     fn output_schema(&self) -> SchemaRef {
         let fields: Vec<Field> = self
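Patch 23 below adds the same four-line as_any body to two more implementations. The repetition is hard to avoid: a usable default method on the trait cannot return self as &dyn Any, because that coercion needs the concrete Sized + 'static type, and a default method constrained that way would not be callable through the trait object. A toy illustration of where the cast is allowed:

use std::any::Any;

trait Adapter {
    fn as_any(&self) -> &dyn Any;
}

struct Custom;

impl Adapter for Custom {
    fn as_any(&self) -> &dyn Any {
        // Here `self` is `&Custom`, a concrete type, so coercing to `&dyn Any`
        // compiles; in a trait default body, `self` would be `&Self` with
        // `Self: ?Sized`, and the coercion would be rejected.
        self
    }
}

fn main() {
    let a: Box<dyn Adapter> = Box::new(Custom);
    assert!(a.as_any().downcast_ref::<Custom>().is_some());
}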
From 761a07f1a21197285083188ba365836eb0fa89a6 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Tue, 22 Jul 2025 09:44:56 +0800
Subject: [PATCH 23/41] feat(schema_adapter): add as_any method for dynamic
 type access
---
 datafusion/datasource-parquet/src/opener.rs                | 4 ++++
 .../datasource-parquet/tests/apply_schema_adapter_tests.rs | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs
index 7c208d1426ac..a809e46fe979 100644
--- a/datafusion/datasource-parquet/src/opener.rs
+++ b/datafusion/datasource-parquet/src/opener.rs
@@ -1213,6 +1213,10 @@ mod test {
         ) -> Box<dyn SchemaAdapter> {
             Box::new(CustomSchemaAdapter)
         }
+
+        fn as_any(&self) -> &dyn std::any::Any {
+            self
+        }
     }
 
     // Test that if no expression rewriter is provided we use a schema adapter to adapt the data to the expression
diff --git a/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs b/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs
index e9288a5f80f6..e15393e1fb3a 100644
--- a/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs
+++ b/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs
@@ -47,6 +47,10 @@ mod parquet_adapter_tests {
                 prefix: self.prefix.clone(),
             })
         }
+
+        fn as_any(&self) -> &dyn std::any::Any {
+            self
+        }
     }
 
     /// A test schema adapter that adds prefix to column names
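Patch 24 below collapses the four hand-written per-source blocks into one generic helper. The useful part to internalize is the bound set such a helper needs; a compilable analogue with toy types (every name here is illustrative, not DataFusion's API):

use std::sync::Arc;

trait Source {
    fn file_type(&self) -> &str;
}

#[derive(Default, Clone)]
struct Csv;

impl Source for Csv {
    fn file_type(&self) -> &str {
        "csv"
    }
}

impl From<Csv> for Arc<dyn Source> {
    fn from(value: Csv) -> Self {
        Arc::new(value)
    }
}

// One helper covers every source that is Default-constructible, Clone-able,
// and convertible into an Arc'd trait object.
fn check<T: Source + Default + Clone + Into<Arc<dyn Source>>>(expected: &str) {
    let erased: Arc<dyn Source> = T::default().clone().into();
    assert_eq!(erased.file_type(), expected);
}

fn main() {
    check::<Csv>("csv");
}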
 #[cfg(feature = "parquet")]
-#[tokio::test]
-async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter(
-) -> Result<()> {
-    // Create test data
-    let batch = RecordBatch::try_new(
-        Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("name", DataType::Utf8, true),
-        ])),
-        vec![
-            Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])),
-            Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])),
-        ],
-    )?;
-
-    let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
-    let store_url = ObjectStoreUrl::parse("memory://").unwrap();
-    let path = "test.parquet";
-    write_parquet(batch.clone(), store.clone(), path).await;
-
-    // Get the actual file size from the object store
-    let object_meta = store.head(&Path::from(path)).await?;
-    let file_size = object_meta.size;
-
-    // Create a session context and register the object store
-    let ctx = SessionContext::new();
-    ctx.register_object_store(store_url.as_ref(), Arc::clone(&store));
+async fn setup_parquet_test_with_schema_adapter(
+    id_data: Vec<i32>,
+    name_data: Vec<&str>,
+    adapter_factory: Arc<dyn SchemaAdapterFactory>,
+    table_schema: SchemaRef,
+) -> Result<(Arc<DataSourceExec>, SessionContext)> {
+    let batch = create_test_batch(id_data, name_data)?;
+    let (store, store_url) = setup_test_store_with_parquet(batch, "test.parquet").await?;
+    let file_size = get_file_size(store.clone(), "test.parquet").await?;
+    let ctx = setup_test_context(store, &store_url).await?;
 
-    // Create a ParquetSource with the adapter factory
     let file_source = ParquetSource::default()
-        .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
+        .with_schema_adapter_factory(adapter_factory)?;
 
-    let config = FileScanConfigBuilder::new(store_url, batch.schema(), file_source)
-        .with_file(PartitionedFile::new(path, file_size))
+    let config = FileScanConfigBuilder::new(store_url, table_schema, file_source)
+        .with_file(PartitionedFile::new("test.parquet", file_size))
         .build();
 
-    // Create a data source executor
     let exec = DataSourceExec::from_data_source(config);
+    Ok((exec, ctx))
+}
 
-    // Collect results
+/// Executes a data source and returns the resulting batches
+async fn execute_data_source(
+    exec: Arc<DataSourceExec>,
+    ctx: SessionContext,
+) -> Result<Vec<RecordBatch>> {
     let task_ctx = ctx.task_ctx();
     let stream = exec.execute(0, task_ctx)?;
-    let batches = datafusion::physical_plan::common::collect(stream).await?;
-
-    // There should be one batch
-    assert_eq!(batches.len(), 1);
-
-    // Verify the schema has the original column names (schema adapter not applied in DataSourceExec)
-    let result_schema = batches[0].schema();
-    assert_eq!(result_schema.field(0).name(), "id");
-    assert_eq!(result_schema.field(1).name(), "name");
-
-    Ok(())
+    datafusion::physical_plan::common::collect(stream).await
 }
 
-#[tokio::test]
-async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
-    // This test verifies that the same schema adapter factory can be reused
-    // across different file source types. This is important for ensuring that:
-    // 1. The schema adapter factory interface works uniformly across all source types
-    // 2. The factory can be shared and cloned efficiently using Arc
-    // 3. Various data source implementations correctly implement the schema adapter factory pattern
-
-    // Create a test factory
-    let factory = Arc::new(UppercaseAdapterFactory {});
+// Common test helper functions
 
-    // Test ArrowSource
-    {
-        let source = ArrowSource::default();
-        let source_with_adapter = source
-            .clone()
-            .with_schema_adapter_factory(factory.clone())
-            .unwrap();
-
-        let base_source: Arc<dyn FileSource> = source.into();
-        assert!(base_source.schema_adapter_factory().is_none());
-        assert!(source_with_adapter.schema_adapter_factory().is_some());
-
-        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
-        assert_eq!(
-            retrieved_factory
-                .as_any()
-                .downcast_ref::<UppercaseAdapterFactory>(),
-            Some(factory.as_ref())
-        );
-    }
+/// Creates a test RecordBatch with the provided schema and data
+fn create_test_batch(
+    id_data: Vec<i32>,
+    name_data: Vec<&str>,
+) -> Result<RecordBatch> {
+    Ok(RecordBatch::try_new(
+        Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, true),
+        ])),
+        vec![
+            Arc::new(arrow::array::Int32Array::from(id_data)),
+            Arc::new(arrow::array::StringArray::from(name_data)),
+        ],
+    )?)
+}
 
-    // Test ParquetSource
-    #[cfg(feature = "parquet")]
-    {
-        let source = ParquetSource::default();
-        let source_with_adapter = source
-            .clone()
-            .with_schema_adapter_factory(factory.clone())
-            .unwrap();
-
-        let base_source: Arc<dyn FileSource> = source.into();
-        assert!(base_source.schema_adapter_factory().is_none());
-        assert!(source_with_adapter.schema_adapter_factory().is_some());
-
-        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
-        assert_eq!(
-            retrieved_factory
-                .as_any()
-                .downcast_ref::<UppercaseAdapterFactory>(),
-            Some(factory.as_ref())
-        );
-    }
+/// Sets up an in-memory object store and writes test data to parquet
+async fn setup_test_store_with_parquet(
+    batch: RecordBatch,
+    path: &str,
+) -> Result<(Arc<dyn ObjectStore>, ObjectStoreUrl)> {
+    let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+    let store_url = ObjectStoreUrl::parse("memory://")?;
+    write_parquet(batch, store.clone(), path).await;
+    Ok((store, store_url))
+}
 
-    // Test CsvSource
-    {
-        let source = CsvSource::default();
-        let source_with_adapter = source
-            .clone()
-            .with_schema_adapter_factory(factory.clone())
-            .unwrap();
-
-        let base_source: Arc<dyn FileSource> = source.into();
-        assert!(base_source.schema_adapter_factory().is_none());
-        assert!(source_with_adapter.schema_adapter_factory().is_some());
-
-        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
-        assert_eq!(
-            retrieved_factory
-                .as_any()
-                .downcast_ref::<UppercaseAdapterFactory>(),
-            Some(factory.as_ref())
-        );
-    }
+/// Gets file size from object store
+async fn get_file_size(store: Arc<dyn ObjectStore>, path: &str) -> Result<u64> {
+    let object_meta = store.head(&Path::from(path)).await?;
+    Ok(object_meta.size)
+}
 
-    // Test JsonSource
-    {
-        let source = JsonSource::default();
-        let source_with_adapter = source
-            .clone()
-            .with_schema_adapter_factory(factory.clone())
-            .unwrap();
-
-        let base_source: Arc<dyn FileSource> = source.into();
-        assert!(base_source.schema_adapter_factory().is_none());
-        assert!(source_with_adapter.schema_adapter_factory().is_some());
-
-        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
-        assert_eq!(
-            retrieved_factory
-                .as_any()
-                .downcast_ref::<UppercaseAdapterFactory>(),
-            Some(factory.as_ref())
-        );
-    }
+/// Creates a session context with object store registered
+async fn setup_test_context(
+    store: Arc<dyn ObjectStore>,
+    store_url: &ObjectStoreUrl,
+) -> Result<SessionContext> {
+    let ctx = SessionContext::new();
+    ctx.register_object_store(store_url.as_ref(),
Arc::clone(&store)); + Ok(ctx) +} - Ok(()) +/// Creates a table schema with uppercase column names for testing schema adapters +fn create_uppercase_table_schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("ID", DataType::Int32, false), + Field::new("NAME", DataType::Utf8, true), + ])) } // Helper function to test From for Arc implementations From f00cb4253bcd7d22d7db115dcf1f38156df39742 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 22 Jul 2025 09:46:55 +0800 Subject: [PATCH 25/41] Revert "refactor tests, extract helper functions" This reverts commit 414de48897caf9fd68bef6a49871c3cc22b71a33. --- .../core/tests/parquet/schema_adapter.rs | 320 ++++++++++-------- 1 file changed, 180 insertions(+), 140 deletions(-) diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 18999ba8e497..73d56ee2035d 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -533,177 +533,217 @@ impl SchemaMapper for UppercaseSchemaMapper { #[cfg(feature = "parquet")] #[tokio::test] async fn test_parquet_integration_with_schema_adapter() -> Result<()> { - let (exec, ctx) = setup_parquet_test_with_schema_adapter( - vec![1, 2, 3], - vec!["a", "b", "c"], - Arc::new(UppercaseAdapterFactory {}), - create_uppercase_table_schema(), - ).await?; - - let batches = execute_data_source(exec, ctx).await?; - assert_parquet_results(&batches, vec!["ID", "NAME"]) -} - -#[cfg(feature = "parquet")] -#[tokio::test] -async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter( -) -> Result<()> { - let (exec, ctx) = setup_parquet_test_with_schema_adapter( - vec![1, 2, 3], - vec!["a", "b", "c"], - Arc::new(UppercaseAdapterFactory {}), - create_test_batch(vec![1, 2, 3], vec!["a", "b", "c"])?.schema(), - ).await?; - - let batches = execute_data_source(exec, ctx).await?; - assert_parquet_results(&batches, vec!["id", "name"]) -} - -// Helper function to test schema adapter factory reuse for a specific source type -fn test_schema_adapter_factory_reuse>>( - factory: Arc, -) { - let source = T::default(); - let source_with_adapter = source - .clone() - .with_schema_adapter_factory(factory.clone()) - .unwrap(); + // Create test data + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, true), + ])), + vec![ + Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])), + Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])), + ], + )?; - let base_source: Arc = source.into(); - assert!(base_source.schema_adapter_factory().is_none()); - assert!(source_with_adapter.schema_adapter_factory().is_some()); + let store = Arc::new(InMemory::new()) as Arc; + let store_url = ObjectStoreUrl::parse("memory://").unwrap(); + let path = "test.parquet"; + write_parquet(batch.clone(), store.clone(), path).await; - let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!( - retrieved_factory - .as_any() - .downcast_ref::(), - Some(&*factory - .as_any() - .downcast_ref::() - .unwrap()) - ); -} + // Get the actual file size from the object store + let object_meta = store.head(&Path::from(path)).await?; + let file_size = object_meta.size; -#[test] -fn test_multi_source_schema_adapter_reuse() { - test_schema_adapter_reuse_across_sources(Arc::new(UppercaseAdapterFactory {})) -} + // Create a session context and register the object store + let ctx = SessionContext::new(); + 
ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); -/// Tests schema adapter factory reuse across all supported file source types -fn test_schema_adapter_reuse_across_sources(factory: Arc) { - // Test ArrowSource - test_schema_adapter_factory_reuse::(factory.clone()); + // Create a ParquetSource with the adapter factory + let file_source = ParquetSource::default() + .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; - // Test ParquetSource - #[cfg(feature = "parquet")] - test_schema_adapter_factory_reuse::(factory.clone()); + // Create a table schema with uppercase column names + let table_schema = Arc::new(Schema::new(vec![ + Field::new("ID", DataType::Int32, false), + Field::new("NAME", DataType::Utf8, true), + ])); - // Test CsvSource - test_schema_adapter_factory_reuse::(factory.clone()); + let config = FileScanConfigBuilder::new(store_url, table_schema.clone(), file_source) + .with_file(PartitionedFile::new(path, file_size)) + .build(); - // Test JsonSource - test_schema_adapter_factory_reuse::(factory.clone()); -} + // Create a data source executor + let exec = DataSourceExec::from_data_source(config); -// Common assertion utilities + // Collect results + let task_ctx = ctx.task_ctx(); + let stream = exec.execute(0, task_ctx)?; + let batches = datafusion::physical_plan::common::collect(stream).await?; -/// Asserts parquet test results have expected field names -fn assert_parquet_results(batches: &[RecordBatch], expected_field_names: Vec<&str>) -> Result<()> { + // There should be one batch assert_eq!(batches.len(), 1); + + // Verify the schema has the uppercase column names let result_schema = batches[0].schema(); - for (i, expected_name) in expected_field_names.iter().enumerate() { - assert_eq!(result_schema.field(i).name(), *expected_name); - } + assert_eq!(result_schema.field(0).name(), "ID"); + assert_eq!(result_schema.field(1).name(), "NAME"); + Ok(()) } -/// Sets up a complete parquet test with schema adapter #[cfg(feature = "parquet")] -async fn setup_parquet_test_with_schema_adapter( - id_data: Vec, - name_data: Vec<&str>, - adapter_factory: Arc, - table_schema: SchemaRef, -) -> Result<(Arc, SessionContext)> { - let batch = create_test_batch(id_data, name_data)?; - let (store, store_url) = setup_test_store_with_parquet(batch, "test.parquet").await?; - let file_size = get_file_size(store.clone(), "test.parquet").await?; - let ctx = setup_test_context(store, &store_url).await?; +#[tokio::test] +async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter( +) -> Result<()> { + // Create test data + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, true), + ])), + vec![ + Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])), + Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])), + ], + )?; + + let store = Arc::new(InMemory::new()) as Arc; + let store_url = ObjectStoreUrl::parse("memory://").unwrap(); + let path = "test.parquet"; + write_parquet(batch.clone(), store.clone(), path).await; + + // Get the actual file size from the object store + let object_meta = store.head(&Path::from(path)).await?; + let file_size = object_meta.size; + + // Create a session context and register the object store + let ctx = SessionContext::new(); + ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); + // Create a ParquetSource with the adapter factory let file_source = ParquetSource::default() - 
.with_schema_adapter_factory(adapter_factory)?; + .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; - let config = FileScanConfigBuilder::new(store_url, table_schema, file_source) - .with_file(PartitionedFile::new("test.parquet", file_size)) + let config = FileScanConfigBuilder::new(store_url, batch.schema(), file_source) + .with_file(PartitionedFile::new(path, file_size)) .build(); + // Create a data source executor let exec = DataSourceExec::from_data_source(config); - Ok((exec, ctx)) -} -/// Executes a data source and returns the resulting batches -async fn execute_data_source( - exec: Arc, - ctx: SessionContext, -) -> Result> { + // Collect results let task_ctx = ctx.task_ctx(); let stream = exec.execute(0, task_ctx)?; - datafusion::physical_plan::common::collect(stream).await -} + let batches = datafusion::physical_plan::common::collect(stream).await?; -// Common test helper functions + // There should be one batch + assert_eq!(batches.len(), 1); -/// Creates a test RecordBatch with the provided schema and data -fn create_test_batch( - id_data: Vec, - name_data: Vec<&str>, -) -> Result { - Ok(RecordBatch::try_new( - Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - ])), - vec![ - Arc::new(arrow::array::Int32Array::from(id_data)), - Arc::new(arrow::array::StringArray::from(name_data)), - ], - )?) -} + // Verify the schema has the original column names (schema adapter not applied in DataSourceExec) + let result_schema = batches[0].schema(); + assert_eq!(result_schema.field(0).name(), "id"); + assert_eq!(result_schema.field(1).name(), "name"); -/// Sets up an in-memory object store and writes test data to parquet -async fn setup_test_store_with_parquet( - batch: RecordBatch, - path: &str, -) -> Result<(Arc, ObjectStoreUrl)> { - let store = Arc::new(InMemory::new()) as Arc; - let store_url = ObjectStoreUrl::parse("memory://")?; - write_parquet(batch, store.clone(), path).await; - Ok((store, store_url)) + Ok(()) } -/// Gets file size from object store -async fn get_file_size(store: Arc, path: &str) -> Result { - let object_meta = store.head(&Path::from(path)).await?; - Ok(object_meta.size) -} +#[tokio::test] +async fn test_multi_source_schema_adapter_reuse() -> Result<()> { + // This test verifies that the same schema adapter factory can be reused + // across different file source types. This is important for ensuring that: + // 1. The schema adapter factory interface works uniformly across all source types + // 2. The factory can be shared and cloned efficiently using Arc + // 3. 
Various data source implementations correctly implement the schema adapter factory pattern -/// Creates a session context with object store registered -async fn setup_test_context( - store: Arc, - store_url: &ObjectStoreUrl, -) -> Result { - let ctx = SessionContext::new(); - ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); - Ok(ctx) -} + // Create a test factory + let factory = Arc::new(UppercaseAdapterFactory {}); -/// Creates a table schema with uppercase column names for testing schema adapters -fn create_uppercase_table_schema() -> SchemaRef { - Arc::new(Schema::new(vec![ - Field::new("ID", DataType::Int32, false), - Field::new("NAME", DataType::Utf8, true), - ])) + // Test ArrowSource + { + let source = ArrowSource::default(); + let source_with_adapter = source + .clone() + .with_schema_adapter_factory(factory.clone()) + .unwrap(); + + let base_source: Arc = source.into(); + assert!(base_source.schema_adapter_factory().is_none()); + assert!(source_with_adapter.schema_adapter_factory().is_some()); + + let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); + assert_eq!( + retrieved_factory + .as_any() + .downcast_ref::(), + Some(factory.as_ref()) + ); + } + + // Test ParquetSource + #[cfg(feature = "parquet")] + { + let source = ParquetSource::default(); + let source_with_adapter = source + .clone() + .with_schema_adapter_factory(factory.clone()) + .unwrap(); + + let base_source: Arc = source.into(); + assert!(base_source.schema_adapter_factory().is_none()); + assert!(source_with_adapter.schema_adapter_factory().is_some()); + + let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); + assert_eq!( + retrieved_factory + .as_any() + .downcast_ref::(), + Some(factory.as_ref()) + ); + } + + // Test CsvSource + { + let source = CsvSource::default(); + let source_with_adapter = source + .clone() + .with_schema_adapter_factory(factory.clone()) + .unwrap(); + + let base_source: Arc = source.into(); + assert!(base_source.schema_adapter_factory().is_none()); + assert!(source_with_adapter.schema_adapter_factory().is_some()); + + let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); + assert_eq!( + retrieved_factory + .as_any() + .downcast_ref::(), + Some(factory.as_ref()) + ); + } + + // Test JsonSource + { + let source = JsonSource::default(); + let source_with_adapter = source + .clone() + .with_schema_adapter_factory(factory.clone()) + .unwrap(); + + let base_source: Arc = source.into(); + assert!(base_source.schema_adapter_factory().is_none()); + assert!(source_with_adapter.schema_adapter_factory().is_some()); + + let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); + assert_eq!( + retrieved_factory + .as_any() + .downcast_ref::(), + Some(factory.as_ref()) + ); + } + + Ok(()) } // Helper function to test From for Arc implementations From 06d4ea303c252bf7110574a6de01dd1c7a315e42 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 22 Jul 2025 09:52:33 +0800 Subject: [PATCH 26/41] refactor(schema_adapter): remove outdated comments from test file --- datafusion/core/tests/parquet/schema_adapter.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 73d56ee2035d..571a4eed8553 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -388,10 +388,6 @@ async fn test_custom_schema_adapter_and_custom_expression_adapter() { 
assert_batches_eq!(expected, &batches); } -// ---------------------------------------------------------------------- -// Tests migrated from schema_adaptation/schema_adapter_integration_tests.rs -// ---------------------------------------------------------------------- - /// A schema adapter factory that transforms column names to uppercase #[derive(Debug, PartialEq)] struct UppercaseAdapterFactory {} From 440fbd42652423a338f32767287b66b49d751530 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 22 Jul 2025 09:52:53 +0800 Subject: [PATCH 27/41] UNPICK Revert "refactor(schema_adapter): remove outdated comments from test file" This reverts commit 06d4ea303c252bf7110574a6de01dd1c7a315e42. --- datafusion/core/tests/parquet/schema_adapter.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 571a4eed8553..73d56ee2035d 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -388,6 +388,10 @@ async fn test_custom_schema_adapter_and_custom_expression_adapter() { assert_batches_eq!(expected, &batches); } +// ---------------------------------------------------------------------- +// Tests migrated from schema_adaptation/schema_adapter_integration_tests.rs +// ---------------------------------------------------------------------- + /// A schema adapter factory that transforms column names to uppercase #[derive(Debug, PartialEq)] struct UppercaseAdapterFactory {} From 18e065730d0fbdf8b4445a78a1501fd6ff3d8862 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 22 Jul 2025 09:56:50 +0800 Subject: [PATCH 28/41] fix fmt errors --- datafusion/core/src/datasource/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index c76fc74f4fd0..5cd034c43a0d 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -59,7 +59,6 @@ mod tests { record_batch::RecordBatch, }; use datafusion_common::{record_batch, test_util::batches_to_sort_string}; - use std::any::Any; use datafusion_datasource::{ file::FileSource, file_scan_config::FileScanConfigBuilder, @@ -72,6 +71,7 @@ mod tests { }; use datafusion_datasource_parquet::source::ParquetSource; use datafusion_physical_plan::collect; + use std::any::Any; use std::{fs, sync::Arc}; use tempfile::TempDir; From a3794e6ea408a03d1e7c904da3ede7decb2f19fa Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 22 Jul 2025 09:57:45 +0800 Subject: [PATCH 29/41] refactor(tests): consolidate std imports for clarity --- datafusion/core/src/datasource/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 5cd034c43a0d..1a13d9c39958 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -71,8 +71,7 @@ mod tests { }; use datafusion_datasource_parquet::source::ParquetSource; use datafusion_physical_plan::collect; - use std::any::Any; - use std::{fs, sync::Arc}; + use std::{any::Any, fs, sync::Arc}; use tempfile::TempDir; #[tokio::test] From a2f2fc06d12b39a9cb050760fab998e2a4a16597 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 22 Jul 2025 10:18:48 +0800 Subject: [PATCH 30/41] Remove the duplicated tests and related helper code from the schema adapter test module, keeping only the unique tests --- 
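The duplicated suite removed below asserted factory identity through `as_any`
downcasts. For context, a minimal sketch of that check, assuming the `as_any`
accessor added in PATCH 23 and reusing `UppercaseAdapterFactory` from these
tests (the test name itself is hypothetical):

    #[test]
    fn factory_roundtrip_sketch() -> Result<()> {
        // Attach a concrete factory to a source, read it back as a trait
        // object, and downcast to confirm the concrete type survived.
        let factory = Arc::new(UppercaseAdapterFactory {});
        let source_with_adapter =
            ParquetSource::default().with_schema_adapter_factory(factory)?;
        let retrieved = source_with_adapter.schema_adapter_factory().unwrap();
        assert!(retrieved
            .as_any()
            .downcast_ref::<UppercaseAdapterFactory>()
            .is_some());
        Ok(())
    }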
.../core/tests/parquet/schema_adapter.rs | 271 +----------------- 1 file changed, 1 insertion(+), 270 deletions(-) diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 73d56ee2035d..a48445cb40fc 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -46,8 +46,7 @@ use parquet::arrow::ArrowWriter; #[cfg(feature = "parquet")] use datafusion::datasource::physical_plan::ParquetSource; use datafusion::datasource::physical_plan::{ - ArrowSource, CsvSource, FileOpener, FileScanConfig, FileScanConfigBuilder, - FileSource, JsonSource, + ArrowSource, CsvSource, FileScanConfigBuilder, FileSource, JsonSource, }; use datafusion::datasource::source::DataSourceExec; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; @@ -455,34 +454,6 @@ impl SchemaAdapter for UppercaseAdapter { } } -#[derive(Debug)] -struct TestSchemaMapping { - output_schema: SchemaRef, - projection: Vec, -} - -impl SchemaMapper for TestSchemaMapping { - fn map_batch(&self, batch: RecordBatch) -> Result { - let columns = self - .projection - .iter() - .map(|&i| batch.column(i).clone()) - .collect::>(); - Ok(RecordBatch::try_new(self.output_schema.clone(), columns)?) - } - - fn map_column_statistics( - &self, - stats: &[ColumnStatistics], - ) -> Result> { - Ok(self - .projection - .iter() - .map(|&i| stats.get(i).cloned().unwrap_or_default()) - .collect()) - } -} - impl UppercaseAdapter { fn output_schema(&self) -> SchemaRef { let fields: Vec = self @@ -745,243 +716,3 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { Ok(()) } - -// Helper function to test From for Arc implementations -fn test_from_impl> + Default>(expected_file_type: &str) { - let source = T::default(); - let file_source: Arc = source.into(); - assert_eq!(file_source.file_type(), expected_file_type); -} - -#[test] -fn test_from_implementations() { - // Test From implementation for various sources - test_from_impl::("arrow"); - - #[cfg(feature = "parquet")] - test_from_impl::("parquet"); - - test_from_impl::("csv"); - - test_from_impl::("json"); -} - -/// A simple test schema adapter factory that doesn't modify the schema -#[derive(Debug, PartialEq)] -struct TestSchemaAdapterFactory {} - -impl SchemaAdapterFactory for TestSchemaAdapterFactory { - fn create( - &self, - projected_table_schema: SchemaRef, - _table_schema: SchemaRef, - ) -> Box { - Box::new(TestSchemaAdapter { - input_schema: projected_table_schema, - }) - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -/// A test schema adapter that passes through data unmodified -#[derive(Debug)] -struct TestSchemaAdapter { - input_schema: SchemaRef, -} - -impl SchemaAdapter for TestSchemaAdapter { - fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { - let field = self.input_schema.field(index); - file_schema - .fields() - .iter() - .position(|f| f.name() == field.name()) - } - - fn map_schema( - &self, - file_schema: &Schema, - ) -> Result<(Arc, Vec)> { - let mut projection = Vec::with_capacity(file_schema.fields().len()); - for (idx, file_field) in file_schema.fields().iter().enumerate() { - if self - .input_schema - .fields() - .iter() - .any(|f| f.name() == file_field.name()) - { - projection.push(idx); - } - } - - let mapper = TestSchemaMapping { - output_schema: Arc::clone(&self.input_schema), - projection: projection.clone(), - }; - - Ok((Arc::new(mapper), projection)) - } -} - -#[cfg(feature = "parquet")] -#[test] -fn 
test_schema_adapter_preservation() { - // Create a test schema - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - ])); - - // Create source with schema adapter factory - let source = ParquetSource::default(); - let factory = Arc::new(TestSchemaAdapterFactory {}); - let file_source = source.with_schema_adapter_factory(factory).unwrap(); - - // Create a FileScanConfig with the source - let config_builder = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - schema.clone(), - file_source.clone(), - ) - .with_file(PartitionedFile::new("test.parquet", 100)); - - let config = config_builder.build(); - - // Verify the schema adapter factory is present in the file source - let test_factory = TestSchemaAdapterFactory {}; - assert!(config.file_source().schema_adapter_factory().is_some()); - let _adapter_factory = config.file_source().schema_adapter_factory().unwrap(); - assert_eq!( - _adapter_factory - .as_any() - .downcast_ref::(), - Some(&test_factory) - ); -} - -/// A test source for testing schema adapters -#[derive(Debug, Clone)] -struct TestSource { - schema_adapter_factory: Option>, - metrics: ExecutionPlanMetricsSet, -} - -impl TestSource { - fn new() -> Self { - Self { - schema_adapter_factory: None, - metrics: ExecutionPlanMetricsSet::new(), - } - } -} - -impl FileSource for TestSource { - fn file_type(&self) -> &str { - "test" - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn create_file_opener( - &self, - _store: Arc, - _conf: &FileScanConfig, - _index: usize, - ) -> Arc { - unimplemented!("Not needed for this test") - } - - fn with_batch_size(&self, _batch_size: usize) -> Arc { - Arc::new(self.clone()) - } - - fn with_schema(&self, _schema: SchemaRef) -> Arc { - Arc::new(self.clone()) - } - - fn with_projection(&self, _projection: &FileScanConfig) -> Arc { - Arc::new(self.clone()) - } - - fn with_statistics(&self, _statistics: Statistics) -> Arc { - Arc::new(self.clone()) - } - - fn metrics(&self) -> &ExecutionPlanMetricsSet { - &self.metrics - } - - fn statistics(&self) -> Result { - Ok(Statistics::default()) - } - - fn with_schema_adapter_factory( - &self, - schema_adapter_factory: Arc, - ) -> Result> { - Ok(Arc::new(Self { - schema_adapter_factory: Some(schema_adapter_factory), - metrics: ExecutionPlanMetricsSet::new(), - })) - } - - fn schema_adapter_factory(&self) -> Option> { - self.schema_adapter_factory.clone() - } -} - -#[test] -fn test_schema_adapter() { - // This test verifies the functionality of the SchemaAdapter and SchemaAdapterFactory - // components used in DataFusion's file sources. - // - // The test specifically checks: - // 1. Creating and attaching a schema adapter factory to a file source - // 2. Creating a schema adapter using the factory - // 3. The schema adapter's ability to map column indices between a table schema and a file schema - // 4. The schema adapter's ability to create a projection that selects only the columns - // from the file schema that are present in the table schema - // - // Schema adapters are used when the schema of data in files doesn't exactly match - // the schema expected by the query engine, allowing for field mapping and data transformation. 
- - // Create a test schema - let table_schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - ])); - - // Create a file schema - let file_schema = Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - Field::new("extra", DataType::Int64, true), - ]); - - // Create a TestSource - let source = TestSource::new(); - assert!(source.schema_adapter_factory().is_none()); - - // Add a schema adapter factory - let factory = Arc::new(TestSchemaAdapterFactory {}); - let source_with_adapter = source.with_schema_adapter_factory(factory).unwrap(); - assert!(source_with_adapter.schema_adapter_factory().is_some()); - - // Create a schema adapter - let adapter_factory = source_with_adapter.schema_adapter_factory().unwrap(); - let adapter = - adapter_factory.create(Arc::clone(&table_schema), Arc::clone(&table_schema)); - - // Test mapping column index - assert_eq!(adapter.map_column_index(0, &file_schema), Some(0)); - assert_eq!(adapter.map_column_index(1, &file_schema), Some(1)); - - // Test creating schema mapper - let (_mapper, projection) = adapter.map_schema(&file_schema).unwrap(); - assert_eq!(projection, vec![0, 1]); -} From d116e0203e21eef20085c3a440612dcb9560c555 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 23 Jul 2025 08:50:14 +0800 Subject: [PATCH 31/41] refactor(schema_adapter): remove unused import for ExecutionPlanMetricsSet --- datafusion/core/tests/parquet/schema_adapter.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index a48445cb40fc..4e0e19810103 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -49,8 +49,7 @@ use datafusion::datasource::physical_plan::{ ArrowSource, CsvSource, FileScanConfigBuilder, FileSource, JsonSource, }; use datafusion::datasource::source::DataSourceExec; -use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; -use datafusion::physical_plan::{ExecutionPlan, Statistics}; +use datafusion::physical_plan::ExecutionPlan; use datafusion_datasource::PartitionedFile; async fn write_parquet(batch: RecordBatch, store: Arc, path: &str) { From 494de16070420f76cef2327559a9f2329d84df53 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 25 Jul 2025 08:59:56 +0800 Subject: [PATCH 32/41] Remove unused `as_any` method from schema adapter implementations --- datafusion/core/src/datasource/listing/table.rs | 8 -------- datafusion/core/src/datasource/mod.rs | 6 +----- datafusion/core/tests/parquet/schema_adapter.rs | 14 -------------- datafusion/datasource-parquet/src/opener.rs | 4 ---- .../tests/apply_schema_adapter_tests.rs | 4 ---- datafusion/datasource/src/schema_adapter.rs | 8 +------- 6 files changed, 2 insertions(+), 42 deletions(-) diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 4a7f6bc7ab12..121ab46730b5 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -2912,10 +2912,6 @@ mod tests { error_type: self.error_type, }) } - - fn as_any(&self) -> &dyn Any { - self - } } #[derive(Debug)] @@ -2964,10 +2960,6 @@ mod tests { schema: projected_table_schema, }) } - - fn as_any(&self) -> &dyn Any { - self - } } #[derive(Debug)] diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 
1a13d9c39958..94d651ddadd5 100644
--- a/datafusion/core/src/datasource/mod.rs
+++ b/datafusion/core/src/datasource/mod.rs
@@ -71,7 +71,7 @@ mod tests {
     };
     use datafusion_datasource_parquet::source::ParquetSource;
     use datafusion_physical_plan::collect;
-    use std::{any::Any, fs, sync::Arc};
+    use std::{fs, sync::Arc};
     use tempfile::TempDir;
 
     #[tokio::test]
@@ -214,10 +214,6 @@ mod tests {
             table_schema: projected_table_schema,
         })
     }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
 }
 
 struct TestSchemaAdapter {
diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs
index 4e0e19810103..d5a7fc505e94 100644
--- a/datafusion/core/tests/parquet/schema_adapter.rs
+++ b/datafusion/core/tests/parquet/schema_adapter.rs
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::any::Any;
 use std::sync::Arc;
 
 use arrow::array::{record_batch, RecordBatch, RecordBatchOptions};
@@ -43,15 +42,6 @@ use itertools::Itertools;
 use object_store::{memory::InMemory, path::Path, ObjectStore};
 use parquet::arrow::ArrowWriter;
 
-#[cfg(feature = "parquet")]
-use datafusion::datasource::physical_plan::ParquetSource;
-use datafusion::datasource::physical_plan::{
-    ArrowSource, CsvSource, FileScanConfigBuilder, FileSource, JsonSource,
-};
-use datafusion::datasource::source::DataSourceExec;
-use datafusion::physical_plan::ExecutionPlan;
-use datafusion_datasource::PartitionedFile;
-
 async fn write_parquet(batch: RecordBatch, store: Arc<dyn ObjectStore>, path: &str) {
     let mut out = BytesMut::new().writer();
     {
@@ -76,10 +66,6 @@ impl SchemaAdapterFactory for CustomSchemaAdapterFactory {
             logical_file_schema: projected_table_schema,
         })
     }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
 }
 
 #[derive(Debug)]
diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs
index a809e46fe979..7c208d1426ac 100644
--- a/datafusion/datasource-parquet/src/opener.rs
+++ b/datafusion/datasource-parquet/src/opener.rs
@@ -1213,10 +1213,6 @@ mod test {
     ) -> Box<dyn SchemaAdapter> {
         Box::new(CustomSchemaAdapter)
     }
-
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
 }
 
 // Test that if no expression rewriter is provided we use a schema adapter to adapt the data to the expression
diff --git a/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs b/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs
index e15393e1fb3a..e9288a5f80f6 100644
--- a/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs
+++ b/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs
@@ -47,10 +47,6 @@ mod parquet_adapter_tests {
             prefix: self.prefix.clone(),
         })
     }
-
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
 }
 
 /// A test schema adapter that adds prefix to column names
diff --git a/datafusion/datasource/src/schema_adapter.rs b/datafusion/datasource/src/schema_adapter.rs
index 6e959878928b..5e743a3f0c23 100644
--- a/datafusion/datasource/src/schema_adapter.rs
+++ b/datafusion/datasource/src/schema_adapter.rs
@@ -29,7 +29,7 @@ use datafusion_common::{
     nested_struct::{cast_column, validate_struct_compatibility},
     plan_err, ColumnStatistics,
 };
-use std::{any::Any, fmt::Debug, sync::Arc};
+use std::{fmt::Debug, sync::Arc};
 
 /// Function used by [`SchemaMapping`] to adapt a column from the file schema to
 /// the table schema.
pub type CastColumnFn = @@ -68,8 +68,6 @@ pub trait SchemaAdapterFactory: Debug + Send + Sync + 'static { ) -> Box { self.create(Arc::clone(&projected_table_schema), projected_table_schema) } - /// Give us access to Any so callers can downcast. - fn as_any(&self) -> &dyn Any; } /// Creates [`SchemaMapper`]s to map file-level [`RecordBatch`]es to a table @@ -234,10 +232,6 @@ impl SchemaAdapterFactory for DefaultSchemaAdapterFactory { projected_table_schema, }) } - - fn as_any(&self) -> &dyn Any { - self - } } /// This SchemaAdapter requires both the table schema and the projected table From a362bcf848f1c24506a82326a7d5669311a42730 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 25 Jul 2025 09:42:23 +0800 Subject: [PATCH 33/41] Refactor schema adapter tests to remove unused `as_any` method and improve type checking --- .../core/tests/parquet/schema_adapter.rs | 39 ++++++------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index d5a7fc505e94..21f7b7bc0996 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -23,14 +23,20 @@ use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaRef}; use bytes::{BufMut, BytesMut}; use datafusion::assert_batches_eq; use datafusion::common::Result; +use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::listing::{ListingTable, ListingTableConfig}; +use datafusion::datasource::physical_plan::{ + ArrowSource, CsvSource, FileSource, JsonSource, ParquetSource, +}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::DataFusionError; use datafusion_common::{ColumnStatistics, ScalarValue}; +use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::schema_adapter::{ DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, SchemaMapper, }; +use datafusion_datasource::source::DataSourceExec; use datafusion_datasource::ListingTableUrl; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_physical_expr::expressions::{self, Column}; @@ -38,6 +44,7 @@ use datafusion_physical_expr::schema_rewriter::{ DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, PhysicalExprAdapterFactory, }; use datafusion_physical_expr::{DefaultPhysicalExprAdapter, PhysicalExpr}; +use datafusion_physical_plan::ExecutionPlan; use itertools::Itertools; use object_store::{memory::InMemory, path::Path, ObjectStore}; use parquet::arrow::ArrowWriter; @@ -390,10 +397,6 @@ impl SchemaAdapterFactory for UppercaseAdapterFactory { table_schema: projected_table_schema, }) } - - fn as_any(&self) -> &dyn Any { - self - } } /// Schema adapter that transforms column names to uppercase @@ -627,12 +630,7 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { assert!(source_with_adapter.schema_adapter_factory().is_some()); let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!( - retrieved_factory - .as_any() - .downcast_ref::(), - Some(factory.as_ref()) - ); + assert_eq!(format!("{:?}", retrieved_factory.as_ref()), format!("{:?}", factory.as_ref())); } // Test ParquetSource @@ -649,12 +647,7 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { assert!(source_with_adapter.schema_adapter_factory().is_some()); let retrieved_factory = 
source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!( - retrieved_factory - .as_any() - .downcast_ref::(), - Some(factory.as_ref()) - ); + assert_eq!(format!("{:?}", retrieved_factory.as_ref()), format!("{:?}", factory.as_ref())); } // Test CsvSource @@ -670,12 +663,7 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { assert!(source_with_adapter.schema_adapter_factory().is_some()); let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!( - retrieved_factory - .as_any() - .downcast_ref::(), - Some(factory.as_ref()) - ); + assert_eq!(format!("{:?}", retrieved_factory.as_ref()), format!("{:?}", factory.as_ref())); } // Test JsonSource @@ -691,12 +679,7 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { assert!(source_with_adapter.schema_adapter_factory().is_some()); let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!( - retrieved_factory - .as_any() - .downcast_ref::(), - Some(factory.as_ref()) - ); + assert_eq!(format!("{:?}", retrieved_factory.as_ref()), format!("{:?}", factory.as_ref())); } Ok(()) From 0e8f15fa2b4be833b9078dd40ee191c9e1e0f332 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 25 Jul 2025 10:49:52 +0800 Subject: [PATCH 34/41] Fix fmt errors --- .../core/tests/parquet/schema_adapter.rs | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 21f7b7bc0996..9dbd509cab4b 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -630,7 +630,10 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { assert!(source_with_adapter.schema_adapter_factory().is_some()); let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!(format!("{:?}", retrieved_factory.as_ref()), format!("{:?}", factory.as_ref())); + assert_eq!( + format!("{:?}", retrieved_factory.as_ref()), + format!("{:?}", factory.as_ref()) + ); } // Test ParquetSource @@ -647,7 +650,10 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { assert!(source_with_adapter.schema_adapter_factory().is_some()); let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!(format!("{:?}", retrieved_factory.as_ref()), format!("{:?}", factory.as_ref())); + assert_eq!( + format!("{:?}", retrieved_factory.as_ref()), + format!("{:?}", factory.as_ref()) + ); } // Test CsvSource @@ -663,7 +669,10 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { assert!(source_with_adapter.schema_adapter_factory().is_some()); let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!(format!("{:?}", retrieved_factory.as_ref()), format!("{:?}", factory.as_ref())); + assert_eq!( + format!("{:?}", retrieved_factory.as_ref()), + format!("{:?}", factory.as_ref()) + ); } // Test JsonSource @@ -679,7 +688,10 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { assert!(source_with_adapter.schema_adapter_factory().is_some()); let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!(format!("{:?}", retrieved_factory.as_ref()), format!("{:?}", factory.as_ref())); + assert_eq!( + format!("{:?}", retrieved_factory.as_ref()), + format!("{:?}", factory.as_ref()) + ); } Ok(()) From e34b2bc86b03910f845fb60092db5ae7874862ba Mon Sep 17 00:00:00 2001 From: kosiew 
Date: Sun, 27 Jul 2025 15:49:18 +0800 Subject: [PATCH 35/41] refactor: move schema adapter integration tests move integration tests from parquet/schema_adapter.rs add new integration_tests/schema_adapter module add root driver schema_adapter_integration.rs --- .../integration_tests/schema_adapter/mod.rs | 1 + .../schema_adapter_integration_tests.rs | 317 +++++++++++++++++ .../core/tests/parquet/schema_adapter.rs | 318 ------------------ .../core/tests/schema_adapter_integration.rs | 21 ++ 4 files changed, 339 insertions(+), 318 deletions(-) create mode 100644 datafusion/core/tests/integration_tests/schema_adapter/mod.rs create mode 100644 datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs create mode 100644 datafusion/core/tests/schema_adapter_integration.rs diff --git a/datafusion/core/tests/integration_tests/schema_adapter/mod.rs b/datafusion/core/tests/integration_tests/schema_adapter/mod.rs new file mode 100644 index 000000000000..68ea355b5d9d --- /dev/null +++ b/datafusion/core/tests/integration_tests/schema_adapter/mod.rs @@ -0,0 +1 @@ +mod schema_adapter_integration_tests; diff --git a/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs b/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs new file mode 100644 index 000000000000..4904e30aceb1 --- /dev/null +++ b/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs @@ -0,0 +1,317 @@ +// ---------------------------------------------------------------------- +// Tests migrated from schema_adaptation/schema_adapter_integration_tests.rs +// ---------------------------------------------------------------------- + +/// A schema adapter factory that transforms column names to uppercase +#[derive(Debug, PartialEq)] +struct UppercaseAdapterFactory {} + +impl SchemaAdapterFactory for UppercaseAdapterFactory { + fn create( + &self, + projected_table_schema: SchemaRef, + _table_schema: SchemaRef, + ) -> Box { + Box::new(UppercaseAdapter { + table_schema: projected_table_schema, + }) + } +} + +/// Schema adapter that transforms column names to uppercase +#[derive(Debug)] +struct UppercaseAdapter { + table_schema: SchemaRef, +} + +impl SchemaAdapter for UppercaseAdapter { + fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { + let field = self.table_schema.field(index); + let uppercase_name = field.name().to_uppercase(); + file_schema + .fields() + .iter() + .position(|f| f.name().to_uppercase() == uppercase_name) + } + + fn map_schema( + &self, + file_schema: &Schema, + ) -> Result<(Arc, Vec)> { + let mut projection = Vec::new(); + + // Map each field in the table schema to the corresponding field in the file schema + for table_field in self.table_schema.fields() { + let uppercase_name = table_field.name().to_uppercase(); + if let Some(pos) = file_schema + .fields() + .iter() + .position(|f| f.name().to_uppercase() == uppercase_name) + { + projection.push(pos); + } + } + + let mapper = UppercaseSchemaMapper { + output_schema: self.output_schema(), + projection: projection.clone(), + }; + + Ok((Arc::new(mapper), projection)) + } +} + +impl UppercaseAdapter { + fn output_schema(&self) -> SchemaRef { + let fields: Vec = self + .table_schema + .fields() + .iter() + .map(|f| { + Field::new( + f.name().to_uppercase().as_str(), + f.data_type().clone(), + f.is_nullable(), + ) + }) + .collect(); + + Arc::new(Schema::new(fields)) + } +} + +#[derive(Debug)] +struct 
UppercaseSchemaMapper { + output_schema: SchemaRef, + projection: Vec, +} + +impl SchemaMapper for UppercaseSchemaMapper { + fn map_batch(&self, batch: RecordBatch) -> Result { + let columns = self + .projection + .iter() + .map(|&i| batch.column(i).clone()) + .collect::>(); + Ok(RecordBatch::try_new(self.output_schema.clone(), columns)?) + } + + fn map_column_statistics( + &self, + stats: &[ColumnStatistics], + ) -> Result> { + Ok(self + .projection + .iter() + .map(|&i| stats.get(i).cloned().unwrap_or_default()) + .collect()) + } +} + +#[cfg(feature = "parquet")] +#[tokio::test] +async fn test_parquet_integration_with_schema_adapter() -> Result<()> { + // Create test data + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, true), + ])), + vec![ + Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])), + Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])), + ], + )?; + + let store = Arc::new(InMemory::new()) as Arc; + let store_url = ObjectStoreUrl::parse("memory://").unwrap(); + let path = "test.parquet"; + write_parquet(batch.clone(), store.clone(), path).await; + + // Get the actual file size from the object store + let object_meta = store.head(&Path::from(path)).await?; + let file_size = object_meta.size; + + // Create a session context and register the object store + let ctx = SessionContext::new(); + ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); + + // Create a ParquetSource with the adapter factory + let file_source = ParquetSource::default() + .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; + + // Create a table schema with uppercase column names + let table_schema = Arc::new(Schema::new(vec![ + Field::new("ID", DataType::Int32, false), + Field::new("NAME", DataType::Utf8, true), + ])); + + let config = FileScanConfigBuilder::new(store_url, table_schema.clone(), file_source) + .with_file(PartitionedFile::new(path, file_size)) + .build(); + + // Create a data source executor + let exec = DataSourceExec::from_data_source(config); + + // Collect results + let task_ctx = ctx.task_ctx(); + let stream = exec.execute(0, task_ctx)?; + let batches = datafusion::physical_plan::common::collect(stream).await?; + + // There should be one batch + assert_eq!(batches.len(), 1); + + // Verify the schema has the uppercase column names + let result_schema = batches[0].schema(); + assert_eq!(result_schema.field(0).name(), "ID"); + assert_eq!(result_schema.field(1).name(), "NAME"); + + Ok(()) +} + +#[cfg(feature = "parquet")] +#[tokio::test] +async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter( +) -> Result<()> { + // Create test data + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, true), + ])), + vec![ + Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])), + Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])), + ], + )?; + + let store = Arc::new(InMemory::new()) as Arc; + let store_url = ObjectStoreUrl::parse("memory://").unwrap(); + let path = "test.parquet"; + write_parquet(batch.clone(), store.clone(), path).await; + + // Get the actual file size from the object store + let object_meta = store.head(&Path::from(path)).await?; + let file_size = object_meta.size; + + // Create a session context and register the object store + let ctx = SessionContext::new(); + ctx.register_object_store(store_url.as_ref(), 
Arc::clone(&store)); + + // Create a ParquetSource with the adapter factory + let file_source = ParquetSource::default() + .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; + + let config = FileScanConfigBuilder::new(store_url, batch.schema(), file_source) + .with_file(PartitionedFile::new(path, file_size)) + .build(); + + // Create a data source executor + let exec = DataSourceExec::from_data_source(config); + + // Collect results + let task_ctx = ctx.task_ctx(); + let stream = exec.execute(0, task_ctx)?; + let batches = datafusion::physical_plan::common::collect(stream).await?; + + // There should be one batch + assert_eq!(batches.len(), 1); + + // Verify the schema has the original column names (schema adapter not applied in DataSourceExec) + let result_schema = batches[0].schema(); + assert_eq!(result_schema.field(0).name(), "id"); + assert_eq!(result_schema.field(1).name(), "name"); + + Ok(()) +} + +#[tokio::test] +async fn test_multi_source_schema_adapter_reuse() -> Result<()> { + // This test verifies that the same schema adapter factory can be reused + // across different file source types. This is important for ensuring that: + // 1. The schema adapter factory interface works uniformly across all source types + // 2. The factory can be shared and cloned efficiently using Arc + // 3. Various data source implementations correctly implement the schema adapter factory pattern + + // Create a test factory + let factory = Arc::new(UppercaseAdapterFactory {}); + + // Test ArrowSource + { + let source = ArrowSource::default(); + let source_with_adapter = source + .clone() + .with_schema_adapter_factory(factory.clone()) + .unwrap(); + + let base_source: Arc = source.into(); + assert!(base_source.schema_adapter_factory().is_none()); + assert!(source_with_adapter.schema_adapter_factory().is_some()); + + let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); + assert_eq!( + format!("{:?}", retrieved_factory.as_ref()), + format!("{:?}", factory.as_ref()) + ); + } + + // Test ParquetSource + #[cfg(feature = "parquet")] + { + let source = ParquetSource::default(); + let source_with_adapter = source + .clone() + .with_schema_adapter_factory(factory.clone()) + .unwrap(); + + let base_source: Arc = source.into(); + assert!(base_source.schema_adapter_factory().is_none()); + assert!(source_with_adapter.schema_adapter_factory().is_some()); + + let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); + assert_eq!( + format!("{:?}", retrieved_factory.as_ref()), + format!("{:?}", factory.as_ref()) + ); + } + + // Test CsvSource + { + let source = CsvSource::default(); + let source_with_adapter = source + .clone() + .with_schema_adapter_factory(factory.clone()) + .unwrap(); + + let base_source: Arc = source.into(); + assert!(base_source.schema_adapter_factory().is_none()); + assert!(source_with_adapter.schema_adapter_factory().is_some()); + + let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); + assert_eq!( + format!("{:?}", retrieved_factory.as_ref()), + format!("{:?}", factory.as_ref()) + ); + } + + // Test JsonSource + { + let source = JsonSource::default(); + let source_with_adapter = source + .clone() + .with_schema_adapter_factory(factory.clone()) + .unwrap(); + + let base_source: Arc = source.into(); + assert!(base_source.schema_adapter_factory().is_none()); + assert!(source_with_adapter.schema_adapter_factory().is_some()); + + let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); + 
assert_eq!( + format!("{:?}", retrieved_factory.as_ref()), + format!("{:?}", factory.as_ref()) + ); + } + + Ok(()) +} diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 9dbd509cab4b..4a30e1b812ce 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -378,321 +378,3 @@ async fn test_custom_schema_adapter_and_custom_expression_adapter() { ]; assert_batches_eq!(expected, &batches); } - -// ---------------------------------------------------------------------- -// Tests migrated from schema_adaptation/schema_adapter_integration_tests.rs -// ---------------------------------------------------------------------- - -/// A schema adapter factory that transforms column names to uppercase -#[derive(Debug, PartialEq)] -struct UppercaseAdapterFactory {} - -impl SchemaAdapterFactory for UppercaseAdapterFactory { - fn create( - &self, - projected_table_schema: SchemaRef, - _table_schema: SchemaRef, - ) -> Box { - Box::new(UppercaseAdapter { - table_schema: projected_table_schema, - }) - } -} - -/// Schema adapter that transforms column names to uppercase -#[derive(Debug)] -struct UppercaseAdapter { - table_schema: SchemaRef, -} - -impl SchemaAdapter for UppercaseAdapter { - fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { - let field = self.table_schema.field(index); - let uppercase_name = field.name().to_uppercase(); - file_schema - .fields() - .iter() - .position(|f| f.name().to_uppercase() == uppercase_name) - } - - fn map_schema( - &self, - file_schema: &Schema, - ) -> Result<(Arc, Vec)> { - let mut projection = Vec::new(); - - // Map each field in the table schema to the corresponding field in the file schema - for table_field in self.table_schema.fields() { - let uppercase_name = table_field.name().to_uppercase(); - if let Some(pos) = file_schema - .fields() - .iter() - .position(|f| f.name().to_uppercase() == uppercase_name) - { - projection.push(pos); - } - } - - let mapper = UppercaseSchemaMapper { - output_schema: self.output_schema(), - projection: projection.clone(), - }; - - Ok((Arc::new(mapper), projection)) - } -} - -impl UppercaseAdapter { - fn output_schema(&self) -> SchemaRef { - let fields: Vec = self - .table_schema - .fields() - .iter() - .map(|f| { - Field::new( - f.name().to_uppercase().as_str(), - f.data_type().clone(), - f.is_nullable(), - ) - }) - .collect(); - - Arc::new(Schema::new(fields)) - } -} - -#[derive(Debug)] -struct UppercaseSchemaMapper { - output_schema: SchemaRef, - projection: Vec, -} - -impl SchemaMapper for UppercaseSchemaMapper { - fn map_batch(&self, batch: RecordBatch) -> Result { - let columns = self - .projection - .iter() - .map(|&i| batch.column(i).clone()) - .collect::>(); - Ok(RecordBatch::try_new(self.output_schema.clone(), columns)?) 
-    }
-
-    fn map_column_statistics(
-        &self,
-        stats: &[ColumnStatistics],
-    ) -> Result<Vec<ColumnStatistics>> {
-        Ok(self
-            .projection
-            .iter()
-            .map(|&i| stats.get(i).cloned().unwrap_or_default())
-            .collect())
-    }
-}
-
-#[cfg(feature = "parquet")]
-#[tokio::test]
-async fn test_parquet_integration_with_schema_adapter() -> Result<()> {
-    // Create test data
-    let batch = RecordBatch::try_new(
-        Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("name", DataType::Utf8, true),
-        ])),
-        vec![
-            Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])),
-            Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])),
-        ],
-    )?;
-
-    let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
-    let store_url = ObjectStoreUrl::parse("memory://").unwrap();
-    let path = "test.parquet";
-    write_parquet(batch.clone(), store.clone(), path).await;
-
-    // Get the actual file size from the object store
-    let object_meta = store.head(&Path::from(path)).await?;
-    let file_size = object_meta.size;
-
-    // Create a session context and register the object store
-    let ctx = SessionContext::new();
-    ctx.register_object_store(store_url.as_ref(), Arc::clone(&store));
-
-    // Create a ParquetSource with the adapter factory
-    let file_source = ParquetSource::default()
-        .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
-
-    // Create a table schema with uppercase column names
-    let table_schema = Arc::new(Schema::new(vec![
-        Field::new("ID", DataType::Int32, false),
-        Field::new("NAME", DataType::Utf8, true),
-    ]));
-
-    let config = FileScanConfigBuilder::new(store_url, table_schema.clone(), file_source)
-        .with_file(PartitionedFile::new(path, file_size))
-        .build();
-
-    // Create a data source executor
-    let exec = DataSourceExec::from_data_source(config);
-
-    // Collect results
-    let task_ctx = ctx.task_ctx();
-    let stream = exec.execute(0, task_ctx)?;
-    let batches = datafusion::physical_plan::common::collect(stream).await?;
-
-    // There should be one batch
-    assert_eq!(batches.len(), 1);
-
-    // Verify the schema has the uppercase column names
-    let result_schema = batches[0].schema();
-    assert_eq!(result_schema.field(0).name(), "ID");
-    assert_eq!(result_schema.field(1).name(), "NAME");
-
-    Ok(())
-}
-
-#[cfg(feature = "parquet")]
-#[tokio::test]
-async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter(
-) -> Result<()> {
-    // Create test data
-    let batch = RecordBatch::try_new(
-        Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("name", DataType::Utf8, true),
-        ])),
-        vec![
-            Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])),
-            Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])),
-        ],
-    )?;
-
-    let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
-    let store_url = ObjectStoreUrl::parse("memory://").unwrap();
-    let path = "test.parquet";
-    write_parquet(batch.clone(), store.clone(), path).await;
-
-    // Get the actual file size from the object store
-    let object_meta = store.head(&Path::from(path)).await?;
-    let file_size = object_meta.size;
-
-    // Create a session context and register the object store
-    let ctx = SessionContext::new();
-    ctx.register_object_store(store_url.as_ref(), Arc::clone(&store));
-
-    // Create a ParquetSource with the adapter factory
-    let file_source = ParquetSource::default()
-        .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?;
-
-    let config = FileScanConfigBuilder::new(store_url, batch.schema(), file_source)
-        .with_file(PartitionedFile::new(path, file_size))
-        .build();
-
-    // Create a data source executor
-    let exec = DataSourceExec::from_data_source(config);
-
-    // Collect results
-    let task_ctx = ctx.task_ctx();
-    let stream = exec.execute(0, task_ctx)?;
-    let batches = datafusion::physical_plan::common::collect(stream).await?;
-
-    // There should be one batch
-    assert_eq!(batches.len(), 1);
-
-    // Verify the schema has the original column names (schema adapter not applied in DataSourceExec)
-    let result_schema = batches[0].schema();
-    assert_eq!(result_schema.field(0).name(), "id");
-    assert_eq!(result_schema.field(1).name(), "name");
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn test_multi_source_schema_adapter_reuse() -> Result<()> {
-    // This test verifies that the same schema adapter factory can be reused
-    // across different file source types. This is important for ensuring that:
-    // 1. The schema adapter factory interface works uniformly across all source types
-    // 2. The factory can be shared and cloned efficiently using Arc
-    // 3. Various data source implementations correctly implement the schema adapter factory pattern
-
-    // Create a test factory
-    let factory = Arc::new(UppercaseAdapterFactory {});
-
-    // Test ArrowSource
-    {
-        let source = ArrowSource::default();
-        let source_with_adapter = source
-            .clone()
-            .with_schema_adapter_factory(factory.clone())
-            .unwrap();
-
-        let base_source: Arc<dyn FileSource> = source.into();
-        assert!(base_source.schema_adapter_factory().is_none());
-        assert!(source_with_adapter.schema_adapter_factory().is_some());
-
-        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
-        assert_eq!(
-            format!("{:?}", retrieved_factory.as_ref()),
-            format!("{:?}", factory.as_ref())
-        );
-    }
-
-    // Test ParquetSource
-    #[cfg(feature = "parquet")]
-    {
-        let source = ParquetSource::default();
-        let source_with_adapter = source
-            .clone()
-            .with_schema_adapter_factory(factory.clone())
-            .unwrap();
-
-        let base_source: Arc<dyn FileSource> = source.into();
-        assert!(base_source.schema_adapter_factory().is_none());
-        assert!(source_with_adapter.schema_adapter_factory().is_some());
-
-        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
-        assert_eq!(
-            format!("{:?}", retrieved_factory.as_ref()),
-            format!("{:?}", factory.as_ref())
-        );
-    }
-
-    // Test CsvSource
-    {
-        let source = CsvSource::default();
-        let source_with_adapter = source
-            .clone()
-            .with_schema_adapter_factory(factory.clone())
-            .unwrap();
-
-        let base_source: Arc<dyn FileSource> = source.into();
-        assert!(base_source.schema_adapter_factory().is_none());
-        assert!(source_with_adapter.schema_adapter_factory().is_some());
-
-        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
-        assert_eq!(
-            format!("{:?}", retrieved_factory.as_ref()),
-            format!("{:?}", factory.as_ref())
-        );
-    }
-
-    // Test JsonSource
-    {
-        let source = JsonSource::default();
-        let source_with_adapter = source
-            .clone()
-            .with_schema_adapter_factory(factory.clone())
-            .unwrap();
-
-        let base_source: Arc<dyn FileSource> = source.into();
-        assert!(base_source.schema_adapter_factory().is_none());
-        assert!(source_with_adapter.schema_adapter_factory().is_some());
-
-        let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap();
-        assert_eq!(
-            format!("{:?}", retrieved_factory.as_ref()),
-            format!("{:?}", factory.as_ref())
-        );
-    }
-
-    Ok(())
-}
diff --git a/datafusion/core/tests/schema_adapter_integration.rs b/datafusion/core/tests/schema_adapter_integration.rs
new file mode 100644
index 000000000000..0ddfa059eb9a
--- /dev/null
+++ b/datafusion/core/tests/schema_adapter_integration.rs
@@ -0,0 +1,21 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// Run all tests that are found in the `integration_tests/schema_adapter` directory
+mod integration_tests {
+    pub mod schema_adapter;
+}

From ad5e92b5a31aab5560a5236b68565d21d4de384a Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Sun, 27 Jul 2025 16:17:37 +0800
Subject: [PATCH 36/41] chore: update license header in schema adapter integration tests

---
 .../schema_adapter_integration_tests.rs       | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs b/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs
index 4904e30aceb1..abe0d46bb4c4 100644
--- a/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs
+++ b/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs
@@ -1,6 +1,19 @@
-// ----------------------------------------------------------------------
-// Tests migrated from schema_adaptation/schema_adapter_integration_tests.rs
-// ----------------------------------------------------------------------
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
 
 /// A schema adapter factory that transforms column names to uppercase
 #[derive(Debug, PartialEq)]

From 1fa04c240c44924d13ae8f2651786d575cc2d244 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Sun, 27 Jul 2025 17:03:26 +0800
Subject: [PATCH 37/41] Add integration tests for schema adapter

- Moved existing schema adapter integration tests from
  `schema_adaptation/schema_adapter_integration_tests.rs` to a new module in
  `datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs`.
- Created a new file `schema_adapter.rs` in the integration tests folder to
  run and organize the tests under the schema adapter directory.
- The tests validate a schema adapter that uppercases column names,
  exercising it across the different file source types.
- Organized the tests for future maintainability and a clearer directory
  structure.

---
 .../schema_adapter_integration_tests.rs       | 52 +++++++++++++------
 1 file changed, 36 insertions(+), 16 deletions(-)

diff --git a/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs b/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs
index abe0d46bb4c4..ce5363b0e16f 100644
--- a/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs
+++ b/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs
@@ -1,19 +1,39 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
+// ----------------------------------------------------------------------
+// Tests migrated from schema_adaptation/schema_adapter_integration_tests.rs
+// ----------------------------------------------------------------------
+
+use std::sync::Arc;
+
+use arrow::array::RecordBatch;
+use arrow_schema::{DataType, Field, Schema, SchemaRef};
+use bytes::{BufMut, BytesMut};
+use datafusion::common::Result;
+use datafusion::datasource::listing::PartitionedFile;
+use datafusion::datasource::physical_plan::{
+    ArrowSource, CsvSource, FileSource, JsonSource, ParquetSource,
+};
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion::prelude::SessionContext;
+use datafusion_common::ColumnStatistics;
+use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
+use datafusion_datasource::schema_adapter::{
+    SchemaAdapter, SchemaAdapterFactory, SchemaMapper,
+};
+use datafusion_datasource::source::DataSourceExec;
+use datafusion_execution::object_store::ObjectStoreUrl;
+use object_store::{memory::InMemory, path::Path, ObjectStore};
+use parquet::arrow::ArrowWriter;
+
+async fn write_parquet(batch: RecordBatch, store: Arc<dyn ObjectStore>, path: &str) {
+    let mut out = BytesMut::new().writer();
+    {
+        let mut writer = ArrowWriter::try_new(&mut out, batch.schema(), None).unwrap();
+        writer.write(&batch).unwrap();
+        writer.finish().unwrap();
+    }
+    let data = out.into_inner().freeze();
+    store.put(&Path::from(path), data.into()).await.unwrap();
+}
 
 /// A schema adapter factory that transforms column names to uppercase
 #[derive(Debug, PartialEq)]

From 85e29beae3c1a2db3e9fc1eefa0e3c9b2dc26f76 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Sun, 27 Jul 2025 17:11:23 +0800
Subject: [PATCH 38/41] Add Apache License header to schema adapter integration tests file

---
 .../schema_adapter_integration_tests.rs       | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs b/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs
index ce5363b0e16f..c3c92a9028d6 100644
--- a/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs
+++ b/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs
@@ -1,6 +1,19 @@
-// ----------------------------------------------------------------------
-// Tests migrated from schema_adaptation/schema_adapter_integration_tests.rs
-// ----------------------------------------------------------------------
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
 
 use std::sync::Arc;

From 67213f4882a982efd7afee11ee910619320a1e32 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn
Date: Sun, 27 Jul 2025 17:16:05 +0800
Subject: [PATCH 39/41] chore: add Apache License header to schema adapter integration tests file

---
 .../integration_tests/schema_adapter/mod.rs   | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/datafusion/core/tests/integration_tests/schema_adapter/mod.rs b/datafusion/core/tests/integration_tests/schema_adapter/mod.rs
index 68ea355b5d9d..2f81a43f4736 100644
--- a/datafusion/core/tests/integration_tests/schema_adapter/mod.rs
+++ b/datafusion/core/tests/integration_tests/schema_adapter/mod.rs
@@ -1 +1,18 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
 mod schema_adapter_integration_tests;

From 112f8b6418abbbe32e4d9869c51c717a0835fabc Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Sun, 27 Jul 2025 06:22:15 -0400
Subject: [PATCH 40/41] Clippy

---
 datafusion/core/tests/parquet/schema_adapter.rs | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs
index 4a30e1b812ce..f9a46f2e240f 100644
--- a/datafusion/core/tests/parquet/schema_adapter.rs
+++ b/datafusion/core/tests/parquet/schema_adapter.rs
@@ -23,20 +23,14 @@ use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaRef};
 use bytes::{BufMut, BytesMut};
 use datafusion::assert_batches_eq;
 use datafusion::common::Result;
-use datafusion::datasource::listing::PartitionedFile;
 use datafusion::datasource::listing::{ListingTable, ListingTableConfig};
-use datafusion::datasource::physical_plan::{
-    ArrowSource, CsvSource, FileSource, JsonSource, ParquetSource,
-};
 use datafusion::prelude::{SessionConfig, SessionContext};
 use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
 use datafusion_common::DataFusionError;
 use datafusion_common::{ColumnStatistics, ScalarValue};
-use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
 use datafusion_datasource::schema_adapter::{
     DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, SchemaMapper,
 };
-use datafusion_datasource::source::DataSourceExec;
 use datafusion_datasource::ListingTableUrl;
 use datafusion_execution::object_store::ObjectStoreUrl;
 use datafusion_physical_expr::expressions::{self, Column};
@@ -44,7 +38,6 @@ use datafusion_physical_expr::schema_rewriter::{
     DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, PhysicalExprAdapterFactory,
 };
 use datafusion_physical_expr::{DefaultPhysicalExprAdapter, PhysicalExpr};
-use datafusion_physical_plan::ExecutionPlan;
 use itertools::Itertools;
 use object_store::{memory::InMemory, path::Path, ObjectStore};
 use parquet::arrow::ArrowWriter;

From 74d8a6d548005e8d00b74a43d512298b3dbbc7c9 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Sun, 27 Jul 2025 06:45:15 -0400
Subject: [PATCH 41/41] Move schema adapter tests to the `core_integration` binary

---
 datafusion/core/tests/core_integration.rs     |  3 +++
 .../schema_adapter/mod.rs                     |  0
 .../schema_adapter_integration_tests.rs       |  0
 .../core/tests/schema_adapter_integration.rs  | 21 -------------------
 4 files changed, 3 insertions(+), 21 deletions(-)
 rename datafusion/core/tests/{integration_tests => }/schema_adapter/mod.rs (100%)
 rename datafusion/core/tests/{integration_tests => }/schema_adapter/schema_adapter_integration_tests.rs (100%)
 delete mode 100644 datafusion/core/tests/schema_adapter_integration.rs

diff --git a/datafusion/core/tests/core_integration.rs b/datafusion/core/tests/core_integration.rs
index 250538b13370..e37a368f0771 100644
--- a/datafusion/core/tests/core_integration.rs
+++ b/datafusion/core/tests/core_integration.rs
@@ -45,6 +45,9 @@ mod optimizer;
 /// Run all tests that are found in the `physical_optimizer` directory
 mod physical_optimizer;
 
+/// Run all tests that are found in the `schema_adapter` directory
+mod schema_adapter;
+
 /// Run all tests that are found in the `serde` directory
 mod serde;

diff --git a/datafusion/core/tests/integration_tests/schema_adapter/mod.rs b/datafusion/core/tests/schema_adapter/mod.rs
similarity index 100%
rename from datafusion/core/tests/integration_tests/schema_adapter/mod.rs
rename to datafusion/core/tests/schema_adapter/mod.rs
diff --git a/datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs b/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs
similarity index 100%
rename from datafusion/core/tests/integration_tests/schema_adapter/schema_adapter_integration_tests.rs
rename to datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs
diff --git a/datafusion/core/tests/schema_adapter_integration.rs b/datafusion/core/tests/schema_adapter_integration.rs
deleted file mode 100644
index 0ddfa059eb9a..000000000000
--- a/datafusion/core/tests/schema_adapter_integration.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-/// Run all tests that are found in the `integration_tests/schema_adapter` directory
-mod integration_tests {
-    pub mod schema_adapter;
-}
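
Reviewer note: the mapping these tests exercise reduces to a small amount of arrow code. Below is a minimal standalone sketch, using only the `arrow` crate; the helper names `find_file_column` and `uppercase_batch` and the `main` harness are illustrative assumptions, not part of this patch series or of DataFusion's API. It mirrors what `UppercaseAdapter::map_column_index` and `UppercaseSchemaMapper::map_batch` do: resolve table columns against the file schema case-insensitively, then rebuild the batch under a schema with uppercased field names.

```rust
// Illustrative sketch only; `find_file_column`, `uppercase_batch`, and this
// harness are hypothetical names, not DataFusion APIs.
use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;

/// Case-insensitive column lookup, mirroring `map_column_index` above.
fn find_file_column(table_field_name: &str, file_schema: &Schema) -> Option<usize> {
    file_schema
        .fields()
        .iter()
        .position(|f| f.name().eq_ignore_ascii_case(table_field_name))
}

/// Project the given columns and rename them to uppercase, mirroring
/// `map_batch`: only the schema changes; the column arrays are reused.
fn uppercase_batch(batch: &RecordBatch, projection: &[usize]) -> Result<RecordBatch, ArrowError> {
    let schema = batch.schema();
    let fields: Vec<Field> = projection
        .iter()
        .map(|&i| {
            let f = schema.field(i);
            Field::new(f.name().to_uppercase(), f.data_type().clone(), f.is_nullable())
        })
        .collect();
    let columns: Vec<ArrayRef> = projection.iter().map(|&i| batch.column(i).clone()).collect();
    RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
}

fn main() -> Result<(), ArrowError> {
    // Lowercase "file" schema, as written by the tests' write_parquet helper
    let batch = RecordBatch::try_new(
        Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("name", DataType::Utf8, true),
        ])),
        vec![
            Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
            Arc::new(StringArray::from(vec!["a", "b", "c"])),
        ],
    )?;

    // Resolve the uppercase table column "ID" against the lowercase file schema
    assert_eq!(find_file_column("ID", batch.schema().as_ref()), Some(0));

    let mapped = uppercase_batch(&batch, &[0, 1])?;
    assert_eq!(mapped.schema().field(0).name(), "ID");
    assert_eq!(mapped.schema().field(1).name(), "NAME");
    Ok(())
}
```

In the tests themselves this logic sits behind DataFusion's `SchemaAdapter`/`SchemaMapper` traits, so the scan applies it rather than the caller; the sketch just isolates the schema/projection arithmetic being verified.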