diff --git a/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.cpp index 9a787838037ef..303ee763d4a60 100644 --- a/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.cpp @@ -16,6 +16,7 @@ #include "presto_cpp/main/connectors/PrestoToVeloxConnectorUtils.h" #include "presto_cpp/presto_protocol/connector/iceberg/IcebergConnectorProtocol.h" +#include "velox/connectors/hive/iceberg/IcebergDataSink.h" #include "velox/connectors/hive/iceberg/IcebergSplit.h" #include "velox/type/fbhive/HiveTypeParser.h" @@ -274,4 +275,74 @@ IcebergPrestoToVeloxConnector::createConnectorProtocol() const { return std::make_unique(); } +std::unique_ptr +IcebergPrestoToVeloxConnector::toVeloxInsertTableHandle( + const protocol::CreateHandle* createHandle, + const TypeParser& typeParser) const { + auto icebergOutputTableHandle = + std::dynamic_pointer_cast( + createHandle->handle.connectorHandle); + + VELOX_CHECK_NOT_NULL( + icebergOutputTableHandle, + "Unexpected output table handle type {}", + createHandle->handle.connectorHandle->_type); + + const auto inputColumns = + toHiveColumns(icebergOutputTableHandle->inputColumns, typeParser); + + return std::make_unique< + velox::connector::hive::iceberg::IcebergInsertTableHandle>( + inputColumns, + std::make_shared( + fmt::format("{}/data", icebergOutputTableHandle->outputPath), + fmt::format("{}/data", icebergOutputTableHandle->outputPath), + velox::connector::hive::LocationHandle::TableType::kNew), + toVeloxFileFormat(icebergOutputTableHandle->fileFormat), + std::optional( + toFileCompressionKind(icebergOutputTableHandle->compressionCodec))); +} + +std::unique_ptr +IcebergPrestoToVeloxConnector::toVeloxInsertTableHandle( + const protocol::InsertHandle* insertHandle, + const TypeParser& typeParser) const { + auto icebergInsertTableHandle = + std::dynamic_pointer_cast( + insertHandle->handle.connectorHandle); + + VELOX_CHECK_NOT_NULL( + icebergInsertTableHandle, + "Unexpected insert table handle type {}", + insertHandle->handle.connectorHandle->_type); + + const auto inputColumns = + toHiveColumns(icebergInsertTableHandle->inputColumns, typeParser); + + return std::make_unique< + velox::connector::hive::iceberg::IcebergInsertTableHandle>( + inputColumns, + std::make_shared( + fmt::format("{}/data", icebergInsertTableHandle->outputPath), + fmt::format("{}/data", icebergInsertTableHandle->outputPath), + velox::connector::hive::LocationHandle::TableType::kExisting), + toVeloxFileFormat(icebergInsertTableHandle->fileFormat), + std::optional( + toFileCompressionKind(icebergInsertTableHandle->compressionCodec))); +} + +std::vector +IcebergPrestoToVeloxConnector::toHiveColumns( + const protocol::List& inputColumns, + const TypeParser& typeParser) const { + std::vector hiveColumns; + hiveColumns.reserve(inputColumns.size()); + for (const auto& columnHandle : inputColumns) { + hiveColumns.emplace_back( + std::dynamic_pointer_cast( + std::shared_ptr(toVeloxColumnHandle(&columnHandle, typeParser)))); + } + return hiveColumns; +} + } // namespace facebook::presto diff --git a/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.h b/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.h index b4f48d035cd18..c6b6b8850fa53 100644 --- a/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.h +++ b/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.h @@ -15,6 +15,7 @@ #pragma once #include "presto_cpp/main/connectors/PrestoToVeloxConnector.h" +#include "presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.h" namespace facebook::presto { @@ -40,6 +41,22 @@ class IcebergPrestoToVeloxConnector final : public PrestoToVeloxConnector { std::unique_ptr createConnectorProtocol() const final; + + std::unique_ptr + toVeloxInsertTableHandle( + const protocol::CreateHandle* createHandle, + const TypeParser& typeParser) const final; + + std::unique_ptr + toVeloxInsertTableHandle( + const protocol::InsertHandle* insertHandle, + const TypeParser& typeParser) const final; + + private: + std::vector toHiveColumns( + const protocol::List& + inputColumns, + const TypeParser& typeParser) const; }; } // namespace facebook::presto diff --git a/presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.cpp index 55540a703533b..fe203a47d0374 100644 --- a/presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.cpp @@ -150,26 +150,6 @@ dwio::common::FileFormat toFileFormat( } } -velox::common::CompressionKind toFileCompressionKind( - const protocol::hive::HiveCompressionCodec& hiveCompressionCodec) { - switch (hiveCompressionCodec) { - case protocol::hive::HiveCompressionCodec::SNAPPY: - return velox::common::CompressionKind::CompressionKind_SNAPPY; - case protocol::hive::HiveCompressionCodec::GZIP: - return velox::common::CompressionKind::CompressionKind_GZIP; - case protocol::hive::HiveCompressionCodec::LZ4: - return velox::common::CompressionKind::CompressionKind_LZ4; - case protocol::hive::HiveCompressionCodec::ZSTD: - return velox::common::CompressionKind::CompressionKind_ZSTD; - case protocol::hive::HiveCompressionCodec::NONE: - return velox::common::CompressionKind::CompressionKind_NONE; - default: - VELOX_UNSUPPORTED( - "Unsupported file compression format: {}.", - toJsonString(hiveCompressionCodec)); - } -} - velox::connector::hive::HiveBucketProperty::Kind toHiveBucketPropertyKind( protocol::hive::BucketFunctionType bucketFuncType) { switch (bucketFuncType) { @@ -422,6 +402,26 @@ std::unique_ptr toHiveTableHandle( finalTableParameters); } +velox::common::CompressionKind toFileCompressionKind( + const protocol::hive::HiveCompressionCodec& hiveCompressionCodec) { + switch (hiveCompressionCodec) { + case protocol::hive::HiveCompressionCodec::SNAPPY: + return velox::common::CompressionKind::CompressionKind_SNAPPY; + case protocol::hive::HiveCompressionCodec::GZIP: + return velox::common::CompressionKind::CompressionKind_GZIP; + case protocol::hive::HiveCompressionCodec::LZ4: + return velox::common::CompressionKind::CompressionKind_LZ4; + case protocol::hive::HiveCompressionCodec::ZSTD: + return velox::common::CompressionKind::CompressionKind_ZSTD; + case protocol::hive::HiveCompressionCodec::NONE: + return velox::common::CompressionKind::CompressionKind_NONE; + default: + VELOX_UNSUPPORTED( + "Unsupported file compression format: {}.", + toJsonString(hiveCompressionCodec)); + } +} + std::unique_ptr HivePrestoToVeloxConnector::toVeloxSplit( const protocol::ConnectorId& catalogId, diff --git a/presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.h b/presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.h index 0e86b87a241fe..a611a11f100f9 100644 --- a/presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.h +++ b/presto-native-execution/presto_cpp/main/connectors/PrestoToVeloxConnector.h @@ -52,6 +52,9 @@ std::unique_ptr toHiveTableHandle( const VeloxExprConverter& exprConverter, const TypeParser& typeParser); +velox::common::CompressionKind toFileCompressionKind( + const protocol::hive::HiveCompressionCodec& hiveCompressionCodec); + class PrestoToVeloxConnector { public: virtual ~PrestoToVeloxConnector() = default; diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/PrestoNativeQueryRunnerUtils.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/PrestoNativeQueryRunnerUtils.java index acfb526e4be98..b5d35dc75624c 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/PrestoNativeQueryRunnerUtils.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/PrestoNativeQueryRunnerUtils.java @@ -84,7 +84,7 @@ public enum QueryRunnerType public static final String REMOTE_FUNCTION_CATALOG_NAME = "remote"; public static final String HIVE_DATA = "hive_data"; - protected static final String ICEBERG_DEFAULT_STORAGE_FORMAT = "PARQUET"; + public static final String ICEBERG_DEFAULT_STORAGE_FORMAT = "PARQUET"; private static final Logger log = Logger.get(PrestoNativeQueryRunnerUtils.class); private static final String DEFAULT_STORAGE_FORMAT = "DWRF"; diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestCreateTable.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestCreateTable.java new file mode 100644 index 0000000000000..82b3d8ccf55b8 --- /dev/null +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestCreateTable.java @@ -0,0 +1,220 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.nativeworker.iceberg; + +import com.facebook.presto.testing.ExpectedQueryRunner; +import com.facebook.presto.testing.QueryRunner; +import com.facebook.presto.tests.AbstractTestQueryFramework; +import org.testng.annotations.Test; + +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.ICEBERG_DEFAULT_STORAGE_FORMAT; +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.javaIcebergQueryRunnerBuilder; +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.nativeIcebergQueryRunnerBuilder; + +public class TestCreateTable + extends AbstractTestQueryFramework +{ + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + return nativeIcebergQueryRunnerBuilder() + .setStorageFormat(ICEBERG_DEFAULT_STORAGE_FORMAT) + .setAddStorageFormatToPath(false) + .build(); + } + + @Override + protected ExpectedQueryRunner createExpectedQueryRunner() + throws Exception + { + return javaIcebergQueryRunnerBuilder() + .setStorageFormat(ICEBERG_DEFAULT_STORAGE_FORMAT) + .setAddStorageFormatToPath(false) + .build(); + } + + @Test + public void testCreateSimpleTable() + { + String tableName = "simple"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id BIGINT, data VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '0')"); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'test')", tableName), 1); + assertQuery(String.format("SELECT id, data FROM %s", tableName), "VALUES (BIGINT '1', 'test')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testAllPrimitiveTypes() + { + String tableName = "primitive_types"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "col_boolean BOOLEAN, " + + "col_integer INTEGER, " + + "col_bigint BIGINT, " + + "col_real REAL, " + + "col_double DOUBLE, " + + "col_decimal DECIMAL(10, 2), " + + "col_varchar VARCHAR, " + + "col_varbinary VARBINARY, " + + "col_date DATE, " + + "col_timestamp TIMESTAMP" + + ") WITH (format = 'PARQUET')", tableName)); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '0')"); + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "true, 123, 456789, REAL '1.23', 4.56, DECIMAL '78.90', 'text', X'ABCD', DATE '2024-01-01', TIMESTAMP '2024-01-01 12:00:00')", tableName), 1); + assertQuery(String.format("SELECT col_boolean, col_integer, col_varchar FROM %s", tableName), + "VALUES (true, 123, 'text')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testComplexTypes() + { + String tableName = "complex_types"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "int_array ARRAY(INTEGER), " + + "string_map MAP(VARCHAR, INTEGER), " + + "person ROW(name VARCHAR, age INTEGER)" + + ") WITH (format = 'PARQUET')", tableName)); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '0')"); + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "1, ARRAY[1, 2, 3], MAP(ARRAY['a', 'b'], ARRAY[10, 20]), ROW('Alice', 30))", tableName), 1); + assertQuery(String.format("SELECT id, int_array FROM %s", tableName), + "VALUES (1, ARRAY[1, 2, 3])"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testAsSelect() + { + String sourceTable = "source"; + String targetTable = "target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id BIGINT, name VARCHAR, value DOUBLE) WITH (format = 'PARQUET')", sourceTable)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'first', 1.1), (2, 'second', 2.2), (3, 'third', 3.3)", sourceTable), 3); + assertUpdate(String.format("CREATE TABLE %s WITH (format = 'PARQUET') AS SELECT * FROM %s", targetTable, sourceTable), 3); + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '3')"); + assertQuery(String.format("SELECT id, name, value FROM %s ORDER BY id", targetTable), + "VALUES (BIGINT '1', 'first', DOUBLE '1.1'), (BIGINT '2', 'second', DOUBLE '2.2'), (BIGINT '3', 'third', DOUBLE '3.3')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", sourceTable)); + } + } + + @Test + public void testAsSelectWithFilter() + { + String sourceTable = "filter_source"; + String targetTable = "filter_target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, category VARCHAR, amount DOUBLE) WITH (format = 'PARQUET')", sourceTable)); + assertUpdate(String.format("INSERT INTO %s VALUES " + + "(1, 'A', 100.0), (2, 'B', 200.0), (3, 'A', 150.0), (4, 'B', 250.0), (5, 'A', 175.0)", sourceTable), 5); + assertUpdate(String.format("CREATE TABLE %s WITH (format = 'PARQUET') AS " + + "SELECT * FROM %s WHERE category = 'A'", targetTable, sourceTable), 3); + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '3')"); + assertQuery(String.format("SELECT id, amount FROM %s ORDER BY id", targetTable), + "VALUES (1, DOUBLE '100.0'), (3, DOUBLE '150.0'), (5, DOUBLE '175.0')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", sourceTable)); + } + } + + @Test + public void testAsSelectWithAggregation() + { + String sourceTable = "agg_source"; + String targetTable = "agg_target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (category VARCHAR, amount DOUBLE) WITH (format = 'PARQUET')", sourceTable)); + assertUpdate(String.format("INSERT INTO %s VALUES " + + "('A', 100.0), ('B', 200.0), ('A', 150.0), ('B', 250.0), ('A', 175.0)", sourceTable), 5); + assertUpdate(String.format("CREATE TABLE %s WITH (format = 'PARQUET') AS " + + "SELECT category, SUM(amount) as total_amount FROM %s GROUP BY category", targetTable, sourceTable), 2); + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '2')"); + assertQuery(String.format("SELECT * FROM %s ORDER BY category", targetTable)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", sourceTable)); + } + } + + @Test + public void testAsSelectWithColumnRename() + { + String sourceTable = "rename_source"; + String targetTable = "rename_target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, old_name VARCHAR) WITH (format = 'PARQUET')", sourceTable)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'value1'), (2, 'value2')", sourceTable), 2); + assertUpdate(String.format("CREATE TABLE %s WITH (format = 'PARQUET') AS " + + "SELECT id, old_name as new_name FROM %s", targetTable, sourceTable), 2); + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '2')"); + assertQuery(String.format("SELECT id, new_name FROM %s ORDER BY id", targetTable), + "VALUES (1, 'value1'), (2, 'value2')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", sourceTable)); + } + } + + @Test + public void testAsSelectEmpty() + { + String sourceTable = "empty_source"; + String targetTable = "empty_target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR) WITH (format = 'PARQUET')", sourceTable)); + assertUpdate(String.format("CREATE TABLE %s WITH (format = 'PARQUET') AS SELECT * FROM %s", targetTable, sourceTable), 0); + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '0')"); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'test')", targetTable), 1); + assertQuery(String.format("SELECT id, data FROM %s", targetTable), "VALUES (1, 'test')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", sourceTable)); + } + } +} diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestInsertFromTpch.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestInsertFromTpch.java new file mode 100644 index 0000000000000..7feb7303cc11e --- /dev/null +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestInsertFromTpch.java @@ -0,0 +1,291 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.nativeworker.iceberg; + +import com.facebook.presto.nativeworker.NativeQueryRunnerUtils; +import com.facebook.presto.testing.ExpectedQueryRunner; +import com.facebook.presto.testing.QueryRunner; +import com.facebook.presto.tests.AbstractTestQueryFramework; +import org.testng.annotations.Test; + +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.ICEBERG_DEFAULT_STORAGE_FORMAT; +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.javaIcebergQueryRunnerBuilder; +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.nativeIcebergQueryRunnerBuilder; + +public class TestInsertFromTpch + extends AbstractTestQueryFramework +{ + private static final String TPCH_SCHEMA = "iceberg.tpch"; + + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + return nativeIcebergQueryRunnerBuilder() + .setStorageFormat(ICEBERG_DEFAULT_STORAGE_FORMAT) + .setAddStorageFormatToPath(false) + .build(); + } + + @Override + protected ExpectedQueryRunner createExpectedQueryRunner() + throws Exception + { + QueryRunner javaQueryRunner = javaIcebergQueryRunnerBuilder() + .setStorageFormat(ICEBERG_DEFAULT_STORAGE_FORMAT) + .setAddStorageFormatToPath(false) + .build(); + NativeQueryRunnerUtils.createAllIcebergTables(javaQueryRunner); + return javaQueryRunner; + } + + @Test + public void testInsertFromTpchNation() + { + String targetTable = "iceberg_nation"; + int iterations = 10; + String sourceTable = String.format("%s.%s", TPCH_SCHEMA, "nation"); + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "nationkey BIGINT, " + + "name VARCHAR, " + + "regionkey BIGINT, " + + "comment VARCHAR" + + ") WITH (format = 'PARQUET')", targetTable)); + + long rowCount = (Long) computeActual(String.format("SELECT COUNT(*) FROM %s", sourceTable)).getOnlyValue(); + for (int i = 1; i <= iterations; i++) { + assertUpdate(String.format("INSERT INTO %s SELECT * FROM %s", targetTable, sourceTable), rowCount); + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), + String.format("VALUES (BIGINT '%d')", rowCount * i)); + } + + assertQuery(String.format("SELECT nationkey, COUNT(*) as cnt FROM %s GROUP BY nationkey ORDER BY nationkey", targetTable), + String.format("SELECT nationkey, BIGINT '10' as cnt FROM %s ORDER BY nationkey", sourceTable)); + + assertQuery(String.format("SELECT DISTINCT nationkey, name, comment FROM %s ORDER BY nationkey", targetTable), + String.format("SELECT nationkey, name, comment FROM %s ORDER BY nationkey", sourceTable)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + } + } + + @Test + public void testInsertFromTpchRegion() + { + String targetTable = "iceberg_region"; + int iterations = 10; + String sourceTable = String.format("%s.%s", TPCH_SCHEMA, "region"); + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "regionkey BIGINT, " + + "name VARCHAR, " + + "comment VARCHAR" + + ") WITH (format = 'PARQUET')", targetTable)); + + // First insert: filter that returns 0 rows (edge case) + assertUpdate(String.format("INSERT INTO %s SELECT * FROM %s WHERE regionkey > 999", targetTable, sourceTable), 0); + + // Verify table is still empty after 0-row insert + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '0')"); + + // Multiple inserts: insert all regions 10 times + long rowCount = (Long) computeActual(String.format("SELECT COUNT(*) FROM %s", sourceTable)).getOnlyValue(); + for (int i = 1; i <= iterations; i++) { + assertUpdate(String.format("INSERT INTO %s SELECT * FROM %s", targetTable, sourceTable), rowCount); + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), + String.format("VALUES (BIGINT '%d')", rowCount * i)); + } + + assertQuery(String.format("SELECT regionkey, COUNT(*) as cnt FROM %s GROUP BY regionkey ORDER BY regionkey", targetTable), + String.format("SELECT regionkey, BIGINT '10' as cnt FROM %s ORDER BY regionkey", sourceTable)); + + assertQuery(String.format("SELECT DISTINCT regionkey, name, comment FROM %s ORDER BY regionkey", targetTable), + String.format("SELECT regionkey, name, comment FROM %s ORDER BY regionkey", sourceTable)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + } + } + + @Test + public void testInsertFromTpchCustomerWithFilter() + { + String targetTable = "iceberg_customer_filtered"; + String sourceTable = String.format("%s.%s", TPCH_SCHEMA, "customer"); + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "custkey BIGINT, " + + "name VARCHAR, " + + "nationkey BIGINT, " + + "acctbal DOUBLE" + + ") WITH (format = 'PARQUET')", targetTable)); + + // filter that returns 0 rows + assertUpdate(String.format("INSERT INTO %s " + + "SELECT custkey, name, nationkey, acctbal " + + "FROM %s " + + "WHERE acctbal > 999999999", targetTable, sourceTable), 0); + + // Verify table is still empty after 0-row insert + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '0')"); + + long rowCount = (Long) computeActual(String.format("SELECT COUNT(*) FROM %s WHERE acctbal > 0", sourceTable)).getOnlyValue(); + assertUpdate(String.format("INSERT INTO %s " + + "SELECT custkey, name, nationkey, acctbal " + + "FROM %s " + + "WHERE acctbal > 0", targetTable, sourceTable), rowCount); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), + String.format("SELECT COUNT(*) FROM %s WHERE acctbal > 0", sourceTable)); + + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE acctbal <= 0", targetTable), "VALUES (BIGINT '0')"); + + assertQuery(String.format("SELECT * FROM %s ORDER BY custkey LIMIT 5", targetTable), + String.format("SELECT custkey, name, nationkey, acctbal FROM %s WHERE acctbal > 0 ORDER BY custkey LIMIT 5", sourceTable)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + } + } + + @Test + public void testInsertFromTpchOrdersWithProjection() + { + String targetTable = "iceberg_orders_summary"; + String sourceTable = String.format("%s.%s", TPCH_SCHEMA, "orders"); + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "orderkey BIGINT, " + + "custkey BIGINT, " + + "orderstatus VARCHAR, " + + "totalprice DOUBLE, " + + "orderdate VARCHAR, " + + "orderyear VARCHAR" + + ") WITH (format = 'PARQUET')", targetTable)); + + long rowCount = (Long) computeActual(String.format("SELECT COUNT(*) FROM %s WHERE totalprice > 100000", sourceTable)).getOnlyValue(); + assertUpdate(String.format("INSERT INTO %s " + + "SELECT orderkey, custkey, orderstatus, totalprice, orderdate, orderdate " + + "FROM %s " + + "WHERE totalprice > 100000", targetTable, sourceTable), rowCount); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), + String.format("SELECT COUNT(*) FROM %s WHERE totalprice > 100000", sourceTable)); + + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE totalprice <= 100000", targetTable), "VALUES (BIGINT '0')"); + + assertQuery(String.format("SELECT DISTINCT orderyear FROM %s ORDER BY orderyear", targetTable), + String.format("SELECT DISTINCT orderdate FROM %s WHERE totalprice > 100000 ORDER BY orderdate", sourceTable)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + } + } + + @Test + public void testInsertFromTpchLineitemWithAggregation() + { + String targetTable = "iceberg_lineitem_summary"; + String sourceTable = String.format("%s.%s", TPCH_SCHEMA, "lineitem"); + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "orderkey BIGINT, " + + "total_quantity DOUBLE, " + + "total_price DOUBLE, " + + "avg_discount DOUBLE, " + + "item_count BIGINT" + + ") WITH (format = 'PARQUET')", targetTable)); + + long rowCount = (Long) computeActual(String.format("SELECT COUNT(DISTINCT orderkey) FROM %s", sourceTable)).getOnlyValue(); + assertUpdate(String.format("INSERT INTO %s " + + "SELECT " + + " orderkey, " + + " SUM(quantity) as total_quantity, " + + " SUM(extendedprice) as total_price, " + + " AVG(discount) as avg_discount, " + + " COUNT(*) as item_count " + + "FROM %s " + + "GROUP BY orderkey", targetTable, sourceTable), rowCount); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), + String.format("SELECT COUNT(DISTINCT orderkey) FROM %s", sourceTable)); + + assertQuery(String.format("SELECT total_quantity, item_count FROM %s WHERE orderkey = 1", targetTable), + String.format("SELECT SUM(quantity), COUNT(*) FROM %s WHERE orderkey = 1", sourceTable)); + + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE item_count <= 0", targetTable), "VALUES (BIGINT '0')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + } + } + + @Test + public void testInsertFromTpchJoin() + { + String targetTable = "iceberg_customer_nation"; + String sourceCustomer = String.format("%s.%s", TPCH_SCHEMA, "customer"); + String sourceNation = String.format("%s.%s", TPCH_SCHEMA, "nation"); + String sourceRegion = String.format("%s.%s", TPCH_SCHEMA, "region"); + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "custkey BIGINT, " + + "customer_name VARCHAR, " + + "nation_name VARCHAR, " + + "region_name VARCHAR, " + + "acctbal DOUBLE" + + ") WITH (format = 'PARQUET')", targetTable)); + + long rowCount = (Long) computeActual(String.format("SELECT COUNT(*) " + + "FROM %s c " + + "JOIN %s n ON c.nationkey = n.nationkey " + + "JOIN %s r ON n.regionkey = r.regionkey", sourceCustomer, sourceNation, sourceRegion)).getOnlyValue(); + assertUpdate(String.format("INSERT INTO %s " + + "SELECT " + + " c.custkey, " + + " c.name as customer_name, " + + " n.name as nation_name, " + + " r.name as region_name, " + + " c.acctbal " + + "FROM %s c " + + "JOIN %s n ON c.nationkey = n.nationkey " + + "JOIN %s r ON n.regionkey = r.regionkey", targetTable, sourceCustomer, sourceNation, sourceRegion), rowCount); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), + String.format("SELECT COUNT(*) " + + "FROM %s c " + + "JOIN %s n ON c.nationkey = n.nationkey " + + "JOIN %s r ON n.regionkey = r.regionkey", sourceCustomer, sourceNation, sourceRegion)); + + assertQuery(String.format("SELECT customer_name, nation_name, region_name FROM %s WHERE custkey = 1", targetTable), + String.format("SELECT c.name, n.name, r.name " + + "FROM %s c " + + "JOIN %s n ON c.nationkey = n.nationkey " + + "JOIN %s r ON n.regionkey = r.regionkey " + + "WHERE c.custkey = 1", sourceCustomer, sourceNation, sourceRegion)); + + assertQuery(String.format("SELECT DISTINCT region_name FROM %s ORDER BY region_name", targetTable), + String.format("SELECT DISTINCT r.name FROM %s r ORDER BY r.name", sourceRegion)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + } + } +} diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestMetadata.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestMetadata.java new file mode 100644 index 0000000000000..5e0a5001adab8 --- /dev/null +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestMetadata.java @@ -0,0 +1,600 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.nativeworker.iceberg; + +import com.facebook.presto.testing.ExpectedQueryRunner; +import com.facebook.presto.testing.QueryRunner; +import com.facebook.presto.tests.AbstractTestQueryFramework; +import org.testng.annotations.Test; + +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.ICEBERG_DEFAULT_STORAGE_FORMAT; +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.javaIcebergQueryRunnerBuilder; +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.nativeIcebergQueryRunnerBuilder; + +public class TestMetadata + extends AbstractTestQueryFramework +{ + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + return nativeIcebergQueryRunnerBuilder() + .setStorageFormat(ICEBERG_DEFAULT_STORAGE_FORMAT) + .setAddStorageFormatToPath(false) + .build(); + } + + @Override + protected ExpectedQueryRunner createExpectedQueryRunner() + throws Exception + { + return javaIcebergQueryRunnerBuilder() + .setStorageFormat(ICEBERG_DEFAULT_STORAGE_FORMAT) + .setAddStorageFormatToPath(false) + .build(); + } + + @Test + public void testShowTables() + { + String table1 = "metadata_table1"; + String table2 = "metadata_table2"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR) WITH (format = 'PARQUET')", table1)); + assertUpdate(String.format("CREATE TABLE %s (id BIGINT, value DOUBLE) WITH (format = 'PARQUET')", table2)); + assertQuery("SHOW TABLES LIKE 'metadata_table%'"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", table2)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", table1)); + } + } + + @Test + public void testShowColumns() + { + String tableName = "show_columns"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "col_integer INTEGER, " + + "col_bigint BIGINT, " + + "col_varchar VARCHAR, " + + "col_double DOUBLE, " + + "col_boolean BOOLEAN, " + + "col_decimal DECIMAL(20, 2), " + + "col_date DATE, " + + "col_timestamp TIMESTAMP" + + ") WITH (format = 'PARQUET')", tableName)); + + assertQuery(String.format("SHOW COLUMNS FROM %s", tableName)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testDescribeTable() + { + String tableName = "describe_table"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "name VARCHAR, " + + "amount DOUBLE, " + + "active BOOLEAN" + + ") WITH (format = 'PARQUET')", tableName)); + assertQuery(String.format("DESCRIBE %s", tableName)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testTableWithComplexTypesMetadata() + { + String tableName = "complex_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "int_array ARRAY(INTEGER), " + + "string_map MAP(VARCHAR, INTEGER), " + + "person ROW(name VARCHAR, age INTEGER)" + + ") WITH (format = 'PARQUET')", tableName)); + + assertQuery(String.format("SHOW COLUMNS FROM %s", tableName)); + assertQuery(String.format("DESCRIBE %s", tableName)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testColumnsAfterInsertion() + { + String tableName = "columns"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'test1'), (2, 'test2'), (3, 'test3')", tableName), 3); + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '3')"); + assertQuery(String.format("SHOW COLUMNS FROM %s", tableName)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testMetadataAfterMultipleInserts() + { + String tableName = "multiple_inserts_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, category VARCHAR, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'A', 10.0)", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (2, 'B', 20.0)", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (3, 'C', 30.0)", tableName), 1); + + assertQuery(String.format("SHOW COLUMNS FROM %s", tableName)); + assertQuery(String.format("DESCRIBE %s", tableName)); + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '3')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testMetadataWithNullableColumns() + { + String tableName = "nullable_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "nullable_varchar VARCHAR, " + + "nullable_double DOUBLE" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'value1', 1.0), (2, NULL, 2.0), (3, 'value3', NULL)", tableName), 3); + + assertQuery(String.format("SHOW COLUMNS FROM %s", tableName)); + assertQuery(String.format("DESCRIBE %s", tableName)); + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE nullable_varchar IS NULL", tableName), "VALUES (BIGINT '1')"); + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE nullable_double IS NULL", tableName), "VALUES (BIGINT '1')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testMetadataWithAllPrimitiveTypes() + { + String tableName = "all_types_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "col_boolean BOOLEAN, " + + "col_integer INTEGER, " + + "col_bigint BIGINT, " + + "col_real REAL, " + + "col_double DOUBLE, " + + "col_decimal DECIMAL(10, 2), " + + "col_varchar VARCHAR, " + + "col_varbinary VARBINARY, " + + "col_date DATE, " + + "col_timestamp TIMESTAMP" + + ") WITH (format = 'PARQUET')", tableName)); + + assertQuery(String.format("SHOW COLUMNS FROM %s", tableName)); + assertQuery(String.format("DESCRIBE %s", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "true, 123, 456789, REAL '1.23', 4.56, DECIMAL '78.90', 'text', X'ABCD', DATE '2024-01-01', TIMESTAMP '2024-01-01 12:00:00')", tableName), 1); + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '1')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testMetadataAfterTableRecreation() + { + String tableName = "recreate_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'first')", tableName), 1); + + // Drop and recreate with different schema + assertUpdate(String.format("DROP TABLE %s", tableName)); + assertUpdate(String.format("CREATE TABLE %s (id BIGINT, name VARCHAR, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + + // Verify new schema + assertQuery(String.format("SHOW COLUMNS FROM %s", tableName)); + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '0')"); + + // Insert data with new schema + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'test', 1.5)", tableName), 1); + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '1')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testHiddenMetadataColumnPath() + { + String tableName = "hidden_path"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, name VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'Alice'), (2, 'Bob')", tableName), 2); + + // Query the $path hidden column + assertQuery(String.format("SELECT \"$path\", id, name FROM %s ORDER BY id", tableName)); + + // Verify $path column is not null + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE \"$path\" IS NOT NULL", tableName), "VALUES (BIGINT '2')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testHiddenMetadataColumnDataSequenceNumber() + { + String tableName = "hidden_sequence"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 10.0), (2, 20.0)", tableName), 2); + + // Query the $data_sequence_number hidden column + assertQuery(String.format("SELECT \"$data_sequence_number\", id FROM %s ORDER BY id", tableName)); + + // Verify $data_sequence_number column is not null + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE \"$data_sequence_number\" IS NOT NULL", tableName), "VALUES (BIGINT '2')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testHiddenMetadataColumnsWithComplexQuery() + { + String tableName = "hidden_complex"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, category VARCHAR, amount DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'A', 100.0), (2, 'B', 200.0), (3, 'A', 150.0)", tableName), 3); + + // Query multiple hidden columns together with regular columns + assertQuery(String.format("SELECT \"$path\", \"$data_sequence_number\", id, category FROM %s WHERE category = 'A' ORDER BY id", tableName)); + + // Verify both hidden columns are accessible + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE \"$path\" IS NOT NULL AND \"$data_sequence_number\" IS NOT NULL", tableName), + "VALUES (BIGINT '3')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testHiddenMetadataColumnsAfterMultipleInserts() + { + String tableName = "hidden_multi_insert"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'first')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (2, 'second')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (3, 'third')", tableName), 1); + + assertQuery(String.format("SELECT \"$path\", \"$data_sequence_number\", id FROM %s ORDER BY id", tableName)); + // Verify all rows have hidden metadata + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE \"$path\" IS NOT NULL", tableName), "VALUES (BIGINT '3')"); + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE \"$data_sequence_number\" IS NOT NULL", tableName), "VALUES (BIGINT '3')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testHiddenMetadataColumnsWithAggregation() + { + String tableName = "hidden_aggregation"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, category VARCHAR, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'A', 10.0), (2, 'B', 20.0), (3, 'A', 30.0), (4, 'B', 40.0)", tableName), 4); + + // Use hidden columns in aggregation queries + assertQuery(String.format("SELECT category, COUNT(\"$path\") as file_count FROM %s GROUP BY category ORDER BY category", tableName), + "VALUES ('A', BIGINT '2'), ('B', BIGINT '2')"); + assertQuery(String.format("SELECT COUNT(DISTINCT \"$path\") FROM %s", tableName)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testPropertiesMetadataTable() + { + String tableName = "properties_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'test')", tableName), 1); + + assertQuery(String.format("SELECT * FROM \"%s$properties\"", tableName)); + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$properties\"", tableName), "VALUES (true)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testHistoryMetadataTable() + { + String tableName = "history_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 10.0)", tableName), 1); + + assertQuery(String.format("SELECT snapshot_id, is_current_ancestor FROM \"%s$history\"", tableName)); + // Verify history table has at least one entry + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$history\"", tableName), "VALUES (true)"); + // Verify snapshot_id is not null + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$history\" WHERE snapshot_id IS NOT NULL", tableName)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSnapshotsMetadataTable() + { + String tableName = "snapshots_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, name VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'first')", tableName), 1); + + assertQuery(String.format("SELECT snapshot_id, operation FROM \"%s$snapshots\"", tableName)); + // Verify snapshots table has at least one entry + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$snapshots\"", tableName), "VALUES (true)"); + + // Verify snapshot_id is not null + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$snapshots\" WHERE snapshot_id IS NOT NULL", tableName)); + + // Insert more data and verify new snapshot is created + assertUpdate(String.format("INSERT INTO %s VALUES (2, 'second')", tableName), 1); + assertQuery(String.format("SELECT COUNT(*) >= 2 FROM \"%s$snapshots\"", tableName), "VALUES (true)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testManifestsMetadataTable() + { + String tableName = "manifests_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'test')", tableName), 1); + + assertQuery(String.format("SELECT path, partition_spec_id, added_snapshot_id FROM \"%s$manifests\"", tableName)); + + // Verify manifests table has at least one entry + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$manifests\"", tableName), "VALUES (true)"); + + // Verify added_snapshot_id is not null + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$manifests\" WHERE added_snapshot_id IS NOT NULL", tableName)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testPartitionsMetadataTable() + { + String tableName = "partitions_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, category VARCHAR, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'A', 10.0), (2, 'B', 20.0)", tableName), 2); + + assertQuery(String.format("SELECT row_count, file_count, total_size FROM \"%s$partitions\"", tableName)); + + // Verify partitions table has data + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$partitions\"", tableName), "VALUES (true)"); + + // Verify row_count matches inserted rows + assertQuery(String.format("SELECT SUM(row_count) FROM \"%s$partitions\"", tableName), "VALUES (BIGINT '2')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testFilesMetadataTable() + { + String tableName = "files_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, name VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'Alice'), (2, 'Bob')", tableName), 2); + + assertQuery(String.format("SELECT content, file_format, record_count, file_size_in_bytes FROM \"%s$files\"", tableName)); + + // Verify files table has at least one entry + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$files\"", tableName), "VALUES (true)"); + + // Verify file_format is PARQUET + assertQuery(String.format("SELECT DISTINCT file_format FROM \"%s$files\"", tableName), "VALUES ('PARQUET')"); + + // Verify file_path is not null + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$files\" WHERE file_path IS NOT NULL", tableName)); + + // Verify total record count matches inserted rows + assertQuery(String.format("SELECT SUM(record_count) FROM \"%s$files\"", tableName), "VALUES (BIGINT '2')"); + + // Verify file_size_in_bytes is positive + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$files\" WHERE file_size_in_bytes > 0", tableName)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testRefsMetadataTable() + { + String tableName = "refs_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'test')", tableName), 1); + + assertQuery(String.format("SELECT name, type, snapshot_id FROM \"%s$refs\"", tableName)); + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$refs\"", tableName), "VALUES (true)"); + + // Verify main branch exists + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$refs\" WHERE name = 'main'", tableName)); + + // Verify snapshot_id is not null for main branch + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$refs\" WHERE name = 'main' AND snapshot_id IS NOT NULL", tableName)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testWithMultipleSnapshots() + { + String tableName = "multi_snapshot"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + + // Create multiple snapshots with multiple inserts + assertUpdate(String.format("INSERT INTO %s VALUES (1, 10.0)", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (2, 20.0)", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (3, 30.0)", tableName), 1); + + assertQuery(String.format("SELECT COUNT(*) >= 3 FROM \"%s$history\"", tableName), "VALUES (true)"); + assertQuery(String.format("SELECT COUNT(*) >= 3 FROM \"%s$snapshots\"", tableName), "VALUES (true)"); + assertQuery(String.format("SELECT COUNT(*) >= 3 FROM \"%s$files\"", tableName), "VALUES (true)"); + assertQuery(String.format("SELECT SUM(record_count) FROM \"%s$files\"", tableName), "VALUES (BIGINT '3')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testWithComplexQueries() + { + String tableName = "complex_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, category VARCHAR, amount DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'A', 100.0), (2, 'B', 200.0)", tableName), 2); + + // Aggregate data from $files + assertQuery(String.format("SELECT file_format, COUNT(*) as file_count FROM \"%s$files\" GROUP BY file_format", tableName)); + + // Query $manifests with filtering + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$manifests\" WHERE partition_spec_id >= 0", tableName)); + + // Query $snapshots with filtering + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$snapshots\" WHERE snapshot_id IS NOT NULL", tableName)); + + // Verify file format from $files + assertQuery(String.format("SELECT DISTINCT file_format FROM \"%s$files\"", tableName), "VALUES ('PARQUET')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testAllMetadata() + { + String tableName = "all_metadata"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'test')", tableName), 1); + + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$properties\"", tableName), "VALUES (true)"); + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$history\"", tableName), "VALUES (true)"); + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$snapshots\"", tableName), "VALUES (true)"); + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$manifests\"", tableName), "VALUES (true)"); + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$partitions\"", tableName), "VALUES (true)"); + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$files\"", tableName), "VALUES (true)"); + assertQuery(String.format("SELECT COUNT(*) > 0 FROM \"%s$refs\"", tableName), "VALUES (true)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testFilesMetadata() + { + String tableName = "files_schema"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, name VARCHAR, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'Alice', 100.0), (2, 'Bob', 200.0)", tableName), 2); + + assertQuery(String.format("SELECT content FROM \"%s$files\"", tableName)); + assertQuery(String.format("SELECT file_path FROM \"%s$files\"", tableName)); + assertQuery(String.format("SELECT file_format FROM \"%s$files\"", tableName)); + assertQuery(String.format("SELECT record_count FROM \"%s$files\"", tableName)); + assertQuery(String.format("SELECT file_size_in_bytes FROM \"%s$files\"", tableName)); + assertQuery(String.format("SELECT DISTINCT content FROM \"%s$files\"", tableName), "VALUES (0)"); + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$files\" WHERE column_sizes IS NOT NULL", tableName)); + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$files\" WHERE value_counts IS NOT NULL", tableName)); + assertQuery(String.format("SELECT COUNT(*) FROM \"%s$files\" WHERE null_value_counts IS NOT NULL", tableName)); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } +} diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestSelect.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestSelect.java new file mode 100644 index 0000000000000..68ba41b29ccf7 --- /dev/null +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestSelect.java @@ -0,0 +1,302 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.nativeworker.iceberg; + +import com.facebook.presto.testing.ExpectedQueryRunner; +import com.facebook.presto.testing.QueryRunner; +import com.facebook.presto.tests.AbstractTestQueryFramework; +import org.testng.annotations.Test; + +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.ICEBERG_DEFAULT_STORAGE_FORMAT; +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.javaIcebergQueryRunnerBuilder; +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.nativeIcebergQueryRunnerBuilder; + +public class TestSelect + extends AbstractTestQueryFramework +{ + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + return nativeIcebergQueryRunnerBuilder() + .setStorageFormat(ICEBERG_DEFAULT_STORAGE_FORMAT) + .setAddStorageFormatToPath(false) + .build(); + } + + @Override + protected ExpectedQueryRunner createExpectedQueryRunner() + throws Exception + { + return javaIcebergQueryRunnerBuilder() + .setStorageFormat(ICEBERG_DEFAULT_STORAGE_FORMAT) + .setAddStorageFormatToPath(false) + .build(); + } + + @Test + public void testSelectAll() + { + String tableName = "select_all"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id BIGINT, data VARCHAR, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'a', 1.0), (2, 'b', 2.0), (3, 'c', 3.0)", tableName), 3); + assertQuery(String.format("SELECT * FROM %s ORDER BY id", tableName), + "VALUES (BIGINT '1', 'a', DOUBLE '1.0'), (BIGINT '2', 'b', DOUBLE '2.0'), (BIGINT '3', 'c', DOUBLE '3.0')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectProjection() + { + String tableName = "select_projection"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id BIGINT, data VARCHAR, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'a', 1.0), (2, 'b', 2.0), (3, 'c', 3.0)", tableName), 3); + assertQuery(String.format("SELECT id FROM %s ORDER BY id", tableName), + "VALUES (BIGINT '1'), (BIGINT '2'), (BIGINT '3')"); + assertQuery(String.format("SELECT data, value FROM %s ORDER BY data", tableName), + "VALUES ('a', DOUBLE '1.0'), ('b', DOUBLE '2.0'), ('c', DOUBLE '3.0')"); + assertQuery(String.format("SELECT value, id, data FROM %s WHERE id = 2", tableName), + "VALUES (DOUBLE '2.0', BIGINT '2', 'b')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithFilter() + { + String tableName = "select_filter"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, category VARCHAR, amount DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES " + + "(1, 'A', 100.0), (2, 'B', 200.0), (3, 'A', 150.0), (4, 'B', 250.0), (5, 'A', 175.0)", tableName), 5); + assertQuery(String.format("SELECT id, amount FROM %s WHERE category = 'A' ORDER BY id", tableName), + "VALUES (1, DOUBLE '100.0'), (3, DOUBLE '150.0'), (5, DOUBLE '175.0')"); + assertQuery(String.format("SELECT id FROM %s WHERE amount > 150.0 ORDER BY id", tableName), + "VALUES (2), (4), (5)"); + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE category = 'B'", tableName), + "VALUES (BIGINT '2')"); + assertQuery(String.format("SELECT id FROM %s WHERE id = 2", tableName), + "VALUES (2)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithAggregation() + { + String tableName = "select_aggregation"; + + try { + assertUpdate(String.format("CREATE TABLE %s (category VARCHAR, amount DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES " + + "('A', 100.0), ('B', 200.0), ('A', 150.0), ('B', 250.0), ('A', 175.0)", tableName), 5); + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), + "VALUES (BIGINT '5')"); + assertQuery(String.format("SELECT category, COUNT(*) as cnt FROM %s GROUP BY category ORDER BY category", tableName), + "VALUES ('A', BIGINT '3'), ('B', BIGINT '2')"); + assertQuery(String.format("SELECT SUM(amount) FROM %s", tableName), + "VALUES (DOUBLE '875.0')"); + assertQuery(String.format("SELECT category, SUM(amount) FROM %s GROUP BY category ORDER BY category", tableName)); + assertQuery(String.format("SELECT MAX(amount), MIN(amount) FROM %s", tableName), + "VALUES (DOUBLE '250.0', DOUBLE '100.0')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithOrderBy() + { + String tableName = "select_orderby"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, name VARCHAR, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES " + + "(3, 'charlie', 30.0), (1, 'alice', 10.0), (2, 'bob', 20.0)", tableName), 3); + assertQuery(String.format("SELECT id, name FROM %s ORDER BY id", tableName), + "VALUES (1, 'alice'), (2, 'bob'), (3, 'charlie')"); + assertQuery(String.format("SELECT id, name FROM %s ORDER BY name", tableName), + "VALUES (1, 'alice'), (2, 'bob'), (3, 'charlie')"); + assertQuery(String.format("SELECT id, value FROM %s ORDER BY value DESC", tableName), + "VALUES (3, DOUBLE '30.0'), (2, DOUBLE '20.0'), (1, DOUBLE '10.0')"); + assertQuery(String.format("SELECT name FROM %s ORDER BY value ASC", tableName), + "VALUES ('alice'), ('bob'), ('charlie')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithNullValues() + { + String tableName = "select_nulls"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR, value DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'a', 1.0), (2, NULL, 2.0), (3, 'c', NULL)", tableName), 3); + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE data IS NULL", tableName), + "VALUES (BIGINT '1')"); + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE value IS NULL", tableName), + "VALUES (BIGINT '1')"); + assertQuery(String.format("SELECT id FROM %s WHERE data IS NOT NULL ORDER BY id", tableName), + "VALUES (1), (3)"); + assertQuery(String.format("SELECT id FROM %s WHERE value IS NOT NULL ORDER BY id", tableName), + "VALUES (1), (2)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectComplexTypes() + { + String tableName = "select_complex"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "int_array ARRAY(INTEGER), " + + "string_map MAP(VARCHAR, INTEGER), " + + "person ROW(name VARCHAR, age INTEGER)" + + ") WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "1, ARRAY[1, 2, 3], MAP(ARRAY['a', 'b'], ARRAY[10, 20]), ROW('Alice', 30))", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "2, ARRAY[4, 5], MAP(ARRAY['x'], ARRAY[100]), ROW('Bob', 25))", tableName), 1); + assertQuery(String.format("SELECT id, int_array FROM %s WHERE id = 1", tableName), + "VALUES (1, ARRAY[1, 2, 3])"); + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), + "VALUES (BIGINT '2')"); + assertQuery(String.format("SELECT id FROM %s ORDER BY id", tableName), + "VALUES (1), (2)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithLimit() + { + String tableName = "select_limit"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, data VARCHAR) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e')", tableName), 5); + assertQuery(String.format("SELECT COUNT(*) FROM (SELECT * FROM %s LIMIT 3)", tableName), + "VALUES (BIGINT '3')"); + assertQuery(String.format("SELECT id FROM %s ORDER BY id LIMIT 2", tableName), + "VALUES (1), (2)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithMultipleFilters() + { + String tableName = "select_multiple_filters"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, category VARCHAR, amount DOUBLE, active BOOLEAN) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES " + + "(1, 'A', 100.0, true), (2, 'B', 200.0, false), (3, 'A', 150.0, true), " + + "(4, 'B', 250.0, true), (5, 'A', 175.0, false)", tableName), 5); + assertQuery(String.format("SELECT id FROM %s WHERE category = 'A' AND active = true ORDER BY id", tableName), + "VALUES (1), (3)"); + assertQuery(String.format("SELECT id FROM %s WHERE amount > 150.0 AND active = true ORDER BY id", tableName), + "VALUES (4)"); + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE category = 'B' OR active = false", tableName), + "VALUES (BIGINT '3')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithJoin() + { + String table1 = "select_join_1"; + String table2 = "select_join_2"; + + try { + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, name VARCHAR) WITH (format = 'PARQUET')", table1)); + assertUpdate(String.format("CREATE TABLE %s (id INTEGER, value DOUBLE) WITH (format = 'PARQUET')", table2)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'alice'), (2, 'bob'), (3, 'charlie')", table1), 3); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 10.0), (2, 20.0), (4, 40.0)", table2), 3); + assertQuery(String.format("SELECT t1.id, t1.name, t2.value FROM %s t1 INNER JOIN %s t2 ON t1.id = t2.id ORDER BY t1.id", table1, table2), + "VALUES (1, 'alice', DOUBLE '10.0'), (2, 'bob', DOUBLE '20.0')"); + assertQuery(String.format("SELECT COUNT(*) FROM %s t1 INNER JOIN %s t2 ON t1.id = t2.id", table1, table2), + "VALUES (BIGINT '2')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", table2)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", table1)); + } + } + + @Test + public void testSelectDistinct() + { + String tableName = "select_distinct"; + + try { + assertUpdate(String.format("CREATE TABLE %s (category VARCHAR, value INTEGER) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES ('A', 1), ('B', 2), ('A', 3), ('B', 4), ('A', 5)", tableName), 5); + assertQuery(String.format("SELECT DISTINCT category FROM %s ORDER BY category", tableName), + "VALUES ('A'), ('B')"); + assertQuery(String.format("SELECT COUNT(DISTINCT category) FROM %s", tableName), + "VALUES (BIGINT '2')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithHaving() + { + String tableName = "select_having"; + + try { + assertUpdate(String.format("CREATE TABLE %s (category VARCHAR, amount DOUBLE) WITH (format = 'PARQUET')", tableName)); + assertUpdate(String.format("INSERT INTO %s VALUES " + + "('A', 100.0), ('B', 200.0), ('A', 150.0), ('B', 250.0), ('C', 50.0)", tableName), 5); + assertQuery(String.format("SELECT category FROM %s GROUP BY category HAVING SUM(amount) > 200.0 ORDER BY category", tableName), + "VALUES ('A'), ('B')"); + assertQuery(String.format("SELECT category, COUNT(*) FROM %s GROUP BY category HAVING COUNT(*) > 1 ORDER BY category", tableName), + "VALUES ('A', BIGINT '2'), ('B', BIGINT '2')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } +} diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestUnpartitionedWrite.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestUnpartitionedWrite.java new file mode 100644 index 0000000000000..3610a69bae294 --- /dev/null +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/iceberg/TestUnpartitionedWrite.java @@ -0,0 +1,771 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.nativeworker.iceberg; + +import com.facebook.presto.testing.ExpectedQueryRunner; +import com.facebook.presto.testing.QueryRunner; +import com.facebook.presto.tests.AbstractTestQueryFramework; +import org.testng.annotations.Test; + +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.ICEBERG_DEFAULT_STORAGE_FORMAT; +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.javaIcebergQueryRunnerBuilder; +import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.nativeIcebergQueryRunnerBuilder; + +public class TestUnpartitionedWrite + extends AbstractTestQueryFramework +{ + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + return nativeIcebergQueryRunnerBuilder() + .setStorageFormat(ICEBERG_DEFAULT_STORAGE_FORMAT) + .setAddStorageFormatToPath(false) + .build(); + } + + @Override + protected ExpectedQueryRunner createExpectedQueryRunner() + throws Exception + { + return javaIcebergQueryRunnerBuilder() + .setStorageFormat(ICEBERG_DEFAULT_STORAGE_FORMAT) + .setAddStorageFormatToPath(false) + .build(); + } + + @Test + public void testPrimitiveTypes() + { + String tableName = "insert_primitives"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "col_boolean BOOLEAN, " + + "col_integer INTEGER, " + + "col_bigint BIGINT, " + + "col_real REAL, " + + "col_double DOUBLE, " + + "col_decimal_short DECIMAL(10, 2), " + + "col_decimal_long DECIMAL(30, 10), " + + "col_varchar VARCHAR, " + + "col_varbinary VARBINARY, " + + "col_date DATE, " + + "col_timestamp TIMESTAMP" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "true, " + + "12345, " + + "1234567890123, " + + "REAL '3.14', " + + "2.718281828, " + + "DECIMAL '12345.67', " + + "DECIMAL '12345678901234567890.1234567890', " + + "'hello world', " + + "X'48656C6C6F', " + + "DATE '2025-01-15', " + + "TIMESTAMP '2024-01-15 14:30:00'" + + ")", tableName), 1); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)", tableName), 1); + + assertQuery(String.format("SELECT col_boolean, col_integer, col_varchar FROM %s WHERE col_boolean IS NOT NULL", tableName), + "VALUES (true, 12345, 'hello world')"); + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE col_boolean IS NULL", tableName), "VALUES (BIGINT '1')"); + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '2')"); + assertQuery(String.format("SELECT col_bigint, col_date FROM %s WHERE col_boolean = true", tableName), + "VALUES (BIGINT '1234567890123', DATE '2025-01-15')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testArrayType() + { + String tableName = "insert_array"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "int_array ARRAY(INTEGER), " + + "varchar_array ARRAY(VARCHAR), " + + "nested_array ARRAY(ARRAY(INTEGER))" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "1, " + + "ARRAY[1, 2, 3, 4, 5], " + + "ARRAY['a', 'b', 'c'], " + + "ARRAY[ARRAY[1, 2], ARRAY[3, 4]]" + + ")", tableName), 1); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "2, NULL, NULL, NULL)", tableName), 1); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "3, ARRAY[], ARRAY[], ARRAY[])", tableName), 1); + + assertQuery(String.format("SELECT id, int_array FROM %s WHERE id = 1", tableName), + "VALUES (1, ARRAY[1, 2, 3, 4, 5])"); + assertQuery(String.format("SELECT id FROM %s WHERE int_array IS NULL", tableName), + "VALUES (2)"); + assertQuery(String.format("SELECT id, cardinality(int_array) FROM %s WHERE id = 3", tableName), + "VALUES (3, BIGINT '0')"); + assertQuery(String.format("SELECT id, varchar_array FROM %s WHERE id = 1", tableName), + "VALUES (1, ARRAY['a', 'b', 'c'])"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testMapType() + { + String tableName = "insert_map"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "simple_map MAP(VARCHAR, INTEGER), " + + "nested_map MAP(VARCHAR, ARRAY(INTEGER))" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "1, " + + "MAP(ARRAY['key1', 'key2'], ARRAY[100, 200]), " + + "MAP(ARRAY['k1', 'k2'], ARRAY[ARRAY[1, 2], ARRAY[3, 4]])" + + ")", tableName), 1); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "2, NULL, NULL)", tableName), 1); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "3, MAP(), MAP())", tableName), 1); + + assertQuery(String.format("SELECT id, simple_map FROM %s WHERE id = 1", tableName)); + assertQuery(String.format("SELECT id FROM %s WHERE simple_map IS NULL", tableName), + "VALUES (2)"); + assertQuery(String.format("SELECT id, cardinality(simple_map) FROM %s WHERE id = 3", tableName), + "VALUES (3, BIGINT '0')"); + assertQuery(String.format("SELECT id, simple_map['key1'] FROM %s WHERE id = 1", tableName), + "VALUES (1, 100)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testRowType() + { + String tableName = "insert_row"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "person ROW(name VARCHAR, age INTEGER), " + + "nested_row ROW(id INTEGER, address ROW(city VARCHAR, zip INTEGER))" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "1, " + + "ROW('Alice', 30), " + + "ROW(100, ROW('New York', 10001))" + + ")", tableName), 1); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "2, NULL, NULL)", tableName), 1); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "3, ROW(NULL, NULL), ROW(NULL, NULL))", tableName), 1); + + assertQuery(String.format("SELECT id, person.name, person.age FROM %s WHERE id = 1", tableName), + "VALUES (1, 'Alice', 30)"); + assertQuery(String.format("SELECT id FROM %s WHERE person IS NULL", tableName), + "VALUES (2)"); + assertQuery(String.format("SELECT id, nested_row.address.city FROM %s WHERE id = 1", tableName), + "VALUES (1, 'New York')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testComplexNestedTypes() + { + String tableName = "insert_complex"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "array_of_rows ARRAY(ROW(name VARCHAR, value INTEGER)), " + + "map_of_arrays MAP(VARCHAR, ARRAY(INTEGER)), " + + "row_with_array ROW(id INTEGER, tags ARRAY(VARCHAR))" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s VALUES (" + + "1, " + + "ARRAY[ROW('item1', 10), ROW('item2', 20)], " + + "MAP(ARRAY['key1', 'key2'], ARRAY[ARRAY[1, 2], ARRAY[3, 4]]), " + + "ROW(100, ARRAY['tag1', 'tag2', 'tag3'])" + + ")", tableName), 1); + + assertQuery(String.format("SELECT id FROM %s WHERE id = 1", tableName), "VALUES (1)"); + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '1')"); + assertQuery(String.format("SELECT id, cardinality(array_of_rows) FROM %s", tableName), + "VALUES (1, BIGINT '2')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testMultipleRows() + { + String tableName = "insert_multiple"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "name VARCHAR, " + + "value DOUBLE" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s VALUES " + + "(1, 'first', 1.1), " + + "(2, 'second', 2.2), " + + "(3, 'third', 3.3), " + + "(4, 'fourth', 4.4), " + + "(5, 'fifth', 5.5)", tableName), 5); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '5')"); + assertQuery(String.format("SELECT id, name FROM %s WHERE id = 3", tableName), + "VALUES (3, 'third')"); + assertQuery(String.format("SELECT SUM(value) FROM %s", tableName), "VALUES (DOUBLE '16.5')"); + assertQuery(String.format("SELECT name FROM %s ORDER BY id", tableName), + "VALUES ('first'), ('second'), ('third'), ('fourth'), ('fifth')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testInsertIntoSelect() + { + String sourceTable = "source"; + String targetTable = "target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "name VARCHAR, " + + "value DOUBLE" + + ") WITH (format = 'PARQUET')", sourceTable)); + + assertUpdate(String.format("INSERT INTO %s VALUES " + + "(1, 'Alice', 100.0), " + + "(2, 'Bob', 200.0), " + + "(3, 'Charlie', 300.0)", sourceTable), 3); + + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "name VARCHAR, " + + "value DOUBLE" + + ") WITH (format = 'PARQUET')", targetTable)); + + assertUpdate(String.format("INSERT INTO %s SELECT * FROM %s", targetTable, sourceTable), 3); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '3')"); + assertQuery(String.format("SELECT id, name, value FROM %s ORDER BY id", targetTable), + "VALUES (1, 'Alice', DOUBLE '100.0'), (2, 'Bob', DOUBLE '200.0'), (3, 'Charlie', DOUBLE '300.0')"); + + assertUpdate(String.format("INSERT INTO %s SELECT * FROM %s WHERE id > 1", targetTable, sourceTable), 2); + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '5')"); + + assertUpdate(String.format("INSERT INTO %s SELECT id, name, value * 2 FROM %s WHERE id = 1", targetTable, sourceTable), 1); + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '6')"); + assertQuery(String.format("SELECT value FROM %s WHERE id = 1 ORDER BY value", targetTable), + "VALUES (DOUBLE '100.0'), (DOUBLE '200.0')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", sourceTable)); + } + } + + @Test + public void testInsertIntoSelectWithComplexTypes() + { + String sourceTable = "complex_source"; + String targetTable = "complex_target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "int_array ARRAY(INTEGER), " + + "string_map MAP(VARCHAR, INTEGER), " + + "person ROW(name VARCHAR, age INTEGER)" + + ") WITH (format = 'PARQUET')", sourceTable)); + + assertUpdate(String.format("INSERT INTO %s VALUES " + + "(1, ARRAY[1, 2, 3], MAP(ARRAY['a', 'b'], ARRAY[10, 20]), ROW('Alice', 30)), " + + "(2, ARRAY[4, 5, 6], MAP(ARRAY['c', 'd'], ARRAY[30, 40]), ROW('Bob', 40))", sourceTable), 2); + + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "int_array ARRAY(INTEGER), " + + "string_map MAP(VARCHAR, INTEGER), " + + "person ROW(name VARCHAR, age INTEGER)" + + ") WITH (format = 'PARQUET')", targetTable)); + + assertUpdate(String.format("INSERT INTO %s SELECT * FROM %s", targetTable, sourceTable), 2); + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '2')"); + assertQuery(String.format("SELECT id, int_array FROM %s WHERE id = 1", targetTable), + "VALUES (1, ARRAY[1, 2, 3])"); + assertQuery(String.format("SELECT id, person.name, person.age FROM %s WHERE id = 2", targetTable), + "VALUES (2, 'Bob', 40)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", sourceTable)); + } + } + + @Test + public void testColumnReordering() + { + String tableName = "column_reorder"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "col1 INTEGER, " + + "col2 VARCHAR, " + + "col3 DOUBLE, " + + "col4 BOOLEAN" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s (col3, col1, col4, col2) VALUES (3.14, 42, true, 'test')", tableName), 1); + + assertQuery(String.format("SELECT col1, col2, col3, col4 FROM %s", tableName), + "VALUES (42, 'test', DOUBLE '3.14', true)"); + + assertUpdate(String.format("INSERT INTO %s (col2, col1, col3, col4) VALUES ('second', 100, 2.71, false)", tableName), 1); + + assertQuery(String.format("SELECT col1, col2, col3, col4 FROM %s WHERE col1 = 100", tableName), + "VALUES (100, 'second', DOUBLE '2.71', false)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testPartialColumns() + { + String tableName = "partial_columns"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "name VARCHAR, " + + "age INTEGER, " + + "email VARCHAR, " + + "score DOUBLE" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s (id, name) VALUES (1, 'Alice')", tableName), 1); + + assertQuery(String.format("SELECT id, name, age, email, score FROM %s WHERE id = 1", tableName), + "VALUES (1, 'Alice', NULL, NULL, NULL)"); + + assertUpdate(String.format("INSERT INTO %s (id, age, score) VALUES (2, 30, 95.5)", tableName), 1); + + assertQuery(String.format("SELECT id, name, age, email, score FROM %s WHERE id = 2", tableName), + "VALUES (2, NULL, 30, NULL, DOUBLE '95.5')"); + + assertUpdate(String.format("INSERT INTO %s (id, name, age, email, score) VALUES (3, 'Charlie', 25, 'charlie@example.com', 88.0)", tableName), 1); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '3')"); + assertQuery(String.format("SELECT COUNT(*) FROM %s WHERE name IS NULL", tableName), "VALUES (BIGINT '1')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testNullsInComplexTypes() + { + String tableName = "nulls_complex"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "nullable_array ARRAY(INTEGER), " + + "nullable_map MAP(VARCHAR, INTEGER), " + + "nullable_row ROW(name VARCHAR, value INTEGER)" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s VALUES (1, NULL, NULL, NULL)", tableName), 1); + + // Insert with empty collections (different from NULL) + assertUpdate(String.format("INSERT INTO %s VALUES (2, ARRAY[], MAP(), ROW(NULL, NULL))", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (3, ARRAY[1, NULL, 3], MAP(ARRAY['a'], ARRAY[NULL]), ROW('test', NULL))", tableName), 1); + assertQuery(String.format("SELECT id FROM %s WHERE nullable_array IS NULL", tableName), "VALUES (1)"); + + // Verify empty vs NULL distinction + assertQuery(String.format("SELECT id, cardinality(nullable_array) FROM %s WHERE id = 2", tableName), + "VALUES (2, BIGINT '0')"); + + // Verify NULL elements within complex types + assertQuery(String.format("SELECT id, nullable_row.name FROM %s WHERE id = 3", tableName), + "VALUES (3, 'test')"); + assertQuery(String.format("SELECT id FROM %s WHERE nullable_row.value IS NULL AND nullable_row.name = 'test'", tableName), + "VALUES (3)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithAggregation() + { + String sourceTable = "agg_source"; + String targetTable = "agg_target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "category VARCHAR, " + + "amount DOUBLE, " + + "quantity INTEGER" + + ") WITH (format = 'PARQUET')", sourceTable)); + + assertUpdate(String.format("INSERT INTO %s VALUES " + + "(1, 'Electronics', 100.0, 2), " + + "(2, 'Electronics', 200.0, 1), " + + "(3, 'Books', 50.0, 5), " + + "(4, 'Books', 30.0, 3), " + + "(5, 'Electronics', 150.0, 1)", sourceTable), 5); + + assertUpdate(String.format("CREATE TABLE %s (" + + "category VARCHAR, " + + "total_amount DOUBLE, " + + "total_quantity BIGINT, " + + "avg_amount DOUBLE, " + + "item_count BIGINT" + + ") WITH (format = 'PARQUET')", targetTable)); + + assertUpdate(String.format("INSERT INTO %s " + + "SELECT category, SUM(amount), SUM(quantity), AVG(amount), COUNT(*) " + + "FROM %s GROUP BY category", targetTable, sourceTable), 2); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '2')"); + assertQuery(String.format("SELECT category, total_amount, total_quantity, item_count FROM %s WHERE category = 'Electronics'", targetTable), + "VALUES ('Electronics', DOUBLE '450.0', BIGINT '4', BIGINT '3')"); + assertQuery(String.format("SELECT category, total_amount, total_quantity, item_count FROM %s WHERE category = 'Books'", targetTable), + "VALUES ('Books', DOUBLE '80.0', BIGINT '8', BIGINT '2')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", sourceTable)); + } + } + + @Test + public void testInsertWithSpecialCharacters() + { + String tableName = "special_chars"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "text VARCHAR" + + ") WITH (format = 'PARQUET')", tableName)); + + // Insert with Unicode characters + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'Hello δΈ–η•Œ 🌍')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (2, 'O''Brien')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (3, 'Line1\nLine2\tTabbed')", tableName), 1); + // Insert with various Unicode scripts + assertUpdate(String.format("INSERT INTO %s VALUES (4, 'ζ±Ÿη•”δ½•δΊΊεˆθ§ζœˆοΌŸζ±Ÿζœˆδ½•εΉ΄εˆη…§δΊΊοΌŸμ•ˆλ…•ν•˜μ„Έμš”')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (5, 'πŸ˜€ πŸ˜ƒ πŸ˜„ 😁')", tableName), 1); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '5')"); + assertQuery(String.format("SELECT text FROM %s WHERE id = 1", tableName), "VALUES ('Hello δΈ–η•Œ 🌍')"); + assertQuery(String.format("SELECT text FROM %s WHERE id = 2", tableName), "VALUES ('O''Brien')"); + assertQuery(String.format("SELECT text FROM %s WHERE id = 4", tableName), "VALUES ('ζ±Ÿη•”δ½•δΊΊεˆθ§ζœˆοΌŸζ±Ÿζœˆδ½•εΉ΄εˆη…§δΊΊοΌŸμ•ˆλ…•ν•˜μ„Έμš”')"); + assertQuery(String.format("SELECT text FROM %s WHERE id = 5", tableName), "VALUES ('πŸ˜€ πŸ˜ƒ πŸ˜„ 😁')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testInsertWithNumericEdgeCases() + { + String tableName = "test_numeric_edges"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "int_val INTEGER, " + + "bigint_val BIGINT, " + + "double_val DOUBLE, " + + "real_val REAL" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s VALUES (1, 2147483647, BIGINT '9223372036854775807', 1.7976931348623157E308, REAL '3.4028235E38')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (2, INT '-2147483648', BIGINT '-9223372036854775808', -1.7976931348623157E308, REAL '-3.4028235E38')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (3, INTEGER '0', BIGINT '0', 0.0, REAL '0.0')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (4, NULL, NULL, infinity(), REAL 'Infinity')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (5, NULL, NULL, -infinity(), REAL '-Infinity')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (6, NULL, NULL, nan(), REAL 'NaN')", tableName), 1); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '6')"); + assertQuery(String.format("SELECT int_val, bigint_val FROM %s WHERE id = 1", tableName), + "VALUES (2147483647, BIGINT '9223372036854775807')"); + assertQuery(String.format("SELECT int_val, bigint_val FROM %s WHERE id = 2", tableName), + "VALUES (INT '-2147483648', BIGINT '-9223372036854775808')"); + assertQuery(String.format("SELECT id FROM %s WHERE is_infinite(double_val) AND double_val > 0", tableName), "VALUES (4)"); + assertQuery(String.format("SELECT id FROM %s WHERE is_nan(double_val)", tableName), "VALUES (6)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithJoin() + { + String table1 = "join_table1"; + String table2 = "join_table2"; + String targetTable = "join_target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "name VARCHAR" + + ") WITH (format = 'PARQUET')", table1)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'Alice'), (2, 'Bob'), (3, 'Charlie')", table1), 3); + + assertUpdate(String.format("CREATE TABLE %s (" + + "user_id INTEGER, " + + "score DOUBLE, " + + "department VARCHAR" + + ") WITH (format = 'PARQUET')", table2)); + + assertUpdate(String.format("INSERT INTO %s VALUES (1, 95.5, 'Engineering'), (2, 88.0, 'Sales'), (3, 92.0, 'Marketing')", table2), 3); + + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "name VARCHAR, " + + "score DOUBLE, " + + "department VARCHAR" + + ") WITH (format = 'PARQUET')", targetTable)); + + assertUpdate(String.format("INSERT INTO %s SELECT t1.id, t1.name, t2.score, t2.department " + + "FROM %s t1 JOIN %s t2 ON t1.id = t2.user_id", targetTable, table1, table2), 3); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '3')"); + assertQuery(String.format("SELECT name, score, department FROM %s WHERE id = 1", targetTable), + "VALUES ('Alice', DOUBLE '95.5', 'Engineering')"); + assertQuery(String.format("SELECT name, department FROM %s WHERE id = 2", targetTable), + "VALUES ('Bob', 'Sales')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", table2)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", table1)); + } + } + + @Test + public void testWithCast() + { + String sourceTable = "cast_source"; + String targetTable = "cast_target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id VARCHAR, " + + "amount VARCHAR, " + + "is_active VARCHAR" + + ") WITH (format = 'PARQUET')", sourceTable)); + + assertUpdate(String.format("INSERT INTO %s VALUES ('1', '100.5', 'true'), ('2', '200.75', 'false')", sourceTable), 2); + + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "amount DOUBLE, " + + "is_active BOOLEAN" + + ") WITH (format = 'PARQUET')", targetTable)); + + assertUpdate(String.format("INSERT INTO %s SELECT CAST(id AS INTEGER), CAST(amount AS DOUBLE), CAST(is_active AS BOOLEAN) FROM %s", + targetTable, sourceTable), 2); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '2')"); + assertQuery(String.format("SELECT id, amount, is_active FROM %s WHERE id = 1", targetTable), + "VALUES (1, DOUBLE '100.5', true)"); + assertQuery(String.format("SELECT id, amount, is_active FROM %s WHERE id = 2", targetTable), + "VALUES (2, DOUBLE '200.75', false)"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", sourceTable)); + } + } + + @Test + public void testDateTimeEdgeCases() + { + String tableName = "datetime_edges"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "date_val DATE, " + + "timestamp_val TIMESTAMP" + + ") WITH (format = 'PARQUET')", tableName)); + + assertUpdate(String.format("INSERT INTO %s VALUES (1, DATE '1970-01-01', TIMESTAMP '1970-01-01 00:00:00')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (2, DATE '2024-02-29', TIMESTAMP '2024-02-29 23:59:59')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (3, DATE '2100-12-31', TIMESTAMP '2100-12-31 23:59:59.999')", tableName), 1); + assertUpdate(String.format("INSERT INTO %s VALUES (4, DATE '1900-01-01', TIMESTAMP '1900-01-01 00:00:00')", tableName), 1); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '4')"); + assertQuery(String.format("SELECT date_val FROM %s WHERE id = 1", tableName), "VALUES (DATE '1970-01-01')"); + assertQuery(String.format("SELECT date_val FROM %s WHERE id = 2", tableName), "VALUES (DATE '2024-02-29')"); + assertQuery(String.format("SELECT year(date_val), month(date_val), day(date_val) FROM %s WHERE id = 3", tableName), + "VALUES (BIGINT '2100', BIGINT '12', BIGINT '31')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + public void testSelectWithUnion() + { + String table1 = "union_table1"; + String table2 = "union_table2"; + String targetTable = "union_target"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "name VARCHAR, " + + "value DOUBLE" + + ") WITH (format = 'PARQUET')", table1)); + assertUpdate(String.format("INSERT INTO %s VALUES (1, 'Alice', 100.0), (2, 'Bob', 200.0)", table1), 2); + + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "name VARCHAR, " + + "value DOUBLE" + + ") WITH (format = 'PARQUET')", table2)); + assertUpdate(String.format("INSERT INTO %s VALUES (3, 'Charlie', 300.0), (4, 'David', 400.0)", table2), 2); + + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "name VARCHAR, " + + "value DOUBLE" + + ") WITH (format = 'PARQUET')", targetTable)); + + assertUpdate(String.format("INSERT INTO %s SELECT * FROM %s UNION ALL SELECT * FROM %s", + targetTable, table1, table2), 4); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", targetTable), "VALUES (BIGINT '4')"); + assertQuery(String.format("SELECT SUM(value) FROM %s", targetTable), "VALUES (DOUBLE '1000.0')"); + assertQuery(String.format("SELECT name FROM %s ORDER BY id", targetTable), + "VALUES ('Alice'), ('Bob'), ('Charlie'), ('David')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", targetTable)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", table2)); + assertUpdate(String.format("DROP TABLE IF EXISTS %s", table1)); + } + } + + @Test + public void testMaximumLengthVarchar() + { + String tableName = "long_varchar"; + + try { + assertUpdate(String.format("CREATE TABLE %s (" + + "id INTEGER, " + + "small_text VARCHAR, " + + "medium_text VARCHAR, " + + "large_text VARCHAR" + + ") WITH (format = 'PARQUET')", tableName)); + + // Insert small text (1KB) + assertUpdate(String.format("INSERT INTO %s (id, small_text) VALUES (1, rpad('A', 1024, 'A'))", tableName), 1); + + // Insert medium text (10KB) + assertUpdate(String.format("INSERT INTO %s (id, medium_text) VALUES (2, rpad('B', 10240, 'B'))", tableName), 1); + + // Insert large text (100KB) + assertUpdate(String.format("INSERT INTO %s (id, large_text) VALUES (3, rpad('C', 102400, 'C'))", tableName), 1); + + // Insert using concat and rpad + assertUpdate(String.format("INSERT INTO %s (id, large_text) VALUES (4, concat('START-', rpad('XYZ', 30000, 'XYZ'), '-END'))", tableName), 1); + + // Insert using multiple string functions + assertUpdate(String.format("INSERT INTO %s (id, medium_text) VALUES (5, upper(rpad('test', 10240, 'test')))", tableName), 1); + assertUpdate(String.format("INSERT INTO %s (id, small_text) VALUES (6, lpad('Z', 2048, 'Z'))", tableName), 1); + + assertQuery(String.format("SELECT COUNT(*) FROM %s", tableName), "VALUES (BIGINT '6')"); + + assertQuery(String.format("SELECT id, length(small_text) FROM %s WHERE id = 1", tableName), + "VALUES (1, BIGINT '1024')"); + assertQuery(String.format("SELECT id, length(medium_text) FROM %s WHERE id = 2", tableName), + "VALUES (2, BIGINT '10240')"); + assertQuery(String.format("SELECT id, length(large_text) FROM %s WHERE id = 3", tableName), + "VALUES (3, BIGINT '102400')"); + + // Verify content integrity using substr + assertQuery(String.format("SELECT substr(small_text, 1, 10) FROM %s WHERE id = 1", tableName), + "VALUES ('AAAAAAAAAA')"); + assertQuery(String.format("SELECT substr(large_text, 1, 6) FROM %s WHERE id = 4", tableName), + "VALUES ('START-')"); + assertQuery(String.format("SELECT substr(large_text, length(large_text) - 3, 4) FROM %s WHERE id = 4", tableName), + "VALUES ('-END')"); + + // Verify all characters are correct using rpad comparison + assertQuery(String.format("SELECT id FROM %s WHERE id = 1 AND small_text = rpad('A', 1024, 'A')", tableName), + "VALUES (1)"); + assertQuery(String.format("SELECT id FROM %s WHERE id = 2 AND medium_text = rpad('B', 10240, 'B')", tableName), + "VALUES (2)"); + assertQuery(String.format("SELECT id FROM %s WHERE id = 3 AND large_text = rpad('C', 102400, 'C')", tableName), + "VALUES (3)"); + assertQuery(String.format("SELECT id, length(small_text) FROM %s WHERE id = 6", tableName), + "VALUES (6, BIGINT '2048')"); + } + finally { + assertUpdate(String.format("DROP TABLE IF EXISTS %s", tableName)); + } + } +}