diff --git a/presto-docs/src/main/sphinx/presto_cpp/sidecar.rst b/presto-docs/src/main/sphinx/presto_cpp/sidecar.rst index 7f8e595f10ac7..2c00e86c7cac9 100644 --- a/presto-docs/src/main/sphinx/presto_cpp/sidecar.rst +++ b/presto-docs/src/main/sphinx/presto_cpp/sidecar.rst @@ -24,6 +24,14 @@ The following HTTP endpoints are implemented by the Presto C++ sidecar. Presto C++ worker. Each function's metadata is serialized to JSON in format ``JsonBasedUdfFunctionMetadata``. +.. function:: GET /v1/functions/{catalog} + + Returns a list of function metadata for all functions registered in the + Presto C++ worker that belong to the specified catalog. Each function's + metadata is serialized to JSON in format ``JsonBasedUdfFunctionMetadata``. + This endpoint allows filtering functions by catalog to support namespace + separation. + .. function:: POST /v1/velox/plan Converts a Presto plan fragment to its corresponding Velox plan and diff --git a/presto-native-execution/presto_cpp/main/CMakeLists.txt b/presto-native-execution/presto_cpp/main/CMakeLists.txt index 2239894914bb0..d86aa5e154052 100644 --- a/presto-native-execution/presto_cpp/main/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/CMakeLists.txt @@ -66,6 +66,7 @@ target_link_libraries( presto_operators presto_session_properties presto_velox_plan_conversion + presto_hive_functions velox_abfs velox_aggregates velox_caching diff --git a/presto-native-execution/presto_cpp/main/PrestoServer.cpp b/presto-native-execution/presto_cpp/main/PrestoServer.cpp index 56ac99f5acefd..683a784b53738 100644 --- a/presto-native-execution/presto_cpp/main/PrestoServer.cpp +++ b/presto-native-execution/presto_cpp/main/PrestoServer.cpp @@ -28,6 +28,7 @@ #include "presto_cpp/main/common/Utils.h" #include "presto_cpp/main/connectors/Registration.h" #include "presto_cpp/main/connectors/SystemConnector.h" +#include "presto_cpp/main/connectors/hive/functions/HiveFunctionRegistration.h" #include 
"presto_cpp/main/functions/FunctionMetadata.h" #include "presto_cpp/main/http/HttpConstants.h" #include "presto_cpp/main/http/filters/AccessLogFilter.h" @@ -50,6 +51,7 @@ #include "velox/common/file/FileSystems.h" #include "velox/common/memory/SharedArbitrator.h" #include "velox/connectors/Connector.h" +#include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/storage_adapters/abfs/RegisterAbfsFileSystem.h" #include "velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h" #include "velox/connectors/hive/storage_adapters/hdfs/RegisterHdfsFileSystem.h" @@ -1359,6 +1361,12 @@ void PrestoServer::registerFunctions() { prestoBuiltinFunctionPrefix_); velox::window::prestosql::registerAllWindowFunctions( prestoBuiltinFunctionPrefix_); + + if (velox::connector::hasConnector( + velox::connector::hive::HiveConnectorFactory::kHiveConnectorName) || + velox::connector::hasConnector("hive-hadoop2")) { + hive::functions::registerHiveNativeFunctions(); + } } void PrestoServer::registerRemoteFunctions() { @@ -1691,6 +1699,18 @@ void PrestoServer::registerSidecarEndpoints() { proxygen::ResponseHandler* downstream) { http::sendOkResponse(downstream, getFunctionsMetadata()); }); + httpServer_->registerGet( + R"(/v1/functions/([^/]+))", + [](proxygen::HTTPMessage* /*message*/, + const std::vector& pathMatch) { + return new http::CallbackRequestHandler( + [catalog = pathMatch[1]]( + proxygen::HTTPMessage* /*message*/, + std::vector>& /*body*/, + proxygen::ResponseHandler* downstream) { + http::sendOkResponse(downstream, getFunctionsMetadata(catalog)); + }); + }); httpServer_->registerPost( "/v1/velox/plan", [server = this]( diff --git a/presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt b/presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt index 328ff8bf38933..653bc5141d722 100644 --- a/presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt @@ 
-35,3 +35,5 @@ target_link_libraries( velox_type_fbhive velox_tpcds_connector ) + +add_subdirectory(hive) diff --git a/presto-native-execution/presto_cpp/main/connectors/hive/CMakeLists.txt b/presto-native-execution/presto_cpp/main/connectors/hive/CMakeLists.txt new file mode 100644 index 0000000000000..fb98894c302db --- /dev/null +++ b/presto-native-execution/presto_cpp/main/connectors/hive/CMakeLists.txt @@ -0,0 +1,13 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_subdirectory(functions) diff --git a/presto-native-execution/presto_cpp/main/connectors/hive/functions/CMakeLists.txt b/presto-native-execution/presto_cpp/main/connectors/hive/functions/CMakeLists.txt new file mode 100644 index 0000000000000..2435a6fd73e5f --- /dev/null +++ b/presto-native-execution/presto_cpp/main/connectors/hive/functions/CMakeLists.txt @@ -0,0 +1,22 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +add_library(presto_hive_functions HiveFunctionRegistration.cpp) +target_link_libraries( + presto_hive_functions + presto_dynamic_function_registrar + velox_functions_string +) + +if(PRESTO_ENABLE_TESTING) + add_subdirectory(tests) +endif() diff --git a/presto-native-execution/presto_cpp/main/connectors/hive/functions/HiveFunctionRegistration.cpp b/presto-native-execution/presto_cpp/main/connectors/hive/functions/HiveFunctionRegistration.cpp new file mode 100644 index 0000000000000..ffe09361605ca --- /dev/null +++ b/presto-native-execution/presto_cpp/main/connectors/hive/functions/HiveFunctionRegistration.cpp @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "presto_cpp/main/connectors/hive/functions/HiveFunctionRegistration.h" + +#include "presto_cpp/main/connectors/hive/functions/InitcapFunction.h" +#include "presto_cpp/main/functions/dynamic_registry/DynamicFunctionRegistrar.h" + +using namespace facebook::velox; +namespace facebook::presto::hive::functions { + +namespace { +void registerHiveFunctions() { + // Register functions under the 'hive.default' namespace. 
+ facebook::presto::registerPrestoFunction( + "initcap", "hive.default"); +} +} // namespace + +void registerHiveNativeFunctions() { + static std::once_flag once; + std::call_once(once, []() { registerHiveFunctions(); }); +} + +} // namespace facebook::presto::hive::functions diff --git a/presto-native-execution/presto_cpp/main/connectors/hive/functions/HiveFunctionRegistration.h b/presto-native-execution/presto_cpp/main/connectors/hive/functions/HiveFunctionRegistration.h new file mode 100644 index 0000000000000..338938f2bbb67 --- /dev/null +++ b/presto-native-execution/presto_cpp/main/connectors/hive/functions/HiveFunctionRegistration.h @@ -0,0 +1,23 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +namespace facebook::presto::hive::functions { + +// Registers Hive-specific native functions into the 'hive.default' namespace. +// This method is safe to call multiple times; it performs one-time registration +// guarded by an internal call_once. 
+void registerHiveNativeFunctions(); + +} // namespace facebook::presto::hive::functions diff --git a/presto-native-execution/presto_cpp/main/connectors/hive/functions/InitcapFunction.h b/presto-native-execution/presto_cpp/main/connectors/hive/functions/InitcapFunction.h new file mode 100644 index 0000000000000..c7bf162939b07 --- /dev/null +++ b/presto-native-execution/presto_cpp/main/connectors/hive/functions/InitcapFunction.h @@ -0,0 +1,53 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/functions/Macros.h" +#include "velox/functions/lib/string/StringImpl.h" + +namespace facebook::presto::hive::functions { + +/// The InitCapFunction capitalizes the first character of each word in a +/// string, and lowercases the rest. +template +struct InitCapFunction { + VELOX_DEFINE_FUNCTION_TYPES(T); + + // ASCII input always produces ASCII result. 
This is required for ASCII fast + // path + static constexpr bool is_default_ascii_behavior = true; + + FOLLY_ALWAYS_INLINE void call( + out_type& result, + const arg_type& input) { + velox::functions::stringImpl::initcap< + /*strictSpace=*/false, + /*isAscii=*/false, + /*turkishCasing=*/true, + /*greekFinalSigma=*/true>(result, input); + } + + FOLLY_ALWAYS_INLINE void callAscii( + out_type& result, + const arg_type& input) { + velox::functions::stringImpl::initcap< + /*strictSpace=*/false, + /*isAscii=*/true, + /*turkishCasing=*/true, + /*greekFinalSigma=*/true>(result, input); + } +}; + +} // namespace facebook::presto::hive::functions diff --git a/presto-native-execution/presto_cpp/main/connectors/hive/functions/tests/CMakeLists.txt b/presto-native-execution/presto_cpp/main/connectors/hive/functions/tests/CMakeLists.txt new file mode 100644 index 0000000000000..2089503c68181 --- /dev/null +++ b/presto-native-execution/presto_cpp/main/connectors/hive/functions/tests/CMakeLists.txt @@ -0,0 +1,28 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +add_executable(presto_hive_functions_test InitcapTest.cpp) + +add_test( + NAME presto_hive_functions_test + COMMAND presto_hive_functions_test + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} +) + +target_link_libraries( + presto_hive_functions_test + presto_hive_functions + presto_common + velox_functions_test_lib + GTest::gtest + GTest::gtest_main +) diff --git a/presto-native-execution/presto_cpp/main/connectors/hive/functions/tests/InitcapTest.cpp b/presto-native-execution/presto_cpp/main/connectors/hive/functions/tests/InitcapTest.cpp new file mode 100644 index 0000000000000..263d03aeb735b --- /dev/null +++ b/presto-native-execution/presto_cpp/main/connectors/hive/functions/tests/InitcapTest.cpp @@ -0,0 +1,80 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include "presto_cpp/main/connectors/hive/functions/HiveFunctionRegistration.h" +#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" + +namespace facebook::presto::functions::test { +class InitcapTest : public velox::functions::test::FunctionBaseTest { + protected: + static void SetUpTestCase() { + velox::functions::test::FunctionBaseTest::SetUpTestCase(); + facebook::presto::hive::functions::registerHiveNativeFunctions(); + } +}; + +TEST_F(InitcapTest, initcap) { + const auto initcap = [&](const std::optional& value) { + return evaluateOnce("\"hive.default.initcap\"(c0)", value); + }; + + // Unicode only. 
+ EXPECT_EQ( + initcap("àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ"), + "Àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ"); + EXPECT_EQ(initcap("αβγδεζηθικλμνξοπρςστυφχψ"), "Αβγδεζηθικλμνξοπρςστυφχψ"); + // Mix of ascii and unicode. + EXPECT_EQ(initcap("αβγδεζ world"), "Αβγδεζ World"); + EXPECT_EQ(initcap("αfoo wβ"), "Αfoo Wβ"); + // Ascii only. + EXPECT_EQ(initcap("hello world"), "Hello World"); + EXPECT_EQ(initcap("HELLO WORLD"), "Hello World"); + EXPECT_EQ(initcap("1234"), "1234"); + EXPECT_EQ(initcap("a b c d"), "A B C D"); + EXPECT_EQ(initcap("abcd"), "Abcd"); + // Numbers. + EXPECT_EQ(initcap("123"), "123"); + EXPECT_EQ(initcap("1abc"), "1abc"); + // Edge cases. + EXPECT_EQ(initcap(""), ""); + EXPECT_EQ(initcap(std::nullopt), std::nullopt); + + // Test with various whitespace characters + EXPECT_EQ(initcap("YQ\tY"), "Yq\tY"); + EXPECT_EQ(initcap("YQ\nY"), "Yq\nY"); + EXPECT_EQ(initcap("YQ\rY"), "Yq\rY"); + EXPECT_EQ(initcap("hello\tworld\ntest"), "Hello\tWorld\nTest"); + EXPECT_EQ(initcap("foo\r\nbar"), "Foo\r\nBar"); + + // Test with multiple consecutive whitespaces + EXPECT_EQ(initcap("hello world"), "Hello World"); + EXPECT_EQ(initcap("a b c"), "A B C"); + EXPECT_EQ(initcap("test\t\tvalue"), "Test\t\tValue"); + EXPECT_EQ(initcap("line\n\n\nbreak"), "Line\n\n\nBreak"); + + // Test with leading and trailing whitespaces + EXPECT_EQ(initcap(" hello"), " Hello"); + EXPECT_EQ(initcap("world "), "World "); + EXPECT_EQ(initcap(" spaces "), " Spaces "); + EXPECT_EQ(initcap("\thello"), "\tHello"); + EXPECT_EQ(initcap("\nworld"), "\nWorld"); + EXPECT_EQ(initcap("test\n"), "Test\n"); + + // Test with mixed whitespace types + EXPECT_EQ(initcap("hello \t\nworld"), "Hello \t\nWorld"); + EXPECT_EQ(initcap("a\tb\nc\rd"), "A\tB\nC\rD"); + EXPECT_EQ(initcap(" \t\n "), " \t\n "); +} +} // namespace facebook::presto::functions::test diff --git a/presto-native-execution/presto_cpp/main/functions/FunctionMetadata.cpp b/presto-native-execution/presto_cpp/main/functions/FunctionMetadata.cpp index 
eb673461ccc24..761e3500b1fa1 100644 --- a/presto-native-execution/presto_cpp/main/functions/FunctionMetadata.cpp +++ b/presto-native-execution/presto_cpp/main/functions/FunctionMetadata.cpp @@ -265,9 +265,14 @@ json buildWindowMetadata( } // namespace -json getFunctionsMetadata() { +json getFunctionsMetadata(const std::optional& catalog) { json j; + // Lambda to check if a function should be skipped based on catalog filter + auto skipCatalog = [&catalog](const std::string& functionCatalog) { + return catalog.has_value() && functionCatalog != catalog.value(); + }; + // Get metadata for all registered scalar functions in velox. const auto signatures = getFunctionSignatures(); static const std::unordered_set kBlockList = { @@ -285,6 +290,9 @@ json getFunctionsMetadata() { } const auto parts = getFunctionNameParts(name); + if (skipCatalog(parts[0])) { + continue; + } const auto schema = parts[1]; const auto function = parts[2]; j[function] = buildScalarMetadata(name, schema, entry.second); @@ -295,6 +303,9 @@ json getFunctionsMetadata() { if (!aggregateFunctions.at(entry.first).metadata.companionFunction) { const auto name = entry.first; const auto parts = getFunctionNameParts(name); + if (skipCatalog(parts[0])) { + continue; + } const auto schema = parts[1]; const auto function = parts[2]; j[function] = @@ -309,6 +320,9 @@ json getFunctionsMetadata() { if (aggregateFunctions.count(entry.first) == 0) { const auto name = entry.first; const auto parts = getFunctionNameParts(entry.first); + if (skipCatalog(parts[0])) { + continue; + } const auto schema = parts[1]; const auto function = parts[2]; j[function] = buildWindowMetadata(name, schema, entry.second.signatures); diff --git a/presto-native-execution/presto_cpp/main/functions/FunctionMetadata.h b/presto-native-execution/presto_cpp/main/functions/FunctionMetadata.h index 7a39aebb95037..d2a2c66d7a489 100644 --- a/presto-native-execution/presto_cpp/main/functions/FunctionMetadata.h +++ 
b/presto-native-execution/presto_cpp/main/functions/FunctionMetadata.h @@ -14,11 +14,13 @@ #pragma once +#include #include "presto_cpp/external/json/nlohmann/json.hpp" namespace facebook::presto { // Returns metadata for all registered functions as json. -nlohmann::json getFunctionsMetadata(); +nlohmann::json getFunctionsMetadata( + const std::optional& catalog = std::nullopt); } // namespace facebook::presto diff --git a/presto-native-execution/presto_cpp/main/functions/tests/FunctionMetadataTest.cpp b/presto-native-execution/presto_cpp/main/functions/tests/FunctionMetadataTest.cpp index 226133c9e437c..ba8573e5fa9fb 100644 --- a/presto-native-execution/presto_cpp/main/functions/tests/FunctionMetadataTest.cpp +++ b/presto-native-execution/presto_cpp/main/functions/tests/FunctionMetadataTest.cpp @@ -26,7 +26,6 @@ using namespace facebook::presto; using json = nlohmann::json; static const std::string kPrestoDefaultPrefix = "presto.default."; -static const std::string kDefaultSchema = "default"; class FunctionMetadataTest : public ::testing::Test { protected: @@ -117,3 +116,44 @@ TEST_F(FunctionMetadataTest, transformKeys) { TEST_F(FunctionMetadataTest, variance) { testFunction("variance", "Variance.json", 5); } + +TEST_F(FunctionMetadataTest, catalog) { + // Test with the "presto" catalog that is registered in SetUpTestSuite + std::string catalog = "presto"; + auto metadata = getFunctionsMetadata(catalog); + + // The result should be a JSON object with function names as keys + ASSERT_TRUE(metadata.is_object()); + ASSERT_FALSE(metadata.empty()); + + // Verify that common functions are present + ASSERT_TRUE(metadata.contains("abs")); + ASSERT_TRUE(metadata.contains("mod")); + + // Each function should have an array of signatures + for (auto it = metadata.begin(); it != metadata.end(); ++it) { + ASSERT_TRUE(it.value().is_array()) << "Function: " << it.key(); + ASSERT_FALSE(it.value().empty()) << "Function: " << it.key(); + + // Each signature should have the required 
fields + for (const auto& signature : it.value()) { + ASSERT_TRUE(signature.contains("outputType")) << "Function: " << it.key(); + ASSERT_TRUE(signature.contains("paramTypes")) << "Function: " << it.key(); + ASSERT_TRUE(signature.contains("schema")) << "Function: " << it.key(); + ASSERT_TRUE(signature.contains("functionKind")) + << "Function: " << it.key(); + + // Schema should be "default" since we registered with "presto.default." + // prefix + EXPECT_EQ(signature["schema"], "default") << "Function: " << it.key(); + } + } +} + +TEST_F(FunctionMetadataTest, nonExistentCatalog) { + auto metadata = getFunctionsMetadata("nonexistent"); + + // When no functions match, it returns a null JSON value or empty object + // The default json() constructor creates a null value + ASSERT_TRUE(metadata.is_null() || (metadata.is_object() && metadata.empty())); +} diff --git a/presto-native-sidecar-plugin/src/main/java/com/facebook/presto/sidecar/functionNamespace/NativeFunctionDefinitionProvider.java b/presto-native-sidecar-plugin/src/main/java/com/facebook/presto/sidecar/functionNamespace/NativeFunctionDefinitionProvider.java index 677752dd4d2c7..191ebcffd027a 100644 --- a/presto-native-sidecar-plugin/src/main/java/com/facebook/presto/sidecar/functionNamespace/NativeFunctionDefinitionProvider.java +++ b/presto-native-sidecar-plugin/src/main/java/com/facebook/presto/sidecar/functionNamespace/NativeFunctionDefinitionProvider.java @@ -14,10 +14,12 @@ package com.facebook.presto.sidecar.functionNamespace; import com.facebook.airlift.http.client.HttpClient; +import com.facebook.airlift.http.client.HttpUriBuilder; import com.facebook.airlift.http.client.Request; import com.facebook.airlift.json.JsonCodec; import com.facebook.airlift.log.Logger; import com.facebook.presto.functionNamespace.JsonBasedUdfFunctionMetadata; +import com.facebook.presto.functionNamespace.ServingCatalog; import com.facebook.presto.functionNamespace.UdfFunctionSignatureMap; import 
com.facebook.presto.sidecar.ForSidecarInfo; import com.facebook.presto.spi.NodeManager; @@ -26,6 +28,7 @@ import com.google.common.collect.ImmutableMap; import com.google.inject.Inject; +import java.net.URI; import java.util.List; import java.util.Map; @@ -42,32 +45,39 @@ public class NativeFunctionDefinitionProvider private final JsonCodec>> nativeFunctionSignatureMapJsonCodec; private final HttpClient httpClient; private final NativeFunctionNamespaceManagerConfig config; + private final String catalogName; @Inject public NativeFunctionDefinitionProvider( @ForSidecarInfo HttpClient httpClient, JsonCodec>> nativeFunctionSignatureMapJsonCodec, - NativeFunctionNamespaceManagerConfig config) + NativeFunctionNamespaceManagerConfig config, + @ServingCatalog String catalogName) { this.nativeFunctionSignatureMapJsonCodec = requireNonNull(nativeFunctionSignatureMapJsonCodec, "nativeFunctionSignatureMapJsonCodec is null"); this.httpClient = requireNonNull(httpClient, "httpClient is null"); this.config = requireNonNull(config, "config is null"); + this.catalogName = requireNonNull(catalogName, "catalogName is null"); } @Override public UdfFunctionSignatureMap getUdfDefinition(NodeManager nodeManager) { try { - Request request = - prepareGet().setUri( - getSidecarLocationOnStartup( - nodeManager, config.getSidecarNumRetries(), config.getSidecarRetryDelay().toMillis())).build(); - Map> nativeFunctionSignatureMap = httpClient.execute(request, createJsonResponseHandler(nativeFunctionSignatureMapJsonCodec)); + // Base endpoint: /v1/functions + URI baseUri = getSidecarLocationOnStartup( + nodeManager, config.getSidecarNumRetries(), config.getSidecarRetryDelay().toMillis()); + // Catalog-filtered endpoint: /v1/functions/{catalog} + URI catalogUri = HttpUriBuilder.uriBuilderFrom(baseUri).appendPath(catalogName).build(); + Request catalogRequest = prepareGet().setUri(catalogUri).build(); + Map> nativeFunctionSignatureMap = + httpClient.execute(catalogRequest, 
createJsonResponseHandler(nativeFunctionSignatureMapJsonCodec)); return new UdfFunctionSignatureMap(ImmutableMap.copyOf(nativeFunctionSignatureMap)); } catch (Exception e) { - throw new PrestoException(INVALID_ARGUMENTS, "Failed to get functions from sidecar.", e); + // Do not fall back to the unfiltered /v1/functions endpoint; that would leak functions across catalogs. + throw new PrestoException(INVALID_ARGUMENTS, String.format("Failed to get catalog-scoped functions from sidecar for catalog '%s'", catalogName), e); } } diff --git a/presto-native-sidecar-plugin/src/test/java/com/facebook/presto/sidecar/NativeSidecarPluginQueryRunnerUtils.java b/presto-native-sidecar-plugin/src/test/java/com/facebook/presto/sidecar/NativeSidecarPluginQueryRunnerUtils.java index c8c7e1123f974..7f49d63b746de 100644 --- a/presto-native-sidecar-plugin/src/test/java/com/facebook/presto/sidecar/NativeSidecarPluginQueryRunnerUtils.java +++ b/presto-native-sidecar-plugin/src/test/java/com/facebook/presto/sidecar/NativeSidecarPluginQueryRunnerUtils.java @@ -30,12 +30,25 @@ public static void setupNativeSidecarPlugin(QueryRunner queryRunner) queryRunner.loadSessionPropertyProvider( NativeSystemSessionPropertyProviderFactory.NAME, ImmutableMap.of()); + + // Register native catalog for built-in functions queryRunner.loadFunctionNamespaceManager( NativeFunctionNamespaceManagerFactory.NAME, "native", ImmutableMap.of( "supported-function-languages", "CPP", "function-implementation-type", "CPP")); + + // Register hive catalog for hive-specific functions. + // Note: The C++ PrestoServer registers hive functions only when a hive connector is present. + // Since tests always set up the hive connector, hive functions will be available.
+ queryRunner.loadFunctionNamespaceManager( + NativeFunctionNamespaceManagerFactory.NAME, + "hive", + ImmutableMap.of( + "supported-function-languages", "CPP", + "function-implementation-type", "CPP")); + queryRunner.loadTypeManager(NativeTypeManagerFactory.NAME); queryRunner.loadPlanCheckerProviderManager("native", ImmutableMap.of()); queryRunner.installPlugin(new NativeSqlInvokedFunctionsPlugin()); diff --git a/presto-native-sidecar-plugin/src/test/java/com/facebook/presto/sidecar/TestNativeSidecarHiveCatalog.java b/presto-native-sidecar-plugin/src/test/java/com/facebook/presto/sidecar/TestNativeSidecarHiveCatalog.java new file mode 100644 index 0000000000000..a35dbb4f87398 --- /dev/null +++ b/presto-native-sidecar-plugin/src/test/java/com/facebook/presto/sidecar/TestNativeSidecarHiveCatalog.java @@ -0,0 +1,157 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.sidecar; + +import com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils; +import com.facebook.presto.sidecar.functionNamespace.NativeFunctionNamespaceManagerFactory; +import com.facebook.presto.testing.QueryRunner; +import com.facebook.presto.tests.AbstractTestQueryFramework; +import com.facebook.presto.tests.DistributedQueryRunner; +import com.google.common.collect.ImmutableMap; +import org.testng.annotations.Test; + +import static com.facebook.presto.nativeworker.NativeQueryRunnerUtils.createLineitem; +import static com.facebook.presto.nativeworker.NativeQueryRunnerUtils.createNation; +import static com.facebook.presto.nativeworker.NativeQueryRunnerUtils.createOrders; +import static com.facebook.presto.nativeworker.NativeQueryRunnerUtils.createOrdersEx; +import static com.facebook.presto.nativeworker.NativeQueryRunnerUtils.createRegion; + +public class TestNativeSidecarHiveCatalog + extends AbstractTestQueryFramework +{ + @Override + protected void createTables() + { + QueryRunner queryRunner = (QueryRunner) getExpectedQueryRunner(); + createLineitem(queryRunner); + createNation(queryRunner); + createOrders(queryRunner); + createOrdersEx(queryRunner); + createRegion(queryRunner); + } + + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + DistributedQueryRunner queryRunner = (DistributedQueryRunner) PrestoNativeQueryRunnerUtils.nativeHiveQueryRunnerBuilder() + .setAddStorageFormatToPath(true) + .setCoordinatorSidecarEnabled(true) + .build(); + TestNativeSidecarPlugin.setupNativeSidecarPlugin(queryRunner); + queryRunner.loadFunctionNamespaceManager( + NativeFunctionNamespaceManagerFactory.NAME, + "hive", + ImmutableMap.of( + "supported-function-languages", "CPP", + "function-implementation-type", "CPP")); + + return queryRunner; + } + + @Override + protected QueryRunner createExpectedQueryRunner() + throws Exception + { + return PrestoNativeQueryRunnerUtils.javaHiveQueryRunnerBuilder() + 
.setAddStorageFormatToPath(true) + .build(); + } + + @Test + public void testInitcap() + { + assertQuery("SELECT hive.default.initcap('Hello world')", "SELECT('Hello World')"); + assertQuery("SELECT hive.default.initcap('abcd')", "SELECT('Abcd')"); + assertQuery("SELECT hive.default.initcap('a b c')", "SELECT('A B C')"); + assertQuery("SELECT hive.default.initcap('')", "SELECT('')"); + assertQuery("SELECT hive.default.initcap('x')", "SELECT('X')"); + assertQuery("SELECT hive.default.initcap('hello123world')", "SELECT('Hello123world')"); + assertQuery("SELECT hive.default.initcap('hello-world')", "SELECT('Hello-world')"); + assertQuery("SELECT hive.default.initcap(NULL)", "SELECT CAST(NULL AS VARCHAR)"); + assertQuery("SELECT hive.default.initcap('test')", "SELECT('Test')"); + } + + @Test + public void testInitcapWithBuiltInFunctions() + { + assertQuery("SELECT hive.default.initcap(reverse('Hello world'))", "SELECT('Dlrow Olleh')"); + assertQuery("SELECT hive.default.initcap(from_utf8(from_base64('aGVsbG8gd29ybGQ=')))", "SELECT from_utf8(from_base64('SGVsbG8gV29ybGQ='))"); + assertQuery("SELECT to_base64(to_utf8(hive.default.initcap('a b c')))", "SELECT to_base64(to_utf8('A B C'))"); + assertQuery("SELECT to_base64(to_utf8(hive.default.initcap('hello123world')))", "SELECT to_base64(to_utf8('Hello123world'))"); + } + + @Test + public void testInitcapWithNullValues() + { + assertQuery( + "SELECT hive.default.initcap(CASE WHEN nationkey = 0 THEN NULL ELSE name END) " + + "FROM nation WHERE nationkey < 2 ORDER BY nationkey", + "VALUES (CAST(NULL AS VARCHAR)), ('Argentina')"); + assertQuery( + "SELECT COUNT(*) FROM nation WHERE hive.default.initcap(CASE WHEN nationkey < 0 THEN name ELSE NULL END) IS NULL", + "SELECT BIGINT '25'"); + } + + @Test + public void testInitcapWithStringOperations() + { + assertQuery( + "SELECT hive.default.initcap(CONCAT(name, ' region')) FROM region WHERE regionkey = 0", + "SELECT 'Africa Region'"); + assertQuery( + "SELECT 
hive.default.initcap(SUBSTR(name, 1, 3)) FROM nation WHERE nationkey = 0",
+                "SELECT 'Alg'");
+        assertQuery(
+                "SELECT hive.default.initcap(LOWER(name)) FROM nation WHERE nationkey IN (0, 1) ORDER BY nationkey",
+                "VALUES ('Algeria'), ('Argentina')");
+        assertQuery(
+                "SELECT hive.default.initcap(UPPER(name)) FROM region WHERE regionkey < 2 ORDER BY regionkey",
+                "VALUES ('Africa'), ('America')");
+        assertQuery(
+                "SELECT hive.default.initcap(REPLACE(name, 'A', 'X')) FROM region WHERE regionkey = 0",
+                "SELECT 'Xfricx'");
+        assertQuery(
+                "SELECT hive.default.initcap(TRIM(CONCAT(' ', name, ' '))) FROM nation WHERE nationkey = 0",
+                "SELECT 'Algeria'");
+    }
+
+    @Test
+    public void testInitcapWithColumnValues()
+    {
+        // Test initcap with column values to ensure worker-side evaluation
+        assertQuery("SELECT hive.default.initcap(name) FROM region", "VALUES ('Africa'), ('America'), ('Asia'), ('Europe'), ('Middle East')");
+        assertQuery(
+                "SELECT hive.default.initcap(name) FROM nation WHERE nationkey < 5 ORDER BY nationkey",
+                "VALUES ('Algeria'), ('Argentina'), ('Brazil'), ('Canada'), ('Egypt')");
+        assertQuery(
+                "SELECT hive.default.initcap(name) FROM region ORDER BY regionkey",
+                "VALUES ('Africa'), ('America'), ('Asia'), ('Europe'), ('Middle East')");
+        assertQuery(
+                "SELECT COUNT(*), hive.default.initcap(name) FROM region GROUP BY name ORDER BY name",
+                "VALUES (BIGINT '1', 'Africa'), (BIGINT '1', 'America'), (BIGINT '1', 'Asia'), (BIGINT '1', 'Europe'), (BIGINT '1', 'Middle East')");
+        assertQuery(
+                "SELECT LENGTH(hive.default.initcap(comment)) FROM nation WHERE nationkey = 0",
+                "SELECT LENGTH(comment) FROM nation WHERE nationkey = 0");
+        assertQuery(
+                "SELECT regionkey, LENGTH(hive.default.initcap(comment)) FROM region WHERE regionkey < 3 ORDER BY regionkey",
+                "SELECT regionkey, LENGTH(comment) FROM region WHERE regionkey < 3 ORDER BY regionkey");
+        assertQuery(
+                "SELECT hive.default.initcap(name) FROM nation WHERE name IN ('ALGERIA', 'ARGENTINA', 'BRAZIL') ORDER BY name",
+                "VALUES ('Algeria'), ('Argentina'), ('Brazil')");
+        assertQuery(
+                "SELECT COUNT(DISTINCT hive.default.initcap(name)) FROM nation",
+                "SELECT BIGINT '25'");
+    }
+}
diff --git a/presto-native-sidecar-plugin/src/test/java/com/facebook/presto/sidecar/TestNativeSidecarNotSetForCatalog.java b/presto-native-sidecar-plugin/src/test/java/com/facebook/presto/sidecar/TestNativeSidecarNotSetForCatalog.java
new file mode 100644
index 0000000000000..7fa59166d846e
--- /dev/null
+++ b/presto-native-sidecar-plugin/src/test/java/com/facebook/presto/sidecar/TestNativeSidecarNotSetForCatalog.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.sidecar;
+
+import com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils;
+import com.facebook.presto.testing.QueryRunner;
+import com.facebook.presto.tests.AbstractTestQueryFramework;
+import org.testng.annotations.Test;
+
+/**
+ * Checks function resolution on a native Hive query runner built with the coordinator sidecar
+ * disabled: catalog-qualified function names must fail with a "not registered" error, while the
+ * unqualified {@code array_sum} still resolves.
+ * <p>
+ * Expected-message patterns escape the dots of the qualified name and end in {@code .*} so that
+ * text following "not registered" does not break the match.
+ */
+public class TestNativeSidecarNotSetForCatalog
+        extends AbstractTestQueryFramework
+{
+    @Override
+    protected QueryRunner createQueryRunner()
+            throws Exception
+    {
+        return PrestoNativeQueryRunnerUtils.nativeHiveQueryRunnerBuilder()
+                .setAddStorageFormatToPath(true)
+                .setCoordinatorSidecarEnabled(false)
+                .build();
+    }
+
+    @Override
+    protected QueryRunner createExpectedQueryRunner()
+            throws Exception
+    {
+        return PrestoNativeQueryRunnerUtils.javaHiveQueryRunnerBuilder()
+                .setAddStorageFormatToPath(true)
+                .build();
+    }
+
+    @Test
+    public void testHiveCatalogFailure()
+    {
+        assertQueryFails("SELECT hive.default.initcap('Hello world')", "(?s).*Function hive\\.default\\.initcap not registered.*");
+    }
+
+    @Test
+    public void testNativeCatalogFailure()
+    {
+        assertQueryFails("SELECT native.default.array_sum(ARRAY[1, 2, 3])", "(?s).*Function native\\.default\\.array_sum not registered.*");
+    }
+
+    @Test
+    public void testDefaultCatalog()
+    {
+        assertQuery("SELECT array_sum(ARRAY[1, 2, 3])");
+    }
+
+    @Test
+    public void testCustomCatalogFailure()
+    {
+        assertQueryFails("SELECT mycatalog.myschema.custom_func('test')", "(?s).*Function mycatalog\\.myschema\\.custom_func not registered.*");
+    }
+}
diff --git a/presto-openapi/src/main/resources/rest_function_server.yaml b/presto-openapi/src/main/resources/rest_function_server.yaml
index e9bab77abc10b..7a1cff5e3363c 100644
--- a/presto-openapi/src/main/resources/rest_function_server.yaml
+++ b/presto-openapi/src/main/resources/rest_function_server.yaml
@@ -41,6 +41,51 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/UdfSignatureMap'
+  # NOTE(review): OpenAPI treats templated paths that differ only in the
+  # parameter name as identical, so this path item is ambiguous with
+  # '/v1/functions/{schema}' below; confirm how servers are expected to route it.
+  /v1/functions/{catalog}:
+    parameters:
+      - name: catalog
+        in: path
+        required: true
+        schema:
+          type: string
+        description: The catalog name to filter functions by (e.g., 'native', 'hive').
+        example: "hive"
+    get:
+      summary: Retrieve list of functions in the specified catalog.
+      description: |
+        This endpoint returns functions that are registered under a specific catalog namespace.
+        Functions in Presto can be namespaced by catalog (e.g., hive.default.initcap, native.default.abs).
+        This endpoint filters and returns only those functions that belong to the specified catalog.
+
+        This is particularly useful for:
+        - Separating built-in functions from connector-specific functions
+        - Organizing custom C++ functions by their associated connector or catalog
+        - Supporting multiple function namespace managers with different catalogs
+
+        Example: GET /v1/functions/hive returns only functions registered under the 'hive' catalog.
+      responses:
+        '200':
+          description: A map of function names to lists of function metadata for the specified catalog.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/UdfSignatureMap'
+              example:
+                initcap:
+                  - docString: "initcap"
+                    functionKind: "SCALAR"
+                    outputType: "varchar"
+                    paramTypes: ["varchar"]
+                    schema: "default"
+                    routineCharacteristics:
+                      language: "CPP"
+                      determinism: "DETERMINISTIC"
+                      nullCallClause: "RETURNS_NULL_ON_NULL_INPUT"
+        '404':
+          description: The specified catalog was not found or contains no functions.
   /v1/functions/{schema}:
     parameters:
       - name: schema