From 8960f041c8d90d12d327b447c67a258a1e31aa85 Mon Sep 17 00:00:00 2001 From: Aaditya Srinivasan Date: Wed, 29 Apr 2026 19:25:18 +0530 Subject: [PATCH 1/5] Track used functions during expression decomposition --- cpp/src/gandiva/expr_decomposer.cc | 1 + cpp/src/gandiva/expr_decomposer.h | 6 +++++ cpp/src/gandiva/expr_decomposer_test.cc | 32 +++++++++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc index 921829db6a95..56ac98baa9de 100644 --- a/cpp/src/gandiva/expr_decomposer.cc +++ b/cpp/src/gandiva/expr_decomposer.cc @@ -70,6 +70,7 @@ Status ExprDecomposer::Visit(const FunctionNode& in_node) { const NativeFunction* native_function = registry_.LookupSignature(signature); DCHECK(native_function) << "Missing Signature " << signature.ToString(); + used_functions_.emplace(native_function->pc_name()); // decompose the children. std::vector args; for (auto& child : node.children()) { diff --git a/cpp/src/gandiva/expr_decomposer.h b/cpp/src/gandiva/expr_decomposer.h index 90a27744b362..e3de40d62b51 100644 --- a/cpp/src/gandiva/expr_decomposer.h +++ b/cpp/src/gandiva/expr_decomposer.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "gandiva/arrow.h" @@ -49,6 +50,10 @@ class GANDIVA_EXPORT ExprDecomposer : public NodeVisitor { return status; } + [[nodiscard]] const std::unordered_set& UsedFunctions() const { + return used_functions_; + } + private: ARROW_DISALLOW_COPY_AND_ASSIGN(ExprDecomposer); @@ -125,6 +130,7 @@ class GANDIVA_EXPORT ExprDecomposer : public NodeVisitor { Annotator& annotator_; std::stack> if_entries_stack_; ValueValidityPairPtr result_; + std::unordered_set used_functions_; bool nested_if_else_; }; diff --git a/cpp/src/gandiva/expr_decomposer_test.cc b/cpp/src/gandiva/expr_decomposer_test.cc index 194c13bc82c8..b954aee7ab92 100644 --- a/cpp/src/gandiva/expr_decomposer_test.cc +++ b/cpp/src/gandiva/expr_decomposer_test.cc @@ -19,11 +19,13 @@ 
#include +#include "arrow/testing/gtest_util.h" #include "gandiva/annotator.h" #include "gandiva/dex.h" #include "gandiva/function_registry.h" #include "gandiva/gandiva_aliases.h" #include "gandiva/node.h" +#include "gandiva/tree_expr_builder.h" namespace gandiva { @@ -405,4 +407,34 @@ TEST_F(TestExprDecomposer, TestComplexIfCondition) { EXPECT_TRUE(decomposer.if_entries_stack_.empty()); } +TEST_F(TestExprDecomposer, TestGetUsedFunctionsInExpr) { + Annotator annotator; + ExprDecomposer decomposer(*registry_, annotator); + auto field0 = field("f0", int32()); + auto f0 = std::make_shared(field0); + auto is_not_null_func = TreeExprBuilder::MakeFunction("isnotnull", {f0}, boolean()); + ValueValidityPairPtr value_validity; + ASSERT_OK(decomposer.Decompose(*is_not_null_func, &value_validity)); + auto used_functions = decomposer.UsedFunctions(); + ASSERT_EQ(used_functions.size(), 1); + ASSERT_EQ(used_functions.find("isnotnull_int32") != used_functions.end(), true); +} + +TEST_F(TestExprDecomposer, TestGetMultipleUsedFunctionsInExpr) { + Annotator annotator; + ExprDecomposer decomposer(*registry_, annotator); + auto field0 = field("f0", int32()); + auto field1 = field("f1", int32()); + auto f0 = std::make_shared(field0); + auto f1 = std::make_shared(field1); + auto add_func = TreeExprBuilder::MakeFunction("add", {f0, f1}, int32()); + auto is_not_null_func = + TreeExprBuilder::MakeFunction("isnotnull", {add_func}, boolean()); + ValueValidityPairPtr value_validity; + ASSERT_OK(decomposer.Decompose(*is_not_null_func, &value_validity)); + auto used_functions = decomposer.UsedFunctions(); + ASSERT_EQ(used_functions.size(), 2); + ASSERT_EQ(used_functions.find("add_int32_int32") != used_functions.end(), true); + ASSERT_EQ(used_functions.find("isnotnull_int32") != used_functions.end(), true); +} } // namespace gandiva From 9b4a1c7acc1e448d285e1998b73bbc0808f8d23c Mon Sep 17 00:00:00 2001 From: Aaditya Srinivasan Date: Wed, 29 Apr 2026 20:03:14 +0530 Subject: [PATCH 2/5] Delay 
Engine initialization until expression decomposition --- cpp/src/gandiva/engine.cc | 4 ++-- cpp/src/gandiva/engine.h | 4 +++- cpp/src/gandiva/llvm_generator.cc | 32 ++++++++++++++++++++++++++----- cpp/src/gandiva/llvm_generator.h | 6 +++++- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 901421c86cb3..fa4c5f6728b8 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -346,7 +346,8 @@ Engine::Engine(const std::shared_ptr& conf, Engine::~Engine() {} -Status Engine::Init() { +Status Engine::Init(std::unordered_set function_names) { + used_functions_ = std::move(function_names); std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs); // Add mappings for global functions that can be accessed from LLVM/IR module. @@ -394,7 +395,6 @@ Result> Engine::Make( std::unique_ptr engine{ new Engine(conf, std::move(jit), std::move(shared_target_machine), cached)}; - ARROW_RETURN_NOT_OK(engine->Init()); return engine; } diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 20165787cb66..a20e82499a1b 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -93,13 +93,14 @@ class GANDIVA_EXPORT Engine { // Create a global string as a pointer with "i8*" type. llvm::Constant* CreateGlobalStringPtr(const std::string& string); + Status Init(std::unordered_set function_names); + private: Engine(const std::shared_ptr& conf, std::unique_ptr lljit, std::shared_ptr target_machine, bool cached); // Post construction init. This _must_ be called after the constructor. 
- Status Init(); static void InitOnce(); @@ -123,6 +124,7 @@ class GANDIVA_EXPORT Engine { LLVMTypes types_; std::vector functions_to_compile_; + std::unordered_set used_functions_; bool optimize_ = true; bool module_finalized_ = false; diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index 0f0918b3a1c7..9aaaf8592918 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -67,12 +67,24 @@ Status LLVMGenerator::SetLLVMObjectCache(GandivaObjectCache& object_cache) { return engine_->SetLLVMObjectCache(object_cache); } -Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr output) { - int idx = static_cast(compiled_exprs_.size()); - // decompose the expression to separate out value and validities. +arrow::Result LLVMGenerator::Decompose( + const ExpressionPtr& expr) { ExprDecomposer decomposer(*function_registry_, annotator_); + ValueValidityPairPtr value_validity; - ARROW_RETURN_NOT_OK(decomposer.Decompose(*expr->root(), &value_validity)); + ARROW_RETURN_NOT_OK( + decomposer.Decompose(*expr->root(), &value_validity)); + + auto& used_functions = decomposer.UsedFunctions(); + functions_in_exprs_.insert( + used_functions.begin(), + used_functions.end()); + + return value_validity; +} + +Status LLVMGenerator::Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity, const FieldDescriptorPtr output) { + int idx = static_cast(compiled_exprs_.size()); // Generate the IR function for the decomposed expression. 
auto compiled_expr = std::make_unique(value_validity, output); std::string fn_name = "expr_" + std::to_string(idx) + "_" + @@ -93,9 +105,19 @@ Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr out Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode mode) { selection_vector_mode_ = mode; + std::vector expr_value_validities; for (auto& expr : exprs) { + ARROW_ASSIGN_OR_RAISE(auto value_validity, Decompose(expr)); + expr_value_validities.push_back(value_validity); + } + + ARROW_RETURN_NOT_OK(engine_->Init(std::move(functions_in_exprs_))); + + for (size_t i = 0; i < exprs.size(); ++i) { + const auto& expr = exprs[i]; auto output = annotator_.AddOutputFieldDescriptor(expr->result()); - ARROW_RETURN_NOT_OK(Add(expr, output)); + auto value_validity = expr_value_validities[i]; + ARROW_RETURN_NOT_OK(Add(expr, std::move(value_validity), output)); } // Compile and inject into the process' memory the generated function. diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index a60e2bf6b29e..a30b7d7f4d17 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -190,9 +190,12 @@ class GANDIVA_EXPORT LLVMGenerator { bool has_arena_allocs_; }; + arrow::Result Decompose(const ExpressionPtr& expr); + // Generate the code for one expression for default mode, with the output of // the expression going to 'output'. - Status Add(const ExpressionPtr expr, const FieldDescriptorPtr output); + Status Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity, + const FieldDescriptorPtr output); /// Generate code to load the vector at specified index in the 'arg_addrs' array. 
llvm::Value* LoadVectorAtIndex(llvm::Value* arg_addrs, llvm::Type* type, int idx, @@ -269,6 +272,7 @@ class GANDIVA_EXPORT LLVMGenerator { // used for debug bool enable_ir_traces_; std::vector trace_strings_; + std::unordered_set functions_in_exprs_; }; } // namespace gandiva From e27b758702b2325e4ae7199dd1fcb61c7f086a11 Mon Sep 17 00:00:00 2001 From: Aaditya Srinivasan Date: Wed, 29 Apr 2026 21:07:39 +0530 Subject: [PATCH 3/5] [C++][Gandiva] Skip unused C function mappings during engine initialization --- cpp/src/gandiva/engine.cc | 7 +++++++ cpp/src/gandiva/engine.h | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index fa4c5f6728b8..821198575b50 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -599,6 +599,13 @@ Result Engine::CompiledFunction(const std::string& function) { void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_type, const std::vector& args, void* func) { + bool is_internal_func = + internal_functions_.find(name) != internal_functions_.end(); + + if (!(is_internal_func || + used_functions_.find(name) != used_functions_.end())) { + return; + } const auto prototype = llvm::FunctionType::get(ret_type, args, /*is_var_arg*/ false); llvm::Function::Create(prototype, llvm::GlobalValue::ExternalLinkage, name, module()); AddAbsoluteSymbol(*lljit_, name, func); diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index a20e82499a1b..634cf962c6a2 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -126,6 +126,24 @@ class GANDIVA_EXPORT Engine { std::vector functions_to_compile_; std::unordered_set used_functions_; + static inline const std::unordered_set internal_functions_ = { + "gdv_fn_context_arena_malloc", + "gdv_fn_context_set_error_msg", + "gdv_fn_populate_varlen_vector", + "gdv_fn_context_arena_reset", + "gdv_fn_in_expr_lookup_int32", + "gdv_fn_in_expr_lookup_int64", + 
"gdv_fn_in_expr_lookup_float", + "gdv_fn_in_expr_lookup_double", + "gdv_fn_in_expr_lookup_decimal", + "gdv_fn_in_expr_lookup_utf8", + + "bitMapGetBit", + "bitMapSetBit", + "bitMapValidityGetBit", + "bitMapClearBitIfFalse", + }; + bool optimize_ = true; bool module_finalized_ = false; bool cached_; From 13d41089edd8caa4eaba1848d7afd93b049e6572 Mon Sep 17 00:00:00 2001 From: Aaditya Srinivasan Date: Wed, 29 Apr 2026 21:56:55 +0530 Subject: [PATCH 4/5] [C++][Gandiva] Add Benchmark --- cpp/src/gandiva/tests/micro_benchmarks.cc | 33 +++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/cpp/src/gandiva/tests/micro_benchmarks.cc b/cpp/src/gandiva/tests/micro_benchmarks.cc index 450e691323ca..df1068917892 100644 --- a/cpp/src/gandiva/tests/micro_benchmarks.cc +++ b/cpp/src/gandiva/tests/micro_benchmarks.cc @@ -450,6 +450,29 @@ static void TimedTestExprCompilation(benchmark::State& state) { } } +static void TimedTestNonBitcodeExprCompilation(benchmark::State& state, bool use_cache) { + int32_t iteration = 0; + for (auto _ : state) { + // schema for input fields + double literal_value = use_cache ? 
1.0 : static_cast(iteration); + auto seed = TreeExprBuilder::MakeLiteral(literal_value); + auto schema = arrow::schema({}); + + // output field + auto field_sin = field("c1", float64()); + + // seed changes on every iteration only when use_cache is false (so the cache is not hit); with use_cache it is always 1.0 + auto sin_func = TreeExprBuilder::MakeFunction("sin", {seed}, float64()); + + auto expr_0 = TreeExprBuilder::MakeExpression(sin_func, field_sin); + + std::shared_ptr projector; + ASSERT_OK(Projector::Make(schema, {expr_0}, TestConfiguration(), &projector)); + + ++iteration; + } +} + static void DecimalAdd2Fast(benchmark::State& state) { // use lesser precision to test the fast-path DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision - 6, 18); @@ -490,6 +513,16 @@ static void DecimalAdd3Large(benchmark::State& state) { DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 18, true); } +static void TimedTestNonBitcodeExprCompilationNoCache(benchmark::State& state) { + TimedTestNonBitcodeExprCompilation(state, false); +} + +static void TimedTestNonBitcodeExprCompilationWithCache(benchmark::State& state) { + TimedTestNonBitcodeExprCompilation(state, true); +} + +BENCHMARK(TimedTestNonBitcodeExprCompilationNoCache)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestNonBitcodeExprCompilationWithCache)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestExprCompilation)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestAdd3)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestBigNested)->Unit(benchmark::kMicrosecond); From 37b2321cdcf21f6f15d6fb9fd5ca463a3905f03b Mon Sep 17 00:00:00 2001 From: Aaditya Srinivasan Date: Fri, 1 May 2026 21:32:17 +0530 Subject: [PATCH 5/5] [C++][Gandiva] Selectively register external C functions based on expression usage --- cpp/src/gandiva/engine.cc | 17 +++++++++---- cpp/src/gandiva/engine.h | 34 ++++++++++++++----------- cpp/src/gandiva/external_c_functions.cc | 14 ++++++++++ cpp/src/gandiva/llvm_generator.cc | 13 ++++------ cpp/src/gandiva/llvm_generator.h | 2 +- 5 files changed, 51 
insertions(+), 29 deletions(-) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 821198575b50..0465f0f4e5ff 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -348,10 +348,19 @@ Engine::~Engine() {} Status Engine::Init(std::unordered_set function_names) { used_functions_ = std::move(function_names); + selective_mapping_enabled_ = true; std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs); // Add mappings for global functions that can be accessed from LLVM/IR module. ARROW_RETURN_NOT_OK(AddGlobalMappings()); + selective_mapping_enabled_ = false; + used_functions_.clear(); + return Status::OK(); +} + +Status Engine::Init() { + std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs); + ARROW_RETURN_NOT_OK(AddGlobalMappings()); return Status::OK(); } @@ -395,6 +404,7 @@ Result> Engine::Make( std::unique_ptr engine{ new Engine(conf, std::move(jit), std::move(shared_target_machine), cached)}; + ARROW_RETURN_NOT_OK(engine->Init()); return engine; } @@ -599,11 +609,8 @@ Result Engine::CompiledFunction(const std::string& function) { void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_type, const std::vector& args, void* func) { - bool is_internal_func = - internal_functions_.find(name) != internal_functions_.end(); - - if (!(is_internal_func || - used_functions_.find(name) != used_functions_.end())) { + auto* existing = module()->getFunction(name); + if (existing != nullptr) { return; } const auto prototype = llvm::FunctionType::get(ret_type, args, /*is_var_arg*/ false); diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 634cf962c6a2..695a782a5019 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -94,8 +94,11 @@ class GANDIVA_EXPORT Engine { llvm::Constant* CreateGlobalStringPtr(const std::string& string); Status Init(std::unordered_set function_names); + Status Init(); private: + friend class ExternalCFunctions; + 
Engine(const std::shared_ptr& conf, std::unique_ptr lljit, std::shared_ptr target_machine, bool cached); @@ -127,27 +130,28 @@ class GANDIVA_EXPORT Engine { std::unordered_set used_functions_; static inline const std::unordered_set internal_functions_ = { - "gdv_fn_context_arena_malloc", - "gdv_fn_context_set_error_msg", - "gdv_fn_populate_varlen_vector", - "gdv_fn_context_arena_reset", - "gdv_fn_in_expr_lookup_int32", - "gdv_fn_in_expr_lookup_int64", - "gdv_fn_in_expr_lookup_float", - "gdv_fn_in_expr_lookup_double", - "gdv_fn_in_expr_lookup_decimal", - "gdv_fn_in_expr_lookup_utf8", - - "bitMapGetBit", - "bitMapSetBit", - "bitMapValidityGetBit", - "bitMapClearBitIfFalse", + "gdv_fn_context_arena_malloc", + "gdv_fn_context_set_error_msg", + "gdv_fn_populate_varlen_vector", + "gdv_fn_context_arena_reset", + "gdv_fn_in_expr_lookup_int32", + "gdv_fn_in_expr_lookup_int64", + "gdv_fn_in_expr_lookup_float", + "gdv_fn_in_expr_lookup_double", + "gdv_fn_in_expr_lookup_decimal", + "gdv_fn_in_expr_lookup_utf8", + + "bitMapGetBit", + "bitMapSetBit", + "bitMapValidityGetBit", + "bitMapClearBitIfFalse", }; bool optimize_ = true; bool module_finalized_ = false; bool cached_; bool functions_loaded_ = false; + bool selective_mapping_enabled_ = false; std::shared_ptr function_registry_; std::string module_ir_; // The lifetime of the TargetMachine is shared with LLJIT. 
This prevents unnecessary diff --git a/cpp/src/gandiva/external_c_functions.cc b/cpp/src/gandiva/external_c_functions.cc index 7944cd018fdb..5d0720a3b1fb 100644 --- a/cpp/src/gandiva/external_c_functions.cc +++ b/cpp/src/gandiva/external_c_functions.cc @@ -67,7 +67,21 @@ namespace gandiva { Status ExternalCFunctions::AddMappings(Engine* engine) const { const auto& c_funcs = function_registry_->GetCFunctions(); const auto types = engine->types(); + + // Build allowed set ONCE before the loop + std::unordered_set allowed; + if (engine->selective_mapping_enabled_) { + allowed = engine->internal_functions_; + allowed.insert(engine->used_functions_.begin(), engine->used_functions_.end()); + } + for (auto& [func, func_ptr] : c_funcs) { + const std::string& name = func.pc_name(); + + if (engine->selective_mapping_enabled_ && !allowed.contains(name)) { + continue; + } + for (const auto& sig : func.signatures()) { ARROW_ASSIGN_OR_RAISE(auto llvm_signature, MapToLLVMSignature(sig, func, types)); auto& [args, ret_llvm_type] = llvm_signature; diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index 9aaaf8592918..4d9cfc8addbe 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -67,23 +67,20 @@ Status LLVMGenerator::SetLLVMObjectCache(GandivaObjectCache& object_cache) { return engine_->SetLLVMObjectCache(object_cache); } -arrow::Result LLVMGenerator::Decompose( - const ExpressionPtr& expr) { +arrow::Result LLVMGenerator::Decompose(const ExpressionPtr& expr) { ExprDecomposer decomposer(*function_registry_, annotator_); ValueValidityPairPtr value_validity; - ARROW_RETURN_NOT_OK( - decomposer.Decompose(*expr->root(), &value_validity)); + ARROW_RETURN_NOT_OK(decomposer.Decompose(*expr->root(), &value_validity)); auto& used_functions = decomposer.UsedFunctions(); - functions_in_exprs_.insert( - used_functions.begin(), - used_functions.end()); + functions_in_exprs_.insert(used_functions.begin(), 
used_functions.end()); return value_validity; } -Status LLVMGenerator::Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity, const FieldDescriptorPtr output) { +Status LLVMGenerator::Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity, + const FieldDescriptorPtr output) { int idx = static_cast(compiled_exprs_.size()); // Generate the IR function for the decomposed expression. auto compiled_expr = std::make_unique(value_validity, output); diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index a30b7d7f4d17..2e97a2a73069 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -194,7 +194,7 @@ class GANDIVA_EXPORT LLVMGenerator { // Generate the code for one expression for default mode, with the output of // the expression going to 'output'. - Status Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity, + Status Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity, const FieldDescriptorPtr output); /// Generate code to load the vector at specified index in the 'arg_addrs' array.