From bc029be71215d92d97104d581ec8a6d4fbedef46 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Sat, 4 Apr 2026 15:10:01 +0100 Subject: [PATCH 1/5] Initial version of DuckDB engine for MariaDB based on DuckDB 1.5.2. --- .gitmodules | 3 ++ cmake/make_dist.cmake.in | 5 +++ debian/autobake-deb.sh | 8 ++++ storage/duckdb/CMakeLists.txt | 73 +++++++++++++++++++++++++++++++++++ storage/duckdb/duckdb | 1 + 5 files changed, 90 insertions(+) create mode 100644 storage/duckdb/CMakeLists.txt create mode 160000 storage/duckdb/duckdb diff --git a/.gitmodules b/.gitmodules index 18bcb465fa251..27a211fcad10e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -17,3 +17,6 @@ [submodule "storage/columnstore/columnstore"] path = storage/columnstore/columnstore url = https://github.com/mariadb-corporation/mariadb-columnstore-engine.git +[submodule "storage/duckdb/duckdb"] + path = storage/duckdb/duckdb + url = https://github.com/drrtuy/duckdb-engine diff --git a/cmake/make_dist.cmake.in b/cmake/make_dist.cmake.in index d5380b2034ec7..d37db7da3c448 100644 --- a/cmake/make_dist.cmake.in +++ b/cmake/make_dist.cmake.in @@ -56,6 +56,11 @@ IF(GIT_EXECUTABLE) IF(NOT RESULT EQUAL 0) SET(GIT_EXECUTABLE) ENDIF() + EXECUTE_PROCESS( + COMMAND "${GIT_EXECUTABLE}" submodule foreach "${GIT_EXECUTABLE} checkout-index --all --prefix=${PACKAGE_DIR}/storage/duckdb/duckdb/$path/" + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/storage/duckdb/duckdb + RESULT_VARIABLE RESULT + ) ENDIF() CONFIGURE_FILE(${CMAKE_BINARY_DIR}/include/source_revision.h diff --git a/debian/autobake-deb.sh b/debian/autobake-deb.sh index 220cf1ab781ef..6079902cf90a0 100755 --- a/debian/autobake-deb.sh +++ b/debian/autobake-deb.sh @@ -177,6 +177,14 @@ then fi fi +# Enable DuckDB storage engine plugin packaging +if grep -q "$architecture" storage/duckdb/duckdb/debian/control +then + cp -v storage/duckdb/duckdb/debian/mariadb-plugin-duckdb.* debian/ + echo >> debian/control + cat storage/duckdb/duckdb/debian/control >> debian/control +fi + if [ -n 
"${AUTOBAKE_PREP_CONTROL_RULES_ONLY:-}" ] then exit 0 diff --git a/storage/duckdb/CMakeLists.txt b/storage/duckdb/CMakeLists.txt new file mode 100644 index 0000000000000..4a0c4a650ef69 --- /dev/null +++ b/storage/duckdb/CMakeLists.txt @@ -0,0 +1,73 @@ +IF(NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/duckdb/CMakeLists.txt) + return() +ENDIF() + +IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Linux") + return() +ENDIF() + +IF(NOT (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR + CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")) + return() +ENDIF() + +# Check C++17 compiler support +MY_CHECK_CXX_COMPILER_FLAG("-std=c++17") +IF(NOT have_CXX__std_c__17) + MESSAGE(STATUS "DuckDB: C++ compiler does not support -std=c++17, skipping") + RETURN() +ENDIF() + +# libduckdb_bundle.a is built without debug STL wrappers. +# Mismatched _GLIBCXX_DEBUG changes sizeof(std::vector) → SIGSEGV. +SET(CMAKE_CXX_FLAGS_DEBUG + "${CMAKE_CXX_FLAGS_DEBUG} -U_GLIBCXX_DEBUG -U_GLIBCXX_ASSERTIONS") + +add_subdirectory(duckdb) + +IF(TARGET duckdb) + # MTR discovers plugins at storage/<engine>/, not storage/<engine>/<subdir>/ + SET_TARGET_PROPERTIES(duckdb PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") + INSTALL_MYSQL_TEST("${CMAKE_CURRENT_SOURCE_DIR}/duckdb/mysql-test/" + "plugin/duckdb") + + # MYSQL_ADD_PLUGIN runs in storage/duckdb/duckdb/ and bumps CPACK_* vars + # one level up (here). Re-bump them to the root scope so include(CPack) + # sees them. Without this, no MariaDB-duckdb-engine.spec is generated. 
+ SET(CPACK_COMPONENTS_ALL ${CPACK_COMPONENTS_ALL} PARENT_SCOPE) + SET(CPACK_COMPONENT_DUCKDB-ENGINE_GROUP + ${CPACK_COMPONENT_DUCKDB-ENGINE_GROUP} PARENT_SCOPE) + SET(CPACK_COMPONENT_DUCKDB-ENGINESYMLINKS_GROUP + ${CPACK_COMPONENT_DUCKDB-ENGINESYMLINKS_GROUP} PARENT_SCOPE) + SET(CPACK_RPM_duckdb-engine_PACKAGE_REQUIRES + ${CPACK_RPM_duckdb-engine_PACKAGE_REQUIRES} PARENT_SCOPE) + + IF(RPM) + SET(CPACK_RPM_duckdb-engine_PACKAGE_SUMMARY + "MariaDB DuckDB storage engine" PARENT_SCOPE) + SET(CPACK_RPM_duckdb-engine_PACKAGE_DESCRIPTION + "The MariaDB DuckDB storage engine embeds DuckDB, an in-process analytical database, as a MariaDB storage engine for fast OLAP queries over local data." PARENT_SCOPE) + + # Mark common parent directories as %ignore so the duckdb-engine RPM + # does not claim ownership of dirs owned by MariaDB-server/-common. + # The "%define ignore \#" macro is already set by cmake/cpack_rpm.cmake. + SET(CPACK_RPM_duckdb-engine_USER_FILELIST + "%ignore /etc" + "%ignore /etc/my.cnf.d" + "%ignore /usr" + "%ignore /usr/lib64" + "%ignore /usr/lib64/mysql" + "%ignore /usr/lib64/mysql/plugin" + "%ignore /usr/share" + "%ignore /usr/share/mysql" + PARENT_SCOPE) + + # Apply DuckDB-specific CPack overrides at packaging time. + # CPACK_PROJECT_CONFIG_FILE is included by CPack after CPackConfig.cmake, + # letting these settings override the main project's. + SET(CPACK_PROJECT_CONFIG_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/duckdb/cmake/cpack_overrides.cmake" + CACHE FILEPATH "DuckDB CPack overrides" FORCE) + ENDIF() +ENDIF() diff --git a/storage/duckdb/duckdb b/storage/duckdb/duckdb new file mode 160000 index 0000000000000..e663297172af5 --- /dev/null +++ b/storage/duckdb/duckdb @@ -0,0 +1 @@ +Subproject commit e663297172af58e9c61f3ba636ae73471c753482 From 619da8df7371eba33a495ddd3af4797b25d925d3 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Sun, 26 Apr 2026 20:01:15 +0100 Subject: [PATCH 2/5] Replace configure-time git submodule with configure-time cmake FetchContent. 
--- .gitmodules | 3 --- storage/duckdb/.gitignore | 2 ++ storage/duckdb/CMakeLists.txt | 27 ++++++++++++++++++++++++++- storage/duckdb/duckdb | 1 - 4 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 storage/duckdb/.gitignore delete mode 160000 storage/duckdb/duckdb diff --git a/.gitmodules b/.gitmodules index 27a211fcad10e..18bcb465fa251 100644 --- a/.gitmodules +++ b/.gitmodules @@ -17,6 +17,3 @@ [submodule "storage/columnstore/columnstore"] path = storage/columnstore/columnstore url = https://github.com/mariadb-corporation/mariadb-columnstore-engine.git -[submodule "storage/duckdb/duckdb"] - path = storage/duckdb/duckdb - url = https://github.com/drrtuy/duckdb-engine diff --git a/storage/duckdb/.gitignore b/storage/duckdb/.gitignore new file mode 100644 index 0000000000000..988a72479ec99 --- /dev/null +++ b/storage/duckdb/.gitignore @@ -0,0 +1,2 @@ +# Fetched at configure time by FetchContent (see CMakeLists.txt). +duckdb/ diff --git a/storage/duckdb/CMakeLists.txt b/storage/duckdb/CMakeLists.txt index 4a0c4a650ef69..001d3a1a1cb20 100644 --- a/storage/duckdb/CMakeLists.txt +++ b/storage/duckdb/CMakeLists.txt @@ -1,4 +1,4 @@ -IF(NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/duckdb/CMakeLists.txt) +IF("NO" STREQUAL "${PLUGIN_DUCKDB}") return() ENDIF() @@ -23,6 +23,31 @@ ENDIF() SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -U_GLIBCXX_DEBUG -U_GLIBCXX_ASSERTIONS") +# Fetch the DuckDB engine plugin sources at configure time. +# Replaces the former git submodule at storage/duckdb/duckdb. 
+INCLUDE(FetchContent) +SET(DUCKDB_ENGINE_GIT_REPO "https://github.com/drrtuy/duckdb-engine" + CACHE STRING "DuckDB engine plugin git repository") +SET(DUCKDB_ENGINE_GIT_TAG "a1749287decd7262ca6c26c3c32c8d693796791a" + CACHE STRING "DuckDB engine plugin commit (full SHA)") + +FetchContent_Declare(duckdb_engine + GIT_REPOSITORY "${DUCKDB_ENGINE_GIT_REPO}" + GIT_TAG "${DUCKDB_ENGINE_GIT_TAG}" + GIT_SHALLOW FALSE # a depth-1 clone may lack the pinned full-SHA commit + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/duckdb" +) +FetchContent_GetProperties(duckdb_engine) +IF(NOT duckdb_engine_POPULATED) + MESSAGE(STATUS "Fetching DuckDB engine plugin @ ${DUCKDB_ENGINE_GIT_TAG}") + FetchContent_Populate(duckdb_engine) +ENDIF() + +IF(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/duckdb/CMakeLists.txt") + MESSAGE(WARNING "DuckDB engine sources missing after FetchContent; skipping") + RETURN() +ENDIF() + add_subdirectory(duckdb) IF(TARGET duckdb) diff --git a/storage/duckdb/duckdb b/storage/duckdb/duckdb deleted file mode 160000 index e663297172af5..0000000000000 --- a/storage/duckdb/duckdb +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e663297172af58e9c61f3ba636ae73471c753482 From f14ac029d96cfbd28e4f255384bd001c4ab37529 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Sun, 3 May 2026 21:19:58 +0100 Subject: [PATCH 3/5] Do not download DuckDB engine code if it exists. --- storage/duckdb/CMakeLists.txt | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/storage/duckdb/CMakeLists.txt b/storage/duckdb/CMakeLists.txt index 001d3a1a1cb20..de2f669c5854c 100644 --- a/storage/duckdb/CMakeLists.txt +++ b/storage/duckdb/CMakeLists.txt @@ -26,16 +26,25 @@ SET(CMAKE_CXX_FLAGS_DEBUG # Fetch the DuckDB engine plugin sources at configure time. # Replaces the former git submodule at storage/duckdb/duckdb. 
INCLUDE(FetchContent) -SET(DUCKDB_ENGINE_GIT_REPO "https://github.com/drrtuy/duckdb-engine" +SET(DUCKDB_ENGINE_GIT_REPO "https://github.com/MariaDB/duckdb-engine" CACHE STRING "DuckDB engine plugin git repository") -SET(DUCKDB_ENGINE_GIT_TAG "a1749287decd7262ca6c26c3c32c8d693796791a" +SET(DUCKDB_ENGINE_GIT_TAG "e663297172af58e9c61f3ba636ae73471c753482" CACHE STRING "DuckDB engine plugin commit (full SHA)") +SET(DUCKDB_ENGINE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/duckdb") + +IF(EXISTS "${DUCKDB_ENGINE_SOURCE_DIR}/CMakeLists.txt") + # Source tree already present — use it as-is, no git operations. + MESSAGE(STATUS "DuckDB engine: using existing sources in ${DUCKDB_ENGINE_SOURCE_DIR}") + SET(FETCHCONTENT_SOURCE_DIR_DUCKDB_ENGINE "${DUCKDB_ENGINE_SOURCE_DIR}" + CACHE PATH "Pre-populated DuckDB engine source" FORCE) +ENDIF() + FetchContent_Declare(duckdb_engine GIT_REPOSITORY "${DUCKDB_ENGINE_GIT_REPO}" GIT_TAG "${DUCKDB_ENGINE_GIT_TAG}" GIT_SHALLOW FALSE # a depth-1 clone may lack the pinned full-SHA commit - SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/duckdb" + SOURCE_DIR "${DUCKDB_ENGINE_SOURCE_DIR}" ) FetchContent_GetProperties(duckdb_engine) IF(NOT duckdb_engine_POPULATED) From 87f547c32404595104e799dc3a77076d899e58bd Mon Sep 17 00:00:00 2001 From: drrtuy Date: Tue, 2 Jun 2026 11:11:56 +0100 Subject: [PATCH 4/5] Expose static symbols leveraged in DuckDB to stringify WHERE conditions for other MariaDB engines. 
--- sql/sql_select.cc | 30 +++++++++++++++--------------- sql/sql_select.h | 14 ++++++++++++++ storage/duckdb/CMakeLists.txt | 2 +- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 633b65499b578..aefdd6d5d883b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -254,19 +254,19 @@ static int join_ft_read_first(JOIN_TAB *tab); static int join_ft_read_next(READ_RECORD *info); int join_read_always_key_or_null(JOIN_TAB *tab); int join_read_next_same_or_null(READ_RECORD *info); -static COND *make_cond_for_table(THD *thd, Item *cond,table_map table, - table_map used_table, - int join_tab_idx_arg, - bool exclude_expensive_cond, - bool retain_ref_cond); -static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, - Item *cond, - table_map tables, - table_map used_table, - int join_tab_idx_arg, - bool exclude_expensive_cond, - bool retain_ref_cond, - bool is_top_and_level); +COND *make_cond_for_table(THD *thd, Item *cond,table_map table, + table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond, + bool retain_ref_cond); +COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, + Item *cond, + table_map tables, + table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond, + bool retain_ref_cond, + bool is_top_and_level); static Item* part_of_refkey(TABLE *form,Field *field); static bool test_if_cheaper_ordering(bool in_join_optimizer, @@ -26377,7 +26377,7 @@ bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) make_cond_for_info_schema() uses similar algorithm as well. 
*/ -static Item * +Item * make_cond_for_table(THD *thd, Item *cond, table_map tables, table_map used_table, int join_tab_idx_arg, @@ -26391,7 +26391,7 @@ make_cond_for_table(THD *thd, Item *cond, table_map tables, } -static Item * +Item * make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, table_map tables, table_map used_table, int join_tab_idx_arg, diff --git a/sql/sql_select.h b/sql/sql_select.h index 9c224e730269a..ea1f51b51cae5 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -2285,6 +2285,20 @@ void free_underlaid_joins(THD *thd, SELECT_LEX *select); bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result); +COND *make_cond_for_table(THD *thd, Item *cond, table_map tables, + table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond, + bool retain_ref_cond); +COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, + Item *cond, + table_map tables, + table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond, + bool retain_ref_cond, + bool is_top_and_level); + /* General routine to change field->ptr of a NULL-terminated array of Field objects. 
Useful when needed to call val_int, val_str or similar and the diff --git a/storage/duckdb/CMakeLists.txt b/storage/duckdb/CMakeLists.txt index de2f669c5854c..c49ba917c1fec 100644 --- a/storage/duckdb/CMakeLists.txt +++ b/storage/duckdb/CMakeLists.txt @@ -28,7 +28,7 @@ SET(CMAKE_CXX_FLAGS_DEBUG INCLUDE(FetchContent) SET(DUCKDB_ENGINE_GIT_REPO "https://github.com/MariaDB/duckdb-engine" CACHE STRING "DuckDB engine plugin git repository") -SET(DUCKDB_ENGINE_GIT_TAG "e663297172af58e9c61f3ba636ae73471c753482" +SET(DUCKDB_ENGINE_GIT_TAG "1639aebe79fdd471fc8848c3757a6b0427471534" CACHE STRING "DuckDB engine plugin commit (full SHA)") SET(DUCKDB_ENGINE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/duckdb") From a60800d33993c2ea9aa588e65a658cdd6baee260 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Fri, 5 Jun 2026 10:18:13 +0100 Subject: [PATCH 5/5] autobake accesses control file only if duckdb directory exists. --- debian/autobake-deb.sh | 3 ++- storage/duckdb/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/debian/autobake-deb.sh b/debian/autobake-deb.sh index 6079902cf90a0..1250aa5543d15 100755 --- a/debian/autobake-deb.sh +++ b/debian/autobake-deb.sh @@ -178,7 +178,8 @@ then fi # Enable DuckDB storage engine plugin packaging -if grep -q "$architecture" storage/duckdb/duckdb/debian/control +if [ -f storage/duckdb/duckdb/debian/control ] && + grep -q "$architecture" storage/duckdb/duckdb/debian/control then cp -v storage/duckdb/duckdb/debian/mariadb-plugin-duckdb.* debian/ echo >> debian/control diff --git a/storage/duckdb/CMakeLists.txt b/storage/duckdb/CMakeLists.txt index c49ba917c1fec..852834eb9956a 100644 --- a/storage/duckdb/CMakeLists.txt +++ b/storage/duckdb/CMakeLists.txt @@ -28,7 +28,7 @@ SET(CMAKE_CXX_FLAGS_DEBUG INCLUDE(FetchContent) SET(DUCKDB_ENGINE_GIT_REPO "https://github.com/MariaDB/duckdb-engine" CACHE STRING "DuckDB engine plugin git repository") -SET(DUCKDB_ENGINE_GIT_TAG "1639aebe79fdd471fc8848c3757a6b0427471534" 
+SET(DUCKDB_ENGINE_GIT_TAG "612480e439c2869aa4cd860d15bd4f60ce090467" CACHE STRING "DuckDB engine plugin commit (full SHA)") SET(DUCKDB_ENGINE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/duckdb")