Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
1f01a96
GH-49321: [C++][Python] Add pixi build for Arrow and PyArrow
raulcd Apr 23, 2026
bfa4dc7
Minor clean-up
raulcd Apr 23, 2026
87aa61e
Add pixi build for Python
raulcd Apr 24, 2026
c96ede5
Minor fix to x-hierarchy on compose.yaml
raulcd Apr 24, 2026
6c806bb
Add missing PARQUET_TEST_DATA for arrow cpp build
raulcd Apr 24, 2026
dbf94d3
Add Pixi Arrow CPP sanitizer build
raulcd Apr 28, 2026
18dfbbe
Initial PyArrow build against sanitizer-instrumented CPython, NumPy a…
raulcd Apr 28, 2026
f18ccc0
Add required instrumented numpy and python to test dependencies and a…
raulcd Apr 28, 2026
9cb0e48
Increase timeout and add ASAN and UBSAN flags to PyArrow
raulcd Apr 28, 2026
045186f
Generate Cython and libarrow_python ASAN failures to validate instrum…
raulcd Apr 28, 2026
23add32
Some minor cleanups
raulcd Apr 29, 2026
4c16397
Add expected failures
raulcd Apr 29, 2026
5e51c69
Improve ASAN detection and pytest markers
raulcd Apr 30, 2026
c86e21e
Fix for Windows
raulcd Apr 30, 2026
1f012be
Remove failing tests with OOB
raulcd May 4, 2026
7aa0faf
Update version to match what we use on pyproject.toml
raulcd May 4, 2026
c1fc577
Add pixi.toml(s) to bump versions
raulcd May 4, 2026
eea7e55
Try to fix version bump tests
raulcd May 4, 2026
3a31f15
Fix wrong path :)
raulcd May 4, 2026
b240317
And there's a different test file
raulcd May 4, 2026
bcfa1e3
Add back required version for build backend and update regex so it do…
raulcd May 4, 2026
bcaadfa
Comment improvements and remove stray import
raulcd May 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions ci/docker/pixi.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Base image. `arch` selects the Docker Hub architecture namespace
# (for example amd64) and can be overridden at build time.
ARG arch=amd64
FROM ${arch}/ubuntu:24.04

# System packages: download tools, git, time zone data and debugging aids.
# The apt lists are removed in the same layer to keep the image small.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update -y -q && \
    apt-get install -y -q \
        curl \
        gdb \
        git \
        libc6-dbg \
        tzdata \
        tzdata-legacy \
        wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install pixi into /opt/pixi.
# The installer is downloaded, executed and deleted within a single RUN so
# that the script is not kept in an intermediate image layer.
# PIXI_NO_PATH_UPDATE=1 is passed to the installer; PATH is set explicitly
# with ENV below instead.
RUN curl -fsSL https://pixi.sh/install.sh -o /tmp/install-pixi.sh && \
    PIXI_HOME=/opt/pixi PIXI_NO_PATH_UPDATE=1 sh /tmp/install-pixi.sh && \
    rm /tmp/install-pixi.sh
ENV PATH=/opt/pixi/bin:$PATH

# Verify pixi installation: fail the image build early if pixi is not on PATH.
RUN pixi --version
19 changes: 19 additions & 0 deletions ci/pixi/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# pixi working directories (environments and build work trees), at any depth.
**/.pixi
# pixi lock files are not tracked for these CI manifests.
**/pixi.lock
95 changes: 95 additions & 0 deletions ci/pixi/asan/cpp/pixi.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Workspace for the ASAN/UBSAN build of Arrow C++; only linux-64 is targeted.
[workspace]
channels = ["https://prefix.dev/conda-forge"]
platforms = ["linux-64"]
# Preview feature; presumably required by the [package] build defined in
# this manifest.
preview = ["pixi-build"]

# The "test" environment is made of the "test" feature defined in this file.
[environments]
test = ["test"]

# Environment variables set when the test environment is activated.
# The data paths are absolute and expect the repository to be at /arrow.
[feature.test.activation.env]
ARROW_TEST_DATA = "/arrow/testing/data"
PARQUET_TEST_DATA = "/arrow/cpp/submodules/parquet-testing/data"
# GH-39973: debug memory pool conflicts with ASAN
ARROW_DEBUG_MEMORY_POOL = "none"
# Leak detection is disabled; reports are symbolized.
ASAN_OPTIONS = "allocator_may_return_null=1:detect_leaks=0:symbolize=1"
UBSAN_OPTIONS = "print_stacktrace=1"

# cmake provides the ctest executable used by the test task.
# NOTE(review): binutils is presumably needed to symbolize sanitizer
# stack traces; confirm.
[feature.test.dependencies]
binutils = "*"
cmake = "*"

[feature.test.tasks]
# This path depends on pixi-build's internal work-directory layout.
# Required for running C++ tests on Docker.
# The first matching build directory is used.
# NOTE(review): if nothing matches, the command substitution is empty;
# confirm that the task then fails instead of running ctest elsewhere.
test = "cd $(ls -d /arrow/ci/pixi/asan/cpp/.pixi/build/work/arrow-cpp-*/work/build | head -n 1) && ctest --output-on-failure"

# Package definition for the sanitizer-instrumented Arrow C++ libraries.
[package]
name = "arrow-cpp"
version = "25.0.0a0"
authors = ["Apache Arrow"]

# The sources are the repository's cpp/ directory, relative to this manifest.
[package.build]
source.path = "../../../../cpp"

[package.build.backend]
name = "pixi-build-cmake"
version = "*"

# Arguments handed to CMake by the build backend.
[package.build.config]
extra-args = [
    "-DCMAKE_BUILD_TYPE=Debug",
    "-DARROW_USE_ASAN=ON",
    "-DARROW_USE_UBSAN=ON",
    # Static libraries become huge once instrumented by the sanitizers,
    # so they are not built.
    "-DARROW_BUILD_STATIC=OFF",
    "-DARROW_BUILD_TESTS=ON",
    "-DARROW_COMPUTE=ON",
    "-DARROW_CSV=ON",
    "-DARROW_DATASET=ON",
    "-DARROW_FILESYSTEM=ON",
    "-DARROW_JSON=ON",
    "-DARROW_PARQUET=ON",
    "-DARROW_WITH_BROTLI=ON",
    "-DARROW_WITH_BZ2=ON",
    "-DARROW_WITH_LZ4=ON",
    "-DARROW_WITH_SNAPPY=ON",
    "-DARROW_WITH_ZLIB=ON",
    "-DARROW_WITH_ZSTD=ON",
    # Taking libutf8proc from package.host-dependencies fails, so the
    # bundled copy is built instead.
    "-Dutf8proc_SOURCE=BUNDLED",
    "-DOPENSSL_ROOT_DIR=$PREFIX",
]

# Libraries provided in the host environment for the CMake build.
# The compression packages correspond to the ARROW_WITH_* options enabled in
# [package.build.config]; gtest and gmock go with ARROW_BUILD_TESTS=ON.
[package.host-dependencies]
brotli = "*"
bzip2 = "*"
gflags = "*"
gmock = ">=1.10.0"
gtest = ">=1.10.0"
libboost-devel = "*"
lz4-c = "*"
openssl = "*"
rapidjson = "*"
re2 = "*"
snappy = "*"
thrift-cpp = ">=0.11.0"
xsimd = ">=14.0"
zlib = "*"
zstd = "*"
109 changes: 109 additions & 0 deletions ci/pixi/asan/python/pixi.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Workspace for the ASAN/UBSAN build of PyArrow; only linux-64 is targeted.
[workspace]
channels = ["https://prefix.dev/conda-forge"]
platforms = ["linux-64"]
# Preview feature; presumably required by the [package] build defined in
# this manifest.
preview = ["pixi-build"]

# The "test" environment is made of the "test" feature defined in this file.
[environments]
test = ["test"]

# Same platform list as the workspace.
[feature.test]
platforms = ["linux-64"]

# Environment variables set when the test environment is activated.
# The data paths are absolute and expect the repository to be at /arrow.
[feature.test.activation.env]
ARROW_TEST_DATA = "/arrow/testing/data"
PARQUET_TEST_DATA = "/arrow/cpp/submodules/parquet-testing/data"
# GH-39973: debug memory pool conflicts with ASAN
ARROW_DEBUG_MEMORY_POOL = "none"
# Same value as ASAN_OPTIONS in the package build environment of this file.
ASAN_OPTIONS = "detect_leaks=0:symbolize=1:strict_init_order=true:allocator_may_return_null=1:use_sigaltstack=0"
UBSAN_OPTIONS = "print_stacktrace=1"

# pyarrow is the package built by this manifest (path = ".").
[feature.test.dependencies]
hypothesis = "*"
pyarrow = { path = "." }
pytest = "*"
pytest-timeout = "*"

[feature.test.tasks]
# Runs the installed pyarrow test suite; --timeout comes from pytest-timeout
# and "-r s" adds skipped tests to the summary.
test = "pytest -r s --pyargs pyarrow --timeout=180"

# Package definition for the sanitizer-instrumented PyArrow build.
[package]
name = "pyarrow"
version = "25.0.0a0"
authors = ["Apache Arrow"]

# The sources are the repository's python/ directory, relative to this manifest.
[package.build]
source.path = "../../../../python"

[package.build.backend]
name = "pixi-build-python"
version = "*"

# Configuration passed to the pixi-build-python backend.
[package.build.config]
compilers = ["c", "cxx"]
# "-C" config settings for the Python build.
# NOTE(review): the cmake.* keys look like scikit-build-core settings
# (scikit-build-core is listed in host-dependencies); confirm.
extra-args = [
    "-Ccmake.build-type=Debug",
    "-Ccmake.define.ARROW_USE_ASAN=ON",
    "-Ccmake.define.ARROW_USE_UBSAN=ON",
]

# Environment variables for the build. This is a sub-table rather than an
# inline table: TOML 1.0 does not allow an inline table to span several
# lines, and this form yields the same data with any parser.
[package.build.config.env]
PYARROW_BUNDLE_ARROW_CPP = "0"
CMAKE_GENERATOR = "Ninja"
ARROW_HOME = "$PREFIX"
# Same value as ASAN_OPTIONS in [feature.test.activation.env].
ASAN_OPTIONS = "detect_leaks=0:symbolize=1:strict_init_order=true:allocator_may_return_null=1:use_sigaltstack=0"

# Packages available in the host environment while PyArrow is built.
[package.host-dependencies]
# Sanitizer-instrumented Arrow C++ (sibling pixi package).
arrow-cpp = { path = "../cpp" }
cmake = "*"
cython = ">=3.0"
gcc_linux-64 = "*"
git = "*"
gxx_linux-64 = "*"
ninja = "*"
pip = "*"
pkg-config = "*"
scikit-build-core = "*"
setuptools-scm = "*"
sysroot_linux-64 = "*"
zlib = "*"
# Instrumented CPython built from source (v3.15.0a6).
# The commit hash must match the one pinned by NumPy's pixi-packages/asan
# manifest, so that pixi-build does not build CPython twice from different
# revisions.
python.git = "https://github.com/python/cpython"
python.subdirectory = "Tools/pixi-packages/asan"
python.rev = "15b216f30d0445469ec31bc7509fcc55a216ef7c"
# Instrumented NumPy built from source.
numpy.git = "https://github.com/numpy/numpy"
numpy.subdirectory = "pixi-packages/asan"
numpy.rev = "0419105da9cd0a15a4e02bc22019c2b65272c68a"

[package.run-dependencies]
arrow-cpp = { path = "../cpp" }
# The entries below are needed to run the Python test suite; without them
# it would not run against the instrumented Python and NumPy.
# Keep the revisions identical to those in [package.host-dependencies] to
# avoid mismatches between the build and test environments.
python.git = "https://github.com/python/cpython"
python.subdirectory = "Tools/pixi-packages/asan"
python.rev = "15b216f30d0445469ec31bc7509fcc55a216ef7c"
numpy.git = "https://github.com/numpy/numpy"
numpy.subdirectory = "pixi-packages/asan"
numpy.rev = "0419105da9cd0a15a4e02bc22019c2b65272c68a"
87 changes: 87 additions & 0 deletions ci/pixi/default/cpp/pixi.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Workspace for the regular (Release) build of Arrow C++; only linux-64 is
# targeted.
[workspace]
channels = ["https://prefix.dev/conda-forge"]
platforms = ["linux-64"]
# Preview feature; presumably required by the [package] build defined in
# this manifest.
preview = ["pixi-build"]

# The "test" environment is made of the "test" feature defined in this file.
[environments]
test = ["test"]

# Environment variables set when the test environment is activated.
# The data paths are absolute and expect the repository to be at /arrow.
[feature.test.activation.env]
ARROW_TEST_DATA = "/arrow/testing/data"
PARQUET_TEST_DATA = "/arrow/cpp/submodules/parquet-testing/data"

# cmake provides the ctest executable used by the test task.
[feature.test.dependencies]
binutils = "*"
cmake = "*"

[feature.test.tasks]
# This path depends on pixi-build's internal work-directory layout.
# Required for running C++ tests on Docker.
# The first matching build directory is used.
# NOTE(review): if nothing matches, the command substitution is empty;
# confirm that the task then fails instead of running ctest elsewhere.
test = "cd $(ls -d /arrow/ci/pixi/default/cpp/.pixi/build/work/arrow-cpp-*/work/build | head -n 1) && ctest --output-on-failure"

# Package definition for the Release build of the Arrow C++ libraries.
[package]
name = "arrow-cpp"
version = "25.0.0a0"
authors = ["Apache Arrow"]

# The sources are the repository's cpp/ directory, relative to this manifest.
[package.build]
source.path = "../../../../cpp"

[package.build.backend]
name = "pixi-build-cmake"
version = "*"

# Arguments handed to CMake by the build backend.
[package.build.config]
extra-args = [
    "-DCMAKE_BUILD_TYPE=Release",
    "-DARROW_BUILD_TESTS=ON",
    "-DARROW_COMPUTE=ON",
    "-DARROW_CSV=ON",
    "-DARROW_DATASET=ON",
    "-DARROW_FILESYSTEM=ON",
    "-DARROW_JSON=ON",
    "-DARROW_PARQUET=ON",
    "-DARROW_WITH_BROTLI=ON",
    "-DARROW_WITH_BZ2=ON",
    "-DARROW_WITH_LZ4=ON",
    "-DARROW_WITH_SNAPPY=ON",
    "-DARROW_WITH_ZLIB=ON",
    "-DARROW_WITH_ZSTD=ON",
    # Taking libutf8proc from package.host-dependencies fails, so the
    # bundled copy is built instead.
    "-Dutf8proc_SOURCE=BUNDLED",
    "-DOPENSSL_ROOT_DIR=$PREFIX",
]

# Libraries provided in the host environment for the CMake build.
# The compression packages correspond to the ARROW_WITH_* options enabled in
# [package.build.config]; gtest and gmock go with ARROW_BUILD_TESTS=ON.
[package.host-dependencies]
brotli = "*"
bzip2 = "*"
gflags = "*"
gmock = ">=1.10.0"
gtest = ">=1.10.0"
libboost-devel = "*"
lz4-c = "*"
openssl = "*"
rapidjson = "*"
re2 = "*"
snappy = "*"
thrift-cpp = ">=0.11.0"
xsimd = ">=14.0"
zlib = "*"
zstd = "*"
Loading
Loading