From 887c303e4c3c6f2be4de34030f09f9f97ef87a53 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 09:12:25 -0500 Subject: [PATCH 01/22] First draft of running debug mode for PRs and release mode for main & releases --- .github/workflows/build.yml | 338 ++++++++++++++++++++++------------ .github/workflows/ci.yml | 36 ++++ .github/workflows/release.yml | 46 +++++ 3 files changed, 305 insertions(+), 115 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 911c536a8..f7bab6899 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,34 +15,69 @@ # specific language governing permissions and limitations # under the License. -name: Python Release Build +# Reusable workflow for running tests +# This ensures the same tests run for both debug (PRs) and release (main/tags) builds + +name: Test + on: - pull_request: - branches: ["main"] - push: - tags: ["*-rc*"] - branches: ["branch-*"] + workflow_call: + inputs: + build_mode: + description: 'Build mode: debug or release' + required: true + type: string + run_wheels: + description: 'Whether to build distribution wheels' + required: false + type: boolean + default: false + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: 1 jobs: - build: + # ============================================ + # Linting Jobs + # ============================================ + lint-rust: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@v4 + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - name: Cache Cargo + uses: Swatinem/rust-cache@v2 + + - name: Check formatting + run: cargo fmt --all -- --check + +# TODO(tsaucer) clippy isn't fast so maybe this is in the test section? +# - name: Run Clippy +# run: cargo clippy --all-targets --all-features -- -D warnings + + lint-python: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Python - uses: actions/setup-python@v6 + uses: actions/setup-python@v5 with: python-version: "3.12" - - uses: astral-sh/setup-uv@v7 + - uses: astral-sh/setup-uv@v6 with: - enable-cache: true + enable-cache: true - # Use the --no-install-package to only install the dependencies - # but do not yet build the rust library - name: Install dependencies run: uv sync --dev --no-install-package datafusion - # Update output format to enable automatic inline annotations. - name: Run Ruff run: | uv run --no-project ruff check --output-format=github python/ @@ -50,26 +85,147 @@ jobs: - name: Run codespell run: | - uv run --no-project codespell --toml pyproject.toml + uv run --no-project codespell --toml pyproject.toml + + lint-toml: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install taplo + run: | + curl -fsSL https://github.com/tamasfe/taplo/releases/latest/download/taplo-full-linux-x86_64.gz \ + | gunzip > /usr/local/bin/taplo + chmod +x /usr/local/bin/taplo + + - name: Check TOML formatting + run: taplo fmt --check generate-license: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - uses: astral-sh/setup-uv@v7 + - uses: actions/checkout@v4 + + - uses: astral-sh/setup-uv@v6 with: - enable-cache: true + enable-cache: true - name: Generate license file run: uv run --no-project python ./dev/create_license.py + - uses: actions/upload-artifact@v6 with: name: python-wheel-license path: LICENSE.txt + # ============================================ + # Build - Linux x86_64 + # ============================================ + build-manylinux-x86_64: + needs: [generate-license, lint-rust, lint-python] + name: ManyLinux x86_64 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v7 + with: + name: python-wheel-license + path: . + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Cache Cargo + uses: Swatinem/rust-cache@v2 + with: + key: ${{ inputs.build_mode }} + + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - uses: astral-sh/setup-uv@v6 + with: + enable-cache: true + + - name: Install dependencies + run: uv sync --dev --no-install-package datafusion + + - name: Build (${{ inputs.build_mode }} mode) + run: | + if [[ "${{ inputs.build_mode }}" == "release" ]]; then + uv run --no-project maturin build --release --strip --manylinux 2014 --features substrait + else + uv run --no-project maturin build --manylinux 2014 --features substrait + fi + + - name: Archive wheels + uses: actions/upload-artifact@v6 + with: + name: dist-manylinux-x86_64 + path: target/wheels/* + + # ============================================ + # Build - Linux ARM64 + # ============================================ + build-manylinux-aarch64: + needs: [generate-license, lint-rust, lint-python] + name: ManyLinux ARM64 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v7 + with: + name: python-wheel-license + path: . + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Cache Cargo + uses: Swatinem/rust-cache@v2 + with: + key: ${{ inputs.build_mode }} + +# - name: Install Python +# uses: actions/setup-python@v5 +# with: +# python-version: ${{ matrix.python-version }} + + - uses: astral-sh/setup-uv@v6 + with: + enable-cache: true + + - name: Install dependencies + run: uv sync --dev --no-install-package datafusion + + - name: Build (${{ inputs.build_mode }} mode) + run: | + if [[ "${{ inputs.build_mode }}" == "release" ]]; then + uv run --no-project maturin build --release --strip --manylinux 2_28 --features substrait + else + uv run --no-project maturin build --manylinux 2_28 --features substrait + fi + + - name: Archive wheels + uses: actions/upload-artifact@v6 + with: + name: dist-manylinux-aarch64 + path: target/wheels/* + + # ============================================ + # Build - macOS ARM64 / Windows + # ============================================ build-python-mac-win: - needs: [generate-license] - name: Mac/Win + needs: [generate-license, lint-rust, lint-python] + name: macOS ARM64 / Windows runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -92,20 +248,26 @@ jobs: name: python-wheel-license path: . - - name: Install Protoc - uses: arduino/setup-protoc@v3 + - name: Cache Cargo + uses: Swatinem/rust-cache@v2 with: - version: "27.4" - repo-token: ${{ secrets.GITHUB_TOKEN }} + key: ${{ inputs.build_mode }} - uses: astral-sh/setup-uv@v7 with: - enable-cache: true + enable-cache: true - - name: Build Python package + # TODO(tsaucer) is this necessary if build doesn't run pytest? + - name: Install dependencies + run: uv sync --dev --no-install-package datafusion + + - name: Build Python package (${{ inputs.build_mode }} mode) run: | - uv sync --dev --no-install-package datafusion - uv run --no-project maturin build --release --strip --features substrait + if [[ "${{ inputs.build_mode }}" == "release" ]]; then + uv run --no-project maturin build --release --strip --features substrait + else + uv run --no-project maturin build --features substrait + fi - name: List Windows wheels if: matrix.os == 'windows-latest' @@ -124,9 +286,12 @@ jobs: name: dist-${{ matrix.os }} path: target/wheels/* + # ============================================ + # Build - macOS x86_64 (release only) + # ============================================ build-macos-x86_64: - needs: [generate-license] - name: Mac x86_64 + if: inputs.build_mode == 'release' + needs: [generate-license, lint-rust, lint-python] runs-on: macos-15-intel strategy: fail-fast: false @@ -148,19 +313,20 @@ jobs: name: python-wheel-license path: . - - name: Install Protoc - uses: arduino/setup-protoc@v3 + - name: Cache Cargo + uses: Swatinem/rust-cache@v2 with: - version: "27.4" - repo-token: ${{ secrets.GITHUB_TOKEN }} + key: ${{ inputs.build_mode }} - uses: astral-sh/setup-uv@v7 with: - enable-cache: true + enable-cache: true - - name: Build Python package + - name: Install dependencies + run: uv sync --dev --no-install-package datafusion + + - name: Build (release mode) run: | - uv sync --dev --no-install-package datafusion uv run --no-project maturin build --release --strip --features substrait - name: List Mac wheels @@ -172,68 +338,14 @@ jobs: name: dist-macos-aarch64 path: target/wheels/* - build-manylinux-x86_64: - needs: [generate-license] - name: Manylinux x86_64 - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - run: rm LICENSE.txt - - name: Download LICENSE.txt - uses: actions/download-artifact@v7 - with: - name: python-wheel-license - path: . - - run: cat LICENSE.txt - - name: Build wheels - uses: PyO3/maturin-action@v1 - env: - RUST_BACKTRACE: 1 - with: - rust-toolchain: nightly - target: x86_64 - manylinux: auto - rustup-components: rust-std rustfmt # Keep them in one line due to https://github.com/PyO3/maturin-action/issues/153 - args: --release --manylinux 2014 --features protoc,substrait - - name: Archive wheels - uses: actions/upload-artifact@v6 - with: - name: dist-manylinux-x86_64 - path: target/wheels/* - - build-manylinux-aarch64: - needs: [generate-license] - name: Manylinux arm64 - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - run: rm LICENSE.txt - - name: Download LICENSE.txt - uses: actions/download-artifact@v7 - with: - name: python-wheel-license - path: . - - run: cat LICENSE.txt - - name: Build wheels - uses: PyO3/maturin-action@v1 - env: - RUST_BACKTRACE: 1 - with: - rust-toolchain: nightly - target: aarch64 - # Use manylinux_2_28-cross because the manylinux2014-cross has GCC 4.8.5, which causes the build to fail - manylinux: 2_28 - rustup-components: rust-std rustfmt # Keep them in one line due to https://github.com/PyO3/maturin-action/issues/153 - args: --release --features protoc,substrait - - name: Archive wheels - uses: actions/upload-artifact@v6 - with: - name: dist-manylinux-aarch64 - path: target/wheels/* + # ============================================ + # Build - Source Distribution + # ============================================ build-sdist: needs: [generate-license] name: Source distribution + if: inputs.build_mode == 'release' runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -253,16 +365,22 @@ jobs: args: --release --sdist --out dist --features protoc,substrait - name: Assert sdist build does not generate wheels run: | - if [ "$(ls -A target/wheels)" ]; then - echo "Error: Sdist build generated wheels" - exit 1 - else - echo "Directory is clean" - fi + if [ "$(ls -A target/wheels)" ]; then + echo "Error: Sdist build generated wheels" + exit 1 + else + echo "Directory is clean" + fi shell: bash - + + # ============================================ + # Build - Source Distribution + # ============================================ + merge-build-artifacts: runs-on: ubuntu-latest + name: Merge build artifacts + if: inputs.build_mode == 'release' needs: - build-python-mac-win - build-macos-x86_64 @@ -276,6 +394,9 @@ jobs: name: dist pattern: dist-* + # ============================================ + # Build - Documentation + # ============================================ # Documentation build job that runs after wheels are built build-docs: name: Build docs @@ -368,16 +489,3 @@ jobs: git commit -m 'Publish built docs triggered by ${{ github.sha }}' git push || git push --force fi - - # NOTE: PyPI publish needs to be done manually for now after release passed the vote - # release: - # name: Publish in PyPI - # needs: [build-manylinux, build-python-mac-win] - # runs-on: ubuntu-latest - # steps: - # - uses: actions/download-artifact@v7 - # - name: Publish to PyPI - # uses: pypa/gh-action-pypi-publish@master - # with: - # user: __token__ - # password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..7b1f23f09 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# CI workflow for pull requests - runs tests in DEBUG mode for faster feedback + +name: CI + +on: + pull_request: + branches: ["main"] + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +jobs: + test: + uses: ./.github/workflows/test.yml + with: + build_mode: debug + run_wheels: false + secrets: inherit diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 000000000..66f374e34 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Release workflow - runs tests in RELEASE mode and builds distribution wheels +# Triggered on: +# - Merges to main +# - Release candidate tags (*-rc*) +# - Release tags (e.g., 45.0.0) +# - Release branches (branch-*) + +name: Release Build + +on: + push: + branches: + - "main" + - "branch-*" # Release branches + tags: + - "*-rc*" # Release candidates (e.g., 45.0.0-rc1) + - "[0-9]+.*" # Release tags (e.g., 45.0.0) + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +jobs: + test-and-build: + uses: ./.github/workflows/test.yml + with: + build_mode: release + run_wheels: true + secrets: inherit From 2917c6bc7758d5ab97f91e0dd43867d92726ccae Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 09:17:18 -0500 Subject: [PATCH 02/22] Update paths --- .github/workflows/build.yml | 4 ++-- .github/workflows/ci.yml | 2 +- .github/workflows/release.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f7bab6899..3a2b2f927 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,10 +15,10 @@ # specific language governing permissions and limitations # under the License. -# Reusable workflow for running tests +# Reusable workflow for running building # This ensures the same tests run for both debug (PRs) and release (main/tags) builds -name: Test +name: Build on: workflow_call: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7b1f23f09..982d1c644 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,7 @@ concurrency: jobs: test: - uses: ./.github/workflows/test.yml + uses: ./.github/workflows/build.yml with: build_mode: debug run_wheels: false diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 66f374e34..8804323d2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -39,7 +39,7 @@ concurrency: jobs: test-and-build: - uses: ./.github/workflows/test.yml + uses: ./.github/workflows/build.yml with: build_mode: release run_wheels: true From 7f4e1f2b8606754dcc8e5924d7ff940d1340cb73 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 09:21:28 -0500 Subject: [PATCH 03/22] Change install command for taplo --- .github/workflows/build.yml | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3a2b2f927..4e5357389 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,7 +44,7 @@ jobs: lint-rust: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Setup Rust uses: dtolnay/rust-toolchain@stable @@ -64,7 +64,7 @@ jobs: lint-python: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install Python uses: actions/setup-python@v5 @@ -90,21 +90,19 @@ jobs: lint-toml: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install taplo - run: | - curl -fsSL https://github.com/tamasfe/taplo/releases/latest/download/taplo-full-linux-x86_64.gz \ - | gunzip > /usr/local/bin/taplo - chmod +x /usr/local/bin/taplo + run: cargo +stable install taplo-cli --version ^0.9 --locked - - name: Check TOML formatting - run: taplo fmt --check + # if you encounter an error, try running 'taplo format' to fix the formatting automatically. + - name: Check Cargo.toml formatting + run: taplo format --check generate-license: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - uses: astral-sh/setup-uv@v6 with: From 34e81a356d83a63d0e681b99dff7cfc1e893dbec Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 09:23:59 -0500 Subject: [PATCH 04/22] install protoc --- .github/workflows/build.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4e5357389..2cbd1830f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -255,6 +255,12 @@ jobs: with: enable-cache: true + - name: Install Protoc + uses: arduino/setup-protoc@v3 + with: + version: "27.4" + repo-token: ${{ secrets.GITHUB_TOKEN }} + # TODO(tsaucer) is this necessary if build doesn't run pytest? - name: Install dependencies run: uv sync --dev --no-install-package datafusion @@ -320,6 +326,12 @@ jobs: with: enable-cache: true + - name: Install Protoc + uses: arduino/setup-protoc@v3 + with: + version: "27.4" + repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Install dependencies run: uv sync --dev --no-install-package datafusion From 7ffe1455472061e1af9643b4b5cf2f076338d9e1 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 09:55:40 -0500 Subject: [PATCH 05/22] taplo fmt --- .cargo/config.toml | 11 +- Cargo.toml | 38 ++-- .../datafusion-ffi-example/.cargo/config.toml | 11 +- examples/datafusion-ffi-example/Cargo.toml | 16 +- .../datafusion-ffi-example/pyproject.toml | 6 +- pyproject.toml | 195 +++++++++++------- 6 files changed, 151 insertions(+), 126 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index 91a099a61..af951327f 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,12 +1,5 @@ [target.x86_64-apple-darwin] -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] +rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"] [target.aarch64-apple-darwin] -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] - +rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"] diff --git a/Cargo.toml b/Cargo.toml index af2ffb012..f4e8575c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,13 +27,13 @@ license = "Apache-2.0" edition = "2021" rust-version = "1.78" include = [ - "/src", - "/datafusion", - "/LICENSE.txt", - "build.rs", - "pyproject.toml", - "Cargo.toml", - "Cargo.lock", + "/src", + "/datafusion", + "/LICENSE.txt", + "build.rs", + "pyproject.toml", + "Cargo.toml", + "Cargo.lock", ] [features] @@ -43,15 +43,15 @@ substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.47", features = [ - "macros", - "rt", - "rt-multi-thread", - "sync", + "macros", + "rt", + "rt-multi-thread", + "sync", ] } pyo3 = { version = "0.26", features = [ - "extension-module", - "abi3", - "abi3-py310", + "extension-module", + "abi3", + "abi3-py310", ] } pyo3-async-runtimes = { version = "0.26", features = ["tokio-runtime"] } pyo3-log = "0.13.2" @@ -64,16 +64,16 @@ datafusion-ffi = { version = "52" } prost = "0.14.1" # keep in line with `datafusion-substrait` uuid = { version = "1.18", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = [ - "local_dynamic_tls", + "local_dynamic_tls", ] } async-trait = "0.1.89" futures = "0.3" cstr = "0.2" object_store = { version = "0.12.4", features = [ - "aws", - "gcp", - "azure", - "http", + "aws", + "gcp", + "azure", + "http", ] } url = "2" log = "0.4.27" diff --git a/examples/datafusion-ffi-example/.cargo/config.toml b/examples/datafusion-ffi-example/.cargo/config.toml index 91a099a61..af951327f 100644 --- a/examples/datafusion-ffi-example/.cargo/config.toml +++ b/examples/datafusion-ffi-example/.cargo/config.toml @@ -1,12 +1,5 @@ [target.x86_64-apple-darwin] -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] +rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"] [target.aarch64-apple-darwin] -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] - +rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"] diff --git a/examples/datafusion-ffi-example/Cargo.toml b/examples/datafusion-ffi-example/Cargo.toml index e6708fce8..d54add58b 100644 --- a/examples/datafusion-ffi-example/Cargo.toml +++ b/examples/datafusion-ffi-example/Cargo.toml @@ -21,14 +21,18 @@ version = "0.2.0" edition = "2021" [dependencies] -datafusion-catalog = { version = "52" , default-features = false } -datafusion-common = { version = "52" , default-features = false } -datafusion-functions-aggregate = { version = "52" } -datafusion-functions-window = { version = "52" } -datafusion-expr = { version = "52" } +datafusion-catalog = { version = "52", default-features = false } +datafusion-common = { version = "52", default-features = false } +datafusion-functions-aggregate = { version = "52" } +datafusion-functions-window = { version = "52" } +datafusion-expr = { version = "52" } datafusion-ffi = { version = "52" } -pyo3 = { version = "0.26", features = ["extension-module", "abi3", "abi3-py39"] } +pyo3 = { version = "0.26", features = [ + "extension-module", + "abi3", + "abi3-py39", +] } arrow = { version = "57" } arrow-array = { version = "57" } arrow-schema = { version = "57" } diff --git a/examples/datafusion-ffi-example/pyproject.toml b/examples/datafusion-ffi-example/pyproject.toml index 0c54df95c..7f85e9487 100644 --- a/examples/datafusion-ffi-example/pyproject.toml +++ b/examples/datafusion-ffi-example/pyproject.toml @@ -23,9 +23,9 @@ build-backend = "maturin" name = "datafusion_ffi_example" requires-python = ">=3.9" classifiers = [ - "Programming Language :: Rust", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", ] dynamic = ["version"] diff --git a/pyproject.toml b/pyproject.toml index 497943a34..d315dbe19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,26 +27,26 @@ license = { file = "LICENSE.txt" } requires-python = ">=3.10" keywords = ["datafusion", "dataframe", "rust", "query-engine"] classifiers = [ - "Development Status :: 2 - Pre-Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "License :: OSI Approved", - "Operating System :: MacOS", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - "Programming Language :: Python", - "Programming Language :: Rust", + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved", + "Operating System :: MacOS", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python", + "Programming Language :: Rust", ] dependencies = [ - "pyarrow>=16.0.0;python_version<'3.14'", - "pyarrow>=22.0.0;python_version>='3.14'", - "typing-extensions;python_version<'3.13'" + "pyarrow>=16.0.0;python_version<'3.14'", + "pyarrow>=22.0.0;python_version>='3.14'", + "typing-extensions;python_version<'3.13'", ] dynamic = ["version"] @@ -73,23 +73,23 @@ asyncio_default_fixture_loop_scope = "function" # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["ALL" ] +select = ["ALL"] ignore = [ - "A001", # Allow using words like min as variable names - "A002", # Allow using words like filter as variable names - "ANN401", # Allow Any for wrapper classes - "COM812", # Recommended to ignore these rules when using with ruff-format - "FIX002", # Allow TODO lines - consider removing at some point - "FBT001", # Allow boolean positional args - "FBT002", # Allow boolean positional args - "ISC001", # Recommended to ignore these rules when using with ruff-format - "SLF001", # Allow accessing private members - "TD002", # Do not require author names in TODO statements - "TD003", # Allow TODO lines - "PLR0913", # Allow many arguments in function definition - "PD901", # Allow variable name df - "N812", # Allow importing functions as `F` - "A005", # Allow module named io + "A001", # Allow using words like min as variable names + "A002", # Allow using words like filter as variable names + "ANN401", # Allow Any for wrapper classes + "COM812", # Recommended to ignore these rules when using with ruff-format + "FIX002", # Allow TODO lines - consider removing at some point + "FBT001", # Allow boolean positional args + "FBT002", # Allow boolean positional args + "ISC001", # Recommended to ignore these rules when using with ruff-format + "SLF001", # Allow accessing private members + "TD002", # Do not require author names in TODO statements + "TD003", # Allow TODO lines + "PLR0913", # Allow many arguments in function definition + "PD901", # Allow variable name df + "N812", # Allow importing functions as `F` + "A005", # Allow module named io ] [tool.ruff.lint.pydocstyle] @@ -104,61 +104,96 @@ extend-allowed-calls = ["lit", "datafusion.lit"] # Disable docstring checking for these directories [tool.ruff.lint.per-file-ignores] "python/tests/*" = [ - "ANN", - "ARG", - "BLE001", - "D", - "S101", - "SLF", - "PD", - "PLR2004", - "PT011", - "RUF015", - "S608", - "PLR0913", - "PT004", + "ANN", + "ARG", + "BLE001", + "D", + "S101", + "SLF", + "PD", + "PLR2004", + "PT011", + "RUF015", + "S608", + "PLR0913", + "PT004", +] +"examples/*" = [ + "D", + "W505", + "E501", + "T201", + "S101", + "PLR2004", + "ANN001", + "ANN202", + "INP001", + "DTZ007", + "RUF015", +] +"dev/*" = [ + "D", + "E", + "T", + "S", + "PLR", + "C", + "SIM", + "UP", + "EXE", + "N817", + "ERA001", + "ANN001", +] +"benchmarks/*" = [ + "D", + "F", + "T", + "BLE", + "FURB", + "PLR", + "E", + "TD", + "TRY", + "S", + "SIM", + "EXE", + "UP", + "ERA001", + "ANN001", + "INP001", ] -"examples/*" = ["D", "W505", "E501", "T201", "S101", "PLR2004", "ANN001", "ANN202", "INP001", "DTZ007", "RUF015"] -"dev/*" = ["D", "E", "T", "S", "PLR", "C", "SIM", "UP", "EXE", "N817", "ERA001", "ANN001"] -"benchmarks/*" = ["D", "F", "T", "BLE", "FURB", "PLR", "E", "TD", "TRY", "S", "SIM", "EXE", "UP", "ERA001", "ANN001", "INP001"] "docs/*" = ["D"] "docs/source/conf.py" = ["ERA001", "ANN001", "INP001"] [tool.codespell] -skip = [ - "./target", - "uv.lock", - "./python/tests/test_functions.py" -] +skip = ["./target", "uv.lock", "./python/tests/test_functions.py"] count = true -ignore-words-list = [ - "ans", - "IST" -] +ignore-words-list = ["ans", "IST"] [dependency-groups] dev = [ - "maturin>=1.8.1", - "numpy>1.25.0;python_version<'3.14'", - "numpy>=2.3.2;python_version>='3.14'", - "pyarrow>=19.0.0", - "pre-commit>=4.3.0", - "pyyaml>=6.0.3", - "pytest>=7.4.4", - "pytest-asyncio>=0.23.3", - "ruff>=0.9.1", - "toml>=0.10.2", - "pygithub==2.5.0", - "codespell==2.4.1", + "maturin>=1.8.1", + "numpy>1.25.0;python_version<'3.14'", + "numpy>=2.3.2;python_version>='3.14'", + "pyarrow>=19.0.0", + "pre-commit>=4.3.0", + "pyyaml>=6.0.3", + "pytest>=7.4.4", + "pytest-asyncio>=0.23.3", + "ruff>=0.9.1", + "toml>=0.10.2", + "pygithub==2.5.0", + "codespell==2.4.1", ] docs = [ - "sphinx>=7.1.2", - "pydata-sphinx-theme==0.8.0", - "myst-parser>=3.0.1", - "jinja2>=3.1.5", - "ipython>=8.12.3", - "pandas>=2.0.3", - "pickleshare>=0.7.5", - "sphinx-autoapi>=3.4.0", - "setuptools>=75.3.0", + "sphinx>=7.1.2", + "pydata-sphinx-theme==0.8.0", + "myst-parser>=3.0.1", + "jinja2>=3.1.5", + "ipython>=8.12.3", + "pandas>=2.0.3", + "pickleshare>=0.7.5", + "sphinx-autoapi>=3.4.0", + "setuptools>=75.3.0", ] From 1de00cc4c0cf55811d530490c65b2e1e0eeee720 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 10:02:59 -0500 Subject: [PATCH 06/22] Working through CI build issues --- .github/workflows/build.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2cbd1830f..001487790 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -156,9 +156,9 @@ jobs: - name: Build (${{ inputs.build_mode }} mode) run: | if [[ "${{ inputs.build_mode }}" == "release" ]]; then - uv run --no-project maturin build --release --strip --manylinux 2014 --features substrait + uv run --no-project maturin build --release --strip --manylinux 2014 --features protoc,substrait else - uv run --no-project maturin build --manylinux 2014 --features substrait + uv run --no-project maturin build --manylinux 2014 --features protoc,substrait fi - name: Archive wheels @@ -207,9 +207,9 @@ jobs: - name: Build (${{ inputs.build_mode }} mode) run: | if [[ "${{ inputs.build_mode }}" == "release" ]]; then - uv run --no-project maturin build --release --strip --manylinux 2_28 --features substrait + uv run --no-project maturin build --release --strip --manylinux 2_28 --features protoc,substrait else - uv run --no-project maturin build --manylinux 2_28 --features substrait + uv run --no-project maturin build --manylinux 2_28 --features protoc,substrait fi - name: Archive wheels @@ -265,13 +265,13 @@ jobs: - name: Install dependencies run: uv sync --dev --no-install-package datafusion - - name: Build Python package (${{ inputs.build_mode }} mode) - run: | - if [[ "${{ inputs.build_mode }}" == "release" ]]; then - uv run --no-project maturin build --release --strip --features substrait - else - uv run --no-project maturin build --features substrait - fi + - name: Build Python package (release mode) + if: inputs.build_mode == 'release' + run: uv run --no-project maturin build --release --strip --features substrait + + - name: Build Python package (debug mode) + if: inputs.build_mode != 'release' + run: uv run --no-project maturin build --features substrait - name: List Windows wheels if: matrix.os == 'windows-latest' From 49f949e14b9ada1cdf3ac0659edaa71a1e9ace24 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 10:18:50 -0500 Subject: [PATCH 07/22] More CI issues --- .github/workflows/build.yml | 57 +++++++++++++++++++++++++++-------- .github/workflows/ci.yml | 2 +- .github/workflows/release.yml | 2 +- 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 001487790..15a2e936a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,7 +49,7 @@ jobs: - name: Setup Rust uses: dtolnay/rust-toolchain@stable with: - components: rustfmt, clippy + components: rustfmt - name: Cache Cargo uses: Swatinem/rust-cache@v2 @@ -57,10 +57,6 @@ jobs: - name: Check formatting run: cargo fmt --all -- --check -# TODO(tsaucer) clippy isn't fast so maybe this is in the test section? -# - name: Run Clippy -# run: cargo clippy --all-targets --all-features -- -D warnings - lint-python: runs-on: ubuntu-latest steps: @@ -152,6 +148,9 @@ jobs: - name: Install dependencies run: uv sync --dev --no-install-package datafusion + + - name: Run Clippy + run: cargo clippy --all-targets --features protoc,substrait -- -D warnings - name: Build (${{ inputs.build_mode }} mode) run: | @@ -161,6 +160,26 @@ jobs: uv run --no-project maturin build --manylinux 2014 --features protoc,substrait fi + - name: Build (release mode) + uses: PyO3/maturin-action@v1 + if: inputs.build_mode == 'release' + with: + target: x86_64-unknown-linux-gnu + manylinux: "2_17" + # protoc feature uses bundled protoc, avoiding system dependency issues + args: --release --strip --features protoc,substrait --out dist + rustup-components: rust-std + + - name: Build (debug mode) + uses: PyO3/maturin-action@v1 + if: inputs.build_mode == 'debug' + with: + target: x86_64-unknown-linux-gnu + manylinux: "2_17" + # protoc feature uses bundled protoc, avoiding system dependency issues + args: --features protoc,substrait --out dist + rustup-components: rust-std + - name: Archive wheels uses: actions/upload-artifact@v6 with: @@ -204,13 +223,25 @@ jobs: - name: Install dependencies run: uv sync --dev --no-install-package datafusion - - name: Build (${{ inputs.build_mode }} mode) - run: | - if [[ "${{ inputs.build_mode }}" == "release" ]]; then - uv run --no-project maturin build --release --strip --manylinux 2_28 --features protoc,substrait - else - uv run --no-project maturin build --manylinux 2_28 --features protoc,substrait - fi + - name: Build (release mode) + uses: PyO3/maturin-action@v1 + if: inputs.build_mode == 'release' + with: + target: aarch64-unknown-linux-gnu + # aarch64 cross-compilation needs manylinux_2_28 + manylinux: "2_28" + args: --release --strip --features protoc,substrait --out dist + rustup-components: rust-std + + - name: Build (debug mode) + uses: PyO3/maturin-action@v1 + if: inputs.build_mode == 'debug' + with: + target: aarch64-unknown-linux-gnu + # aarch64 cross-compilation needs manylinux_2_28 + manylinux: "2_28" + args: --features protoc,substrait --out dist + rustup-components: rust-std - name: Archive wheels uses: actions/upload-artifact@v6 @@ -223,7 +254,7 @@ jobs: # ============================================ build-python-mac-win: needs: [generate-license, lint-rust, lint-python] - name: macOS ARM64 / Windows + name: macOS ARM64 & Windows runs-on: ${{ matrix.os }} strategy: fail-fast: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 982d1c644..b0863eebd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ concurrency: cancel-in-progress: true jobs: - test: + build: uses: ./.github/workflows/build.yml with: build_mode: debug diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8804323d2..1e5a45005 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -38,7 +38,7 @@ concurrency: cancel-in-progress: true jobs: - test-and-build: + build: uses: ./.github/workflows/build.yml with: build_mode: release From bf0bb97e39b8a4b9d423c06cc33d5cf2ea443b9c Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 10:22:50 -0500 Subject: [PATCH 08/22] do not build taplo, just download it --- .github/workflows/build.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 15a2e936a..f3933ea6b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -89,7 +89,9 @@ jobs: - uses: actions/checkout@v6 - name: Install taplo - run: cargo +stable install taplo-cli --version ^0.9 --locked + uses: taiki-e/install-action@v2 + with: + tool: taplo-cli # if you encounter an error, try running 'taplo format' to fix the formatting automatically. - name: Check Cargo.toml formatting @@ -148,7 +150,7 @@ jobs: - name: Install dependencies run: uv sync --dev --no-install-package datafusion - + - name: Run Clippy run: cargo clippy --all-targets --features protoc,substrait -- -D warnings From a7ed2f60f0fbc24bac637b0ddc2a7015636eadfa Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 10:34:44 -0500 Subject: [PATCH 09/22] Try only running clippy when we can reuse build artifacts --- .github/workflows/build.yml | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f3933ea6b..ce3e825ba 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -139,11 +139,6 @@ jobs: with: key: ${{ inputs.build_mode }} - - name: Install Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - uses: astral-sh/setup-uv@v6 with: enable-cache: true @@ -151,17 +146,6 @@ jobs: - name: Install dependencies run: uv sync --dev --no-install-package datafusion - - name: Run Clippy - run: cargo clippy --all-targets --features protoc,substrait -- -D warnings - - - name: Build (${{ inputs.build_mode }} mode) - run: | - if [[ "${{ inputs.build_mode }}" == "release" ]]; then - uv run --no-project maturin build --release --strip --manylinux 2014 --features protoc,substrait - else - uv run --no-project maturin build --manylinux 2014 --features protoc,substrait - fi - - name: Build (release mode) uses: PyO3/maturin-action@v1 if: inputs.build_mode == 'release' @@ -213,11 +197,6 @@ jobs: with: key: ${{ inputs.build_mode }} -# - name: Install Python -# uses: actions/setup-python@v5 -# with: -# python-version: ${{ matrix.python-version }} - - uses: astral-sh/setup-uv@v6 with: enable-cache: true @@ -298,6 +277,13 @@ jobs: - name: Install dependencies run: uv sync --dev --no-install-package datafusion + # Run clippy BEFORE maturin so we can avoid rebuilding. The features must match + # exactly the features used by maturin. Linux maturin builds need to happen in a + # container so only run this for our mac runner. + - name: Run Clippy + if: matrix.os != 'windows-latest' + run: cargo clippy --no-deps --all-targets --features protoc,substrait -- -D warnings + - name: Build Python package (release mode) if: inputs.build_mode == 'release' run: uv run --no-project maturin build --release --strip --features substrait From 1d8949d634fc1ac9ad06b89c2998455577e0e07b Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 10:35:36 -0500 Subject: [PATCH 10/22] Try removing unnecessary installs during build --- .github/workflows/build.yml | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ce3e825ba..83db493da 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -143,9 +143,6 @@ jobs: with: enable-cache: true - - name: Install dependencies - run: uv sync --dev --no-install-package datafusion - - name: Build (release mode) uses: PyO3/maturin-action@v1 if: inputs.build_mode == 'release' @@ -201,9 +198,6 @@ jobs: with: enable-cache: true - - name: Install dependencies - run: uv sync --dev --no-install-package datafusion - - name: Build (release mode) uses: PyO3/maturin-action@v1 if: inputs.build_mode == 'release' @@ -245,10 +239,6 @@ jobs: steps: - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - - uses: dtolnay/rust-toolchain@stable - run: rm LICENSE.txt @@ -273,10 +263,6 @@ jobs: version: "27.4" repo-token: ${{ secrets.GITHUB_TOKEN }} - # TODO(tsaucer) is this necessary if build doesn't run pytest? - - name: Install dependencies - run: uv sync --dev --no-install-package datafusion - # Run clippy BEFORE maturin so we can avoid rebuilding. The features must match # exactly the features used by maturin. Linux maturin builds need to happen in a # container so only run this for our mac runner. @@ -323,10 +309,6 @@ jobs: steps: - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - - uses: dtolnay/rust-toolchain@stable - run: rm LICENSE.txt @@ -351,9 +333,6 @@ jobs: version: "27.4" repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Install dependencies - run: uv sync --dev --no-install-package datafusion - - name: Build (release mode) run: | uv run --no-project maturin build --release --strip --features substrait From fe57e0d6c3775b8c140a13851fef5a45ec632d3a Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 10:39:30 -0500 Subject: [PATCH 11/22] Don't build cargo-license --- .github/workflows/build.yml | 5 +++++ dev/create_license.py | 4 +--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 83db493da..91f2d4ed5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -106,6 +106,11 @@ jobs: with: enable-cache: true + - name: Install cargo-license + uses: taiki-e/install-action@v2 + with: + tool: cargo-license + - name: Generate license file run: uv run --no-project python ./dev/create_license.py diff --git a/dev/create_license.py b/dev/create_license.py index a28a0abec..acbf8587c 100644 --- a/dev/create_license.py +++ b/dev/create_license.py @@ -22,11 +22,9 @@ import subprocess from pathlib import Path -subprocess.check_output(["cargo", "install", "cargo-license"]) data = subprocess.check_output( [ - "cargo", - "license", + "cargo-license", "--avoid-build-deps", "--avoid-dev-deps", "--do-not-bundle", From 632bd8aa91e826e48365544a528671e0857835b1 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 10:40:56 -0500 Subject: [PATCH 12/22] Add back in uv sync so we can run maturin --- .github/workflows/build.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 91f2d4ed5..b85c4c255 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -268,6 +268,9 @@ jobs: version: "27.4" repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Install dependencies + run: uv sync --dev --no-install-package datafusion + # Run clippy BEFORE maturin so we can avoid rebuilding. The features must match # exactly the features used by maturin. Linux maturin builds need to happen in a # container so only run this for our mac runner. @@ -338,6 +341,9 @@ jobs: version: "27.4" repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Install dependencies + run: uv sync --dev --no-install-package datafusion + - name: Build (release mode) run: | uv run --no-project maturin build --release --strip --features substrait From b3e905a38e06cf914cebfcd86ed7248328dcf501 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 10:45:03 -0500 Subject: [PATCH 13/22] minor: name casing --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b85c4c255..9fd2fb8e3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -179,7 +179,7 @@ jobs: # ============================================ build-manylinux-aarch64: needs: [generate-license, lint-rust, lint-python] - name: ManyLinux ARM64 + name: ManyLinux arm64 runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -230,11 +230,11 @@ jobs: path: target/wheels/* # ============================================ - # Build - macOS ARM64 / Windows + # Build - macOS arm64 / Windows # ============================================ build-python-mac-win: needs: [generate-license, lint-rust, lint-python] - name: macOS ARM64 & Windows + name: macOS arm64 & Windows runs-on: ${{ matrix.os }} strategy: fail-fast: false From 40970038e17fab105110b1ec0f7878e339d213ea Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 11:36:49 -0500 Subject: [PATCH 14/22] Fix path for wheels --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9fd2fb8e3..252ce1f6e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -172,7 +172,7 @@ jobs: uses: actions/upload-artifact@v6 with: name: dist-manylinux-x86_64 - path: target/wheels/* + path: dist/* # ============================================ # Build - Linux ARM64 @@ -227,7 +227,7 @@ jobs: uses: actions/upload-artifact@v6 with: name: dist-manylinux-aarch64 - path: target/wheels/* + path: dist/* # ============================================ # Build - macOS arm64 / Windows From 6f0d76be99d82be68cb73c867c6058c8bdd5e801 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 8 Feb 2026 11:40:47 -0500 Subject: [PATCH 15/22] More CI updates, but expect pytest to fail until we switch to downloading the wheel artifacts from build stage --- .github/workflows/build.yml | 3 ++- .github/workflows/test.yaml | 26 +++----------------------- 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 252ce1f6e..d63b49ebd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,13 +49,14 @@ jobs: - name: Setup Rust uses: dtolnay/rust-toolchain@stable with: + toolchain: "nightly" components: rustfmt - name: Cache Cargo uses: Swatinem/rust-cache@v2 - name: Check formatting - run: cargo fmt --all -- --check + run: cargo +nightly fmt --all -- --check lint-python: runs-on: ubuntu-latest diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index df4d8fcda..91ed7c486 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -59,8 +59,6 @@ jobs: - name: Setup Rust Toolchain uses: dtolnay/rust-toolchain@stable id: rust-toolchain - with: - components: clippy,rustfmt - name: Install Protoc uses: arduino/setup-protoc@v3 @@ -79,22 +77,20 @@ jobs: path: ~/.cargo key: cargo-cache-${{ steps.rust-toolchain.outputs.cachekey }}-${{ hashFiles('Cargo.lock') }} - - name: Run Clippy - if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} - run: cargo clippy --all-targets --all-features -- -D clippy::all -D warnings -A clippy::redundant_closure - - name: Install dependencies and build uses: astral-sh/setup-uv@v7 with: enable-cache: true + # TODO: Download wheel and uv pip install + - name: Run tests env: RUST_BACKTRACE: 1 run: | git submodule update --init uv sync --dev --no-install-package datafusion - uv run --no-project maturin develop --uv +# uv run --no-project maturin develop --uv uv run --no-project pytest -v . - name: FFI unit tests @@ -121,19 +117,3 @@ jobs: cd examples/tpch uv run --no-project python convert_data_to_parquet.py uv run --no-project pytest _tests.py - - nightly-fmt: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v6 - - - name: Setup Rust Toolchain - uses: dtolnay/rust-toolchain@stable - id: rust-toolchain - with: - toolchain: "nightly" - components: clippy,rustfmt - - - name: Check Formatting - run: cargo +nightly fmt -- --check From 8e872d1258c29a75f5c4fe605355bcb729e37cd2 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Feb 2026 06:57:00 -0500 Subject: [PATCH 16/22] Correct error in yml file. Rename to match other file extensions --- .github/workflows/{test.yaml => test.yml} | 3 --- 1 file changed, 3 deletions(-) rename .github/workflows/{test.yaml => test.yml} (97%) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yml similarity index 97% rename from .github/workflows/test.yaml rename to .github/workflows/test.yml index 91ed7c486..1c23832e2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yml @@ -82,15 +82,12 @@ jobs: with: enable-cache: true - # TODO: Download wheel and uv pip install - - name: Run tests env: RUST_BACKTRACE: 1 run: | git submodule update --init uv sync --dev --no-install-package datafusion -# uv run --no-project maturin develop --uv uv run --no-project pytest -v . - name: FFI unit tests From c08a788b6a5cb0d3db76eb24ffd6332da9dbf412 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Feb 2026 07:28:22 -0500 Subject: [PATCH 17/22] Download wheel from build stage for testing --- .github/workflows/ci.yml | 5 ++++ .github/workflows/release.yml | 5 ++++ .github/workflows/test.yml | 43 +++++++++++++++++++++++++---------- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b0863eebd..ab284b522 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,3 +34,8 @@ jobs: build_mode: debug run_wheels: false secrets: inherit + + test: + needs: build + uses: ./.github/workflows/test.yml + secrets: inherit diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1e5a45005..f4d48c473 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -44,3 +44,8 @@ jobs: build_mode: release run_wheels: true secrets: inherit + + test: + needs: build + uses: ./.github/workflows/test.yml + secrets: inherit diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1c23832e2..4da0c7717 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,16 +15,13 @@ # specific language governing permissions and limitations # under the License. -name: Python test -on: - push: - branches: [main] - pull_request: - branches: [main] +# Reusable workflow for running tests +# This ensures the same tests run for both debug (PRs) and release (main/tags) builds + +name: Test -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true +on: + workflow_call: jobs: test-matrix: @@ -50,7 +47,7 @@ jobs: EXAMPLE_VERSION=$(grep -A 1 "name = \"datafusion-common\"" examples/datafusion-ffi-example/Cargo.lock | grep "version = " | head -1 | sed 's/.*version = "\(.*\)"/\1/') echo "Main crate datafusion version: $MAIN_VERSION" echo "FFI example datafusion version: $EXAMPLE_VERSION" - + if [ "$MAIN_VERSION" != "$EXAMPLE_VERSION" ]; then echo "❌ Error: FFI example datafusion versions don't match!" exit 1 @@ -77,17 +74,39 @@ jobs: path: ~/.cargo key: cargo-cache-${{ steps.rust-toolchain.outputs.cachekey }}-${{ hashFiles('Cargo.lock') }} - - name: Install dependencies and build + - name: Install dependencies uses: astral-sh/setup-uv@v7 with: enable-cache: true + # Download the Linux wheel built in the build workflow + - name: Download pre-built Linux wheel + uses: actions/download-artifact@v7 + with: + name: dist-manylinux-x86_64 + path: wheels/ + + # Install the pre-built wheel + - name: Install datafusion from pre-built wheel + run: | + set -x + # Install development dependencies + uv sync --dev --no-install-package datafusion + # Install the pre-built wheel + WHEEL=$(find wheels/ -name "*.whl" | head -1) + if [ -n "$WHEEL" ]; then + echo "Installing wheel: $WHEEL" + uv pip install "$WHEEL" + else + echo "ERROR: No wheel found!" + exit 1 + fi + - name: Run tests env: RUST_BACKTRACE: 1 run: | git submodule update --init - uv sync --dev --no-install-package datafusion uv run --no-project pytest -v . - name: FFI unit tests From b8e0e011f6cc69141e443ab23a0a68b151666290 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Feb 2026 08:09:16 -0500 Subject: [PATCH 18/22] For CI tests move into test directory to avoid picking up pyproject.toml file --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4da0c7717..330bbaaff 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -107,6 +107,7 @@ jobs: RUST_BACKTRACE: 1 run: | git submodule update --init + cd python/tests uv run --no-project pytest -v . - name: FFI unit tests From 0cc6642cede141bb5da54b6a5d5b73f7999ee21e Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Feb 2026 09:10:35 -0500 Subject: [PATCH 19/22] Do not upload artifacts not used in testing during debug builds --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d63b49ebd..b4fae0a91 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -226,6 +226,7 @@ jobs: - name: Archive wheels uses: actions/upload-artifact@v6 + if: inputs.build_mode == 'release' with: name: dist-manylinux-aarch64 path: dist/* @@ -300,6 +301,7 @@ jobs: - name: Archive wheels uses: actions/upload-artifact@v6 + if: inputs.build_mode == 'release' with: name: dist-${{ matrix.os }} path: target/wheels/* From c9ff41f8b4e239e30a65c18c126b399358583d35 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Feb 2026 09:10:53 -0500 Subject: [PATCH 20/22] Do not attempt to use local python path for tests --- .github/workflows/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 330bbaaff..201fa287e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -107,8 +107,7 @@ jobs: RUST_BACKTRACE: 1 run: | git submodule update --init - cd python/tests - uv run --no-project pytest -v . + uv run --no-project pytest -v . --import-mode=importlib - name: FFI unit tests run: | From b83bed11e54d77d2c7313d45784a027d018ae7c0 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Feb 2026 09:27:29 -0500 Subject: [PATCH 21/22] Bump manylinux version --- .github/workflows/build.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b4fae0a91..3f5c57525 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -154,8 +154,7 @@ jobs: if: inputs.build_mode == 'release' with: target: x86_64-unknown-linux-gnu - manylinux: "2_17" - # protoc feature uses bundled protoc, avoiding system dependency issues + manylinux: "2_28" args: --release --strip --features protoc,substrait --out dist rustup-components: rust-std @@ -164,8 +163,7 @@ jobs: if: inputs.build_mode == 'debug' with: target: x86_64-unknown-linux-gnu - manylinux: "2_17" - # protoc feature uses bundled protoc, avoiding system dependency issues + manylinux: "2_28" args: --features protoc,substrait --out dist rustup-components: rust-std @@ -209,7 +207,6 @@ jobs: if: inputs.build_mode == 'release' with: target: aarch64-unknown-linux-gnu - # aarch64 cross-compilation needs manylinux_2_28 manylinux: "2_28" args: --release --strip --features protoc,substrait --out dist rustup-components: rust-std @@ -219,7 +216,6 @@ jobs: if: inputs.build_mode == 'debug' with: target: aarch64-unknown-linux-gnu - # aarch64 cross-compilation needs manylinux_2_28 manylinux: "2_28" args: --features protoc,substrait --out dist rustup-components: rust-std From aa88d6540ca0b037347b959788ea32700509b98a Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Feb 2026 09:38:59 -0500 Subject: [PATCH 22/22] Do not run release flow for branches named branch-*. Only run it for pushes to main and release or candidate tags. --- .github/workflows/release.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f4d48c473..bddc89eac 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,7 +20,6 @@ # - Merges to main # - Release candidate tags (*-rc*) # - Release tags (e.g., 45.0.0) -# - Release branches (branch-*) name: Release Build @@ -28,7 +27,6 @@ on: push: branches: - "main" - - "branch-*" # Release branches tags: - "*-rc*" # Release candidates (e.g., 45.0.0-rc1) - "[0-9]+.*" # Release tags (e.g., 45.0.0)