From 30fc3d5c7c6f8f2588685351b94bb9aa217ee7bb Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 1 Apr 2026 09:44:22 -0400 Subject: [PATCH 01/10] Add missing SessionContext read/register methods for Arrow IPC and batches Add read_arrow, read_empty, register_arrow, and register_batch methods to SessionContext, exposing upstream DataFusion v53 functionality. The write_* methods and read_batch/read_batches are already covered by DataFrame.write_* and SessionContext.from_arrow respectively. Closes #1458. Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/core/src/context.rs | 63 +++++++++++++++++++++++++++++++- python/datafusion/context.py | 69 ++++++++++++++++++++++++++++++++++++ python/tests/test_context.py | 39 ++++++++++++++++++++ 3 files changed, 170 insertions(+), 1 deletion(-) diff --git a/crates/core/src/context.rs b/crates/core/src/context.rs index 53994d2f5..4c001b55b 100644 --- a/crates/core/src/context.rs +++ b/crates/core/src/context.rs @@ -41,7 +41,7 @@ use datafusion::execution::context::{ }; use datafusion::execution::disk_manager::DiskManagerMode; use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, UnboundedMemoryPool}; -use datafusion::execution::options::ReadOptions; +use datafusion::execution::options::{ArrowReadOptions, ReadOptions}; use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::execution::session_state::SessionStateBuilder; use datafusion::prelude::{ @@ -956,6 +956,39 @@ impl PySessionContext { Ok(()) } + #[pyo3(signature = (name, path, schema=None, file_extension=".arrow", table_partition_cols=vec![]))] + pub fn register_arrow( + &self, + name: &str, + path: &str, + schema: Option>, + file_extension: &str, + table_partition_cols: Vec<(String, PyArrowType)>, + py: Python, + ) -> PyDataFusionResult<()> { + let mut options = ArrowReadOptions::default().table_partition_cols( + table_partition_cols + .into_iter() + .map(|(name, ty)| (name, ty.0)) + .collect::>(), + ); + options.file_extension = file_extension; + options.schema = schema.as_ref().map(|x| &x.0); + + let result = self.ctx.register_arrow(name, path, options); + wait_for_future(py, result)??; + Ok(()) + } + + pub fn register_batch( + &self, + name: &str, + batch: PyArrowType, + ) -> PyDataFusionResult<()> { + self.ctx.register_batch(name, batch.0)?; + Ok(()) + } + // Registers a PyArrow.Dataset pub fn register_dataset( &self, @@ -1184,6 +1217,34 @@ impl PySessionContext { Ok(PyDataFrame::new(df)) } + pub fn read_empty(&self) -> PyDataFusionResult { + let df = self.ctx.read_empty()?; + Ok(PyDataFrame::new(df)) + } + + #[pyo3(signature = (path, schema=None, file_extension=".arrow", table_partition_cols=vec![]))] + pub fn read_arrow( + &self, + path: &str, + schema: Option>, + file_extension: &str, + table_partition_cols: Vec<(String, PyArrowType)>, + py: Python, + ) -> PyDataFusionResult { + let mut options = ArrowReadOptions::default().table_partition_cols( + table_partition_cols + .into_iter() + .map(|(name, ty)| (name, ty.0)) + .collect::>(), + ); + options.file_extension = file_extension; + options.schema = schema.as_ref().map(|x| &x.0); + + let result = self.ctx.read_arrow(path, options); + let df = wait_for_future(py, result)??; + Ok(PyDataFrame::new(df)) + } + pub fn read_table(&self, table: Bound<'_, PyAny>) -> PyDataFusionResult { let session = self.clone().into_bound_py_any(table.py())?; let table = PyTable::new(table, Some(session))?; diff --git a/python/datafusion/context.py b/python/datafusion/context.py index c8edc816f..c2a06dc82 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -894,6 +894,15 @@ def register_udtf(self, func: TableFunction) -> None: """Register a user defined table function.""" self.ctx.register_udtf(func._udtf) + def register_batch(self, name: str, batch: pa.RecordBatch) -> None: + """Register a single :py:class:`pa.RecordBatch` as a table. + + Args: + name: Name of the resultant table. + batch: Record batch to register as a table. + """ + self.ctx.register_batch(name, batch) + def register_record_batches( self, name: str, partitions: list[list[pa.RecordBatch]] ) -> None: @@ -1092,6 +1101,33 @@ def register_avro( name, str(path), schema, file_extension, table_partition_cols ) + def register_arrow( + self, + name: str, + path: str | pathlib.Path, + schema: pa.Schema | None = None, + file_extension: str = ".arrow", + table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, + ) -> None: + """Register an Arrow IPC file as a table. + + The registered table can be referenced from SQL statements executed + against this context. + + Args: + name: Name of the table to register. + path: Path to the Arrow IPC file. + schema: The data source schema. + file_extension: File extension to select. + table_partition_cols: Partition columns. + """ + if table_partition_cols is None: + table_partition_cols = [] + table_partition_cols = _convert_table_partition_cols(table_partition_cols) + self.ctx.register_arrow( + name, str(path), schema, file_extension, table_partition_cols + ) + def register_dataset(self, name: str, dataset: pa.dataset.Dataset) -> None: """Register a :py:class:`pa.dataset.Dataset` as a table. @@ -1328,6 +1364,39 @@ def read_avro( self.ctx.read_avro(str(path), schema, file_partition_cols, file_extension) ) + def read_arrow( + self, + path: str | pathlib.Path, + schema: pa.Schema | None = None, + file_extension: str = ".arrow", + file_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, + ) -> DataFrame: + """Create a :py:class:`DataFrame` for reading an Arrow IPC data source. + + Args: + path: Path to the Arrow IPC file. + schema: The data source schema. + file_extension: File extension to select. + file_partition_cols: Partition columns. + + Returns: + DataFrame representation of the read Arrow IPC file. + """ + if file_partition_cols is None: + file_partition_cols = [] + file_partition_cols = _convert_table_partition_cols(file_partition_cols) + return DataFrame( + self.ctx.read_arrow(str(path), schema, file_extension, file_partition_cols) + ) + + def read_empty(self) -> DataFrame: + """Create an empty :py:class:`DataFrame` with no columns or rows. + + Returns: + An empty DataFrame. + """ + return DataFrame(self.ctx.read_empty()) + def read_table( self, table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset ) -> DataFrame: diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 5df6ed20f..a4a82cdf6 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -668,6 +668,45 @@ def test_read_avro(ctx): assert avro_df is not None +def test_read_arrow(ctx, tmp_path): + # Write an Arrow IPC file, then read it back + table = pa.table({"a": [1, 2, 3], "b": ["x", "y", "z"]}) + arrow_path = tmp_path / "test.arrow" + with pa.ipc.new_file(str(arrow_path), table.schema) as writer: + writer.write_table(table) + + df = ctx.read_arrow(str(arrow_path)) + result = df.collect() + assert result[0].column(0) == pa.array([1, 2, 3]) + assert result[0].column(1) == pa.array(["x", "y", "z"]) + + +def test_read_empty(ctx): + df = ctx.read_empty() + result = df.collect() + assert result[0].num_columns == 0 + + +def test_register_arrow(ctx, tmp_path): + # Write an Arrow IPC file, then register and query it + table = pa.table({"x": [10, 20, 30]}) + arrow_path = tmp_path / "test.arrow" + with pa.ipc.new_file(str(arrow_path), table.schema) as writer: + writer.write_table(table) + + ctx.register_arrow("arrow_tbl", str(arrow_path)) + result = ctx.sql("SELECT * FROM arrow_tbl").collect() + assert result[0].column(0) == pa.array([10, 20, 30]) + + +def test_register_batch(ctx): + batch = pa.RecordBatch.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]}) + ctx.register_batch("batch_tbl", batch) + result = ctx.sql("SELECT * FROM batch_tbl").collect() + assert result[0].column(0) == pa.array([1, 2, 3]) + assert result[0].column(1) == pa.array([4, 5, 6]) + + def test_create_sql_options(): SQLOptions() From e494bed4587545b8d9ebe02da602a5d34b9c799f Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 5 Apr 2026 07:41:52 -0400 Subject: [PATCH 02/10] Remove redundant read_empty Rust binding, make Python read_empty an alias for empty_table Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/core/src/context.rs | 5 ----- python/datafusion/context.py | 4 +++- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/crates/core/src/context.rs b/crates/core/src/context.rs index 4c001b55b..86675123e 100644 --- a/crates/core/src/context.rs +++ b/crates/core/src/context.rs @@ -1217,11 +1217,6 @@ impl PySessionContext { Ok(PyDataFrame::new(df)) } - pub fn read_empty(&self) -> PyDataFusionResult { - let df = self.ctx.read_empty()?; - Ok(PyDataFrame::new(df)) - } - #[pyo3(signature = (path, schema=None, file_extension=".arrow", table_partition_cols=vec![]))] pub fn read_arrow( &self, diff --git a/python/datafusion/context.py b/python/datafusion/context.py index c2a06dc82..cb5f89f40 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -1392,10 +1392,12 @@ def read_arrow( def read_empty(self) -> DataFrame: """Create an empty :py:class:`DataFrame` with no columns or rows. + This is an alias for :meth:`empty_table`. + Returns: An empty DataFrame. """ - return DataFrame(self.ctx.read_empty()) + return self.empty_table() def read_table( self, table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset From 246105dda794f82628a154ed4116d9dc3dc680f3 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 5 Apr 2026 07:42:08 -0400 Subject: [PATCH 03/10] Add pathlib.Path and empty batch tests for Arrow IPC and register_batch Co-Authored-By: Claude Opus 4.6 (1M context) --- python/tests/test_context.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/tests/test_context.py b/python/tests/test_context.py index a4a82cdf6..9db6c0435 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -680,6 +680,11 @@ def test_read_arrow(ctx, tmp_path): assert result[0].column(0) == pa.array([1, 2, 3]) assert result[0].column(1) == pa.array(["x", "y", "z"]) + # Also verify pathlib.Path works + df = ctx.read_arrow(arrow_path) + result = df.collect() + assert result[0].column(0) == pa.array([1, 2, 3]) + def test_read_empty(ctx): df = ctx.read_empty() @@ -698,6 +703,11 @@ def test_register_arrow(ctx, tmp_path): result = ctx.sql("SELECT * FROM arrow_tbl").collect() assert result[0].column(0) == pa.array([10, 20, 30]) + # Also verify pathlib.Path works + ctx.register_arrow("arrow_tbl_path", arrow_path) + result = ctx.sql("SELECT * FROM arrow_tbl_path").collect() + assert result[0].column(0) == pa.array([10, 20, 30]) + def test_register_batch(ctx): batch = pa.RecordBatch.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]}) @@ -707,6 +717,13 @@ def test_register_batch(ctx): assert result[0].column(1) == pa.array([4, 5, 6]) +def test_register_batch_empty(ctx): + batch = pa.RecordBatch.from_pydict({"a": pa.array([], type=pa.int64())}) + ctx.register_batch("empty_batch_tbl", batch) + result = ctx.sql("SELECT * FROM empty_batch_tbl").collect() + assert result[0].num_rows == 0 + + def test_create_sql_options(): SQLOptions() From 4a2d7ba373ae694295eed79487e8a3f3b722773a Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 5 Apr 2026 07:42:15 -0400 Subject: [PATCH 04/10] Make test_read_empty more robust with length and num_rows checks Co-Authored-By: Claude Opus 4.6 (1M context) --- python/tests/test_context.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 9db6c0435..0b2ce54a5 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -689,7 +689,9 @@ def test_read_arrow(ctx, tmp_path): def test_read_empty(ctx): df = ctx.read_empty() result = df.collect() + assert len(result) == 1 assert result[0].num_columns == 0 + assert result[0].num_rows == 0 def test_register_arrow(ctx, tmp_path): From 03092ed4b5aa2f0a94478da7750058fcb5317c68 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 5 Apr 2026 07:48:29 -0400 Subject: [PATCH 05/10] Add examples to docstrings for new register/read methods Co-Authored-By: Claude Opus 4.6 (1M context) --- conftest.py | 3 +++ python/datafusion/context.py | 51 ++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/conftest.py b/conftest.py index 73e90077a..0a56d54ec 100644 --- a/conftest.py +++ b/conftest.py @@ -19,6 +19,7 @@ import datafusion as dfn import numpy as np +import pyarrow as pa import pytest from datafusion import col, lit from datafusion import functions as F @@ -29,6 +30,8 @@ def _doctest_namespace(doctest_namespace: dict) -> None: """Add common imports to the doctest namespace.""" doctest_namespace["dfn"] = dfn doctest_namespace["np"] = np + doctest_namespace["pa"] = pa doctest_namespace["col"] = col doctest_namespace["lit"] = lit doctest_namespace["F"] = F + doctest_namespace["ctx"] = dfn.SessionContext() diff --git a/python/datafusion/context.py b/python/datafusion/context.py index cb5f89f40..c59c4b082 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -900,6 +900,17 @@ def register_batch(self, name: str, batch: pa.RecordBatch) -> None: Args: name: Name of the resultant table. batch: Record batch to register as a table. + + Examples: + >>> batch = pa.RecordBatch.from_pydict({"a": [1, 2, 3]}) + >>> ctx.register_batch("batch_tbl", batch) + >>> ctx.sql("SELECT * FROM batch_tbl").collect()[0].column(0) + + [ + 1, + 2, + 3 + ] """ self.ctx.register_batch(name, batch) @@ -1120,6 +1131,22 @@ def register_arrow( schema: The data source schema. file_extension: File extension to select. table_partition_cols: Partition columns. + + Examples: + >>> import tempfile, os + >>> table = pa.table({"x": [10, 20, 30]}) + >>> with tempfile.TemporaryDirectory() as tmpdir: + ... path = os.path.join(tmpdir, "data.arrow") + ... with pa.ipc.new_file(path, table.schema) as writer: + ... writer.write_table(table) + ... ctx.register_arrow("arrow_tbl", path) + ... ctx.sql("SELECT * FROM arrow_tbl").collect()[0].column(0) + + [ + 10, + 20, + 30 + ] """ if table_partition_cols is None: table_partition_cols = [] @@ -1381,6 +1408,22 @@ def read_arrow( Returns: DataFrame representation of the read Arrow IPC file. + + Examples: + >>> import tempfile, os + >>> table = pa.table({"a": [1, 2, 3]}) + >>> with tempfile.TemporaryDirectory() as tmpdir: + ... path = os.path.join(tmpdir, "data.arrow") + ... with pa.ipc.new_file(path, table.schema) as writer: + ... writer.write_table(table) + ... df = ctx.read_arrow(path) + ... df.collect()[0].column(0) + + [ + 1, + 2, + 3 + ] """ if file_partition_cols is None: file_partition_cols = [] @@ -1396,6 +1439,14 @@ def read_empty(self) -> DataFrame: Returns: An empty DataFrame. + + Examples: + >>> df = ctx.read_empty() + >>> result = df.collect() + >>> len(result) + 1 + >>> result[0].num_columns + 0 """ return self.empty_table() From 0f96ea3b9d4f4d2b05e7ba1373bdc9a0950a87f2 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 5 Apr 2026 10:30:57 -0400 Subject: [PATCH 06/10] Empty table actually returns record batch of length one but there are no columns --- python/tests/test_context.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 0b2ce54a5..b4b3648ac 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -691,7 +691,11 @@ def test_read_empty(ctx): result = df.collect() assert len(result) == 1 assert result[0].num_columns == 0 - assert result[0].num_rows == 0 + + df = ctx.empty_table() + result = df.collect() + assert len(result) == 1 + assert result[0].num_columns == 0 def test_register_arrow(ctx, tmp_path): From b719b47fcb0bbabda1a13c0729a26c9b56f6e62f Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 8 Apr 2026 08:17:52 -0400 Subject: [PATCH 07/10] Add optional argument examples to register_arrow and read_arrow docstrings Demonstrate schema= and file_extension= keyword arguments in the docstring examples for register_arrow and read_arrow, following project guidelines for optional parameter documentation. Co-Authored-By: Claude Opus 4.6 (1M context) --- python/datafusion/context.py | 66 ++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/python/datafusion/context.py b/python/datafusion/context.py index da9037c0c..a14ff85a9 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -1164,6 +1164,42 @@ def register_arrow( 20, 30 ] + + Provide an explicit ``schema`` to override schema inference: + + >>> with tempfile.TemporaryDirectory() as tmpdir: + ... path = os.path.join(tmpdir, "data.arrow") + ... with pa.ipc.new_file(path, table.schema) as writer: + ... writer.write_table(table) + ... ctx.register_arrow( + ... "arrow_schema", + ... path, + ... schema=pa.schema([("x", pa.int64())]), + ... ) + ... ctx.sql("SELECT * FROM arrow_schema").collect()[0].column(0) + + [ + 10, + 20, + 30 + ] + + Use ``file_extension`` to read files with a non-default extension: + + >>> with tempfile.TemporaryDirectory() as tmpdir: + ... path = os.path.join(tmpdir, "data.ipc") + ... with pa.ipc.new_file(path, table.schema) as writer: + ... writer.write_table(table) + ... ctx.register_arrow( + ... "arrow_ipc", path, file_extension=".ipc" + ... ) + ... ctx.sql("SELECT * FROM arrow_ipc").collect()[0].column(0) + + [ + 10, + 20, + 30 + ] """ if table_partition_cols is None: table_partition_cols = [] @@ -1465,6 +1501,36 @@ def read_arrow( 2, 3 ] + + Provide an explicit ``schema`` to override schema inference: + + >>> with tempfile.TemporaryDirectory() as tmpdir: + ... path = os.path.join(tmpdir, "data.arrow") + ... with pa.ipc.new_file(path, table.schema) as writer: + ... writer.write_table(table) + ... df = ctx.read_arrow(path, schema=pa.schema([("a", pa.int64())])) + ... df.collect()[0].column(0) + + [ + 1, + 2, + 3 + ] + + Use ``file_extension`` to read files with a non-default extension: + + >>> with tempfile.TemporaryDirectory() as tmpdir: + ... path = os.path.join(tmpdir, "data.ipc") + ... with pa.ipc.new_file(path, table.schema) as writer: + ... writer.write_table(table) + ... df = ctx.read_arrow(path, file_extension=".ipc") + ... df.collect()[0].column(0) + + [ + 1, + 2, + 3 + ] """ if file_partition_cols is None: file_partition_cols = [] From af08ee67c235046333fb6b492648f631594255ee Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 8 Apr 2026 08:24:27 -0400 Subject: [PATCH 08/10] Simplify read_empty docstring to use alias pattern Follow the same See Also alias convention used in functions.py since read_empty is a simple alias for empty_table. Co-Authored-By: Claude Opus 4.6 (1M context) --- python/datafusion/context.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/python/datafusion/context.py b/python/datafusion/context.py index a14ff85a9..eea404598 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -1542,18 +1542,8 @@ def read_arrow( def read_empty(self) -> DataFrame: """Create an empty :py:class:`DataFrame` with no columns or rows. - This is an alias for :meth:`empty_table`. - - Returns: - An empty DataFrame. - - Examples: - >>> df = ctx.read_empty() - >>> result = df.collect() - >>> len(result) - 1 - >>> result[0].num_columns - 0 + See Also: + This is an alias for :meth:`empty_table`. """ return self.empty_table() From de89982ca59def32c86a0ba0234f1a6eda7f53b6 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 8 Apr 2026 08:32:03 -0400 Subject: [PATCH 09/10] Remove shared ctx from doctest namespace, use inline SessionContext Avoid shared SessionContext state across doctests by having each docstring example create its own ctx instance, matching the pattern used throughout the rest of the codebase. Co-Authored-By: Claude Opus 4.6 (1M context) --- conftest.py | 1 - python/datafusion/context.py | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/conftest.py b/conftest.py index 0a56d54ec..0c9410636 100644 --- a/conftest.py +++ b/conftest.py @@ -34,4 +34,3 @@ def _doctest_namespace(doctest_namespace: dict) -> None: doctest_namespace["col"] = col doctest_namespace["lit"] = lit doctest_namespace["F"] = F - doctest_namespace["ctx"] = dfn.SessionContext() diff --git a/python/datafusion/context.py b/python/datafusion/context.py index eea404598..7a306f04c 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -911,6 +911,7 @@ def register_batch(self, name: str, batch: pa.RecordBatch) -> None: batch: Record batch to register as a table. Examples: + >>> ctx = dfn.SessionContext() >>> batch = pa.RecordBatch.from_pydict({"a": [1, 2, 3]}) >>> ctx.register_batch("batch_tbl", batch) >>> ctx.sql("SELECT * FROM batch_tbl").collect()[0].column(0) @@ -1151,6 +1152,7 @@ def register_arrow( Examples: >>> import tempfile, os + >>> ctx = dfn.SessionContext() >>> table = pa.table({"x": [10, 20, 30]}) >>> with tempfile.TemporaryDirectory() as tmpdir: ... path = os.path.join(tmpdir, "data.arrow") @@ -1488,6 +1490,7 @@ def read_arrow( Examples: >>> import tempfile, os + >>> ctx = dfn.SessionContext() >>> table = pa.table({"a": [1, 2, 3]}) >>> with tempfile.TemporaryDirectory() as tmpdir: ... path = os.path.join(tmpdir, "data.arrow") From a32292a28e7c9aae82fbf4df3ecba60333dcb02a Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 8 Apr 2026 08:40:09 -0400 Subject: [PATCH 10/10] Remove redundant import pyarrow as pa from docstrings The pa alias is already provided by the doctest namespace in conftest.py, so inline imports are unnecessary. Co-Authored-By: Claude Opus 4.6 (1M context) --- python/datafusion/user_defined.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py index 3eaccdfa3..848ab4cee 100644 --- a/python/datafusion/user_defined.py +++ b/python/datafusion/user_defined.py @@ -213,7 +213,6 @@ def udf(*args: Any, **kwargs: Any): # noqa: D417 Examples: Using ``udf`` as a function: - >>> import pyarrow as pa >>> import pyarrow.compute as pc >>> from datafusion.user_defined import ScalarUDF >>> def double_func(x): @@ -480,7 +479,6 @@ def udaf(*args: Any, **kwargs: Any): # noqa: D417, C901 instance in which this UDAF is used. Examples: - >>> import pyarrow as pa >>> import pyarrow.compute as pc >>> from datafusion.user_defined import AggregateUDF, Accumulator, udaf >>> class Summarize(Accumulator): @@ -874,7 +872,6 @@ def udwf(*args: Any, **kwargs: Any): # noqa: D417 When using ``udwf`` as a decorator, do not pass ``func`` explicitly. Examples: - >>> import pyarrow as pa >>> from datafusion.user_defined import WindowUDF, WindowEvaluator, udwf >>> class BiasedNumbers(WindowEvaluator): ... def __init__(self, start: int = 0):