Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,20 @@ Skills follow the [Agent Skills](https://agentskills.io) open standard. Each ski

- `SKILL.md` — The skill definition with YAML frontmatter (name, description, argument-hint) and detailed instructions.
- Additional supporting files as needed.

## Python Function Docstrings

Every Python function must include a docstring. Every function other than a simple alias must also include usage examples.

- **Examples are required**: Each function needs at least one doctest-style example
demonstrating basic usage.
- **Optional parameters**: If a function has optional parameters, include separate
examples that show usage both without and with the optional arguments. Pass
optional arguments using their keyword name (e.g., `step=dfn.lit(3)`) so readers
can immediately see which parameter is being demonstrated.
- **Reuse input data**: Use the same input data across examples wherever possible.
The examples should demonstrate how different optional arguments change the output
for the same input, making the effect of each option easy to understand.
- **Alias functions**: Functions that are simple aliases (e.g., `list_sort` aliasing
`array_sort`) only need a one-line description and a `See Also` reference to the
primary function. They do not need their own examples.
Comment on lines +28 to +44
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ntjohnson1 Do you think we should add anything else here?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like it covers the majority of things to me. The only other piece is the stylistic preference on specifying "Returns" or not. I don't know if there is a definitive position on that.

56 changes: 56 additions & 0 deletions crates/core/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,50 @@ fn array_cat(exprs: Vec<PyExpr>) -> PyExpr {
array_concat(exprs)
}

#[pyfunction]
fn array_distance(array1: PyExpr, array2: PyExpr) -> PyExpr {
let args = vec![array1.into(), array2.into()];
Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf(
datafusion::functions_nested::distance::array_distance_udf(),
args,
))
.into()
}

/// Creates an expression that passes `exprs` to DataFusion's `arrays_zip`
/// expression builder.
///
/// Each element is converted from its Python wrapper before being forwarded,
/// preserving the order of the input list.
#[pyfunction]
fn arrays_zip(exprs: Vec<PyExpr>) -> PyExpr {
    datafusion::functions_nested::expr_fn::arrays_zip(
        exprs.into_iter().map(Into::into).collect(),
    )
    .into()
}

#[pyfunction]
#[pyo3(signature = (string, delimiter, null_string=None))]
fn string_to_array(string: PyExpr, delimiter: PyExpr, null_string: Option<PyExpr>) -> PyExpr {
let mut args = vec![string.into(), delimiter.into()];
if let Some(null_string) = null_string {
args.push(null_string.into());
}
Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf(
datafusion::functions_nested::string::string_to_array_udf(),
args,
))
.into()
}

#[pyfunction]
#[pyo3(signature = (start, stop, step=None))]
fn gen_series(start: PyExpr, stop: PyExpr, step: Option<PyExpr>) -> PyExpr {
let mut args = vec![start.into(), stop.into()];
if let Some(step) = step {
args.push(step.into());
}
Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf(
datafusion::functions_nested::range::gen_series_udf(),
args,
))
.into()
}

#[pyfunction]
fn make_map(keys: Vec<PyExpr>, values: Vec<PyExpr>) -> PyExpr {
let keys = keys.into_iter().map(|x| x.into()).collect();
Expand Down Expand Up @@ -681,6 +725,10 @@ array_fn!(array_intersect, first_array second_array);
// Array functions declared through the `array_fn!` macro (defined outside this
// excerpt). NOTE(review): each invocation appears to give the function name
// followed by its space-separated argument names — confirm against the macro
// definition. Every function declared here must also be registered in
// `init_module` via `wrap_pyfunction!`.
array_fn!(array_union, array1 array2);
array_fn!(array_except, first_array second_array);
array_fn!(array_resize, array size value);
array_fn!(array_any_value, array);
array_fn!(array_max, array);
array_fn!(array_min, array);
array_fn!(array_reverse, array);
array_fn!(cardinality, array);
array_fn!(flatten, array);
array_fn!(range, start stop step);
Expand Down Expand Up @@ -1152,6 +1200,14 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(array_replace_all))?;
m.add_wrapped(wrap_pyfunction!(array_sort))?;
m.add_wrapped(wrap_pyfunction!(array_slice))?;
m.add_wrapped(wrap_pyfunction!(array_any_value))?;
m.add_wrapped(wrap_pyfunction!(array_distance))?;
m.add_wrapped(wrap_pyfunction!(array_max))?;
m.add_wrapped(wrap_pyfunction!(array_min))?;
m.add_wrapped(wrap_pyfunction!(array_reverse))?;
m.add_wrapped(wrap_pyfunction!(arrays_zip))?;
m.add_wrapped(wrap_pyfunction!(string_to_array))?;
m.add_wrapped(wrap_pyfunction!(gen_series))?;
m.add_wrapped(wrap_pyfunction!(flatten))?;
m.add_wrapped(wrap_pyfunction!(cardinality))?;

Expand Down
Loading
Loading