Skip to content

Commit

Permalink
Focus the user-defined-functions page on Python only.
Browse files Browse the repository at this point in the history
  • Loading branch information
pythonspeed committed Mar 20, 2024
1 parent b530e99 commit 7a71d7b
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 47 deletions.
3 changes: 3 additions & 0 deletions docs/src/python/user-guide/expressions/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,6 @@
).filter(pl.struct("Movie", "Theatre").is_duplicated())
print(out)
# --8<-- [end:struct_ranking]

# --8<-- [start:multi_column_apply]
# --8<-- [end:multi_column_apply]
25 changes: 20 additions & 5 deletions docs/src/python/user-guide/expressions/user-defined-functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,27 @@ def diff_from_mean_numba(arr, result):

# --8<-- [end:missing_data]


# --8<-- [start:combine]
out = df.select(
pl.struct(["keys", "values"])
.map_elements(lambda x: len(x["keys"]) + x["values"])
.alias("solution_map_elements"),
(pl.col("keys").str.len_bytes() + pl.col("values")).alias("solution_expr"),
# Add two arrays together:
@guvectorize([(int64[:], int64[:], float64[:])], "(n),(n)->(n)")
def add(arr, arr2, result):
for i in range(len(arr)):
result[i] = arr[i] + arr2[i]


df3 = pl.DataFrame({"values1": [1, 2, 3], "values2": [10, 20, 30]})

out = df3.select(
# Create a struct that has two columns in it:
pl.struct(["values1", "values2"])
# Pass the struct to a lambda that then passes the individual columns to
# the add() function:
.map_batches(
lambda combined: add(
combined.struct.field("values1"), combined.struct.field("values2")
)
).alias("add_columns")
)
print(out)
# --8<-- [end:combine]
49 changes: 49 additions & 0 deletions docs/src/rust/user-guide/expressions/structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,54 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("{}", &out);
// --8<-- [end:struct_ranking]

// --8<-- [start:multi_column_apply]
let df = df!(
"keys" => &["a", "a", "b"],
"values" => &[10, 7, 1],
)?;

let out = df
.lazy()
.select([
// pack to struct to get access to multiple fields in a custom `apply/map`
as_struct(vec![col("keys"), col("values")])
// we will compute the len(a) + b
.apply(
|s| {
// downcast to struct
let ca = s.struct_()?;

// get the fields as Series
let s_a = &ca.fields()[0];
let s_b = &ca.fields()[1];

// downcast the `Series` to their known type
let ca_a = s_a.str()?;
let ca_b = s_b.i32()?;

// iterate both `ChunkedArrays`
let out: Int32Chunked = ca_a
.into_iter()
.zip(ca_b)
.map(|(opt_a, opt_b)| match (opt_a, opt_b) {
(Some(a), Some(b)) => Some(a.len() as i32 + b),
_ => None,
})
.collect();

Ok(Some(out.into_series()))
},
GetOutput::from_type(DataType::Int32),
)
// note: the `'solution_map_elements'` alias is just there to show how you
// get the same output as in the Python API example.
.alias("solution_map_elements"),
(col("keys").str().count_matches(lit("."), true) + col("values"))
.alias("solution_expr"),
])
.collect()?;
println!("{}", out);

// --8<-- [end:multi_column_apply]
Ok(())
}
41 changes: 0 additions & 41 deletions docs/src/rust/user-guide/expressions/user-defined-functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,47 +25,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// --8<-- [end:missing_data]

// --8<-- [start:combine]
let out = df
.lazy()
.select([
// pack to struct to get access to multiple fields in a custom `apply/map`
as_struct(vec![col("keys"), col("values")])
// we will compute the len(a) + b
.apply(
|s| {
// downcast to struct
let ca = s.struct_()?;

// get the fields as Series
let s_a = &ca.fields()[0];
let s_b = &ca.fields()[1];

// downcast the `Series` to their known type
let ca_a = s_a.str()?;
let ca_b = s_b.i32()?;

// iterate both `ChunkedArrays`
let out: Int32Chunked = ca_a
.into_iter()
.zip(ca_b)
.map(|(opt_a, opt_b)| match (opt_a, opt_b) {
(Some(a), Some(b)) => Some(a.len() as i32 + b),
_ => None,
})
.collect();

Ok(Some(out.into_series()))
},
GetOutput::from_type(DataType::Int32),
)
// note: the `'solution_map_elements'` alias is just there to show how you
// get the same output as in the Python API example.
.alias("solution_map_elements"),
(col("keys").str().count_matches(lit("."), true) + col("values"))
.alias("solution_expr"),
])
.collect()?;
println!("{}", out);
// --8<-- [end:combine]
Ok(())
}
6 changes: 5 additions & 1 deletion docs/user-guide/expressions/structs.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,8 @@ That's a pretty complex set of requirements done very elegantly in Polars!

### Using multi-column apply

This was discussed in the previous section on _User Defined Functions_.
This was discussed in the previous section on _User Defined Functions_ for the Python case.
Here's an example of doing so with Rust:


{{code_block('user-guide/expressions/structs','multi_column_apply',[])}}

0 comments on commit 7a71d7b

Please sign in to comment.