Skip to content

Commit befac37

Browse files
Improve PhysicalExpr and Column documentation (#12457)
* Improve PhysicalExpr and Column documentation * Apply suggestions from code review Co-authored-by: Chunchun Ye <[email protected]> --------- Co-authored-by: Chunchun Ye <[email protected]>
1 parent 88b5970 commit befac37

File tree

2 files changed

+61
-7
lines changed

2 files changed

+61
-7
lines changed

datafusion/physical-expr-common/src/physical_expr.rs

+21-2
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,27 @@ use datafusion_expr_common::columnar_value::ColumnarValue;
3131
use datafusion_expr_common::interval_arithmetic::Interval;
3232
use datafusion_expr_common::sort_properties::ExprProperties;
3333

34-
/// See [create_physical_expr](https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html)
35-
/// for examples of creating `PhysicalExpr` from `Expr`
34+
/// [`PhysicalExpr`]s represent expressions such as `A + 1` or `CAST(c1 AS int)`.
35+
///
36+
/// `PhysicalExpr` knows its type, nullability and can be evaluated directly on
37+
/// a [`RecordBatch`] (see [`Self::evaluate`]).
38+
///
39+
/// `PhysicalExpr`s are the physical counterpart to [`Expr`] used in logical
40+
/// planning. They are typically created from [`Expr`] by a [`PhysicalPlanner`]
41+
/// invoked from a higher-level API.
42+
///
43+
/// Some important examples of `PhysicalExpr` are:
44+
/// * [`Column`]: Represents a column at a given index in a RecordBatch
45+
///
46+
/// To create `PhysicalExpr` from `Expr`, see
47+
/// * [`SessionContext::create_physical_expr`]: A high level API
48+
/// * [`create_physical_expr`]: A low level API
49+
///
50+
/// [`SessionContext::create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.create_physical_expr
51+
/// [`PhysicalPlanner`]: https://docs.rs/datafusion/latest/datafusion/physical_planner/trait.PhysicalPlanner.html
52+
/// [`Expr`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html
53+
/// [`create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html
54+
/// [`Column`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/expressions/struct.Column.html
3655
pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq<dyn Any> {
3756
/// Returns the physical expression as [`Any`] so that it can be
3857
/// downcast to a specific implementation.

datafusion/physical-expr/src/expressions/column.rs

+40-5
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Column expression
18+
//! Physical column reference: [`Column`]
1919
2020
use std::any::Any;
2121
use std::hash::{Hash, Hasher};
@@ -33,32 +33,67 @@ use datafusion_expr::ColumnarValue;
3333
use crate::physical_expr::{down_cast_any_ref, PhysicalExpr};
3434

3535
/// Represents the column at a given index in a RecordBatch
36+
///
37+
/// This is a physical expression that represents a column at a given index in an
38+
/// arrow [`Schema`] / [`RecordBatch`].
39+
///
40+
/// Unlike the [logical `Expr::Column`], this expression is always resolved by schema index,
41+
/// even though it does have a name. This is because the physical plan is always
42+
/// resolved to a specific schema and there is no concept of "relation".
43+
///
44+
/// # Example:
45+
/// If the schema is `a`, `b`, `c` the `Column` for `b` would be represented by
46+
/// index 1, since `b` is the second column in the schema.
47+
///
48+
/// ```
49+
/// # use datafusion_physical_expr::expressions::Column;
50+
/// # use arrow::datatypes::{DataType, Field, Schema};
51+
/// // Schema with columns a, b, c
52+
/// let schema = Schema::new(vec![
53+
/// Field::new("a", DataType::Int32, false),
54+
/// Field::new("b", DataType::Int32, false),
55+
/// Field::new("c", DataType::Int32, false),
56+
/// ]);
57+
///
58+
/// // reference to column b is index 1
59+
/// let column_b = Column::new_with_schema("b", &schema).unwrap();
60+
/// assert_eq!(column_b.index(), 1);
61+
///
62+
/// // reference to column c is index 2
63+
/// let column_c = Column::new_with_schema("c", &schema).unwrap();
64+
/// assert_eq!(column_c.index(), 2);
65+
/// ```
66+
/// [logical `Expr::Column`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html#variant.Column
3667
#[derive(Debug, Hash, PartialEq, Eq, Clone)]
3768
pub struct Column {
69+
/// The name of the column (used for debugging and display purposes)
3870
name: String,
71+
/// The index of the column in its schema
3972
index: usize,
4073
}
4174

4275
impl Column {
43-
/// Create a new column expression
76+
/// Create a new column expression which references the
77+
/// column with the given index in the schema.
4478
pub fn new(name: &str, index: usize) -> Self {
4579
Self {
4680
name: name.to_owned(),
4781
index,
4882
}
4983
}
5084

51-
/// Create a new column expression based on column name and schema
85+
/// Create a new column expression which references the
86+
/// column with the given name in the schema
5287
pub fn new_with_schema(name: &str, schema: &Schema) -> Result<Self> {
5388
Ok(Column::new(name, schema.index_of(name)?))
5489
}
5590

56-
/// Get the column name
91+
/// Get the column's name
5792
pub fn name(&self) -> &str {
5893
&self.name
5994
}
6095

61-
/// Get the column index
96+
/// Get the column's schema index
6297
pub fn index(&self) -> usize {
6398
self.index
6499
}

0 commit comments

Comments
 (0)