Skip to content

Commit e221a2c

Browse files
authored
feat: support customize metadata in alias for dataframe api (#15120)
* feat: support customize metadata in alias for dataframe api * update doc * remove clone
1 parent d381306 commit e221a2c

File tree

11 files changed

+143
-4
lines changed

11 files changed

+143
-4
lines changed

datafusion/core/src/dataframe/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2039,6 +2039,7 @@ impl DataFrame {
20392039
})),
20402040
relation: None,
20412041
name: field.name().to_string(),
2042+
metadata: None,
20422043
}),
20432044
Err(_) => col(field.name()),
20442045
}

datafusion/core/tests/dataframe/mod.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5374,6 +5374,21 @@ async fn test_alias() -> Result<()> {
53745374
Ok(())
53755375
}
53765376

5377+
#[tokio::test]
5378+
async fn test_alias_with_metadata() -> Result<()> {
5379+
let mut metadata = HashMap::new();
5380+
metadata.insert(String::from("k"), String::from("v"));
5381+
let df = create_test_table("test")
5382+
.await?
5383+
.select(vec![col("a").alias_with_metadata("b", Some(metadata))])?
5384+
.alias("table_alias")?;
5385+
let df = df.select(vec![col("table_alias.b")])?;
5386+
let schema = df.schema();
5387+
let metadata = schema.field(0).metadata();
5388+
assert_eq!(metadata.get("k"), Some(&String::from("v")));
5389+
Ok(())
5390+
}
5391+
53775392
// Use alias to perform a self-join
53785393
// Issue: https://github.com/apache/datafusion/issues/14112
53795394
#[tokio::test]

datafusion/expr/src/expr.rs

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,11 +391,34 @@ impl Unnest {
391391
}
392392

393393
/// Alias expression
394-
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
394+
#[derive(Clone, PartialEq, Eq, Debug)]
395395
pub struct Alias {
396396
pub expr: Box<Expr>,
397397
pub relation: Option<TableReference>,
398398
pub name: String,
399+
pub metadata: Option<std::collections::HashMap<String, String>>,
400+
}
401+
402+
impl Hash for Alias {
403+
fn hash<H: Hasher>(&self, state: &mut H) {
404+
self.expr.hash(state);
405+
self.relation.hash(state);
406+
self.name.hash(state);
407+
}
408+
}
409+
410+
impl PartialOrd for Alias {
411+
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
412+
let cmp = self.expr.partial_cmp(&other.expr);
413+
let Some(std::cmp::Ordering::Equal) = cmp else {
414+
return cmp;
415+
};
416+
let cmp = self.relation.partial_cmp(&other.relation);
417+
let Some(std::cmp::Ordering::Equal) = cmp else {
418+
return cmp;
419+
};
420+
self.name.partial_cmp(&other.name)
421+
}
399422
}
400423

401424
impl Alias {
@@ -409,8 +432,17 @@ impl Alias {
409432
expr: Box::new(expr),
410433
relation: relation.map(|r| r.into()),
411434
name: name.into(),
435+
metadata: None,
412436
}
413437
}
438+
439+
pub fn with_metadata(
440+
mut self,
441+
metadata: Option<std::collections::HashMap<String, String>>,
442+
) -> Self {
443+
self.metadata = metadata;
444+
self
445+
}
414446
}
415447

416448
/// Binary expression
@@ -1278,6 +1310,27 @@ impl Expr {
12781310
Expr::Alias(Alias::new(self, None::<&str>, name.into()))
12791311
}
12801312

1313+
/// Return `self AS name` alias expression with metadata
1314+
///
1315+
/// The metadata will be attached to the Arrow Schema field when the expression
1316+
/// is converted to a field via `Expr.to_field()`.
1317+
///
1318+
/// # Example
1319+
/// ```
1320+
/// # use datafusion_expr::col;
1321+
/// use std::collections::HashMap;
1322+
/// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1323+
/// let expr = col("foo").alias_with_metadata("bar", Some(metadata));
1324+
/// ```
1325+
///
1326+
pub fn alias_with_metadata(
1327+
self,
1328+
name: impl Into<String>,
1329+
metadata: Option<std::collections::HashMap<String, String>>,
1330+
) -> Expr {
1331+
Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata))
1332+
}
1333+
12811334
/// Return `self AS name` alias expression with a specific qualifier
12821335
pub fn alias_qualified(
12831336
self,
@@ -1287,6 +1340,28 @@ impl Expr {
12871340
Expr::Alias(Alias::new(self, relation, name.into()))
12881341
}
12891342

1343+
/// Return `self AS name` alias expression with a specific qualifier and metadata
1344+
///
1345+
/// The metadata will be attached to the Arrow Schema field when the expression
1346+
/// is converted to a field via `Expr.to_field()`.
1347+
///
1348+
/// # Example
1349+
/// ```
1350+
/// # use datafusion_expr::col;
1351+
/// use std::collections::HashMap;
1352+
/// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1353+
/// let expr = col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata));
1354+
/// ```
1355+
///
1356+
pub fn alias_qualified_with_metadata(
1357+
self,
1358+
relation: Option<impl Into<TableReference>>,
1359+
name: impl Into<String>,
1360+
metadata: Option<std::collections::HashMap<String, String>>,
1361+
) -> Expr {
1362+
Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata))
1363+
}
1364+
12901365
/// Remove an alias from an expression if one exists.
12911366
///
12921367
/// If the expression is not an alias, the expression is returned unchanged.
@@ -1738,11 +1813,13 @@ impl NormalizeEq for Expr {
17381813
expr: self_expr,
17391814
relation: self_relation,
17401815
name: self_name,
1816+
..
17411817
}),
17421818
Expr::Alias(Alias {
17431819
expr: other_expr,
17441820
relation: other_relation,
17451821
name: other_name,
1822+
..
17461823
}),
17471824
) => {
17481825
self_name == other_name
@@ -2088,6 +2165,7 @@ impl HashNode for Expr {
20882165
expr: _expr,
20892166
relation,
20902167
name,
2168+
..
20912169
}) => {
20922170
relation.hash(state);
20932171
name.hash(state);

datafusion/expr/src/expr_schema.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,16 @@ impl ExprSchemable for Expr {
343343
fn metadata(&self, schema: &dyn ExprSchema) -> Result<HashMap<String, String>> {
344344
match self {
345345
Expr::Column(c) => Ok(schema.metadata(c)?.clone()),
346-
Expr::Alias(Alias { expr, .. }) => expr.metadata(schema),
346+
Expr::Alias(Alias { expr, metadata, .. }) => {
347+
let mut ret = expr.metadata(schema)?;
348+
if let Some(metadata) = metadata {
349+
if !metadata.is_empty() {
350+
ret.extend(metadata.clone());
351+
return Ok(ret);
352+
}
353+
}
354+
Ok(ret)
355+
}
347356
Expr::Cast(Cast { expr, .. }) => expr.metadata(schema),
348357
_ => Ok(HashMap::new()),
349358
}

datafusion/expr/src/tree_node.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,10 @@ impl TreeNode for Expr {
132132
expr,
133133
relation,
134134
name,
135-
}) => f(*expr)?.update_data(|e| e.alias_qualified(relation, name)),
135+
metadata,
136+
}) => f(*expr)?.update_data(|e| {
137+
e.alias_qualified_with_metadata(relation, name, metadata)
138+
}),
136139
Expr::InSubquery(InSubquery {
137140
expr,
138141
subquery,

datafusion/optimizer/src/optimize_projections/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,8 +492,11 @@ fn merge_consecutive_projections(proj: Projection) -> Result<Transformed<Project
492492
expr,
493493
relation,
494494
name,
495+
metadata,
495496
}) => rewrite_expr(*expr, &prev_projection).map(|result| {
496-
result.update_data(|expr| Expr::Alias(Alias::new(expr, relation, name)))
497+
result.update_data(|expr| {
498+
Expr::Alias(Alias::new(expr, relation, name).with_metadata(metadata))
499+
})
497500
}),
498501
e => rewrite_expr(e, &prev_projection),
499502
}

datafusion/proto/proto/datafusion.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ message AliasNode {
484484
LogicalExprNode expr = 1;
485485
string alias = 2;
486486
repeated TableReference relation = 3;
487+
map<string, string> metadata = 4;
487488
}
488489

489490
message BinaryExprNode {

datafusion/proto/src/generated/pbjson.rs

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/generated/prost.rs

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/logical_plan/to_proto.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
//! DataFusion logical plans to be serialized and transmitted between
2020
//! processes.
2121
22+
use std::collections::HashMap;
23+
2224
use datafusion_common::{TableReference, UnnestOptions};
2325
use datafusion_expr::dml::InsertOp;
2426
use datafusion_expr::expr::{
@@ -200,6 +202,7 @@ pub fn serialize_expr(
200202
expr,
201203
relation,
202204
name,
205+
metadata,
203206
}) => {
204207
let alias = Box::new(protobuf::AliasNode {
205208
expr: Some(Box::new(serialize_expr(expr.as_ref(), codec)?)),
@@ -208,6 +211,7 @@ pub fn serialize_expr(
208211
.map(|r| vec![r.into()])
209212
.unwrap_or(vec![]),
210213
alias: name.to_owned(),
214+
metadata: metadata.to_owned().unwrap_or(HashMap::new()),
211215
});
212216
protobuf::LogicalExprNode {
213217
expr_type: Some(ExprType::Alias(alias)),

datafusion/sql/src/unparser/rewrite.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ pub(super) fn inject_column_aliases(
363363
expr: Box::new(expr.clone()),
364364
relation,
365365
name: col_alias.value,
366+
metadata: None,
366367
})
367368
})
368369
.collect::<Vec<_>>();

0 commit comments

Comments
 (0)