diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index edaa49ec6e7e..1d28989a2154 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -542,6 +542,21 @@ jobs: # If you encounter an error, run './dev/update_config_docs.sh' and commit ./dev/update_config_docs.sh git diff --exit-code + - name: Check if aggregate_functions.md has been modified + run: | + # If you encounter an error, run './dev/update_aggregate_docs.sh' and commit + ./dev/update_aggregate_docs.sh + git diff --exit-code + - name: Check if scalar_functions.md has been modified + run: | + # If you encounter an error, run './dev/update_scalar_docs.sh' and commit + ./dev/update_scalar_docs.sh + git diff --exit-code + - name: Check if window_functions.md has been modified + run: | + # If you encounter an error, run './dev/update_window_docs.sh' and commit + ./dev/update_window_docs.sh + git diff --exit-code # Verify MSRV for the crates which are directly used by other projects: # - datafusion diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index fbe7d5c04b9b..179d410e185e 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1345,6 +1345,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", + "indexmap", "paste", "serde_json", "sqlparser", @@ -1376,6 +1377,7 @@ dependencies = [ "datafusion-expr", "hashbrown", "hex", + "indexmap", "itertools", "log", "md-5", @@ -1400,6 +1402,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "half", + "indexmap", "log", "paste", "sqlparser", diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 55387fea22ee..d7dc1afe4d50 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -48,6 +48,7 @@ datafusion-expr-common = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } datafusion-functions-window-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } +indexmap = { workspace = true } paste = "^1.0" serde_json = { workspace = true } sqlparser = { workspace = true } diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 260065f69af9..aba2b1e38527 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -34,6 +34,7 @@ mod partition_evaluator; mod table_source; mod udaf; mod udf; +mod udf_docs; mod udwf; pub mod conditional_expressions; @@ -90,9 +91,10 @@ pub use logical_plan::*; pub use partition_evaluator::PartitionEvaluator; pub use sqlparser; pub use table_source::{TableProviderFilterPushDown, TableSource, TableType}; -pub use udaf::{AggregateUDF, AggregateUDFImpl, ReversedUDAF}; -pub use udf::{ScalarUDF, ScalarUDFImpl}; -pub use udwf::{WindowUDF, WindowUDFImpl}; +pub use udaf::{aggregate_doc_sections, AggregateUDF, AggregateUDFImpl, ReversedUDAF}; +pub use udf::{scalar_doc_sections, ScalarUDF, ScalarUDFImpl}; +pub use udf_docs::{DocSection, Documentation, DOCUMENTATION_NONE, DOC_SECTION_NONE}; +pub use udwf::{window_doc_sections, WindowUDF, WindowUDFImpl}; pub use window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits}; #[cfg(test)] diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index e3ef672daf5f..782e62618bf7 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -33,10 +33,11 @@ use crate::function::{ AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs, }; use 
crate::groups_accumulator::GroupsAccumulator;
+use crate::udf_docs::DOCUMENTATION_NONE;
 use crate::utils::format_state_name;
 use crate::utils::AggregateOrderSensitivity;
-use crate::Signature;
 use crate::{Accumulator, Expr};
+use crate::{Documentation, Signature};
 
 /// Logical representation of a user-defined [aggregate function] (UDAF).
 ///
@@ -248,6 +249,11 @@ impl AggregateUDF {
     pub fn default_value(&self, data_type: &DataType) -> Result<ScalarValue> {
         self.inner.default_value(data_type)
     }
+
+    /// Returns this UDF's documentation that will be used to generate public documentation
+    pub fn documentation(&self) -> &Documentation {
+        self.inner.documentation()
+    }
 }
 
 impl<F> From<F> for AggregateUDF
@@ -274,19 +280,31 @@ where
 /// # use std::any::Any;
 /// # use arrow::datatypes::DataType;
 /// # use datafusion_common::{DataFusionError, plan_err, Result};
-/// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility, Expr};
+/// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility, Expr, Documentation};
 /// # use datafusion_expr::{AggregateUDFImpl, AggregateUDF, Accumulator, function::{AccumulatorArgs, StateFieldsArgs}};
+/// # use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 /// # use arrow::datatypes::Schema;
 /// # use arrow::datatypes::Field;
+/// # use indexmap::IndexMap;
+///
 /// #[derive(Debug, Clone)]
 /// struct GeoMeanUdf {
-///   signature: Signature
+///   signature: Signature,
+///   documentation: Documentation,
 /// }
 ///
 /// impl GeoMeanUdf {
 ///     fn new() -> Self {
 ///       Self {
-///         signature: Signature::uniform(1, vec![DataType::Float64], Volatility::Immutable)
+///         signature: Signature::uniform(1, vec![DataType::Float64], Volatility::Immutable),
+///         documentation: Documentation {
+///             doc_section: DOC_SECTION_GENERAL,
+///             description: "calculates a geometric mean",
+///             syntax_example: "geo_mean(2.0)",
+///             sql_example: None,
+///             arguments: Some(IndexMap::from([("arg_1", "The Float64 number for the geometric mean")])),
+///             related_udfs: None,
+///         }
 ///       }
 ///     }
 /// }
@@ -298,7 +316,7 @@ where
 ///    fn signature(&self) -> &Signature { &self.signature }
 ///    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
 ///       if !matches!(args.get(0), Some(&DataType::Float64)) {
-///          return plan_err!("add_one only accepts Float64 arguments");
+///          return plan_err!("geo_mean only accepts Float64 arguments");
 ///       }
 ///       Ok(DataType::Float64)
 ///    }
@@ -310,6 +328,9 @@ where
 ///          Field::new("ordering", DataType::UInt32, true)
 ///        ])
 ///     }
+///    fn documentation(&self) -> &Documentation {
+///        &self.documentation
+///    }
 /// }
 ///
 /// // Create a new AggregateUDF from the implementation
@@ -564,6 +585,12 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
     fn default_value(&self, data_type: &DataType) -> Result<ScalarValue> {
         ScalarValue::try_from(data_type)
     }
+
+    /// Returns the documentation for this Aggregate UDF for use
+    /// in generating publicly facing documentation.
+    fn documentation(&self) -> &Documentation {
+        &DOCUMENTATION_NONE
+    }
 }
 
 impl PartialEq for dyn AggregateUDFImpl {
@@ -710,6 +737,41 @@ impl AggregateUDFImpl for AliasedAggregateUDFImpl {
     fn is_descending(&self) -> Option<bool> {
         self.inner.is_descending()
     }
+
+    fn documentation(&self) -> &Documentation {
+        self.inner.documentation()
+    }
+}
+
+// Aggregate UDF doc sections for use in public documentation
+pub mod aggregate_doc_sections {
+    use crate::DocSection;
+
+    pub fn doc_sections() -> Vec<DocSection> {
+        vec![
+            DOC_SECTION_GENERAL,
+            DOC_SECTION_STATISTICAL,
+            DOC_SECTION_APPROXIMATE,
+        ]
+    }
+
+    pub const DOC_SECTION_GENERAL: DocSection = DocSection {
+        include: true,
+        label: "General Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_STATISTICAL: DocSection = DocSection {
+        include: true,
+        label: "Statistical Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_APPROXIMATE: DocSection = DocSection {
+        include: true,
+        label: "Approximate Functions",
+        description: None,
+    };
 }
 
 #[cfg(test)]
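Reviewer note: the new accessor is easiest to see end to end on an existing UDAF. A minimal sketch, assuming the `sum` UDAF exported by `datafusion-functions-aggregate` (any registered aggregate works the same way; functions that never override `documentation()` fall back to `DOCUMENTATION_NONE`):

```rust
use datafusion_functions_aggregate::sum::sum_udaf;

fn main() {
    // AggregateUDF::documentation() delegates to the inner AggregateUDFImpl
    let udaf = sum_udaf();
    let doc = udaf.documentation();

    // DOCUMENTATION_NONE carries DOC_SECTION_NONE (include == false),
    // which is how the doc generators skip undocumented functions
    if doc.doc_section.include {
        println!("{}: {}", udaf.name(), doc.description);
    } else {
        println!("{} is not publicly documented", udaf.name());
    }
}
```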
diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs
index 938e1181d85d..daa4b33cf3b3 100644
--- a/datafusion/expr/src/udf.rs
+++ b/datafusion/expr/src/udf.rs
@@ -20,7 +20,10 @@
 use crate::expr::schema_name_from_exprs_comma_seperated_without_space;
 use crate::simplify::{ExprSimplifyResult, SimplifyInfo};
 use crate::sort_properties::{ExprProperties, SortProperties};
-use crate::{ColumnarValue, Expr, ScalarFunctionImplementation, Signature};
+use crate::udf_docs::DOCUMENTATION_NONE;
+use crate::{
+    ColumnarValue, Documentation, Expr, ScalarFunctionImplementation, Signature,
+};
 use arrow::datatypes::DataType;
 use datafusion_common::{not_impl_err, ExprSchema, Result};
 use datafusion_expr_common::interval_arithmetic::Interval;
@@ -274,6 +277,11 @@ impl ScalarUDF {
     pub fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
         self.inner.coerce_types(arg_types)
     }
+
+    /// Returns this UDF's documentation that will be used to generate public documentation
+    pub fn documentation(&self) -> &Documentation {
+        self.inner.documentation()
+    }
 }
 
 impl<F> From<F> for ScalarUDF
@@ -299,18 +307,30 @@ where
 /// ```
 /// # use std::any::Any;
 /// # use arrow::datatypes::DataType;
+/// # use indexmap::IndexMap;
 /// # use datafusion_common::{DataFusionError, plan_err, Result};
-/// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility};
+/// # use datafusion_expr::{col, ColumnarValue, Documentation, Signature, Volatility};
 /// # use datafusion_expr::{ScalarUDFImpl, ScalarUDF};
+/// # use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH;
+///
 /// #[derive(Debug)]
 /// struct AddOne {
-///   signature: Signature
+///   signature: Signature,
+///   documentation: Documentation,
 /// }
 ///
 /// impl AddOne {
 ///   fn new() -> Self {
 ///     Self {
-///       signature: Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable)
+///       signature: Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable),
+///       documentation: Documentation {
+///           doc_section: DOC_SECTION_MATH,
+///           description: "Add one to an int32",
+///           syntax_example: "add_one(2)",
+///           sql_example: None,
+///           arguments: Some(IndexMap::from([("arg_1", "The int32 number to add one to")])),
+///           related_udfs: None,
+///       }
 ///     }
 ///   }
 /// }
@@ -328,6 +348,9 @@ where
 /// }
 /// // The actual implementation would add one to the argument
 /// fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> { unimplemented!() }
+/// fn documentation(&self) -> &Documentation {
+///     &self.documentation
+/// }
 /// }
 ///
 /// // Create a new ScalarUDF from the implementation
@@ -596,6 +619,12 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
         self.signature().hash(hasher);
         hasher.finish()
     }
+
+    /// Returns the documentation for this scalar UDF for use
+    /// in generating publicly facing documentation.
+    fn documentation(&self) -> &Documentation {
+        &DOCUMENTATION_NONE
+    }
 }
 
 /// ScalarUDF that adds an alias to the underlying function. It is better to
@@ -709,4 +738,100 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl {
         self.aliases.hash(hasher);
         hasher.finish()
     }
+
+    fn documentation(&self) -> &Documentation {
+        self.inner.documentation()
+    }
+}
+
+// Scalar UDF doc sections for use in public documentation
+pub mod scalar_doc_sections {
+    use crate::DocSection;
+
+    pub fn doc_sections() -> Vec<DocSection> {
+        vec![
+            DOC_SECTION_MATH,
+            DOC_SECTION_CONDITIONAL,
+            DOC_SECTION_STRING,
+            DOC_SECTION_BINARY_STRING,
+            DOC_SECTION_REGEX,
+            DOC_SECTION_DATETIME,
+            DOC_SECTION_ARRAY,
+            DOC_SECTION_STRUCT,
+            DOC_SECTION_MAP,
+            DOC_SECTION_HASHING,
+            DOC_SECTION_OTHER,
+        ]
+    }
+
+    pub const DOC_SECTION_MATH: DocSection = DocSection {
+        include: true,
+        label: "Math Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_CONDITIONAL: DocSection = DocSection {
+        include: true,
+        label: "Conditional Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_STRING: DocSection = DocSection {
+        include: true,
+        label: "String Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_BINARY_STRING: DocSection = DocSection {
+        include: true,
+        label: "Binary String Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_REGEX: DocSection = DocSection {
+        include: true,
+        label: "Regular Expression Functions",
+        description: Some(
+            r#"Apache DataFusion uses a [PCRE-like](https://en.wikibooks.org/wiki/Regular_Expressions/Perl-Compatible_Regular_Expressions)
+regular expression [syntax](https://docs.rs/regex/latest/regex/#syntax)
+(minus support for several features including look-around and backreferences).
+The following regular expression functions are supported:"#,
+        ),
+    };
+
+    pub const DOC_SECTION_DATETIME: DocSection = DocSection {
+        include: true,
+        label: "Time and Date Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_ARRAY: DocSection = DocSection {
+        include: true,
+        label: "Array Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_STRUCT: DocSection = DocSection {
+        include: true,
+        label: "Struct Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_MAP: DocSection = DocSection {
+        include: true,
+        label: "Map Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_HASHING: DocSection = DocSection {
+        include: true,
+        label: "Hashing Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_OTHER: DocSection = DocSection {
+        include: true,
+        label: "Other Functions",
+        description: None,
+    };
 }
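How the trait default interacts with the constants defined in the new `udf_docs.rs` below: a UDF that never overrides `documentation()` reports `DOCUMENTATION_NONE`, whose section is excluded from the published pages. A tiny self-contained check using only items this diff adds:

```rust
use datafusion_expr::{DOC_SECTION_NONE, DOCUMENTATION_NONE};

fn main() {
    // The default trait implementations return DOCUMENTATION_NONE...
    assert_eq!(DOCUMENTATION_NONE.doc_section, DOC_SECTION_NONE);
    // ...and its section is flagged to stay out of the generated docs
    assert!(!DOC_SECTION_NONE.include);
    assert!(DOCUMENTATION_NONE.description.is_empty());
}
```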
diff --git a/datafusion/expr/src/udf_docs.rs b/datafusion/expr/src/udf_docs.rs
new file mode 100644
index 000000000000..32c03bda385f
--- /dev/null
+++ b/datafusion/expr/src/udf_docs.rs
@@ -0,0 +1,64 @@
+use indexmap::IndexMap;
+
+/// Documentation for use by [`crate::ScalarUDFImpl`],
+/// [`crate::AggregateUDFImpl`] and [`crate::WindowUDFImpl`] functions
+/// that will be used to generate public documentation.
+///
+/// The name of the UDF will be pulled from the [`crate::ScalarUDFImpl::name`],
+/// [`crate::AggregateUDFImpl::name`] or [`crate::WindowUDFImpl::name`] function
+/// as appropriate.
+///
+/// All strings in the documentation are required to be
+/// in [markdown format](https://www.markdownguide.org/basic-syntax/).
+///
+/// Currently, documentation only supports a single language,
+/// thus all text should be in English.
+#[derive(Debug, Clone)]
+pub struct Documentation {
+    /// the section in the documentation where the UDF will be documented
+    pub doc_section: DocSection,
+    /// the description for the UDF
+    pub description: &'static str,
+    /// a brief syntax example for the UDF
+    pub syntax_example: &'static str,
+    /// a sql example for the UDF, usually in the form of a sql prompt
+    /// query and output. It is strongly recommended to provide an
+    /// example for anything but the most basic UDFs
+    pub sql_example: Option<&'static str>,
+    /// arguments for the UDF which will be displayed in insertion
+    /// order. Key is the argument name, value is a description for
+    /// the argument
+    pub arguments: Option<IndexMap<&'static str, &'static str>>,
+    /// related functions if any. Values should match the related
+    /// udf's name exactly. Related UDFs must be of the same
+    /// UDF type (scalar, aggregate or window) for proper linking to
+    /// occur
+    pub related_udfs: Option<Vec<&'static str>>,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct DocSection {
+    /// true to include this doc section in the public
+    /// documentation, false otherwise
+    pub include: bool,
+    /// a display label for the doc section. For example: "Math Expressions"
+    pub label: &'static str,
+    /// an optional description for the doc section
+    pub description: Option<&'static str>,
+}
+
+pub const DOCUMENTATION_NONE: Documentation = Documentation {
+    doc_section: DOC_SECTION_NONE,
+    description: "",
+    syntax_example: "",
+    sql_example: None,
+    arguments: None,
+    related_udfs: None,
+};
+
+/// A doc section that indicates the UDF should not
+/// be publicly documented
+pub const DOC_SECTION_NONE: DocSection = DocSection {
+    include: false,
+    label: "",
+    description: None,
+};
diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs
index 7cc57523a14d..f94413f1a3cd 100644
--- a/datafusion/expr/src/udwf.rs
+++ b/datafusion/expr/src/udwf.rs
@@ -32,8 +32,10 @@ use datafusion_common::{not_impl_err, Result};
 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
 
 use crate::expr::WindowFunction;
+use crate::udf_docs::DOCUMENTATION_NONE;
 use crate::{
-    function::WindowFunctionSimplification, Expr, PartitionEvaluator, Signature,
+    function::WindowFunctionSimplification, Documentation, Expr, PartitionEvaluator,
+    Signature,
 };
 
 /// Logical representation of a user-defined window function (UDWF)
@@ -172,6 +174,11 @@ impl WindowUDF {
     pub fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
         self.inner.coerce_types(arg_types)
     }
+
+    /// Returns this UDF's documentation that will be used to generate public documentation
+    pub fn documentation(&self) -> &Documentation {
+        self.inner.documentation()
+    }
 }
 
 impl<F> From<F> for WindowUDF
@@ -198,28 +205,40 @@ where
 /// # use std::any::Any;
 /// # use arrow::datatypes::{DataType, Field};
 /// # use datafusion_common::{DataFusionError, plan_err, Result};
-/// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt};
+/// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt, Documentation};
 /// # use datafusion_expr::{WindowUDFImpl, WindowUDF};
-/// use datafusion_functions_window_common::field::WindowUDFFieldArgs;
+/// # use datafusion_expr::window_doc_sections::DOC_SECTION_ANALYTICAL;
+/// # use
datafusion_functions_window_common::field::WindowUDFFieldArgs;
+/// # use indexmap::IndexMap;
+///
 /// #[derive(Debug, Clone)]
 /// struct SmoothIt {
-///   signature: Signature
+///   signature: Signature,
+///   documentation: Documentation,
 /// }
 ///
 /// impl SmoothIt {
 ///   fn new() -> Self {
 ///     Self {
-///       signature: Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable)
+///       signature: Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable),
+///       documentation: Documentation {
+///           doc_section: DOC_SECTION_ANALYTICAL,
+///           description: "smooths the windows",
+///           syntax_example: "smooth_it(2)",
+///           sql_example: None,
+///           arguments: Some(IndexMap::from([("arg_1", "The int32 number to smooth by")])),
+///           related_udfs: None,
+///       }
 ///     }
 ///   }
 /// }
 ///
-/// /// Implement the WindowUDFImpl trait for AddOne
+/// /// Implement the WindowUDFImpl trait for SmoothIt
 /// impl WindowUDFImpl for SmoothIt {
 ///   fn as_any(&self) -> &dyn Any { self }
 ///   fn name(&self) -> &str { "smooth_it" }
 ///   fn signature(&self) -> &Signature { &self.signature }
-///   // The actual implementation would add one to the argument
+///   // The actual implementation would smooth the window
 ///   fn partition_evaluator(&self) -> Result<Box<dyn PartitionEvaluator>> { unimplemented!() }
 ///   fn field(&self, field_args: WindowUDFFieldArgs) -> Result<Field> {
 ///     if let Some(DataType::Int32) = field_args.get_input_type(0) {
@@ -228,6 +247,9 @@ where
 ///       plan_err!("smooth_it only accepts Int32 arguments")
 ///     }
 ///   }
+///   fn documentation(&self) -> &Documentation {
+///       &self.documentation
+///   }
 /// }
 ///
 /// // Create a new WindowUDF from the implementation
@@ -351,6 +373,12 @@ pub trait WindowUDFImpl: Debug + Send + Sync {
     fn coerce_types(&self, _arg_types: &[DataType]) -> Result<Vec<DataType>> {
         not_impl_err!("Function {} does not implement coerce_types", self.name())
     }
+
+    /// Returns the documentation for this window UDF for use
+    /// in generating publicly facing documentation.
+    fn documentation(&self) -> &Documentation {
+        &DOCUMENTATION_NONE
+    }
 }
 
 impl PartialEq for dyn WindowUDFImpl {
@@ -439,6 +467,41 @@ impl WindowUDFImpl for AliasedWindowUDFImpl {
     fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
         self.inner.coerce_types(arg_types)
     }
+
+    fn documentation(&self) -> &Documentation {
+        self.inner.documentation()
+    }
+}
+
+// Window UDF doc sections for use in public documentation
+pub mod window_doc_sections {
+    use crate::DocSection;
+
+    pub fn doc_sections() -> Vec<DocSection> {
+        vec![
+            DOC_SECTION_AGGREGATE,
+            DOC_SECTION_RANKING,
+            DOC_SECTION_ANALYTICAL,
+        ]
+    }
+
+    pub const DOC_SECTION_AGGREGATE: DocSection = DocSection {
+        include: true,
+        label: "Aggregate Functions",
+        description: Some("All aggregate functions can be used as window functions."),
+    };
+
+    pub const DOC_SECTION_RANKING: DocSection = DocSection {
+        include: true,
+        label: "Ranking Functions",
+        description: None,
+    };
+
+    pub const DOC_SECTION_ANALYTICAL: DocSection = DocSection {
+        include: true,
+        label: "Analytical Functions",
+        description: None,
+    };
 }
 
 #[cfg(test)]
diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml
index d78f68a2604e..1d3ec62a35dd 100644
--- a/datafusion/functions-aggregate/Cargo.toml
+++ b/datafusion/functions-aggregate/Cargo.toml
@@ -48,6 +48,7 @@ datafusion-functions-aggregate-common = { workspace = true }
 datafusion-physical-expr = { workspace = true }
 datafusion-physical-expr-common = { workspace = true }
 half = { workspace = true }
+indexmap = { workspace = true }
 log = { workspace = true }
 paste = "1.0.14"
 sqlparser = { workspace = true }
diff --git a/datafusion/functions-aggregate/src/bit_and_or_xor.rs b/datafusion/functions-aggregate/src/bit_and_or_xor.rs
index aa65062e3330..4307a6d68f5d 100644
--- a/datafusion/functions-aggregate/src/bit_and_or_xor.rs
+++ b/datafusion/functions-aggregate/src/bit_and_or_xor.rs
@@ -17,6 +17,7 @@
 
 //! Defines `BitAnd`, `BitOr`, `BitXor` and `BitXor DISTINCT` aggregate accumulators
 
+use indexmap::IndexMap;
 use std::any::Any;
 use std::collections::HashSet;
 use std::fmt::{Display, Formatter};
@@ -35,9 +36,11 @@ use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::type_coercion::aggregates::INTEGERS;
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, GroupsAccumulator, ReversedUDAF, Signature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, GroupsAccumulator, ReversedUDAF,
+    Signature, Volatility,
 };
 
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
 use std::ops::{BitAndAssign, BitOrAssign, BitXorAssign};
@@ -110,8 +113,9 @@ macro_rules! downcast_bitwise_accumulator {
 /// `EXPR_FN` identifier used to name the generated expression function.
 /// `AGGREGATE_UDF_FN` is an identifier used to name the underlying UDAF function.
 /// `OPR_TYPE` is an expression that evaluates to the type of bitwise operation to be performed.
+/// `DOCUMENTATION` documentation for the UDAF
 macro_rules!
make_bitwise_udaf_expr_and_func { create_func!( $EXPR_FN, $AGGREGATE_UDF_FN, - BitwiseOperation::new($OPR_TYPE, stringify!($EXPR_FN)) + BitwiseOperation::new($OPR_TYPE, stringify!($EXPR_FN), $DOCUMENTATION) ); }; } -make_bitwise_udaf_expr_and_func!(bit_and, bit_and_udaf, BitwiseOperationType::And); -make_bitwise_udaf_expr_and_func!(bit_or, bit_or_udaf, BitwiseOperationType::Or); -make_bitwise_udaf_expr_and_func!(bit_xor, bit_xor_udaf, BitwiseOperationType::Xor); +make_bitwise_udaf_expr_and_func!( + bit_and, + bit_and_udaf, + BitwiseOperationType::And, + Documentation { + doc_section: DOC_SECTION_GENERAL, + description: "Computes the bitwise AND of all non-null input values.", + syntax_example: "bit_and(expression)", + sql_example: None, + arguments: Some(IndexMap::from([ + ( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.", + ), + ])), + related_udfs: None, + } +); +make_bitwise_udaf_expr_and_func!( + bit_or, + bit_or_udaf, + BitwiseOperationType::Or, + Documentation { + doc_section: DOC_SECTION_GENERAL, + description: "Computes the bitwise OR of all non-null input values.", + syntax_example: "bit_or(expression)", + sql_example: None, + arguments: Some(IndexMap::from([ + ( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.", + ), + ])), + related_udfs: None, + } +); +make_bitwise_udaf_expr_and_func!( + bit_xor, + bit_xor_udaf, + BitwiseOperationType::Xor, + Documentation { + doc_section: DOC_SECTION_GENERAL, + description: "Computes the bitwise exclusive OR of all non-null input values.", + syntax_example: "bit_xor(expression)", + sql_example: None, + arguments: Some(IndexMap::from([ + ( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.", + ), + ])), + related_udfs: None, + } +); /// The different types of bitwise operations that can be performed. #[derive(Debug, Clone, Eq, PartialEq)] @@ -155,14 +210,20 @@ struct BitwiseOperation { /// `operation` indicates the type of bitwise operation to be performed. 
operation: BitwiseOperationType,
     func_name: &'static str,
+    documentation: Documentation,
 }
 
 impl BitwiseOperation {
-    pub fn new(operator: BitwiseOperationType, func_name: &'static str) -> Self {
+    pub fn new(
+        operator: BitwiseOperationType,
+        func_name: &'static str,
+        documentation: Documentation,
+    ) -> Self {
         Self {
             operation: operator,
             signature: Signature::uniform(1, INTEGERS.to_vec(), Volatility::Immutable),
             func_name,
+            documentation,
         }
     }
 }
@@ -239,6 +300,10 @@ impl AggregateUDFImpl for BitwiseOperation {
     fn reverse_expr(&self) -> ReversedUDAF {
         ReversedUDAF::Identical
     }
+
+    fn documentation(&self) -> &Documentation {
+        &self.documentation
+    }
 }
 
 struct BitAndAccumulator {
diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs
index 7f348bf9d2a0..a50939639dc2 100644
--- a/datafusion/functions-window/src/row_number.rs
+++ b/datafusion/functions-window/src/row_number.rs
@@ -28,7 +28,10 @@ use datafusion_common::arrow::datatypes::DataType;
 use datafusion_common::arrow::datatypes::Field;
 use datafusion_common::{Result, ScalarValue};
 use datafusion_expr::expr::WindowFunction;
-use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDFImpl};
+use datafusion_expr::window_doc_sections::DOC_SECTION_RANKING;
+use datafusion_expr::{
+    Documentation, Expr, PartitionEvaluator, Signature, Volatility, WindowUDFImpl,
+};
 use datafusion_functions_window_common::field;
 use field::WindowUDFFieldArgs;
@@ -57,6 +60,7 @@ pub fn row_number_udwf() -> std::sync::Arc<WindowUDF> {
 #[derive(Debug)]
 pub struct RowNumber {
     signature: Signature,
+    documentation: Documentation,
 }
 
 impl RowNumber {
@@ -64,6 +68,15 @@
     pub fn new() -> Self {
         Self {
             signature: Signature::any(0, Volatility::Immutable),
+            documentation: Documentation {
+                doc_section: DOC_SECTION_RANKING,
+                description:
+                    "Number of the current row within its partition, counting from 1.",
+                syntax_example: "row_number()",
+                sql_example: None,
+                arguments: None,
+                related_udfs: None,
+            },
         }
     }
 }
@@ -101,6 +114,10 @@ impl WindowUDFImpl for RowNumber {
             nulls_first: false,
         })
     }
+
+    fn documentation(&self) -> &Documentation {
+        &self.documentation
+    }
 }
 
 /// State for the `row_number` built-in window function.
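Trying the wired-up `row_number` docs from outside the crate is a few lines; a small sketch using only items visible in this diff:

```rust
use datafusion_functions_window::row_number::row_number_udwf;

fn main() {
    let udwf = row_number_udwf();
    let doc = udwf.documentation();

    // Values set in RowNumber::new() above
    assert_eq!(doc.syntax_example, "row_number()");
    assert_eq!(doc.doc_section.label, "Ranking Functions");
    println!("{}: {}", udwf.name(), doc.description);
}
```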
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index ff1b926a9b82..0b21be6821b0 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -76,6 +76,7 @@ datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } hashbrown = { workspace = true, optional = true } hex = { version = "0.4", optional = true } +indexmap = { workspace = true } itertools = { workspace = true } log = { workspace = true } md-5 = { version = "^0.10.0", optional = true } diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index 2fa6d7c197ad..d2d4b04872e4 100644 --- a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -22,14 +22,17 @@ use arrow::compute::kernels::zip::zip; use arrow::compute::{and, is_not_null, is_null}; use arrow::datatypes::DataType; use datafusion_common::{exec_err, ExprSchema, Result}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL; use datafusion_expr::type_coercion::binary::type_union_resolution; -use datafusion_expr::{ColumnarValue, Expr, ExprSchemable}; +use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use indexmap::IndexMap; use itertools::Itertools; #[derive(Debug)] pub struct CoalesceFunc { signature: Signature, + documentation: Documentation, } impl Default for CoalesceFunc { @@ -42,6 +45,19 @@ impl CoalesceFunc { pub fn new() -> Self { Self { signature: Signature::user_defined(Volatility::Immutable), + documentation: Documentation { + doc_section: DOC_SECTION_CONDITIONAL, + description: "Returns the first of its arguments that is not _null_. Returns _null_ if all arguments are _null_. This function is often used to substitute a default value for _null_ values.", + syntax_example: "coalesce(expression1[, ..., expression_n])", + sql_example: None, + arguments: Some(IndexMap::from([ + ( + "expression1, expression_n", + "Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary." 
+                    ),
+                ])),
+                related_udfs: None,
+            },
         }
     }
 }
@@ -140,6 +156,10 @@ impl ScalarUDFImpl for CoalesceFunc {
             .unwrap_or(arg_types.first().unwrap().clone());
         Ok(vec![new_type; arg_types.len()])
     }
+
+    fn documentation(&self) -> &Documentation {
+        &self.documentation
+    }
 }
 
 #[cfg(test)]
diff --git a/datafusion/functions/src/crypto/sha224.rs b/datafusion/functions/src/crypto/sha224.rs
index 2795c4a25004..813f51aef335 100644
--- a/datafusion/functions/src/crypto/sha224.rs
+++ b/datafusion/functions/src/crypto/sha224.rs
@@ -19,12 +19,17 @@ use super::basic::{sha224, utf8_or_binary_to_binary_type};
 use arrow::datatypes::DataType;
 use datafusion_common::Result;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
+use indexmap::IndexMap;
 use std::any::Any;
 
 #[derive(Debug)]
 pub struct SHA224Func {
     signature: Signature,
+    documentation: Documentation,
 }
 impl Default for SHA224Func {
     fn default() -> Self {
@@ -41,6 +46,19 @@
                 vec![Utf8, LargeUtf8, Binary, LargeBinary],
                 Volatility::Immutable,
             ),
+            documentation: Documentation {
+                doc_section: DOC_SECTION_HASHING,
+                description: "Computes the SHA-224 hash of a binary string.",
+                syntax_example: "sha224(expression)",
+                sql_example: None,
+                arguments: Some(IndexMap::from([
+                    (
+                        "expression",
+                        "String expression to operate on. Can be a constant, column, or function, and any combination of string operators."
+                    ),
+                ])),
+                related_udfs: None,
+            }
         }
     }
 }
@@ -60,7 +78,12 @@ impl ScalarUDFImpl for SHA224Func {
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
         utf8_or_binary_to_binary_type(&arg_types[0], self.name())
     }
+
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         sha224(args)
     }
+
+    fn documentation(&self) -> &Documentation {
+        &self.documentation
+    }
 }
diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs
index 288641b84dd7..5b72f7d30705 100644
--- a/datafusion/functions/src/datetime/to_date.rs
+++ b/datafusion/functions/src/datetime/to_date.rs
@@ -17,19 +17,23 @@
 
 use std::any::Any;
 
+use crate::datetime::common::*;
 use arrow::datatypes::DataType;
 use arrow::datatypes::DataType::Date32;
 use arrow::error::ArrowError::ParseError;
 use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
-
-use crate::datetime::common::*;
 use datafusion_common::error::DataFusionError;
 use datafusion_common::{arrow_err, exec_err, internal_datafusion_err, Result};
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
+use indexmap::IndexMap;
 
 #[derive(Debug)]
 pub struct ToDateFunc {
     signature: Signature,
+    documentation: Documentation,
 }
 
 impl Default for ToDateFunc {
@@ -42,6 +46,49 @@ impl ToDateFunc {
     pub fn new() -> Self {
         Self {
             signature: Signature::variadic_any(Volatility::Immutable),
+            documentation: Documentation {
+                doc_section: DOC_SECTION_DATETIME,
+                description: r#"Converts a value to a date (`YYYY-MM-DD`).
+Supports strings, integer and double types as input.
+Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.
+Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
+Returns the corresponding date.
+
+Note: `to_date` returns Date32, which represents its values as the number of days since the unix epoch (`1970-01-01`), stored as a signed 32-bit value. The largest supported date value is `9999-12-31`.
+"#,
+                syntax_example: "to_date('2017-05-31', '%Y-%m-%d')",
+                sql_example: Some(
+                    r#"```sql
+> select to_date('2023-01-31');
++-----------------------------+
+| to_date(Utf8("2023-01-31")) |
++-----------------------------+
+| 2023-01-31                  |
++-----------------------------+
+> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
++---------------------------------------------------------------+
+| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
++---------------------------------------------------------------+
+| 2023-01-31                                                    |
++---------------------------------------------------------------+
+```
+
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)
+"#),
+                arguments: Some(IndexMap::from([
+                    (
+                        "expression",
+                        "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
+                    ),
+                    (
+                        "format_n",
+                        "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.",
+                    )
+                ])),
+                related_udfs: None,
+            }
         }
     }
 
@@ -117,6 +164,10 @@ impl ScalarUDFImpl for ToDateFunc {
             }
         }
     }
+
+    fn documentation(&self) -> &Documentation {
+        &self.documentation
+    }
 }
 
 #[cfg(test)]
diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs
index 5b80c908cfc3..ba66f554d219 100644
--- a/datafusion/functions/src/encoding/inner.rs
+++ b/datafusion/functions/src/encoding/inner.rs
@@ -28,16 +28,19 @@ use datafusion_common::{
 };
 use datafusion_common::{exec_err, ScalarValue};
 use datafusion_common::{DataFusionError, Result};
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ColumnarValue, Documentation};
 use std::sync::Arc;
 use std::{fmt, str::FromStr};
 
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_BINARY_STRING;
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use indexmap::IndexMap;
 use std::any::Any;
 
 #[derive(Debug)]
 pub struct EncodeFunc {
     signature: Signature,
+    documentation: Documentation,
 }
 
 impl Default for EncodeFunc {
@@ -50,6 +53,17 @@ impl EncodeFunc {
     pub fn new() -> Self {
         Self {
             signature: Signature::user_defined(Volatility::Immutable),
+            documentation: Documentation {
+                doc_section: DOC_SECTION_BINARY_STRING,
+                description: "Encode binary data into a textual representation.",
+                syntax_example: "encode(expression, format)",
+                sql_example: None,
+                arguments: Some(IndexMap::from([
+                    ("expression", "Expression containing string or binary data"),
+                    ("format", "Supported formats are: `base64`, `hex`"),
+                ])),
+                related_udfs: Some(vec!["decode"]),
+            },
         }
     }
 }
@@ -100,11 +114,16 @@ impl ScalarUDFImpl for EncodeFunc {
             ),
         }
     }
+
+    fn documentation(&self) -> &Documentation {
+        &self.documentation
+    }
 }
 
 #[derive(Debug)]
 pub struct DecodeFunc {
     signature: Signature,
+    documentation: Documentation,
 }
 
 impl Default for DecodeFunc {
@@ -117,6 +136,17 @@ impl DecodeFunc {
     pub fn new() -> Self {
         Self {
             signature: Signature::user_defined(Volatility::Immutable),
+            documentation: Documentation {
+                doc_section: DOC_SECTION_BINARY_STRING,
+                description:
"Decode binary data from textual representation in string.", + syntax_example: "decode(expression, format)", + sql_example: None, + arguments: Some(IndexMap::from([ + ("expression", "Expression containing encoded string data"), + ("format", "Same arguments as [encode](#encode)"), + ])), + related_udfs: Some(vec!["encode"]), + }, } } } @@ -167,6 +197,10 @@ impl ScalarUDFImpl for DecodeFunc { ), } } + + fn documentation(&self) -> &Documentation { + &self.documentation + } } #[derive(Debug, Copy, Clone)] diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index ad7cff1f7149..66869e803886 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -29,14 +29,19 @@ use datafusion_common::{ ScalarValue, }; use datafusion_expr::expr::ScalarFunction; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; -use datafusion_expr::{lit, ColumnarValue, Expr, ScalarUDF, TypeSignature::*}; +use datafusion_expr::{ + lit, ColumnarValue, Documentation, Expr, ScalarUDF, TypeSignature::*, +}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use indexmap::IndexMap; #[derive(Debug)] pub struct LogFunc { signature: Signature, + documentation: Documentation, } impl Default for LogFunc { @@ -58,6 +63,24 @@ impl LogFunc { ], Volatility::Immutable, ), + documentation: Documentation { + doc_section: DOC_SECTION_MATH, + description: "Returns the base-x logarithm of a number. Can either provide a specified base, or if omitted then takes the base-10 of a number.", + syntax_example: r#"log(base, numeric_expression) +log(numeric_expression)"#, + sql_example: None, + arguments: Some(IndexMap::from([ + ( + "base", + "Base numeric expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + ( + "numeric_expression", + "Numeric expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + ])), + related_udfs: None, + } } } } @@ -164,6 +187,10 @@ impl ScalarUDFImpl for LogFunc { Ok(ColumnarValue::Array(arr)) } + fn documentation(&self) -> &Documentation { + &self.documentation + } + /// Simplify the `log` function by the relevant rules: /// 1. Log(a, 1) ===> 0 /// 2. 
Log(a, Power(a, b)) ===> b diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index 20029ba005c4..9e74a86f1e52 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -25,15 +25,18 @@ use datafusion_common::{arrow_datafusion_err, plan_err}; use datafusion_common::{ cast::as_generic_string_array, internal_err, DataFusionError, Result, }; -use datafusion_expr::ColumnarValue; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX; use datafusion_expr::TypeSignature::*; +use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use indexmap::IndexMap; use std::any::Any; use std::sync::Arc; #[derive(Debug)] pub struct RegexpLikeFunc { signature: Signature, + documentation: Documentation, } impl Default for RegexpLikeFunc { fn default() -> Self { @@ -54,6 +57,46 @@ impl RegexpLikeFunc { ], Volatility::Immutable, ), + documentation: Documentation { + doc_section: DOC_SECTION_REGEX, + description: "Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise.", + syntax_example: "regexp_like(str, regexp[, flags])", + sql_example: Some( + r#"```sql +select regexp_like('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); ++--------------------------------------------------------+ +| regexp_like(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | ++--------------------------------------------------------+ +| true | ++--------------------------------------------------------+ +SELECT regexp_like('aBc', '(b|d)', 'i'); ++--------------------------------------------------+ +| regexp_like(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) | ++--------------------------------------------------+ +| true | ++--------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) +"#), + arguments: Some(IndexMap::from([ + ( + "str", + "String expression to operate on. Can be a constant, column, or function, and any combination of string operators." + ), + ( "regexp", + "Regular expression to test against the string expression. Can be a constant, column, or function." + ), + ("flags", + r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . 
to match \n
+  - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
+  - **U**: swap the meaning of x* and x*?"#
+                    )
+                ])),
+                related_udfs: None,
+            }
         }
     }
 }
@@ -105,6 +148,10 @@ impl ScalarUDFImpl for RegexpLikeFunc {
             result.map(ColumnarValue::Array)
         }
     }
+
+    fn documentation(&self) -> &Documentation {
+        &self.documentation
+    }
 }
 fn regexp_like_func(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args[0].data_type() {
diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs
index 68ba3f5ff15f..ca09ada0fed6 100644
--- a/datafusion/functions/src/string/ascii.rs
+++ b/datafusion/functions/src/string/ascii.rs
@@ -20,14 +20,17 @@ use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array};
 use arrow::datatypes::DataType;
 use arrow::error::ArrowError;
 use datafusion_common::{internal_err, Result};
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{ColumnarValue, Documentation};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use indexmap::IndexMap;
 use std::any::Any;
 use std::sync::Arc;
 
 #[derive(Debug)]
 pub struct AsciiFunc {
     signature: Signature,
+    documentation: Documentation,
 }
 
 impl Default for AsciiFunc {
@@ -45,6 +48,19 @@ impl AsciiFunc {
             vec![Utf8, LargeUtf8, Utf8View],
             Volatility::Immutable,
         ),
+            documentation: Documentation {
+                doc_section: DOC_SECTION_STRING,
+                description: "Returns the ASCII value of the first character in a string.",
+                syntax_example: "ascii(str)",
+                sql_example: None,
+                arguments: Some(IndexMap::from([
+                    (
+                        "str",
+                        "String expression to operate on. Can be a constant, column, or function that evaluates to or can be coerced to a Utf8, LargeUtf8 or a Utf8View."
+                    )
+                ])),
+                related_udfs: Some(vec!["chr"]),
+            },
         }
     }
 }
@@ -71,6 +87,10 @@
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         make_scalar_function(ascii, vec![])(args)
     }
+
+    fn documentation(&self) -> &Documentation {
+        &self.documentation
+    }
 }
 
 fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef>
diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs
index c1d6f327928f..1867e7cfecae 100644
--- a/datafusion/functions/src/unicode/rpad.rs
+++ b/datafusion/functions/src/unicode/rpad.rs
@@ -25,8 +25,12 @@ use arrow::datatypes::DataType;
 use datafusion_common::cast::as_int64_array;
 use datafusion_common::DataFusionError;
 use datafusion_common::{exec_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
 use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
+use indexmap::IndexMap;
 use std::any::Any;
 use std::fmt::Write;
 use std::sync::Arc;
@@ -36,6 +40,7 @@ use DataType::{LargeUtf8, Utf8, Utf8View};
 #[derive(Debug)]
 pub struct RPadFunc {
     signature: Signature,
+    documentation: Documentation,
 }
 
 impl Default for RPadFunc {
@@ -65,6 +70,27 @@ impl RPadFunc {
             ],
             Volatility::Immutable,
         ),
+            documentation: Documentation {
+                doc_section: DOC_SECTION_STRING,
+                description: "Pads the right side of a string with another string to a specified string length.",
+                syntax_example: "rpad(str, n[, padding_str])",
+                sql_example: None,
+                arguments: Some(IndexMap::from([
+                    (
+                        "str",
+                        "String expression to operate on. Can be a constant, column, or function, and any combination of string operators."
+                    ),
+                    (
+                        "n",
+                        "String length to pad to."
+                    ),
+                    (
+                        "padding_str",
+                        "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._"
+                    ),
+                ])),
+                related_udfs: Some(vec!["lpad"]),
+            },
         }
     }
 }
@@ -113,6 +139,10 @@ impl ScalarUDFImpl for RPadFunc {
             }
         }
     }
+
+    fn documentation(&self) -> &Documentation {
+        &self.documentation
+    }
 }
 
 pub fn rpad(
diff --git a/dev/update_config_docs.sh b/dev/update_config_docs.sh
index 836ba6772eac..585cb77839f9 100755
--- a/dev/update_config_docs.sh
+++ b/dev/update_config_docs.sh
@@ -24,7 +24,7 @@ SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "${SOURCE_DIR}/../" && pwd
 
 TARGET_FILE="docs/source/user-guide/configs.md"
-PRINT_DOCS_COMMAND="cargo run --manifest-path datafusion/core/Cargo.toml --bin print_config_docs"
+PRINT_CONFIG_DOCS_COMMAND="cargo run --manifest-path datafusion/core/Cargo.toml --bin print_config_docs"
 
 echo "Inserting header"
 cat <<'EOF' > "$TARGET_FILE"
@@ -67,8 +67,8 @@ Environment variables are read during `SessionConfig` initialisation so they mus
 
 EOF
 
-echo "Running CLI and inserting docs table"
-$PRINT_DOCS_COMMAND >> "$TARGET_FILE"
+echo "Running CLI and inserting config docs table"
+$PRINT_CONFIG_DOCS_COMMAND >> "$TARGET_FILE"
 
 echo "Running prettier"
 npx prettier@2.3.2 --write "$TARGET_FILE"
diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md
index c8f0ffbec701..ababb001f5c5 100644
--- a/docs/source/user-guide/expressions.md
+++ b/docs/source/user-guide/expressions.md
@@ -69,7 +69,7 @@ value
 :::
 
 :::{note}
-Since `&&` and `||` are existed as logical operators in Rust, but those are not overloadable and not works with expression API.
+Since `&&` and `||` are logical operators in Rust and cannot be overloaded, they are not available in the expression API.
 :::
 
 ## Bitwise Expressions
@@ -151,7 +151,7 @@ but these operators always return a `bool` which makes them not work with the ex
 | trunc(x)  | truncate toward zero |
 
 :::{note}
-Unlike to some databases the math functions in Datafusion works the same way as Rust math functions, avoiding failing on corner cases e.g
+Unlike some databases, the math functions in DataFusion work the same way as Rust math functions, avoiding failures on corner cases, e.g.
 
 ```sql
 select log(-1), log(0), sqrt(-1);
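The three generator binaries that follow share one rendering scheme; distilled to its core, the per-function markdown emission looks roughly like this (a sketch of the layout only, not a fourth binary; `render` is a hypothetical helper):

```rust
use datafusion_expr::ScalarUDF;
use std::fmt::Write as _;

// Sketch: render one documented function the way the print binaries do --
// heading, description, fenced syntax example, then an argument list.
fn render(f: &ScalarUDF) -> String {
    let doc = f.documentation();
    let mut md = String::new();
    let _ = writeln!(md, "### `{}`\n\n{}\n", f.name(), doc.description);
    let _ = writeln!(md, "```\n{}\n```", doc.syntax_example);
    if let Some(args) = &doc.arguments {
        let _ = writeln!(md, "\n#### Arguments\n");
        for (name, desc) in args {
            // IndexMap preserves the insertion order used at definition time
            let _ = writeln!(md, "- **{name}**: {desc}");
        }
    }
    md
}
```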
diff --git a/datafusion/core/src/bin/print_aggregate_functions_docs.rs b/datafusion/core/src/bin/print_aggregate_functions_docs.rs
new file mode 100644
index 000000000000..9f1661cfd6a6
--- /dev/null
+++ b/datafusion/core/src/bin/print_aggregate_functions_docs.rs
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::execution::SessionStateDefaults;
+use datafusion_expr::aggregate_doc_sections::doc_sections;
+use datafusion_expr::AggregateUDF;
+use itertools::Itertools;
+use std::fmt::Write as _;
+use std::sync::Arc;
+
+fn main() {
+    let functions = SessionStateDefaults::default_aggregate_functions();
+    let mut docs = "".to_string();
+
+    // doc sections only includes sections that have 'include' == true
+    for doc_section in doc_sections() {
+        // make sure there is a function that is in this doc section
+        if !functions
+            .iter()
+            .any(|f| f.documentation().doc_section == doc_section)
+        {
+            continue;
+        }
+
+        // write out section header
+        let _ = writeln!(&mut docs, "## {} ", doc_section.label);
+
+        if let Some(description) = doc_section.description {
+            let _ = writeln!(&mut docs, "{description}");
+        }
+
+        let filtered = functions
+            .clone()
+            .into_iter()
+            .filter(|f| f.documentation().doc_section == doc_section)
+            .collect_vec();
+
+        // names is a sorted list of function names and aliases since we display
+        // both in the documentation
+        let names = get_names_and_aliases(&filtered);
+
+        // write out the list of function names and aliases
+        names.iter().for_each(|name| {
+            let _ = writeln!(&mut docs, "- [{name}](#{name})");
+        });
+
+        // write out each function and alias in the order of the sorted name list
+        for name in names {
+            let f = filtered
+                .iter()
+                .find(|f| f.name() == name || f.aliases().contains(&name))
+                .unwrap();
+            let documentation = f.documentation();
+
+            // if this name is an alias we display a link to what it's an alias of
+            if f.aliases().contains(&name) {
+                let fname = f.name();
+                let _ = writeln!(&mut docs, "\n### `{name}`\n\n_Alias of [{fname}](#{fname})._");
+                continue;
+            }
+
+            // otherwise display the documentation for the function
+
+            // first, the name, description and syntax example
+            let _ = write!(
+                &mut docs,
+                r#"
+### `{}`
+
+{}
+
+```
+{}
+```
+"#,
+                f.name(),
+                documentation.description,
+                documentation.syntax_example
+            );
+
+            // next, arguments
+            if let Some(args) = &documentation.arguments {
+                let _ = writeln!(&mut docs, "#### Arguments\n");
+                for (arg_name, arg_desc) in args {
+                    let _ = writeln!(&mut docs, "- **{arg_name}**: {arg_desc}");
+                }
+            }
+
+            // next, sql example if provided
+            if let Some(example) = documentation.sql_example {
+                let _ = writeln!(
+                    &mut docs,
+                    r#"
+#### Example
+
+{}
+"#,
+                    example
+                );
+            }
+
+            // next, aliases
+            if !f.aliases().is_empty() {
+                let _ = writeln!(&mut docs, "\n#### Aliases");
+
+                for alias in f.aliases() {
+                    let _ = writeln!(&mut docs, "- {alias}");
+                }
+            }
+
+            // finally, any related udfs
+            if let Some(related_udfs) = &documentation.related_udfs {
+                let _ = writeln!(&mut docs, "\n**Related functions**:");
+
+                for related in related_udfs {
+                    let _ = writeln!(&mut docs, "- [{related}](#{related})");
+                }
+            }
+        }
+    }
+
+    println!("{docs}");
+}
+
+fn get_names_and_aliases(functions: &[Arc<AggregateUDF>]) -> Vec<String> {
+    functions
+        .iter()
+        .flat_map(|f| {
+            if f.aliases().is_empty() {
+                vec![f.name().to_string()]
+            } else {
+                let mut names = vec![f.name().to_string()];
+                names.extend(f.aliases().iter().cloned());
+                names
+            }
+        })
+        .sorted()
+        .collect_vec()
+}
diff --git a/datafusion/core/src/bin/print_scalar_functions_docs.rs b/datafusion/core/src/bin/print_scalar_functions_docs.rs
new file mode 100644
index 000000000000..b96b42e15948
--- /dev/null
+++ b/datafusion/core/src/bin/print_scalar_functions_docs.rs
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::execution::SessionStateDefaults;
+use datafusion_expr::scalar_doc_sections::doc_sections;
+use datafusion_expr::ScalarUDF;
+use itertools::Itertools;
+use std::fmt::Write as _;
+use std::sync::Arc;
+
+fn main() {
+    let functions = SessionStateDefaults::default_scalar_functions();
+    let mut docs = "".to_string();
+
+    // doc sections only includes sections that have 'include' == true
+    for doc_section in doc_sections() {
+        // make sure there is a function that is in this doc section
+        if !functions
+            .iter()
+            .any(|f| f.documentation().doc_section == doc_section)
+        {
+            continue;
+        }
+
+        // write out section header
+        let _ = writeln!(&mut docs, "## {} ", doc_section.label);
+
+        if let Some(description) = doc_section.description {
+            let _ = writeln!(&mut docs, "{description}");
+        }
+
+        let filtered = functions
+            .clone()
+            .into_iter()
+            .filter(|f| f.documentation().doc_section == doc_section)
+            .collect_vec();
+
+        // names is a sorted list of function names and aliases since we display
+        // both in the documentation
+        let names = get_names_and_aliases(&filtered);
+
+        // write out the list of function names and aliases
+        names.iter().for_each(|name| {
+            let _ = writeln!(&mut docs, "- [{name}](#{name})");
+        });
+
+        // write out each function and alias in the order of the sorted name list
+        for name in names {
+            let f = filtered
+                .iter()
+                .find(|f| f.name() == name || f.aliases().contains(&name))
+                .unwrap();
+            let documentation = f.documentation();
+
+            // if this name is an alias we display a link to what it's an alias of
+            if f.aliases().contains(&name) {
+                let fname = f.name();
+                let _ = writeln!(&mut docs, "\n### `{name}`\n\n_Alias of [{fname}](#{fname})._");
+                continue;
+            }
+
+            // otherwise display the documentation for the function
+
+            // first, the name, description and syntax example
+            let _ = write!(
+                &mut docs,
+                r#"
+### `{}`
+
+{}
+
+```
+{}
+```
+"#,
+                f.name(),
+                documentation.description,
+                documentation.syntax_example
+            );
+
+            // next, arguments
+            if let Some(args) = &documentation.arguments {
+                let _ = writeln!(&mut docs, "#### Arguments\n");
+                for (arg_name, arg_desc) in args {
+                    let _ = writeln!(&mut docs, "- **{arg_name}**: {arg_desc}");
+                }
+            }
+
+            // next, sql example if provided
+            if let Some(example) = documentation.sql_example {
+                let _ = writeln!(
+                    &mut docs,
+                    r#"
+#### Example
+
+{}
+"#,
+                    example
+                );
+            }
+
+            // next, aliases
+            if !f.aliases().is_empty() {
+                let _ = writeln!(&mut docs, "\n#### Aliases");
+
+                for alias in f.aliases() {
+                    let _ = writeln!(&mut docs, "- {alias}");
+                }
+            }
+
+            // finally, any related udfs
+            if let Some(related_udfs) = &documentation.related_udfs {
+                let _ = writeln!(&mut docs, "\n**Related functions**:");
+
+                for related in related_udfs {
+                    let _ = writeln!(&mut docs, "- [{related}](#{related})");
+                }
+            }
+        }
+    }
+
+    println!("{docs}");
+}
+
+fn get_names_and_aliases(functions: &[Arc<ScalarUDF>]) -> Vec<String> {
+    functions
+        .iter()
+        .flat_map(|f| {
+            if f.aliases().is_empty() {
+                vec![f.name().to_string()]
+            } else {
+                let mut names = vec![f.name().to_string()];
+                names.extend(f.aliases().iter().cloned());
+                names
+            }
+        })
+        .sorted()
+        .collect_vec()
+}
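`get_names_and_aliases` is what makes aliases show up beside canonical names in each section index. Its flatten-then-sort behavior, isolated with illustrative name/alias pairs (no DataFusion types needed):

```rust
use itertools::Itertools;

// Standalone illustration of the flattening in get_names_and_aliases:
// every function contributes its name plus all of its aliases, then the
// whole list is sorted so aliases interleave alphabetically with names.
fn main() {
    let funcs: Vec<(&str, Vec<&str>)> = vec![
        ("var_samp", vec!["var"]),
        ("stddev_samp", vec!["stddev"]),
    ];
    let names = funcs
        .iter()
        .flat_map(|(name, aliases)| {
            std::iter::once(name.to_string()).chain(aliases.iter().map(|a| a.to_string()))
        })
        .sorted()
        .collect_vec();
    assert_eq!(names, ["stddev", "stddev_samp", "var", "var_samp"]);
}
```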
diff --git a/datafusion/core/src/bin/print_window_functions_docs.rs b/datafusion/core/src/bin/print_window_functions_docs.rs
new file mode 100644
index 000000000000..272f423af2dc
--- /dev/null
+++ b/datafusion/core/src/bin/print_window_functions_docs.rs
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::execution::SessionStateDefaults;
+use datafusion_expr::window_doc_sections::doc_sections;
+use datafusion_expr::WindowUDF;
+use itertools::Itertools;
+use std::fmt::Write as _;
+use std::sync::Arc;
+
+fn main() {
+    let functions = SessionStateDefaults::default_window_functions();
+    let mut docs = "".to_string();
+
+    // doc sections only includes sections that have 'include' == true
+    for doc_section in doc_sections() {
+        // make sure there is a function that is in this doc section
+        if !functions
+            .iter()
+            .any(|f| f.documentation().doc_section == doc_section)
+        {
+            continue;
+        }
+
+        // write out section header
+        let _ = writeln!(&mut docs, "## {} ", doc_section.label);
+
+        if let Some(description) = doc_section.description {
+            let _ = writeln!(&mut docs, "{description}");
+        }
+
+        let filtered = functions
+            .clone()
+            .into_iter()
+            .filter(|f| f.documentation().doc_section == doc_section)
+            .collect_vec();
+
+        // names is a sorted list of function names and aliases since we display
+        // both in the documentation
+        let names = get_names_and_aliases(&filtered);
+
+        // write out the list of function names and aliases
+        names.iter().for_each(|name| {
+            let _ = writeln!(&mut docs, "- [{name}](#{name})");
+        });
+
+        // write out each function and alias in the order of the sorted name list
+        for name in names {
+            let f = filtered
+                .iter()
+                .find(|f| f.name() == name || f.aliases().contains(&name))
+                .unwrap();
+            let documentation = f.documentation();
+
+            // if this name is an alias we display a link to what it's an alias of
+            if f.aliases().contains(&name) {
+                let fname = f.name();
+                let _ = writeln!(&mut docs, "\n### `{name}`\n\n_Alias of [{fname}](#{fname})._");
+                continue;
+            }
+
+            // otherwise display the documentation for the function
+
+            // first, the name, description and syntax example
+            let _ = write!(
+                &mut docs,
+                r#"
+### `{}`
+
+{}
+
+```
+{}
+```
+"#,
+                f.name(),
+                documentation.description,
+                documentation.syntax_example
+            );
+
+            // next, arguments
+            if let Some(args) = &documentation.arguments {
+                let _ = writeln!(&mut docs, "#### Arguments\n");
+                for (arg_name, arg_desc) in args {
+                    let _ = writeln!(&mut docs, "- **{arg_name}**: {arg_desc}");
+                }
+            }
+
+            // next, sql example if provided
+            if let Some(example) = documentation.sql_example {
+                let _ = writeln!(
+                    &mut docs,
+                    r#"
+#### Example
+
+{}
+"#,
+                    example
+                );
+            }
+
+            // next, aliases
+            if !f.aliases().is_empty() {
+                let _ = writeln!(&mut docs, "\n#### Aliases");
+
+                for alias in f.aliases() {
+                    let _ = writeln!(&mut docs, "- {alias}");
+                }
+            }
+
+            // finally, any related udfs
+            if let Some(related_udfs) = &documentation.related_udfs {
+                let _ = writeln!(&mut docs, "\n**Related functions**:");
+
+                for related in related_udfs {
+                    let _ = writeln!(&mut docs, "- [{related}](#{related})");
+                }
+            }
+        }
+    }
+
+    println!("{docs}");
+}
+
+fn get_names_and_aliases(functions: &[Arc<WindowUDF>]) -> Vec<String> {
+    functions
+        .iter()
+        .flat_map(|f| {
+            if f.aliases().is_empty() {
+                vec![f.name().to_string()]
+            } else {
+                let mut names = vec![f.name().to_string()];
+                names.extend(f.aliases().iter().cloned());
+                names
+            }
+        })
+        .sorted()
+        .collect_vec()
+}
diff --git a/datafusion/expr/src/udf_docs.rs b/datafusion/expr/src/udf_docs.rs
new file mode 100644
index 000000000000..32c03bda385f
--- /dev/null
+++ b/datafusion/expr/src/udf_docs.rs
@@ -0,0 +1,64 @@
+use indexmap::IndexMap;
+
+/// Documentation for use by [`crate::ScalarUDFImpl`],
+/// [`crate::AggregateUDFImpl`] and [`crate::WindowUDFImpl`] functions
+/// that will be used to generate public documentation.
+///
+/// The name of the UDF will be pulled from the [`crate::ScalarUDFImpl::name`],
+/// [`crate::AggregateUDFImpl::name`] or [`crate::WindowUDFImpl::name`] function
+/// as appropriate.
+///
+/// All strings in the documentation are required to be
+/// in [markdown format](https://www.markdownguide.org/basic-syntax/).
+///
+/// Currently, documentation only supports a single language,
+/// thus all text should be in English.
+#[derive(Debug, Clone)]
+pub struct Documentation {
+    /// the section in the documentation where the UDF will be documented
+    pub doc_section: DocSection,
+    /// the description for the UDF
+    pub description: &'static str,
+    /// a brief syntax example for the UDF
+    pub syntax_example: &'static str,
+    /// a sql example for the UDF, usually in the form of a sql prompt,
+    /// query and output. It is strongly recommended to provide an
+    /// example for anything but the most basic UDFs
+    pub sql_example: Option<&'static str>,
+    /// arguments for the UDF which will be displayed in insertion
+    /// order. Key is the argument name, value is a description for
+    /// the argument
+    pub arguments: Option<IndexMap<&'static str, &'static str>>,
+    /// related functions if any. Values should match the related
+    /// UDF's name exactly. Related UDFs must be of the same
+    /// UDF type (scalar, aggregate or window) for proper linking to
+    /// occur
+    pub related_udfs: Option<Vec<&'static str>>,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct DocSection {
+    /// true to include this doc section in the public
+    /// documentation, false otherwise
+    pub include: bool,
+    /// a display label for the doc section. For example: "Math Expressions"
+    pub label: &'static str,
+    /// an optional description for the doc section
+    pub description: Option<&'static str>,
+}
+
+/// Default [`Documentation`] for a UDF that has no public documentation
+pub const DOCUMENTATION_NONE: Documentation = Documentation {
+    doc_section: DOC_SECTION_NONE,
+    description: "",
+    syntax_example: "",
+    sql_example: None,
+    arguments: None,
+    related_udfs: None,
+};
+
+/// A doc section that indicates the UDF should not
+/// be publicly documented
+pub const DOC_SECTION_NONE: DocSection = DocSection {
+    include: false,
+    label: "",
+    description: None,
+};
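Since `DocSection` derives `PartialEq`, the generator binaries match functions to sections by plain equality, and a UDF carrying `DOC_SECTION_NONE` can never equal a section returned by `doc_sections()`, which lists only `include: true` entries. A minimal standalone sketch of that filtering, using mock section constants rather than the real curated lists:

```rust
// Standalone sketch of section matching: a function belongs to a section
// iff its documentation's doc_section equals it. The mock constants stand
// in for the curated scalar/aggregate/window section lists.
#[derive(Debug, Clone, PartialEq)]
struct DocSection {
    include: bool,
    label: &'static str,
}

const DOC_SECTION_MATH: DocSection = DocSection { include: true, label: "Math Functions" };
const DOC_SECTION_NONE: DocSection = DocSection { include: false, label: "" };

fn main() {
    let sections = [DOC_SECTION_MATH, DOC_SECTION_NONE];
    let funcs = [("abs", DOC_SECTION_MATH), ("internal_fn", DOC_SECTION_NONE)];
    // only `include: true` sections are rendered, so DOC_SECTION_NONE
    // (and with it internal_fn) is skipped entirely
    for section in sections.iter().filter(|s| s.include) {
        let names: Vec<&str> = funcs
            .iter()
            .filter(|(_, s)| s == section)
            .map(|(name, _)| *name)
            .collect();
        println!("## {}: {:?}", section.label, names); // ## Math Functions: ["abs"]
    }
}
```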
diff --git a/dev/update_aggregate_docs.sh b/dev/update_aggregate_docs.sh
new file mode 100644
index 000000000000..9ad8074927d2
--- /dev/null
+++ b/dev/update_aggregate_docs.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "${SOURCE_DIR}/../" && pwd
+
+TARGET_FILE="docs/source/user-guide/sql/aggregate_functions_new.md"
+PRINT_AGGREGATE_FUNCTION_DOCS_COMMAND="cargo run --manifest-path datafusion/core/Cargo.toml --bin print_aggregate_functions_docs"
+
+echo "Inserting header"
+cat <<'EOF' > "$TARGET_FILE"
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Aggregate Functions
+
+Aggregate functions operate on a set of values to compute a single result.
+EOF
+
+echo "Running CLI and inserting aggregate function docs table"
+$PRINT_AGGREGATE_FUNCTION_DOCS_COMMAND >> "$TARGET_FILE"
+
+echo "Running prettier"
+npx prettier@2.3.2 --write "$TARGET_FILE"
+
+echo "'$TARGET_FILE' successfully updated!"
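Each update script follows the same pattern: write a fixed markdown header to the target file, append the generator binary's stdout, then normalize with prettier. A rough, illustrative Rust rendering of that flow (it shells out to the same commands the script uses; not part of the patch):

```rust
// Illustrative translation of the shell flow above: write the fixed header,
// append the doc generator's stdout, then let prettier reformat the result.
use std::fs;
use std::process::Command;

fn main() -> std::io::Result<()> {
    let target = "docs/source/user-guide/sql/aggregate_functions_new.md";
    let header = "# Aggregate Functions\n\n\
        Aggregate functions operate on a set of values to compute a single result.\n";

    // equivalent of: $PRINT_AGGREGATE_FUNCTION_DOCS_COMMAND >> "$TARGET_FILE"
    let output = Command::new("cargo")
        .args([
            "run",
            "--manifest-path",
            "datafusion/core/Cargo.toml",
            "--bin",
            "print_aggregate_functions_docs",
        ])
        .output()?;

    let mut contents = String::from(header);
    contents.push_str(&String::from_utf8_lossy(&output.stdout));
    fs::write(target, contents)?;

    // equivalent of: npx prettier@2.3.2 --write "$TARGET_FILE"
    Command::new("npx")
        .args(["prettier@2.3.2", "--write", target])
        .status()?;
    Ok(())
}
```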
diff --git a/dev/update_scalar_docs.sh b/dev/update_scalar_docs.sh
new file mode 100644
index 000000000000..5ff5cebad4f1
--- /dev/null
+++ b/dev/update_scalar_docs.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "${SOURCE_DIR}/../" && pwd
+
+TARGET_FILE="docs/source/user-guide/sql/scalar_functions_new.md"
+PRINT_SCALAR_FUNCTION_DOCS_COMMAND="cargo run --manifest-path datafusion/core/Cargo.toml --bin print_scalar_functions_docs"
+
+echo "Inserting header"
+cat <<'EOF' > "$TARGET_FILE"
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Scalar Functions
+EOF
+
+echo "Running CLI and inserting scalar function docs table"
+$PRINT_SCALAR_FUNCTION_DOCS_COMMAND >> "$TARGET_FILE"
+
+echo "Running prettier"
+npx prettier@2.3.2 --write "$TARGET_FILE"
+
+echo "'$TARGET_FILE' successfully updated!"
diff --git a/dev/update_window_docs.sh b/dev/update_window_docs.sh
new file mode 100644
index 000000000000..a77fd2fd8ccc
--- /dev/null
+++ b/dev/update_window_docs.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "${SOURCE_DIR}/../" && pwd
+
+TARGET_FILE="docs/source/user-guide/sql/window_functions_new.md"
+PRINT_WINDOW_FUNCTION_DOCS_COMMAND="cargo run --manifest-path datafusion/core/Cargo.toml --bin print_window_functions_docs"
+
+echo "Inserting header"
+cat <<'EOF' > "$TARGET_FILE"
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Window Functions
+
+A _window function_ performs a calculation across a set of table rows that are somehow related to the current row. This is comparable to the type of calculation that can be done with an aggregate function. However, window functions do not cause rows to become grouped into a single output row as non-window aggregate calls would. Instead, the rows retain their separate identities. Behind the scenes, the window function is able to access more than just the current row of the query result.
+
+Here is an example that shows how to compare each employee's salary with the average salary in his or her department:
+
+```sql
+SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary;
+
++-----------+-------+--------+-------------------+
+| depname   | empno | salary | avg               |
++-----------+-------+--------+-------------------+
+| personnel | 2     | 3900   | 3700.0            |
+| personnel | 5     | 3500   | 3700.0            |
+| develop   | 8     | 6000   | 5020.0            |
+| develop   | 10    | 5200   | 5020.0            |
+| develop   | 11    | 5200   | 5020.0            |
+| develop   | 9     | 4500   | 5020.0            |
+| develop   | 7     | 4200   | 5020.0            |
+| sales     | 1     | 5000   | 4866.666666666667 |
+| sales     | 4     | 4800   | 4866.666666666667 |
+| sales     | 3     | 4800   | 4866.666666666667 |
++-----------+-------+--------+-------------------+
+```
+
+A window function call always contains an OVER clause directly following the window function's name and argument(s). This is what syntactically distinguishes it from a normal function or non-window aggregate. The OVER clause determines exactly how the rows of the query are split up for processing by the window function. The PARTITION BY clause within OVER divides the rows into groups, or partitions, that share the same values of the PARTITION BY expression(s). For each row, the window function is computed across the rows that fall into the same partition as the current row. The previous example showed how to compute the average of a column per partition.
+
+You can also control the order in which rows are processed by window functions using ORDER BY within OVER. (The window ORDER BY does not even have to match the order in which the rows are output.) Here is an example:
+
+```sql
+SELECT depname, empno, salary,
+       rank() OVER (PARTITION BY depname ORDER BY salary DESC)
+FROM empsalary;
+
++-----------+-------+--------+--------+
+| depname   | empno | salary | rank   |
++-----------+-------+--------+--------+
+| personnel | 2     | 3900   | 1      |
+| develop   | 8     | 6000   | 1      |
+| develop   | 10    | 5200   | 2      |
+| develop   | 11    | 5200   | 2      |
+| develop   | 9     | 4500   | 4      |
+| develop   | 7     | 4200   | 5      |
+| sales     | 1     | 5000   | 1      |
+| sales     | 4     | 4800   | 2      |
+| personnel | 5     | 3500   | 2      |
+| sales     | 3     | 4800   | 2      |
++-----------+-------+--------+--------+
+```
+
+There is another important concept associated with window functions: for each row, there is a set of rows within its partition called its window frame. Some window functions act only on the rows of the window frame, rather than on the whole partition. Here is an example of using window frames in queries:
+
+```sql
+SELECT depname, empno, salary,
+    avg(salary) OVER(ORDER BY salary ASC ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS avg,
+    min(salary) OVER(ORDER BY empno ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cum_min
+FROM empsalary
+ORDER BY empno ASC;
+
++-----------+-------+--------+--------------------+---------+
+| depname   | empno | salary | avg                | cum_min |
++-----------+-------+--------+--------------------+---------+
+| sales     | 1     | 5000   | 5000.0             | 5000    |
+| personnel | 2     | 3900   | 3866.6666666666665 | 3900    |
+| sales     | 3     | 4800   | 4700.0             | 3900    |
+| sales     | 4     | 4800   | 4866.666666666667  | 3900    |
+| personnel | 5     | 3500   | 3700.0             | 3500    |
+| develop   | 7     | 4200   | 4200.0             | 3500    |
+| develop   | 8     | 6000   | 5600.0             | 3500    |
+| develop   | 9     | 4500   | 4500.0             | 3500    |
+| develop   | 10    | 5200   | 5133.333333333333  | 3500    |
+| develop   | 11    | 5200   | 5466.666666666667  | 3500    |
++-----------+-------+--------+--------------------+---------+
+```
+
+When a query involves multiple window functions, it is possible to write out each one with a separate OVER clause, but this is duplicative and error-prone if the same windowing behavior is wanted for several functions. Instead, each windowing behavior can be named in a WINDOW clause and then referenced in OVER. For example:
+
+```sql
+SELECT sum(salary) OVER w, avg(salary) OVER w
+FROM empsalary
+WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);
+```
+
+## Syntax
+
+The syntax for the OVER clause is
+
+```
+function([expr])
+  OVER(
+    [PARTITION BY expr[, …]]
+    [ORDER BY expr [ ASC | DESC ][, …]]
+    [ frame_clause ]
+    )
+```
+
+where **frame_clause** is one of:
+
+```
+  { RANGE | ROWS | GROUPS } frame_start
+  { RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end
+```
+
+and **frame_start** and **frame_end** can be one of
+
+```sql
+UNBOUNDED PRECEDING
+offset PRECEDING
+CURRENT ROW
+offset FOLLOWING
+UNBOUNDED FOLLOWING
+```
+
+where **offset** is a non-negative integer.
+
+RANGE and GROUPS modes require an ORDER BY clause (with RANGE, the ORDER BY must specify exactly one column).
+
+## Aggregate functions
+
+All [aggregate functions](aggregate_functions.md) can be used as window functions.
+
+EOF
+
+echo "Running CLI and inserting window function docs table"
+$PRINT_WINDOW_FUNCTION_DOCS_COMMAND >> "$TARGET_FILE"
+
+echo "Running prettier"
+npx prettier@2.3.2 --write "$TARGET_FILE"
+
+echo "'$TARGET_FILE' successfully updated!"