Skip to content

Commit

Permalink
feat: add missing scalar math functions (apache#465)
Browse files Browse the repository at this point in the history
  • Loading branch information
mesejo committed Sep 6, 2023
1 parent e24dc75 commit bc62aaf
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 1 deletion.
36 changes: 35 additions & 1 deletion datafusion/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import math

import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -85,12 +86,15 @@ def test_math_functions():
ctx = SessionContext()
# create a RecordBatch and a new DataFrame from it
batch = pa.RecordBatch.from_arrays(
[pa.array([0.1, -0.7, 0.55])], names=["value"]
[pa.array([0.1, -0.7, 0.55]), pa.array([float("nan"), 0, 2.0])],
names=["value", "na_value"],
)
df = ctx.create_dataframe([[batch]])

values = np.array([0.1, -0.7, 0.55])
na_values = np.array([np.nan, 0, 2.0])
col_v = column("value")
col_nav = column("na_value")
df = df.select(
f.abs(col_v),
f.sin(col_v),
Expand All @@ -113,6 +117,20 @@ def test_math_functions():
f.sqrt(col_v),
f.signum(col_v),
f.trunc(col_v),
f.asinh(col_v),
f.acosh(col_v),
f.atanh(col_v),
f.cbrt(col_v),
f.cosh(col_v),
f.degrees(col_v),
f.gcd(literal(9), literal(3)),
f.lcm(literal(6), literal(4)),
f.nanvl(col_nav, literal(5)),
f.pi(),
f.radians(col_v),
f.sinh(col_v),
f.tanh(col_v),
f.factorial(literal(6)),
)
batches = df.collect()
assert len(batches) == 1
Expand Down Expand Up @@ -151,6 +169,22 @@ def test_math_functions():
np.testing.assert_array_almost_equal(result.column(18), np.sqrt(values))
np.testing.assert_array_almost_equal(result.column(19), np.sign(values))
np.testing.assert_array_almost_equal(result.column(20), np.trunc(values))
np.testing.assert_array_almost_equal(result.column(21), np.arcsinh(values))
np.testing.assert_array_almost_equal(result.column(22), np.arccosh(values))
np.testing.assert_array_almost_equal(result.column(23), np.arctanh(values))
np.testing.assert_array_almost_equal(result.column(24), np.cbrt(values))
np.testing.assert_array_almost_equal(result.column(25), np.cosh(values))
np.testing.assert_array_almost_equal(result.column(26), np.degrees(values))
np.testing.assert_array_almost_equal(result.column(27), np.gcd(9, 3))
np.testing.assert_array_almost_equal(result.column(28), np.lcm(6, 4))
np.testing.assert_array_almost_equal(
result.column(29), np.where(np.isnan(na_values), 5, na_values)
)
np.testing.assert_array_almost_equal(result.column(30), np.pi)
np.testing.assert_array_almost_equal(result.column(31), np.radians(values))
np.testing.assert_array_almost_equal(result.column(32), np.sinh(values))
np.testing.assert_array_almost_equal(result.column(33), np.tanh(values))
np.testing.assert_array_almost_equal(result.column(34), math.factorial(6))


def test_string_functions(df):
Expand Down
32 changes: 32 additions & 0 deletions src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,16 +198,20 @@ macro_rules! aggregate_function {

scalar_function!(abs, Abs);
scalar_function!(acos, Acos);
scalar_function!(acosh, Acosh);
scalar_function!(ascii, Ascii, "Returns the numeric code of the first character of the argument. In UTF8 encoding, returns the Unicode code point of the character. In other multibyte encodings, the argument must be an ASCII character.");
scalar_function!(asin, Asin);
scalar_function!(asinh, Asinh);
scalar_function!(atan, Atan);
scalar_function!(atanh, Atanh);
scalar_function!(atan2, Atan2);
scalar_function!(
bit_length,
BitLength,
"Returns number of bits in the string (8 times the octet_length)."
);
scalar_function!(btrim, Btrim, "Removes the longest string containing only characters in characters (a space by default) from the start and end of string.");
scalar_function!(cbrt, Cbrt);
scalar_function!(ceil, Ceil);
scalar_function!(
character_length,
Expand All @@ -219,9 +223,14 @@ scalar_function!(char_length, CharacterLength);
scalar_function!(chr, Chr, "Returns the character with the given code.");
scalar_function!(coalesce, Coalesce);
scalar_function!(cos, Cos);
scalar_function!(cosh, Cosh);
scalar_function!(degrees, Degrees);
scalar_function!(exp, Exp);
scalar_function!(factorial, Factorial);
scalar_function!(floor, Floor);
scalar_function!(gcd, Gcd);
scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.");
scalar_function!(lcm, Lcm);
scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters.");
scalar_function!(ln, Ln);
scalar_function!(log, Log);
Expand All @@ -235,9 +244,16 @@ scalar_function!(
MD5,
"Computes the MD5 hash of the argument, with the result written in hexadecimal."
);
// NaN fallback: nanvl(x, y) yields x unless x is NaN, in which case it yields y.
scalar_function!(
    nanvl,
    Nanvl,
    "Returns the first argument if it is not NaN, otherwise returns the second argument."
);
scalar_function!(octet_length, OctetLength, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces.");
scalar_function!(pi, Pi);
scalar_function!(power, Power);
scalar_function!(pow, Power);
scalar_function!(radians, Radians);
scalar_function!(regexp_match, RegexpMatch);
scalar_function!(
regexp_replace,
Expand Down Expand Up @@ -269,6 +285,7 @@ scalar_function!(sha384, SHA384);
scalar_function!(sha512, SHA512);
scalar_function!(signum, Signum);
scalar_function!(sin, Sin);
scalar_function!(sinh, Sinh);
scalar_function!(
split_part,
SplitPart,
Expand All @@ -283,6 +300,7 @@ scalar_function!(
scalar_function!(strpos, Strpos, "Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)");
scalar_function!(substr, Substr);
scalar_function!(tan, Tan);
scalar_function!(tanh, Tanh);
scalar_function!(
to_hex,
ToHex,
Expand Down Expand Up @@ -343,6 +361,7 @@ aggregate_function!(var_samp, Variance);
pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(abs))?;
m.add_wrapped(wrap_pyfunction!(acos))?;
m.add_wrapped(wrap_pyfunction!(acosh))?;
m.add_wrapped(wrap_pyfunction!(approx_distinct))?;
m.add_wrapped(wrap_pyfunction!(alias))?;
m.add_wrapped(wrap_pyfunction!(approx_median))?;
Expand All @@ -353,11 +372,14 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(arrow_typeof))?;
m.add_wrapped(wrap_pyfunction!(ascii))?;
m.add_wrapped(wrap_pyfunction!(asin))?;
m.add_wrapped(wrap_pyfunction!(asinh))?;
m.add_wrapped(wrap_pyfunction!(atan))?;
m.add_wrapped(wrap_pyfunction!(atanh))?;
m.add_wrapped(wrap_pyfunction!(atan2))?;
m.add_wrapped(wrap_pyfunction!(avg))?;
m.add_wrapped(wrap_pyfunction!(bit_length))?;
m.add_wrapped(wrap_pyfunction!(btrim))?;
m.add_wrapped(wrap_pyfunction!(cbrt))?;
m.add_wrapped(wrap_pyfunction!(ceil))?;
m.add_wrapped(wrap_pyfunction!(character_length))?;
m.add_wrapped(wrap_pyfunction!(chr))?;
Expand All @@ -369,25 +391,30 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(concat))?;
m.add_wrapped(wrap_pyfunction!(corr))?;
m.add_wrapped(wrap_pyfunction!(cos))?;
m.add_wrapped(wrap_pyfunction!(cosh))?;
m.add_wrapped(wrap_pyfunction!(count))?;
m.add_wrapped(wrap_pyfunction!(count_star))?;
m.add_wrapped(wrap_pyfunction!(covar))?;
m.add_wrapped(wrap_pyfunction!(covar_pop))?;
m.add_wrapped(wrap_pyfunction!(covar_samp))?;
m.add_wrapped(wrap_pyfunction!(current_date))?;
m.add_wrapped(wrap_pyfunction!(current_time))?;
m.add_wrapped(wrap_pyfunction!(degrees))?;
m.add_wrapped(wrap_pyfunction!(date_bin))?;
m.add_wrapped(wrap_pyfunction!(datepart))?;
m.add_wrapped(wrap_pyfunction!(date_part))?;
m.add_wrapped(wrap_pyfunction!(datetrunc))?;
m.add_wrapped(wrap_pyfunction!(date_trunc))?;
m.add_wrapped(wrap_pyfunction!(digest))?;
m.add_wrapped(wrap_pyfunction!(exp))?;
m.add_wrapped(wrap_pyfunction!(factorial))?;
m.add_wrapped(wrap_pyfunction!(floor))?;
m.add_wrapped(wrap_pyfunction!(from_unixtime))?;
m.add_wrapped(wrap_pyfunction!(gcd))?;
m.add_wrapped(wrap_pyfunction!(grouping))?;
m.add_wrapped(wrap_pyfunction!(in_list))?;
m.add_wrapped(wrap_pyfunction!(initcap))?;
m.add_wrapped(wrap_pyfunction!(lcm))?;
m.add_wrapped(wrap_pyfunction!(left))?;
m.add_wrapped(wrap_pyfunction!(length))?;
m.add_wrapped(wrap_pyfunction!(ln))?;
Expand All @@ -403,12 +430,15 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(mean))?;
m.add_wrapped(wrap_pyfunction!(median))?;
m.add_wrapped(wrap_pyfunction!(min))?;
m.add_wrapped(wrap_pyfunction!(nanvl))?;
m.add_wrapped(wrap_pyfunction!(now))?;
m.add_wrapped(wrap_pyfunction!(nullif))?;
m.add_wrapped(wrap_pyfunction!(octet_length))?;
m.add_wrapped(wrap_pyfunction!(order_by))?;
m.add_wrapped(wrap_pyfunction!(pi))?;
m.add_wrapped(wrap_pyfunction!(power))?;
m.add_wrapped(wrap_pyfunction!(pow))?;
m.add_wrapped(wrap_pyfunction!(radians))?;
m.add_wrapped(wrap_pyfunction!(random))?;
m.add_wrapped(wrap_pyfunction!(regexp_match))?;
m.add_wrapped(wrap_pyfunction!(regexp_replace))?;
Expand All @@ -425,6 +455,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(sha512))?;
m.add_wrapped(wrap_pyfunction!(signum))?;
m.add_wrapped(wrap_pyfunction!(sin))?;
m.add_wrapped(wrap_pyfunction!(sinh))?;
m.add_wrapped(wrap_pyfunction!(split_part))?;
m.add_wrapped(wrap_pyfunction!(sqrt))?;
m.add_wrapped(wrap_pyfunction!(starts_with))?;
Expand All @@ -436,6 +467,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(substr))?;
m.add_wrapped(wrap_pyfunction!(sum))?;
m.add_wrapped(wrap_pyfunction!(tan))?;
m.add_wrapped(wrap_pyfunction!(tanh))?;
m.add_wrapped(wrap_pyfunction!(to_hex))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?;
Expand Down

0 comments on commit bc62aaf

Please sign in to comment.