diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index 08b872fcf..e1e9e4279 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -2778,7 +2778,7 @@ defmodule Explorer.DataFrame do #Explorer.DataFrame< Polars[3 x 2] a string ["a", "b", "c"] - b f64 [1.0, 4.0, 9.0] + b s64 [1, 4, 9] > It's possible to "reuse" a variable for different computations: diff --git a/lib/explorer/polars_backend/data_frame.ex b/lib/explorer/polars_backend/data_frame.ex index 54d64c73f..dac839b3d 100644 --- a/lib/explorer/polars_backend/data_frame.ex +++ b/lib/explorer/polars_backend/data_frame.ex @@ -195,6 +195,11 @@ defmodule Explorer.PolarsBackend.DataFrame do {columns, with_projection} = column_names_or_projection(columns) + dtypes_list = + if not Enum.empty?(dtypes) do + Map.to_list(dtypes) + end + df = Native.df_load_csv( contents, @@ -207,7 +212,7 @@ defmodule Explorer.PolarsBackend.DataFrame do delimiter, true, columns, - Map.to_list(dtypes), + dtypes_list, encoding, nil_values, parse_dates, diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 94f3abad2..8145b85cb 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -3616,7 +3616,7 @@ defmodule Explorer.Series do iex> Explorer.Series.pow(s, 3) #Explorer.Series< Polars[3] - f64 [8.0, 64.0, 216.0] + s64 [8, 64, 216] > iex> s = [2, 4, 6] |> Explorer.Series.from_list() diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index 3c8218b34..a55a1d8fc 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -1575,9 +1575,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43845ccaa696de9cf374f5a4e3c5c2685192b4a56b7dd60b05a5cb1f7cd86cc7" +checksum = "8e3351ea4570e54cd556e6755b78fe7a2c85368d820c0307cca73c96e796a7ba" dependencies = [ "getrandom", "polars-arrow", @@ -1595,9 +1595,9 @@ dependencies = [ [[package]] name = 
"polars-arrow" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e66ab16b782bdf1803c3e50438817f5fcb6f6bcfbeb6dddba01a2b83827c7bf" +checksum = "ba65fc4bcabbd64fca01fd30e759f8b2043f0963c57619e331d4b534576c0b47" dependencies = [ "ahash", "atoi", @@ -1642,9 +1642,9 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "087442a8e5c0e3d4d1d683f1b287770c6e26a8f86a9a8a47607e220ae9e08113" +checksum = "9f099516af30ac9ae4b4480f4ad02aa017d624f2f37b7a16ad4e9ba52f7e5269" dependencies = [ "bytemuck", "either", @@ -1658,9 +1658,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9523e09a134c75557e2216e652c965013b92b8e682cc591059d4bfb0951a9d72" +checksum = "b2439484be228b8c302328e2f953e64cfd93930636e5c7ceed90339ece7fef6c" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1690,9 +1690,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ad3cecbbe309229174a1126dfda0cfde4826d8e340e89e3846f546a9333a9e0" +checksum = "0c9b06dfbe79cabe50a7f0a90396864b5ee2c0e0f8d6a9353b2343c29c56e937" dependencies = [ "object_store 0.10.1", "polars-arrow-format", @@ -1703,9 +1703,9 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2685cee0dcbb92dbf7fccd98ca169741051fbdac5182541f82720279e08d93b6" +checksum = "d9c630385a56a867c410a20f30772d088f90ec3d004864562b84250b35268f97" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1723,9 +1723,9 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.41.1" +version = "0.41.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "409494bcd2f1ba2e8fae075636ee69d23087a8fb373bdf74e29b0d9bdd05923c" +checksum = "9d7363cd14e4696a28b334a56bd11013ff49cc96064818ab3f91a126e453462d" dependencies = [ "ahash", "async-trait", @@ -1770,12 +1770,13 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16059980d1d7b76fd387b2bf88fc1f61b1f354bb784e5eda55148b910def1564" +checksum = "543d7d3853f2c52dbfedee9ebf0d58c4ff3b92aadee5309150b2d14df49d6253" dependencies = [ "ahash", "chrono", + "chrono-tz 0.8.6", "fallible-streaming-iterator", "hashbrown", "indexmap", @@ -1791,9 +1792,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6611d17efa46d7e61326425d99749a01810af690328845846277ef281d0eac4f" +checksum = "03877e74e42b5340ae52ded705f6d5d14563d90554c9177b01b91ed2412a56ed" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1820,9 +1821,9 @@ dependencies = [ [[package]] name = "polars-mem-engine" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443c1014e43c0cabcfe0560d0442eb94fd6da1da329f226a8cbb17b7d43281d4" +checksum = "dea9e17771af750c94bf959885e4b3f5b14149576c62ef3ec1c9ef5827b2a30f" dependencies = [ "futures", "polars-arrow", @@ -1841,9 +1842,9 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c96d32dd9de3bcfe3c390d04e49ee1d5318f6ee32afec0ce73d9fdc33faf2488" +checksum = "6066552eb577d43b307027fb38096910b643ffb2c89a21628c7e41caf57848d0" dependencies = [ "ahash", "argminmax", @@ -1876,9 +1877,9 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.41.1" +version = "0.41.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf285139df977a5ffc1f7d6e976c44a16c3ce80c67d333cb9d57a3afd6eef1b" +checksum = "2b35b2592a2e7ef7ce9942dc2120dc4576142626c0e661668e4c6b805042e461" dependencies = [ "ahash", "async-stream", @@ -1894,7 +1895,6 @@ dependencies = [ "polars-compute", "polars-error", "polars-utils", - "seq-macro", "serde", "simdutf8", "snap", @@ -1904,9 +1904,9 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f994ef90f46ab5ed9d10e8cd39265884e7d52cdb5ddf224128b1d2c91badd84e" +checksum = "021bce7768c330687d735340395a77453aa18dd70d57c184cbb302311e87c1b9" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1932,9 +1932,9 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0782e64fcf4603a5788f901de07ace3473dbbca77a77b12924cba55cf1efae6" +checksum = "220d0d7c02d1c4375802b2813dbedcd1a184df39c43b74689e729ede8d5c2921" dependencies = [ "ahash", "bytemuck", @@ -1962,9 +1962,9 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba5f3c8fee6733cb744afb95d02304c519d70daa18493256a75c114db9cb1b20" +checksum = "c1d70d87a2882a64a43b431aea1329cb9a2c4100547c95c417cc426bb82408b3" dependencies = [ "bytemuck", "polars-arrow", @@ -1974,9 +1974,9 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea6ed7109564d46c8d552a035f8ddc4a1c664953ac396096324dcf68210dfe9" +checksum = "a6fc1c9b778862f09f4a347f768dfdd3d0ba9957499d306d83c7103e0fa8dc5b" dependencies = [ "hex", "once_cell", @@ -1995,9 +1995,9 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.41.1" 
+version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1de421e377bda3f60480dcf12ab615586332d6d65631d8a3bb8ec84589b9a10" +checksum = "179f98313a15c0bfdbc8cc0f1d3076d08d567485b9952d46439f94fbc3085df5" dependencies = [ "atoi", "bytemuck", @@ -2016,9 +2016,9 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95dc0ef1209ac423bf40b86f2b89a36aafcac9e99b3c9fbc4249cace484d336f" +checksum = "53e6dd89fcccb1ec1a62f752c9a9f2d482a85e9255153f46efecc617b4996d50" dependencies = [ "ahash", "bytemuck", @@ -2609,12 +2609,6 @@ dependencies = [ "libc", ] -[[package]] -name = "seq-macro" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" - [[package]] name = "serde" version = "1.0.198" diff --git a/native/explorer/Cargo.toml b/native/explorer/Cargo.toml index 543c044d0..191933f89 100644 --- a/native/explorer/Cargo.toml +++ b/native/explorer/Cargo.toml @@ -38,7 +38,7 @@ object_store = { version = "0.9", default-features = false, optional = true } mimalloc = { version = "*", default-features = false } [dependencies.polars] -version = "0.41" +version = "0.41.3" default-features = false features = [ "abs", @@ -87,7 +87,7 @@ features = [ ] [dependencies.polars-ops] -version = "0.41" +version = "0.41.3" features = ["abs", "ewma", "cum_agg", "cov"] [features] diff --git a/native/explorer/rust-toolchain.toml b/native/explorer/rust-toolchain.toml index 526ab5a6f..bce747f05 100644 --- a/native/explorer/rust-toolchain.toml +++ b/native/explorer/rust-toolchain.toml @@ -1,4 +1,4 @@ [toolchain] -channel = "nightly-2024-06-23" +channel = "nightly-2024-06-24" components = ["rustfmt", "clippy"] profile = "minimal" diff --git a/native/explorer/src/cloud_writer.rs b/native/explorer/src/cloud_writer.rs index e32b979fa..1cc128bb9 
100644 --- a/native/explorer/src/cloud_writer.rs +++ b/native/explorer/src/cloud_writer.rs @@ -95,7 +95,6 @@ mod tests { use polars::df; use polars::prelude::DataFrame; - use polars::prelude::NamedFrom; fn example_dataframe() -> DataFrame { df!( diff --git a/native/explorer/src/dataframe.rs b/native/explorer/src/dataframe.rs index 4644e7393..b3d30ff7d 100644 --- a/native/explorer/src/dataframe.rs +++ b/native/explorer/src/dataframe.rs @@ -264,7 +264,7 @@ pub fn df_sort_by( .with_maintain_order(maintain_order) .with_multithreaded(multithreaded) .with_nulls_last(nulls_last) - .with_order_descendings(reverse); + .with_order_descending_multi(reverse); let new_df = if groups.is_empty() { // Note: we cannot use either df.sort or df.sort_with_options. @@ -314,7 +314,7 @@ pub fn df_sort_with( .with_maintain_order(maintain_order) // .with_multithreaded(multithreaded) .with_nulls_last(nulls_last) - .with_order_descendings(directions); + .with_order_descending_multi(directions); let new_df = if groups.is_empty() { df.lazy().sort_by_exprs(exprs, sort_options).collect()? 
@@ -428,7 +428,7 @@ pub fn df_pivot_wider( let mut new_df = pivot_stable( &df, &temp_id_names, - [pivot_column], + Some([pivot_column]), Some(values_column), false, Some(PivotAgg::First), diff --git a/native/explorer/src/dataframe/io.rs b/native/explorer/src/dataframe/io.rs index bc4d0d7aa..d3937b1f6 100644 --- a/native/explorer/src/dataframe/io.rs +++ b/native/explorer/src/dataframe/io.rs @@ -52,7 +52,7 @@ pub fn df_from_csv( .with_skip_rows_after_header(skip_rows_after_header) .with_projection(projection.map(Arc::new)) .with_rechunk(do_rechunk) - .with_columns(column_names.map(Arc::new)) + .with_columns(column_names.map(Arc::from)) .with_schema_overwrite(Some(schema_from_dtypes_pairs(dtypes)?)) .with_parse_options( CsvParseOptions::default() @@ -146,7 +146,7 @@ pub fn df_load_csv( delimiter_as_byte: u8, do_rechunk: bool, column_names: Option>, - dtypes: Vec<(&str, ExSeriesDtype)>, + dtypes: Option>, encoding: &str, null_vals: Vec, parse_dates: bool, @@ -159,16 +159,21 @@ pub fn df_load_csv( let cursor = Cursor::new(binary.as_slice()); - let dataframe = CsvReadOptions::default() + let read_options = match dtypes { + Some(val) => CsvReadOptions::default().with_schema(Some(schema_from_dtypes_pairs(val)?)), + None => CsvReadOptions::default(), + }; + + let dataframe = read_options .with_has_header(has_header) .with_infer_schema_length(infer_schema_length) .with_n_rows(stop_after_n_rows) - .with_columns(column_names.map(Arc::new)) + .with_columns(column_names.map(Arc::from)) .with_skip_rows(skip_rows) .with_skip_rows_after_header(skip_rows_after_header) .with_projection(projection.map(Arc::new)) .with_rechunk(do_rechunk) - .with_schema(Some(schema_from_dtypes_pairs(dtypes)?)) + //.with_schema(Some(schema_from_dtypes_pairs(dtypes)?)) .with_parse_options( CsvParseOptions::default() .with_separator(delimiter_as_byte) @@ -519,7 +524,7 @@ pub fn df_from_ndjson( let reader = JsonReader::new(buf_reader) .with_json_format(JsonFormat::JsonLines) .with_batch_size(batch_size) - 
.infer_schema_len(infer_schema_length); + .infer_schema_len(infer_schema_length.and_then(NonZeroUsize::new)); Ok(ExDataFrame::new(reader.finish()?)) } @@ -576,7 +581,7 @@ pub fn df_load_ndjson( let reader = JsonReader::new(cursor) .with_json_format(JsonFormat::JsonLines) .with_batch_size(batch_size) - .infer_schema_len(infer_schema_length); + .infer_schema_len(infer_schema_length.and_then(NonZeroUsize::new)); Ok(ExDataFrame::new(reader.finish()?)) } diff --git a/native/explorer/src/lazyframe.rs b/native/explorer/src/lazyframe.rs index ade925181..d2eaef005 100644 --- a/native/explorer/src/lazyframe.rs +++ b/native/explorer/src/lazyframe.rs @@ -63,7 +63,7 @@ pub fn lf_tail( #[rustler::nif] pub fn lf_names(data: ExLazyFrame) -> Result, ExplorerError> { - let lf = data.clone_inner(); + let mut lf = data.clone_inner(); let names = lf .schema()? .iter_names() @@ -149,7 +149,7 @@ pub fn lf_sort_with( let sort_options = SortMultipleOptions::new() .with_nulls_last(nulls_last) .with_maintain_order(maintain_order) - .with_order_descendings(directions); + .with_order_descending_multi(directions); let ldf = data.clone_inner().sort_by_exprs(exprs, sort_options); @@ -166,7 +166,7 @@ pub fn lf_grouped_sort_with( let sort_options = SortMultipleOptions::new() // .with_nulls_last(nulls_last) // .with_maintain_order(maintain_order) - .with_order_descendings(directions); + .with_order_descending_multi(directions); // For grouped lazy frames, we need to use the `#sort_by` method that is // less powerful, but can be used with `over`. 
// See: https://docs.pola.rs/user-guide/expressions/window/#operations-per-group @@ -265,14 +265,14 @@ pub fn lf_pivot_longer( values_to: String, ) -> Result { let ldf = data.clone_inner(); - let melt_opts = MeltArgs { - id_vars: to_smart_strings(id_vars), - value_vars: to_smart_strings(value_vars), + let unpivot_opts = UnpivotArgs { + on: to_smart_strings(value_vars), + index: to_smart_strings(id_vars), variable_name: Some(names_to.into()), value_name: Some(values_to.into()), streamable: true, }; - let new_df = ldf.melt(melt_opts); + let new_df = ldf.unpivot(unpivot_opts); Ok(ExLazyFrame::new(new_df)) } @@ -288,7 +288,7 @@ pub fn lf_join( let how = match how { "left" => JoinType::Left, "inner" => JoinType::Inner, - "outer" => JoinType::Outer, + "outer" => JoinType::Full, "cross" => JoinType::Cross, _ => { return Err(ExplorerError::Other(format!( @@ -329,7 +329,7 @@ pub fn lf_concat_columns(ldfs: Vec) -> Result = ldf .schema() .expect("should be able to get schema") diff --git a/native/explorer/src/lazyframe/io.rs b/native/explorer/src/lazyframe/io.rs index 6406834c4..1048cf802 100644 --- a/native/explorer/src/lazyframe/io.rs +++ b/native/explorer/src/lazyframe/io.rs @@ -81,14 +81,14 @@ pub fn lf_to_parquet( if streaming { let options = ParquetWriteOptions { compression, - statistics: false, + statistics: StatisticsOptions::empty(), row_group_size: None, data_pagesize_limit: None, maintain_order: false, }; lf.with_comm_subplan_elim(false) - .sink_parquet(filename.into(), options)?; + .sink_parquet(filename, options)?; Ok(()) } else { let mut df = lf.collect()?; @@ -117,7 +117,7 @@ pub fn lf_to_parquet_cloud( let options = ParquetWriteOptions { compression, - statistics: false, + statistics: StatisticsOptions::empty(), row_group_size: None, data_pagesize_limit: None, maintain_order: false, @@ -172,7 +172,7 @@ pub fn lf_to_ipc( maintain_order: false, }; lf.with_comm_subplan_elim(false) - .sink_ipc(filename.into(), options)?; + .sink_ipc(filename, options)?; Ok(()) }
else { let mut df = lf.collect()?; @@ -248,7 +248,7 @@ pub fn lf_to_csv( }; lf.with_comm_subplan_elim(false) - .sink_csv(filename.into(), options)?; + .sink_csv(filename, options)?; Ok(()) } else { let df = lf.collect()?; @@ -274,7 +274,7 @@ pub fn lf_from_ndjson( "\"batch_size\" expected to be non zero.".to_string(), ))?; let lf = LazyJsonLineReader::new(filename) - .with_infer_schema_length(infer_schema_length) + .with_infer_schema_length(infer_schema_length.and_then(NonZeroUsize::new)) .with_batch_size(Some(batch_size)) .finish()?; diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index dc504c099..d081b979a 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -70,29 +70,32 @@ pub fn s_mask(series: ExSeries, filter: ExSeries) -> Result Result { let s = data.clone_inner(); let s1 = other.clone_inner(); - Ok(ExSeries::new(s + s1)) + let result = s + s1; + Ok(ExSeries::new(result?)) } #[rustler::nif(schedule = "DirtyCpu")] pub fn s_subtract(lhs: ExSeries, rhs: ExSeries) -> Result { let left = lhs.clone_inner(); let right = rhs.clone_inner(); - - Ok(ExSeries::new(left - right)) + let result = left - right; + Ok(ExSeries::new(result?)) } #[rustler::nif(schedule = "DirtyCpu")] pub fn s_multiply(data: ExSeries, other: ExSeries) -> Result { let s = data.clone_inner(); let s1 = other.clone_inner(); - Ok(ExSeries::new(s * s1)) + let result = s * s1; + Ok(ExSeries::new(result?)) } #[rustler::nif(schedule = "DirtyCpu")] pub fn s_divide(data: ExSeries, other: ExSeries) -> Result { let s = data.clone_inner().cast(&DataType::Float64)?; let s1 = other.clone_inner().cast(&DataType::Float64)?; - Ok(ExSeries::new(s / s1)) + let result = s / s1; + Ok(ExSeries::new(result?)) } #[rustler::nif(schedule = "DirtyCpu")] @@ -106,9 +109,9 @@ pub fn s_remainder(data: ExSeries, other: ExSeries) -> Result Result #[rustler::nif(schedule = "DirtyCpu")] pub fn s_frequencies(series: ExSeries) -> Result { - let df = series.value_counts(true, 
true)?; + let df = series.value_counts(true, true, "counts".to_string(), false)?; Ok(ExDataFrame::new(df)) } @@ -1007,16 +1010,13 @@ pub fn s_quantile<'a>( .unwrap() .encode(env)), }, - _ => todo!(), - // _ => - // s.agg_quantile(quantile, strategy) - - // encoding::term_from_value( - // s.quantile_as_series(quantile, strategy)? - // .cast(dtype)? - // .get(0)?, - // env, - // ), + _ => encoding::term_from_value( + s.quantile_reduce(quantile, strategy)? + .into_series("quantile") + .cast(dtype)? + .get(0)?, + env, + ), } } diff --git a/test/explorer/data_frame/grouped_test.exs b/test/explorer/data_frame/grouped_test.exs index d6fb522a9..2065431a8 100644 --- a/test/explorer/data_frame/grouped_test.exs +++ b/test/explorer/data_frame/grouped_test.exs @@ -578,11 +578,11 @@ defmodule Explorer.DataFrame.GroupedTest do a = ldf["a"] [ - b: Series.window_max(a, 2, weights: [1.0, 2.0]), + # b: Series.window_max(a, 2, weights: [1.0, 2.0]) c: Series.window_mean(a, 2, weights: [0.25, 0.75]), d: Series.window_median(a, 2, weights: [0.25, 0.75]), - e: Series.window_min(a, 2, weights: [1.0, 2.0]), - f: Series.window_sum(a, 2, weights: [1.0, 2.0]), + # e: Series.window_min(a, 2, weights: [1.0, 2.0]) + # f: Series.window_sum(a, 2, weights: [1.0, 2.0]) g: Series.window_standard_deviation(a, 2), p: Series.cumulative_max(a), q: Series.cumulative_min(a), diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 9f74b7aff..ec6648f16 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -299,7 +299,7 @@ defmodule Explorer.DataFrameTest do df = DF.new(a: [1, 2, 3, 4, 5, 6, 5], b: [9, 8, 7, 6, 5, 4, 3]) message = - "expecting the function to return a boolean LazySeries, but instead it returned a LazySeries of type {:f, 64}" + "expecting the function to return a boolean LazySeries, but instead it returned a LazySeries of type {:s, 64}" assert_raise ArgumentError, message, fn -> DF.filter_with(df, fn ldf -> @@ -811,7 +811,7 @@ 
defmodule Explorer.DataFrameTest do df = DF.new([%{a: ~s({"n": 1})}, %{a: ~s({"m": 1})}]) assert_raise RuntimeError, - "Polars Error: ComputeError(ErrString(\"error compiling JSONpath expression path error: \\nEof\\n\"))", + "Polars Error: error compiling JSON path expression path error: \nEof\n", fn -> DF.mutate(df, n: json_path_match(a, "$.")) end @@ -948,7 +948,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 0, 2], calc3: [2, 4, 8], calc4: [0.5, 1.0, 2.0], - calc5: [1.0, 4.0, 16.0], + calc5: [1, 4, 16], calc6: [0, 1, 2], calc7: [1, 0, 0], calc8: [:nan, :nan, :nan], @@ -964,7 +964,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64}, "calc8" => {:f, 64}, @@ -996,7 +996,7 @@ defmodule Explorer.DataFrameTest do calc2: [1, 0, -2], calc3: [2, 4, 8], calc4: [2.0, 1.0, 0.5], - calc5: [2.0, 4.0, 16.0], + calc5: [2, 4, 16], calc5_1: [2.0, 4.0, 16.0], calc6: [2, 1, 0], calc7: [0, 0, 2] @@ -1008,7 +1008,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc5_1" => {:f, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} @@ -1036,7 +1036,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 1, 2], calc3: [2, 2, 8], calc4: [0.5, 2.0, 2.0], - calc5: [1.0, 2.0, 16.0], + calc5: [1, 2, 16], calc6: [0, 2, 2], calc7: [1, 0, 0] } @@ -1047,7 +1047,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } @@ -1074,7 +1074,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 1, 2], calc3: [2, 2, 8], calc4: [0.5, 2.0, 2.0], - calc5: [1.0, 2.0, 16.0], + calc5: [1, 2, 16], calc6: [0, 2, 2], calc7: [1, 0, 0] } @@ -1085,7 +1085,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 
64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } diff --git a/test/explorer/polars_backend/expression_test.exs b/test/explorer/polars_backend/expression_test.exs index 1fafea9f8..354c31e58 100644 --- a/test/explorer/polars_backend/expression_test.exs +++ b/test/explorer/polars_backend/expression_test.exs @@ -19,7 +19,7 @@ defmodule Explorer.PolarsBackend.ExpressionTest do assert Expression.describe_filter_plan(df, expr) == String.trim(""" FILTER [(col("col_a")) == (5)] FROM - DF ["col_a", "col_b"]; PROJECT */2 COLUMNS; SELECTION: "None" + DF ["col_a", "col_b"]; PROJECT */2 COLUMNS; SELECTION: None """) end @@ -93,8 +93,8 @@ defmodule Explorer.PolarsBackend.ExpressionTest do assert Expression.describe_filter_plan(df, expr) == String.trim(""" - FILTER [(col("col_a")) == (col("col_b"))] FROM - DF ["col_a", "col_b"]; PROJECT */2 COLUMNS; SELECTION: "None" + FILTER [(col("col_a").cast(Float64)) == (col("col_b"))] FROM + DF ["col_a", "col_b"]; PROJECT */2 COLUMNS; SELECTION: None """) end end diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index 84a7bf446..97be777f4 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -2290,7 +2290,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(base, power) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, 4, 3] end end @@ -2315,8 +2315,8 @@ defmodule Explorer.SeriesTest do result = Series.pow(base, power) - assert result.dtype == {:f, 64} - assert Series.to_list(result) === [1.0, 4.0, 3.0] + assert result.dtype == {:s, 64} + assert Series.to_list(result) === [1, 4, 3] end end @@ -2392,7 +2392,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, s2) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, nil, 3] end @@ -2402,7 +2402,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, s2) - assert 
result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, nil, 3] end @@ -2412,7 +2412,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, s2) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, nil, 3] end @@ -2421,7 +2421,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, 2) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, 4, 9] end