Skip to content

Commit

Permalink
Working version 0.41.3
Browse files Browse the repository at this point in the history
  • Loading branch information
lkarthee committed Jul 14, 2024
1 parent c0b9763 commit 66ee582
Show file tree
Hide file tree
Showing 16 changed files with 119 additions and 116 deletions.
2 changes: 1 addition & 1 deletion lib/explorer/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2778,7 +2778,7 @@ defmodule Explorer.DataFrame do
#Explorer.DataFrame<
Polars[3 x 2]
a string ["a", "b", "c"]
b f64 [1.0, 4.0, 9.0]
b s64 [1, 4, 9]
>
It's possible to "reuse" a variable for different computations:
Expand Down
7 changes: 6 additions & 1 deletion lib/explorer/polars_backend/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,11 @@ defmodule Explorer.PolarsBackend.DataFrame do

{columns, with_projection} = column_names_or_projection(columns)

dtypes_list =
if not Enum.empty?(dtypes) do
Map.to_list(dtypes)
end

df =
Native.df_load_csv(
contents,
Expand All @@ -207,7 +212,7 @@ defmodule Explorer.PolarsBackend.DataFrame do
delimiter,
true,
columns,
Map.to_list(dtypes),
dtypes_list,
encoding,
nil_values,
parse_dates,
Expand Down
2 changes: 1 addition & 1 deletion lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3616,7 +3616,7 @@ defmodule Explorer.Series do
iex> Explorer.Series.pow(s, 3)
#Explorer.Series<
Polars[3]
f64 [8.0, 64.0, 216.0]
s64 [8, 64, 216]
>
iex> s = [2, 4, 6] |> Explorer.Series.from_list()
Expand Down
80 changes: 37 additions & 43 deletions native/explorer/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions native/explorer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ object_store = { version = "0.9", default-features = false, optional = true }
mimalloc = { version = "*", default-features = false }

[dependencies.polars]
version = "0.41"
version = "0.41.3"
default-features = false
features = [
"abs",
Expand Down Expand Up @@ -87,7 +87,7 @@ features = [
]

[dependencies.polars-ops]
version = "0.41"
version = "0.41.3"
features = ["abs", "ewma", "cum_agg", "cov"]

[features]
Expand Down
2 changes: 1 addition & 1 deletion native/explorer/rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[toolchain]
channel = "nightly-2024-06-23"
channel = "nightly-2024-06-24"
components = ["rustfmt", "clippy"]
profile = "minimal"
1 change: 0 additions & 1 deletion native/explorer/src/cloud_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ mod tests {

use polars::df;
use polars::prelude::DataFrame;
use polars::prelude::NamedFrom;

fn example_dataframe() -> DataFrame {
df!(
Expand Down
6 changes: 3 additions & 3 deletions native/explorer/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ pub fn df_sort_by(
.with_maintain_order(maintain_order)
.with_multithreaded(multithreaded)
.with_nulls_last(nulls_last)
.with_order_descendings(reverse);
.with_order_descending_multi(reverse);

let new_df = if groups.is_empty() {
// Note: we cannot use either df.sort or df.sort_with_options.
Expand Down Expand Up @@ -314,7 +314,7 @@ pub fn df_sort_with(
.with_maintain_order(maintain_order)
// .with_multithreaded(multithreaded)
.with_nulls_last(nulls_last)
.with_order_descendings(directions);
.with_order_descending_multi(directions);

let new_df = if groups.is_empty() {
df.lazy().sort_by_exprs(exprs, sort_options).collect()?
Expand Down Expand Up @@ -428,7 +428,7 @@ pub fn df_pivot_wider(
let mut new_df = pivot_stable(
&df,
&temp_id_names,
[pivot_column],
Some([pivot_column]),
Some(values_column),
false,
Some(PivotAgg::First),
Expand Down
19 changes: 12 additions & 7 deletions native/explorer/src/dataframe/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ pub fn df_from_csv(
.with_skip_rows_after_header(skip_rows_after_header)
.with_projection(projection.map(Arc::new))
.with_rechunk(do_rechunk)
.with_columns(column_names.map(Arc::new))
.with_columns(column_names.map(Arc::from))
.with_schema_overwrite(Some(schema_from_dtypes_pairs(dtypes)?))
.with_parse_options(
CsvParseOptions::default()
Expand Down Expand Up @@ -146,7 +146,7 @@ pub fn df_load_csv(
delimiter_as_byte: u8,
do_rechunk: bool,
column_names: Option<Vec<String>>,
dtypes: Vec<(&str, ExSeriesDtype)>,
dtypes: Option<Vec<(&str, ExSeriesDtype)>>,
encoding: &str,
null_vals: Vec<String>,
parse_dates: bool,
Expand All @@ -159,16 +159,21 @@ pub fn df_load_csv(

let cursor = Cursor::new(binary.as_slice());

let dataframe = CsvReadOptions::default()
let read_options = match dtypes {
Some(val) => CsvReadOptions::default().with_schema(Some(schema_from_dtypes_pairs(val)?)),
None => CsvReadOptions::default(),
};

let dataframe = read_options
.with_has_header(has_header)
.with_infer_schema_length(infer_schema_length)
.with_n_rows(stop_after_n_rows)
.with_columns(column_names.map(Arc::new))
.with_columns(column_names.map(Arc::from))
.with_skip_rows(skip_rows)
.with_skip_rows_after_header(skip_rows_after_header)
.with_projection(projection.map(Arc::new))
.with_rechunk(do_rechunk)
.with_schema(Some(schema_from_dtypes_pairs(dtypes)?))
//.with_schema(Some(schema_from_dtypes_pairs(dtypes)?))
.with_parse_options(
CsvParseOptions::default()
.with_separator(delimiter_as_byte)
Expand Down Expand Up @@ -519,7 +524,7 @@ pub fn df_from_ndjson(
let reader = JsonReader::new(buf_reader)
.with_json_format(JsonFormat::JsonLines)
.with_batch_size(batch_size)
.infer_schema_len(infer_schema_length);
.infer_schema_len(infer_schema_length.and_then(NonZeroUsize::new));

Ok(ExDataFrame::new(reader.finish()?))
}
Expand Down Expand Up @@ -576,7 +581,7 @@ pub fn df_load_ndjson(
let reader = JsonReader::new(cursor)
.with_json_format(JsonFormat::JsonLines)
.with_batch_size(batch_size)
.infer_schema_len(infer_schema_length);
.infer_schema_len(infer_schema_length.and_then(NonZeroUsize::new));

Ok(ExDataFrame::new(reader.finish()?))
}
Expand Down
Loading

0 comments on commit 66ee582

Please sign in to comment.