Skip to content

Commit

Permalink
Align types across all csv methods
Browse files: browse the repository at this point in the history
  • Loading branch information
svaningelgem committed Oct 7, 2023
1 parent 99f9ef2 commit b5404d5
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 34 deletions.
6 changes: 3 additions & 3 deletions crates/polars-plan/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ impl LogicalPlanBuilder {
low_memory: bool,
comment_char: Option<u8>,
quote_char: Option<u8>,
line_terminator: u8,
eol_char: u8,
null_values: Option<NullValues>,
infer_schema_length: Option<usize>,
rechunk: bool,
Expand Down Expand Up @@ -322,7 +322,7 @@ impl LogicalPlanBuilder {
skip_rows_after_header,
comment_char,
quote_char,
line_terminator,
eol_char,
null_values.as_ref(),
try_parse_dates,
raise_if_empty,
Expand Down Expand Up @@ -374,7 +374,7 @@ impl LogicalPlanBuilder {
low_memory,
comment_char,
quote_char,
eol_char: line_terminator,
eol_char,
null_values,
encoding,
try_parse_dates,
Expand Down
10 changes: 5 additions & 5 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ def _read_csv(
columns: Sequence[int] | Sequence[str] | None = None,
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: None | (SchemaDict | Sequence[PolarsDataType]) = None,
schema: None | SchemaDict = None,
Expand Down Expand Up @@ -776,7 +776,7 @@ def _read_csv(
n_rows,
skip_rows,
projection,
delimiter_char,
ord(delimiter_char),
rechunk,
columns,
encoding,
Expand All @@ -785,15 +785,15 @@ def _read_csv(
dtype_list,
dtype_slice,
low_memory,
comment_char,
quote_char,
ord(comment_char) if comment_char else None,
ord(quote_char) if quote_char else None,
processed_null_values,
missing_utf8_is_empty_string,
try_parse_dates,
skip_rows_after_header,
_prepare_row_count_args(row_count_name, row_count_offset),
sample_size=sample_size,
eol_char=eol_char,
eol_char=ord(eol_char),
raise_if_empty=raise_if_empty,
truncate_ragged_lines=truncate_ragged_lines,
schema=schema,
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/io/csv/batched_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(
columns: Sequence[int] | Sequence[str] | None = None,
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: None | (SchemaDict | Sequence[PolarsDataType]) = None,
null_values: str | Sequence[str] | dict[str, str] | None = None,
Expand Down
6 changes: 3 additions & 3 deletions py-polars/polars/io/csv/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def read_csv(
new_columns: Sequence[str] | None = None,
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: Mapping[str, PolarsDataType] | Sequence[PolarsDataType] | None = None,
schema: SchemaDict | None = None,
Expand Down Expand Up @@ -406,7 +406,7 @@ def read_csv_batched(
new_columns: Sequence[str] | None = None,
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: Mapping[str, PolarsDataType] | Sequence[PolarsDataType] | None = None,
null_values: str | Sequence[str] | dict[str, str] | None = None,
Expand Down Expand Up @@ -700,7 +700,7 @@ def scan_csv(
has_header: bool = True,
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: SchemaDict | Sequence[PolarsDataType] | None = None,
schema: SchemaDict | None = None,
Expand Down
10 changes: 5 additions & 5 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ def _scan_csv(
has_header: bool = True,
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: SchemaDict | None = None,
schema: SchemaDict | None = None,
Expand Down Expand Up @@ -359,16 +359,16 @@ def _scan_csv(
self = cls.__new__(cls)
self._ldf = PyLazyFrame.new_from_csv(
source,
delimiter_char,
ord(delimiter_char),
has_header,
ignore_errors,
skip_rows,
n_rows,
cache,
dtype_list,
low_memory,
comment_char,
quote_char,
ord(comment_char) if comment_char else None,
ord(quote_char) if quote_char else None,
processed_null_values,
missing_utf8_is_empty_string,
infer_schema_length,
Expand All @@ -378,7 +378,7 @@ def _scan_csv(
encoding,
_prepare_row_count_args(row_count_name, row_count_offset),
try_parse_dates,
eol_char=eol_char,
eol_char=ord(eol_char),
raise_if_empty=raise_if_empty,
truncate_ragged_lines=truncate_ragged_lines,
schema=schema,
Expand Down
13 changes: 5 additions & 8 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ impl PyDataFrame {
n_rows: Option<usize>,
skip_rows: usize,
projection: Option<Vec<usize>>,
delimiter_char: &str,
delimiter_char: u8,
rechunk: bool,
columns: Option<Vec<String>>,
encoding: Wrap<CsvEncoding>,
Expand All @@ -188,24 +188,21 @@ impl PyDataFrame {
overwrite_dtype: Option<Vec<(&str, Wrap<DataType>)>>,
overwrite_dtype_slice: Option<Vec<Wrap<DataType>>>,
low_memory: bool,
comment_char: Option<&str>,
quote_char: Option<&str>,
comment_char: Option<u8>,
quote_char: Option<u8>,
null_values: Option<Wrap<NullValues>>,
missing_utf8_is_empty_string: bool,
try_parse_dates: bool,
skip_rows_after_header: usize,
row_count: Option<(String, IdxSize)>,
sample_size: usize,
eol_char: &str,
eol_char: u8,
raise_if_empty: bool,
truncate_ragged_lines: bool,
schema: Option<Wrap<Schema>>,
) -> PyResult<Self> {
let null_values = null_values.map(|w| w.0);
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
let eol_char = eol_char.as_bytes()[0];
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
let quote_char = quote_char.and_then(|s| s.as_bytes().first().copied());

let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
overwrite_dtype
Expand All @@ -229,7 +226,7 @@ impl PyDataFrame {
.infer_schema(infer_schema_length)
.has_header(has_header)
.with_n_rows(n_rows)
.with_delimiter(delimiter_char.as_bytes()[0])
.with_delimiter(delimiter_char)
.with_skip_rows(skip_rows)
.with_ignore_errors(ignore_errors)
.with_projection(projection)
Expand Down
14 changes: 5 additions & 9 deletions py-polars/src/lazyframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,16 +146,16 @@ impl PyLazyFrame {
)]
fn new_from_csv(
path: String,
delimiter_char: &str,
delimiter_char: u8,
has_header: bool,
ignore_errors: bool,
skip_rows: usize,
n_rows: Option<usize>,
cache: bool,
overwrite_dtype: Option<Vec<(&str, Wrap<DataType>)>>,
low_memory: bool,
comment_char: Option<&str>,
quote_char: Option<&str>,
comment_char: Option<u8>,
quote_char: Option<u8>,
null_values: Option<Wrap<NullValues>>,
missing_utf8_is_empty_string: bool,
infer_schema_length: Option<usize>,
Expand All @@ -165,16 +165,12 @@ impl PyLazyFrame {
encoding: Wrap<CsvEncoding>,
row_count: Option<(String, IdxSize)>,
try_parse_dates: bool,
eol_char: &str,
eol_char: u8,
raise_if_empty: bool,
truncate_ragged_lines: bool,
schema: Option<Wrap<Schema>>,
) -> PyResult<Self> {
let null_values = null_values.map(|w| w.0);
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
let delimiter = delimiter_char.as_bytes()[0];
let eol_char = eol_char.as_bytes()[0];
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });

let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
Expand All @@ -185,7 +181,7 @@ impl PyLazyFrame {
});
let mut r = LazyCsvReader::new(path)
.with_infer_schema_length(infer_schema_length)
.with_delimiter(delimiter)
.with_delimiter(delimiter_char)
.has_header(has_header)
.with_ignore_errors(ignore_errors)
.with_skip_rows(skip_rows)
Expand Down

0 comments on commit b5404d5

Please sign in to comment.