Skip to content

Commit

Permalink
feat: Improve read_csv SQL table reading function defaults (better …
Browse files Browse the repository at this point in the history
…handle dates) (#16866)
  • Loading branch information
alexander-beedie authored Jun 11, 2024
1 parent 806feb5 commit d4462f7
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 6 deletions.
15 changes: 9 additions & 6 deletions crates/polars-sql/src/table_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,20 @@ impl PolarsTableFunctions {

/// Build a lazy CSV scan for the SQL `read_csv('<path>')` table function.
///
/// Exactly one argument is accepted; it is turned into a path string by
/// `get_file_path_from_arg`. The reader is created with
/// `with_try_parse_dates(true)` and `with_missing_is_null(true)` enabled.
///
/// Returns the path together with the resulting `LazyFrame`.
///
/// # Errors
/// Returns a `SQLSyntax` error when the number of arguments is not exactly
/// one, and propagates any error from `get_file_path_from_arg` or `finish()`.
#[cfg(feature = "csv")]
fn read_csv(&self, args: &[FunctionArg]) -> PolarsResult<(String, LazyFrame)> {
    use polars_lazy::frame::LazyFileListReader;

    // `len() == 1` already rejects an empty argument list, so a separate
    // emptiness guard would be redundant.
    polars_ensure!(args.len() == 1, SQLSyntax: "`read_csv` expects a single file path; found {:?} arguments", args.len());

    let path = self.get_file_path_from_arg(&args[0])?;

    // Build the reader once, with the options applied up front; there is no
    // need for an intermediate default-configured reader that is then discarded.
    let lf = LazyCsvReader::new(&path)
        .with_try_parse_dates(true)
        .with_missing_is_null(true)
        .finish()?;
    Ok((path, lf))
}

#[cfg(feature = "parquet")]
fn read_parquet(&self, args: &[FunctionArg]) -> PolarsResult<(String, LazyFrame)> {
polars_ensure!(!args.is_empty(), SQLSyntax: "`read_parquet` expected a path");
polars_ensure!(args.len() == 1, SQLSyntax: "`read_parquet` expects a single file path; found {:?} arguments", args.len());

let path = self.get_file_path_from_arg(&args[0])?;
let lf = LazyFrame::scan_parquet(&path, Default::default())?;
Expand All @@ -98,15 +101,15 @@ impl PolarsTableFunctions {

/// Build a lazy IPC scan for the SQL `read_ipc('<path>')` table function.
///
/// Exactly one argument is accepted; it is turned into a path string by
/// `get_file_path_from_arg` and handed to `LazyFrame::scan_ipc` with default
/// scan arguments.
///
/// Returns the path together with the resulting `LazyFrame`.
///
/// # Errors
/// Returns a `SQLSyntax` error when the number of arguments is not exactly
/// one, and propagates any error from `get_file_path_from_arg` or `scan_ipc`.
#[cfg(feature = "ipc")]
fn read_ipc(&self, args: &[FunctionArg]) -> PolarsResult<(String, LazyFrame)> {
    // `len() == 1` already rejects an empty argument list, so a separate
    // emptiness guard would be redundant.
    polars_ensure!(args.len() == 1, SQLSyntax: "`read_ipc` expects a single file path; found {:?} arguments", args.len());

    let path = self.get_file_path_from_arg(&args[0])?;
    let lf = LazyFrame::scan_ipc(&path, Default::default())?;
    Ok((path, lf))
}
#[cfg(feature = "json")]
fn read_ndjson(&self, args: &[FunctionArg]) -> PolarsResult<(String, LazyFrame)> {
polars_ensure!(!args.is_empty(), SQLSyntax: "`read_json` expected a path");
polars_ensure!(args.len() == 1, SQLSyntax: "`read_ndjson` expects a single file path; found {:?} arguments", args.len());

use polars_lazy::frame::LazyFileListReader;
use polars_lazy::prelude::LazyJsonLineReader;
Expand All @@ -125,7 +128,7 @@ impl PolarsTableFunctions {
))) => Ok(s.to_string()),
_ => polars_bail!(
SQLSyntax:
"only a single quoted string is accepted for the parameter; found: {}", arg,
"expected a valid file path as a single-quoted string; found: {}", arg,
),
}
}
Expand Down
28 changes: 28 additions & 0 deletions py-polars/tests/unit/sql/test_miscellaneous.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from datetime import date
from pathlib import Path

import pytest
Expand Down Expand Up @@ -241,3 +242,30 @@ def test_values_clause_table_registration() -> None:
res2 = ctx.execute("SELECT x, y FROM tbl")
for res in (res1, res2):
assert res.to_dict(as_series=False) == {"x": [-1], "y": [1]}


def test_read_csv(tmp_path: Path) -> None:
    """Round-trip a frame through a CSV file and the SQL `read_csv` function.

    The frame mixes nulls, an empty string, integers and dates, so the
    comparison covers empty-string vs null handling and date parsing. The
    test also checks that calling `read_csv` with several arguments raises
    a `SQLSyntaxError`.
    """
    expected = pl.DataFrame(
        {
            "label": ["lorem", None, "", "ipsum"],
            "num": [-1, None, 0, 1],
            "dt": [
                date(1969, 7, 5),
                date(1999, 12, 31),
                date(2077, 10, 10),
                None,
            ],
        }
    )
    csv_path = tmp_path / "test_sql_read.csv"
    expected.write_csv(csv_path)

    # read the file back through the SQL interface and compare with the source frame
    query = f"SELECT * FROM read_csv('{csv_path}')"
    result = pl.sql(query).collect()
    assert_frame_equal(expected, result)

    # more than one argument must be rejected with the argument count in the message
    expected_error = "`read_csv` expects a single file path; found 3 arguments"
    with pytest.raises(SQLSyntaxError, match=expected_error):
        pl.sql("SELECT * FROM read_csv('a','b','c')")

0 comments on commit d4462f7

Please sign in to comment.