Skip to content

Commit

Permalink
Add non-utf8 encoded test
Browse files Browse the repository at this point in the history
  • Loading branch information
delucchi-cmu committed Nov 14, 2024
1 parent e069f4b commit b3c1d38
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
6 changes: 2 additions & 4 deletions src/hats/io/file_io/file_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def load_csv_to_pandas(file_pointer: str | Path | UPath, **kwargs) -> pd.DataFra


def load_csv_to_pandas_generator(
file_pointer: str | Path | UPath, *, chunksize=10_000, open_mode=None, compression=None, **kwargs
file_pointer: str | Path | UPath, *, chunksize=10_000, compression=None, **kwargs
) -> Generator[pd.DataFrame]:
"""Load a csv file to a pandas dataframe
Args:
Expand All @@ -119,9 +119,7 @@ def load_csv_to_pandas_generator(
pandas dataframe loaded from CSV
"""
file_pointer = get_upath(file_pointer)
if open_mode is None:
open_mode = "r" if compression is None else "rb"
with file_pointer.open(mode=open_mode, compression=compression, **kwargs) as csv_file:
with file_pointer.open(mode="rb", compression=compression, **kwargs) as csv_file:
with pd.read_csv(csv_file, chunksize=chunksize, **kwargs) as reader:
yield from reader

Expand Down
11 changes: 11 additions & 0 deletions tests/hats/io/file_io/test_file_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,17 @@ def test_load_csv_to_pandas_generator(small_sky_source_dir):
assert num_reads == 2


def test_load_csv_to_pandas_generator_encoding(tmp_path):
    """Read a non-UTF-8 (KOI8-R) CSV through the chunked CSV generator.

    Writes a one-row CSV encoded as KOI8-R, then reads it back with
    ``encoding="koi8-r"`` forwarded through the generator's keyword
    arguments. Verifies that exactly one chunk with one row is produced
    and that the Cyrillic cell values round-trip unchanged.

    Args:
        tmp_path: pytest-provided temporary directory for the test file.
    """
    path = tmp_path / "koi8-r.csv"
    with path.open(encoding="koi8-r", mode="w") as fh:
        fh.write("col1,col2\nыыы,яяя\n")

    num_reads = 0
    for frame in load_csv_to_pandas_generator(path, chunksize=7, encoding="koi8-r"):
        assert len(frame) == 1
        # Check the decoded text, not only the row count: decoding with the
        # wrong single-byte codec would still yield one row, just with
        # garbled characters.
        assert list(frame.columns) == ["col1", "col2"]
        assert frame["col1"].iloc[0] == "ыыы"
        assert frame["col2"].iloc[0] == "яяя"
        num_reads += 1

    # One data row with chunksize=7 must arrive as a single chunk.
    assert num_reads == 1


def test_write_df_to_csv(tmp_path):
random_df = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)), columns=list("ABCD"))
test_file_path = tmp_path / "test.csv"
Expand Down

0 comments on commit b3c1d38

Please sign in to comment.