
Commit 3b8fa14

fixes
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent ac8c9cb commit 3b8fa14
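
This commit applies one pattern throughout: tests stop using pandas' private `ensure_clean` context manager, which deletes the temporary file as soon as its block exits, and instead write into pytest's function-scoped `tmp_path` directory via Modin's `get_unique_filename` helper, so the file survives until pytest cleans the directory after the test. A minimal before/after sketch, assuming only the calls visible in the diffs below (the test names here are hypothetical):

# Before: pandas' private helper removes the file the moment the
# `with` block exits, so every read must complete inside it.
from pandas._testing import ensure_clean

def test_old_style():
    with ensure_clean(".csv") as file:
        with open(file, "w") as f:
            f.write("a\n1")
        ...  # all reads must finish before the block ends

# After: the file lives in a pytest-managed temporary directory
# (`tmp_path`) and is removed by pytest after the test finishes.
from modin.pandas.test.utils import get_unique_filename

def test_new_style(tmp_path):
    unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path)
    with open(unique_filename, "w") as f:
        f.write("a\n1")
    ...  # reads may run, or be deferred, after the write block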

File tree

5 files changed: +165 -190 lines changed


modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py

Lines changed: 41 additions & 41 deletions
@@ -18,14 +18,14 @@
 import pandas
 import pyarrow
 import pytest
-from pandas._testing import ensure_clean
 from pandas.core.dtypes.common import is_list_like
 from pyhdk import __version__ as hdk_version
 
 from modin.config import StorageFormat
 from modin.pandas.test.utils import (
     create_test_dfs,
     default_to_pandas_ignore_string,
+    get_unique_filename,
     io_ops_bad_exc,
     random_state,
     test_data,
@@ -324,17 +324,17 @@ def test_read_csv_datetime(
 
     @pytest.mark.parametrize("engine", [None, "arrow"])
     @pytest.mark.parametrize("parse_dates", [None, True, False])
-    def test_read_csv_datetime_tz(self, engine, parse_dates):
-        with ensure_clean(".csv") as file:
-            with open(file, "w") as f:
-                f.write("test\n2023-01-01T00:00:00.000-07:00")
+    def test_read_csv_datetime_tz(self, engine, parse_dates, tmp_path):
+        unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path)
+        with open(unique_filename, "w") as f:
+            f.write("test\n2023-01-01T00:00:00.000-07:00")
 
-            eval_io(
-                fn_name="read_csv",
-                filepath_or_buffer=file,
-                md_extra_kwargs={"engine": engine},
-                parse_dates=parse_dates,
-            )
+        eval_io(
+            fn_name="read_csv",
+            filepath_or_buffer=unique_filename,
+            md_extra_kwargs={"engine": engine},
+            parse_dates=parse_dates,
+        )
 
     @pytest.mark.parametrize("engine", [None, "arrow"])
     @pytest.mark.parametrize(
@@ -382,26 +382,26 @@ def test_read_csv_col_handling(
             "c1.1,c1,c1.1,c1,c1.1,c1.2,c1.2,c2",
         ],
     )
-    def test_read_csv_duplicate_cols(self, cols):
+    def test_read_csv_duplicate_cols(self, cols, tmp_path):
         def test(df, lib, **kwargs):
             data = f"{cols}\n"
-            with ensure_clean(".csv") as fname:
-                with open(fname, "w") as f:
-                    f.write(data)
-                return lib.read_csv(fname)
+            unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path)
+            with open(unique_filename, "w") as f:
+                f.write(data)
+            return lib.read_csv(unique_filename)
 
         run_and_compare(test, data={})
 
-    def test_read_csv_dtype_object(self):
+    def test_read_csv_dtype_object(self, tmp_path):
         with pytest.warns(UserWarning) as warns:
-            with ensure_clean(".csv") as file:
-                with open(file, "w") as f:
-                    f.write("test\ntest")
+            unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path)
+            with open(unique_filename, "w") as f:
+                f.write("test\ntest")
 
-                def test(**kwargs):
-                    return pd.read_csv(file, dtype={"test": "object"})
+            def test(**kwargs):
+                return pd.read_csv(unique_filename, dtype={"test": "object"})
 
-                run_and_compare(test, data={})
+            run_and_compare(test, data={})
         for warn in warns.list:
             assert not re.match(r".*defaulting to pandas.*", str(warn))
@@ -870,30 +870,30 @@ def concat(df1, df2, lib, **kwargs):
     @pytest.mark.parametrize("transform", [True, False])
     @pytest.mark.parametrize("sort_last", [True, False])
     # RecursionError in case of concatenation of big number of frames
-    def test_issue_5889(self, transform, sort_last):
-        with ensure_clean(".csv") as file:
-            data = {"a": [1, 2, 3], "b": [1, 2, 3]} if transform else {"a": [1, 2, 3]}
-            pandas.DataFrame(data).to_csv(file, index=False)
+    def test_issue_5889(self, transform, sort_last, tmp_path):
+        unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path)
+        data = {"a": [1, 2, 3], "b": [1, 2, 3]} if transform else {"a": [1, 2, 3]}
+        pandas.DataFrame(data).to_csv(unique_filename, index=False)
 
-            def test_concat(lib, **kwargs):
-                if transform:
+        def test_concat(lib, **kwargs):
+            if transform:
 
-                    def read_csv():
-                        return lib.read_csv(file)["b"]
+                def read_csv():
+                    return lib.read_csv(unique_filename)["b"]
 
-                else:
+            else:
 
-                    def read_csv():
-                        return lib.read_csv(file)
+                def read_csv():
+                    return lib.read_csv(unique_filename)
 
-                df = read_csv()
-                for _ in range(100):
-                    df = lib.concat([df, read_csv()])
-                if sort_last:
-                    df = lib.concat([df, read_csv()], sort=True)
-                return df
+            df = read_csv()
+            for _ in range(100):
+                df = lib.concat([df, read_csv()])
+            if sort_last:
+                df = lib.concat([df, read_csv()], sort=True)
+            return df
 
-            run_and_compare(test_concat, data={})
+        run_and_compare(test_concat, data={})
 
 
 class TestGroupby:
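
A detail worth noting in the hunks above: `tmp_path` is an ordinary pytest fixture, so requesting it is just a matter of adding a test parameter, and the inner helpers passed to `run_and_compare` pick up `unique_filename` by closure from the enclosing scope. A minimal illustration of that capture pattern (the test and helper names are hypothetical):

from pathlib import Path

def test_closure_capture(tmp_path: Path):
    # pytest injects a per-test temporary directory as `tmp_path`.
    unique_filename = tmp_path / "data.csv"
    unique_filename.write_text("a\n1\n2\n3")

    def reader():
        # The closure captures `unique_filename` from the enclosing
        # test; no argument threading is needed.
        return unique_filename.read_text()

    assert reader().startswith("a")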

modin/experimental/pandas/test/test_io_exp.py

Lines changed: 28 additions & 40 deletions
@@ -18,13 +18,13 @@
 import numpy as np
 import pandas
 import pytest
-from pandas._testing import ensure_clean
 
 import modin.experimental.pandas as pd
-from modin.config import AsyncReadMode, Engine
+from modin.config import Engine
 from modin.pandas.test.utils import (
     df_equals,
     eval_general,
+    get_unique_filename,
     parse_dates_values_by_id,
     test_data,
     time_parsing_csv_path,
@@ -355,7 +355,7 @@ def test_xml_glob(tmp_path, filename):
     reason=f"{Engine.get()} does not have experimental read_custom_text API",
 )
 @pytest.mark.parametrize("set_async_read_mode", [False, True], indirect=True)
-def test_read_custom_json_text(set_async_read_mode):
+def test_read_custom_json_text(set_async_read_mode, tmp_path):
     def _generate_json(file_name, nrows, ncols):
         data = np.random.rand(nrows, ncols)
         df = pandas.DataFrame(data, columns=[f"col{x}" for x in range(ncols)])
@@ -374,33 +374,27 @@ def _custom_parser(io_input, **kwargs):
             result[key].append(obj[key])
         return pandas.DataFrame(result).rename(columns={"col0": "testID"})
 
-    with ensure_clean() as filename:
-        _generate_json(filename, 64, 8)
+    unique_filename = get_unique_filename(data_dir=tmp_path)
+    _generate_json(unique_filename, 64, 8)
 
-        df1 = pd.read_custom_text(
-            filename,
-            columns=["testID", "col1", "col3"],
-            custom_parser=_custom_parser,
-            is_quoting=False,
-        )
-        df2 = pd.read_json(filename, lines=True)[["col0", "col1", "col3"]].rename(
-            columns={"col0": "testID"}
-        )
-        if AsyncReadMode.get():
-            # If read operations are asynchronous, then the dataframes
-            # check should be inside `ensure_clean` context
-            # because the file may be deleted before actual reading starts
-            df_equals(df1, df2)
-        if not AsyncReadMode.get():
-            df_equals(df1, df2)
+    df1 = pd.read_custom_text(
+        unique_filename,
+        columns=["testID", "col1", "col3"],
+        custom_parser=_custom_parser,
+        is_quoting=False,
+    )
+    df2 = pd.read_json(unique_filename, lines=True)[["col0", "col1", "col3"]].rename(
+        columns={"col0": "testID"}
+    )
+    df_equals(df1, df2)
 
 
 @pytest.mark.skipif(
     Engine.get() not in ("Ray", "Unidist", "Dask"),
     reason=f"{Engine.get()} does not have experimental API",
 )
 @pytest.mark.parametrize("set_async_read_mode", [False, True], indirect=True)
-def test_read_evaluated_dict(set_async_read_mode):
+def test_read_evaluated_dict(set_async_read_mode, tmp_path):
     def _generate_evaluated_dict(file_name, nrows, ncols):
         result = {}
         keys = [f"col{x}" for x in range(ncols)]
@@ -430,23 +424,17 @@ def columns_callback(io_input, **kwargs):
             break
         return columns
 
-    with ensure_clean() as filename:
-        _generate_evaluated_dict(filename, 64, 8)
+    unique_filename = get_unique_filename(data_dir=tmp_path)
+    _generate_evaluated_dict(unique_filename, 64, 8)
 
-        df1 = pd.read_custom_text(
-            filename,
-            columns=["col1", "col2"],
-            custom_parser=_custom_parser,
-        )
-        assert df1.shape == (64, 2)
+    df1 = pd.read_custom_text(
+        unique_filename,
+        columns=["col1", "col2"],
+        custom_parser=_custom_parser,
+    )
+    assert df1.shape == (64, 2)
 
-    df2 = pd.read_custom_text(
-        filename, columns=columns_callback, custom_parser=_custom_parser
-    )
-    if AsyncReadMode.get():
-        # If read operations are asynchronous, then the dataframes
-        # check should be inside `ensure_clean` context
-        # because the file may be deleted before actual reading starts
-        df_equals(df1, df2)
-    if not AsyncReadMode.get():
-        df_equals(df1, df2)
+    df2 = pd.read_custom_text(
+        unique_filename, columns=columns_callback, custom_parser=_custom_parser
+    )
+    df_equals(df1, df2)
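
The removed `AsyncReadMode` branches existed only because `ensure_clean` deletes the file on context exit: per the removed comments, an asynchronous read may merely be scheduled and materialize after the file is gone. With `tmp_path` the file persists for the whole test, so a single `df_equals(df1, df2)` suffices. A hypothetical sketch of the hazard the old code guarded against (the file contents and `read_json` call are illustrative, not from this diff):

import modin.experimental.pandas as pd
from pandas._testing import ensure_clean

with ensure_clean(".json") as filename:
    with open(filename, "w") as f:
        f.write('{"col0": 0, "col1": 1}\n')
    # Under AsyncReadMode this may only *schedule* the read.
    df = pd.read_json(filename, lines=True)
# The temporary file is deleted here. If the read was deferred,
# materializing `df` below can fail because its source is gone,
# which is why the old tests kept the df_equals check inside the context.
print(len(df))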

modin/pandas/test/dataframe/test_indexing.py

Lines changed: 11 additions & 9 deletions
@@ -17,7 +17,6 @@
 import numpy as np
 import pandas
 import pytest
-from pandas._testing import ensure_clean
 from pandas.testing import assert_index_equal
 
 import modin.pandas as pd
@@ -35,6 +34,7 @@
     df_equals,
     eval_general,
     generate_multiindex,
+    get_unique_filename,
     int_arg_keys,
     int_arg_values,
     name_contains,
@@ -2207,14 +2207,16 @@ def test___setitem__partitions_aligning():
     df_equals(md_df, pd_df)
 
 
-def test___setitem__with_mismatched_partitions():
-    with ensure_clean(".csv") as fname:
-        np.savetxt(fname, np.random.randint(0, 100, size=(200_000, 99)), delimiter=",")
-        modin_df = pd.read_csv(fname)
-        pandas_df = pandas.read_csv(fname)
-        modin_df["new"] = pd.Series(list(range(len(modin_df))))
-        pandas_df["new"] = pandas.Series(list(range(len(pandas_df))))
-        df_equals(modin_df, pandas_df)
+def test___setitem__with_mismatched_partitions(tmp_path):
+    unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path)
+    np.savetxt(
+        unique_filename, np.random.randint(0, 100, size=(200_000, 99)), delimiter=","
+    )
+    modin_df = pd.read_csv(unique_filename)
+    pandas_df = pandas.read_csv(unique_filename)
+    modin_df["new"] = pd.Series(list(range(len(modin_df))))
+    pandas_df["new"] = pandas.Series(list(range(len(pandas_df))))
+    df_equals(modin_df, pandas_df)
 
 
 def test___setitem__mask():
