diff --git a/modin/conftest.py b/modin/conftest.py index 66500cae54a..66bfcf80cad 100644 --- a/modin/conftest.py +++ b/modin/conftest.py @@ -16,7 +16,6 @@ import os import platform -import shutil import subprocess import sys import time @@ -340,16 +339,15 @@ def fixture(tmp_path): @pytest.fixture -def make_parquet_file(): +def make_parquet_file(tmp_path): """Pytest fixture factory that makes a parquet file/dir for testing. Yields: Function that generates a parquet file/dir """ - filenames = [] def _make_parquet_file( - filename, + filename=None, nrows=NROWS, ncols=2, force=True, @@ -369,6 +367,8 @@ def _make_parquet_file( partitioned_columns: Create a partitioned directory using pandas. row_group_size: Maximum size of each row group. """ + if filename is None: + filename = get_unique_filename(extension=".parquet", data_dir=tmp_path) if force or not os.path.exists(filename): df = pandas.DataFrame( {f"col{x + 1}": np.arange(nrows) for x in range(ncols)} @@ -395,19 +395,11 @@ def _make_parquet_file( ) else: df.to_parquet(filename, row_group_size=row_group_size) - filenames.append(filename) + return filename # Return function that generates parquet files yield _make_parquet_file - # Delete parquet file that was created - for path in filenames: - if os.path.exists(path): - if os.path.isdir(path): - shutil.rmtree(path) - else: - os.remove(path) - @pytest.fixture def make_sql_connection(): diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py index 366d1256763..d38927628ac 100644 --- a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py +++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py @@ -18,7 +18,6 @@ import pandas import pyarrow import pytest -from pandas._testing import ensure_clean from pandas.core.dtypes.common import is_list_like from pyhdk import __version__ as hdk_version @@ -26,6 +25,7 @@ from modin.pandas.test.utils import ( create_test_dfs, default_to_pandas_ignore_string, + get_unique_filename, random_state, test_data, ) @@ -341,17 +341,17 @@ def test_read_csv_datetime( @pytest.mark.parametrize("engine", [None, "arrow"]) @pytest.mark.parametrize("parse_dates", [None, True, False]) - def test_read_csv_datetime_tz(self, engine, parse_dates): - with ensure_clean(".csv") as file: - with open(file, "w") as f: - f.write("test\n2023-01-01T00:00:00.000-07:00") + def test_read_csv_datetime_tz(self, engine, parse_dates, tmp_path): + unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path) + with open(unique_filename, "w") as f: + f.write("test\n2023-01-01T00:00:00.000-07:00") - eval_io( - fn_name="read_csv", - filepath_or_buffer=file, - md_extra_kwargs={"engine": engine}, - parse_dates=parse_dates, - ) + eval_io( + fn_name="read_csv", + filepath_or_buffer=unique_filename, + md_extra_kwargs={"engine": engine}, + parse_dates=parse_dates, + ) @pytest.mark.parametrize("engine", [None, "arrow"]) @pytest.mark.parametrize( @@ -399,26 +399,26 @@ def test_read_csv_col_handling( "c1.1,c1,c1.1,c1,c1.1,c1.2,c1.2,c2", ], ) - def test_read_csv_duplicate_cols(self, cols): + def test_read_csv_duplicate_cols(self, cols, tmp_path): def test(df, lib, **kwargs): data = f"{cols}\n" - with ensure_clean(".csv") as fname: - with open(fname, "w") as f: - f.write(data) - return lib.read_csv(fname) + unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path) + with open(unique_filename, "w") as f: + f.write(data) + return lib.read_csv(unique_filename) run_and_compare(test, data={}) - def test_read_csv_dtype_object(self): + def test_read_csv_dtype_object(self, tmp_path): with pytest.warns(UserWarning) as warns: - with ensure_clean(".csv") as file: - with open(file, "w") as f: - f.write("test\ntest") + unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path) + with open(unique_filename, "w") as f: + f.write("test\ntest") - def test(**kwargs): - return pd.read_csv(file, dtype={"test": "object"}) + def test(**kwargs): + return pd.read_csv(unique_filename, dtype={"test": "object"}) - run_and_compare(test, data={}) + run_and_compare(test, data={}) for warn in warns.list: assert not re.match(r".*defaulting to pandas.*", str(warn)) @@ -892,30 +892,30 @@ def concat(df1, df2, lib, **kwargs): @pytest.mark.parametrize("transform", [True, False]) @pytest.mark.parametrize("sort_last", [True, False]) # RecursionError in case of concatenation of big number of frames - def test_issue_5889(self, transform, sort_last): - with ensure_clean(".csv") as file: - data = {"a": [1, 2, 3], "b": [1, 2, 3]} if transform else {"a": [1, 2, 3]} - pandas.DataFrame(data).to_csv(file, index=False) + def test_issue_5889(self, transform, sort_last, tmp_path): + unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path) + data = {"a": [1, 2, 3], "b": [1, 2, 3]} if transform else {"a": [1, 2, 3]} + pandas.DataFrame(data).to_csv(unique_filename, index=False) - def test_concat(lib, **kwargs): - if transform: + def test_concat(lib, **kwargs): + if transform: - def read_csv(): - return lib.read_csv(file)["b"] + def read_csv(): + return lib.read_csv(unique_filename)["b"] - else: + else: - def read_csv(): - return lib.read_csv(file) + def read_csv(): + return lib.read_csv(unique_filename) - df = read_csv() - for _ in range(100): - df = lib.concat([df, read_csv()]) - if sort_last: - df = lib.concat([df, read_csv()], sort=True) - return df + df = read_csv() + for _ in range(100): + df = lib.concat([df, read_csv()]) + if sort_last: + df = lib.concat([df, read_csv()], sort=True) + return df - run_and_compare(test_concat, data={}) + run_and_compare(test_concat, data={}) class TestGroupby: diff --git a/modin/experimental/pandas/test/test_io_exp.py b/modin/experimental/pandas/test/test_io_exp.py index dfc11f7cdde..37bff8de06a 100644 --- a/modin/experimental/pandas/test/test_io_exp.py +++ b/modin/experimental/pandas/test/test_io_exp.py @@ -18,13 +18,13 @@ import numpy as np import pandas import pytest -from pandas._testing import ensure_clean import modin.experimental.pandas as pd -from modin.config import AsyncReadMode, Engine +from modin.config import Engine from modin.pandas.test.utils import ( df_equals, eval_general, + get_unique_filename, parse_dates_values_by_id, test_data, time_parsing_csv_path, @@ -359,7 +359,7 @@ def test_xml_glob(tmp_path, filename): reason=f"{Engine.get()} does not have experimental read_custom_text API", ) @pytest.mark.parametrize("set_async_read_mode", [False, True], indirect=True) -def test_read_custom_json_text(set_async_read_mode): +def test_read_custom_json_text(set_async_read_mode, tmp_path): def _generate_json(file_name, nrows, ncols): data = np.random.rand(nrows, ncols) df = pandas.DataFrame(data, columns=[f"col{x}" for x in range(ncols)]) @@ -378,25 +378,19 @@ def _custom_parser(io_input, **kwargs): result[key].append(obj[key]) return pandas.DataFrame(result).rename(columns={"col0": "testID"}) - with ensure_clean() as filename: - _generate_json(filename, 64, 8) + unique_filename = get_unique_filename(data_dir=tmp_path) + _generate_json(unique_filename, 64, 8) - df1 = pd.read_custom_text( - filename, - columns=["testID", "col1", "col3"], - custom_parser=_custom_parser, - is_quoting=False, - ) - df2 = pd.read_json(filename, lines=True)[["col0", "col1", "col3"]].rename( - columns={"col0": "testID"} - ) - if AsyncReadMode.get(): - # If read operations are asynchronous, then the dataframes - # check should be inside `ensure_clean` context - # because the file may be deleted before actual reading starts - df_equals(df1, df2) - if not AsyncReadMode.get(): - df_equals(df1, df2) + df1 = pd.read_custom_text( + unique_filename, + columns=["testID", "col1", "col3"], + custom_parser=_custom_parser, + is_quoting=False, + ) + df2 = pd.read_json(unique_filename, lines=True)[["col0", "col1", "col3"]].rename( + columns={"col0": "testID"} + ) + df_equals(df1, df2) @pytest.mark.skipif( @@ -404,7 +398,7 @@ def _custom_parser(io_input, **kwargs): reason=f"{Engine.get()} does not have experimental API", ) @pytest.mark.parametrize("set_async_read_mode", [False, True], indirect=True) -def test_read_evaluated_dict(set_async_read_mode): +def test_read_evaluated_dict(set_async_read_mode, tmp_path): def _generate_evaluated_dict(file_name, nrows, ncols): result = {} keys = [f"col{x}" for x in range(ncols)] @@ -434,23 +428,17 @@ def columns_callback(io_input, **kwargs): break return columns - with ensure_clean() as filename: - _generate_evaluated_dict(filename, 64, 8) + unique_filename = get_unique_filename(data_dir=tmp_path) + _generate_evaluated_dict(unique_filename, 64, 8) - df1 = pd.read_custom_text( - filename, - columns=["col1", "col2"], - custom_parser=_custom_parser, - ) - assert df1.shape == (64, 2) + df1 = pd.read_custom_text( + unique_filename, + columns=["col1", "col2"], + custom_parser=_custom_parser, + ) + assert df1.shape == (64, 2) - df2 = pd.read_custom_text( - filename, columns=columns_callback, custom_parser=_custom_parser - ) - if AsyncReadMode.get(): - # If read operations are asynchronous, then the dataframes - # check should be inside `ensure_clean` context - # because the file may be deleted before actual reading starts - df_equals(df1, df2) - if not AsyncReadMode.get(): - df_equals(df1, df2) + df2 = pd.read_custom_text( + unique_filename, columns=columns_callback, custom_parser=_custom_parser + ) + df_equals(df1, df2) diff --git a/modin/pandas/test/dataframe/test_indexing.py b/modin/pandas/test/dataframe/test_indexing.py index 027c968ba5c..3923dad512d 100644 --- a/modin/pandas/test/dataframe/test_indexing.py +++ b/modin/pandas/test/dataframe/test_indexing.py @@ -18,7 +18,6 @@ import numpy as np import pandas import pytest -from pandas._testing import ensure_clean import modin.pandas as pd from modin.config import MinPartitionSize, NPartitions, StorageFormat @@ -35,6 +34,7 @@ df_equals, eval_general, generate_multiindex, + get_unique_filename, int_arg_keys, int_arg_values, name_contains, @@ -2243,14 +2243,16 @@ def test___setitem__partitions_aligning(): df_equals(md_df, pd_df) -def test___setitem__with_mismatched_partitions(): - with ensure_clean(".csv") as fname: - np.savetxt(fname, np.random.randint(0, 100, size=(200_000, 99)), delimiter=",") - modin_df = pd.read_csv(fname) - pandas_df = pandas.read_csv(fname) - modin_df["new"] = pd.Series(list(range(len(modin_df)))) - pandas_df["new"] = pandas.Series(list(range(len(pandas_df)))) - df_equals(modin_df, pandas_df) +def test___setitem__with_mismatched_partitions(tmp_path): + unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path) + np.savetxt( + unique_filename, np.random.randint(0, 100, size=(200_000, 99)), delimiter="," + ) + modin_df = pd.read_csv(unique_filename) + pandas_df = pandas.read_csv(unique_filename) + modin_df["new"] = pd.Series(list(range(len(modin_df)))) + pandas_df["new"] = pandas.Series(list(range(len(pandas_df)))) + df_equals(modin_df, pandas_df) def test___setitem__mask(): diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py index be09c6e2f5f..6e30423c27c 100644 --- a/modin/pandas/test/test_io.py +++ b/modin/pandas/test/test_io.py @@ -31,7 +31,6 @@ import pytest import sqlalchemy as sa from packaging import version -from pandas._testing import ensure_clean from pandas.errors import ParserWarning from scipy import sparse @@ -493,16 +492,16 @@ def test_read_csv_parsing_3( nrows=nrows, ) - def test_read_csv_skipinitialspace(self): - with ensure_clean(".csv") as unique_filename: - str_initial_spaces = ( - "col1,col2,col3,col4\n" - + "five, six, seven, eight\n" - + " five, six, seven, eight\n" - + "five, six, seven, eight\n" - ) + def test_read_csv_skipinitialspace(self, tmp_path): + unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path) + str_initial_spaces = ( + "col1,col2,col3,col4\n" + + "five, six, seven, eight\n" + + " five, six, seven, eight\n" + + "five, six, seven, eight\n" + ) - eval_io_from_str(str_initial_spaces, unique_filename, skipinitialspace=True) + eval_io_from_str(str_initial_spaces, unique_filename, skipinitialspace=True) # NA and Missing Data Handling tests @pytest.mark.parametrize("na_values", ["custom_nan", "73"]) @@ -586,17 +585,17 @@ def test_read_csv_datetime( @pytest.mark.parametrize("date", ["2023-01-01 00:00:01.000000000", "2023"]) @pytest.mark.parametrize("dtype", [None, "str", {"id": "int64"}]) @pytest.mark.parametrize("parse_dates", [None, [], ["date"], [1]]) - def test_read_csv_dtype_parse_dates(self, date, dtype, parse_dates): - with ensure_clean(".csv") as filename: - with open(filename, "w") as file: - file.write(f"id,date\n1,{date}") - eval_io( - fn_name="read_csv", - # read_csv kwargs - filepath_or_buffer=filename, - dtype=dtype, - parse_dates=parse_dates, - ) + def test_read_csv_dtype_parse_dates(self, date, dtype, parse_dates, tmp_path): + unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path) + with open(unique_filename, "w") as file: + file.write(f"id,date\n1,{date}") + eval_io( + fn_name="read_csv", + # read_csv kwargs + filepath_or_buffer=unique_filename, + dtype=dtype, + parse_dates=parse_dates, + ) # Iteration tests @pytest.mark.parametrize("iterator", [True, False]) @@ -909,13 +908,12 @@ def test_read_csv_internal( # Issue related, specific or corner cases @pytest.mark.parametrize("nrows", [2, None]) - def test_read_csv_bad_quotes(self, nrows): + def test_read_csv_bad_quotes(self, nrows, tmp_path): csv_bad_quotes = ( '1, 2, 3, 4\none, two, three, four\nfive, "six", seven, "eight\n' ) - - with ensure_clean(".csv") as unique_filename: - eval_io_from_str(csv_bad_quotes, unique_filename, nrows=nrows) + unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path) + eval_io_from_str(csv_bad_quotes, unique_filename, nrows=nrows) def test_read_csv_categories(self): eval_io( @@ -1329,20 +1327,13 @@ def test_to_csv_with_index(self, tmp_path): eval_to_csv_file(tmp_path, modin_df, pandas_df, "csv") @pytest.mark.parametrize("set_async_read_mode", [False, True], indirect=True) - def test_read_csv_issue_5150(self, set_async_read_mode): - with ensure_clean(".csv") as unique_filename: - pandas_df = pandas.DataFrame(np.random.randint(0, 100, size=(2**6, 2**6))) - pandas_df.to_csv(unique_filename, index=False) - expected_pandas_df = pandas.read_csv(unique_filename, index_col=False) - modin_df = pd.read_csv(unique_filename, index_col=False) - actual_pandas_df = modin_df._to_pandas() - if AsyncReadMode.get(): - # If read operations are asynchronous, then the dataframes - # check should be inside `ensure_clean` context - # because the file may be deleted before actual reading starts - df_equals(expected_pandas_df, actual_pandas_df) - if not AsyncReadMode.get(): - df_equals(expected_pandas_df, actual_pandas_df) + def test_read_csv_issue_5150(self, set_async_read_mode, tmp_path): + unique_filename = get_unique_filename(extension="csv", data_dir=tmp_path) + pandas_df = pandas.DataFrame(np.random.randint(0, 100, size=(2**6, 2**6))) + pandas_df.to_csv(unique_filename, index=False) + expected_pandas_df = pandas.read_csv(unique_filename, index_col=False) + modin_actual_df = pd.read_csv(unique_filename, index_col=False) + df_equals(expected_pandas_df, modin_actual_df) @pytest.mark.parametrize("usecols", [None, [0, 1, 2, 3, 4]]) def test_read_csv_1930(self, usecols): @@ -1470,66 +1461,63 @@ def _test_read_parquet( "Skipping empty filters error case to avoid race condition - see #6460" ) - with ensure_clean(".parquet") as unique_filename: - unique_filename = path_type(unique_filename) - make_parquet_file( - filename=unique_filename, - row_group_size=row_group_size, - range_index_start=range_index_start, - range_index_step=range_index_step, - range_index_name=range_index_name, - ) + unique_filename = make_parquet_file( + row_group_size=row_group_size, + range_index_start=range_index_start, + range_index_step=range_index_step, + range_index_name=range_index_name, + ) + unique_filename = path_type(unique_filename) - eval_io( - fn_name="read_parquet", - # read_parquet kwargs - engine=engine, - path=unique_filename, - columns=columns, - filters=filters, - expected_exception=expected_exception, - ) + eval_io( + fn_name="read_parquet", + # read_parquet kwargs + engine=engine, + path=unique_filename, + columns=columns, + filters=filters, + expected_exception=expected_exception, + ) @pytest.mark.parametrize( "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"] ) def test_read_parquet_dtype_backend(self, engine, make_parquet_file, dtype_backend): - with ensure_clean(".parquet") as unique_filename: - make_parquet_file(filename=unique_filename, row_group_size=100) + unique_filename = make_parquet_file(row_group_size=100) - def comparator(df1, df2): - df_equals(df1, df2) - df_equals(df1.dtypes, df2.dtypes) - - expected_exception = None - if engine == "fastparquet": - expected_exception = ValueError( - "The 'dtype_backend' argument is not supported for the fastparquet engine" - ) + def comparator(df1, df2): + df_equals(df1, df2) + df_equals(df1.dtypes, df2.dtypes) - eval_io( - fn_name="read_parquet", - # read_parquet kwargs - engine=engine, - path=unique_filename, - dtype_backend=dtype_backend, - comparator=comparator, - expected_exception=expected_exception, + expected_exception = None + if engine == "fastparquet": + expected_exception = ValueError( + "The 'dtype_backend' argument is not supported for the fastparquet engine" ) + eval_io( + fn_name="read_parquet", + # read_parquet kwargs + engine=engine, + path=unique_filename, + dtype_backend=dtype_backend, + comparator=comparator, + expected_exception=expected_exception, + ) + # Tests issue #6778 - def test_read_parquet_no_extension(self, engine, make_parquet_file): - with ensure_clean(".parquet") as unique_filename: - # Remove the .parquet extension - no_ext_fname = unique_filename[: unique_filename.index(".parquet")] + def test_read_parquet_no_extension(self, engine, make_parquet_file, tmp_path): + unique_filename = get_unique_filename(extension="parquet", data_dir=tmp_path) + # Remove the .parquet extension + no_ext_fname = unique_filename[: unique_filename.index(".parquet")] - make_parquet_file(filename=no_ext_fname) - eval_io( - fn_name="read_parquet", - # read_parquet kwargs - engine=engine, - path=no_ext_fname, - ) + make_parquet_file(filename=no_ext_fname) + eval_io( + fn_name="read_parquet", + # read_parquet kwargs + engine=engine, + path=no_ext_fname, + ) @pytest.mark.parametrize( "filters", @@ -1591,14 +1579,12 @@ def test_read_parquet_range_index( def test_read_parquet_list_of_files_5698(self, engine, make_parquet_file): if engine == "fastparquet" and os.name == "nt": pytest.xfail(reason="https://github.com/pandas-dev/pandas/issues/51720") - with ensure_clean(".parquet") as f1, ensure_clean( - ".parquet" - ) as f2, ensure_clean(".parquet") as f3: - for f in [f1, f2, f3]: - make_parquet_file(filename=f) - eval_io(fn_name="read_parquet", path=[f1, f2, f3], engine=engine) - - def test_read_parquet_indexing_by_column(self, tmp_path, engine, make_parquet_file): + filenames = [None] * 3 + for i in range(3): + filenames[i] = make_parquet_file() + eval_io(fn_name="read_parquet", path=filenames, engine=engine) + + def test_read_parquet_indexing_by_column(self, engine, make_parquet_file): # Test indexing into a column of Modin with various parquet file row lengths. # Specifically, tests for https://github.com/modin-project/modin/issues/3527 # which fails when min_partition_size < nrows < min_partition_size * (num_partitions - 1) @@ -1606,8 +1592,7 @@ def test_read_parquet_indexing_by_column(self, tmp_path, engine, make_parquet_fi nrows = ( MinPartitionSize.get() + 1 ) # Use the minimal guaranteed failing value for nrows. - unique_filename = get_unique_filename(extension="parquet", data_dir=tmp_path) - make_parquet_file(filename=unique_filename, nrows=nrows) + unique_filename = make_parquet_file(nrows=nrows) parquet_df = pd.read_parquet(unique_filename, engine=engine) for col in parquet_df.columns: @@ -1826,7 +1811,6 @@ def test_read_parquet_directory_range_index_consistent_metadata( ) def test_read_parquet_partitioned_directory( self, - tmp_path, make_parquet_file, columns, filters, @@ -1834,9 +1818,7 @@ def test_read_parquet_partitioned_directory( range_index_step, engine, ): - unique_filename = get_unique_filename(extension=None, data_dir=tmp_path) - make_parquet_file( - filename=unique_filename, + unique_filename = make_parquet_file( partitioned_columns=["col1"], range_index_start=range_index_start, range_index_step=range_index_step, @@ -1870,7 +1852,7 @@ def test_read_parquet_partitioned_directory( ], ], ) - def test_read_parquet_pandas_index(self, engine, filters): + def test_read_parquet_pandas_index(self, engine, filters, tmp_path): if ( version.parse(pa.__version__) >= version.parse("12.0.0") and version.parse(pd.__version__) < version.parse("2.0.0") @@ -1923,26 +1905,28 @@ def test_read_parquet_pandas_index(self, engine, filters): ): continue - with ensure_clean(".parquet") as unique_filename: - pandas_df.set_index(col).to_parquet(unique_filename) - # read the same parquet using modin.pandas - eval_io( - "read_parquet", - # read_parquet kwargs - path=unique_filename, - engine=engine, - filters=filters, - ) - - with ensure_clean(".parquet") as unique_filename: - pandas_df.set_index(["idx", "A"]).to_parquet(unique_filename) - eval_io( - "read_parquet", - # read_parquet kwargs - path=unique_filename, - engine=engine, - filters=filters, - ) + unique_filename = get_unique_filename( + extension="parquet", data_dir=tmp_path + ) + pandas_df.set_index(col).to_parquet(unique_filename) + # read the same parquet using modin.pandas + eval_io( + "read_parquet", + # read_parquet kwargs + path=unique_filename, + engine=engine, + filters=filters, + ) + + unique_filename = get_unique_filename(extension="parquet", data_dir=tmp_path) + pandas_df.set_index(["idx", "A"]).to_parquet(unique_filename) + eval_io( + "read_parquet", + # read_parquet kwargs + path=unique_filename, + engine=engine, + filters=filters, + ) @pytest.mark.parametrize( "filters", @@ -2170,11 +2154,10 @@ def test_read_parquet_s3_with_column_partitioning( # TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this # commment once we turn all default to pandas messages into errors. def test_read_parquet_relative_to_user_home(make_parquet_file): - with ensure_clean(".parquet") as unique_filename: - make_parquet_file(filename=unique_filename) - _check_relative_io( - "read_parquet", unique_filename, "path", storage_default=("Hdk",) - ) + unique_filename = make_parquet_file() + _check_relative_io( + "read_parquet", unique_filename, "path", storage_default=("Hdk",) + ) @pytest.mark.filterwarnings(default_to_pandas_ignore_string) @@ -2567,24 +2550,24 @@ def test_HDFStore(self, tmp_path): df_equals(modin_df, pandas_df) assert isinstance(modin_store, pd.HDFStore) - with ensure_clean(".hdf5") as hdf_file: - with pd.HDFStore(hdf_file, mode="w") as store: - store.append("data/df1", pd.DataFrame(np.random.randn(5, 5))) - store.append("data/df2", pd.DataFrame(np.random.randn(4, 4))) + unique_filename = get_unique_filename(extension="hdf5", data_dir=tmp_path) + with pd.HDFStore(unique_filename, mode="w") as store: + store.append("data/df1", pd.DataFrame(np.random.randn(5, 5))) + store.append("data/df2", pd.DataFrame(np.random.randn(4, 4))) - modin_df = pd.read_hdf(hdf_file, key="data/df1", mode="r") - pandas_df = pandas.read_hdf(hdf_file, key="data/df1", mode="r") + modin_df = pd.read_hdf(unique_filename, key="data/df1", mode="r") + pandas_df = pandas.read_hdf(unique_filename, key="data/df1", mode="r") df_equals(modin_df, pandas_df) - def test_HDFStore_in_read_hdf(self): - with ensure_clean(".hdf") as filename: - dfin = pd.DataFrame(np.random.rand(8, 8)) - dfin.to_hdf(filename, "/key") + def test_HDFStore_in_read_hdf(self, tmp_path): + unique_filename = get_unique_filename(extension="hdf", data_dir=tmp_path) + dfin = pd.DataFrame(np.random.rand(8, 8)) + dfin.to_hdf(unique_filename, "/key") - with pd.HDFStore(filename) as h: - modin_df = pd.read_hdf(h, "/key") - with pandas.HDFStore(filename) as h: - pandas_df = pandas.read_hdf(h, "/key") + with pd.HDFStore(unique_filename) as h: + modin_df = pd.read_hdf(h, "/key") + with pandas.HDFStore(unique_filename) as h: + pandas_df = pandas.read_hdf(h, "/key") df_equals(modin_df, pandas_df)