diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bb59985..5550679 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -54,6 +54,7 @@ repos: args: [] additional_dependencies: - pytest + - pandas-stubs - repo: https://github.com/codespell-project/codespell rev: "v2.2.6" diff --git a/pyproject.toml b/pyproject.toml index f1a4358..2ac0bdd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build" [project] name = "idc-index-data" -version = "17.0.1" +version = "17.0.2" authors = [ { name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" }, { name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" }, @@ -44,6 +44,8 @@ dependencies = [] [project.optional-dependencies] test = [ + "pandas", + "pyarrow", "pytest >=6", "pytest-cov >=3", ] diff --git a/scripts/python/idc_index_data_manager.py b/scripts/python/idc_index_data_manager.py index 06cca81..d617db0 100644 --- a/scripts/python/idc_index_data_manager.py +++ b/scripts/python/idc_index_data_manager.py @@ -31,6 +31,8 @@ def execute_sql_query(self, file_path: str) -> tuple[pd.DataFrame, str]: with Path(file_path).open("r") as file: sql_query = file.read() index_df = self.client.query(sql_query).to_dataframe() + if "StudyDate" in index_df.columns: + index_df["StudyDate"] = index_df["StudyDate"].astype(str) output_basename = Path(file_path).name.split(".")[0] logger.debug("Executed SQL query from file: %s", file_path) return index_df, output_basename diff --git a/tests/test_package.py b/tests/test_package.py index d27a51d..f137692 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -2,6 +2,7 @@ import importlib.metadata +import pandas as pd from packaging.version import Version import idc_index_data as m @@ -25,3 +26,15 @@ def test_filepath(): if m.IDC_INDEX_PARQUET_FILEPATH is not None: assert m.IDC_INDEX_PARQUET_FILEPATH.is_file() assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet" + + +def test_reading_index(): + if m.IDC_INDEX_CSV_ARCHIVE_FILEPATH is not None: + assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.is_file() + df_csv = pd.read_csv(m.IDC_INDEX_CSV_ARCHIVE_FILEPATH) + assert not df_csv.empty + + if m.IDC_INDEX_PARQUET_FILEPATH is not None: + assert m.IDC_INDEX_PARQUET_FILEPATH.is_file() + df_parquet = pd.read_parquet(m.IDC_INDEX_PARQUET_FILEPATH) + assert not df_parquet.empty