Skip to content

Commit

Permalink
bug(fix): change date column to str prior to writing to parquet (#22)
Browse files Browse the repository at this point in the history
  • Loading branch information
vkt1414 committed Apr 9, 2024
1 parent a678877 commit 5278a96
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 1 deletion.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ repos:
args: []
additional_dependencies:
- pytest
- pandas-stubs

- repo: https://github.com/codespell-project/codespell
rev: "v2.2.6"
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"

[project]
name = "idc-index-data"
version = "17.0.1"
version = "17.0.2"
authors = [
{ name = "Andrey Fedorov", email = "[email protected]" },
{ name = "Vamsi Thiriveedhi", email = "[email protected]" },
Expand Down Expand Up @@ -44,6 +44,8 @@ dependencies = []

[project.optional-dependencies]
test = [
"pandas",
"pyarrow",
"pytest >=6",
"pytest-cov >=3",
]
Expand Down
2 changes: 2 additions & 0 deletions scripts/python/idc_index_data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ def execute_sql_query(self, file_path: str) -> tuple[pd.DataFrame, str]:
with Path(file_path).open("r") as file:
sql_query = file.read()
index_df = self.client.query(sql_query).to_dataframe()
if "StudyDate" in index_df.columns:
index_df["StudyDate"] = index_df["StudyDate"].astype(str)
output_basename = Path(file_path).name.split(".")[0]
logger.debug("Executed SQL query from file: %s", file_path)
return index_df, output_basename
Expand Down
13 changes: 13 additions & 0 deletions tests/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import importlib.metadata

import pandas as pd
from packaging.version import Version

import idc_index_data as m
Expand All @@ -25,3 +26,15 @@ def test_filepath():
if m.IDC_INDEX_PARQUET_FILEPATH is not None:
assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"


def test_reading_index():
    """Read each index file exposed by ``idc_index_data`` and check it has rows.

    A filepath constant that is ``None`` is skipped rather than treated as a
    failure; a non-``None`` one must point at an existing file that pandas
    loads into a non-empty DataFrame.
    """
    csv_archive = m.IDC_INDEX_CSV_ARCHIVE_FILEPATH
    if csv_archive is not None:
        assert csv_archive.is_file()
        assert not pd.read_csv(csv_archive).empty

    parquet_file = m.IDC_INDEX_PARQUET_FILEPATH
    if parquet_file is not None:
        assert parquet_file.is_file()
        assert not pd.read_parquet(parquet_file).empty

0 comments on commit 5278a96

Please sign in to comment.