Skip to content

Commit

Permalink
enh: use csv index only for now
Browse files Browse the repository at this point in the history
fix minor formatting errors
  • Loading branch information
vkt1414 committed Mar 22, 2024
1 parent 8b4ab7c commit 3d38017
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 16 deletions.
1 change: 1 addition & 0 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ jobs:
id-token: write
runs-on: ubuntu-latest
if: github.event_name == 'release' && github.event.action == 'published'

steps:
- uses: actions/download-artifact@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.8", "3.12"]
runs-on: [ubuntu-latest, macos-latest, windows-latest]

#currently not working on pypi-3.10
Expand Down
13 changes: 2 additions & 11 deletions scripts/python/idc_index_data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,8 @@
class IDCIndexDataManager:
def __init__(self, project_id: str):
"""
Initializes the IDCIndexDataManager.
Initializes the IDCIndexDataManager using the Google Cloud Platform project ID.
Args:
project_id (str): The Google Cloud Platform project ID.
"""
self.project_id = project_id
self.client = bigquery.Client(project=project_id)
Expand All @@ -27,9 +25,6 @@ def execute_sql_query(self, file_path: str) -> tuple[pd.DataFrame, str]:
"""
Executes the SQL query in the specified file.
Args:
file_path (str): The path to the file containing the SQL query.
Returns:
Tuple[pd.DataFrame, str]: A tuple containing the DataFrame with query results,
the output basename.
Expand All @@ -53,10 +48,6 @@ def generate_index_data_files(
a DataFrame 'index_df'. The DataFrame is then saved as a compressed CSV
and/or a Parquet file, depending on the method arguments.
Args:
generate_compressed_csv (bool): If True, generates a zip compressed CSV file.
generate_parquet (bool): If True, generates a Parquet file.
"""

scripts_dir = Path(__file__).parent.parent
Expand All @@ -83,7 +74,7 @@ def generate_index_data_files(

def run(self) -> None:
"""
Runs the IDCIndexDataManager process.
Runs the IDCIndexDataManager to locally generate an index-data file (.csv.zip) by running queries against the Google Cloud Platform IDC project tables.
"""
self.generate_index_data_files(
generate_compressed_csv=True, generate_parquet=False
Expand Down
4 changes: 2 additions & 2 deletions src/idc_index_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
__all__ = [
"__version__",
"IDC_INDEX_CSV_ARCHIVE_FILEPATH",
"IDC_INDEX_PARQUET_FILEPATH",
# "IDC_INDEX_PARQUET_FILEPATH",
]


Expand All @@ -35,4 +35,4 @@ def _lookup(path: str) -> Path:


IDC_INDEX_CSV_ARCHIVE_FILEPATH: Path = _lookup("idc_index_data/idc_index.csv.zip")
IDC_INDEX_PARQUET_FILEPATH: Path = _lookup("idc_index_data/idc_index.parquet")
# IDC_INDEX_PARQUET_FILEPATH: Path = _lookup("idc_index_data/idc_index.parquet")
4 changes: 2 additions & 2 deletions tests/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ def test_version():
def test_filepath():
assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.is_file()
assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.name == "idc_index.csv.zip"
#assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
#assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"
# assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
# assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"

0 comments on commit 3d38017

Please sign in to comment.