enh: use csv index only for now

vkt1414 · vkt1414 · commit 3d38017ef779 · 2024-03-22T18:19:59.000Z
fix minor formatting errors
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
@@ -40,6 +40,7 @@ jobs:
       id-token: write
     runs-on: ubuntu-latest
     if: github.event_name == 'release' && github.event.action == 'published'
+
     steps:
       - uses: actions/download-artifact@v4
         with:
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -50,7 +50,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.8", "3.12"]
         runs-on: [ubuntu-latest, macos-latest, windows-latest]
 
         #currently not working on pypi-3.10
diff --git a/scripts/python/idc_index_data_manager.py b/scripts/python/idc_index_data_manager.py
@@ -14,10 +14,8 @@
 class IDCIndexDataManager:
     def __init__(self, project_id: str):
         """
-        Initializes the IDCIndexDataManager.
+        Initializes the IDCIndexDataManager using the Google Cloud Platform project ID.
 
-        Args:
-            project_id (str): The Google Cloud Platform project ID.
         """
         self.project_id = project_id
         self.client = bigquery.Client(project=project_id)
@@ -27,9 +25,6 @@ def execute_sql_query(self, file_path: str) -> tuple[pd.DataFrame, str]:
         """
         Executes the SQL query in the specified file.
 
-        Args:
-            file_path (str): The path to the file containing the SQL query.
-
         Returns:
             Tuple[pd.DataFrame, str]: A tuple containing the DataFrame with query results,
             the output basename.
@@ -53,10 +48,6 @@ def generate_index_data_files(
         a DataFrame 'index_df'. The DataFrame is then saved as a compressed CSV
         and/or a Parquet file, depending on the method arguments.
 
-        Args:
-            generate_compressed_csv (bool): If True, generates a zip compressed CSV file.
-            generate_parquet (bool): If True, generates a Parquet file.
-
         """
 
         scripts_dir = Path(__file__).parent.parent
@@ -83,7 +74,7 @@ def generate_index_data_files(
 
     def run(self) -> None:
         """
-        Runs the IDCIndexDataManager process.
+        Runs the IDCIndexDataManager to locally generate an index-data file (.csv.zip) by running queries against the Google Cloud Platform IDC project tables.
         """
         self.generate_index_data_files(
             generate_compressed_csv=True, generate_parquet=False
diff --git a/src/idc_index_data/__init__.py b/src/idc_index_data/__init__.py
@@ -19,7 +19,7 @@
 __all__ = [
     "__version__",
     "IDC_INDEX_CSV_ARCHIVE_FILEPATH",
-    "IDC_INDEX_PARQUET_FILEPATH",
+    # "IDC_INDEX_PARQUET_FILEPATH",
 ]
 
 
@@ -35,4 +35,4 @@ def _lookup(path: str) -> Path:
 
 
 IDC_INDEX_CSV_ARCHIVE_FILEPATH: Path = _lookup("idc_index_data/idc_index.csv.zip")
-IDC_INDEX_PARQUET_FILEPATH: Path = _lookup("idc_index_data/idc_index.parquet")
+# IDC_INDEX_PARQUET_FILEPATH: Path = _lookup("idc_index_data/idc_index.parquet")
diff --git a/tests/test_package.py b/tests/test_package.py
@@ -12,5 +12,5 @@ def test_version():
 def test_filepath():
     assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.is_file()
     assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.name == "idc_index.csv.zip"
-    #assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
-    #assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"
+    # assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
+    # assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"