microbiomedata · hesspnnl · Mar 12, 2025 · Mar 11, 2025 · Mar 11, 2025 · Mar 11, 2025
diff --git a/.github/workflows/dev_tests.yml b/.github/workflows/dev_tests.yml
@@ -0,0 +1,50 @@
+name: dev tests
+
+# Runs the test suite with ENV=dev nightly (00:00 UTC) or on manual dispatch.
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 0 * * *'
+
+# Declaring permissions sets every unlisted scope to "none", so issues: write
+# must be granted explicitly for the "Create Issue" step to succeed.
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          # Quoted so YAML never reads the version as a float (e.g. 3.10 -> 3.1).
+          python-version: "3.12"
+      - name: Install dependencies
+        run: pip install -r requirements-dev.txt
+      - name: Run tests
+        # Step-level env is visible only to this step, so the credentials are
+        # supplied here, where the tests run.
+        env:
+          CLIENT_ID: ${{ secrets.CLIENT_ID }}
+          CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
+          ENV: "dev"
+        # -r takes the report characters as its value; "-ra" keeps the test
+        # path from being consumed as that value.
+        run: pytest -ra nmdc_api_utilities/test/
+      - name: Create Issue
+        # Only scheduled runs open an issue; manual runs just report failure.
+        if: failure() && github.event_name == 'schedule'
+        uses: actions/github-script@v6
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const { owner, repo } = context.repo;
+            await github.rest.issues.create({
+              owner,
+              repo,
+              title: "notebook_api_utilities development tests failed",
+              body: "The development tests have failed in the latest run. Please investigate the issue.",
+              labels: ["report", "automated issue"]
+            });
diff --git a/.github/workflows/prod_tests.yml b/.github/workflows/prod_tests.yml
@@ -0,0 +1,52 @@
+name: prod tests
+
+# Runs the test suite with ENV=prod on every push and pull request, nightly
+# (00:00 UTC), or on manual dispatch.
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 0 * * *'
+
+# Least privilege: read access for checkout, write access to open issues.
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          # Quoted so YAML never reads the version as a float (e.g. 3.10 -> 3.1).
+          python-version: "3.12"
+      - name: Install dependencies
+        run: pip install -r requirements-dev.txt
+      - name: Run tests
+        # Step-level env is visible only to this step, so the credentials are
+        # supplied here, where the tests run.
+        env:
+          CLIENT_ID: ${{ secrets.CLIENT_ID }}
+          CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
+          ENV: "prod"
+        # -r takes the report characters as its value; "-ra" keeps the test
+        # path from being consumed as that value.
+        run: pytest -ra nmdc_api_utilities/test/
+      - name: Create Issue
+        # Only scheduled runs open an issue; push, PR and manual runs do not.
+        if: failure() && github.event_name == 'schedule'
+        uses: actions/github-script@v6
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const { owner, repo } = context.repo;
+            await github.rest.issues.create({
+              owner,
+              repo,
+              title: "notebook_api_utilities production tests failed",
+              body: "The production tests have failed in the latest run. Please investigate the issue.",
+              labels: ["report", "automated issue"]
+            });
diff --git a/nmdc_api_utilities/biosample_search.py b/nmdc_api_utilities/biosample_search.py
@@ -11,5 +11,5 @@ class BiosampleSearch(LatLongFilters, CollectionSearch):
     Class to interact with the NMDC API to get biosamples.
     """
 
-    def __init__(self):
-        super().__init__("biosample_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="biosample_set", env=env)
diff --git a/nmdc_api_utilities/calibration_search.py b/nmdc_api_utilities/calibration_search.py
@@ -10,5 +10,5 @@ class CalibrationSearch(CollectionSearch):
     Class to interact with the NMDC API to get calibration records.
     """
 
-    def __init__(self):
-        super().__init__("calibration_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="calibration_set", env=env)
diff --git a/nmdc_api_utilities/chemical_entity_search.py b/nmdc_api_utilities/chemical_entity_search.py
@@ -10,5 +10,5 @@ class ChemicalEntitySearch(CollectionSearch):
     Class to interact with the NMDC API to get chemical entities.
     """
 
-    def __init__(self):
-        super().__init__("chemical_entity_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="chemical_entity_set", env=env)
diff --git a/nmdc_api_utilities/collecting_biosamples_from_site_search.py b/nmdc_api_utilities/collecting_biosamples_from_site_search.py
@@ -10,5 +10,5 @@ class CollectingBiosamplesFromSiteSearch(CollectionSearch):
     Class to interact with the NMDC API to get collecting biosamples from site sets.
     """
 
-    def __init__(self):
-        super().__init__("collecting_biosamples_from_site_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="collecting_biosamples_from_site_set", env=env)
diff --git a/nmdc_api_utilities/collection_helpers.py b/nmdc_api_utilities/collection_helpers.py
@@ -12,8 +12,8 @@ class CollectionHelpers(NMDCSearch):
     These functions may not be specific to a particular collection.
     """
 
-    def __init__(self):
-        super().__init__()
+    def __init__(self, env="prod"):  # env is forwarded unchanged via super().__init__
+        super().__init__(env=env)
 
     def get_record_name_from_id(self, doc_id: str):
         """

diff --git a/nmdc_api_utilities/collection_search.py b/nmdc_api_utilities/collection_search.py
@@ -13,9 +13,9 @@ class CollectionSearch(NMDCSearch):
     Class to interact with the NMDC API to get collections of data. Must know the collection name to query.
     """
 
-    def __init__(self, collection_name):
+    def __init__(self, collection_name, env="prod"):  # stores collection_name; env is forwarded via super().__init__
         self.collection_name = collection_name
-        super().__init__()
+        super().__init__(env=env)
 
     def get_records(
         self,
@@ -166,57 +166,8 @@ def get_record_by_id(
             logging.debug(
                 f"API request response: {response.json()}\n API Status Code: {response.status_code}"
             )
-
-        results = response.json()["resources"]
-
+        results = response.json()
         return results
-
-    def get_record_data_object_by_type(
-        self,
-        data_object_type: str = "",
-        max_page_size: int = 100,
-        fields: str = "",
-        all_pages: bool = False,
-    ):
-        """
-        Get a collection of data from the NMDC API. Specific function to get a collection of data from the NMDC API, filtered by data object type.
-        params:
-            data_object_type: str
-                The data_object_type to filter by. Default is an empty string, which will return all data.
-            max_page_size: int
-                The maximum number of items to return per page. Default is 100.
-            fields: str
-                The fields to return. Default is all fields.
-            pages: bool
-                True to return all pages. False to return the first page. Default is False.
-        """
-        results = []
-        dp = DataProcessing()
-        # create the filter based on data object type
-        filter = f'{{"data_object_type":{{"$regex": "{data_object_type}"}}}}'
-        filter = urllib.parse.quote_plus(filter)
-        # if fields is empty, return all fields
-        if not fields:
-            fields = "id,name,description,alternative_identifiers,file_size_bytes,md5_checksum,data_object_type,url,type"
-        url = f"{self.base_url}/nmdcschema/data_object_set?filter={filter}&max_page_size={max_page_size}&projection={fields}"
-        # get the reponse
-        try:
-            response = requests.get(url)
-            response.raise_for_status()
-        except requests.exceptions.RequestException as e:
-            logger.error("API request failed", exc_info=True)
-            raise RuntimeError("Failed to get data object from NMDC API") from e
-        else:
-            logging.debug(
-                f"API request response: {response.json()}\n API Status Code: {response.status_code}"
-            )
-        results = response.json()["resources"]
-        # otherwise, get all pages
-        if all_pages:
-            results = self._get_all_pages(
-                response, "data_object_set", filter, max_page_size, fields
-            )["resources"]
-        return dp.convert_to_df(results)
 
     def check_ids_exist(self, ids: list) -> bool:
         """

diff --git a/nmdc_api_utilities/configuration_search.py b/nmdc_api_utilities/configuration_search.py
@@ -10,5 +10,5 @@ class ConfigurationSearch(CollectionSearch):
-    Class to interact with the NMDC API to get cofiguration sets.
+    Class to interact with the NMDC API to get configuration sets.
     """
 
-    def __init__(self):
-        super().__init__("configuration_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="configuration_set", env=env)
diff --git a/nmdc_api_utilities/data_generation_search.py b/nmdc_api_utilities/data_generation_search.py
@@ -10,5 +10,5 @@ class DataGenerationSearch(CollectionSearch):
     Class to interact with the NMDC API to get data generation sets.
     """
 
-    def __init__(self):
-        super().__init__("data_generation_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="data_generation_set", env=env)
diff --git a/nmdc_api_utilities/data_object_search.py b/nmdc_api_utilities/data_object_search.py
@@ -10,5 +10,5 @@ class DataObjectSearch(CollectionSearch):
     Class to interact with the NMDC API to get data object sets.
     """
 
-    def __init__(self):
-        super().__init__("data_object_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="data_object_set", env=env)
diff --git a/nmdc_api_utilities/field_research_site_search.py b/nmdc_api_utilities/field_research_site_search.py
@@ -4,13 +4,11 @@
 import logging
 
 logger = logging.getLogger(__name__)
-# TODO - what are these
-
 
 class FieldResearchSiteSearch(LatLongFilters, CollectionSearch):
     """
     Class to interact with the NMDC API to get field research site sets.
     """
 
-    def __init__(self):
-        super().__init__("field_research_site_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="field_research_site_set", env=env)
diff --git a/nmdc_api_utilities/functional_annotation_agg_search.py b/nmdc_api_utilities/functional_annotation_agg_search.py
@@ -10,5 +10,5 @@ class FunctionalAnnotationAggSearch(FunctionalSearch):
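-    Class to interact with the NMDC API to get functional annotation agg sets. These are most helpful when trying identify workflows associted with a KEGG, COG, or PFAM ids.
+    Class to interact with the NMDC API to get functional annotation agg sets. These are most helpful when trying to identify workflows associated with KEGG, COG, or PFAM ids.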
     """
 
-    def __init__(self):
-        super().__init__()
+    def __init__(self, env="prod"):  # env is forwarded unchanged via super().__init__
+        super().__init__(env=env)
diff --git a/nmdc_api_utilities/functional_search.py b/nmdc_api_utilities/functional_search.py
@@ -8,8 +8,8 @@ class FunctionalSearch:
     Class to interact with the NMDC API to filter functional annotations by KEGG, COG, or PFAM ids.
     """
 
-    def __init__(self):
-        self.collectioninstance = CollectionSearch("functional_annotation_agg")
+    def __init__(self, env="prod"):  # env is forwarded to the wrapped CollectionSearch instance
+        self.collectioninstance = CollectionSearch(collection_name="functional_annotation_agg", env=env)
 
     def get_functional_annotations(
         self,
@@ -70,4 +70,4 @@ def get_records(
             fields: str
                 The fields to return. Default is all fields.
         """
-        self.collectioninstance.get_records(filter, max_page_size, fields, all_pages)
+        return self.collectioninstance.get_records(filter, max_page_size, fields, all_pages)
diff --git a/nmdc_api_utilities/instrument_search.py b/nmdc_api_utilities/instrument_search.py
@@ -10,5 +10,5 @@ class InstrumentSearch(CollectionSearch):
     Class to interact with the NMDC API to get instrument sets.
     """
 
-    def __init__(self):
-        super().__init__("instrument_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="instrument_set", env=env)
diff --git a/nmdc_api_utilities/lat_long_filters.py b/nmdc_api_utilities/lat_long_filters.py
@@ -10,9 +10,9 @@ class LatLongFilters(CollectionSearch):
     Class to interact with the NMDC API to filter sets by latitude and longitude.
     """
 
-    def __init__(self, collection_name):
+    def __init__(self, collection_name, env="prod"):  # stores collection_name, then forwards it and env via super().__init__
         self.collection_name = collection_name
-        super().__init__(self.collection_name)
+        super().__init__(collection_name=self.collection_name, env=env)
 
     def get_record_by_latitude(
         self, comparison: str, latitude: float, page_size=25, fields="", all_pages=False

diff --git a/nmdc_api_utilities/manifest_search.py b/nmdc_api_utilities/manifest_search.py
@@ -10,5 +10,5 @@ class ManifestSearch(CollectionSearch):
-    Class to interact with the NMDC API to get genome menifest sets.
+    Class to interact with the NMDC API to get genome manifest sets.
     """
 
-    def __init__(self):
-        super().__init__("manifest_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="manifest_set", env=env)
diff --git a/nmdc_api_utilities/material_processing_search.py b/nmdc_api_utilities/material_processing_search.py
@@ -10,5 +10,5 @@ class MaterialProcessingSearch(CollectionSearch):
     Class to interact with the NMDC API to get material processing sets.
     """
 
-    def __init__(self):
-        super().__init__("material_processing_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="material_processing_set", env=env)
diff --git a/nmdc_api_utilities/metadata.py b/nmdc_api_utilities/metadata.py
@@ -9,8 +9,8 @@ class Metadata(NMDCSearch):
     """
     Class to interact with the NMDC API metadata. 
     """
-    def __init__(self):
-        super().__init__()
+    def __init__(self, env="prod"):  # env is forwarded unchanged via super().__init__
+        super().__init__(env=env)
 
     def validate_json(self, json_path) -> None:
         """

diff --git a/nmdc_api_utilities/minter.py b/nmdc_api_utilities/minter.py
@@ -10,8 +10,8 @@ class Minter(NMDCSearch):
     """
     Class to interact with the NMDC API to mint new identifiers.
     """
-    def __init__(self):
-        super().__init__()
+    def __init__(self, env="prod"):  # env is forwarded unchanged via super().__init__
+        super().__init__(env=env)
     def mint(self, nmdc_type:str, client_id: str, client_secret: str) -> str:
         """
         Mint a new identifier for a collection.

diff --git a/nmdc_api_utilities/nmdc_search.py b/nmdc_api_utilities/nmdc_search.py
@@ -5,5 +5,28 @@
 
 
 class NMDCSearch:
-    def __init__(self):
-        self.base_url = "https://api.microbiomedata.org"
+    """
+    Base class for interacting with the NMDC API. Sets the base URL for the API based on the environment.
+    """
+
+    def __init__(self, env="prod"):
+        """
+        Environment is defaulted to the production instance of the API. This functionality is in place for monthly testing of the runtime updates to the API.
+        params:
+            env: str
+                The environment to use. Default is prod. Must be one of the following:
+                    prod
+                    dev
+        raises:
+            ValueError
+                If env is not one of the supported environments.
+        """
+        # Supported environments, each paired with the base URL of its API instance.
+        known_environments = (
+            ("prod", "https://api.microbiomedata.org"),
+            ("dev", "https://api-dev.microbiomedata.org"),
+        )
+        for name, url in known_environments:
+            if env == name:
+                self.base_url = url
+                return
+        raise ValueError("env must be one of the following: prod, dev")
diff --git a/nmdc_api_utilities/processed_sample_search.py b/nmdc_api_utilities/processed_sample_search.py
@@ -11,4 +11,9 @@ class ProcessedSampleSearch(CollectionSearch):
     """
 
-    def __init__(self):
-        super().__init__("processed_sample_set")
+    def __init__(self, env="prod"):
+        """
+        params:
+            env: str
+                The environment to use. Default is prod. Passed on to CollectionSearch.
+        """
+        super().__init__(collection_name="processed_sample_set", env=env)
diff --git a/nmdc_api_utilities/protocol_execution_search.py b/nmdc_api_utilities/protocol_execution_search.py
@@ -10,5 +10,5 @@ class ProtocolExecutionSearch(CollectionSearch):
     Class to interact with the NMDC API to get protocol execution sets.
     """
 
-    def __init__(self):
-        super().__init__("protocol_execution_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="protocol_execution_set", env=env)
diff --git a/nmdc_api_utilities/storage_process_search.py b/nmdc_api_utilities/storage_process_search.py
@@ -10,5 +10,5 @@ class StorageProcessSearch(CollectionSearch):
     Class to interact with the NMDC API to get storage process sets.
     """
 
-    def __init__(self):
-        super().__init__("storage_process_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="storage_process_set", env=env)
diff --git a/nmdc_api_utilities/study_search.py b/nmdc_api_utilities/study_search.py
@@ -10,5 +10,5 @@ class StudySearch(CollectionSearch):
     Class to interact with the NMDC API to get studies.
     """
 
-    def __init__(self):
-        super().__init__("study_set")
+    def __init__(self, env="prod"):  # forwards env and the fixed collection name via super().__init__
+        super().__init__(collection_name="study_set", env=env)