Skip to content

Commit

Permalink
Merge pull request #83 from pepkit/dev_get_records
Browse files Browse the repository at this point in the history
Dev get records
  • Loading branch information
donaldcampbelljr authored Sep 27, 2023
2 parents 9b6aae1 + f02678f commit 7de7b5e
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 111 deletions.
5 changes: 3 additions & 2 deletions pipestat/backends/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,9 @@ def count_records(self):

def get_records(
    self,
    limit: Optional[int] = 1000,
    offset: Optional[int] = 0,
) -> Optional[dict]:
    """
    Return a page of reported records.

    Abstract placeholder: concrete backends override this with a real
    implementation; this base version only warns and returns None.

    :param int limit: maximum number of records to return (default 1000)
    :param int offset: number of records to skip before collecting (default 0)
    :return dict: paged records dict from a concrete backend, or None here
    """
    _LOGGER.warning("Not implemented yet for this backend")
    pass

Expand Down
68 changes: 33 additions & 35 deletions pipestat/backends/dbbackend.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,41 +134,39 @@ def get_one_record(

def get_records(
    self,
    limit: Optional[int] = 1000,
    offset: Optional[int] = 0,
) -> Optional[dict]:
    """
    Return a page of reported record identifiers from the database backend.

    :param int limit: maximum number of records to return (default 1000)
    :param int offset: number of records to skip before collecting (default 0)
    :return dict records_dict: dictionary of records
        {
            "count": <number of records in this page>,
            "limit": <limit used>,
            "offset": <offset used>,
            "records": [record_identifier, ...],
        }
    """
    mod = self.get_model(table_name=self.table_name)

    with self.session as s:
        # Page at the SQL level so only the requested window is fetched.
        stmt = sql_select(mod).offset(offset).limit(limit)
        records = s.exec(stmt).all()
        sample_list = [record.record_identifier for record in records]

    # NOTE(review): "count" is the size of this page, not the total table
    # row count — confirm callers expect a page-local count.
    records_dict = {
        "count": len(sample_list),
        "limit": limit,
        "offset": offset,
        "records": sample_list,
    }

    return records_dict

def get_status(self, record_identifier: str) -> Optional[str]:
"""
Expand Down
49 changes: 28 additions & 21 deletions pipestat/backends/filebackend.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,27 +142,34 @@ def get_flag_file(self, record_identifier: str = None) -> Union[str, List[str],

def get_records(
    self,
    limit: Optional[int] = 1000,
    offset: Optional[int] = 0,
) -> Optional[dict]:
    """
    Return a page of reported record identifiers from the file backend.

    :param int limit: maximum number of records to return (default 1000)
    :param int offset: number of records to skip before collecting (default 0)
    :return dict records_dict: dictionary of records
        {
            "count": <number of records in this page>,
            "limit": <limit used>,
            "offset": <offset used>,
            "records": [record_identifier, ...],
        }
    """
    # Records live under self._data.data[pipeline_name][pipeline_type];
    # slice the key list to implement paging in memory.
    all_keys = list(self._data.data[self.pipeline_name][self.pipeline_type].keys())
    record_list = all_keys[offset : offset + limit]

    # NOTE(review): "count" is the size of this page, not the total number
    # of stored records — confirm callers expect a page-local count.
    records_dict = {
        "count": len(record_list),
        "limit": limit,
        "offset": offset,
        "records": record_list,
    }

    return records_dict

def get_status(self, record_identifier: str) -> Optional[str]:
"""
Expand Down
17 changes: 11 additions & 6 deletions pipestat/pipestat.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,10 +278,17 @@ def count_records(self) -> int:
@require_backend
def get_records(
    self,
    limit: Optional[int] = 1000,
    offset: Optional[int] = 0,
) -> Optional[dict]:
    """
    Return a page of reported records, delegating to the active backend.

    :param int limit: maximum number of records to return (default 1000)
    :param int offset: number of records to skip before collecting (default 0)
    :return dict: backend-built dict with "count", "limit", "offset",
        and "records" keys
    """
    return self.backend.get_records(limit=limit, offset=offset)

@require_backend
def get_status(
Expand Down Expand Up @@ -511,7 +518,6 @@ def summarize(
@require_backend
def table(
self,
pipeline_type: Optional[str] = None,
) -> List[str]:
"""
Generates stats (.tsv) and object (.yaml) files.
Expand All @@ -521,8 +527,7 @@ def table(
"""

pipeline_name = self.pipeline_name
pipeline_type = pipeline_type or self[PIPELINE_TYPE]
table_path_list = _create_stats_objs_summaries(self, pipeline_name, pipeline_type)
table_path_list = _create_stats_objs_summaries(self, pipeline_name)

return table_path_list

Expand Down
70 changes: 26 additions & 44 deletions pipestat/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,8 @@ def create_sample_parent_html(self, navbar, footer):
os.makedirs(self.pipeline_reports)
pages = []
labels = []
for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
sample_dir = self.pipeline_reports

# Confirm sample directory exists, then build page
Expand Down Expand Up @@ -260,14 +259,12 @@ def create_object_htmls(self, navbar, footer):
links = []
html_page_path = os.path.join(self.pipeline_reports, f"{file_result}.html".lower())

for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
sample_result = fetch_pipeline_results(
project=self.prj,
pipeline_name=self.pipeline_name,
sample_name=sample_name,
pipeline_type=pipeline_type,
)
if file_result not in sample_result:
pass
Expand Down Expand Up @@ -306,14 +303,12 @@ def create_object_htmls(self, navbar, footer):
html_page_path = os.path.join(self.pipeline_reports, f"{image_result}.html".lower())
figures = []

for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
sample_result = fetch_pipeline_results(
project=self.prj,
pipeline_name=self.pipeline_name,
sample_name=sample_name,
pipeline_type=pipeline_type,
)
if image_result not in sample_result:
pass
Expand Down Expand Up @@ -358,7 +353,7 @@ def create_glossary_html(self, glossary_table, navbar, footer):
_LOGGER.debug(f"glossary.html | template_vars:\n{template_vars}")
return render_jinja_template("glossary.html", self.jinja_env, template_vars)

def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline_type):
def create_sample_html(self, sample_stats, navbar, footer, sample_name):
"""
Produce an HTML page containing all of a sample's objects
and the sample summary statistics
Expand All @@ -374,10 +369,7 @@ def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline
os.makedirs(self.pipeline_reports)
html_page = os.path.join(self.pipeline_reports, f"{sample_name}.html".lower())

if pipeline_type == "sample":
flag = self.prj.get_status(record_identifier=sample_name)
if pipeline_type == "project":
flag = self.prj.get_status(record_identifier=sample_name)
flag = self.prj.get_status(record_identifier=sample_name)
if not flag:
button_class = "btn btn-secondary"
flag = "Missing"
Expand All @@ -396,7 +388,6 @@ def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline
sample_name=sample_name,
inclusion_fun=lambda x: x == "file",
highlighted=True,
pipeline_type=pipeline_type,
)

for k in highlighted_results.keys():
Expand All @@ -410,7 +401,6 @@ def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline
pipeline_name=self.pipeline_name,
sample_name=sample_name,
inclusion_fun=lambda x: x == "file",
pipeline_type=pipeline_type,
)
for result_id, result in file_results.items():
desc = (
Expand All @@ -429,7 +419,6 @@ def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline
pipeline_name=self.pipeline_name,
sample_name=sample_name,
inclusion_fun=lambda x: x == "image",
pipeline_type=pipeline_type,
)
figures = []
for result_id, result in image_results.items():
Expand Down Expand Up @@ -521,19 +510,20 @@ def create_index_html(self, navbar, footer):
# Produce table rows
table_row_data = []
_LOGGER.info(" * Creating sample pages")
for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
sample_stat_results = fetch_pipeline_results(
project=self.prj,
pipeline_name=self.pipeline_name,
sample_name=sample_name,
inclusion_fun=None,
casting_fun=str,
pipeline_type=pipeline_type,
)
sample_html = self.create_sample_html(
sample_stat_results, navbar, footer, sample_name, pipeline_type
sample_stat_results,
navbar,
footer,
sample_name,
)
rel_sample_html = os.path.relpath(sample_html, self.pipeline_reports)
# treat sample_name column differently - will need to provide
Expand Down Expand Up @@ -624,16 +614,14 @@ def get_nonhighlighted_results(self, types):

def _stats_to_json_str(self):
results = {}
for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
results[sample_name] = fetch_pipeline_results(
project=self.prj,
sample_name=sample_name,
pipeline_name=self.prj.pipeline_name,
inclusion_fun=lambda x: x not in OBJECT_TYPES,
casting_fun=str,
pipeline_type=pipeline_type,
)
return dumps(results)

Expand All @@ -653,9 +641,8 @@ def _get_navbar_dropdown_data_objects(self, objs, wd, context):
def _get_navbar_dropdown_data_samples(self, wd, context):
relpaths = []
sample_names = []
for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
page_name = os.path.join(
self.pipeline_reports,
f"{sample_name}.html".replace(" ", "_").lower(),
Expand Down Expand Up @@ -809,7 +796,6 @@ def fetch_pipeline_results(
inclusion_fun=None,
casting_fun=None,
highlighted=False,
pipeline_type=None,
):
"""
Get the specific pipeline results for sample based on inclusion function
Expand All @@ -831,10 +817,7 @@ def fetch_pipeline_results(
casting_fun = casting_fun or pass_all_fun
psm = project
# exclude object-like results from the stats results mapping
if pipeline_type == "sample":
rep_data = psm.retrieve(record_identifier=sample_name)
if pipeline_type == "project":
rep_data = psm.retrieve(record_identifier=sample_name)
rep_data = psm.retrieve(record_identifier=sample_name)
results = {
k: casting_fun(v)
for k, v in rep_data.items()
Expand Down Expand Up @@ -879,9 +862,8 @@ def _warn(what, e, sn):
times = []
mems = []
status_descs = []
for sample in project.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in project.backend.get_records()["records"]:
sample_name = sample
psm = project
sample_names.append(sample_name)
# status and status style
Expand Down Expand Up @@ -1041,7 +1023,7 @@ def get_file_for_table(prj, pipeline_name, appendix=None, directory=None):
return fp


def _create_stats_objs_summaries(prj, pipeline_name, pipeline_type) -> List[str]:
def _create_stats_objs_summaries(prj, pipeline_name) -> List[str]:
"""
Create stats spreadsheet and objects summary.
Expand All @@ -1058,18 +1040,18 @@ def _create_stats_objs_summaries(prj, pipeline_name, pipeline_type) -> List[str]
reported_stats = []
stats = []

if pipeline_type == "sample":
if prj.pipeline_type == "sample":
columns = ["Sample Index", "Sample Name", "Results"]
else:
columns = ["Sample Index", "Project Name", "Sample Name", "Results"]

records = prj.backend.get_records(pipeline_type=pipeline_type)
records = prj.backend.get_records()["records"]
record_index = 0
for record in records:
record_index += 1
record_name = record[0]
record_name = record

if pipeline_type == "sample":
if prj.pipeline_type == "sample":
reported_stats = [record_index, record_name]
rep_data = prj.retrieve(record_identifier=record_name)
else:
Expand Down
Loading

0 comments on commit 7de7b5e

Please sign in to comment.