Skip to content

Commit

Permalink
Merge pull request #83 from pepkit/dev_get_records
Browse files Browse the repository at this point in the history
Dev get records
  • Loading branch information
donaldcampbelljr authored Sep 27, 2023
2 parents 9b6aae1 + f02678f commit 7de7b5e
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 111 deletions.
5 changes: 3 additions & 2 deletions pipestat/backends/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,9 @@ def count_records(self):

def get_records(
    self,
    limit: Optional[int] = 1000,
    offset: Optional[int] = 0,
) -> Optional[dict]:
    """
    Return a page of reported records.

    Abstract placeholder: concrete backends override this with a real
    implementation; this base version only warns and returns None.

    :param int limit: maximum number of records to return (default 1000)
    :param int offset: number of records to skip before collecting (default 0)
    :return dict: paged records dict from a concrete backend, or None here
    """
    _LOGGER.warning("Not implemented yet for this backend")
    pass

Expand Down
68 changes: 33 additions & 35 deletions pipestat/backends/dbbackend.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,41 +134,39 @@ def get_one_record(

def get_records(
    self,
    limit: Optional[int] = 1000,
    offset: Optional[int] = 0,
) -> Optional[dict]:
    """
    Return a page of reported record identifiers from the database backend.

    :param int limit: maximum number of records to return (default 1000)
    :param int offset: number of records to skip before collecting (default 0)
    :return dict records_dict: dictionary of records
        {
            "count": <number of records in this page>,
            "limit": <limit used>,
            "offset": <offset used>,
            "records": [record_identifier, ...],
        }
    """
    mod = self.get_model(table_name=self.table_name)

    with self.session as s:
        # Page at the SQL level so only the requested window is fetched.
        stmt = sql_select(mod).offset(offset).limit(limit)
        records = s.exec(stmt).all()
        sample_list = [record.record_identifier for record in records]

    # NOTE(review): "count" is the size of this page, not the total table
    # row count — confirm callers expect a page-local count.
    records_dict = {
        "count": len(sample_list),
        "limit": limit,
        "offset": offset,
        "records": sample_list,
    }

    return records_dict

def get_status(self, record_identifier: str) -> Optional[str]:
"""
Expand Down
49 changes: 28 additions & 21 deletions pipestat/backends/filebackend.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,27 +142,34 @@ def get_flag_file(self, record_identifier: str = None) -> Union[str, List[str],

def get_records(
    self,
    limit: Optional[int] = 1000,
    offset: Optional[int] = 0,
) -> Optional[dict]:
    """
    Return a page of reported record identifiers from the file backend.

    :param int limit: maximum number of records to return (default 1000)
    :param int offset: number of records to skip before collecting (default 0)
    :return dict records_dict: dictionary of records
        {
            "count": <number of records in this page>,
            "limit": <limit used>,
            "offset": <offset used>,
            "records": [record_identifier, ...],
        }
    """
    # Records live under self._data.data[pipeline_name][pipeline_type];
    # slice the key list to implement paging in memory.
    all_keys = list(self._data.data[self.pipeline_name][self.pipeline_type].keys())
    record_list = all_keys[offset : offset + limit]

    # NOTE(review): "count" is the size of this page, not the total number
    # of stored records — confirm callers expect a page-local count.
    records_dict = {
        "count": len(record_list),
        "limit": limit,
        "offset": offset,
        "records": record_list,
    }

    return records_dict

def get_status(self, record_identifier: str) -> Optional[str]:
"""
Expand Down
17 changes: 11 additions & 6 deletions pipestat/pipestat.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,10 +278,17 @@ def count_records(self) -> int:
@require_backend
def get_records(
    self,
    limit: Optional[int] = 1000,
    offset: Optional[int] = 0,
) -> Optional[dict]:
    """
    Return a page of reported records, delegating to the active backend.

    :param int limit: maximum number of records to return (default 1000)
    :param int offset: number of records to skip before collecting (default 0)
    :return dict: backend-built dict with "count", "limit", "offset",
        and "records" keys
    """
    return self.backend.get_records(limit=limit, offset=offset)

@require_backend
def get_status(
Expand Down Expand Up @@ -511,7 +518,6 @@ def summarize(
@require_backend
def table(
self,
pipeline_type: Optional[str] = None,
) -> List[str]:
"""
Generates stats (.tsv) and object (.yaml) files.
Expand All @@ -521,8 +527,7 @@ def table(
"""

pipeline_name = self.pipeline_name
pipeline_type = pipeline_type or self[PIPELINE_TYPE]
table_path_list = _create_stats_objs_summaries(self, pipeline_name, pipeline_type)
table_path_list = _create_stats_objs_summaries(self, pipeline_name)

return table_path_list

Expand Down
70 changes: 26 additions & 44 deletions pipestat/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,8 @@ def create_sample_parent_html(self, navbar, footer):
os.makedirs(self.pipeline_reports)
pages = []
labels = []
for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
sample_dir = self.pipeline_reports

# Confirm sample directory exists, then build page
Expand Down Expand Up @@ -260,14 +259,12 @@ def create_object_htmls(self, navbar, footer):
links = []
html_page_path = os.path.join(self.pipeline_reports, f"{file_result}.html".lower())

for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
sample_result = fetch_pipeline_results(
project=self.prj,
pipeline_name=self.pipeline_name,
sample_name=sample_name,
pipeline_type=pipeline_type,
)
if file_result not in sample_result:
pass
Expand Down Expand Up @@ -306,14 +303,12 @@ def create_object_htmls(self, navbar, footer):
html_page_path = os.path.join(self.pipeline_reports, f"{image_result}.html".lower())
figures = []

for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
sample_result = fetch_pipeline_results(
project=self.prj,
pipeline_name=self.pipeline_name,
sample_name=sample_name,
pipeline_type=pipeline_type,
)
if image_result not in sample_result:
pass
Expand Down Expand Up @@ -358,7 +353,7 @@ def create_glossary_html(self, glossary_table, navbar, footer):
_LOGGER.debug(f"glossary.html | template_vars:\n{template_vars}")
return render_jinja_template("glossary.html", self.jinja_env, template_vars)

def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline_type):
def create_sample_html(self, sample_stats, navbar, footer, sample_name):
"""
Produce an HTML page containing all of a sample's objects
and the sample summary statistics
Expand All @@ -374,10 +369,7 @@ def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline
os.makedirs(self.pipeline_reports)
html_page = os.path.join(self.pipeline_reports, f"{sample_name}.html".lower())

if pipeline_type == "sample":
flag = self.prj.get_status(record_identifier=sample_name)
if pipeline_type == "project":
flag = self.prj.get_status(record_identifier=sample_name)
flag = self.prj.get_status(record_identifier=sample_name)
if not flag:
button_class = "btn btn-secondary"
flag = "Missing"
Expand All @@ -396,7 +388,6 @@ def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline
sample_name=sample_name,
inclusion_fun=lambda x: x == "file",
highlighted=True,
pipeline_type=pipeline_type,
)

for k in highlighted_results.keys():
Expand All @@ -410,7 +401,6 @@ def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline
pipeline_name=self.pipeline_name,
sample_name=sample_name,
inclusion_fun=lambda x: x == "file",
pipeline_type=pipeline_type,
)
for result_id, result in file_results.items():
desc = (
Expand All @@ -429,7 +419,6 @@ def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline
pipeline_name=self.pipeline_name,
sample_name=sample_name,
inclusion_fun=lambda x: x == "image",
pipeline_type=pipeline_type,
)
figures = []
for result_id, result in image_results.items():
Expand Down Expand Up @@ -521,19 +510,20 @@ def create_index_html(self, navbar, footer):
# Produce table rows
table_row_data = []
_LOGGER.info(" * Creating sample pages")
for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
sample_stat_results = fetch_pipeline_results(
project=self.prj,
pipeline_name=self.pipeline_name,
sample_name=sample_name,
inclusion_fun=None,
casting_fun=str,
pipeline_type=pipeline_type,
)
sample_html = self.create_sample_html(
sample_stat_results, navbar, footer, sample_name, pipeline_type
sample_stat_results,
navbar,
footer,
sample_name,
)
rel_sample_html = os.path.relpath(sample_html, self.pipeline_reports)
# treat sample_name column differently - will need to provide
Expand Down Expand Up @@ -624,16 +614,14 @@ def get_nonhighlighted_results(self, types):

def _stats_to_json_str(self):
results = {}
for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
results[sample_name] = fetch_pipeline_results(
project=self.prj,
sample_name=sample_name,
pipeline_name=self.prj.pipeline_name,
inclusion_fun=lambda x: x not in OBJECT_TYPES,
casting_fun=str,
pipeline_type=pipeline_type,
)
return dumps(results)

Expand All @@ -653,9 +641,8 @@ def _get_navbar_dropdown_data_objects(self, objs, wd, context):
def _get_navbar_dropdown_data_samples(self, wd, context):
relpaths = []
sample_names = []
for sample in self.prj.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in self.prj.backend.get_records()["records"]:
sample_name = sample
page_name = os.path.join(
self.pipeline_reports,
f"{sample_name}.html".replace(" ", "_").lower(),
Expand Down Expand Up @@ -809,7 +796,6 @@ def fetch_pipeline_results(
inclusion_fun=None,
casting_fun=None,
highlighted=False,
pipeline_type=None,
):
"""
Get the specific pipeline results for sample based on inclusion function
Expand All @@ -831,10 +817,7 @@ def fetch_pipeline_results(
casting_fun = casting_fun or pass_all_fun
psm = project
# exclude object-like results from the stats results mapping
if pipeline_type == "sample":
rep_data = psm.retrieve(record_identifier=sample_name)
if pipeline_type == "project":
rep_data = psm.retrieve(record_identifier=sample_name)
rep_data = psm.retrieve(record_identifier=sample_name)
results = {
k: casting_fun(v)
for k, v in rep_data.items()
Expand Down Expand Up @@ -879,9 +862,8 @@ def _warn(what, e, sn):
times = []
mems = []
status_descs = []
for sample in project.backend.get_records():
sample_name = sample[0]
pipeline_type = sample[1]
for sample in project.backend.get_records()["records"]:
sample_name = sample
psm = project
sample_names.append(sample_name)
# status and status style
Expand Down Expand Up @@ -1041,7 +1023,7 @@ def get_file_for_table(prj, pipeline_name, appendix=None, directory=None):
return fp


def _create_stats_objs_summaries(prj, pipeline_name, pipeline_type) -> List[str]:
def _create_stats_objs_summaries(prj, pipeline_name) -> List[str]:
"""
Create stats spreadsheet and objects summary.
Expand All @@ -1058,18 +1040,18 @@ def _create_stats_objs_summaries(prj, pipeline_name, pipeline_type) -> List[str]
reported_stats = []
stats = []

if pipeline_type == "sample":
if prj.pipeline_type == "sample":
columns = ["Sample Index", "Sample Name", "Results"]
else:
columns = ["Sample Index", "Project Name", "Sample Name", "Results"]

records = prj.backend.get_records(pipeline_type=pipeline_type)
records = prj.backend.get_records()["records"]
record_index = 0
for record in records:
record_index += 1
record_name = record[0]
record_name = record

if pipeline_type == "sample":
if prj.pipeline_type == "sample":
reported_stats = [record_index, record_name]
rep_data = prj.retrieve(record_identifier=record_name)
else:
Expand Down
Loading

0 comments on commit 7de7b5e

Please sign in to comment.