Skip to content

Commit

Permalink
gsheet: Add --ignore-priority to submit-experiments-from-gsheet
Browse files Browse the repository at this point in the history
  • Loading branch information
arteymix committed Feb 8, 2022
1 parent 051bdff commit 815a7d6
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 13 deletions.
27 changes: 15 additions & 12 deletions rnaseq_pipeline/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,23 +460,26 @@ def complete(self):
"""
return super().complete() and all(not out.exists() for out in self._targets_to_remove())

class SubmitExperimentsFromFileToGemma(TaskWithOutputMixin, WrapperTask):
input_file = luigi.Parameter()
class SubmitExperimentsFromDataFrameMixin:
ignore_priority = luigi.BoolParameter(positional=False, significant=False, description='Ignore the priority column and use 100 everywhere as priority')
def requires(self):
df = pd.read_csv(self.input_file, sep='\t', converters={'priority': lambda x: 0 if x == '' else int(x)})
return [SubmitExperimentToGemma(row.experiment_id, priority=row.get('priority', 0), rerun=row.get('data')=='resubmit')
df = self._retrieve_dataframe()
return [SubmitExperimentToGemma(row.experiment_id, priority=100 if self.ignore_priority else row.get('priority', 0), rerun=row['data']=='resubmit')
for _, row in df.iterrows() if row.get('priority', 0) > 0]

class SubmitExperimentsFromGoogleSpreadsheetToGemma(WrapperTask):
spreadsheet_id = luigi.Parameter()
sheet_name = luigi.Parameter()
class SubmitExperimentsFromFileToGemma(SubmitExperimentsFromDataFrameMixin, TaskWithOutputMixin, WrapperTask):
input_file = luigi.Parameter()
def _retrieve_dataframe(self):
return pd.read_csv(self.input_file, sep='\t', converters={'priority': lambda x: 0 if x == '' else int(x)})

class SubmitExperimentsFromGoogleSpreadsheetToGemma(SubmitExperimentsFromDataFrameMixin, WrapperTask):
spreadsheet_id = luigi.Parameter(description='Spreadsheet ID in Google Sheets (lookup {spreadsheetId} in https://docs.google.com/spreadsheets/d/{spreadsheetId}/edit)')
sheet_name = luigi.Parameter(description='Name of the spreadsheet in the document')
# TODO: use the spreadsheet revision ID
# For now, all this does is distinguish spreadsheet tasks that might
# refer to different revisions, which in turn allows newly added tasks to
# be executed
revision_id = luigi.Parameter(default=str(uuid.uuid4()))
def requires(self):
revision_id = luigi.Parameter(default=str(uuid.uuid4()), description='Revision ID of the spreadsheet (not yet supported, but will default to the latest)')
def _retrieve_dataframe(self):
from .gsheet import retrieve_spreadsheet
df = retrieve_spreadsheet(self.spreadsheet_id, self.sheet_name)
return [SubmitExperimentToGemma(row.experiment_id, priority=row.get('priority', 0), rerun=row['data']=='resubmit')
for _, row in df.iterrows() if row.get('priority', 0) > 0]
return retrieve_spreadsheet(self.spreadsheet_id, self.sheet_name)
3 changes: 2 additions & 1 deletion scripts/submit-experiments-from-gsheet
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ def main(argv):
parser.add_argument('--sheet-name')
parser.add_argument('--umask', type=parse_octal, default='002')
parser.add_argument('--workers', type=int, default=100)
parser.add_argument('--ignore-priority', action='store_true')
args = parser.parse_args(argv)
with umask(args.umask):
results = luigi.build([SubmitExperimentsFromGoogleSpreadsheetToGemma(args.spreadsheet_id, args.sheet_name)], workers=args.workers, detailed_summary=True)
results = luigi.build([SubmitExperimentsFromGoogleSpreadsheetToGemma(args.spreadsheet_id, args.sheet_name, ignore_priority=args.ignore_priority)], workers=args.workers, detailed_summary=True)
print(results.summary_text)

if __name__ == '__main__':
Expand Down

0 comments on commit 815a7d6

Please sign in to comment.