Skip to content

DATAUP-530 Refactor to bulk insert #406

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into the base branch on
Aug 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 83 additions & 5 deletions lib/execution_engine2/db/MongoUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@
import time
import traceback
from contextlib import contextmanager
from typing import Dict
from datetime import datetime
from typing import Dict, List

from bson.objectid import ObjectId
from mongoengine import connect, connection
from pymongo import MongoClient
from pymongo import MongoClient, UpdateOne
from pymongo.errors import ServerSelectionTimeoutError

from lib.execution_engine2.db.models.models import JobLog, Job, Status, TerminatedCode
from lib.execution_engine2.exceptions import (
from execution_engine2.db.models.models import JobLog, Job, Status, TerminatedCode
from execution_engine2.exceptions import (
RecordNotFoundException,
InvalidStatusTransitionException,
)
from execution_engine2.sdk.EE2Runjob import JobIdPair


class MongoUtil:
Expand Down Expand Up @@ -216,7 +218,9 @@ def get_job(self, job_id=None, exclude_fields=None) -> Job:

return job

def get_jobs(self, job_ids=None, exclude_fields=None, sort_id_ascending=None):
def get_jobs(
self, job_ids=None, exclude_fields=None, sort_id_ascending=None
) -> List[Job]:
if not (job_ids and isinstance(job_ids, list)):
raise ValueError("Please provide a non empty list of job ids")

Expand Down Expand Up @@ -263,6 +267,68 @@ def check_if_already_finished(job_status):
return True
return False

def update_jobs_to_queued(
self, job_id_pairs: List[JobIdPair], scheduler_type: str = "condor"
) -> None:
f"""
* Adds scheduler id to list of jobs
* Updates a list of {Status.created.value} jobs to queued. Does not work on jobs that already have gone through any other
status transition. If the record is not in the {Status.created.value} status, nothing will happen
:param job_id_pairs: A list of pairs of Job Ids and Scheduler Ids
:param scheduler_type: The scheduler this job was queued in, default condor
"""

bulk_update_scheduler_jobs = []
bulk_update_created_to_queued = []
queue_time_now = datetime.utcnow().timestamp()
for job_id_pair in job_id_pairs:
if job_id_pair.job_id is None:
raise ValueError(
f"Provided a bad job_id_pair, missing job_id for {job_id_pair.scheduler_id}"
)
elif job_id_pair.scheduler_id is None:
raise ValueError(
f"Provided a bad job_id_pair, missing scheduler_id for {job_id_pair.job_id}"
)

bulk_update_scheduler_jobs.append(
UpdateOne(
{
"_id": ObjectId(job_id_pair.job_id),
},
{
"$set": {
"scheduler_id": job_id_pair.scheduler_id,
"scheduler_type": scheduler_type,
}
},
)
)
bulk_update_created_to_queued.append(
UpdateOne(
{
"_id": ObjectId(job_id_pair.job_id),
"status": Status.created.value,
},
{
"$set": {
"status": Status.queued.value,
"queued": queue_time_now,
}
},
)
)
# Update provided jobs with scheduler id. Then only update non terminated jobs into updated status.
mongo_collection = self.config["mongo-jobs-collection"]

if bulk_update_scheduler_jobs:
with self.pymongo_client(mongo_collection) as pymongo_client:
ee2_jobs_col = pymongo_client[self.mongo_database][mongo_collection]
# Bulk Update to add scheduler ids
ee2_jobs_col.bulk_write(bulk_update_scheduler_jobs, ordered=False)
# Bulk Update to add queued status ids
ee2_jobs_col.bulk_write(bulk_update_created_to_queued, ordered=False)

def cancel_job(self, job_id=None, terminated_code=None):
"""
#TODO Should we check for a valid state transition here also?
Expand Down Expand Up @@ -420,6 +486,18 @@ def update_job_status(self, job_id, status, msg=None, error_message=None):
def mongo_engine_connection(self):
yield self.me_connection

def insert_jobs(self, jobs_to_insert: List[Job]) -> List[ObjectId]:
    """
    Insert multiple job records in one round trip using MongoEngine.

    :param jobs_to_insert: Multiple jobs to insert at once
    :return: List of job ids from the insertion
    """
    # TODO Look at pymongo write_concerns that may be useful
    # TODO see if pymongo is faster
    # TODO: Think about error handling
    # load_bulk=False returns the inserted ObjectIds rather than re-fetching
    # the full documents from the database.
    inserted = Job.objects.insert(doc_or_docs=jobs_to_insert, load_bulk=False)
    return inserted

def insert_one(self, doc):
"""
insert a doc into collection
Expand Down
5 changes: 2 additions & 3 deletions lib/execution_engine2/sdk/EE2Logs.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from enum import Enum
from typing import Dict, NamedTuple

from lib.execution_engine2.db.models.models import JobLog as JLModel, LogLines
from lib.execution_engine2.exceptions import RecordNotFoundException
from execution_engine2.db.models.models import JobLog as JLModel, LogLines
from execution_engine2.exceptions import RecordNotFoundException


# if TYPE_CHECKING:
Expand Down Expand Up @@ -104,7 +104,6 @@ def add_job_logs(self, job_id, log_lines, as_admin=False) -> AddLogResult:
self.sdkmr.get_job_with_permission(
job_id, JobPermissions.WRITE, as_admin=as_admin
)
self.sdkmr.logger.debug(f"About to add logs for {job_id}")
try:
try:
job_log = self.sdkmr.mongo_util.get_job_log_pymongo(job_id)
Expand Down
Loading