autolab · KesterTan · Jan 26, 2025 · Jan 26, 2025 · Jan 26, 2025 · Jan 26, 2025
diff --git a/.gitignore b/.gitignore
@@ -6,8 +6,10 @@
 scripts/*.sh
 # exclude
 vmms/id_rsa*
+vmms/id_ed25519*
 courselabs/*
 dockerTmp/*
+
 # config
 config.py
 
@@ -27,3 +29,10 @@ pip-selfcheck.json
 
 # Backup files
 *.bak
+
+# Redis
+*.rdb
+
+# Tests
+tests/*
+!tests/*.py
diff --git a/README.md b/README.md
@@ -2,8 +2,7 @@
   <img src="images/autolab_banner.svg" width="380px" height="100px">
 </a>
 
-Tango
-======
+# Tango
 
 Tango is a standalone RESTful Web service that runs and manages jobs. A job is a set of files that must satisfy the following constraints:
 
@@ -16,13 +15,13 @@ Upon receiving a job, Tango will copy all of the job's input files into a VM, ru
 
 A brief overview of the Tango repository:
 
-* `tango.py` - Main tango server
-* `jobQueue.py` - Manages the job queue
-* `jobManager.py` - Assigns jobs to free VMs
-* `worker.py` - Shepherds a job through its execution
-* `preallocator.py` - Manages pools of VMs
-* `vmms/` - VMMS library implementations
-* `restful_tango/` - HTTP server layer on the main Tango
+- `tango.py` - Main tango server
+- `jobQueue.py` - Manages the job queue
+- `jobManager.py` - Assigns jobs to free VMs
+- `worker.py` - Shepherds a job through its execution
+- `preallocator.py` - Manages pools of VMs
+- `vmms/` - VMMS library implementations
+- `restful_tango/` - HTTP server layer on the main Tango
 
 Tango was developed as a distributed grading system for [Autolab](https://github.com/autolab/Autolab) at Carnegie Mellon University and has been extensively used for autograding programming assignments in CMU courses.
 
@@ -35,7 +34,63 @@ Please feel free to use Tango at your school/organization. If you run into any p
 3. [Read the documentation for the VMMS API](https://docs.autolabproject.com/tango-vmms/).
 4. [Test whether Tango is set up properly and can process jobs](https://docs.autolabproject.com/tango-cli/).
 
+## Stress Testing Tango
+
+To stress test Tango by running a large number of submissions, use `stressTest.py`. This feature is not yet available on the `master` branch; to use it, check out the `copy-in` branch.
+
+### Setting up the testing directory
+
+Create your testing directory by copying the `sample_test` directory into the `my_tests` directory.
+
+```
+cp -r sample_test my_tests/<Test Name>
+```
+
+A brief overview of the testing directory:
+
+- `input` - Directory to put your input files
+- `output` - Directory for the autograder output for each of the test submissions
+- `<Test Name>.yaml` - Yaml file to configure the stress test
+- `expected_output.txt` - Expected JSON output of the autograder
+- `summary.txt` - Summary of the autograder outputs
+- `log.txt` - Log of the submissions
+
+First, rename `sample_test.yaml` to `<Test Name>.yaml`:
+
+```
+mv sample_test.yaml <Test Name>.yaml
+```
+
+Next, update the Yaml file.
+
+```yaml
+num_submissions: 5
+submission_delay: 0.1
+autograder_image: <Autograding Image>
+output_file: log.txt
+tango_port: 3001
+cli_path: <Path to Tango>/clients/tango-cli.py
+instance_type: <Instance Type>
+timeout: 180
+ec2: True
+expected_output: expected_output.txt
+stop_before:
+```
+
+After creating the Yaml file, copy the `autograde-Makefile`, `autograde.tar`, and the file to submit into the `input` directory.
+
+### Running the stress test
+
+```
+virtualenv env
+source env/bin/activate
+pip install -r requirements.txt
+cd <Path to Tango>/tests
+python3 stressTest.py --test_dir my_tests/<Test Name>
+```
+
 ## Python 2 Support
+
 Tango now runs on Python 3. However, there is a legacy branch [master-python2](https://github.com/autolab/Tango/tree/master-python2) which is a snapshot of the last Python 2 Tango commit for legacy reasons. You are strongly encouraged to upgrade to the current Python 3 version of Tango if you are still on the Python 2 version, as future enhancements and bug fixes will be focused on the current master.
 
 We will not be backporting new features from `master` to `master-python2`.

diff --git a/clients/tango-cli.py b/clients/tango-cli.py
@@ -116,6 +116,10 @@
     "--notifyURL",
     help="Complete URL for Tango to give callback to once job is complete.",
 )
+parser.add_argument(
+    "--callbackURL",
+    help="Complete URL for Tango to give callback to once job is complete.",
+)
 parser.add_argument(
     "--disableNetwork",
     action="store_true",
@@ -127,7 +131,8 @@
 parser.add_argument("--accessKeyId", default="", help="AWS account access key ID")
 parser.add_argument("--accessKey", default="", help="AWS account access key content")
 parser.add_argument("--instanceType", default="", help="AWS EC2 instance type")
-
+parser.add_argument("--ec2", action="store_true", help="Enable ec2SSH VMMS")
+parser.add_argument("--stopBefore", default="", help="Stops the worker before a function is executed")
 
 def checkKey():
     if args.key is None:
@@ -210,11 +215,11 @@ def tango_upload():
         if res != 0:
             raise Exception("Invalid usage: [upload] " + upload_help)
 
-        f = open(args.filename)
         dirs = args.filename.split("/")
         filename = dirs[len(dirs) - 1]
         header = {"Filename": filename}
 
+        f = open(args.filename, 'rb')
         response = requests.post(
             "%s://%s:%d/upload/%s/%s/"
             % (_tango_protocol, args.server, args.port, args.key, args.courselab),
@@ -257,10 +262,15 @@ def tango_addJob():
         if args.notifyURL:
             requestObj["notifyURL"] = args.notifyURL
 
+        if args.callbackURL:
+            requestObj["callback_url"] = args.callbackURL
+
         requestObj["accessKeyId"] = args.accessKeyId
         requestObj["accessKey"] = args.accessKey
         requestObj["disable_network"] = args.disableNetwork
         requestObj["instanceType"] = args.instanceType
+        requestObj["ec2Vmms"] = args.ec2
+        requestObj["stopBefore"] = args.stopBefore
 
         response = requests.post(
             "%s://%s:%d/addJob/%s/%s/"

diff --git a/jobManager.py b/jobManager.py
@@ -11,18 +11,16 @@
 #
 
 import copy
-import time
 import logging
 import threading
+import time
 import traceback
 from datetime import datetime
 
 import tango  # Written this way to avoid circular imports
 from config import Config
 from tangoObjects import TangoQueue
 from worker import Worker
-from preallocator import Preallocator
-from jobQueue import JobQueue
 
 
 class JobManager(object):
@@ -78,6 +76,7 @@ def __manage(self):
                     from vmms.ec2SSH import Ec2SSH
 
                     vmms = Ec2SSH(job.accessKeyId, job.accessKey)
+
                     newVM = copy.deepcopy(job.vm)
                     newVM.id = self._getNextID()
                     try:
@@ -86,7 +85,9 @@ def __manage(self):
                         self.log.error("ERROR initialization VM: %s", e)
                         self.log.error(traceback.format_exc())
                     if preVM is None:
-                        raise Exception("EC2 SSH VM initialization failed: see log")
+                        raise Exception(
+                            "EC2 SSH VM initialization failed: see log"
+                        )
                 else:
                     # Try to find a vm on the free list and allocate it to
                     # the worker if successful.
@@ -113,11 +114,18 @@ def __manage(self):
                 )
                 # Mark the job assigned
                 self.jobQueue.assignJob(job.id, preVM)
-                Worker(job, vmms, self.jobQueue, self.preallocator, preVM).start()
+                Worker(
+                    job, vmms, self.jobQueue, self.preallocator, preVM
+                ).start()
 
             except Exception as err:
-                self.log.error("job failed during creation %d %s" % (job.id, str(err)))
-                self.jobQueue.makeDead(job.id, str(err))
+                if job is None:
+                    self.log.info("job_manager: job is None")
+                else:
+                    self.log.error(
+                        "job failed during creation %d %s" % (job.id, str(err))
+                    )
+                    self.jobQueue.makeDead(job.id, str(err))
 
 
 if __name__ == "__main__":
@@ -133,6 +141,13 @@ def __manage(self):
         tango.resetTango(tango.preallocator.vmms)
         for key in tango.preallocator.machines.keys():
             tango.preallocator.machines.set(key, [[], TangoQueue(key)])
+
+            # The above call sets the total pool empty.  But the free pool which
+            # is a queue in redis, may not be empty.  When the job manager restarts,
+            # resetting the free queue using the key doesn't change its content.
+            # Therefore we empty the queue, thus the free pool, to keep it consistent
+            # with the total pool.
+            tango.preallocator.machines.get(key)[1].make_empty()
         jobs = JobManager(tango.jobQueue)
 
         print("Starting the stand-alone Tango JobManager")

diff --git a/preallocator.py b/preallocator.py
@@ -47,6 +47,7 @@ def update(self, vm, num):
         self.lock.acquire()
         if vm.name not in self.machines:
             self.machines.set(vm.name, [[], TangoQueue(vm.name)])
+            self.machines.get(vm.name)[1].make_empty()
             self.log.debug("Creating empty pool of %s instances" % (vm.name))
         self.lock.release()
 

diff --git a/requirements.txt b/requirements.txt
@@ -9,3 +9,4 @@ urllib3==1.26.19
 docker==5.0.3
 backoff==2.2.1
 pytz
+pyyaml
diff --git a/restful_tango/tangoREST.py b/restful_tango/tangoREST.py
@@ -166,7 +166,11 @@ def convertJobObj(self, dirName, jobObj):
 
         ec2_vmms = False
         if "ec2Vmms" in jobObj:
-            ec2_vmms = True
+            ec2_vmms = jobObj["ec2Vmms"]
+
+        stopBefore = ""
+        if "stopBefore" in jobObj:
+            stopBefore = jobObj["stopBefore"]
 
         instance_type = None
         if "instanceType" in jobObj and len(jobObj["instanceType"]) > 0:
@@ -198,6 +202,7 @@ def convertJobObj(self, dirName, jobObj):
             accessKey=accessKey,
             accessKeyId=accessKeyId,
             disableNetwork=disableNetwork,
+            stopBefore=stopBefore
         )
 
         self.log.debug("inputFiles: %s" % [file.localFile for file in input])

diff --git a/tangoObjects.py b/tangoObjects.py
@@ -100,6 +100,7 @@ def __init__(
         accessKeyId=None,
         accessKey=None,
         disableNetwork=None,
+        stopBefore="",
     ):
         self.assigned = False
         self.retries = 0
@@ -120,6 +121,7 @@ def __init__(
         self.accessKeyId = accessKeyId
         self.accessKey = accessKey
         self.disableNetwork = disableNetwork
+        self.stopBefore = stopBefore  # keep the caller-supplied value, not a string literal
 
     def __repr__(self):
         self.syncRemote()
@@ -319,6 +321,12 @@ def remove(self, item):
     def _clean(self):
         self.__db.delete(self.key)
 
+    def make_empty(self):
+        """Remove every item from the list stored under ``self.key``.
+
+        Repeatedly left-pops from the backing store (``self.__db``) and
+        discards each item, stopping once ``lpop`` returns ``None``.
+        """
+        while True:
+            item = self.__db.lpop(self.key)
+            if item is None:
+                break
+
 
 # This is an abstract class that decides on
 # if we should initiate a TangoRemoteDictionary or TangoNativeDictionary

diff --git a/tests/sample_test/expected_output.txt b/tests/sample_test/expected_output.txt
@@ -0,0 +1 @@
+Hello world
diff --git a/tests/sample_test/input/autograde-Makefile b/tests/sample_test/input/autograde-Makefile
@@ -0,0 +1,2 @@
+autograde:
+	bash hello.sh
diff --git a/tests/sample_test/input/hello.sh b/tests/sample_test/input/hello.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+echo "Hello world"
diff --git a/tests/sample_test/sample_test.yaml b/tests/sample_test/sample_test.yaml
@@ -0,0 +1,11 @@
+num_submissions: 5
+submission_delay: 0.1
+autograder_image: ec2_213_llvm_14_s25
+output_file: log.txt
+tango_port: 3001
+cli_path: /home/snarita/Autolab/Tango/clients/tango-cli.py
+instance_type: t2.micro
+timeout: 180
+ec2: True
+expected_output: expected_output.txt
+stop_before:
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,3 +9,4 @@ urllib3==1.26.19 @@
     docker==5.0.3
     backoff==2.2.1
     pytz
+    pyyaml