From 7c385a0f4b32285b6293ebf1bc91b2f4f860e70a Mon Sep 17 00:00:00 2001
From: Alberto Sonnino <alberto@sonnino.com>
Date: Thu, 8 Feb 2024 11:41:53 +0000
Subject: [PATCH] update AWS image

---
 README.md                       |  15 ++-
 benchmark/benchmark/instance.py | 199 ++++++++++++++++----------------
 2 files changed, 114 insertions(+), 100 deletions(-)

diff --git a/README.md b/README.md
index dc2beac7..0b5f5d4b 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,33 @@
 > **Note to readers:** MystenLabs is making this codebase production-ready [here](https://github.com/MystenLabs/sui/tree/main/narwhal).
 
-
 # Narwhal and Tusk
-[![build status](https://img.shields.io/github/actions/workflow/status/asonnino/narwhal/rust.yml?branch=main&logo=github&style=flat-square)](https://github.com/asonnino/narwhal/actions)
+
+[![build status](https://img.shields.io/github/actions/workflow/status/asonnino/narwhal/rust.yml?branch=master&logo=github&style=flat-square)](https://github.com/asonnino/narwhal/actions)
 [![rustc](https://img.shields.io/badge/rustc-1.51+-blue?style=flat-square&logo=rust)](https://www.rust-lang.org)
+[![python](https://img.shields.io/badge/python-3.9-blue?style=flat-square&logo=python&logoColor=white)](https://www.python.org/downloads/release/python-390/)
 [![license](https://img.shields.io/badge/license-Apache-blue.svg?style=flat-square)](LICENSE)
 
 This repo provides an implementation of [Narwhal and Tusk](https://arxiv.org/pdf/2105.11827.pdf). The codebase has been designed to be small, efficient, and easy to benchmark and modify. It has not been designed to run in production but uses real cryptography ([dalek](https://doc.dalek.rs/ed25519_dalek)), networking ([tokio](https://docs.rs/tokio)), and storage ([rocksdb](https://docs.rs/rocksdb)).
 
 ## Quick Start
+
 The core protocols are written in Rust, but all benchmarking scripts are written in Python and run with [Fabric](http://www.fabfile.org/).
 To deploy and benchmark a testbed of 4 nodes on your local machine, clone the repo and install the python dependencies:
+
 ```
 $ git clone https://github.com/asonnino/narwhal.git
 $ cd narwhal/benchmark
 $ pip install -r requirements.txt
 ```
+
 You also need to install Clang (required by rocksdb) and [tmux](https://linuxize.com/post/getting-started-with-tmux/#installing-tmux) (which runs all nodes and clients in the background). Finally, run a local benchmark using fabric:
+
 ```
 $ fab local
 ```
+
 This command may take a long time the first time you run it (compiling rust code in `release` mode may be slow) and you can customize a number of benchmark parameters in `fabfile.py`. When the benchmark terminates, it displays a summary of the execution similarly to the one below.
+
 ```
 -----------------------------------------
  SUMMARY:
@@ -54,9 +61,11 @@ This command may take a long time the first time you run it (compiling rust code
 ```
 
 ## Next Steps
-The next step is to read the paper [Narwhal and Tusk: A DAG-based Mempool and Efficient BFT Consensus](https://arxiv.org/pdf/2105.11827.pdf). It is then recommended to have a look at the README files of the [worker](https://github.com/asonnino/narwhal/tree/master/worker) and [primary](https://github.com/asonnino/narwhal/tree/master/primary) crates. An additional resource to better understand the Tusk consensus protocol is the paper [All You Need is DAG](https://arxiv.org/abs/2102.08325) as it describes a similar protocol. 
+
+The next step is to read the paper [Narwhal and Tusk: A DAG-based Mempool and Efficient BFT Consensus](https://arxiv.org/pdf/2105.11827.pdf). It is then recommended to have a look at the README files of the [worker](https://github.com/asonnino/narwhal/tree/master/worker) and [primary](https://github.com/asonnino/narwhal/tree/master/primary) crates. An additional resource to better understand the Tusk consensus protocol is the paper [All You Need is DAG](https://arxiv.org/abs/2102.08325) as it describes a similar protocol.
 
 The README file of the [benchmark folder](https://github.com/asonnino/narwhal/tree/master/benchmark) explains how to benchmark the codebase and read benchmarks' results. It also provides a step-by-step tutorial to run benchmarks on [Amazon Web Services (AWS)](https://aws.amazon.com) accross multiple data centers (WAN).
 
 ## License
+
 This software is licensed as [Apache 2.0](LICENSE).
diff --git a/benchmark/benchmark/instance.py b/benchmark/benchmark/instance.py
index 011fa96b..70a8cab7 100644
--- a/benchmark/benchmark/instance.py
+++ b/benchmark/benchmark/instance.py
@@ -11,28 +11,28 @@
 class AWSError(Exception):
     def __init__(self, error):
         assert isinstance(error, ClientError)
-        self.message = error.response['Error']['Message']
-        self.code = error.response['Error']['Code']
+        self.message = error.response["Error"]["Message"]
+        self.code = error.response["Error"]["Code"]
         super().__init__(self.message)
 
 
 class InstanceManager:
-    INSTANCE_NAME = 'dag-node'
-    SECURITY_GROUP_NAME = 'dag'
+    INSTANCE_NAME = "dag-node"
+    SECURITY_GROUP_NAME = "dag"
 
     def __init__(self, settings):
         assert isinstance(settings, Settings)
         self.settings = settings
         self.clients = OrderedDict()
         for region in settings.aws_regions:
-            self.clients[region] = boto3.client('ec2', region_name=region)
+            self.clients[region] = boto3.client("ec2", region_name=region)
 
     @classmethod
-    def make(cls, settings_file='settings.json'):
+    def make(cls, settings_file="settings.json"):
         try:
             return cls(Settings.load(settings_file))
         except SettingsError as e:
-            raise BenchError('Failed to load settings', e)
+            raise BenchError("Failed to load settings", e)
 
     def _get(self, state):
         # Possible states are: 'pending', 'running', 'shutting-down',
@@ -41,21 +41,15 @@ def _get(self, state):
         for region, client in self.clients.items():
             r = client.describe_instances(
                 Filters=[
-                    {
-                        'Name': 'tag:Name',
-                        'Values': [self.INSTANCE_NAME]
-                    },
-                    {
-                        'Name': 'instance-state-name',
-                        'Values': state
-                    }
+                    {"Name": "tag:Name", "Values": [self.INSTANCE_NAME]},
+                    {"Name": "instance-state-name", "Values": state},
                 ]
             )
-            instances = [y for x in r['Reservations'] for y in x['Instances']]
+            instances = [y for x in r["Reservations"] for y in x["Instances"]]
             for x in instances:
-                ids[region] += [x['InstanceId']]
-                if 'PublicIpAddress' in x:
-                    ips[region] += [x['PublicIpAddress']]
+                ids[region] += [x["InstanceId"]]
+                if "PublicIpAddress" in x:
+                    ips[region] += [x["PublicIpAddress"]]
         return ids, ips
 
     def _wait(self, state):
@@ -69,7 +63,7 @@ def _wait(self, state):
 
     def _create_security_group(self, client):
         client.create_security_group(
-            Description='HotStuff node',
+            Description="HotStuff node",
             GroupName=self.SECURITY_GROUP_NAME,
         )
 
@@ -77,43 +71,55 @@ def _create_security_group(self, client):
             GroupName=self.SECURITY_GROUP_NAME,
             IpPermissions=[
                 {
-                    'IpProtocol': 'tcp',
-                    'FromPort': 22,
-                    'ToPort': 22,
-                    'IpRanges': [{
-                        'CidrIp': '0.0.0.0/0',
-                        'Description': 'Debug SSH access',
-                    }],
-                    'Ipv6Ranges': [{
-                        'CidrIpv6': '::/0',
-                        'Description': 'Debug SSH access',
-                    }],
+                    "IpProtocol": "tcp",
+                    "FromPort": 22,
+                    "ToPort": 22,
+                    "IpRanges": [
+                        {
+                            "CidrIp": "0.0.0.0/0",
+                            "Description": "Debug SSH access",
+                        }
+                    ],
+                    "Ipv6Ranges": [
+                        {
+                            "CidrIpv6": "::/0",
+                            "Description": "Debug SSH access",
+                        }
+                    ],
                 },
                 {
-                    'IpProtocol': 'tcp',
-                    'FromPort': self.settings.base_port,
-                    'ToPort': self.settings.base_port + 2_000,
-                    'IpRanges': [{
-                        'CidrIp': '0.0.0.0/0',
-                        'Description': 'Dag port',
-                    }],
-                    'Ipv6Ranges': [{
-                        'CidrIpv6': '::/0',
-                        'Description': 'Dag port',
-                    }],
-                }
-            ]
+                    "IpProtocol": "tcp",
+                    "FromPort": self.settings.base_port,
+                    "ToPort": self.settings.base_port + 2_000,
+                    "IpRanges": [
+                        {
+                            "CidrIp": "0.0.0.0/0",
+                            "Description": "Dag port",
+                        }
+                    ],
+                    "Ipv6Ranges": [
+                        {
+                            "CidrIpv6": "::/0",
+                            "Description": "Dag port",
+                        }
+                    ],
+                },
+            ],
         )
 
     def _get_ami(self, client):
         # The AMI changes with regions.
         response = client.describe_images(
-            Filters=[{
-                'Name': 'description',
-                'Values': ['Canonical, Ubuntu, 20.04 LTS, amd64 focal image build on 2020-10-26']
-            }]
+            Filters=[
+                {
+                    "Name": "description",
+                    "Values": [
+                        "Canonical, Ubuntu, 22.04 LTS, amd64 jammy image build on 2023-09-19"
+                    ],
+                }
+            ]
         )
-        return response['Images'][0]['ImageId']
+        return response["Images"][0]["ImageId"]
 
     def create_instances(self, instances):
         assert isinstance(instances, int) and instances > 0
@@ -124,14 +130,14 @@ def create_instances(self, instances):
                 self._create_security_group(client)
             except ClientError as e:
                 error = AWSError(e)
-                if error.code != 'InvalidGroup.Duplicate':
-                    raise BenchError('Failed to create security group', error)
+                if error.code != "InvalidGroup.Duplicate":
+                    raise BenchError("Failed to create security group", error)
 
         try:
             # Create all instances.
             size = instances * len(self.clients)
             progress = progress_bar(
-                self.clients.values(), prefix=f'Creating {size} instances'
+                self.clients.values(), prefix=f"Creating {size} instances"
             )
             for client in progress:
                 client.run_instances(
@@ -141,37 +147,38 @@ def create_instances(self, instances):
                     MaxCount=instances,
                     MinCount=instances,
                     SecurityGroups=[self.SECURITY_GROUP_NAME],
-                    TagSpecifications=[{
-                        'ResourceType': 'instance',
-                        'Tags': [{
-                            'Key': 'Name',
-                            'Value': self.INSTANCE_NAME
-                        }]
-                    }],
+                    TagSpecifications=[
+                        {
+                            "ResourceType": "instance",
+                            "Tags": [{"Key": "Name", "Value": self.INSTANCE_NAME}],
+                        }
+                    ],
                     EbsOptimized=True,
-                    BlockDeviceMappings=[{
-                        'DeviceName': '/dev/sda1',
-                        'Ebs': {
-                            'VolumeType': 'gp2',
-                            'VolumeSize': 200,
-                            'DeleteOnTermination': True
+                    BlockDeviceMappings=[
+                        {
+                            "DeviceName": "/dev/sda1",
+                            "Ebs": {
+                                "VolumeType": "gp2",
+                                "VolumeSize": 200,
+                                "DeleteOnTermination": True,
+                            },
                         }
-                    }],
+                    ],
                 )
 
             # Wait for the instances to boot.
-            Print.info('Waiting for all instances to boot...')
-            self._wait(['pending'])
-            Print.heading(f'Successfully created {size} new instances')
+            Print.info("Waiting for all instances to boot...")
+            self._wait(["pending"])
+            Print.heading(f"Successfully created {size} new instances")
         except ClientError as e:
-            raise BenchError('Failed to create AWS instances', AWSError(e))
+            raise BenchError("Failed to create AWS instances", AWSError(e))
 
     def terminate_instances(self):
         try:
-            ids, _ = self._get(['pending', 'running', 'stopping', 'stopped'])
+            ids, _ = self._get(["pending", "running", "stopping", "stopped"])
             size = sum(len(x) for x in ids.values())
             if size == 0:
-                Print.heading(f'All instances are shut down')
+                Print.heading(f"All instances are shut down")
                 return
 
             # Terminate instances.
@@ -180,64 +187,62 @@ def terminate_instances(self):
                     client.terminate_instances(InstanceIds=ids[region])
 
             # Wait for all instances to properly shut down.
-            Print.info('Waiting for all instances to shut down...')
-            self._wait(['shutting-down'])
+            Print.info("Waiting for all instances to shut down...")
+            self._wait(["shutting-down"])
             for client in self.clients.values():
-                client.delete_security_group(
-                    GroupName=self.SECURITY_GROUP_NAME
-                )
+                client.delete_security_group(GroupName=self.SECURITY_GROUP_NAME)
 
-            Print.heading(f'Testbed of {size} instances destroyed')
+            Print.heading(f"Testbed of {size} instances destroyed")
         except ClientError as e:
-            raise BenchError('Failed to terminate instances', AWSError(e))
+            raise BenchError("Failed to terminate instances", AWSError(e))
 
     def start_instances(self, max):
         size = 0
         try:
-            ids, _ = self._get(['stopping', 'stopped'])
+            ids, _ = self._get(["stopping", "stopped"])
             for region, client in self.clients.items():
                 if ids[region]:
                     target = ids[region]
                     target = target if len(target) < max else target[:max]
                     size += len(target)
                     client.start_instances(InstanceIds=target)
-            Print.heading(f'Starting {size} instances')
+            Print.heading(f"Starting {size} instances")
         except ClientError as e:
-            raise BenchError('Failed to start instances', AWSError(e))
+            raise BenchError("Failed to start instances", AWSError(e))
 
     def stop_instances(self):
         try:
-            ids, _ = self._get(['pending', 'running'])
+            ids, _ = self._get(["pending", "running"])
             for region, client in self.clients.items():
                 if ids[region]:
                     client.stop_instances(InstanceIds=ids[region])
             size = sum(len(x) for x in ids.values())
-            Print.heading(f'Stopping {size} instances')
+            Print.heading(f"Stopping {size} instances")
         except ClientError as e:
             raise BenchError(AWSError(e))
 
     def hosts(self, flat=False):
         try:
-            _, ips = self._get(['pending', 'running'])
+            _, ips = self._get(["pending", "running"])
             return [x for y in ips.values() for x in y] if flat else ips
         except ClientError as e:
-            raise BenchError('Failed to gather instances IPs', AWSError(e))
+            raise BenchError("Failed to gather instances IPs", AWSError(e))
 
     def print_info(self):
         hosts = self.hosts()
         key = self.settings.key_path
-        text = ''
+        text = ""
         for region, ips in hosts.items():
-            text += f'\n Region: {region.upper()}\n'
+            text += f"\n Region: {region.upper()}\n"
             for i, ip in enumerate(ips):
-                new_line = '\n' if (i+1) % 6 == 0 else ''
-                text += f'{new_line} {i}\tssh -i {key} ubuntu@{ip}\n'
+                new_line = "\n" if (i + 1) % 6 == 0 else ""
+                text += f"{new_line} {i}\tssh -i {key} ubuntu@{ip}\n"
         print(
-            '\n'
-            '----------------------------------------------------------------\n'
-            ' INFO:\n'
-            '----------------------------------------------------------------\n'
-            f' Available machines: {sum(len(x) for x in hosts.values())}\n'
-            f'{text}'
-            '----------------------------------------------------------------\n'
+            "\n"
+            "----------------------------------------------------------------\n"
+            " INFO:\n"
+            "----------------------------------------------------------------\n"
+            f" Available machines: {sum(len(x) for x in hosts.values())}\n"
+            f"{text}"
+            "----------------------------------------------------------------\n"
         )