Skip to content

Add validations on EC2 enclave pre-init #1159

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/actions/build_aws_eif/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ runs:

cp ${{ steps.buildFolder.outputs.BUILD_FOLDER }}/identity_scope.txt ${ARTIFACTS_OUTPUT_DIR}/
cp ${{ steps.buildFolder.outputs.BUILD_FOLDER }}/version_number.txt ${ARTIFACTS_OUTPUT_DIR}/
cp ./scripts/aws/start.sh ${ARTIFACTS_OUTPUT_DIR}/
cp ./scripts/aws/stop.sh ${ARTIFACTS_OUTPUT_DIR}/
cp ./scripts/confidential_compute.py ${ARTIFACTS_OUTPUT_DIR}/
cp ./scripts/aws/ec2.py ${ARTIFACTS_OUTPUT_DIR}/
cp ./scripts/aws/proxies.host.yaml ${ARTIFACTS_OUTPUT_DIR}/
cp ./scripts/aws/sockd.conf ${ARTIFACTS_OUTPUT_DIR}/
cp ./scripts/aws/uid2operator.service ${ARTIFACTS_OUTPUT_DIR}/
Expand Down
2 changes: 1 addition & 1 deletion scripts/aws/EUID_CloudFormation.template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ Resources:
KmsKeyId: !GetAtt KMSKey.Arn
Name: !Sub 'euid-config-stack-${AWS::StackName}'
SecretString: !Sub '{
"api_token":"${APIToken}",
"operator_key":"${APIToken}",
"service_instances":6,
"enclave_cpu_count":6,
"enclave_memory_mb":24576,
Expand Down
2 changes: 1 addition & 1 deletion scripts/aws/UID_CloudFormation.template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ Resources:
KmsKeyId: !GetAtt KMSKey.Arn
Name: !Sub 'uid2-config-stack-${AWS::StackName}'
SecretString: !Sub '{
"api_token":"${APIToken}",
"operator_key":"${APIToken}",
"service_instances":6,
"enclave_cpu_count":6,
"enclave_memory_mb":24576,
Expand Down
3 changes: 3 additions & 0 deletions scripts/aws/config-server/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
Flask==2.3.2
Werkzeug==3.0.3
setuptools==70.0.0
requests==2.32.3
boto3==1.35.59
urllib3==2.2.3
212 changes: 212 additions & 0 deletions scripts/aws/ec2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
#!/usr/bin/env python3

import boto3
import json
import os
import subprocess
import re
import multiprocessing
import requests #need requests[socks]
import signal
import argparse
from botocore.exceptions import ClientError
from typing import Dict
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from confidential_compute import ConfidentialCompute, ConfidentialComputeConfig


class EC2(ConfidentialCompute):

def __init__(self):
    """Initialise the base class and start with an empty configuration."""
    super().__init__()
    # Replaced with the Secrets Manager payload in _setup_auxiliaries().
    self.configs: ConfidentialComputeConfig = dict()

def __get_aws_token(self) -> str:
    """Fetch a temporary session token from the EC2 instance metadata service.

    Returns:
        The token text, to be sent in the ``X-aws-ec2-metadata-token`` header
        of subsequent metadata requests.

    Raises:
        RuntimeError: If the request fails or returns an HTTP error status.
    """
    token_url = "http://169.254.169.254/latest/api/token"
    try:
        response = requests.put(
            token_url,
            headers={"X-aws-ec2-metadata-token-ttl-seconds": "3600"},
            timeout=2,
        )
        # Without this check the body of an error response would be
        # returned and used as if it were a valid token.
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        raise RuntimeError(f"Failed to fetch aws token: {e}") from e

def __get_current_region(self) -> str:
    """Fetch the current AWS region from the EC2 instance identity document.

    Returns:
        The value of the ``region`` field of the identity document.

    Raises:
        RuntimeError: If the token or document cannot be fetched, or the
            document is not JSON containing a ``region`` field.
    """
    token = self.__get_aws_token()
    metadata_url = "http://169.254.169.254/latest/dynamic/instance-identity/document"
    headers = {"X-aws-ec2-metadata-token": token}
    try:
        response = requests.get(metadata_url, headers=headers, timeout=2)
        response.raise_for_status()
        return response.json()["region"]
    except requests.RequestException as e:
        raise RuntimeError(f"Failed to fetch region: {e}") from e
    except (KeyError, ValueError) as e:
        # A malformed document should surface as the same error type as a
        # failed request, not as a bare KeyError/ValueError.
        raise RuntimeError(f"Failed to fetch region: malformed identity document: {e}") from e

def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig:
    """Fetch the operator configuration from AWS Secrets Manager.

    Args:
        secret_identifier: Name or ARN of the secret to read. It is used
            as given; no test override is applied.

    Returns:
        The secret's JSON payload with missing optional settings filled in
        by ``__add_defaults``.

    Raises:
        RuntimeError: If Secrets Manager rejects the request.
    """
    region = self.__get_current_region()
    client = boto3.client("secretsmanager", region_name=region)
    try:
        secret = client.get_secret_value(SecretId=secret_identifier)
    except ClientError as e:
        raise RuntimeError(f"Unable to access Secrets Manager {secret_identifier}: {e}") from e
    return self.__add_defaults(json.loads(secret["SecretString"]))

@staticmethod
def __add_defaults(configs: Dict[str, object]) -> ConfidentialComputeConfig:
    """Add default values for optional settings missing from the configuration.

    The dictionary is updated in place and also returned.

    Args:
        configs: Parsed secret payload. Must contain ``environment`` unless
            both ``core_base_url`` and ``optout_base_url`` are present.

    Returns:
        The same dictionary with defaults filled in.

    Raises:
        KeyError: If a base URL has to be derived and ``environment`` is
            missing.
    """
    # NOTE(review): a reviewer asked where these defaults come from and why
    # they are reasonable. The memory and CPU values equal the ones the
    # UID2/EUID CloudFormation templates write into the secret.
    configs.setdefault("enclave_memory_mb", 24576)
    configs.setdefault("enclave_cpu_count", 6)
    configs.setdefault("debug_mode", False)
    if "core_base_url" not in configs or "optout_base_url" not in configs:
        # Look up "environment" only when a URL really needs a default, so a
        # configuration that supplies both URLs does not fail on a missing key.
        is_prod = configs["environment"] == "prod"
        configs.setdefault(
            "core_base_url",
            "https://core.uidapi.com" if is_prod else "https://core-integ.uidapi.com",
        )
        configs.setdefault(
            "optout_base_url",
            "https://optout.uidapi.com" if is_prod else "https://optout-integ.uidapi.com",
        )
    return configs

def __setup_vsockproxy(self, log_level: int) -> None:
    """
    Launches the vsock proxy as a daemon.
    The worker count is half the host CPU count, rounded up.
    TODO: Evaluate adding vsock logging based on log_level here
    """
    worker_total = (multiprocessing.cpu_count() + 1) // 2
    vsockpx_cmd = ["/usr/bin/vsockpx", "-c", "/etc/uid2operator/proxy.yaml"]
    vsockpx_cmd += ["--workers", str(worker_total)]
    vsockpx_cmd += ["--log-level", str(log_level)]
    vsockpx_cmd.append("--daemon")
    subprocess.run(vsockpx_cmd)

def __run_config_server(self, log_level) -> None:
    """
    Writes the configuration to disk and launches the Flask config server
    as a background process listening on 127.0.0.1:27015.
    TODO: Based on log level add logging to flask
    """
    os.makedirs("/etc/secret/secret-value", exist_ok=True)
    with open("/etc/secret/secret-value/config", 'w') as out:
        json.dump(self.configs, out)

    # The flask binary is addressed relative to the config-server directory.
    os.chdir("/opt/uid2operator/config-server")
    flask_cmd = ["./bin/flask", "run", "--host", "127.0.0.1", "--port", "27015"]
    popen_kwargs = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}
    try:
        subprocess.Popen(flask_cmd, **popen_kwargs)
    except Exception as err:
        joined = ' '.join(flask_cmd)
        print(f"Failed to start the Flask config server.\n '{joined}': {err}")
        raise RuntimeError("Failed to start required flask server")

def __run_socks_proxy(self, log_level) -> None:
    """
    Launches the SOCKS proxy (sockd) with the -D flag.
    TODO: Based on log level add logging to sockd
    """
    subprocess.run(["sockd", "-D"])

def __get_secret_name_from_userdata(self) -> str:
    """Extract the secret name from the EC2 user data.

    Looks for a line of the form ``export <SCOPE>_CONFIG_SECRET_KEY="..."``
    in the user data, where ``<SCOPE>`` is the upper-cased content of
    ``/opt/uid2operator/identity_scope.txt``.

    Returns:
        The quoted value from that line, or
        ``<scope>-operator-config-key`` when no such line exists.

    Raises:
        RuntimeError: If the user data request itself fails.
    """
    token = self.__get_aws_token()
    user_data_url = "http://169.254.169.254/latest/user-data"
    try:
        # Same bounded wait as the other metadata calls, so an unresponsive
        # endpoint cannot block startup indefinitely.
        response = requests.get(
            user_data_url, headers={"X-aws-ec2-metadata-token": token}, timeout=2
        )
    except requests.RequestException as e:
        raise RuntimeError(f"Failed to fetch user data: {e}") from e
    # The HTTP status is deliberately not checked: a response without the
    # export line simply falls through to the default name below.
    user_data = response.text

    with open("/opt/uid2operator/identity_scope.txt") as file:
        identity_scope = file.read().strip()

    default_name = f"{identity_scope.lower()}-operator-config-key"
    hardcoded_value = f"{identity_scope.upper()}_CONFIG_SECRET_KEY"
    # Escape the variable name so the file content is always matched literally.
    match = re.search(rf'^export {re.escape(hardcoded_value)}="(.+?)"$', user_data, re.MULTILINE)
    return match.group(1) if match else default_name

def _setup_auxiliaries(self) -> None:
    """Loads the configuration and starts the vsock proxy, config server and SOCKS proxy."""
    hostname = os.environ.get("HOSTNAME", os.uname()[1])
    try:
        with open("HOSTNAME", "w") as hostname_file:
            hostname_file.write(hostname)
            print(f"Hostname '{hostname}' written to file.")
    except Exception as e:
        # Ignoring the error here, as this information is currently not used
        # anywhere. It can be added in future for traceability when debugging.
        print(f"Error writing hostname: {e}")

    secret_name = self.__get_secret_name_from_userdata()
    self.configs = self._get_secret(secret_name)

    if self.configs["debug_mode"]:
        log_level = 3
    else:
        log_level = 1

    # Start order: vsock proxy, then config server, then SOCKS proxy.
    for start_service in (
        self.__setup_vsockproxy,
        self.__run_config_server,
        self.__run_socks_proxy,
    ):
        start_service(log_level)

def _validate_auxiliaries(self) -> None:
    """Validate that the config server answers directly and via the SOCKS proxy.

    Each check is announced on stdout before it runs.

    Raises:
        RuntimeError: If either request fails, times out or returns an HTTP
            error status.
    """
    # NOTE(review): a reviewer found "auxiliaries" too vague and asked that
    # every validation describe on stdout what it is testing.
    proxy = "socks5://127.0.0.1:3306"
    config_url = "http://127.0.0.1:27015/getConfig"
    # Bounded wait so a hung service fails validation, not blocks forever.
    timeout_seconds = 5

    print(f"Validating that the config server responds at {config_url}")
    try:
        response = requests.get(config_url, timeout=timeout_seconds)
        response.raise_for_status()
    except requests.RequestException as e:
        raise RuntimeError(f"Config server unreachable: {e}") from e

    print(f"Validating that the config server is reachable through the SOCKS proxy {proxy}")
    proxies = {"http": proxy, "https": proxy}
    try:
        response = requests.get(config_url, proxies=proxies, timeout=timeout_seconds)
        response.raise_for_status()
    except requests.RequestException as e:
        raise RuntimeError(f"Cannot connect to config server via SOCKS proxy: {e}") from e

def run_compute(self) -> None:
    """Starts and validates the supporting services, then launches the Nitro Enclave."""
    self._setup_auxiliaries()
    self._validate_auxiliaries()
    self.validate_connectivity(self.configs)

    memory_mb = str(self.configs["enclave_memory_mb"])
    cpu_count = str(self.configs["enclave_cpu_count"])
    enclave_cmd = ["nitro-cli", "run-enclave"]
    enclave_cmd.extend(["--eif-path", "/opt/uid2operator/uid2operator.eif"])
    enclave_cmd.extend(["--memory", memory_mb])
    enclave_cmd.extend(["--cpu-count", cpu_count])
    enclave_cmd.extend(["--enclave-cid", "42"])
    enclave_cmd.extend(["--enclave-name", "uid2operator"])
    if self.configs["debug_mode"]:
        enclave_cmd.extend(["--debug-mode", "--attach-console"])

    # check=True: a failed enclave launch raises CalledProcessError.
    subprocess.run(enclave_cmd, check=True)

def cleanup(self) -> None:
    """Terminate running Nitro Enclaves and the auxiliary processes.

    Every enclave reported by ``nitro-cli describe-enclaves`` is
    terminated, then the proxy processes are killed.

    Raises:
        RuntimeError: If a ``nitro-cli`` invocation fails.
    """
    # NOTE(review): a reviewer suggested replacing the describe/terminate
    # sequence with a single `nitro-cli terminate-enclave --all`.
    try:
        describe_output = subprocess.check_output(["nitro-cli", "describe-enclaves"], text=True)
        enclaves = json.loads(describe_output)
        enclave_ids = [enclave.get("EnclaveID") for enclave in enclaves]
        enclave_ids = [enclave_id for enclave_id in enclave_ids if enclave_id]
        if enclave_ids:
            for enclave_id in enclave_ids:
                subprocess.run(["nitro-cli", "terminate-enclave", "--enclave-id", enclave_id])
                print(f"Terminated enclave with ID: {enclave_id}")
        else:
            print("No active enclaves found.")
        self.__kill_auxiliaries()
    except subprocess.SubprocessError as e:
        # Raising a plain string is itself a TypeError; raise a real exception.
        raise RuntimeError(f"Error during cleanup: {e}") from e

def __kill_auxiliaries(self) -> None:
    """Force-kill the vsock proxy and SOCKS proxy processes.

    Processes are found with ``pgrep -f`` and sent SIGKILL. This is best
    effort: failures are printed, never raised, and a failure for one
    process name does not stop the other from being handled.
    """
    for process_name in ["vsockpx", "sockd"]:
        try:
            result = subprocess.run(
                ["pgrep", "-f", process_name],
                stdout=subprocess.PIPE,
                text=True,
                check=False,
            )
            pids = result.stdout.split()
            if not pids:
                print(f"No process named '{process_name}' found.")
                continue
            for pid in pids:
                os.kill(int(pid), signal.SIGKILL)
                print(f"Killed process '{process_name}'.")
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            # E.g. pgrep is missing, or the process exited before the kill.
            print(f"Error killing process '{process_name}': {e}")


if __name__ == "__main__":
    # Command-line entry point: "start" (default) launches the enclave,
    # "stop" tears it down together with the auxiliary processes.
    cli = argparse.ArgumentParser(description="Manage EC2-based confidential compute workflows.")
    cli.add_argument(
        "-o",
        "--operation",
        choices=["stop", "start"],
        default="start",
        help="Operation to perform.",
    )
    options = cli.parse_args()
    ec2 = EC2()
    action = ec2.cleanup if options.operation == "stop" else ec2.run_compute
    action()

1 change: 1 addition & 0 deletions scripts/aws/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ else
exit 1
fi

# TODO: Do we need this? Do we expect customers to change the base URLs?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've never heard of it being done before.

# -- replace base URLs if both CORE_BASE_URL and OPTOUT_BASE_URL are provided
# -- using hardcoded domains is fine because they should not be changed frequently
if [ -n "${CORE_BASE_URL}" ] && [ "${CORE_BASE_URL}" != "null" ] && [ -n "${OPTOUT_BASE_URL}" ] && [ "${OPTOUT_BASE_URL}" != "null" ] && [ "${DEPLOYMENT_ENVIRONMENT}" != "prod" ]; then
Expand Down
41 changes: 0 additions & 41 deletions scripts/aws/load_config.py

This file was deleted.

Loading