Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Cloud Resource ID #46

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 122 additions & 25 deletions src/censys/cloud_connectors/aws_connector/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class AwsCloudConnector(CloudConnector):

account_number: str
region: Optional[str]
partition: Optional[str]

# Current set of ignored tags (combined set of user settings + overall settings)
ignored_tags: list[str]
Expand Down Expand Up @@ -194,6 +195,20 @@ def credentials(self) -> dict:
self.provider_settings.session_token,
)

def get_aws_partition(self) -> str:
    """Get the AWS partition (for example ``aws``, ``aws-cn`` or ``aws-us-gov``).

    The partition is looked up from a region name, chosen in this order:
    the connector's current ``self.region``, the default boto3 session's
    configured region, and finally ``us-east-1``.

    Returns:
        str: AWS partition.
    """
    # NOTE(review): `_get_default_session` is a private boto3 helper; it is
    # kept here so the lookup reuses the session that `boto3.client` uses.
    default_session = boto3._get_default_session()

    # We need to have some type of region name here, just to check the
    # partition. This shouldn't affect the client since we are passing in
    # the credentials. Without the final fallback a missing region would be
    # passed through as None, which cannot be resolved to a partition.
    region_name = self.region or default_session.region_name or "us-east-1"
    return default_session.get_partition_for_region(region_name)

def get_aws_client(
self, service: AwsServices, credentials: Optional[dict] = None
) -> T:
Expand All @@ -213,14 +228,18 @@ def get_aws_client(
credentials = credentials or self.credentials()
if credentials.get("aws_access_key_id"):
self.logger.debug(f"AWS Service {service} using access key credentials")
return boto3.client(service, **credentials) # type: ignore
client = boto3.client(service, **credentials) # type: ignore
self.partition = self.get_aws_partition()
return client

# calling client without credentials follows the standard
# credential import path to source creds from the environment
self.logger.debug(
f"AWS Service {service} using external boto configuration"
)
return boto3.client(service) # type: ignore
client = boto3.client(service) # type: ignore
self.partition = self.get_aws_partition()
return client
except Exception as e:
self.logger.error(
f"Could not connect with client type '{service}'. Error: {e}"
Expand Down Expand Up @@ -341,12 +360,20 @@ def get_api_gateway_domains_v1(self):
label = self.format_label(SeedLabel.API_GATEWAY)
try:
apis = client.get_rest_apis()
for domain in apis.get("items", []):
domain_name = f"{domain['id']}.execute-api.{self.region}.amazonaws.com"
for rest_api_item in apis.get("items", []):
api_id = rest_api_item["id"]
domain_name = f"{api_id}.execute-api.{self.region}.amazonaws.com"
arn = f"arn:{self.partition}:apigateway:{self.region}:{self.account_number}:/restapis/{api_id}"
created_at = rest_api_item["createdDate"]
# TODO: emit log when a seed is dropped due to validation error
with SuppressValidationError():
domain_seed = DomainSeed(value=domain_name, label=label)
self.add_seed(domain_seed, api_gateway_res=domain)
domain_seed = DomainSeed(
value=domain_name,
label=label,
cloud_resource_id=arn,
created_at=created_at,
)
self.add_seed(domain_seed, api_gateway_res=rest_api_item)
except ClientError as e:
self.logger.error(f"Could not connect to API Gateway V1. Error: {e}")

Expand All @@ -358,11 +385,19 @@ def get_api_gateway_domains_v2(self):
label = self.format_label(SeedLabel.API_GATEWAY)
try:
apis = client.get_apis()
for domain in apis.get("Items", []):
domain_name = domain["ApiEndpoint"].split("//")[1]
for api_item in apis.get("Items", []):
domain_name = api_item["ApiEndpoint"].split("//")[1]
api_id = api_item["ApiId"]
arn = f"arn:{self.partition}:apigateway:{self.region}:{self.account_number}:/apis/{api_id}"
created_at = api_item["CreatedDate"]
with SuppressValidationError():
domain_seed = DomainSeed(value=domain_name, label=label)
self.add_seed(domain_seed, api_gateway_res=domain)
domain_seed = DomainSeed(
value=domain_name,
label=label,
cloud_resource_id=arn,
created_at=created_at,
)
self.add_seed(domain_seed, api_gateway_res=api_item)
except ClientError as e:
self.logger.error(f"Could not connect to API Gateway V2. Error: {e}")

Expand All @@ -384,8 +419,16 @@ def get_load_balancers_v1(self):
data = client.describe_load_balancers()
for elb in data.get("LoadBalancerDescriptions", []):
if value := elb.get("DNSName"):
loadbalancer_name = elb.get("LoadBalancerName")
arn = f"arn:{self.partition}:elasticloadbalancing:{self.region}:{self.account_number}:loadbalancer/{loadbalancer_name}"
created_at = elb.get("CreatedTime")
with SuppressValidationError():
domain_seed = DomainSeed(value=value, label=label)
domain_seed = DomainSeed(
value=value,
label=label,
cloud_resource_id=arn,
created_at=created_at,
)
self.add_seed(domain_seed, elb_res=elb, aws_client=client)
except ClientError as e:
self.logger.error(f"Could not connect to ELB V1. Error: {e}")
Expand All @@ -400,8 +443,15 @@ def get_load_balancers_v2(self):
data = client.describe_load_balancers()
for elb in data.get("LoadBalancers", []):
if value := elb.get("DNSName"):
arn = elb.get("LoadBalancerArn")
created_at = elb.get("CreatedTime")
with SuppressValidationError():
domain_seed = DomainSeed(value=value, label=label)
domain_seed = DomainSeed(
value=value,
label=label,
cloud_resource_id=arn,
created_at=created_at,
)
self.add_seed(domain_seed, elb_res=elb, aws_client=client)
except ClientError as e:
self.logger.error(f"Could not connect to ELB V2. Error: {e}")
Expand Down Expand Up @@ -434,8 +484,16 @@ def get_network_interfaces(self):
)
continue

arn = f"arn:{self.partition}:ec2:{self.region}:{self.account_number}:network-interface/{record['NetworkInterfaceId']}"
created_at = record["AttachedAt"]

with SuppressValidationError():
ip_seed = IpSeed(value=ip_address, label=label)
ip_seed = IpSeed(
value=ip_address,
label=label,
cloud_resource_id=arn,
created_at=created_at,
)
self.add_seed(ip_seed, tags=instance_tag_sets.get(instance_id))
has_added_seeds = True
if not has_added_seeds:
Expand All @@ -462,7 +520,9 @@ def describe_network_interfaces(self) -> dict:
data = ec2.describe_network_interfaces(Filters=filters)
for network in data.get("NetworkInterfaces", {}):
network_interface_id = network.get("NetworkInterfaceId")
instance_id = network.get("Attachment", {}).get("InstanceId")
attachment = network.get("Attachment", {})
instance_id = attachment.get("InstanceId")
attached_at = attachment.get("AttachTime")

if self.network_interfaces_ignored_tags(network):
self.logger.debug(
Expand All @@ -475,6 +535,7 @@ def describe_network_interfaces(self) -> dict:
interfaces[ip_address] = {
"NetworkInterfaceId": network_interface_id,
"InstanceId": instance_id,
"AttachedAt": attached_at,
}
except ClientError as e:
self.logger.error(f"Could not connect to ENI Service. Error: {e}")
Expand Down Expand Up @@ -564,8 +625,15 @@ def get_rds_instances(self):
continue

if domain_name := instance.get("Endpoint", {}).get("Address"):
arn = instance.get("DBInstanceArn")
created_at = instance.get("InstanceCreateTime")
with SuppressValidationError():
domain_seed = DomainSeed(value=domain_name, label=label)
domain_seed = DomainSeed(
value=domain_name,
label=label,
cloud_resource_id=arn,
created_at=created_at,
)
self.add_seed(domain_seed, rds_res=instance)
has_added_seeds = True
if not has_added_seeds:
Expand Down Expand Up @@ -616,22 +684,28 @@ def get_route53_zones(self):
if zone.get("Config", {}).get("PrivateZone"):
continue

zone_id = zone.get("Id")
arn = f"arn:{self.partition}:route53::{self.account_number}:hostedzone/{zone_id}"

# Add the zone itself as a seed
domain_name = zone.get("Name").rstrip(".")
with SuppressValidationError():
domain_seed = DomainSeed(value=domain_name, label=label)
domain_seed = DomainSeed(
value=domain_name, label=label, cloud_resource_id=arn
)
self.add_seed(domain_seed, route53_zone_res=zone, aws_client=client)
has_added_seeds = True

id = zone.get("Id")
resource_sets = self._get_route53_zone_resources(client, id)
resource_sets = self._get_route53_zone_resources(client, zone_id)
for resource_set in resource_sets.get("ResourceRecordSets", []):
if resource_set.get("Type") not in VALID_RECORD_TYPES:
continue

domain_name = resource_set.get("Name").rstrip(".")
with SuppressValidationError():
domain_seed = DomainSeed(value=domain_name, label=label)
domain_seed = DomainSeed(
value=domain_name, label=label, cloud_resource_id=arn
)
self.add_seed(
domain_seed, route53_zone_res=zone, aws_client=client
)
Expand Down Expand Up @@ -659,22 +733,40 @@ def get_ecs_instances(self):
cluster=cluster, containerInstances=containers
)

instance_ids = [
i.get("ec2InstanceId")
instance_id_to_details = {
i.get("ec2InstanceId"): {
"arn": i.get("containerInstanceArn"),
"created_at": i.get("registeredAt"),
}
for i in instances.get("containerInstances", [])
]
if not instance_ids:
}
if not instance_id_to_details:
continue

descriptions = ec2.describe_instances(InstanceIds=instance_ids)
descriptions = ec2.describe_instances(
InstanceIds=instance_id_to_details.keys()
)
for reservation in descriptions.get("Reservations", []):
for instance in reservation.get("Instances", []):
ip_address = instance.get("PublicIpAddress")
if not ip_address:
continue

instance_id = instance.get("InstanceId")
details = instance_id_to_details.get(instance_id)
if not details:
continue

arn = details.get("arn")
created_at = details.get("created_at")

with SuppressValidationError():
ip_seed = IpSeed(value=ip_address, label=label)
ip_seed = IpSeed(
value=ip_address,
label=label,
cloud_resource_id=arn,
created_at=created_at,
)
self.add_seed(ip_seed, ecs_res=instance)
has_added_seeds = True
if not has_added_seeds:
Expand Down Expand Up @@ -709,13 +801,18 @@ def get_s3_instances(self):
region = self.get_s3_region(client, bucket_name)
label = self.format_label(SeedLabel.STORAGE_BUCKET, region)

arn = f"arn:{self.partition}:s3:::{bucket_name}"
created_at = bucket.get("CreationDate")

with SuppressValidationError():
bucket_asset = AwsStorageBucketAsset(
value=AwsStorageBucketAsset.url(bucket_name, region),
uid=label,
scan_data={
"accountNumber": self.account_number,
},
cloud_resource_id=arn,
created_at=created_at,
)
self.add_cloud_asset(
bucket_asset, bucket_name=bucket_name, aws_client=client
Expand Down
42 changes: 33 additions & 9 deletions src/censys/cloud_connectors/azure_connector/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,12 @@ def get_ip_addresses(self):
for asset in network_client.public_ip_addresses.list_all():
asset_dict = asset.as_dict()
if ip_address := asset_dict.get("ip_address"):
resource_id = asset_dict.get("id")
with SuppressValidationError():
label = self.format_label(asset)
ip_seed = IpSeed(value=ip_address, label=label)
ip_seed = IpSeed(
value=ip_address, label=label, cloud_resource_id=resource_id
)
self.add_seed(ip_seed)
self.possible_labels.discard(label)

Expand All @@ -156,13 +159,18 @@ def get_clusters(self):
and (ip_address := ip_address_dict.get("ip"))
):
label = self.format_label(asset)
resource_id = asset_dict.get("id")
with SuppressValidationError():
ip_seed = IpSeed(value=ip_address, label=label)
ip_seed = IpSeed(
value=ip_address, label=label, cloud_resource_id=resource_id
)
self.add_seed(ip_seed)
self.possible_labels.discard(label)
if domain := ip_address_dict.get("fqdn"):
with SuppressValidationError():
domain_seed = DomainSeed(value=domain, label=label)
domain_seed = DomainSeed(
value=domain, label=label, cloud_resource_id=resource_id
)
self.add_seed(domain_seed)
self.possible_labels.discard(label)

Expand All @@ -174,9 +182,12 @@ def get_sql_servers(self):
if (
domain := asset_dict.get("fully_qualified_domain_name")
) and asset_dict.get("public_network_access") == "Enabled":
label = self.format_label(asset)
resource_id = asset_dict.get("id")
with SuppressValidationError():
label = self.format_label(asset)
domain_seed = DomainSeed(value=domain, label=label)
domain_seed = DomainSeed(
value=domain, label=label, cloud_resource_id=resource_id
)
self.add_seed(domain_seed)
self.possible_labels.discard(label)

Expand All @@ -203,14 +214,21 @@ def get_dns_records(self):
zone_resource_group, zone_dict.get("name")
):
asset_dict = asset.as_dict()
resource_id = asset_dict.get("id")
if domain_name := asset_dict.get("fqdn"):
with SuppressValidationError():
domain_seed = DomainSeed(value=domain_name, label=label)
domain_seed = DomainSeed(
value=domain_name,
label=label,
cloud_resource_id=resource_id,
)
self.add_seed(domain_seed)
self.possible_labels.discard(label)
if cname := asset_dict.get("cname_record", {}).get("cname"):
with SuppressValidationError():
domain_seed = DomainSeed(value=cname, label=label)
domain_seed = DomainSeed(
value=cname, label=label, cloud_resource_id=resource_id
)
self.add_seed(domain_seed)
self.possible_labels.discard(label)
for a_record in asset_dict.get("a_records", []):
Expand All @@ -219,7 +237,9 @@ def get_dns_records(self):
continue

with SuppressValidationError():
ip_seed = IpSeed(value=ip_address, label=label)
ip_seed = IpSeed(
value=ip_address, label=label, cloud_resource_id=resource_id
)
self.add_seed(ip_seed)
self.possible_labels.discard(label)

Expand Down Expand Up @@ -253,11 +273,14 @@ def get_storage_containers(self):
)
label = self.format_label(account)
account_dict = account.as_dict()
resource_id = account_dict.get("id")
if (custom_domain := account_dict.get("custom_domain")) and (
domain := custom_domain.get("name")
):
with SuppressValidationError():
domain_seed = DomainSeed(value=domain, label=label)
domain_seed = DomainSeed(
value=domain, label=label, cloud_resource_id=resource_id
)
self.add_seed(domain_seed)
self.possible_labels.discard(label)
uid = f"{self.subscription_id}/{self.credentials._tenant_id}/{account.name}"
Expand All @@ -275,6 +298,7 @@ def get_storage_containers(self):
"publicAccess": container.public_access,
"location": account.location,
},
cloud_resource_id=resource_id,
)
self.add_cloud_asset(container_asset)
except ServiceRequestError as error: # pragma: no cover
Expand Down
Loading