Commit ddf93f9
add provisioned version and refactor
gavinjwl committed Apr 17, 2023
1 parent 5c3cd4c commit ddf93f9
Showing 40 changed files with 704 additions and 1,122 deletions.
3 changes: 1 addition & 2 deletions cdk.json
@@ -24,7 +24,6 @@
     "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
     "@aws-cdk/core:checkSecretUsage": true,
     "@aws-cdk/aws-iam:minimizePolicies": true,
-    "@aws-cdk/core:target-partitions": ["aws", "aws-cn"],
-    "vpc-id": ""
+    "@aws-cdk/core:target-partitions": ["aws", "aws-cn"]
   }
 }
33 changes: 11 additions & 22 deletions cdk_app.py
@@ -1,32 +1,21 @@
 import os
-from aws_cdk import App
 
-from clickstream.stacks import CoreStack, DashboardStack, ScheduledRefreshStack
+from aws_cdk import App, Environment, Tags
+
+from clickstream.stack import ProvisionedStack
+# from clickstream.stack import ServerlessStack
 
 app = App()
 
-core_stack = CoreStack(
-    app, 'CoreStack',
-    os.path.join(
-        os.path.dirname(os.path.realpath(__file__)), 'clickstream', 'core'
-    )
-)
+Tags.of(app).add('Project', 'Clickstream')
 
-scheduled_refresh_stack = ScheduledRefreshStack(
-    app, 'ScheduledRefreshStack',
-    core_stack.workgroup,
-    core_stack.clickstream_redshift_role,
-    os.path.join(
-        os.path.dirname(os.path.realpath(__file__)),
-        'clickstream', 'scheduled_refresh'
-    )
+env = Environment(
+    account=os.environ.get('CDK_DEPLOY_ACCOUNT', os.environ['CDK_DEFAULT_ACCOUNT']),
+    region=os.environ.get('CDK_DEPLOY_REGION', os.environ['CDK_DEFAULT_REGION'])
 )
 
-dashboard_stack = DashboardStack(
-    app, 'DashboardStack',
-    core_stack.clickstream_backend_api,
-    core_stack.clickstream_backend_function,
-    core_stack.kinesis_stream,
-)
+ProvisionedStack(app, 'Clickstream', env=env)
+
+# ServerlessStack(app, 'Clickstream-Serverless')
 
 app.synth()
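The clickstream/stack.py module that defines ProvisionedStack is not rendered in this excerpt. As a rough sketch only, assuming it owns the VPC and Kinesis stream and wires up the nested stacks added later in this diff (EcsStack, ApiGatewayStack, RedshiftStack; the import paths below are likewise assumptions):

# Hypothetical sketch of clickstream/stack.py (not shown in this commit).
from aws_cdk import CfnParameter, Stack
from aws_cdk import aws_ec2 as ec2
from aws_cdk import aws_kinesis as kinesis
from constructs import Construct

from clickstream.backend.infrastructure import ApiGatewayStack, EcsStack
from clickstream.data_warehouse.infrastructure import RedshiftStack


class ProvisionedStack(Stack):
    def __init__(self, scope: Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Assumption: the top-level stack owns the shared network and stream.
        vpc = ec2.Vpc(self, 'Vpc', max_azs=2)
        stream = kinesis.Stream(self, 'ClickstreamKinesis')

        # Redshift's master password is taken from a CloudFormation
        # parameter here; Secrets Manager would be the hardened choice.
        password = CfnParameter(self, 'RedshiftPassword', no_echo=True)

        EcsStack(self, 'EcsStack', vpc=vpc, stream=stream)
        ApiGatewayStack(self, 'ApiGatewayStack', stream=stream)
        RedshiftStack(self, 'RedshiftStack', vpc=vpc, stream=stream,
                      redshift_password=password.value_as_string)

With the Environment lookup above, cdk deploy targets CDK_DEPLOY_ACCOUNT and CDK_DEPLOY_REGION when they are set and falls back to the CDK defaults otherwise.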
5 changes: 5 additions & 0 deletions clickstream/backend/.dockerignore
@@ -0,0 +1,5 @@
+.pytest_cache/
+__pycache__/
+tests/
+infrastructure.py
+**/*/infrastructure.py
15 changes: 15 additions & 0 deletions clickstream/backend/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3.9-slim-buster
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip3 install --no-cache-dir --upgrade -r requirements.txt --target .
+
+# Copy function code
+COPY env.py .
+COPY kinesis_producer.py .
+COPY models.py .
+COPY routers ./routers
+COPY server.py .
+
+CMD ["python", "-m", "uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8080"]
File renamed without changes.
125 changes: 125 additions & 0 deletions clickstream/backend/infrastructure.py
@@ -0,0 +1,125 @@
+import os
+from os import path
+
+from aws_cdk import CfnOutput, CfnParameter, Duration, NestedStack, Stack
+from aws_cdk import aws_apigateway as aws_apigw
+from aws_cdk import aws_ec2 as ec2
+from aws_cdk import aws_ecr_assets as ecr_assets
+from aws_cdk import aws_ecs as ecs
+from aws_cdk import aws_ecs_patterns as ecs_patterns
+from aws_cdk import aws_kinesis as kinesis
+from aws_cdk import aws_lambda
+from aws_cdk import aws_logs as logs
+from constructs import Construct
+
+
+class EcsStack(NestedStack):
+
+    def __init__(self, scope: Construct, id: str, vpc: ec2.IVpc, stream: kinesis.Stream, **kwargs) -> None:
+        super().__init__(scope, id, **kwargs)
+
+        ecr_asset = ecr_assets.DockerImageAsset(
+            self, 'DockerImageAsset',
+            directory=path.join(os.path.dirname(__file__)),
+        )
+
+        cluster = ecs.Cluster(
+            self, 'EcsCluster',
+            vpc=vpc,
+        )
+
+        fargate_task = ecs.FargateTaskDefinition(
+            self, "FargetTaskDefinition",
+            cpu=1024,
+            memory_limit_mib=2048,
+        )
+        fargate_task.add_container(
+            "FargateContainer",
+            image=ecs.ContainerImage.from_ecr_repository(ecr_asset.repository, tag=ecr_asset.image_tag),
+            port_mappings=[ecs.PortMapping(container_port=8080)],
+            environment={
+                'KINESIS_STREAM_NAME': stream.stream_name,
+                'WRITE_KEY': 'write_key',
+            },
+            logging=ecs.LogDrivers.aws_logs(
+                stream_prefix='clickstream',
+                log_retention=logs.RetentionDays.ONE_WEEK,
+            ),
+        )
+        stream.grant_read_write(fargate_task.task_role)
+
+        # TODO: enable custom domain name with ACM certificate
+        fargate_service = ecs_patterns.ApplicationLoadBalancedFargateService(
+            self, "FargateService",
+            cluster=cluster,  # Required
+            task_definition=fargate_task,
+            desired_count=2,  # Default is 1
+            public_load_balancer=True,  # Default is False
+            # protocol='HTTPS',
+            # certificate (Optional[ICertificate]) =None,
+            # domain_name
+            # domain_zone
+            # redirect_http=True,
+        )
+
+        # TODO Setup AutoScaling policy
+        # scaling_policy = fargate_service.service.auto_scale_task_count(
+        #     min_capacity=2,
+        #     max_capacity=10
+        # )
+        # scaling_policy.scale_on_cpu_utilization(
+        #     "CpuScaling",
+        #     policy_name="clickstream-ecs-service-cpu-scaling",
+        #     target_utilization_percent=40,
+        #     scale_in_cooldown=Duration.seconds(300),
+        #     scale_out_cooldown=Duration.seconds(60),
+        # )
+        # scaling_policy.scale_on_request_count(
+        #     "RequestScaling",
+        #     target_group=fargate_service.target_group,
+        #     requests_per_target=1000,
+        #     scale_in_cooldown=Duration.seconds(300),
+        #     scale_out_cooldown=Duration.seconds(60),
+        # )
+
+        CfnOutput(
+            self, "LoadBalancerDNS",
+            value=fargate_service.load_balancer.load_balancer_dns_name
+        )
+
+
+class ApiGatewayStack(NestedStack):
+    def __init__(self, scope: Construct, id: str, stream: kinesis.Stream, **kwargs) -> None:
+        super().__init__(scope, id, **kwargs)
+
+        # Log server used for parsing and transform
+        clickstream_backend_function = aws_lambda.Function(
+            self, id='ClickstreamBackendFunction',
+            function_name='ClickstreamBackendFunction',
+            code=aws_lambda.EcrImageCode.from_asset_image(
+                directory=path.join(os.path.dirname(__file__)),
+                file='lambda.dockerfile',
+            ),
+            handler=aws_lambda.Handler.FROM_IMAGE,
+            runtime=aws_lambda.Runtime.FROM_IMAGE,
+            memory_size=128,
+            environment={
+                'KINESIS_STREAM_NAME': stream.stream_name,
+                'WRITE_KEY': 'write_key',
+            },
+            dead_letter_queue_enabled=True,
+            timeout=Duration.seconds(30),
+        )
+        stream.grant_read_write(clickstream_backend_function)
+
+        # Log API endpoint
+        # ANY /*
+        clickstream_backend_api = aws_apigw.LambdaRestApi(
+            self, id='ClickstreamBackendAPI',
+            rest_api_name='ClickstreamBackendAPI',
+            handler=clickstream_backend_function,
+            deploy_options=aws_apigw.StageOptions(
+                throttling_rate_limit=20,
+                throttling_burst_limit=10,
+            ),
+        )
File renamed without changes.
@@ -3,7 +3,7 @@ FROM public.ecr.aws/lambda/python:3.9
 # Install the function's dependencies using file requirements.txt
 # from your project folder.
 COPY requirements.txt .
-RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
+RUN pip3 install --no-cache-dir --upgrade -r requirements.txt --target "${LAMBDA_TASK_ROOT}"

 # Copy function code
 COPY env.py ${LAMBDA_TASK_ROOT}
File renamed without changes.
File renamed without changes.
File renamed without changes.
3 changes: 3 additions & 0 deletions clickstream/backend/requirements.txt
@@ -0,0 +1,3 @@
+boto3==1.26.87
+fastapi==0.93.0
+uvicorn==0.20.0
File renamed without changes.
File renamed without changes.
File renamed without changes.
7 changes: 7 additions & 0 deletions clickstream/core/server.py → clickstream/backend/server.py
@@ -12,5 +12,12 @@
     allow_methods=["*"],
     allow_headers=["*"],
 )
+
+
+@app.get('/')
+def index():
+    return 'ok'
+
+
 app.include_router(analytics_next.router)
 app.include_router(analytics_python.router)
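The new index route gives the load balancer and the API Gateway stage a cheap health-check target. A quick way to verify it, sketched with FastAPI's TestClient (this test file is not part of the commit):

# test_server.py (hypothetical; not included in the commit)
from fastapi.testclient import TestClient

from server import app

client = TestClient(app)


def test_index():
    response = client.get('/')
    assert response.status_code == 200
    assert response.json() == 'ok'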
File renamed without changes.
2 changes: 0 additions & 2 deletions clickstream/core/.dockerignore

This file was deleted.

108 changes: 108 additions & 0 deletions clickstream/data_warehouse/infrastructure.py
@@ -0,0 +1,108 @@
+from aws_cdk import CfnTag, NestedStack
+from aws_cdk import aws_ec2 as ec2
+from aws_cdk import aws_iam as iam
+from aws_cdk import aws_kinesis as kinesis
+from aws_cdk import aws_redshift as redshift
+from aws_cdk import aws_redshiftserverless as redshiftserverless
+from constructs import Construct
+
+
+class RedshiftStack(NestedStack):
+    def __init__(self, scope: Construct, id: str, vpc: ec2.IVpc, stream: kinesis.Stream, redshift_password: str, **kwargs):
+        super().__init__(scope, id, **kwargs)
+
+        role = iam.Role(
+            self, id='RedshiftRole',
+            assumed_by=iam.ServicePrincipal('redshift.amazonaws.com'),
+            managed_policies=[
+                iam.ManagedPolicy.from_aws_managed_policy_name(
+                    'AmazonRedshiftAllCommandsFullAccess')
+            ]
+        )
+        stream.grant_read_write(role)
+
+        selection = map(
+            lambda subnet: subnet.subnet_id,
+            vpc.private_subnets
+        )
+        subnet_group = redshift.CfnClusterSubnetGroup(
+            self, "SubnetGroup",
+            description="Clickstream redshift subnet group",
+            subnet_ids=list(selection),
+            tags=[
+                CfnTag(key="Project", value="Clickstream")
+            ],
+        )
+
+        redshift.CfnCluster(
+            self, "Redshift",
+
+            # required properties
+            cluster_type="multi-node",
+            db_name="clickstream",
+            master_username="awsuser",
+            master_user_password=redshift_password,
+            node_type="ra3.xlplus",
+            number_of_nodes=2,
+
+            # the properties below are optional
+            publicly_accessible=False,
+            availability_zone_relocation=True,
+            cluster_subnet_group_name=subnet_group.ref,
+            encrypted=True,
+            enhanced_vpc_routing=True,
+            iam_roles=[
+                role.role_arn
+            ],
+            tags=[
+                CfnTag(key="Project", value="Clickstream")
+            ],
+
+        )
+
+
+class RedshiftServerlessStack(NestedStack):
+    def __init__(self, scope: Construct, id: str, vpc: ec2.IVpc, stream: kinesis.Stream, **kwargs):
+        super().__init__(scope, id, **kwargs)
+
+        selection = map(
+            lambda subnet: subnet.subnet_id,
+            vpc.private_subnets
+        )
+        subnet_ids = list(selection)
+
+        default_security_group = ec2.SecurityGroup.from_lookup_by_name(
+            self, id='DefaultSecurityGroup',
+            security_group_name='default',
+            vpc=vpc,
+        )
+
+        # Data warehouse
+        redshift_service_role = iam.Role(
+            self, id='ClickstreamRedshiftServiceRole',
+            assumed_by=iam.ServicePrincipal('redshift.amazonaws.com'),
+            managed_policies=[
+                iam.ManagedPolicy.from_aws_managed_policy_name(
+                    'AmazonRedshiftAllCommandsFullAccess')
+            ]
+        )
+        stream.grant_read(redshift_service_role)
+
+        namespace = redshiftserverless.CfnNamespace(
+            self, id='RedshiftServerlessNamespace',
+            namespace_name='clickstream-namespace',
+            default_iam_role_arn=redshift_service_role.role_arn,
+            iam_roles=[redshift_service_role.role_arn],
+            log_exports=['userlog', 'connectionlog', 'useractivitylog'],
+        )
+        workgroup = redshiftserverless.CfnWorkgroup(
+            self, id='RedshiftServerlessWorkgroup',
+            namespace_name=namespace.namespace_name,
+            workgroup_name='clickstream-workgroup',
+            base_capacity=32,
+            subnet_ids=subnet_ids,
+            security_group_ids=[default_security_group.security_group_id],
+            publicly_accessible=False,
+            enhanced_vpc_routing=False,
+        )
+        workgroup.add_depends_on(namespace)
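Once the workgroup is up, the Redshift Data API can query it without a JDBC connection. A minimal smoke test, assuming the namespace's default 'dev' database since db_name is not set above (this script is not part of the commit):

# smoke_test.py (hypothetical; not included in the commit)
import boto3

client = boto3.client('redshift-data')

# 'dev' is the serverless namespace's default database when db_name is unset.
response = client.execute_statement(
    WorkgroupName='clickstream-workgroup',
    Database='dev',
    Sql='SELECT 1;',
)
print(response['Id'])  # statement id; poll describe_statement() for completion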
12 changes: 0 additions & 12 deletions clickstream/scheduled_refresh/Dockerfile

This file was deleted.

26 changes: 0 additions & 26 deletions clickstream/scheduled_refresh/lambda_function.py

This file was deleted.

2 changes: 0 additions & 2 deletions clickstream/scheduled_refresh/requirements.txt

This file was deleted.
