Skip to content

Commit

Permalink
chore: stack name
Browse files Browse the repository at this point in the history
  • Loading branch information
ADGEfficiency committed Jun 3, 2024
1 parent 1b67e8e commit 0340d88
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 28 deletions.
13 changes: 8 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ DATA_HOME = ./data
# --------------------------------------
.PHONY: deploy

deploy: setup seed regen pushs3 zip deploy-flyio
deploy: setup pulls3 pulls3-urls seed regen crawl pushs3 zip deploy-flyio

# --------------------------------------
# SETUP
Expand All @@ -24,7 +24,7 @@ setup:
# --------------------------------------
.PHONY: crawl

crawl: setup pulls3-urls
crawl:
cat newspapers.json | jq '.[].name' | xargs -n 1 -I {} scrapy crawl {} -o $(DATA_HOME)/articles/{}.jsonl -L DEBUG

# --------------------------------------
Expand Down Expand Up @@ -60,16 +60,19 @@ regen: seed
# --------------------------------------
.PHONY: pulls3 pulls3-urls pushs3

S3_BUCKET=$(shell aws cloudformation describe-stacks --stack-name ClimateNewsDB --region ap-southeast-2 --query 'Stacks[0].Outputs[?OutputKey==`BucketNameOutput`].OutputValue' --output text)
S3_BUCKET=$(shell aws cloudformation describe-stacks --stack-name ClimateNewsDB --region ap-southeast-2 --query 'Stacks[0].Outputs[?OutputKey==`UnversionedBucket`].OutputValue' --output text)
S3_DIR=s3://$(S3_BUCKET)

VERSIONED_S3_BUCKET=$(shell aws cloudformation describe-stacks --stack-name ClimateNewsDB --region ap-southeast-2 --query 'Stacks[0].Outputs[?OutputKey==`VersionedBucket`].OutputValue' --output text)
VERSIONED_S3_DIR=s3://$(VERSIONED_S3_BUCKET)

pulls3:
	aws --region ap-southeast-2 s3 sync $(S3_DIR) $(DATA_HOME) --exclude 'html/*'

pulls3-urls:
	echo "$(shell wc -l $(DATA_HOME)/urls.jsonl) urls"
	aws --region ap-southeast-2 s3 cp $(S3_DIR)/urls.jsonl $(DATA_HOME)/urls.jsonl
	echo "$(shell wc -l $(DATA_HOME)/urls.jsonl) urls"
	aws --region ap-southeast-2 s3 cp $(VERSIONED_S3_DIR)/urls.jsonl $(DATA_HOME)/urls.jsonl
	echo "$$(wc -l $(DATA_HOME)/urls.jsonl) urls"

pushs3:
aws s3 sync $(DATA_HOME) $(S3_DIR)
Expand Down
34 changes: 15 additions & 19 deletions infra/stacks.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
import pandas as pd
from aws_cdk import (
CfnOutput,
Duration,
Stack,
aws_events,
aws_events_targets,
aws_iam,
aws_lambda,
aws_s3,
)
from aws_cdk import (CfnOutput, Duration, Stack, aws_events,
aws_events_targets, aws_iam, aws_lambda, aws_s3)
from aws_cdk.aws_ecr_assets import Platform
from constructs import Construct

Expand All @@ -17,9 +9,20 @@


class Search(Stack):
def __init__(self, scope: Construct, id: str, **kwargs) -> None:
def __init__(self, scope: Construct, id: str, **kwargs: dict) -> None:
super().__init__(scope, id, **kwargs)

unversioned_bucket = aws_s3.Bucket(self, "Unversioned")
CfnOutput(self, "UnversionedBucket", value=unversioned_bucket.bucket_name, export_name="UnversionedBucket")

versioned_bucket = aws_s3.Bucket(
self,
"Versioned",
versioned=True,
lifecycle_rules=[aws_s3.LifecycleRule(noncurrent_version_expiration=Duration.days(30))]
)
CfnOutput(self, "VersionedBucket", value=versioned_bucket.bucket_name, export_name="VersionedBucket")

lambda_role = aws_iam.Role(
self,
"LambdaRole",
Expand All @@ -42,11 +45,6 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
},
)

bucket = aws_s3.Bucket(self, "BucketName")
CfnOutput(
self, "BucketNameOutput", value=bucket.bucket_name, export_name="BucketName"
)

search_function = aws_lambda.Function(
self,
"SearchLambda",
Expand Down Expand Up @@ -79,11 +77,9 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
lambda_start_times = pd.date_range(
"2021-01-01T00:00:00", freq=f"{timeout}T", periods=len(newspapers)
)

max_newspapers_per_day = 24 * 60 / timeout
assert len(newspapers) < max_newspapers_per_day
print(f"scheduling newspapers until {lambda_start_times[-1]}")

for start_time, newspaper in zip(lambda_start_times, newspapers):
print(f"scheduling {start_time} {newspaper.name}")
aws_events.Rule(
Expand All @@ -100,7 +96,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
search_function,
event=aws_events.RuleTargetInput.from_object(
SearchLambdaEvent(
s3_bucket=bucket.bucket_name,
s3_bucket=versioned_bucket.bucket_name,
newspaper_name=newspaper.name,
).dict()
),
Expand Down
6 changes: 3 additions & 3 deletions scripts/run-search-lambdas.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import random
import typing

import boto3
Expand Down Expand Up @@ -28,13 +29,12 @@ def get_exported_bucket_name(export_name: str) -> typing.Optional[str]:


if __name__ == "__main__":
bucket_name = get_exported_bucket_name("BucketName")
bucket_name = get_exported_bucket_name("VersionedBucket")
function_name = get_lambda_function_name("SearchLambda")

assert bucket_name is not None
assert function_name is not None
newspapers = read_newspapers_json()
import random
random.shuffle(newspapers)

print(
Expand All @@ -48,7 +48,7 @@ def get_exported_bucket_name(export_name: str) -> typing.Optional[str]:
FunctionName=function_name,
InvocationType="RequestResponse",
Payload=SearchLambdaEvent(
s3_bucket=bucket_name, newspaper_name=newspaper.name, num=5
s3_bucket=bucket_name, newspaper_name=newspaper.name, num=50
).json(),
)
response_payload = json.loads(response["Payload"].read())
Expand Down
1 change: 0 additions & 1 deletion stats.json

This file was deleted.

0 comments on commit 0340d88

Please sign in to comment.