Skip to content

Commit

Permalink
feat: infra TL scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
cka-y committed Nov 5, 2024
1 parent 3d75359 commit 9e2e617
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 10 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/api-deployer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,18 @@ jobs:
echo "GLOBAL_RATE_LIMIT_REQ_PER_MINUTE=${{ inputs.GLOBAL_RATE_LIMIT_REQ_PER_MINUTE }}" >> $GITHUB_ENV
echo "VALIDATOR_ENDPOINT=${{ inputs.VALIDATOR_ENDPOINT }}" >> $GITHUB_ENV
# Resolves the Transitland API key from 1Password and, because export-env is
# true, exports it as the TRANSITLAND_API_KEY environment variable.
# Presumably consumed by the "Populate Variables" step that follows — confirm.
- name: Load secret from 1Password
uses: 1password/load-secrets-action@v2
with:
export-env: true
env:
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
# NOTE(review): the item name below reads "TansitLand" (missing "r").
# Confirm it matches the 1Password item title exactly; if the item is
# actually titled "TransitLand API Key" this reference will not resolve.
TRANSITLAND_API_KEY: "op://TECH: Keys & Tokens/TansitLand API Key/credentials"

- name: Populate Variables
run: |
scripts/replace-variables.sh -in_file infra/backend.conf.rename_me -out_file infra/backend.conf -variables BUCKET_NAME,OBJECT_PREFIX
scripts/replace-variables.sh -in_file infra/vars.tfvars.rename_me -out_file infra/vars.tfvars -variables PROJECT_ID,REGION,ENVIRONMENT,DEPLOYER_SERVICE_ACCOUNT,FEED_API_IMAGE_VERSION,OAUTH2_CLIENT_ID,OAUTH2_CLIENT_SECRET,GLOBAL_RATE_LIMIT_REQ_PER_MINUTE,ARTIFACT_REPO_NAME,VALIDATOR_ENDPOINT
scripts/replace-variables.sh -in_file infra/vars.tfvars.rename_me -out_file infra/vars.tfvars -variables PROJECT_ID,REGION,ENVIRONMENT,DEPLOYER_SERVICE_ACCOUNT,FEED_API_IMAGE_VERSION,OAUTH2_CLIENT_ID,OAUTH2_CLIENT_SECRET,GLOBAL_RATE_LIMIT_REQ_PER_MINUTE,ARTIFACT_REPO_NAME,VALIDATOR_ENDPOINT,TRANSITLAND_API_KEY
- uses: hashicorp/setup-terraform@v3
with:
Expand Down
1 change: 1 addition & 0 deletions functions-python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ The function configuration file contains the following properties:
- `max_instance_count`: The maximum number of function instances that can be created in response to a load.
- `min_instance_count`: The minimum number of function instances that can be created in response to a load.
- `available_cpu_count`: The number of CPU cores that are available to the function.
- `available_memory`: The amount of memory available to the function.

# Local Setup

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "feed-sync-dispatcher-transitland",
"description": "Feed Sync Dispatcher for Transitland",
"entry_point": "feed_sync_dispatcher_transitland",
"timeout": 540,
"timeout": 3600,
"memory": "512Mi",
"trigger_http": true,
"include_folders": ["database_gen", "helpers"],
Expand All @@ -11,9 +11,10 @@
"key": "FEEDS_DATABASE_URL"
}
],
"ingress_settings": "ALLOW_INTERNAL_AND_GCLB",
"max_instance_request_concurrency": 20,
"max_instance_count": 10,
"ingress_settings": "ALLOW_ALL",
"max_instance_request_concurrency": 1,
"max_instance_count": 1,
"min_instance_count": 0,
"available_cpu": 1
"available_cpu": 1,
"available_memory": "512Mi"
}
6 changes: 3 additions & 3 deletions functions-python/feed_sync_dispatcher_transitland/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
PUBSUB_TOPIC_NAME = os.getenv("PUBSUB_TOPIC_NAME")
PROJECT_ID = os.getenv("PROJECT_ID")
FEEDS_DATABASE_URL = os.getenv("FEEDS_DATABASE_URL")
apikey = os.getenv("TRANSITLAND_API_KEY")
TRANSITLAND_API_KEY = os.getenv("TRANSITLAND_API_KEY")
TRANSITLAND_OPERATOR_URL = os.getenv("TRANSITLAND_OPERATOR_URL")
TRANSITLAND_FEED_URL = os.getenv("TRANSITLAND_FEED_URL")
spec = ["gtfs", "gtfs-rt"]
Expand Down Expand Up @@ -99,9 +99,9 @@ def process_sync(
Process data synchronously to fetch, extract, combine, filter and prepare payloads for publishing
to a queue based on conditions related to the data retrieved from TransitLand API.
"""
feeds_data = self.get_data(TRANSITLAND_FEED_URL, apikey, spec, session)
feeds_data = self.get_data(TRANSITLAND_FEED_URL, TRANSITLAND_API_KEY, spec, session)
operators_data = self.get_data(
TRANSITLAND_OPERATOR_URL, apikey, session=session
TRANSITLAND_OPERATOR_URL, TRANSITLAND_API_KEY, session=session
)

feeds = self.extract_feeds_data(feeds_data)
Expand Down
63 changes: 63 additions & 0 deletions infra/functions-python/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ locals {

function_gbfs_validation_report_config = jsondecode(file("${path.module}/../../functions-python/gbfs_validator/function_config.json"))
function_gbfs_validation_report_zip = "${path.module}/../../functions-python/gbfs_validator/.dist/gbfs_validator.zip"

function_feed_sync_dispatcher_transitland_config = jsondecode(file("${path.module}/../../functions-python/feed_sync_dispatcher_transitland/function_config.json"))
function_feed_sync_dispatcher_transitland_zip = "${path.module}/../../functions-python/feed_sync_dispatcher_transitland/.dist/feed_sync_dispatcher_transitland.zip"
}

locals {
Expand Down Expand Up @@ -106,6 +109,13 @@ resource "google_storage_bucket_object" "gbfs_validation_report_zip" {
source = local.function_gbfs_validation_report_zip
}

# 6. Feed sync dispatcher transitland
# Uploads the packaged function archive to the functions bucket. The object
# name embeds the first 10 characters of the archive's base64 SHA-256 digest,
# so the name changes whenever the archive content changes.
resource "google_storage_bucket_object" "feed_sync_dispatcher_transitland_zip" {
  source = local.function_feed_sync_dispatcher_transitland_zip
  bucket = google_storage_bucket.functions_bucket.name
  name = format(
    "feed-sync-dispatcher-transitland-%s.zip",
    substr(filebase64sha256(local.function_feed_sync_dispatcher_transitland_zip), 0, 10)
  )
}

# Secrets access
resource "google_secret_manager_secret_iam_member" "secret_iam_member" {
for_each = local.unique_secret_keys
Expand Down Expand Up @@ -155,6 +165,10 @@ resource "google_cloudfunctions2_function" "tokens" {
}

# 2.1 functions/extract_location cloud function
# 2.1.1 Create Pub/Sub topic
# Topic for Transitland feed dispatching: its name is handed to the
# feed_sync_dispatcher_transitland function as PUBSUB_TOPIC_NAME.
# NOTE(review): despite the 2.1.x numbering, this topic is not referenced by
# the extract_location function defined below — consider moving it next to
# the feed_sync_dispatcher_transitland function.
resource "google_pubsub_topic" "transitland_feeds_dispatch" {
name = "transitland-feeds-dispatch"
}
resource "google_cloudfunctions2_function" "extract_location" {
name = local.function_extract_location_config.name
description = local.function_extract_location_config.description
Expand Down Expand Up @@ -520,6 +534,55 @@ resource "google_cloudfunctions2_function" "gbfs_validator_pubsub" {
}
}

# 6. functions/feed_sync_dispatcher_transitland cloud function
# Deploys the Transitland feed sync dispatcher as a 2nd-gen Cloud Function.
# No event_trigger is declared here, so the function is reached through its
# HTTP endpoint. Sizing, timeout, scaling and ingress values are all read from
# the function's own function_config.json.
resource "google_cloudfunctions2_function" "feed_sync_dispatcher_transitland" {
  name        = "${local.function_feed_sync_dispatcher_transitland_config.name}-batch"
  description = local.function_feed_sync_dispatcher_transitland_config.description
  location    = var.gcp_region
  # The secret IAM binding must exist before the function is created, otherwise
  # the secret_environment_variables below cannot be resolved at deploy time.
  depends_on = [google_project_iam_member.event-receiving, google_secret_manager_secret_iam_member.secret_iam_member]

  build_config {
    runtime     = var.python_runtime
    entry_point = local.function_feed_sync_dispatcher_transitland_config.entry_point
    source {
      storage_source {
        bucket = google_storage_bucket.functions_bucket.name
        object = google_storage_bucket_object.feed_sync_dispatcher_transitland_zip.name
      }
    }
  }

  service_config {
    environment_variables = {
      PROJECT_ID          = var.project_id
      PYTHONNODEBUGRANGES = 0
      PUBSUB_TOPIC_NAME   = google_pubsub_topic.transitland_feeds_dispatch.name
      # NOTE(review): the API key is injected as a plain environment variable,
      # so it is visible in the function's configuration. Consider sourcing it
      # through secret_environment_variables (Secret Manager) instead.
      TRANSITLAND_API_KEY      = var.transitland_api_key
      TRANSITLAND_OPERATOR_URL = "https://transit.land/api/v2/rest/operators"
      TRANSITLAND_FEED_URL     = "https://transit.land/api/v2/rest/feeds"
    }
    available_memory                 = local.function_feed_sync_dispatcher_transitland_config.available_memory
    timeout_seconds                  = local.function_feed_sync_dispatcher_transitland_config.timeout
    available_cpu                    = local.function_feed_sync_dispatcher_transitland_config.available_cpu
    max_instance_request_concurrency = local.function_feed_sync_dispatcher_transitland_config.max_instance_request_concurrency
    max_instance_count               = local.function_feed_sync_dispatcher_transitland_config.max_instance_count
    min_instance_count               = local.function_feed_sync_dispatcher_transitland_config.min_instance_count
    service_account_email            = google_service_account.functions_service_account.email
    ingress_settings                 = local.function_feed_sync_dispatcher_transitland_config.ingress_settings
    vpc_connector                    = data.google_vpc_access_connector.vpc_connector.id
    vpc_connector_egress_settings    = "PRIVATE_RANGES_ONLY"

    # Secrets come from THIS function's config. The original iterated over
    # local.function_extract_location_config (a copy-paste slip), which would
    # silently bind another function's secret list to this one.
    dynamic "secret_environment_variables" {
      for_each = local.function_feed_sync_dispatcher_transitland_config.secret_environment_variables
      content {
        key        = secret_environment_variables.value["key"]
        project_id = var.project_id
        # Secret names are prefixed with the upper-cased environment, e.g. DEV_<KEY>.
        secret  = "${upper(var.environment)}_${secret_environment_variables.value["key"]}"
        version = "latest"
      }
    }
  }
}


# IAM entry for all users to invoke the function
resource "google_cloudfunctions2_function_iam_member" "tokens_invoker" {
project = var.project_id
Expand Down
5 changes: 5 additions & 0 deletions infra/functions-python/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,8 @@ variable "gbfs_scheduler_schedule" {
description = "Schedule for the GBFS scheduler job"
default = "0 0 1 * *" # every month on the first day at 00:00
}

# API key used by the feed_sync_dispatcher_transitland function to call the
# Transitland API. Marked sensitive so Terraform redacts it in plan/apply output.
variable "transitland_api_key" {
  type        = string
  description = "Transitland API key"
  sensitive   = true
}
3 changes: 3 additions & 0 deletions infra/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,14 @@ module "feed-api" {
source = "./feed-api"
}


# Python cloud functions. Forwards the project/region/environment settings,
# the web validator URL and the Transitland API key to the child module.
module "functions-python" {
  source = "./functions-python"

  environment         = var.environment
  gcp_region          = var.gcp_region
  project_id          = var.project_id
  web_validator_url   = var.validator_endpoint
  transitland_api_key = var.transitland_api_key
}

module "workflows" {
Expand Down
4 changes: 4 additions & 0 deletions infra/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,8 @@ variable "artifact_repo_name" {
# Endpoint of the validator service; required (no default is declared).
variable "validator_endpoint" {
  description = "URL of the validator endpoint"
  type        = string
}

# Transitland API key, supplied through vars.tfvars and forwarded to the
# functions-python module. Marked sensitive so Terraform redacts it in
# plan/apply output.
variable "transitland_api_key" {
  type        = string
  description = "Transitland API key"
  sensitive   = true
}
3 changes: 2 additions & 1 deletion infra/vars.tfvars.rename_me
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ oauth2_client_id = {{OAUTH2_CLIENT_ID}}
oauth2_client_secret = {{OAUTH2_CLIENT_SECRET}}
global_rate_limit_req_per_minute = {{GLOBAL_RATE_LIMIT_REQ_PER_MINUTE}}

validator_endpoint = {{VALIDATOR_ENDPOINT}}
validator_endpoint = {{VALIDATOR_ENDPOINT}}
transitland_api_key = {{TRANSITLAND_API_KEY}}

0 comments on commit 9e2e617

Please sign in to comment.