From 9e2e617de94d5a0e112e6f994512635c67834975 Mon Sep 17 00:00:00 2001 From: cka-y Date: Tue, 5 Nov 2024 15:33:46 -0500 Subject: [PATCH] feat: infra TL scraping --- .github/workflows/api-deployer.yml | 10 ++- functions-python/README.md | 1 + .../function_config.json | 11 ++-- .../src/main.py | 6 +- infra/functions-python/main.tf | 63 +++++++++++++++++++ infra/functions-python/vars.tf | 5 ++ infra/main.tf | 3 + infra/vars.tf | 4 ++ infra/vars.tfvars.rename_me | 3 +- 9 files changed, 96 insertions(+), 10 deletions(-) diff --git a/.github/workflows/api-deployer.yml b/.github/workflows/api-deployer.yml index f533b8605..110e4be39 100644 --- a/.github/workflows/api-deployer.yml +++ b/.github/workflows/api-deployer.yml @@ -280,10 +280,18 @@ jobs: echo "GLOBAL_RATE_LIMIT_REQ_PER_MINUTE=${{ inputs.GLOBAL_RATE_LIMIT_REQ_PER_MINUTE }}" >> $GITHUB_ENV echo "VALIDATOR_ENDPOINT=${{ inputs.VALIDATOR_ENDPOINT }}" >> $GITHUB_ENV + - name: Load secret from 1Password + uses: 1password/load-secrets-action@v2 + with: + export-env: true + env: + OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }} + TRANSITLAND_API_KEY: "op://TECH: Keys & Tokens/TansitLand API Key/credentials" + - name: Populate Variables run: | scripts/replace-variables.sh -in_file infra/backend.conf.rename_me -out_file infra/backend.conf -variables BUCKET_NAME,OBJECT_PREFIX - scripts/replace-variables.sh -in_file infra/vars.tfvars.rename_me -out_file infra/vars.tfvars -variables PROJECT_ID,REGION,ENVIRONMENT,DEPLOYER_SERVICE_ACCOUNT,FEED_API_IMAGE_VERSION,OAUTH2_CLIENT_ID,OAUTH2_CLIENT_SECRET,GLOBAL_RATE_LIMIT_REQ_PER_MINUTE,ARTIFACT_REPO_NAME,VALIDATOR_ENDPOINT + scripts/replace-variables.sh -in_file infra/vars.tfvars.rename_me -out_file infra/vars.tfvars -variables PROJECT_ID,REGION,ENVIRONMENT,DEPLOYER_SERVICE_ACCOUNT,FEED_API_IMAGE_VERSION,OAUTH2_CLIENT_ID,OAUTH2_CLIENT_SECRET,GLOBAL_RATE_LIMIT_REQ_PER_MINUTE,ARTIFACT_REPO_NAME,VALIDATOR_ENDPOINT,TRANSITLAND_API_KEY - uses: 
hashicorp/setup-terraform@v3 with: diff --git a/functions-python/README.md b/functions-python/README.md index c94f41a71..a95eb27ff 100644 --- a/functions-python/README.md +++ b/functions-python/README.md @@ -31,6 +31,7 @@ The function configuration file contains the following properties: - `max_instance_count`: The maximum number of function instances that can be created in response to a load. - `min_instance_count`: The minimum number of function instances that can be created in response to a load. - `available_cpu_count`: The number of CPU cores that are available to the function. +- `available_memory`: The amount of memory available to the function. # Local Setup diff --git a/functions-python/feed_sync_dispatcher_transitland/function_config.json b/functions-python/feed_sync_dispatcher_transitland/function_config.json index 99554a359..fcf05749f 100644 --- a/functions-python/feed_sync_dispatcher_transitland/function_config.json +++ b/functions-python/feed_sync_dispatcher_transitland/function_config.json @@ -2,7 +2,7 @@ "name": "feed-sync-dispatcher-transitland", "description": "Feed Sync Dispatcher for Transitland", "entry_point": "feed_sync_dispatcher_transitland", - "timeout": 540, + "timeout": 3600, "memory": "512Mi", "trigger_http": true, "include_folders": ["database_gen", "helpers"], @@ -11,9 +11,10 @@ "key": "FEEDS_DATABASE_URL" } ], - "ingress_settings": "ALLOW_INTERNAL_AND_GCLB", - "max_instance_request_concurrency": 20, - "max_instance_count": 10, + "ingress_settings": "ALLOW_ALL", + "max_instance_request_concurrency": 1, + "max_instance_count": 1, "min_instance_count": 0, - "available_cpu": 1 + "available_cpu": 1, + "available_memory": "512Mi" } diff --git a/functions-python/feed_sync_dispatcher_transitland/src/main.py b/functions-python/feed_sync_dispatcher_transitland/src/main.py index 90592f725..efa2ebf54 100644 --- a/functions-python/feed_sync_dispatcher_transitland/src/main.py +++ b/functions-python/feed_sync_dispatcher_transitland/src/main.py @@ 
-43,7 +43,7 @@ PUBSUB_TOPIC_NAME = os.getenv("PUBSUB_TOPIC_NAME") PROJECT_ID = os.getenv("PROJECT_ID") FEEDS_DATABASE_URL = os.getenv("FEEDS_DATABASE_URL") -apikey = os.getenv("TRANSITLAND_API_KEY") +TRANSITLAND_API_KEY = os.getenv("TRANSITLAND_API_KEY") TRANSITLAND_OPERATOR_URL = os.getenv("TRANSITLAND_OPERATOR_URL") TRANSITLAND_FEED_URL = os.getenv("TRANSITLAND_FEED_URL") spec = ["gtfs", "gtfs-rt"] @@ -99,9 +99,9 @@ def process_sync( Process data synchronously to fetch, extract, combine, filter and prepare payloads for publishing to a queue based on conditions related to the data retrieved from TransitLand API. """ - feeds_data = self.get_data(TRANSITLAND_FEED_URL, apikey, spec, session) + feeds_data = self.get_data(TRANSITLAND_FEED_URL, TRANSITLAND_API_KEY, spec, session) operators_data = self.get_data( - TRANSITLAND_OPERATOR_URL, apikey, session=session + TRANSITLAND_OPERATOR_URL, TRANSITLAND_API_KEY, session=session ) feeds = self.extract_feeds_data(feeds_data) diff --git a/infra/functions-python/main.tf b/infra/functions-python/main.tf index ddf768c82..8201ca1ea 100644 --- a/infra/functions-python/main.tf +++ b/infra/functions-python/main.tf @@ -33,6 +33,9 @@ locals { function_gbfs_validation_report_config = jsondecode(file("${path.module}/../../functions-python/gbfs_validator/function_config.json")) function_gbfs_validation_report_zip = "${path.module}/../../functions-python/gbfs_validator/.dist/gbfs_validator.zip" + + function_feed_sync_dispatcher_transitland_config = jsondecode(file("${path.module}/../../functions-python/feed_sync_dispatcher_transitland/function_config.json")) + function_feed_sync_dispatcher_transitland_zip = "${path.module}/../../functions-python/feed_sync_dispatcher_transitland/.dist/feed_sync_dispatcher_transitland.zip" } locals { @@ -106,6 +109,13 @@ resource "google_storage_bucket_object" "gbfs_validation_report_zip" { source = local.function_gbfs_validation_report_zip } +# 6. 
Feed sync dispatcher transitland +resource "google_storage_bucket_object" "feed_sync_dispatcher_transitland_zip" { + bucket = google_storage_bucket.functions_bucket.name + name = "feed-sync-dispatcher-transitland-${substr(filebase64sha256(local.function_feed_sync_dispatcher_transitland_zip), 0, 10)}.zip" + source = local.function_feed_sync_dispatcher_transitland_zip +} + # Secrets access resource "google_secret_manager_secret_iam_member" "secret_iam_member" { for_each = local.unique_secret_keys @@ -155,6 +165,10 @@ resource "google_cloudfunctions2_function" "tokens" { } # 2.1 functions/extract_location cloud function +# 2.1.1 Create Pub/Sub topic +resource "google_pubsub_topic" "transitland_feeds_dispatch" { + name = "transitland-feeds-dispatch" +} resource "google_cloudfunctions2_function" "extract_location" { name = local.function_extract_location_config.name description = local.function_extract_location_config.description @@ -520,6 +534,55 @@ resource "google_cloudfunctions2_function" "gbfs_validator_pubsub" { } } +# 6. 
functions/feed_sync_dispatcher_transitland cloud function +resource "google_cloudfunctions2_function" "feed_sync_dispatcher_transitland" { + name = "${local.function_feed_sync_dispatcher_transitland_config.name}-batch" + description = local.function_feed_sync_dispatcher_transitland_config.description + location = var.gcp_region + depends_on = [google_project_iam_member.event-receiving, google_secret_manager_secret_iam_member.secret_iam_member] + + build_config { + runtime = var.python_runtime + entry_point = local.function_feed_sync_dispatcher_transitland_config.entry_point + source { + storage_source { + bucket = google_storage_bucket.functions_bucket.name + object = google_storage_bucket_object.feed_sync_dispatcher_transitland_zip.name + } + } + } + service_config { + environment_variables = { + PROJECT_ID = var.project_id + PYTHONNODEBUGRANGES = 0 + PUBSUB_TOPIC_NAME = google_pubsub_topic.transitland_feeds_dispatch.name + TRANSITLAND_API_KEY=var.transitland_api_key + TRANSITLAND_OPERATOR_URL="https://transit.land/api/v2/rest/operators" + TRANSITLAND_FEED_URL="https://transit.land/api/v2/rest/feeds" + } + available_memory = local.function_feed_sync_dispatcher_transitland_config.available_memory + timeout_seconds = local.function_feed_sync_dispatcher_transitland_config.timeout + available_cpu = local.function_feed_sync_dispatcher_transitland_config.available_cpu + max_instance_request_concurrency = local.function_feed_sync_dispatcher_transitland_config.max_instance_request_concurrency + max_instance_count = local.function_feed_sync_dispatcher_transitland_config.max_instance_count + min_instance_count = local.function_feed_sync_dispatcher_transitland_config.min_instance_count + service_account_email = google_service_account.functions_service_account.email + ingress_settings = local.function_feed_sync_dispatcher_transitland_config.ingress_settings + vpc_connector = data.google_vpc_access_connector.vpc_connector.id + vpc_connector_egress_settings = 
"PRIVATE_RANGES_ONLY" + dynamic "secret_environment_variables" { + for_each = local.function_feed_sync_dispatcher_transitland_config.secret_environment_variables + content { + key = secret_environment_variables.value["key"] + project_id = var.project_id + secret = "${upper(var.environment)}_${secret_environment_variables.value["key"]}" + version = "latest" + } + } + } +} + + # IAM entry for all users to invoke the function resource "google_cloudfunctions2_function_iam_member" "tokens_invoker" { project = var.project_id diff --git a/infra/functions-python/vars.tf b/infra/functions-python/vars.tf index 1b6ea4f0f..12b87c877 100644 --- a/infra/functions-python/vars.tf +++ b/infra/functions-python/vars.tf @@ -64,3 +64,8 @@ variable "gbfs_scheduler_schedule" { description = "Schedule for the GBFS scheduler job" default = "0 0 1 * *" # every month on the first day at 00:00 } + +variable "transitland_api_key" { + type = string + description = "Transitland API key" +} diff --git a/infra/main.tf b/infra/main.tf index 45341c4b7..756b142df 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -99,11 +99,14 @@ module "feed-api" { source = "./feed-api" } + module "functions-python" { source = "./functions-python" project_id = var.project_id gcp_region = var.gcp_region environment = var.environment + transitland_api_key = var.transitland_api_key + web_validator_url = var.validator_endpoint } module "workflows" { diff --git a/infra/vars.tf b/infra/vars.tf index 6140ae67d..6dc0ebee1 100644 --- a/infra/vars.tf +++ b/infra/vars.tf @@ -62,4 +62,8 @@ variable "artifact_repo_name" { variable "validator_endpoint" { type = string description = "URL of the validator endpoint" +} + +variable "transitland_api_key" { + type = string } \ No newline at end of file diff --git a/infra/vars.tfvars.rename_me b/infra/vars.tfvars.rename_me index d1aeac5ef..6dc3bd0b5 100644 --- a/infra/vars.tfvars.rename_me +++ b/infra/vars.tfvars.rename_me @@ -16,4 +16,5 @@ oauth2_client_id = {{OAUTH2_CLIENT_ID}} oauth2_client_secret = 
{{OAUTH2_CLIENT_SECRET}} global_rate_limit_req_per_minute = {{GLOBAL_RATE_LIMIT_REQ_PER_MINUTE}} -validator_endpoint = {{VALIDATOR_ENDPOINT}} \ No newline at end of file +validator_endpoint = {{VALIDATOR_ENDPOINT}} +transitland_api_key = {{TRANSITLAND_API_KEY}} \ No newline at end of file