From 17a6909cd5393e9d85057a66d40c164b800c7a19 Mon Sep 17 00:00:00 2001 From: Charlie Wang Date: Wed, 10 Jul 2024 22:27:00 +0000 Subject: [PATCH 1/7] remove dependency to config.yaml from feature-store --- config/config.yaml.tftpl | 226 +------ infrastructure/terraform/main.tf | 2 +- .../feature-store/bigquery-datasets.tf | 366 ++--------- .../feature-store/bigquery-procedures.tf | 285 ++++----- .../modules/feature-store/bigquery-tables.tf | 569 ++++++------------ .../terraform/modules/feature-store/main.tf | 28 +- .../modules/feature-store/variables.tf | 10 +- .../user_predictions_aggregation.sqlx | 59 ++ .../aggregated_predictions_per_user.json | 58 ++ 9 files changed, 522 insertions(+), 1081 deletions(-) create mode 100644 sql/procedure/user_predictions_aggregation.sqlx create mode 100644 sql/schema/table/aggregated_predictions_per_user.json diff --git a/config/config.yaml.tftpl b/config/config.yaml.tftpl index d8c5766e..cb71088c 100644 --- a/config/config.yaml.tftpl +++ b/config/config.yaml.tftpl @@ -1485,227 +1485,7 @@ vertex_ai: # This block contains configuration parameters for the BigQuery Datasets, Tables, Queries and Stored Procedures. bigquery: project_id: "${project_id}" - region: "${location}" - dataset: - # Dataset for the feature engineering tables and procedures. - feature_store: - project_id: "${project_id}" - name: "feature_store" - location: "${location}" - collation: "und:ci" - is_case_insensitive: TRUE - description: "Feature Store dataset for Marketing behavioural modeling" - friendly_name: "Feature Store" - max_time_travel_hours: 168 - default_partition_expiration_days: 365 - default_table_expiration_days: 365 - # Dataset for the purchase propensity use case. - purchase_propensity: - name: "purchase_propensity" - location: "${location}" - project_id: "${project_id}" - collation: "und:ci" - is_case_insensitive: TRUE - description: "Purchase Propensity Use Case dataset for Marketing behavioural modeling" - friendly_name: "Purchase Propensity Dataset" - max_time_travel_hours: 168 - default_partition_expiration_days: 365 - default_table_expiration_days: 365 - # Dataset for the churn propensity use case. - churn_propensity: - name: "churn_propensity" - location: "${location}" - project_id: "${project_id}" - collation: "und:ci" - is_case_insensitive: TRUE - description: "Churn Propensity Use Case dataset for Marketing behavioural modeling" - friendly_name: "Churn Propensity Dataset" - max_time_travel_hours: 168 - default_partition_expiration_days: 365 - default_table_expiration_days: 365 - # Dataset for the customer lifetime value use case. - customer_lifetime_value: - project_id: "${project_id}" - name: "customer_lifetime_value" - location: "${location}" - collation: "und:ci" - is_case_insensitive: TRUE - description: "Customer Lifetime Value Use Case dataset for Marketing behavioural modeling" - friendly_name: "Customer Lifetime Value Dataset" - max_time_travel_hours: 168 - default_partition_expiration_days: 365 - default_table_expiration_days: 365 - # Dataset for the demographic based audience segmentation use case. 
- audience_segmentation: - project_id: "${project_id}" - name: "audience_segmentation" - location: "${location}" - collation: "und:ci" - is_case_insensitive: TRUE - description: "Audience Segmentation Use Case dataset for Marketing behavioural modeling" - friendly_name: "Audience Segmentation Dataset" - max_time_travel_hours: 168 - default_partition_expiration_days: 365 - default_table_expiration_days: 365 - # Dataset for the auto audience segmentation (Interests Based Audience Segmentation) use case. - auto_audience_segmentation: - project_id: "${project_id}" - name: "auto_audience_segmentation" - location: "${location}" - collation: "und:ci" - is_case_insensitive: TRUE - description: "Auto Audience Segmentation Use Case dataset for Marketing behavioural modeling" - friendly_name: "Auto Audience Segmentation Dataset" - max_time_travel_hours: 48 - default_partition_expiration_days: 365 - default_table_expiration_days: 365 - # Dataset for the aggregated Value Based Bidding (VBB) use case. - aggregated_vbb: - project_id: "${project_id}" - name: "aggregated_vbb" - location: "${location}" - collation: "und:ci" - is_case_insensitive: TRUE - description: "Aggregated VBB Use Case dataset for Marketing behavioural modeling" - friendly_name: "Aggregated VBB Dataset" - max_time_travel_hours: 48 - default_partition_expiration_days: 365 - default_table_expiration_days: 365 - # Dataset for the aggregated predictions tables and procedures. - aggregated_predictions: - project_id: "${project_id}" - name: "aggregated_predictions" - location: "${location}" - description: "Dataset with aggregated prediction results from multiple use cases" - friendly_name: "Aggregated Predictions Dataset" - # Dataset for the gemini insights tables and procedures. - gemini_insights: - project_id: "${project_id}" - name: "gemini_insights" - location: "${location}" - description: "Dataset with gemini_insights results from multiple use cases" - friendly_name: "Gemini Insights Dataset" - max_time_travel_hours: 168 - table: - # Table containing the feature engineered dataset that will be used for the Audience Segmentation prediction pipeline. - audience_segmentation_inference_preparation: - project_id: "${project_id}" - dataset: "audience_segmentation" - table_name: "audience_segmentation_inference_preparation" - location: "${location}" - table_description: "Audience Segmentation Inference Preparation table to be used for Model Prediction" - # Table containing the feature engineered dataset that will be used for the Customer Lifetime Value prediction pipeline. - customer_lifetime_value_inference_preparation: - project_id: "${project_id}" - dataset: "customer_lifetime_value" - table_name: "customer_lifetime_value_inference_preparation" - location: "${location}" - table_description: "Customer Lifetime Value Inference Preparation table to be used for Model Prediction" - # Table containing the feature engineered labels that will be used for the Customer Lifetime Value training pipeline. - customer_lifetime_value_label: - project_id: "${project_id}" - dataset: "customer_lifetime_value" - table_name: "customer_lifetime_value_label" - location: "${location}" - table_description: "Customer Lifetime Value Label table to be used for Model Traning" - # Table containing the feature engineered dataset that will be used for the Purchase Propensity prediction pipeline. 
- purchase_propensity_inference_preparation: - project_id: "${project_id}" - dataset: "purchase_propensity" - table_name: "purchase_propensity_inference_preparation" - location: "${location}" - table_description: "Purchase Propensity Inference Preparation table to be used for Model Prediction" - # Table containing the feature engineered dataset that will be used for the Churn Propensity prediction pipeline. - churn_propensity_inference_preparation: - project_id: "${project_id}" - dataset: "churn_propensity" - table_name: "churn_propensity_inference_preparation" - location: "${location}" - table_description: "Purchase Propensity Inference Preparation table to be used for Model Prediction" - # Table containing the feature engineered labels that will be used for the Purchase Propensity training pipeline. - purchase_propensity_label: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "purchase_propensity_label" - location: "${location}" - table_description: "Purchase Propensity Label table to be used for Model Prediction" - # Table containing the feature engineered labels that will be used for the Purchase Propensity training pipeline. - churn_propensity_label: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "churn_propensity_label" - location: "${location}" - table_description: "Churn Propensity Label table to be used for Model Prediction" - # Table containing the feature engineered dimensions that will be used for the Purchase Propensity training and inference pipeline. - user_dimensions: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "user_dimensions" - location: "${location}" - table_description: "User Dimensions table as part of the Feature Store for the Purchase Propensity use case" - # Table containing the feature engineered dimensions that will be used for the Customer Lifetime Value training and inference pipeline. - user_lifetime_dimensions: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "user_lifetime_dimensions" - location: "${location}" - table_description: "User Lifetime Dimensions table as part of the Feature Store for the Customer Lifetime Value use case" - # Table containing the feature engineered lookback rolling window metrics that will be used for the Audience Segmentation training and inference pipeline. - user_lookback_metrics: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "user_lookback_metrics" - location: "${location}" - table_description: "User Lookback Metrics table as part of the Feature Store" - # Table containing the feature engineered rolling window metrics that will be used for the Customer Lifetime Value training and inference pipeline. - user_rolling_window_lifetime_metrics: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "user_rolling_window_lifetime_metrics" - location: "${location}" - table_description: "User Rolling Window Lifetime Metrics table as part of the Feature Store for the Customer Lifetime Value use case" - # Table containing the featured engineered rolling window metrics that will be used for the Purchase Propensity training and inference pipeline. 
- user_rolling_window_metrics: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "user_rolling_window_metrics" - location: "${location}" - table_description: "User Rolling Window Metrics table as part of the Feature Store for Purchase Propensity use case" - # Table containing the feature engineered all users metrics that will be used for the Customer Lifetime Value training and inference pipeline. - user_scoped_lifetime_metrics: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "user_scoped_lifetime_metrics" - location: "${location}" - table_description: "User Scoped Lifetime Metrics table as part of the Feature Store for the Customer Lifetime Value use case" - # Table containing the feature engineered all users metrics that will be used for the Purchase Propensity training and inference pipeline. - user_scoped_metrics: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "user_scoped_metrics" - location: "${location}" - table_description: "User Scoped Metrics table as part of the Feature Store for the Purchase Propensity use case" - # Table containing the feature engineered all users metrics that will be used for the Audience Segmentation training and inference pipeline. - user_scoped_segmentation_metrics: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "user_scoped_segmentation_metrics" - location: "${location}" - table_description: "User Scoped Segmentation Metrics table as part of the Feature Store for Audience Segmentation use case" - # Table containing the feature engineered user dimensions that will be used for the Audience Segmentation training and inference pipeline. - user_segmentation_dimensions: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "user_segmentation_dimensions" - location: "${location}" - table_description: "User Segmentation Dimensions table as part of the Feature Store for Audience Segmentation use case" - # Table containing the feature engineered user aggregated sessions and events metrics that will be used for the Purchase Propensity training and inference pipeline - user_session_event_aggregated_metrics: - project_id: "${project_id}" - dataset: "feature_store" - table_name: "user_session_event_aggregated_metrics" - location: "${location}" - table_description: "User Session Event Aggregated Metrics table as part of the Feature Store" + region: "${location}" query: # This is a query template to be used by the Activation application, so there is no configuration to be applied. audience_segmentation_query_template: @@ -2492,6 +2272,10 @@ bigquery: churn_propensity_dataset: "churn_propensity" audience_segmentation_dataset: "audience_segmentation" auto_audience_segmentation_dataset: "auto_audience_segmentation" + user_predictions_aggregation: + project_id: "${project_id}" + dataset_id: "aggregated_predictions" + table_id: "user_predictions" user_behaviour_revenue_insights: project_id: "${project_id}" dataset: "gemini_insights" diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 877caad6..9f6b3b67 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -368,7 +368,6 @@ module "data_store" { module "feature_store" { # The source is the path to the feature store module. source = "./modules/feature-store" - config_file_path = local_file.feature_store_configuration.id != "" ? 
local_file.feature_store_configuration.filename : "" enabled = var.deploy_feature_store # the count determines if the feature store is created or not. # If the count is 1, the feature store is created. @@ -379,6 +378,7 @@ module "feature_store" { # The region is the region in which the feature store is created. # This is set to the default region in the terraform.tfvars file. region = var.google_default_region + data_location = var.destination_data_location # The sql_dir_input is the path to the sql directory. # This is set to the path to the sql directory in the feature store module. sql_dir_input = null_resource.generate_sql_queries.id != "" ? "${local.source_root_dir}/sql" : "" diff --git a/infrastructure/terraform/modules/feature-store/bigquery-datasets.tf b/infrastructure/terraform/modules/feature-store/bigquery-datasets.tf index 6a6c6cfe..57e7ad38 100644 --- a/infrastructure/terraform/modules/feature-store/bigquery-datasets.tf +++ b/infrastructure/terraform/modules/feature-store/bigquery-datasets.tf @@ -13,311 +13,77 @@ # limitations under the License. # This resource creates a BigQuery dataset called `feature_store`. -resource "google_bigquery_dataset" "feature_store" { - dataset_id = local.config_bigquery.dataset.feature_store.name - friendly_name = local.config_bigquery.dataset.feature_store.friendly_name - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : var.project_id - description = local.config_bigquery.dataset.feature_store.description - location = local.config_bigquery.dataset.feature_store.location - # The max_time_travel_hours attribute specifies the maximum number of hours that data in the dataset can be accessed using time travel queries. - # In this case, the maximum time travel hours is set to the value of the local file config.yaml section bigquery.dataset.feature_store.max_time_travel_hours configuration. - max_time_travel_hours = local.config_bigquery.dataset.feature_store.max_time_travel_hours - # The delete_contents_on_destroy attribute specifies whether the contents of the dataset should be deleted when the dataset is destroyed. - # In this case, the delete_contents_on_destroy attribute is set to false, which means that the contents of the dataset will not be deleted when the dataset is destroyed. - delete_contents_on_destroy = false - - labels = { - version = "prod" - } - - # The lifecycle block allows you to configure the lifecycle of the dataset. - # In this case, the ignore_changes attribute is set to all, which means that - # Terraform will ignore any changes to the dataset and will not attempt to update the dataset. - lifecycle { - ignore_changes = all - } -} - -# This resource creates a BigQuery dataset called `purchase_propensity`. -resource "google_bigquery_dataset" "purchase_propensity" { - dataset_id = local.config_bigquery.dataset.purchase_propensity.name - friendly_name = local.config_bigquery.dataset.purchase_propensity.friendly_name - project = null_resource.check_bigquery_api.id != "" ? local.purchase_propensity_project_id : local.feature_store_project_id - description = local.config_bigquery.dataset.purchase_propensity.description - location = local.config_bigquery.dataset.purchase_propensity.location - # The max_time_travel_hours attribute specifies the maximum number of hours that data in the dataset can be accessed using time travel queries. 
- # In this case, the maximum time travel hours is set to the value of the local file config.yaml section bigquery.dataset.feature_store.max_time_travel_hours configuration. - max_time_travel_hours = local.config_bigquery.dataset.purchase_propensity.max_time_travel_hours - # The delete_contents_on_destroy attribute specifies whether the contents of the dataset should be deleted when the dataset is destroyed. - # In this case, the delete_contents_on_destroy attribute is set to false, which means that the contents of the dataset will not be deleted when the dataset is destroyed. - delete_contents_on_destroy = false - - labels = { - version = "prod" - } - - # The lifecycle block allows you to configure the lifecycle of the dataset. - # In this case, the ignore_changes attribute is set to all, which means that - # Terraform will ignore any changes to the dataset and will not attempt to update the dataset. - lifecycle { - ignore_changes = all - } -} - -# This resource creates a BigQuery dataset called `churn_propensity`. -resource "google_bigquery_dataset" "churn_propensity" { - dataset_id = local.config_bigquery.dataset.churn_propensity.name - friendly_name = local.config_bigquery.dataset.churn_propensity.friendly_name - project = null_resource.check_bigquery_api.id != "" ? local.churn_propensity_project_id : local.feature_store_project_id - description = local.config_bigquery.dataset.churn_propensity.description - location = local.config_bigquery.dataset.churn_propensity.location - # The max_time_travel_hours attribute specifies the maximum number of hours that data in the dataset can be accessed using time travel queries. - # In this case, the maximum time travel hours is set to the value of the local file config.yaml section bigquery.dataset.feature_store.max_time_travel_hours configuration. - max_time_travel_hours = local.config_bigquery.dataset.churn_propensity.max_time_travel_hours - # The delete_contents_on_destroy attribute specifies whether the contents of the dataset should be deleted when the dataset is destroyed. - # In this case, the delete_contents_on_destroy attribute is set to false, which means that the contents of the dataset will not be deleted when the dataset is destroyed. - delete_contents_on_destroy = false - - labels = { - version = "prod" - } - - # The lifecycle block allows you to configure the lifecycle of the dataset. - # In this case, the ignore_changes attribute is set to all, which means that - # Terraform will ignore any changes to the dataset and will not attempt to update the dataset. - lifecycle { - ignore_changes = all - } -} - -# This resource creates a BigQuery dataset called `customer_lifetime_value`. -resource "google_bigquery_dataset" "customer_lifetime_value" { - dataset_id = local.config_bigquery.dataset.customer_lifetime_value.name - friendly_name = local.config_bigquery.dataset.customer_lifetime_value.friendly_name - project = null_resource.check_bigquery_api.id != "" ? local.customer_lifetime_value_project_id : local.feature_store_project_id - description = local.config_bigquery.dataset.customer_lifetime_value.description - location = local.config_bigquery.dataset.customer_lifetime_value.location - # The max_time_travel_hours attribute specifies the maximum number of hours that data in the dataset can be accessed using time travel queries. - # In this case, the maximum time travel hours is set to the value of the local file config.yaml section bigquery.dataset.customer_lifetime_value.max_time_travel_hours configuration. 
- max_time_travel_hours = local.config_bigquery.dataset.customer_lifetime_value.max_time_travel_hours - # The delete_contents_on_destroy attribute specifies whether the contents of the dataset should be deleted when the dataset is destroyed. - # In this case, the delete_contents_on_destroy attribute is set to false, which means that the contents of the dataset will not be deleted when the dataset is destroyed. - delete_contents_on_destroy = false - - labels = { - version = "prod" - } - - # The lifecycle block allows you to configure the lifecycle of the dataset. - # In this case, the ignore_changes attribute is set to all, which means that - # Terraform will ignore any changes to the dataset and will not attempt to update the dataset. - lifecycle { - ignore_changes = all - } -} - -# This resource creates a BigQuery dataset called `audience_segmentation`. -resource "google_bigquery_dataset" "audience_segmentation" { - dataset_id = local.config_bigquery.dataset.audience_segmentation.name - friendly_name = local.config_bigquery.dataset.audience_segmentation.friendly_name - project = null_resource.check_bigquery_api.id != "" ? local.audience_segmentation_project_id : local.feature_store_project_id - description = local.config_bigquery.dataset.audience_segmentation.description - location = local.config_bigquery.dataset.audience_segmentation.location - # The max_time_travel_hours attribute specifies the maximum number of hours that data in the dataset can be accessed using time travel queries. - # In this case, the maximum time travel hours is set to the value of the local file config.yaml section bigquery.dataset.audience_segmentation.max_time_travel_hours configuration. - max_time_travel_hours = local.config_bigquery.dataset.audience_segmentation.max_time_travel_hours - # The delete_contents_on_destroy attribute specifies whether the contents of the dataset should be deleted when the dataset is destroyed. - # In this case, the delete_contents_on_destroy attribute is set to false, which means that the contents of the dataset will not be deleted when the dataset is destroyed. - delete_contents_on_destroy = false - - labels = { - version = "prod" - } - # The lifecycle block allows you to configure the lifecycle of the dataset. - # In this case, the ignore_changes attribute is set to all, which means that - # Terraform will ignore any changes to the dataset and will not attempt to update the dataset. 
- lifecycle { - ignore_changes = all - } +locals { + datasets = tomap({ + feature_store = { + name = "feature_store" + friendly_name = "Feature Store" + description = "Feature Store dataset for Marketing behavioural modeling" + max_time_travel_hours = 168 + }, + purchase_propensity = { + name = "purchase_propensity" + friendly_name = "Purchase Propensity Dataset" + description = "Purchase Propensity Use Case dataset for Marketing behavioural modeling" + max_time_travel_hours = 168 + }, + churn_propensity = { + name = "churn_propensity" + friendly_name = "Churn Propensity Dataset" + description = "Churn Propensity Use Case dataset for Marketing behavioural modeling" + max_time_travel_hours = 168 + }, + customer_lifetime_value = { + name = "customer_lifetime_value" + friendly_name = "Customer Lifetime Value Dataset" + description = "Customer Lifetime Value Use Case dataset for Marketing behavioural modeling" + max_time_travel_hours = 168 + }, + audience_segmentation = { + name = "audience_segmentation" + friendly_name = "Audience Segmentation Dataset" + description = "Audience Segmentation Use Case dataset for Marketing behavioural modeling" + max_time_travel_hours = 168 + }, + auto_audience_segmentation = { + name = "auto_audience_segmentation" + friendly_name = "Auto Audience Segmentation Dataset" + description = "Auto Audience Segmentation Use Case dataset for Marketing behavioural modeling" + max_time_travel_hours = 168 + }, + aggregated_vbb = { + name = "aggregated_vbb" + friendly_name = "Aggregated VBB Dataset" + description = "Aggregated VBB Use Case dataset for Marketing behavioural modeling" + max_time_travel_hours = 48 + }, + aggregated_predictions = { + name = "aggregated_predictions" + friendly_name = "Aggregated Predictions Dataset" + description = "Dataset with aggregated prediction results from multiple use cases" + max_time_travel_hours = 48 + }, + gemini_insights = { + name = "gemini_insights" + friendly_name = "Gemini Insights Dataset" + description = "Dataset with gemini_insights results from multiple use cases" + max_time_travel_hours = 48 + }, + }) } -# This resource creates a BigQuery dataset called `auto_audience_segmentation`. -resource "google_bigquery_dataset" "auto_audience_segmentation" { - dataset_id = local.config_bigquery.dataset.auto_audience_segmentation.name - friendly_name = local.config_bigquery.dataset.auto_audience_segmentation.friendly_name - project = null_resource.check_bigquery_api.id != "" ? local.auto_audience_segmentation_project_id : local.feature_store_project_id - description = local.config_bigquery.dataset.auto_audience_segmentation.description - location = local.config_bigquery.dataset.auto_audience_segmentation.location - # The max_time_travel_hours attribute specifies the maximum number of hours that data in the dataset can be accessed using time travel queries. - # In this case, the maximum time travel hours is set to the value of the local file config.yaml section bigquery.dataset.auto_audience_segmentation.max_time_travel_hours configuration. - max_time_travel_hours = local.config_bigquery.dataset.auto_audience_segmentation.max_time_travel_hours - # The delete_contents_on_destroy attribute specifies whether the contents of the dataset should be deleted when the dataset is destroyed. - # In this case, the delete_contents_on_destroy attribute is set to false, which means that the contents of the dataset will not be deleted when the dataset is destroyed. 
+resource "google_bigquery_dataset" "datasets" { + for_each = local.datasets + dataset_id = each.value.name + friendly_name = each.value.friendly_name + project = var.project_id + description = each.value.description + location = var.data_location + max_time_travel_hours = each.value.max_time_travel_hours delete_contents_on_destroy = false - labels = { version = "prod" } - - # The lifecycle block allows you to configure the lifecycle of the dataset. - # In this case, the ignore_changes attribute is set to all, which means that - # Terraform will ignore any changes to the dataset and will not attempt to update the dataset. - lifecycle { - ignore_changes = all - } -} - - -# This resource creates a BigQuery dataset called `aggregated_vbb`. -# For existing users that has pulled this change will result in that -# terraform try to created the `aggregated_vbb` dataset along with -# the underlying tables. terraform apply will result in an error saying -# it failed to create resources that are already exist. To resolve you -# need to import the the existing dataset and tables to terraform using -# the following commands: -# > `terraform -chdir="${TERRAFORM_RUN_DIR}" import module.feature_store[0].module.aggregated_vbb.google_bigquery_dataset.main 'projects/${MAJ_FEATURE_STORE_PROJECT_ID}/datasets/aggregated_vbb'` -# -# > `terraform -chdir="${TERRAFORM_RUN_DIR}" import 'module.feature_store[0].module.aggregated_vbb.google_bigquery_table.main["vbb_weights"]' 'projects/${MAJ_FEATURE_STORE_PROJECT_ID}/datasets/aggregated_vbb/tables/vbb_weights'` -# -# > `terraform -chdir="${TERRAFORM_RUN_DIR}" import 'module.feature_store[0].module.aggregated_vbb.google_bigquery_table.main["aggregated_value_based_bidding_volume_weekly"]' 'projects/${MAJ_FEATURE_STORE_PROJECT_ID}/datasets/aggregated_vbb/tables/aggregated_value_based_bidding_volume_weekly'` -# -# > `terraform -chdir="${TERRAFORM_RUN_DIR}" import 'module.feature_store[0].module.aggregated_vbb.google_bigquery_table.main["aggregated_value_based_bidding_correlation"]' 'projects/${MAJ_FEATURE_STORE_PROJECT_ID}/datasets/aggregated_vbb/tables/aggregated_value_based_bidding_correlation'` -# -# > `terraform -chdir="${TERRAFORM_RUN_DIR}" import 'module.feature_store[0].module.aggregated_vbb.google_bigquery_table.main["aggregated_value_based_bidding_volume_daily"]' 'projects/${MAJ_FEATURE_STORE_PROJECT_ID}/datasets/aggregated_vbb/tables/aggregated_value_based_bidding_volume_daily'` -# -# You also need to remove the information of the existing aggregated_vbb -# dataset from the terraform state by running following command: -# > `terraform state rm 'module.feature_store[0].google_bigquery_dataset.aggregated_vbb'` -locals { - aggregated_vbb_tables = [ - "vbb_weights", - "aggregated_value_based_bidding_correlation", - "aggregated_value_based_bidding_volume_daily", - "aggregated_value_based_bidding_volume_weekly" - ] -} - -module "aggregated_vbb" { - source = "terraform-google-modules/bigquery/google" - version = "~> 5.4" - - dataset_id = local.config_bigquery.dataset.aggregated_vbb.name - dataset_name = local.config_bigquery.dataset.aggregated_vbb.friendly_name - description = local.config_bigquery.dataset.aggregated_vbb.description - project_id = null_resource.check_bigquery_api.id != "" ? local.aggregated_vbb_project_id : local.feature_store_project_id - location = local.config_bigquery.dataset.aggregated_vbb.location - # The delete_contents_on_destroy attribute specifies whether the contents of the dataset should be deleted when the dataset is destroyed. 
- # In this case, the delete_contents_on_destroy attribute is set to false, which means that the contents of the dataset will not be deleted when the dataset is destroyed. - delete_contents_on_destroy = true - - dataset_labels = { - version = "prod" - } - - tables = [for table_id in local.aggregated_vbb_tables : - { - table_id = table_id - schema = file("../../sql/schema/table/${table_id}.json") - # The max_time_travel_hours attribute specifies the maximum number of hours that data in the dataset can be accessed using time travel queries. - # In this case, the maximum time travel hours is set to the value of the local file config.yaml section bigquery.dataset.auto_audience_segmentation.max_time_travel_hours configuration. - max_time_travel_hours = local.config_bigquery.dataset.aggregated_vbb.max_time_travel_hours - deletion_protection = false - time_partitioning = null, - range_partitioning = null, - expiration_time = null, - clustering = [], - labels = {}, - }] -} - -# This module creates a BigQuery dataset called `aggregated_predictions` and a table called "latest". -# The aggregated_predictions module is used to create a BigQuery dataset and table that will be used to store -# the aggregated predictions generated by the predictions pipelines. -module "aggregated_predictions" { - source = "terraform-google-modules/bigquery/google" - version = "~> 5.4" - - dataset_id = local.config_bigquery.dataset.aggregated_predictions.name - dataset_name = local.config_bigquery.dataset.aggregated_predictions.friendly_name - description = local.config_bigquery.dataset.aggregated_predictions.description - project_id = local.config_bigquery.dataset.aggregated_predictions.project_id - location = local.config_bigquery.dataset.aggregated_predictions.location - # The delete_contents_on_destroy attribute specifies whether the contents of the dataset should be deleted when the dataset is destroyed. - # In this case, the delete_contents_on_destroy attribute is set to true, which means that the contents of the dataset will be deleted when the dataset is destroyed. - delete_contents_on_destroy = true - - # The tables attribute is used to configure the BigQuery table within the dataset - tables = [ - { - table_id = "latest" - # The schema of the table, defined in a JSON file. - schema = file("../../sql/schema/table/aggregated_predictions_latest.json") - time_partitioning = null, - range_partitioning = null, - expiration_time = null, - clustering = [], - labels = {}, - } - ] -} - - -# This resource creates a BigQuery dataset called `gemini_insights`. -# For existing users that has pulled this change will result in that -# terraform try to created the `gemini_insights` dataset along with -# the underlying tables. terraform apply will result in an error saying -# it failed to create resources that are already exist. 
To resolve you -# need to import the the existing dataset and tables to terraform using -# the following commands: -# > `terraform -chdir="${TERRAFORM_RUN_DIR}" import module.feature_store[0].module.gemini_insights.google_bigquery_dataset.main 'projects/${MAJ_FEATURE_STORE_PROJECT_ID}/datasets/gemini_insights'` -# -# > `terraform -chdir="${TERRAFORM_RUN_DIR}" import 'module.feature_store[0].module.gemini_insights.google_bigquery_table.main["user_behaviour_revenue_insights_monthly"]' 'projects/${MAJ_FEATURE_STORE_PROJECT_ID}/datasets/gemini_insights/tables/user_behaviour_revenue_insights_monthly'` -# -# > `terraform -chdir="${TERRAFORM_RUN_DIR}" import 'module.feature_store[0].module.gemini_insights.google_bigquery_table.main["user_behaviour_revenue_insights_weekly"]' 'projects/${MAJ_FEATURE_STORE_PROJECT_ID}/datasets/gemini_insights/tables/user_behaviour_revenue_insights_weekly'` -# -# > `terraform -chdir="${TERRAFORM_RUN_DIR}" import 'module.feature_store[0].module.gemini_insights.google_bigquery_table.main["user_behaviour_revenue_insights_daily"]' 'projects/${MAJ_FEATURE_STORE_PROJECT_ID}/datasets/gemini_insights/tables/user_behaviour_revenue_insights_daily'` -# -# You also need to remove the information of the existing gemini_insights -# dataset from the terraform state by running following command: -# > `terraform state rm 'module.feature_store[0].google_bigquery_dataset.gemini_insights'` -locals { - gemini_insights_tables = [ - "user_behaviour_revenue_insights_monthly", - "user_behaviour_revenue_insights_weekly", - "user_behaviour_revenue_insights_daily" - ] -} - -module "gemini_insights" { - source = "terraform-google-modules/bigquery/google" - version = "~> 5.4" - - dataset_id = local.config_bigquery.dataset.gemini_insights.name - dataset_name = local.config_bigquery.dataset.gemini_insights.friendly_name - description = local.config_bigquery.dataset.gemini_insights.description - project_id = null_resource.check_bigquery_api.id != "" ? local.gemini_insights_project_id : local.feature_store_project_id - location = local.config_bigquery.dataset.gemini_insights.location - # The delete_contents_on_destroy attribute specifies whether the contents of the dataset should be deleted when the dataset is destroyed. - # In this case, the delete_contents_on_destroy attribute is set to false, which means that the contents of the dataset will not be deleted when the dataset is destroyed. - delete_contents_on_destroy = true - - dataset_labels = { - version = "prod" - } - - tables = [for table_id in local.gemini_insights_tables : - { - table_id = table_id - schema = file("../../sql/schema/table/${table_id}.json") - # The max_time_travel_hours attribute specifies the maximum number of hours that data in the dataset can be accessed using time travel queries. - # In this case, the maximum time travel hours is set to the value of the local file config.yaml section bigquery.dataset.gemini_insights.max_time_travel_hours configuration. 
- max_time_travel_hours = local.config_bigquery.dataset.gemini_insights.max_time_travel_hours - deletion_protection = false - time_partitioning = null, - range_partitioning = null, - expiration_time = null, - clustering = [], - labels = {}, - }] + depends_on = [null_resource.check_bigquery_api] } \ No newline at end of file diff --git a/infrastructure/terraform/modules/feature-store/bigquery-procedures.tf b/infrastructure/terraform/modules/feature-store/bigquery-procedures.tf index ab03b602..f3b52351 100644 --- a/infrastructure/terraform/modules/feature-store/bigquery-procedures.tf +++ b/infrastructure/terraform/modules/feature-store/bigquery-procedures.tf @@ -26,8 +26,8 @@ data "local_file" "audience_segmentation_inference_preparation_file" { # The procedure is typically invoked before running the Audience Segmentation model to ensure that the input data # is in the correct format and contains the necessary features for accurate predictions. resource "google_bigquery_routine" "audience_segmentation_inference_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.audience_segmentation_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.audience_segmentation.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["audience_segmentation"].dataset_id routine_id = "audience_segmentation_inference_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -54,8 +54,8 @@ data "local_file" "aggregated_value_based_bidding_training_preparation_file" { # The procedure is typically invoked before running the Aggregated Value Based Bidding model to ensure that the input data # is in the correct format and contains the necessary features for training. resource "google_bigquery_routine" "aggregated_value_based_bidding_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.aggregated_vbb_project_id : local.feature_store_project_id - dataset_id = module.aggregated_vbb.bigquery_dataset.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["aggregated_vbb"].dataset_id routine_id = "aggregated_value_based_bidding_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -78,8 +78,8 @@ data "local_file" "aggregated_value_based_bidding_explanation_preparation_file" # The procedure is typically invoked before running the Aggregated Value Based Bidding model to ensure that the input data # is in the correct format and contains the necessary features for explanation. resource "google_bigquery_routine" "aggregated_value_based_bidding_explanation_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.aggregated_vbb_project_id : local.feature_store_project_id - dataset_id = module.aggregated_vbb.bigquery_dataset.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["aggregated_vbb"].dataset_id routine_id = "aggregated_value_based_bidding_explanation_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -101,8 +101,8 @@ data "local_file" "auto_audience_segmentation_inference_preparation_file" { # The procedure is typically invoked before running the Auto Audience Segmentation model to ensure that the input data # is in the correct format and contains the necessary features for prediction. resource "google_bigquery_routine" "auto_audience_segmentation_inference_preparation" { - project = null_resource.check_bigquery_api.id != "" ? 
local.audience_segmentation_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.auto_audience_segmentation.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["auto_audience_segmentation"].dataset_id routine_id = "auto_audience_segmentation_inference_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -128,8 +128,8 @@ data "local_file" "audience_segmentation_training_preparation_file" { # The procedure is typically invoked before running the Audience Segmentation model to ensure that the input data # is in the correct format and contains the necessary features for training. resource "google_bigquery_routine" "audience_segmentation_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.audience_segmentation_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.audience_segmentation.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["audience_segmentation"].dataset_id routine_id = "audience_segmentation_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -170,8 +170,8 @@ data "local_file" "auto_audience_segmentation_training_preparation_file" { # The procedure is typically invoked before running the Auto Audience Segmentation model to ensure that the input data # is in the correct format and contains the necessary features for training. resource "google_bigquery_routine" "auto_audience_segmentation_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.auto_audience_segmentation_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.auto_audience_segmentation.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["auto_audience_segmentation"].dataset_id routine_id = "auto_audience_segmentation_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -207,8 +207,8 @@ data "local_file" "customer_lifetime_value_inference_preparation_file" { # The procedure is typically invoked before running the Customer Lifetime Value model to ensure that the input data # is in the correct format and contains the necessary features for prediction. resource "google_bigquery_routine" "customer_lifetime_value_inference_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.customer_lifetime_value_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.customer_lifetime_value.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["customer_lifetime_value"].dataset_id routine_id = "customer_lifetime_value_inference_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -233,8 +233,8 @@ data "local_file" "customer_lifetime_value_label_file" { # The procedure is typically invoked before training the Customer Lifetime Value model to ensure that the labeled data # is in the correct format and ready for training. resource "google_bigquery_routine" "customer_lifetime_value_label" { - project = null_resource.check_bigquery_api.id != "" ? 
local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "customer_lifetime_value_label" routine_type = "PROCEDURE" language = "SQL" @@ -269,8 +269,8 @@ data "local_file" "customer_lifetime_value_training_preparation_file" { # The procedure is typically invoked before training the Customer Lifetime Value model to ensure that the features data # is in the correct format and contains the necessary features for training. resource "google_bigquery_routine" "customer_lifetime_value_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.customer_lifetime_value_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.customer_lifetime_value.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["customer_lifetime_value"].dataset_id routine_id = "customer_lifetime_value_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -310,8 +310,8 @@ data "local_file" "purchase_propensity_inference_preparation_file" { # The procedure is typically invoked before prediction the Purchase Propensity model to ensure that the features data # is in the correct format and contains the necessary features for prediction. resource "google_bigquery_routine" "purchase_propensity_inference_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.purchase_propensity_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.purchase_propensity.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["purchase_propensity"].dataset_id routine_id = "purchase_propensity_inference_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -336,8 +336,8 @@ data "local_file" "churn_propensity_inference_preparation_file" { # The procedure is typically invoked before prediction the Churn Propensity model to ensure that the features data # is in the correct format and contains the necessary features for prediction. resource "google_bigquery_routine" "churn_propensity_inference_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.churn_propensity_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.churn_propensity.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["churn_propensity"].dataset_id routine_id = "churn_propensity_inference_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -362,8 +362,8 @@ data "local_file" "purchase_propensity_label_file" { # The procedure is typically invoked before training the Purchase Propensity model to ensure that the labeled data # is in the correct format and ready for training. resource "google_bigquery_routine" "purchase_propensity_label" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "purchase_propensity_label" routine_type = "PROCEDURE" language = "SQL" @@ -398,8 +398,8 @@ data "local_file" "churn_propensity_label_file" { # The procedure is typically invoked before training the Churn Propensity model to ensure that the labeled data # is in the correct format and ready for training. 
resource "google_bigquery_routine" "churn_propensity_label" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "churn_propensity_label" routine_type = "PROCEDURE" language = "SQL" @@ -434,8 +434,8 @@ data "local_file" "purchase_propensity_training_preparation_file" { # The procedure is typically invoked before training the Purchase Propensity model to ensure that the features data # is in the correct format and contains the necessary features for training. resource "google_bigquery_routine" "purchase_propensity_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.purchase_propensity_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.purchase_propensity.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["purchase_propensity"].dataset_id routine_id = "purchase_propensity_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -476,8 +476,8 @@ data "local_file" "churn_propensity_training_preparation_file" { # The procedure is typically invoked before training the Churn Propensity model to ensure that the features data # is in the correct format and contains the necessary features for training. resource "google_bigquery_routine" "churn_propensity_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.churn_propensity_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.churn_propensity.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["churn_propensity"].dataset_id routine_id = "churn_propensity_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -517,8 +517,8 @@ data "local_file" "user_dimensions_file" { # The procedure is typically invoked before training the Purchase Propensity model to ensure that the features data # is in the correct format and ready for training. resource "google_bigquery_routine" "user_dimensions" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "user_dimensions" routine_type = "PROCEDURE" language = "SQL" @@ -553,8 +553,8 @@ data "local_file" "user_lifetime_dimensions_file" { # The procedure is typically invoked before training the Customer Lifetime Value model to ensure that the features data # is in the correct format and ready for training. resource "google_bigquery_routine" "user_lifetime_dimensions" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "user_lifetime_dimensions" routine_type = "PROCEDURE" language = "SQL" @@ -589,8 +589,8 @@ data "local_file" "user_lookback_metrics_file" { # The procedure is typically invoked before training the Audience Segmentation model to ensure that the features data # is in the correct format and ready for training. 
resource "google_bigquery_routine" "user_lookback_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "user_lookback_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -625,8 +625,8 @@ data "local_file" "user_rolling_window_lifetime_metrics_file" { # The procedure is typically invoked before training the Customer Lifetime Value model to ensure that the features data # is in the correct format and ready for training. resource "google_bigquery_routine" "user_rolling_window_lifetime_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "user_rolling_window_lifetime_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -661,8 +661,8 @@ data "local_file" "user_rolling_window_metrics_file" { # The procedure is typically invoked before training the Purchase Propensity model to ensure that the features data # is in the correct format and ready for training. resource "google_bigquery_routine" "user_rolling_window_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "user_rolling_window_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -692,8 +692,8 @@ data "local_file" "user_scoped_lifetime_metrics_file" { # The user_rolling_window_metrics procedure is designed to prepare the features for the Customer Lifetime Value model. resource "google_bigquery_routine" "user_scoped_lifetime_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "user_scoped_lifetime_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -723,8 +723,8 @@ data "local_file" "user_scoped_metrics_file" { # The user_scoped_metrics procedure is designed to prepare the features for the Purchase Propensity model. resource "google_bigquery_routine" "user_scoped_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "user_scoped_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -754,8 +754,8 @@ data "local_file" "user_scoped_segmentation_metrics_file" { # The user_scoped_segmentation_metrics procedure is designed to prepare the features for the Audience Segmentation model. resource "google_bigquery_routine" "user_scoped_segmentation_metrics" { - project = null_resource.check_bigquery_api.id != "" ? 
local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "user_scoped_segmentation_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -785,8 +785,8 @@ data "local_file" "user_segmentation_dimensions_file" { # The user_segmentation_dimensions procedure is designed to prepare the features for the Audience Segmentation model. resource "google_bigquery_routine" "user_segmentation_dimensions" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "user_segmentation_dimensions" routine_type = "PROCEDURE" language = "SQL" @@ -816,8 +816,8 @@ data "local_file" "user_session_event_aggregated_metrics_file" { # The user_session_event_aggregated_metrics procedure is designed to prepare the features for the Purchase Propensity model. resource "google_bigquery_routine" "user_session_event_aggregated_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "user_session_event_aggregated_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -847,14 +847,29 @@ data "local_file" "aggregate_predictions_procedure_file" { # The aggregate_last_day_predictions procedure is designed to aggregated the latest predictions from all models. resource "google_bigquery_routine" "aggregate_last_day_predictions" { - project = null_resource.check_bigquery_api.id != "" ? local.aggregate_predictions_project_id : local.feature_store_project_id - dataset_id = module.aggregated_predictions.bigquery_dataset.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["aggregated_predictions"].dataset_id routine_id = "aggregate_last_day_predictions" routine_type = "PROCEDURE" language = "SQL" definition_body = data.local_file.aggregate_predictions_procedure_file.content } +# This resource reads the contents of a local SQL file named user_predictions_aggregation.sql +data "local_file" "user_predictions_aggregation_file" { + filename = "${local.sql_dir}/procedure/user_predictions_aggregation.sql" +} + +# The user_predictions_aggregation procedure is designed to aggregate the latest predictions from all models. +resource "google_bigquery_routine" "user_predictions_aggregation" { + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["aggregated_predictions"].dataset_id + routine_id = "user_predictions_aggregation" + routine_type = "PROCEDURE" + language = "SQL" + definition_body = data.local_file.user_predictions_aggregation_file.content +} + # This resource reads the contents of a local SQL file named user_behaviour_revenue_insights.sql and # stores it in a variable named user_behaviour_revenue_insights_file.content. # The SQL file is expected to contain the definition of a BigQuery procedure named user_behaviour_revenue_insights. @@ -864,8 +879,8 @@ data "local_file" "user_behaviour_revenue_insights_file" { # The user_behaviour_revenue_insights procedure is designed to generate gemini insights.
resource "google_bigquery_routine" "user_behaviour_revenue_insights" { - project = null_resource.check_bigquery_api.id != "" ? local.gemini_insights_project_id : local.feature_store_project_id - dataset_id = local.config_bigquery.dataset.gemini_insights.name + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["gemini_insights"].dataset_id routine_id = "user_behaviour_revenue_insights" routine_type = "PROCEDURE" language = "SQL" @@ -893,8 +908,8 @@ data "local_file" "invoke_backfill_customer_lifetime_value_label_file" { # The invoke_backfill_customer_lifetime_value_label procedure is designed to invoke the backfill query for customer_lifetime_value_label. resource "google_bigquery_routine" "invoke_backfill_customer_lifetime_value_label" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_customer_lifetime_value_label" routine_type = "PROCEDURE" language = "SQL" @@ -907,8 +922,8 @@ data "local_file" "invoke_backfill_purchase_propensity_label_file" { } resource "google_bigquery_routine" "invoke_backfill_purchase_propensity_label" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_purchase_propensity_label" routine_type = "PROCEDURE" language = "SQL" @@ -921,8 +936,8 @@ data "local_file" "invoke_backfill_churn_propensity_label_file" { } resource "google_bigquery_routine" "invoke_backfill_churn_propensity_label" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_churn_propensity_label" routine_type = "PROCEDURE" language = "SQL" @@ -935,8 +950,8 @@ data "local_file" "invoke_backfill_user_dimensions_file" { } resource "google_bigquery_routine" "invoke_backfill_user_dimensions" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_user_dimensions" routine_type = "PROCEDURE" language = "SQL" @@ -949,8 +964,8 @@ data "local_file" "invoke_backfill_user_lifetime_dimensions_file" { } resource "google_bigquery_routine" "invoke_backfill_user_lifetime_dimensions" { - project = null_resource.check_bigquery_api.id != "" ? 
local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_user_lifetime_dimensions" routine_type = "PROCEDURE" language = "SQL" @@ -964,8 +979,8 @@ data "local_file" "invoke_backfill_user_lookback_metrics_file" { } resource "google_bigquery_routine" "invoke_backfill_user_lookback_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_user_lookback_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -979,8 +994,8 @@ data "local_file" "invoke_backfill_user_rolling_window_lifetime_metrics_file" { } resource "google_bigquery_routine" "invoke_backfill_user_rolling_window_lifetime_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_user_rolling_window_lifetime_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -994,8 +1009,8 @@ data "local_file" "invoke_backfill_user_rolling_window_metrics_file" { } resource "google_bigquery_routine" "invoke_backfill_user_rolling_window_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_user_rolling_window_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1009,8 +1024,8 @@ data "local_file" "invoke_backfill_user_scoped_lifetime_metrics_file" { } resource "google_bigquery_routine" "invoke_backfill_user_scoped_lifetime_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_user_scoped_lifetime_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1023,8 +1038,8 @@ data "local_file" "invoke_backfill_user_scoped_metrics_file" { } resource "google_bigquery_routine" "invoke_backfill_user_scoped_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_user_scoped_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1037,8 +1052,8 @@ data "local_file" "invoke_backfill_user_scoped_segmentation_metrics_file" { } resource "google_bigquery_routine" "invoke_backfill_user_scoped_segmentation_metrics" { - project = null_resource.check_bigquery_api.id != "" ? 
local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_user_scoped_segmentation_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1051,8 +1066,8 @@ data "local_file" "invoke_backfill_user_segmentation_dimensions_file" { } resource "google_bigquery_routine" "invoke_backfill_user_segmentation_dimensions" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_user_segmentation_dimensions" routine_type = "PROCEDURE" language = "SQL" @@ -1065,8 +1080,8 @@ data "local_file" "invoke_backfill_user_session_event_aggregated_metrics_file" { } resource "google_bigquery_routine" "invoke_backfill_user_session_event_aggregated_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_backfill_user_session_event_aggregated_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1079,8 +1094,8 @@ data "local_file" "invoke_backfill_user_behaviour_revenue_insights_file" { } resource "google_bigquery_routine" "invoke_backfill_user_behaviour_revenue_insights" { - project = null_resource.check_bigquery_api.id != "" ? local.gemini_insights_project_id : local.feature_store_project_id - dataset_id = local.config_bigquery.dataset.gemini_insights.name + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["gemini_insights"].dataset_id routine_id = "invoke_backfill_user_behaviour_revenue_insights" routine_type = "PROCEDURE" language = "SQL" @@ -1102,8 +1117,8 @@ data "local_file" "invoke_customer_lifetime_value_inference_preparation_file" { } resource "google_bigquery_routine" "invoke_customer_lifetime_value_inference_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.customer_lifetime_value_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.customer_lifetime_value.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["customer_lifetime_value"].dataset_id routine_id = "invoke_customer_lifetime_value_inference_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1116,8 +1131,8 @@ data "local_file" "invoke_purchase_propensity_inference_preparation_file" { } resource "google_bigquery_routine" "invoke_purchase_propensity_inference_preparation" { - project = null_resource.check_bigquery_api.id != "" ? 
local.purchase_propensity_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.purchase_propensity.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["purchase_propensity"].dataset_id routine_id = "invoke_purchase_propensity_inference_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1130,8 +1145,8 @@ data "local_file" "invoke_churn_propensity_inference_preparation_file" { } resource "google_bigquery_routine" "invoke_churn_propensity_inference_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.churn_propensity_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.churn_propensity.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["churn_propensity"].dataset_id routine_id = "invoke_churn_propensity_inference_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1144,8 +1159,8 @@ data "local_file" "invoke_audience_segmentation_inference_preparation_file" { } resource "google_bigquery_routine" "invoke_audience_segmentation_inference_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.audience_segmentation_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.audience_segmentation.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["audience_segmentation"].dataset_id routine_id = "invoke_audience_segmentation_inference_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1157,8 +1172,8 @@ data "local_file" "invoke_auto_audience_segmentation_inference_preparation_file" } resource "google_bigquery_routine" "invoke_auto_audience_segmentation_inference_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.auto_audience_segmentation_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.auto_audience_segmentation.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["auto_audience_segmentation"].dataset_id routine_id = "invoke_auto_audience_segmentation_inference_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1170,8 +1185,8 @@ data "local_file" "invoke_auto_audience_segmentation_training_preparation_file" } resource "google_bigquery_routine" "invoke_auto_audience_segmentation_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.auto_audience_segmentation_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.auto_audience_segmentation.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["auto_audience_segmentation"].dataset_id routine_id = "invoke_auto_audience_segmentation_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1184,8 +1199,8 @@ data "local_file" "invoke_customer_lifetime_value_training_preparation_file" { } resource "google_bigquery_routine" "invoke_customer_lifetime_value_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? 
local.customer_lifetime_value_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.customer_lifetime_value.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["customer_lifetime_value"].dataset_id routine_id = "invoke_customer_lifetime_value_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1198,8 +1213,8 @@ data "local_file" "invoke_purchase_propensity_training_preparation_file" { } resource "google_bigquery_routine" "invoke_purchase_propensity_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.purchase_propensity_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.purchase_propensity.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["purchase_propensity"].dataset_id routine_id = "invoke_purchase_propensity_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1212,8 +1227,8 @@ data "local_file" "invoke_churn_propensity_training_preparation_file" { } resource "google_bigquery_routine" "invoke_churn_propensity_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.churn_propensity_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.churn_propensity.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["churn_propensity"].dataset_id routine_id = "invoke_churn_propensity_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1226,8 +1241,8 @@ data "local_file" "invoke_audience_segmentation_training_preparation_file" { } resource "google_bigquery_routine" "invoke_audience_segmentation_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.audience_segmentation_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.audience_segmentation.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["audience_segmentation"].dataset_id routine_id = "invoke_audience_segmentation_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1241,8 +1256,8 @@ data "local_file" "invoke_aggregated_value_based_bidding_training_preparation_fi # Terraform resource for invoking the bigquery stored procedure resource "google_bigquery_routine" "invoke_aggregated_value_based_bidding_training_preparation" { - project = null_resource.check_bigquery_api.id != "" ? local.aggregated_vbb_project_id : local.feature_store_project_id - dataset_id = module.aggregated_vbb.bigquery_dataset.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["aggregated_vbb"].dataset_id routine_id = "invoke_aggregated_value_based_bidding_training_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1256,8 +1271,8 @@ data "local_file" "invoke_aggregated_value_based_bidding_explanation_preparation # Terraform resource for invoking the bigquery stored procedure resource "google_bigquery_routine" "invoke_aggregated_value_based_bidding_explanation_preparation" { - project = null_resource.check_bigquery_api.id != "" ? 
local.aggregated_vbb_project_id : local.feature_store_project_id - dataset_id = module.aggregated_vbb.bigquery_dataset.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["aggregated_vbb"].dataset_id routine_id = "invoke_aggregated_value_based_bidding_explanation_preparation" routine_type = "PROCEDURE" language = "SQL" @@ -1273,8 +1288,8 @@ data "local_file" "invoke_customer_lifetime_value_label_file" { } resource "google_bigquery_routine" "invoke_customer_lifetime_value_label" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_customer_lifetime_value_label" routine_type = "PROCEDURE" language = "SQL" @@ -1287,8 +1302,8 @@ data "local_file" "invoke_purchase_propensity_label_file" { } resource "google_bigquery_routine" "invoke_purchase_propensity_label" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_purchase_propensity_label" routine_type = "PROCEDURE" language = "SQL" @@ -1302,8 +1317,8 @@ data "local_file" "invoke_churn_propensity_label_file" { } resource "google_bigquery_routine" "invoke_churn_propensity_label" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_churn_propensity_label" routine_type = "PROCEDURE" language = "SQL" @@ -1317,8 +1332,8 @@ data "local_file" "invoke_user_dimensions_file" { } resource "google_bigquery_routine" "invoke_user_dimensions" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_user_dimensions" routine_type = "PROCEDURE" language = "SQL" @@ -1331,8 +1346,8 @@ data "local_file" "invoke_user_lifetime_dimensions_file" { } resource "google_bigquery_routine" "invoke_user_lifetime_dimensions" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_user_lifetime_dimensions" routine_type = "PROCEDURE" language = "SQL" @@ -1346,8 +1361,8 @@ data "local_file" "invoke_user_lookback_metrics_file" { } resource "google_bigquery_routine" "invoke_user_lookback_metrics" { - project = null_resource.check_bigquery_api.id != "" ? 
local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_user_lookback_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1361,8 +1376,8 @@ data "local_file" "invoke_user_rolling_window_lifetime_metrics_file" { } resource "google_bigquery_routine" "invoke_user_rolling_window_lifetime_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_user_rolling_window_lifetime_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1376,8 +1391,8 @@ data "local_file" "invoke_user_rolling_window_metrics_file" { } resource "google_bigquery_routine" "invoke_user_rolling_window_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_user_rolling_window_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1391,8 +1406,8 @@ data "local_file" "invoke_user_scoped_lifetime_metrics_file" { } resource "google_bigquery_routine" "invoke_user_scoped_lifetime_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_user_scoped_lifetime_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1405,8 +1420,8 @@ data "local_file" "invoke_user_scoped_metrics_file" { } resource "google_bigquery_routine" "invoke_user_scoped_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_user_scoped_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1419,8 +1434,8 @@ data "local_file" "invoke_user_scoped_segmentation_metrics_file" { } resource "google_bigquery_routine" "invoke_user_scoped_segmentation_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_user_scoped_segmentation_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1433,8 +1448,8 @@ data "local_file" "invoke_user_segmentation_dimensions_file" { } resource "google_bigquery_routine" "invoke_user_segmentation_dimensions" { - project = null_resource.check_bigquery_api.id != "" ? 
local.feature_store_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_user_segmentation_dimensions" routine_type = "PROCEDURE" language = "SQL" @@ -1447,8 +1462,8 @@ data "local_file" "invoke_user_session_event_aggregated_metrics_file" { } resource "google_bigquery_routine" "invoke_user_session_event_aggregated_metrics" { - project = null_resource.check_bigquery_api.id != "" ? local.purchase_propensity_project_id : local.feature_store_project_id - dataset_id = google_bigquery_dataset.feature_store.dataset_id + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["feature_store"].dataset_id routine_id = "invoke_user_session_event_aggregated_metrics" routine_type = "PROCEDURE" language = "SQL" @@ -1478,7 +1493,7 @@ resource "null_resource" "check_gemini_model_exists" { command = <<-EOT COUNTER=0 MAX_TRIES=100 - while ! bq --project_id=${local.gemini_insights_project_id} ls -m --format=pretty ${local.gemini_insights_project_id}:${local.config_bigquery.dataset.gemini_insights.name} | grep -i "gemini_1_5_pro" && [ $COUNTER -lt $MAX_TRIES ] + while ! bq --project_id=${var.project_id} ls -m --format=pretty ${var.project_id}:${google_bigquery_dataset.datasets["gemini_insights"].dataset_id} | grep -i "gemini_1_5_pro" && [ $COUNTER -lt $MAX_TRIES ] do sleep 5 printf "." @@ -1503,8 +1518,8 @@ data "local_file" "invoke_user_behaviour_revenue_insights_file" { } resource "google_bigquery_routine" "invoke_user_behaviour_revenue_insights" { - project = null_resource.check_bigquery_api.id != "" ? local.gemini_insights_project_id : local.feature_store_project_id - dataset_id = local.config_bigquery.dataset.gemini_insights.name + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["gemini_insights"].dataset_id routine_id = "invoke_user_behaviour_revenue_insights" routine_type = "PROCEDURE" language = "SQL" diff --git a/infrastructure/terraform/modules/feature-store/bigquery-tables.tf b/infrastructure/terraform/modules/feature-store/bigquery-tables.tf index c968bda2..b50cb230 100644 --- a/infrastructure/terraform/modules/feature-store/bigquery-tables.tf +++ b/infrastructure/terraform/modules/feature-store/bigquery-tables.tf @@ -14,402 +14,177 @@ # This resource creates a BigQuery table named audience_segmentation_inference_preparation # in the dataset specified by google_bigquery_dataset.audience_segmentation.dataset_id. -resource "google_bigquery_table" "audience_segmentation_inference_preparation" { - project = google_bigquery_dataset.audience_segmentation.project - dataset_id = google_bigquery_dataset.audience_segmentation.dataset_id - table_id = local.config_bigquery.table.audience_segmentation_inference_preparation.table_name - description = local.config_bigquery.table.audience_segmentation_inference_preparation.table_description - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. 
- schema = file("${local.sql_dir}/schema/table/audience_segmentation_inference_preparation.json") -} - -# This resource creates a BigQuery table named customer_lifetime_value_inference_preparation -# in the dataset specified by google_bigquery_dataset.customer_lifetime_value.dataset_id. -resource "google_bigquery_table" "customer_lifetime_value_inference_preparation" { - project = google_bigquery_dataset.customer_lifetime_value.project - dataset_id = google_bigquery_dataset.customer_lifetime_value.dataset_id - table_id = local.config_bigquery.table.customer_lifetime_value_inference_preparation.table_name - description = local.config_bigquery.table.customer_lifetime_value_inference_preparation.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/customer_lifetime_value_inference_preparation.json") -} - -# This resource creates a BigQuery table named customer_lifetime_value_label -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "customer_lifetime_value_label" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.customer_lifetime_value_label.table_name - description = local.config_bigquery.table.customer_lifetime_value_label.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/customer_lifetime_value_label.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named purchase_propensity_inference_preparation -# in the dataset specified by google_bigquery_dataset.purchase_propensity.dataset_id. -resource "google_bigquery_table" "purchase_propensity_inference_preparation" { - project = google_bigquery_dataset.purchase_propensity.project - dataset_id = google_bigquery_dataset.purchase_propensity.dataset_id - table_id = local.config_bigquery.table.purchase_propensity_inference_preparation.table_name - description = local.config_bigquery.table.purchase_propensity_inference_preparation.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. 
- schema = file("${local.sql_dir}/schema/table/purchase_propensity_inference_preparation.json") -} - -# This resource creates a BigQuery table named churn_propensity_inference_preparation -# in the dataset specified by google_bigquery_dataset.churn_propensity.dataset_id. -resource "google_bigquery_table" "churn_propensity_inference_preparation" { - project = google_bigquery_dataset.churn_propensity.project - dataset_id = google_bigquery_dataset.churn_propensity.dataset_id - table_id = local.config_bigquery.table.churn_propensity_inference_preparation.table_name - description = local.config_bigquery.table.churn_propensity_inference_preparation.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/churn_propensity_inference_preparation.json") -} - -# This resource creates a BigQuery table named purchase_propensity_label -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "purchase_propensity_label" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.purchase_propensity_label.table_name - description = local.config_bigquery.table.purchase_propensity_label.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/purchase_propensity_label.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named churn_propensity_label -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "churn_propensity_label" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.churn_propensity_label.table_name - description = local.config_bigquery.table.churn_propensity_label.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/churn_propensity_label.json") - - # The lifecycle block is used to configure the lifecycle of the table. 
In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named user_dimensions -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "user_dimensions" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.user_dimensions.table_name - description = local.config_bigquery.table.user_dimensions.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/user_dimensions.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named user_lifetime_dimensions -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "user_lifetime_dimensions" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.user_lifetime_dimensions.table_name - description = local.config_bigquery.table.user_lifetime_dimensions.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/user_lifetime_dimensions.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. 
-  lifecycle { - ignore_changes = all - prevent_destroy = true }
+locals {
+  tables = tomap({
+    audience_segmentation_inference_preparation = {
+      dataset           = "audience_segmentation"
+      name              = "audience_segmentation_inference_preparation"
+      table_description = "Audience Segmentation Inference Preparation table to be used for Model Prediction"
+      schema_file       = "audience_segmentation_inference_preparation.json"
+    },
+    customer_lifetime_value_inference_preparation = {
+      dataset           = "customer_lifetime_value"
+      name              = "customer_lifetime_value_inference_preparation"
+      table_description = "Customer Lifetime Value Inference Preparation table to be used for Model Prediction"
+      schema_file       = "customer_lifetime_value_inference_preparation.json"
+    },
+    customer_lifetime_value_label = {
+      dataset           = "feature_store"
+      name              = "customer_lifetime_value_label"
+      table_description = "Customer Lifetime Value Label table to be used for Model Training"
+      schema_file       = "customer_lifetime_value_label.json"
+    },
+    purchase_propensity_inference_preparation = {
+      dataset           = "purchase_propensity"
+      name              = "purchase_propensity_inference_preparation"
+      table_description = "Purchase Propensity Inference Preparation table to be used for Model Prediction"
+      schema_file       = "purchase_propensity_inference_preparation.json"
+    },
+    churn_propensity_inference_preparation = {
+      dataset           = "churn_propensity"
+      name              = "churn_propensity_inference_preparation"
+      table_description = "Churn Propensity Inference Preparation table to be used for Model Prediction"
+      schema_file       = "churn_propensity_inference_preparation.json"
+    },
+    purchase_propensity_label = {
+      dataset           = "feature_store"
+      name              = "purchase_propensity_label"
+      table_description = "Purchase Propensity Label table to be used for Model Prediction"
+      schema_file       = "purchase_propensity_label.json"
+    },
+    churn_propensity_label = {
+      dataset           = "feature_store"
+      name              = "churn_propensity_label"
+      table_description = "Churn Propensity Label table to be used for Model Prediction"
+      schema_file       = "churn_propensity_label.json"
+    },
+    user_dimensions = {
+      dataset           = "feature_store"
+      name              = "user_dimensions"
+      table_description = "User Dimensions table as part of the Feature Store for the Purchase Propensity use case"
+      schema_file       = "user_dimensions.json"
+    },
+    user_lifetime_dimensions = {
+      dataset           = "feature_store"
+      name              = "user_lifetime_dimensions"
+      table_description = "User Lifetime Dimensions table as part of the Feature Store for the Customer Lifetime Value use case"
+      schema_file       = "user_lifetime_dimensions.json"
+    },
+    user_lookback_metrics = {
+      dataset           = "feature_store"
+      name              = "user_lookback_metrics"
+      table_description = "User Lookback Metrics table as part of the Feature Store"
+      schema_file       = "user_lookback_metrics.json"
+    },
+    user_rolling_window_lifetime_metrics = {
+      dataset           = "feature_store"
+      name              = "user_rolling_window_lifetime_metrics"
+      table_description = "User Rolling Window Lifetime Metrics table as part of the Feature Store for the Customer Lifetime Value use case"
+      schema_file       = "user_rolling_window_lifetime_metrics.json"
+    },
+    user_rolling_window_metrics = {
+      dataset           = "feature_store"
+      name              = "user_rolling_window_metrics"
+      table_description = "User Rolling Window Metrics table as part of the Feature Store for Purchase Propensity use case"
+      schema_file       = "user_rolling_window_metrics.json"
+    },
+    user_scoped_lifetime_metrics = {
+      dataset           = "feature_store"
+      name              = "user_scoped_lifetime_metrics"
+      table_description = "User Scoped Lifetime Metrics table as
part of the Feature Store for the Customer Lifetime Value use case" + schema_file = "user_scoped_lifetime_metrics.json" + }, + user_scoped_metrics = { + dataset = "feature_store" + name = "user_scoped_metrics" + table_description = "User Scoped Metrics table as part of the Feature Store for the Purchase Propensity use case" + schema_file = "user_scoped_metrics.json" + }, + user_scoped_segmentation_metrics = { + dataset = "feature_store" + name = "user_scoped_segmentation_metrics" + table_description = "User Scoped Segmentation Metrics table as part of the Feature Store for Audience Segmentation use case" + schema_file = "user_scoped_segmentation_metrics.json" + }, + user_segmentation_dimensions = { + dataset = "feature_store" + name = "user_segmentation_dimensions" + table_description = "User Segmentation Dimensions table as part of the Feature Store for Audience Segmentation use case" + schema_file = "user_segmentation_dimensions.json" + }, + user_session_event_aggregated_metrics = { + dataset = "feature_store" + name = "user_session_event_aggregated_metrics" + table_description = "User Session Event Aggregated Metrics table as part of the Feature Store" + schema_file = "user_session_event_aggregated_metrics.json" + }, + vbb_weights = { + dataset = "aggregated_vbb" + name = "vbb_weights" + table_description = "Aggregated Value Based Bidding weights table" + schema_file = "vbb_weights.json" + }, + aggregated_value_based_bidding_correlation = { + dataset = "aggregated_vbb" + name = "aggregated_value_based_bidding_correlation" + table_description = "Aggregated Value Based Bidding correlation table" + schema_file = "aggregated_value_based_bidding_correlation.json" + }, + aggregated_value_based_bidding_volume_daily = { + dataset = "aggregated_vbb" + name = "aggregated_value_based_bidding_volume_daily" + table_description = "Aggregated Value Based Bidding daily volume table" + schema_file = "aggregated_value_based_bidding_volume_daily.json" + }, + aggregated_value_based_bidding_volume_weekly = { + dataset = "aggregated_vbb" + name = "aggregated_value_based_bidding_volume_weekly" + table_description = "Aggregated Value Based Bidding weekly volume table" + schema_file = "aggregated_value_based_bidding_volume_weekly.json" + }, + aggregated_predictions_latest = { + dataset = "aggregated_predictions" + name = "latest" + table_description = "Stores aggregated predictions generated by the predictions pipelines." + schema_file = "aggregated_predictions_latest.json" + }, + aggregated_user_predictions = { + dataset = "aggregated_predictions" + name = "user_predictions" + table_description = "Stores aggregated predictions per user generated by the predictions pipelines." 
+ schema_file = "aggregated_predictions_per_user.json" + }, + user_behaviour_revenue_insights_monthly = { + dataset = "gemini_insights" + name = "user_behaviour_revenue_insights_monthly" + table_description = "User Behaviour Revenue monthly insights" + schema_file = "user_behaviour_revenue_insights_monthly.json" + }, + user_behaviour_revenue_insights_weekly = { + dataset = "gemini_insights" + name = "user_behaviour_revenue_insights_weekly" + table_description = "User Behaviour Revenue weekly insights" + schema_file = "user_behaviour_revenue_insights_weekly.json" + }, + user_behaviour_revenue_insights_daily = { + dataset = "gemini_insights" + name = "user_behaviour_revenue_insights_daily" + table_description = "User Behaviour Revenue daily insights" + schema_file = "user_behaviour_revenue_insights_daily.json" + } + }) +} + +resource "google_bigquery_table" "tables" { + for_each = local.tables + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["${each.value.dataset}"].dataset_id + table_id = each.value.name + description = each.value.table_description + deletion_protection = false + labels = { + version = "prod" + } + schema = file("${local.sql_dir}/schema/table/${each.value.schema_file}") } - -# This resource creates a BigQuery table named user_lookback_metrics -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "user_lookback_metrics" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.user_lookback_metrics.table_name - description = local.config_bigquery.table.user_lookback_metrics.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/user_lookback_metrics.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named user_rolling_window_lifetime_metrics -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "user_rolling_window_lifetime_metrics" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.user_rolling_window_lifetime_metrics.table_name - description = local.config_bigquery.table.user_rolling_window_lifetime_metrics.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. 
- schema = file("${local.sql_dir}/schema/table/user_rolling_window_lifetime_metrics.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named user_rolling_window_metrics -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "user_rolling_window_metrics" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.user_rolling_window_metrics.table_name - description = local.config_bigquery.table.user_rolling_window_metrics.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/user_rolling_window_metrics.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named user_scoped_lifetime_metrics -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "user_scoped_lifetime_metrics" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.user_scoped_lifetime_metrics.table_name - description = local.config_bigquery.table.user_scoped_lifetime_metrics.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/user_scoped_lifetime_metrics.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named user_scoped_metrics -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. 
-resource "google_bigquery_table" "user_scoped_metrics" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.user_scoped_metrics.table_name - description = local.config_bigquery.table.user_scoped_metrics.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/user_scoped_metrics.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named user_scoped_segmentation_metrics -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "user_scoped_segmentation_metrics" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.user_scoped_segmentation_metrics.table_name - description = local.config_bigquery.table.user_scoped_segmentation_metrics.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/user_scoped_segmentation_metrics.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named user_segmentation_dimensions -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "user_segmentation_dimensions" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.user_segmentation_dimensions.table_name - description = local.config_bigquery.table.user_segmentation_dimensions.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. 
- schema = file("${local.sql_dir}/schema/table/user_segmentation_dimensions.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - -# This resource creates a BigQuery table named user_session_event_aggregated_metrics -# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id. -resource "google_bigquery_table" "user_session_event_aggregated_metrics" { - project = google_bigquery_dataset.feature_store.project - dataset_id = google_bigquery_dataset.feature_store.dataset_id - table_id = local.config_bigquery.table.user_session_event_aggregated_metrics.table_name - description = local.config_bigquery.table.user_session_event_aggregated_metrics.table_description - - # The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted. - deletion_protection = false - labels = { - version = "prod" - } - - # The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file. - schema = file("${local.sql_dir}/schema/table/user_session_event_aggregated_metrics.json") - - # The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore - # any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed. - lifecycle { - ignore_changes = all - prevent_destroy = true - } -} - - diff --git a/infrastructure/terraform/modules/feature-store/main.tf b/infrastructure/terraform/modules/feature-store/main.tf index 6c556f68..85e71741 100644 --- a/infrastructure/terraform/modules/feature-store/main.tf +++ b/infrastructure/terraform/modules/feature-store/main.tf @@ -12,25 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -data "local_file" "config_vars" { - filename = var.config_file_path -} - locals { - config_vars = yamldecode(data.local_file.config_vars.content) - config_bigquery = local.config_vars.bigquery - feature_store_project_id = local.config_vars.bigquery.dataset.feature_store.project_id - sql_dir = var.sql_dir_input - poetry_run_alias = "${var.poetry_cmd} run" - builder_repository_id = "marketing-analytics-jumpstart-base-repo" - purchase_propensity_project_id = null_resource.check_bigquery_api.id != "" ? local.config_vars.bigquery.dataset.purchase_propensity.project_id : local.feature_store_project_id - churn_propensity_project_id = null_resource.check_bigquery_api.id != "" ? local.config_vars.bigquery.dataset.churn_propensity.project_id : local.feature_store_project_id - audience_segmentation_project_id = null_resource.check_bigquery_api.id != "" ? local.config_vars.bigquery.dataset.audience_segmentation.project_id : local.feature_store_project_id - auto_audience_segmentation_project_id = null_resource.check_bigquery_api.id != "" ? 
local.config_vars.bigquery.dataset.auto_audience_segmentation.project_id : local.feature_store_project_id - aggregated_vbb_project_id = null_resource.check_bigquery_api.id != "" ? local.config_vars.bigquery.dataset.aggregated_vbb.project_id : local.feature_store_project_id - customer_lifetime_value_project_id = null_resource.check_bigquery_api.id != "" ? local.config_vars.bigquery.dataset.customer_lifetime_value.project_id : local.feature_store_project_id - aggregate_predictions_project_id = null_resource.check_bigquery_api.id != "" ? local.config_vars.bigquery.dataset.aggregated_predictions.project_id : local.feature_store_project_id - gemini_insights_project_id = null_resource.check_bigquery_api.id != "" ? local.config_vars.bigquery.dataset.gemini_insights.project_id : local.feature_store_project_id + sql_dir = var.sql_dir_input + poetry_run_alias = "${var.poetry_cmd} run" } module "project_services" { @@ -40,7 +24,7 @@ module "project_services" { disable_dependent_services = true disable_services_on_destroy = false - project_id = local.feature_store_project_id + project_id = var.project_id activate_apis = [ "artifactregistry.googleapis.com", @@ -114,8 +98,8 @@ resource "null_resource" "check_aiplatform_api" { ## Note: The cloud resource nested object has only one output only field - serviceAccountId. resource "google_bigquery_connection" "vertex_ai_connection" { connection_id = "vertex_ai" - project = null_resource.check_aiplatform_api.id != "" ? module.project_services.project_id : local.feature_store_project_id - location = local.config_bigquery.region + project = null_resource.check_aiplatform_api.id != "" ? module.project_services.project_id : var.project_id + location = var.data_location cloud_resource {} } @@ -128,7 +112,7 @@ resource "google_project_iam_member" "vertex_ai_connection_sa_roles" { google_bigquery_connection.vertex_ai_connection ] - project = null_resource.check_aiplatform_api.id != "" ? module.project_services.project_id : local.feature_store_project_id + project = null_resource.check_aiplatform_api.id != "" ? module.project_services.project_id : var.project_id member = "serviceAccount:${google_bigquery_connection.vertex_ai_connection.cloud_resource[0].service_account_id}" for_each = toset([ diff --git a/infrastructure/terraform/modules/feature-store/variables.tf b/infrastructure/terraform/modules/feature-store/variables.tf index d20b92b7..92653739 100644 --- a/infrastructure/terraform/modules/feature-store/variables.tf +++ b/infrastructure/terraform/modules/feature-store/variables.tf @@ -12,11 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-variable "config_file_path" { - type = string - description = "feature store config file" -} - variable "enabled" { type = bool description = "Toogle all resources in module" @@ -42,3 +37,8 @@ variable "poetry_cmd" { type = string default = "poetry" } + +variable "data_location" { + description = "Location of the BigQuery datasets" + type = string +} diff --git a/sql/procedure/user_predictions_aggregation.sqlx b/sql/procedure/user_predictions_aggregation.sqlx new file mode 100644 index 00000000..600118ba --- /dev/null +++ b/sql/procedure/user_predictions_aggregation.sqlx @@ -0,0 +1,59 @@ +CREATE OR REPLACE FUNCTION `{{project_id}}.{{dataset_id}}.dataset_exists`(dataset_name STRING) +RETURNS BOOL AS (( SELECT COUNT(*) > 0 FROM `{{project_id}}`.INFORMATION_SCHEMA.SCHEMATA WHERE schema_name = dataset_name )); + +CREATE OR REPLACE FUNCTION `{{project_id}}.{{dataset_id}}.create_update_field_values_str`( + fields ARRAY, + t_alias STRING, + s_alias STRING) +RETURNS STRING AS (( + SELECT ARRAY_TO_STRING(( + SELECT ARRAY( + SELECT CONCAT(t_alias, '.', element, '=', s_alias,'.', element) + FROM UNNEST(fields) AS element) ), ',') )); + +CREATE OR REPLACE FUNCTION `{{project_id}}.{{dataset_id}}.create_insert_field_values_str`( + fields ARRAY, + t_alias STRING) +RETURNS STRING AS (( + SELECT ARRAY_TO_STRING(( + SELECT ARRAY( + SELECT CONCAT(t_alias, '.', element) + FROM UNNEST(fields) AS element) ), ',') )); + +CREATE OR REPLACE PROCEDURE + `{{project_id}}.{{dataset_id}}.load_latest_prediction`( + use_case_dataset STRING, + prediction_table_pattern STRING, + prediction_fields STRING, + update_fields ARRAY) +BEGIN + DECLARE prediction_table_name, value_select_query, update_set_fields, insert_value_fields, insert_source_fields, merge_query STRING; + IF `{{project_id}}.{{dataset_id}}.dataset_exists`(use_case_dataset) THEN + CALL {{dataset_id}}.get_latest_table_by_pattern(use_case_dataset, prediction_table_pattern, prediction_table_name); + IF NOT prediction_table_name = '' THEN + SET value_select_query = FORMAT(""" + CREATE OR REPLACE TEMP TABLE prediction_value_tmp_table AS + SELECT + user_pseudo_id, + user_id, + %s + FROM `%s`; + """, prediction_fields, prediction_table_name); + EXECUTE IMMEDIATE value_select_query; + SET update_set_fields = `{{project_id}}.{{dataset_id}}.create_update_field_values_str`(update_fields,'T','S'); + SET insert_value_fields = ARRAY_TO_STRING(update_fields, ','); + SET insert_source_fields = `{{project_id}}.{{dataset_id}}.create_insert_field_values_str`(update_fields,'S'); + SET merge_query = FORMAT(""" + MERGE `{{project_id}}.{{dataset_id}}.{{table_id}}` T + USING prediction_value_tmp_table S + ON T.user_pseudo_id = S.user_pseudo_id AND COALESCE(T.user_id, "") = COALESCE(S.user_id, "") + WHEN MATCHED THEN UPDATE SET %s + WHEN NOT MATCHED THEN INSERT + ( user_pseudo_id, user_id, %s ) + VALUES + ( S.user_pseudo_id, S.user_id, %s ) + """, update_set_fields, insert_value_fields, insert_source_fields); + EXECUTE IMMEDIATE merge_query; + END IF; + END IF; +END; diff --git a/sql/schema/table/aggregated_predictions_per_user.json b/sql/schema/table/aggregated_predictions_per_user.json new file mode 100644 index 00000000..f90df718 --- /dev/null +++ b/sql/schema/table/aggregated_predictions_per_user.json @@ -0,0 +1,58 @@ +[ + { + "name": "user_pseudo_id", + "type": "STRING" + }, + { + "name": "user_id", + "type": "STRING" + }, + { + "name": "ltv_prediction", + "type": "FLOAT" + }, + { + "name": "ltv_decile", + "type": "INTEGER" + }, + { + "name": "ltv_prediction_timestamp", + "type": 
"TIMESTAMP" + }, + { + "name": "propensity_prediction", + "type": "STRING" + }, + { + "name": "propensity_score", + "type": "FLOAT" + }, + { + "name": "propensity_decile", + "type": "INTEGER" + }, + { + "name": "propensity_prediction_timestamp", + "type": "TIMESTAMP" + }, + { + "name": "segment_prediction", + "type": "INTEGER" + }, + { + "name": "segment_distance", + "type": "FLOAT" + }, + { + "name": "segment_prediction_timestamp", + "type": "TIMESTAMP" + }, + { + "name": "auto_segment_prediction", + "type": "STRING" + }, + { + "name": "auto_segment_prediction_timestamp", + "type": "TIMESTAMP" + } +] \ No newline at end of file From 7b760f950a9e46d36ae97143e4dc45d34205c393 Mon Sep 17 00:00:00 2001 From: Charlie Wang Date: Thu, 11 Jul 2024 07:30:00 +0000 Subject: [PATCH 2/7] add fields for churn predictions --- .../table/aggregated_predictions_per_user.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sql/schema/table/aggregated_predictions_per_user.json b/sql/schema/table/aggregated_predictions_per_user.json index f90df718..dfdb809c 100644 --- a/sql/schema/table/aggregated_predictions_per_user.json +++ b/sql/schema/table/aggregated_predictions_per_user.json @@ -54,5 +54,21 @@ { "name": "auto_segment_prediction_timestamp", "type": "TIMESTAMP" + }, + { + "name": "churn_propensity_prediction", + "type": "STRING" + }, + { + "name": "churn_propensity_score", + "type": "FLOAT" + }, + { + "name": "churn_propensity_decile", + "type": "INTEGER" + }, + { + "name": "churn_propensity_prediction_timestamp", + "type": "TIMESTAMP" } ] \ No newline at end of file From 37ba099860248cbfa668f054add83fbee76da8f0 Mon Sep 17 00:00:00 2001 From: Charlie Wang Date: Thu, 11 Jul 2024 11:26:37 +0000 Subject: [PATCH 3/7] add feature date column --- .../table/aggregated_predictions_per_user.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sql/schema/table/aggregated_predictions_per_user.json b/sql/schema/table/aggregated_predictions_per_user.json index dfdb809c..d377969e 100644 --- a/sql/schema/table/aggregated_predictions_per_user.json +++ b/sql/schema/table/aggregated_predictions_per_user.json @@ -15,6 +15,10 @@ "name": "ltv_decile", "type": "INTEGER" }, + { + "name": "ltv_feature_date", + "type": "DATE" + }, { "name": "ltv_prediction_timestamp", "type": "TIMESTAMP" @@ -31,6 +35,10 @@ "name": "propensity_decile", "type": "INTEGER" }, + { + "name": "propensity_feature_date", + "type": "DATE" + }, { "name": "propensity_prediction_timestamp", "type": "TIMESTAMP" @@ -43,6 +51,10 @@ "name": "segment_distance", "type": "FLOAT" }, + { + "name": "segment_feature_date", + "type": "DATE" + }, { "name": "segment_prediction_timestamp", "type": "TIMESTAMP" @@ -67,6 +79,10 @@ "name": "churn_propensity_decile", "type": "INTEGER" }, + { + "name": "churn_propensity_feature_date", + "type": "DATE" + }, { "name": "churn_propensity_prediction_timestamp", "type": "TIMESTAMP" From 8d9bb1cf24a03e3020e8bf589bf556c7b130561c Mon Sep 17 00:00:00 2001 From: Charlie Wang Date: Thu, 11 Jul 2024 12:04:46 +0000 Subject: [PATCH 4/7] invoke the load latest prediction procedure for all use cases --- config/config.yaml.tftpl | 5 ++ .../feature-store/bigquery-procedures.tf | 14 ++++++ sql/query/invoke_load_latest_prediction.sqlx | 48 +++++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 sql/query/invoke_load_latest_prediction.sqlx diff --git a/config/config.yaml.tftpl b/config/config.yaml.tftpl index cb71088c..119e62be 100644 --- a/config/config.yaml.tftpl +++ b/config/config.yaml.tftpl 
@@ -1945,6 +1945,11 @@ bigquery: # The `interval_end_date` parameter defines how many days we leave out of the backfill before the last dates of events. # This is usually the same value as the look forward window. interval_end_date: 30 + # This is a stored procedure that CALLs the Load Latest Prediction stored procedure. + invoke_load_latest_prediction: + project_id: "${project_id}" + dataset: "aggregated_predictions" + stored_procedure: "load_latest_prediction" # This section sets the parameters for the features, training and inference procedures that insert data into tables and views to be used for # training and prediction. # There is no strict recommendation on the right parameters that will maximize the models performance, however here are some back of the envelope numbers. diff --git a/infrastructure/terraform/modules/feature-store/bigquery-procedures.tf b/infrastructure/terraform/modules/feature-store/bigquery-procedures.tf index f3b52351..4ea00aa4 100644 --- a/infrastructure/terraform/modules/feature-store/bigquery-procedures.tf +++ b/infrastructure/terraform/modules/feature-store/bigquery-procedures.tf @@ -1529,4 +1529,18 @@ resource "google_bigquery_routine" "invoke_user_behaviour_revenue_insights" { depends_on = [ null_resource.check_gemini_model_exists ] +} + +data "local_file" "invoke_load_latest_prediction_file" { + filename = "${local.sql_dir}/query/invoke_load_latest_prediction.sql" +} + +resource "google_bigquery_routine" "invoke_load_latest_prediction" { + project = var.project_id + dataset_id = google_bigquery_dataset.datasets["aggregated_predictions"].dataset_id + routine_id = "invoke_load_latest_prediction" + routine_type = "PROCEDURE" + language = "SQL" + definition_body = data.local_file.invoke_load_latest_prediction_file.content + description = "Load latest predictions into the user_predictions table for all the use cases" } \ No newline at end of file diff --git a/sql/query/invoke_load_latest_prediction.sqlx b/sql/query/invoke_load_latest_prediction.sqlx new file mode 100644 index 00000000..31e16b40 --- /dev/null +++ b/sql/query/invoke_load_latest_prediction.sqlx @@ -0,0 +1,48 @@ +-- Copyright 2023 Google LLC +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+ +CALL `{{project_id}}.{{dataset_id}}.{{stored_procedure}}( + 'purchase_propensity', + 'predictions_%_view', + 'prediction AS propensity_prediction, prediction_prob AS propensity_score, NTILE(10) OVER (ORDER BY prediction_prob DESC) AS propensity_decile, feature_date AS propensity_feature_date, processed_timestamp AS propensity_prediction_timestamp', + ['propensity_prediction','propensity_score', 'propensity_decile', 'propensity_feature_date', 'propensity_prediction_timestamp'] +); + +CALL `{{project_id}}.{{dataset_id}}.{{stored_procedure}}`( + 'customer_lifetime_value', + 'predictions_%_view_final', + 'prediction AS ltv_prediction, NTILE(10) OVER (ORDER BY prediction DESC) AS ltv_decile, feature_date AS ltv_feature_date, processed_timestamp AS ltv_prediction_timestamp', + ['ltv_prediction','ltv_decile','ltv_feature_date','ltv_prediction_timestamp'] +); + +CALL `{{project_id}}.{{dataset_id}}.{{stored_procedure}}`( + 'audience_segmentation', + 'pred_%_view', + 'prediction AS segment_prediction, NEAREST_CENTROIDS_DISTANCE[SAFE_OFFSET(0)].CENTROID_ID AS segment_distance, feature_date AS segment_feature_date, processed_timestamp AS segment_prediction_timestamp', + ['segment_prediction','segment_distance','segment_feature_date','segment_prediction_timestamp'] +); + +CALL `{{project_id}}.{{dataset_id}}.{{stored_procedure}}`( + 'auto_audience_segmentation', + 'predictions_%', + 'prediction AS auto_segment_prediction, feature_timestamp AS auto_segment_prediction_timestamp', + ['auto_segment_prediction','auto_segment_prediction_timestamp'] +); + +CALL `{{project_id}}.{{dataset_id}}.{{stored_procedure}}`( + 'churn_propensity', + 'predictions_%_view', + 'prediction AS churn_propensity_prediction, prediction_prob AS churn_propensity_score, NTILE(10) OVER (ORDER BY prediction_prob DESC) AS churn_propensity_decile, feature_date AS churn_propensity_feature_date, processed_timestamp AS churn_propensity_prediction_timestamp', + ['churn_propensity_prediction','churn_propensity_score', 'churn_propensity_decile','churn_propensity_feature_date','churn_propensity_prediction_timestamp'] +); \ No newline at end of file From 9799e709ca9f3adf9ee46269ba926a3fa92823b2 Mon Sep 17 00:00:00 2001 From: Charlie Wang Date: Thu, 11 Jul 2024 12:09:32 +0000 Subject: [PATCH 5/7] correct place holder --- sql/query/invoke_load_latest_prediction.sqlx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/query/invoke_load_latest_prediction.sqlx b/sql/query/invoke_load_latest_prediction.sqlx index 31e16b40..29b220e3 100644 --- a/sql/query/invoke_load_latest_prediction.sqlx +++ b/sql/query/invoke_load_latest_prediction.sqlx @@ -12,35 +12,35 @@ -- See the License for the specific language governing permissions and -- limitations under the License. 
-CALL `{{project_id}}.{{dataset_id}}.{{stored_procedure}}( +CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}( 'purchase_propensity', 'predictions_%_view', 'prediction AS propensity_prediction, prediction_prob AS propensity_score, NTILE(10) OVER (ORDER BY prediction_prob DESC) AS propensity_decile, feature_date AS propensity_feature_date, processed_timestamp AS propensity_prediction_timestamp', ['propensity_prediction','propensity_score', 'propensity_decile', 'propensity_feature_date', 'propensity_prediction_timestamp'] ); -CALL `{{project_id}}.{{dataset_id}}.{{stored_procedure}}`( +CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`( 'customer_lifetime_value', 'predictions_%_view_final', 'prediction AS ltv_prediction, NTILE(10) OVER (ORDER BY prediction DESC) AS ltv_decile, feature_date AS ltv_feature_date, processed_timestamp AS ltv_prediction_timestamp', ['ltv_prediction','ltv_decile','ltv_feature_date','ltv_prediction_timestamp'] ); -CALL `{{project_id}}.{{dataset_id}}.{{stored_procedure}}`( +CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`( 'audience_segmentation', 'pred_%_view', 'prediction AS segment_prediction, NEAREST_CENTROIDS_DISTANCE[SAFE_OFFSET(0)].CENTROID_ID AS segment_distance, feature_date AS segment_feature_date, processed_timestamp AS segment_prediction_timestamp', ['segment_prediction','segment_distance','segment_feature_date','segment_prediction_timestamp'] ); -CALL `{{project_id}}.{{dataset_id}}.{{stored_procedure}}`( +CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`( 'auto_audience_segmentation', 'predictions_%', 'prediction AS auto_segment_prediction, feature_timestamp AS auto_segment_prediction_timestamp', ['auto_segment_prediction','auto_segment_prediction_timestamp'] ); -CALL `{{project_id}}.{{dataset_id}}.{{stored_procedure}}`( +CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`( 'churn_propensity', 'predictions_%_view', 'prediction AS churn_propensity_prediction, prediction_prob AS churn_propensity_score, NTILE(10) OVER (ORDER BY prediction_prob DESC) AS churn_propensity_decile, feature_date AS churn_propensity_feature_date, processed_timestamp AS churn_propensity_prediction_timestamp', From 0ebde758bd168c21f5032581bca27503047b4afd Mon Sep 17 00:00:00 2001 From: Charlie Wang Date: Thu, 11 Jul 2024 12:13:00 +0000 Subject: [PATCH 6/7] fix query --- sql/query/invoke_load_latest_prediction.sqlx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/query/invoke_load_latest_prediction.sqlx b/sql/query/invoke_load_latest_prediction.sqlx index 29b220e3..d41020e4 100644 --- a/sql/query/invoke_load_latest_prediction.sqlx +++ b/sql/query/invoke_load_latest_prediction.sqlx @@ -12,7 +12,7 @@ -- See the License for the specific language governing permissions and -- limitations under the License. 
-CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}( +CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`( 'purchase_propensity', 'predictions_%_view', 'prediction AS propensity_prediction, prediction_prob AS propensity_score, NTILE(10) OVER (ORDER BY prediction_prob DESC) AS propensity_decile, feature_date AS propensity_feature_date, processed_timestamp AS propensity_prediction_timestamp', From b2995259c8321d7f75b32193c3b0dd4f5e71be91 Mon Sep 17 00:00:00 2001 From: Charlie Wang Date: Thu, 11 Jul 2024 12:21:55 +0000 Subject: [PATCH 7/7] new field for the aggregated prediction table --- .../table/aggregated_predictions_latest.json | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/sql/schema/table/aggregated_predictions_latest.json b/sql/schema/table/aggregated_predictions_latest.json index ad33d32c..cd7a89af 100644 --- a/sql/schema/table/aggregated_predictions_latest.json +++ b/sql/schema/table/aggregated_predictions_latest.json @@ -698,5 +698,53 @@ "name": "Auto Segment_Distance", "type": "FLOAT", "description": "The segment distance for that user predicted by the auto audience segmentation model" + }, + { + "name": "homepage", + "type": "INTEGER" + }, + { + "name": "Google_Redesign_Clearance", + "type": "INTEGER" + }, + { + "name": "Google_Redesign_Apparel_Mens_sortci_newest_desc", + "type": "INTEGER" + }, + { + "name": "basket_html", + "type": "INTEGER" + }, + { + "name": "Google_Redesign_New_sortci_newest_desc", + "type": "INTEGER" + }, + { + "name": "Google_Redesign_Lifestyle_Bags", + "type": "INTEGER" + }, + { + "name": "store_html", + "type": "INTEGER" + }, + { + "name": "Google_Redesign_Lifestyle_Drinkware", + "type": "INTEGER" + }, + { + "name": "signin_html", + "type": "INTEGER" + }, + { + "name": "Google_Redesign_Apparel_sortci_newest_desc", + "type": "INTEGER" + }, + { + "name": "Google_Redesign_Apparel_Womens", + "type": "INTEGER" + }, + { + "name": "Google_Redesign_Stationery_sortci_newest_desc", + "type": "INTEGER" } ] \ No newline at end of file
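Once the seven patches are applied and the invoke_load_latest_prediction query has run, the target table created from aggregated_predictions_per_user.json should hold one row per user with the latest prediction from each use case merged in. The query below is a minimal sketch for spot-checking that result, not part of the patch series: the project id "my-project" is a placeholder, the table is assumed to live in the aggregated_predictions dataset under the name aggregated_predictions_per_user, and the decile filter is illustrative only (the NTILE(10) ... DESC expressions in the procedure calls make decile 1 the highest-scoring bucket).

    -- Illustrative verification query (hypothetical project id and table name).
    -- Latest high purchase-propensity, high-LTV users with their segment and churn score.
    SELECT
      user_pseudo_id,
      user_id,
      propensity_score,
      ltv_prediction,
      segment_prediction,
      churn_propensity_score
    FROM `my-project.aggregated_predictions.aggregated_predictions_per_user`
    WHERE propensity_decile = 1
      AND ltv_decile = 1
    ORDER BY ltv_prediction DESC
    LIMIT 100;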