diff --git a/kustomizations/apps/data-hub-configs/kubernetes-pipeline--stg.config.yaml b/kustomizations/apps/data-hub-configs/kubernetes-pipeline--stg.config.yaml
index 6135cb84c..a7d5c2bbe 100644
--- a/kustomizations/apps/data-hub-configs/kubernetes-pipeline--stg.config.yaml
+++ b/kustomizations/apps/data-hub-configs/kubernetes-pipeline--stg.config.yaml
@@ -944,6 +944,60 @@ kubernetesPipelines:
         memory: 500Mi
         cpu: 100m
 
+  # Runs data_pipeline.generic_web_api.cli for the 'biorxiv_medrxiv_meca_path_metadata_v2' web API pipeline; no schedule is set (schedule: null).
+  - dataPipelineId: 'Web_API_biorxiv_medrxiv_meca_path_metadata_v2_Kubernetes'
+    airflow:
+      dagParameters:
+        schedule: null
+        max_active_runs: 1
+        tags:
+          - 'Kubernetes'
+          - 'DocMaps'
+          - 'Web API'
+    image: '${data_hub_core_dags_unstable_image_repo}:${data_hub_core_dags_unstable_image_tag}'
+    imagePullPolicy: Always
+    arguments:
+      - 'python'
+      - '-m'
+      - 'data_pipeline.generic_web_api.cli'
+      - '--data-pipeline-id=biorxiv_medrxiv_meca_path_metadata_v2'
+    env:
+      - name: DEPLOYMENT_ENV
+        value: '{ENV}'
+      - name: GOOGLE_APPLICATION_CREDENTIALS
+        value: /dag_secret_files/gcloud/credentials.json
+      - name: AWS_CONFIG_FILE
+        value: /dag_secret_files/aws/credentials
+      - name: WEB_API_CONFIG_FILE_PATH
+        value: /dag_config_files/web-api-data-pipeline.config.yaml
+    volumeMounts:
+      - name: gcloud-secret-volume
+        mountPath: /dag_secret_files/gcloud/
+        readOnly: true
+      - name: aws-secret-volume
+        mountPath: /dag_secret_files/aws
+        readOnly: true
+      - name: data-hub-config-volume
+        mountPath: /dag_config_files/
+        readOnly: true
+    volumes:
+      - name: aws-secret-volume
+        secret:
+          secretName: credentials
+      - name: gcloud-secret-volume
+        secret:
+          secretName: gcloud
+      - name: data-hub-config-volume
+        configMap:
+          name: data-hub-configs
+    resources:
+      limits:
+        memory: 600Mi
+        cpu: 1000m
+      requests:
+        memory: 500Mi
+        cpu: 100m
+
   - dataPipelineId: 'Web_API_biorxiv_medrxiv_meca_path_metadata_latest_Kubernetes'
     airflow:
       dagParameters:
diff --git a/kustomizations/apps/data-hub-configs/web-api-data-pipeline.config.yaml b/kustomizations/apps/data-hub-configs/web-api-data-pipeline.config.yaml
index 5ae3cc03b..1c11caa8e 100644
--- a/kustomizations/apps/data-hub-configs/web-api-data-pipeline.config.yaml
+++ b/kustomizations/apps/data-hub-configs/web-api-data-pipeline.config.yaml
@@ -440,6 +440,41 @@ webApi:
           - 'Web API'
           - 'DocMaps'
 
+  # MECA path metadata from the api.biorxiv.org meca_index_v2 endpoint, requested once per versioned DOI.
+  - dataPipelineId: biorxiv_medrxiv_meca_path_metadata_v2
+    dataset: '{ENV}'
+    table: biorxiv_medrxiv_meca_path_metadata_v2
+    source:
+      # Candidate keys: distinct '<preprint_doi>v<preprint_version>' values from mv_docmaps_index for DOIs starting with '10.1101'.
+      include:
+        bigQuery:
+          projectName: 'elife-data-pipeline'
+          sqlQuery: |-
+            SELECT DISTINCT
+              CONCAT(manuscript_version.preprint_doi, 'v', manuscript_version.preprint_version) AS biorxiv_versioned_doi
+            FROM `elife-data-pipeline.{ENV}.mv_docmaps_index`
+            JOIN UNNEST(manuscript_versions) AS manuscript_version
+            WHERE STARTS_WITH(manuscript_version.preprint_doi , '10.1101')
+              AND manuscript_version.preprint_version IS NOT NULL
+      # Keys already present in this pipeline's own target table. NOTE(review): ignoreNotFound presumably tolerates the table not existing yet - confirm.
+      exclude:
+        bigQuery:
+          ignoreNotFound: true
+          projectName: 'elife-data-pipeline'
+          # Literal block scalar (as in the include query) so the SQL keeps its line breaks instead of being folded into one line.
+          sqlQuery: |-
+            SELECT
+              CONCAT(meca_path_metadata.tdm_doi, 'v', meca_path_metadata.ms_version) AS biorxiv_versioned_doi
+            FROM `elife-data-pipeline.{ENV}.biorxiv_medrxiv_meca_path_metadata_v2`
+            JOIN UNNEST(results) AS meca_path_metadata
+        keyFieldNameFromInclude: 'biorxiv_versioned_doi'
+    urlSourceType:
+      name: 'crossref_metadata_api'
+    dataUrl:
+      urlExcludingConfigurableParameters: https://api.biorxiv.org/meca_index_v2/elife/{biorxiv_versioned_doi}
+    response:
+      provenanceEnabled: True
+
   # bioRxiv/medRxiv apis
   # bioRxiv api
   - dataPipelineId: biorxiv_api