Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/apache/beam into fix-meta…
Browse files Browse the repository at this point in the history
…data-index
  • Loading branch information
thiagotnunes committed Oct 9, 2024
2 parents 5049588 + 2ee6100 commit e5e49a3
Show file tree
Hide file tree
Showing 103 changed files with 5,108 additions and 1,141 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"https://github.com/apache/beam/pull/31156": "noting that PR #31156 should run this test"
"modification": 1
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 0
"modification": 1
}
2 changes: 1 addition & 1 deletion .github/trigger_files/beam_PostCommit_Java_PVR_Samza.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 0
"modification": 1
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 0
"modification": 1
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 0
"modification": 1
}
4 changes: 2 additions & 2 deletions .github/workflows/beam_LoadTests_Go_CoGBK_Flink_batch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-go-cogbk-flink-batch-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-cogbk-flink-batch-${{ github.run_id }}

jobs:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/beam_LoadTests_Go_Combine_Flink_Batch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-go-combine-flink-batch-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-combine-flink-batch-${{ github.run_id }}

jobs:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/beam_LoadTests_Go_GBK_Flink_Batch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-go-gbk-flink-batch-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-gbk-flink-batch-${{ github.run_id }}

jobs:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/beam_LoadTests_Go_ParDo_Flink_Batch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-go-pardo-flink-batch-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-pardo-flink-batch-${{ github.run_id }}

jobs:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/beam_LoadTests_Go_SideInput_Flink_Batch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-go-sideinput-flink-batch-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-sideinput-flink-batch-${{ github.run_id }}

jobs:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-py-cogbk-flink-batch-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-cogbk-flink-batch-${{ github.run_id }}

jobs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-py-cmb-flink-batch-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-py-cmb-flink-batch-${{ github.run_id }}

jobs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-py-cmb-flink-streaming-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-py-cmb-flink-streaming-${{ github.run_id }}

jobs:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/beam_LoadTests_Python_GBK_Flink_Batch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-py-gbk-flk-batch-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-py-gbk-flk-batch-${{ github.run_id }}

jobs:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/beam_LoadTests_Python_ParDo_Flink_Batch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-py-pardo-flink-batch-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-pardo-flink-batch-${{ github.run_id }}

jobs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ env:
GCLOUD_ZONE: us-central1-a
CLUSTER_NAME: beam-loadtests-py-pardo-flink-stream-${{ github.run_id }}
GCS_BUCKET: gs://beam-flink-cluster
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz
HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
FLINK_TASKMANAGER_SLOTS: 1
DETACHED_MODE: true
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest
ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-pardo-flink-stream-${{ github.run_id }}

jobs:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/beam_Publish_Docker_Snapshots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
- name: run Publish Docker Snapshots script for Flink
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :runners:flink:1.15:job-server-container:dockerPush
gradle-command: :runners:flink:1.17:job-server-container:dockerPush
arguments: |
-Pdocker-repository-root=gcr.io/apache-beam-testing/beam_portability \
-Pdocker-tag-list=latest
10 changes: 5 additions & 5 deletions .test-infra/dataproc/flink_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# Provide the following environment to run this script:
#
# GCLOUD_ZONE: Google cloud zone. Optional. Default: "us-central1-a"
# DATAPROC_VERSION: Dataproc version. Optional. Default: 2.1
# DATAPROC_VERSION: Dataproc image version. Optional. Default: "2.2-debian"
# CLUSTER_NAME: Cluster name
# GCS_BUCKET: GCS bucket url for Dataproc resources (init actions)
# HARNESS_IMAGES_TO_PULL: Urls to SDK Harness' images to pull on dataproc workers (optional: 0, 1 or multiple urls for every harness image)
Expand All @@ -35,8 +35,8 @@
# HARNESS_IMAGES_TO_PULL='gcr.io/<IMAGE_REPOSITORY>/python:latest gcr.io/<IMAGE_REPOSITORY>/java:latest' \
# JOB_SERVER_IMAGE=gcr.io/<IMAGE_REPOSITORY>/job-server-flink:latest \
# ARTIFACTS_DIR=gs://<bucket-for-artifacts> \
# FLINK_DOWNLOAD_URL=https://archive.apache.org/dist/flink/flink-1.12.3/flink-1.12.3-bin-scala_2.11.tgz \
# HADOOP_DOWNLOAD_URL=https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-9.0/flink-shaded-hadoop-2-uber-2.8.3-9.0.jar \
# FLINK_DOWNLOAD_URL=https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz \
# HADOOP_DOWNLOAD_URL=https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar \
# FLINK_NUM_WORKERS=2 \
# FLINK_TASKMANAGER_SLOTS=1 \
# DETACHED_MODE=false \
Expand All @@ -46,7 +46,7 @@ set -Eeuxo pipefail

# GCloud properties
GCLOUD_ZONE="${GCLOUD_ZONE:=us-central1-a}"
DATAPROC_VERSION="${DATAPROC_VERSION:=2.1-debian}"
DATAPROC_VERSION="${DATAPROC_VERSION:=2.2-debian}"
GCLOUD_REGION=`echo $GCLOUD_ZONE | sed -E "s/(-[a-z])?$//"`

MASTER_NAME="$CLUSTER_NAME-m"
Expand Down Expand Up @@ -133,7 +133,7 @@ function create_cluster() {
# This is why flink init action is invoked last.
# TODO(11/11/2022) remove --worker-machine-type and --master-machine-type once N2 CPUs quota relaxed
# Dataproc 2.1 uses n2-standard-2 by default but there is N2 CPUs=24 quota limit
gcloud dataproc clusters create $CLUSTER_NAME --region=$GCLOUD_REGION --num-workers=$FLINK_NUM_WORKERS \
gcloud dataproc clusters create $CLUSTER_NAME --region=$GCLOUD_REGION --num-workers=$FLINK_NUM_WORKERS --public-ip-address \
--master-machine-type=n1-standard-2 --worker-machine-type=n1-standard-2 --metadata "${metadata}", \
--image-version=$image_version --zone=$GCLOUD_ZONE --optional-components=FLINK,DOCKER --quiet
}
Expand Down
2 changes: 1 addition & 1 deletion .test-infra/jenkins/CommonTestProperties.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class CommonTestProperties {
}

static String getFlinkVersion() {
return "1.15"
return "1.17"
}

static String getSparkVersion() {
Expand Down
120 changes: 0 additions & 120 deletions .test-infra/jenkins/Flink.groovy

This file was deleted.

2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
## New Features / Improvements

* X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)).
* Added support for processing events that use a global sequence to the "ordered" extension (Java) ([#32540](https://github.com/apache/beam/pull/32540)).

## Breaking Changes

Expand All @@ -80,6 +81,7 @@
## Bugfixes

* Fixed X (Java/Python) ([#X](https://github.com/apache/beam/issues/X)).
* (Java) Fixed tearDown not invoked when DoFn throws on Portable Runners ([#18592](https://github.com/apache/beam/issues/18592), [#31381](https://github.com/apache/beam/issues/31381)).

## Security Fixes
* Fixed [CVE-YYYY-NNNN](https://www.cve.org/CVERecord?id=CVE-YYYY-NNNN) (Java/Python/Go) ([#X](https://github.com/apache/beam/issues/X)).
Expand Down
Loading

0 comments on commit e5e49a3

Please sign in to comment.