From dd5a13f99c2e9a4b048dffdf555cb9622dff2ef5 Mon Sep 17 00:00:00 2001 From: Jeffrey Kinard Date: Mon, 25 Mar 2024 21:36:28 -0400 Subject: [PATCH] YamlTemplate speed improvements Signed-off-by: Jeffrey Kinard --- .github/scripts/startup-script.sh | 20 ++--- .../cloud/teleport/metadata/Template.java | 11 ++- .../plugin/PythonDockerfileGenerator.java | 2 +- .../plugin/YamlDockerfileGenerator.java | 17 +++- ...le-template => Dockerfile-template-python} | 0 .../main/resources/Dockerfile-template-yaml | 66 ++++++++++++++-- .../plugin/YamlDockerfileGeneratorTest.java | 79 +++++++++++++++++++ .../plugin/maven/TemplatesStageMojo.java | 59 +++++++++++--- .../templates/python/YAMLTemplate.java | 2 +- .../src/main/python/yaml-template/Dockerfile | 30 ------- python/src/main/python/yaml-template/main.py | 24 +----- .../python/yaml-template/requirements.txt | 1 - .../templates/python/YAMLTemplateIT.java | 3 - python/src/test/resources/YamlTemplateIT.yaml | 14 +++- .../templates/yaml/KafkaToBigQueryYaml.java | 2 +- yaml/src/main/python/main.py | 2 +- yaml/src/main/python/requirements.txt | 1 - 17 files changed, 237 insertions(+), 96 deletions(-) rename plugins/core-plugin/src/main/resources/{Dockerfile-template => Dockerfile-template-python} (100%) create mode 100644 plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/YamlDockerfileGeneratorTest.java delete mode 100644 python/src/main/python/yaml-template/Dockerfile delete mode 100644 python/src/main/python/yaml-template/requirements.txt delete mode 100644 yaml/src/main/python/requirements.txt diff --git a/.github/scripts/startup-script.sh b/.github/scripts/startup-script.sh index 7691baa90e..5ee8470b18 100644 --- a/.github/scripts/startup-script.sh +++ b/.github/scripts/startup-script.sh @@ -58,13 +58,13 @@ echo \ sudo apt-get update sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin -y -# add runner to docker group +# add user to docker group sudo groupadd docker -sudo gpasswd -a runner docker +sudo gpasswd -a $user docker -# create runner HOME -sudo mkdir /home/runner -sudo chown runner /home/runner +# create user HOME +sudo mkdir /home/$user +sudo chown $user /home/$user # access secrets from secretsmanager secrets=$(gcloud secrets versions access latest --secret="GITACTION_SECRET_NAME") @@ -79,12 +79,12 @@ done ACTIONS_RUNNER_INPUT_TOKEN="$(curl -sS --request POST --url "https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}/actions/runners/registration-token" --header "authorization: Bearer ${GITHUB_TOKEN}" --header 'content-type: application/json' | jq -r .token)" # create actions-runner directory -sudo -u runner bash -c "mkdir /home/runner/actions-runner" +sudo -u $user bash -c "mkdir /home/$user/actions-runner" # download and extract gitactions binary -sudo -u runner bash -c "cd /home/runner/actions-runner && curl -o actions-runner-linux-x64.tar.gz --location https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-x64-${GH_RUNNER_VERSION}.tar.gz" -sudo -u runner bash -c "cd /home/runner/actions-runner && tar -zxf ./actions-runner-linux-x64.tar.gz" +sudo -u $user bash -c "cd /home/$user/actions-runner && curl -o actions-runner-linux-x64.tar.gz --location https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-x64-${GH_RUNNER_VERSION}.tar.gz" +sudo -u $user bash -c "cd /home/$user/actions-runner && tar -zxf ./actions-runner-linux-x64.tar.gz" # configure and run gitactions runner -sudo -u runner bash -c "cd /home/runner/actions-runner && ./config.sh --url ${REPO_URL} --token ${ACTIONS_RUNNER_INPUT_TOKEN} --labels ${GITACTIONS_LABELS} --unattended" -sudo -u runner bash -c "cd /home/runner/actions-runner && ./run.sh &" +sudo -u $user bash -c "cd /home/$user/actions-runner && ./config.sh --url ${REPO_URL} --token ${ACTIONS_RUNNER_INPUT_TOKEN} --labels ${GITACTIONS_LABELS} --unattended" +sudo -u $user bash -c "cd /home/$user/actions-runner && ./run.sh &" diff --git a/metadata/src/main/java/com/google/cloud/teleport/metadata/Template.java b/metadata/src/main/java/com/google/cloud/teleport/metadata/Template.java index 81dea9b09d..8cb40d9b62 100644 --- a/metadata/src/main/java/com/google/cloud/teleport/metadata/Template.java +++ b/metadata/src/main/java/com/google/cloud/teleport/metadata/Template.java @@ -41,7 +41,7 @@ /** Container name to stage (required for Flex templates). */ String flexContainerName() default ""; - String yamlTemplateName() default ""; + String yamlTemplateFile() default ""; String xlangContainerName() default ""; @@ -113,6 +113,15 @@ enum TemplateType { /** Marker if the template is still in preview / pre-GA. */ boolean preview() default false; + /** + * Comma-separated list of files to include in Template image when building with Dockerfile. Only + * works for YAML and XLANG types. Must be in the path of the build files, i.e. copied to target + * folder. + * + *

Will be copied as such, using Docker command: COPY ${otherFiles} /template/ + */ + String filesToCopy() default ""; + StreamingMode defaultStreamingMode() default StreamingMode.UNSPECIFIED; enum StreamingMode { diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java index 2c78cfd121..2487650331 100644 --- a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java +++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java @@ -50,7 +50,7 @@ public static void generateDockerfile( Map parameters = new HashMap<>(); parameters.put("baseContainerImage", basePythonContainerImage); - Template template = freemarkerConfig.getTemplate("Dockerfile-template"); + Template template = freemarkerConfig.getTemplate("Dockerfile-template-python"); ByteArrayOutputStream baos = new ByteArrayOutputStream(); OutputStreamWriter writer = new OutputStreamWriter(baos); diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/YamlDockerfileGenerator.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/YamlDockerfileGenerator.java index 74c56158f5..55807df4f3 100644 --- a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/YamlDockerfileGenerator.java +++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/YamlDockerfileGenerator.java @@ -27,6 +27,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.logging.Logger; @@ -37,9 +38,13 @@ public class YamlDockerfileGenerator { private YamlDockerfileGenerator() {} public static void generateDockerfile( - String basePythonContainerImage, String yamlTemplateName, File targetDirectory) + String baseJavaContainerImage, + String beamVersion, + String pythonVersion, + String yamlTemplateName, + List otherFiles, + File targetDirectory) throws IOException, TemplateException { - Configuration freemarkerConfig = new Configuration(Configuration.VERSION_2_3_32); freemarkerConfig.setDefaultEncoding("UTF-8"); freemarkerConfig.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER); @@ -47,8 +52,12 @@ public static void generateDockerfile( freemarkerConfig.setClassForTemplateLoading(PythonDockerfileGenerator.class, "/"); Map parameters = new HashMap<>(); - parameters.put("baseContainerImage", basePythonContainerImage); - parameters.put("yamlTemplateName", yamlTemplateName + ".yaml"); + parameters.put("baseJavaContainerImage", baseJavaContainerImage); + parameters.put("beamVersion", beamVersion); + parameters.put("pythonVersion", pythonVersion); + if (!otherFiles.isEmpty()) { + parameters.put("copyOtherFiles", String.join(" ", otherFiles)); + } Template template = freemarkerConfig.getTemplate("Dockerfile-template-yaml"); diff --git a/plugins/core-plugin/src/main/resources/Dockerfile-template b/plugins/core-plugin/src/main/resources/Dockerfile-template-python similarity index 100% rename from plugins/core-plugin/src/main/resources/Dockerfile-template rename to plugins/core-plugin/src/main/resources/Dockerfile-template-python diff --git a/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml b/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml index 0478254e2c..9af50e750b 100644 --- a/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml +++ b/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml @@ -1,16 +1,68 @@ -FROM ${baseContainerImage} +#===================================================================# +# Create build environment from base Python template launcher image # +#===================================================================# +FROM gcr.io/dataflow-templates-base/python311-template-launcher-base:latest as python-base -# Copy template files to /template +# Build args ARG WORKDIR=/template -COPY main.py /template -COPY requirements.txt /template -COPY ${yamlTemplateName} /template/template.yaml +ARG REQUIREMENTS_FILE=requirements.txt +ARG BEAM_VERSION=${beamVersion} +ARG BEAM_PACKAGE=apache-beam[dataframe,gcp,test,yaml]==$BEAM_VERSION +ARG PY_VERSION=${pythonVersion} + +# Copy template files to /template +RUN mkdir -p $WORKDIR +COPY main.py requirements.txt* /template/ +<#if copyOtherFiles??>COPY ${copyOtherFiles} /template/ WORKDIR $WORKDIR -ENV FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE=requirements.txt +# Create requirements.txt file if not provided +RUN if ! [ -f requirements.txt ] ; then echo "$BEAM_PACKAGE" > requirements.txt ; fi + +# Install dependencies to launch the pipeline and download to reduce startup time +RUN python -m venv /venv \ + && /venv/bin/pip install --no-cache-dir --upgrade pip setuptools \ + && /venv/bin/pip install --no-cache-dir -U -r $REQUIREMENTS_FILE \ + && /venv/bin/pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \ + && rm -rf /usr/local/lib/python$PY_VERSION/site-packages \ + && mv /venv/lib/python$PY_VERSION/site-packages /usr/local/lib/python$PY_VERSION/ + + +#============================================================# +# Create Distroless xlang image compatible with YamlTemplate # +#============================================================# +FROM ${baseJavaContainerImage} + +# Build args +ARG CHIPSET_ARCH=x86_64-linux-gnu +ARG PY_VERSION=${pythonVersion} + +# Set python environment variables ENV FLEX_TEMPLATE_PYTHON_PY_FILE=main.py ENV PIP_NO_DEPS=True +# Copy template, python wheels and python launcher script from python-base +COPY --from=python-base /template /template +COPY --from=python-base /tmp/dataflow-requirements-cache /tmp/dataflow-requirements-cache +COPY --from=python-base /opt/google/dataflow/python_template_launcher /opt/google/dataflow/python_template_launcher + +# Copy python and installed packages from python-base +COPY --from=python-base /usr/local/bin/python$PY_VERSION /usr/local/bin/python +COPY --from=python-base /usr/local/lib/python$PY_VERSION /usr/local/lib/python$PY_VERSION + +# Copy required shared libraries from python-base +COPY --from=python-base /lib/$CHIPSET_ARCH/ld-*so* /lib64/ +COPY --from=python-base /lib/$CHIPSET_ARCH/lib*so* /lib/$CHIPSET_ARCH/ +COPY --from=python-base /usr/lib/$CHIPSET_ARCH/libffi* /usr/lib/$CHIPSET_ARCH/ +COPY --from=python-base /usr/local/lib/libpython$PY_VERSION* /usr/local/lib/ + +# Copy minimal commands from python-base needed to execute template +COPY --from=python-base /bin/dash /bin/sh +COPY --from=python-base /usr/bin/which.debianutils /usr/bin/which + +# Copy licenses +COPY --from=python-base /usr/licenses/ /usr/licenses/ + WORKDIR /template -ENTRYPOINT ["/opt/google/dataflow/python_template_launcher"] \ No newline at end of file +ENTRYPOINT ["/opt/google/dataflow/python_template_launcher"] diff --git a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/YamlDockerfileGeneratorTest.java b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/YamlDockerfileGeneratorTest.java new file mode 100644 index 0000000000..0ea2f21612 --- /dev/null +++ b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/YamlDockerfileGeneratorTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.plugin; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertTrue; + +import com.google.common.base.Charsets; +import com.google.common.io.Files; +import freemarker.template.TemplateException; +import java.io.File; +import java.io.IOException; +import java.util.List; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for class {@link PythonDockerfileGenerator}. */ +@RunWith(JUnit4.class) +public class YamlDockerfileGeneratorTest { + private final File outputFolder = Files.createTempDir().getAbsoluteFile(); + + @Test + public void testGenerateDockerfile() throws IOException, TemplateException { + new File(outputFolder.getAbsolutePath() + "/word-count").mkdirs(); + YamlDockerfileGenerator.generateDockerfile( + "a java container image", + "beam_version", + "py_version", + "word-count", + List.of(), + outputFolder); + File outputFile = new File(outputFolder.getAbsolutePath() + "/word-count/Dockerfile"); + + assertTrue(outputFile.exists()); + String fileContents = Files.toString(outputFile, Charsets.UTF_8); + assertThat(fileContents).contains("FROM a java container image"); + assertThat(fileContents).contains("=beam_version"); + assertThat(fileContents).contains("=py_version"); + assertThat(fileContents).contains("COPY main.py requirements.txt* /template/"); + assertThat(fileContents) + .doesNotContainMatch( + "(?m)^(?!COPY main\\.py.*)(COPY(?!.*--from=).*/template.*$|COPY main\\.py.*)$"); + } + + @Test + public void testGenerateDockerfileWithOtherFiles() throws IOException, TemplateException { + new File(outputFolder.getAbsolutePath() + "/word-count").mkdirs(); + YamlDockerfileGenerator.generateDockerfile( + "a java container image", + "beam_version", + "py_version", + "word-count", + List.of("other_file"), + outputFolder); + File outputFile = new File(outputFolder.getAbsolutePath() + "/word-count/Dockerfile"); + + assertTrue(outputFile.exists()); + String fileContents = Files.toString(outputFile, Charsets.UTF_8); + assertThat(fileContents).contains("FROM a java container image"); + assertThat(fileContents).contains("=beam_version"); + assertThat(fileContents).contains("=py_version"); + assertThat(fileContents).contains("COPY main.py requirements.txt* /template/"); + assertThat(fileContents).contains("COPY other_file /template/"); + } +} diff --git a/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java b/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java index b325dc1dda..2fc1244ead 100644 --- a/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java +++ b/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java @@ -33,6 +33,7 @@ import com.google.cloud.teleport.plugin.YamlDockerfileGenerator; import com.google.cloud.teleport.plugin.model.ImageSpec; import com.google.cloud.teleport.plugin.model.TemplateDefinitions; +import com.google.common.base.Strings; import freemarker.template.TemplateException; import java.io.File; import java.io.FileWriter; @@ -108,12 +109,15 @@ public class TemplatesStageMojo extends TemplatesBaseMojo { required = false) protected String baseContainerImage; + // Keep pythonVersion below in sync with version in image @Parameter( name = "basePythonContainerImage", defaultValue = "gcr.io/dataflow-templates-base/python311-template-launcher-base:latest", required = false) protected String basePythonContainerImage; + protected String pythonVersion = "3.11"; + @Parameter(defaultValue = "${unifiedWorker}", readonly = true, required = false) protected boolean unifiedWorker; @@ -338,8 +342,7 @@ protected String stageFlexTemplate( TemplateSpecsGenerator generator = new TemplateSpecsGenerator(); String containerName = definition.getTemplateAnnotation().flexContainerName(); - String yamlTemplateName = - definition.getTemplateAnnotation().yamlTemplateName().replace(".yaml", ""); + String yamlTemplateFile = definition.getTemplateAnnotation().yamlTemplateFile(); String imagePath = imageSpec.getImage(); LOG.info("Stage image to GCR: {}", imagePath); @@ -393,7 +396,7 @@ protected String stageFlexTemplate( definition, currentTemplateName, imagePath, metadataFile, containerName, templatePath); } else if (definition.getTemplateAnnotation().type() == TemplateType.YAML) { stageFlexYamlTemplate( - definition, currentTemplateName, imagePath, metadataFile, yamlTemplateName, templatePath); + definition, currentTemplateName, imagePath, metadataFile, yamlTemplateFile, templatePath); } else { throw new IllegalArgumentException( "Type not known: " + definition.getTemplateAnnotation().type()); @@ -567,15 +570,43 @@ private void stageFlexYamlTemplate( String currentTemplateName, String imagePath, File metadataFile, - String yamlTemplateName, + String yamlTemplateFile, String templatePath) throws IOException, InterruptedException, TemplateException { - // TODO(polber) Use basePythonContainerImage once plugin can parse metadata from YAML Templates - String containerImage = "gcr.io/" + projectId + "/beam-yaml/yaml-template-base:latest"; + // extract image properties for Dockerfile + String yamlTemplateName = yamlTemplateFile.replace(".yaml", ""); + String beamVersion = project.getProperties().getProperty("beam.version"); + List otherFiles = new ArrayList<>(); + String filesToCopy = definition.getTemplateAnnotation().filesToCopy(); + if (!Strings.isNullOrEmpty(filesToCopy)) { + otherFiles.addAll(List.of(filesToCopy.split(","))); + } + if (!Strings.isNullOrEmpty(yamlTemplateFile)) { + otherFiles.add(yamlTemplateFile); + } else { + yamlTemplateName = definition.getTemplateAnnotation().flexContainerName(); + } YamlDockerfileGenerator.generateDockerfile( - containerImage, yamlTemplateName, outputClassesDirectory); - stageYamlUsingDockerfile(imagePath, yamlTemplateName + "/Dockerfile"); + baseContainerImage, + beamVersion, + pythonVersion, + yamlTemplateName, + otherFiles, + outputClassesDirectory); + + boolean useRootDirectory = true; + if (new File(outputClassesDirectory.getPath() + "/" + yamlTemplateName + "/main.py").exists()) { + useRootDirectory = false; + } else if (!new File(outputClassesDirectory.getPath() + "/main.py").exists()) { + throw new IllegalStateException( + String.format( + "main.py not found in %s or %s.", + outputClassesDirectory.getPath(), + outputClassesDirectory.getPath() + "/" + yamlTemplateName + "/main.py")); + } + + stageYamlUsingDockerfile(imagePath, yamlTemplateName, useRootDirectory); String[] flexTemplateBuildCmd = new String[] { @@ -664,9 +695,13 @@ private void stageFlexPythonTemplate( } } - private void stageYamlUsingDockerfile(String imagePath, String dockerfile) + private void stageYamlUsingDockerfile( + String imagePath, String yamlTemplateName, boolean useRootDirectory) throws IOException, InterruptedException { - File directory = new File(outputClassesDirectory.getAbsolutePath()); + File directory = + new File( + outputClassesDirectory.getAbsolutePath() + + (useRootDirectory ? "" : "/" + yamlTemplateName)); File cloudbuildFile = File.createTempFile("cloudbuild", ".yaml"); try (FileWriter writer = new FileWriter(cloudbuildFile)) { @@ -679,8 +714,8 @@ private void stageYamlUsingDockerfile(String imagePath, String dockerfile) + imagePath + "\n" + " - --dockerfile=" - + dockerfile - + "\n" + + (useRootDirectory ? yamlTemplateName + "/" : "") + + "Dockerfile\n" + " - --cache=true\n" + " - --cache-ttl=6h\n" + " - --compressed-caching=false\n" diff --git a/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java b/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java index 325a9e45b7..739c866835 100644 --- a/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java +++ b/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java @@ -23,7 +23,7 @@ @Template( name = "Yaml_Template", category = TemplateCategory.GET_STARTED, - type = Template.TemplateType.PYTHON, + type = Template.TemplateType.YAML, displayName = "YAML Template (Experimental)", description = "YAML pipeline. Reads YAML from Cloud Storage and dynamically expands YAML into " diff --git a/python/src/main/python/yaml-template/Dockerfile b/python/src/main/python/yaml-template/Dockerfile deleted file mode 100644 index 02651bb02b..0000000000 --- a/python/src/main/python/yaml-template/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -FROM gcr.io/dataflow-templates-base/python311-template-launcher-base - -ARG WORKDIR=/template -RUN mkdir -p ${WORKDIR} -COPY main.py /template -COPY requirements.txt /template -WORKDIR ${WORKDIR} - -ENV FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE=requirements.txt -ENV FLEX_TEMPLATE_PYTHON_PY_FILE=main.py - -# Install dependencies to launch the pipeline and download to reduce startup time -RUN pip install --no-cache-dir --upgrade pip \ - && pip install --no-cache-dir -U -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE \ - && pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE - -# Install OpenJDK-11 -RUN apt-get update && \ - apt-get install -y openjdk-11-jdk ca-certificates-java && \ - apt-get clean && \ - update-ca-certificates -f - -# Setup JAVA_HOME -- useful for docker commandline -ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64/ -RUN export JAVA_HOME - -# Avoid downloading dependencies with pip to reduce startup time -ENV PIP_NO_DEPS=True - -ENTRYPOINT ["/opt/google/dataflow/python_template_launcher"] \ No newline at end of file diff --git a/python/src/main/python/yaml-template/main.py b/python/src/main/python/yaml-template/main.py index 3c798ff56a..5fad83470c 100644 --- a/python/src/main/python/yaml-template/main.py +++ b/python/src/main/python/yaml-template/main.py @@ -21,28 +21,12 @@ from apache_beam.yaml import main -# TODO(https://github.com/apache/beam/issues/29916): Remove once alias args -# are added to main.py -def _get_alias_args(argv): - parser = argparse.ArgumentParser() - parser.add_argument( - '--yaml_pipeline', help='A yaml description of the pipeline to run.') - parser.add_argument( - '--yaml_pipeline_file', - help='A file containing a yaml description of the pipeline to run.') - known_args, pipeline_args = parser.parse_known_args(argv) - - if known_args.yaml_pipeline: - pipeline_args += [f'--pipeline_spec={known_args.yaml_pipeline}'] - if known_args.yaml_pipeline_file: - pipeline_args += [f'--pipeline_spec_file={known_args.yaml_pipeline_file}'] - return pipeline_args - - def run(argv=None): - args = _get_alias_args(argv) + parser = argparse.ArgumentParser() + _, pipeline_args = parser.parse_known_args(argv) + pipeline_args += ['--sdk_location=container'] cache_provider_artifacts.cache_provider_artifacts() - main.run(argv=args) + main.run(argv=pipeline_args) if __name__ == '__main__': diff --git a/python/src/main/python/yaml-template/requirements.txt b/python/src/main/python/yaml-template/requirements.txt deleted file mode 100644 index b380938284..0000000000 --- a/python/src/main/python/yaml-template/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apache-beam[dataframe,gcp,test,yaml]==2.55.1 diff --git a/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java b/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java index d153ee9b95..40b77e0d30 100644 --- a/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java +++ b/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java @@ -34,7 +34,6 @@ import org.apache.beam.it.common.PipelineOperator; import org.apache.beam.it.gcp.TemplateTestBase; import org.apache.beam.it.gcp.artifacts.Artifact; -import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; @@ -48,7 +47,6 @@ public final class YAMLTemplateIT extends TemplateTestBase { @Test - @Ignore("Breaking gitactions. Fix in #1405") public void testSimpleCompositeSpec() throws IOException { // Arrange String yamlMessage = createSimpleYamlMessage(); @@ -58,7 +56,6 @@ public void testSimpleCompositeSpec() throws IOException { } @Test - @Ignore("Breaking gitactions. Fix in #1405") public void testSimpleCompositeSpecFile() throws IOException { // Arrange gcsClient.createArtifact("input/simple.yaml", createSimpleYamlMessage()); diff --git a/python/src/test/resources/YamlTemplateIT.yaml b/python/src/test/resources/YamlTemplateIT.yaml index cc975ef115..2646e5de75 100644 --- a/python/src/test/resources/YamlTemplateIT.yaml +++ b/python/src/test/resources/YamlTemplateIT.yaml @@ -42,13 +42,21 @@ pipeline: fields: sum: expression: num + inverse - - type: WriteToJson + - type: WriteToJsonPython name: WriteGoodFiles input: Sum config: path: "OUTPUT_PATH/good" - - type: WriteToJson + - type: WriteToJsonPython name: WriteBadFiles input: TrimErrors config: - path: "OUTPUT_PATH/bad" \ No newline at end of file + path: "OUTPUT_PATH/bad" + +# TODO(polber) - remove with https://github.com/apache/beam/pull/30777 +providers: + - type: python + config: + packages: [] + transforms: + 'WriteToJsonPython': 'apache_beam.io.WriteToJson' \ No newline at end of file diff --git a/yaml/src/main/java/com/google/cloud/teleport/templates/yaml/KafkaToBigQueryYaml.java b/yaml/src/main/java/com/google/cloud/teleport/templates/yaml/KafkaToBigQueryYaml.java index adcb29bab9..d6a7f4c1f1 100644 --- a/yaml/src/main/java/com/google/cloud/teleport/templates/yaml/KafkaToBigQueryYaml.java +++ b/yaml/src/main/java/com/google/cloud/teleport/templates/yaml/KafkaToBigQueryYaml.java @@ -31,7 +31,7 @@ + "Any errors which occur in the transformation of the data, execution of the UDF, or inserting into the output table are inserted into a separate errors table in BigQuery. " + "If the errors table does not exist prior to execution, then it is created.", flexContainerName = "kafka-to-bigquery-yaml", - yamlTemplateName = "KafkaToBigQuery.yaml", + yamlTemplateFile = "KafkaToBigQuery.yaml", documentation = "https://cloud.google.com/dataflow/docs/guides/templates/provided/kafka-to-bigquery", contactInformation = "https://cloud.google.com/support", diff --git a/yaml/src/main/python/main.py b/yaml/src/main/python/main.py index 5d30d993a2..705a9a7404 100644 --- a/yaml/src/main/python/main.py +++ b/yaml/src/main/python/main.py @@ -31,7 +31,7 @@ def _get_pipeline_yaml(): def run(argv=None): parser = argparse.ArgumentParser() _, pipeline_args = parser.parse_known_args(argv) - pipeline_args += [f'--pipeline_spec={_get_pipeline_yaml()}'] + pipeline_args += [f'--yaml_pipeline={_get_pipeline_yaml()}'] cache_provider_artifacts.cache_provider_artifacts() main.run(argv=pipeline_args) diff --git a/yaml/src/main/python/requirements.txt b/yaml/src/main/python/requirements.txt deleted file mode 100644 index ed775f16d1..0000000000 --- a/yaml/src/main/python/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apache-beam[gcp,yaml]