Skip to content

Commit

Permalink
Merge pull request #1405 from Polber:jkinard/yaml-speed
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 626020498
  • Loading branch information
cloud-teleport committed Apr 18, 2024
2 parents 359329d + dd5a13f commit 2b77b2f
Show file tree
Hide file tree
Showing 17 changed files with 237 additions and 96 deletions.
20 changes: 10 additions & 10 deletions .github/scripts/startup-script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,13 @@ echo \
sudo apt-get update
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin -y

# add runner to docker group
# add user to docker group
sudo groupadd docker
sudo gpasswd -a runner docker
sudo gpasswd -a $user docker

# create runner HOME
sudo mkdir /home/runner
sudo chown runner /home/runner
# create user HOME
sudo mkdir /home/$user
sudo chown $user /home/$user

# access secrets from secretsmanager
secrets=$(gcloud secrets versions access latest --secret="GITACTION_SECRET_NAME")
Expand All @@ -79,12 +79,12 @@ done
ACTIONS_RUNNER_INPUT_TOKEN="$(curl -sS --request POST --url "https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}/actions/runners/registration-token" --header "authorization: Bearer ${GITHUB_TOKEN}" --header 'content-type: application/json' | jq -r .token)"

# create actions-runner directory
sudo -u runner bash -c "mkdir /home/runner/actions-runner"
sudo -u $user bash -c "mkdir /home/$user/actions-runner"

# download and extract gitactions binary
sudo -u runner bash -c "cd /home/runner/actions-runner && curl -o actions-runner-linux-x64.tar.gz --location https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-x64-${GH_RUNNER_VERSION}.tar.gz"
sudo -u runner bash -c "cd /home/runner/actions-runner && tar -zxf ./actions-runner-linux-x64.tar.gz"
sudo -u $user bash -c "cd /home/$user/actions-runner && curl -o actions-runner-linux-x64.tar.gz --location https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-x64-${GH_RUNNER_VERSION}.tar.gz"
sudo -u $user bash -c "cd /home/$user/actions-runner && tar -zxf ./actions-runner-linux-x64.tar.gz"

# configure and run gitactions runner
sudo -u runner bash -c "cd /home/runner/actions-runner && ./config.sh --url ${REPO_URL} --token ${ACTIONS_RUNNER_INPUT_TOKEN} --labels ${GITACTIONS_LABELS} --unattended"
sudo -u runner bash -c "cd /home/runner/actions-runner && ./run.sh &"
sudo -u $user bash -c "cd /home/$user/actions-runner && ./config.sh --url ${REPO_URL} --token ${ACTIONS_RUNNER_INPUT_TOKEN} --labels ${GITACTIONS_LABELS} --unattended"
sudo -u $user bash -c "cd /home/$user/actions-runner && ./run.sh &"
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
/** Container name to stage (required for Flex templates). */
String flexContainerName() default "";

String yamlTemplateName() default "";
String yamlTemplateFile() default "";

String xlangContainerName() default "";

Expand Down Expand Up @@ -113,6 +113,15 @@ enum TemplateType {
/** Marker if the template is still in preview / pre-GA. */
boolean preview() default false;

/**
* Comma-separated list of files to include in Template image when building with Dockerfile. Only
* works for YAML and XLANG types. Must be in the path of the build files, i.e. copied to target
* folder.
*
* <p>The files are copied as-is, using the Docker command: COPY ${copyOtherFiles} /template/
*/
String filesToCopy() default "";

StreamingMode defaultStreamingMode() default StreamingMode.UNSPECIFIED;

enum StreamingMode {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public static void generateDockerfile(
Map<String, Object> parameters = new HashMap<>();
parameters.put("baseContainerImage", basePythonContainerImage);

Template template = freemarkerConfig.getTemplate("Dockerfile-template");
Template template = freemarkerConfig.getTemplate("Dockerfile-template-python");

ByteArrayOutputStream baos = new ByteArrayOutputStream();
OutputStreamWriter writer = new OutputStreamWriter(baos);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

Expand All @@ -37,18 +38,26 @@ public class YamlDockerfileGenerator {
private YamlDockerfileGenerator() {}

public static void generateDockerfile(
String basePythonContainerImage, String yamlTemplateName, File targetDirectory)
String baseJavaContainerImage,
String beamVersion,
String pythonVersion,
String yamlTemplateName,
List<String> otherFiles,
File targetDirectory)
throws IOException, TemplateException {

Configuration freemarkerConfig = new Configuration(Configuration.VERSION_2_3_32);
freemarkerConfig.setDefaultEncoding("UTF-8");
freemarkerConfig.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER);
freemarkerConfig.setLogTemplateExceptions(true);
freemarkerConfig.setClassForTemplateLoading(PythonDockerfileGenerator.class, "/");

Map<String, Object> parameters = new HashMap<>();
parameters.put("baseContainerImage", basePythonContainerImage);
parameters.put("yamlTemplateName", yamlTemplateName + ".yaml");
parameters.put("baseJavaContainerImage", baseJavaContainerImage);
parameters.put("beamVersion", beamVersion);
parameters.put("pythonVersion", pythonVersion);
if (!otherFiles.isEmpty()) {
parameters.put("copyOtherFiles", String.join(" ", otherFiles));
}

Template template = freemarkerConfig.getTemplate("Dockerfile-template-yaml");

Expand Down
66 changes: 59 additions & 7 deletions plugins/core-plugin/src/main/resources/Dockerfile-template-yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,68 @@
FROM ${baseContainerImage}
#===================================================================#
# Create build environment from base Python template launcher image #
#===================================================================#
FROM gcr.io/dataflow-templates-base/python311-template-launcher-base:latest as python-base

# Copy template files to /template
# Build args
ARG WORKDIR=/template
COPY main.py /template
COPY requirements.txt /template
COPY ${yamlTemplateName} /template/template.yaml
ARG REQUIREMENTS_FILE=requirements.txt
ARG BEAM_VERSION=${beamVersion}
ARG BEAM_PACKAGE=apache-beam[dataframe,gcp,test,yaml]==$BEAM_VERSION
ARG PY_VERSION=${pythonVersion}

# Copy template files to /template
RUN mkdir -p $WORKDIR
COPY main.py requirements.txt* /template/
<#if copyOtherFiles??>COPY ${copyOtherFiles} /template/</#if>
WORKDIR $WORKDIR

ENV FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE=requirements.txt
# Create requirements.txt file if not provided
RUN if ! [ -f requirements.txt ] ; then echo "$BEAM_PACKAGE" > requirements.txt ; fi

# Install dependencies to launch the pipeline and download to reduce startup time
RUN python -m venv /venv \
&& /venv/bin/pip install --no-cache-dir --upgrade pip setuptools \
&& /venv/bin/pip install --no-cache-dir -U -r $REQUIREMENTS_FILE \
&& /venv/bin/pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \
&& rm -rf /usr/local/lib/python$PY_VERSION/site-packages \
&& mv /venv/lib/python$PY_VERSION/site-packages /usr/local/lib/python$PY_VERSION/


#============================================================#
# Create Distroless xlang image compatible with YamlTemplate #
#============================================================#
FROM ${baseJavaContainerImage}

# Build args
ARG CHIPSET_ARCH=x86_64-linux-gnu
ARG PY_VERSION=${pythonVersion}

# Set python environment variables
ENV FLEX_TEMPLATE_PYTHON_PY_FILE=main.py
ENV PIP_NO_DEPS=True

# Copy template, python wheels and python launcher script from python-base
COPY --from=python-base /template /template
COPY --from=python-base /tmp/dataflow-requirements-cache /tmp/dataflow-requirements-cache
COPY --from=python-base /opt/google/dataflow/python_template_launcher /opt/google/dataflow/python_template_launcher

# Copy python and installed packages from python-base
COPY --from=python-base /usr/local/bin/python$PY_VERSION /usr/local/bin/python
COPY --from=python-base /usr/local/lib/python$PY_VERSION /usr/local/lib/python$PY_VERSION

# Copy required shared libraries from python-base
COPY --from=python-base /lib/$CHIPSET_ARCH/ld-*so* /lib64/
COPY --from=python-base /lib/$CHIPSET_ARCH/lib*so* /lib/$CHIPSET_ARCH/
COPY --from=python-base /usr/lib/$CHIPSET_ARCH/libffi* /usr/lib/$CHIPSET_ARCH/
COPY --from=python-base /usr/local/lib/libpython$PY_VERSION* /usr/local/lib/

# Copy minimal commands from python-base needed to execute template
COPY --from=python-base /bin/dash /bin/sh
COPY --from=python-base /usr/bin/which.debianutils /usr/bin/which

# Copy licenses
COPY --from=python-base /usr/licenses/ /usr/licenses/

WORKDIR /template

ENTRYPOINT ["/opt/google/dataflow/python_template_launcher"]
ENTRYPOINT ["/opt/google/dataflow/python_template_launcher"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright (C) 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.teleport.plugin;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertTrue;

import com.google.common.base.Charsets;
import com.google.common.io.Files;
import freemarker.template.TemplateException;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** Tests for class {@link YamlDockerfileGenerator}. */
@RunWith(JUnit4.class)
public class YamlDockerfileGeneratorTest {
  /** Template name passed to the generator; the tests look for the Dockerfile in this sub-folder. */
  private static final String TEMPLATE_NAME = "word-count";

  /** Fresh temporary directory used as the generator's target directory. */
  private final File outputFolder = Files.createTempDir().getAbsoluteFile();

  /**
   * With no extra files, the rendered Dockerfile must contain the substituted values and the
   * {@code main.py} COPY line, and no other non-{@code --from=} COPY line mentioning {@code
   * /template}.
   */
  @Test
  public void testGenerateDockerfile() throws IOException, TemplateException {
    String fileContents = generateAndReadDockerfile(List.of());

    assertCommonContents(fileContents);
    // Any line that starts with COPY, is not the main.py COPY, has no --from= and mentions
    // /template would be an unexpected extra copy into the template directory.
    assertThat(fileContents)
        .doesNotContainMatch(
            "(?m)^(?!COPY main\\.py.*)(COPY(?!.*--from=).*/template.*$|COPY main\\.py.*)$");
  }

  /** With one extra file, the rendered Dockerfile must additionally COPY that file. */
  @Test
  public void testGenerateDockerfileWithOtherFiles() throws IOException, TemplateException {
    String fileContents = generateAndReadDockerfile(List.of("other_file"));

    assertCommonContents(fileContents);
    assertThat(fileContents).contains("COPY other_file /template/");
  }

  /**
   * Runs the generator with fixed placeholder arguments and returns the rendered Dockerfile.
   *
   * @param otherFiles extra files handed to the generator
   * @return contents of {@code <outputFolder>/word-count/Dockerfile}, read as UTF-8
   * @throws IOException if generating or reading the Dockerfile fails
   * @throws TemplateException if the FreeMarker template cannot be processed
   */
  private String generateAndReadDockerfile(List<String> otherFiles)
      throws IOException, TemplateException {
    File templateFolder = new File(outputFolder, TEMPLATE_NAME);
    templateFolder.mkdirs();

    YamlDockerfileGenerator.generateDockerfile(
        "a java container image",
        "beam_version",
        "py_version",
        TEMPLATE_NAME,
        otherFiles,
        outputFolder);

    File outputFile = new File(templateFolder, "Dockerfile");
    assertTrue(outputFile.exists());
    return Files.toString(outputFile, Charsets.UTF_8);
  }

  /** Asserts the content that both test cases expect in the rendered Dockerfile. */
  private static void assertCommonContents(String fileContents) {
    assertThat(fileContents).contains("FROM a java container image");
    assertThat(fileContents).contains("=beam_version");
    assertThat(fileContents).contains("=py_version");
    assertThat(fileContents).contains("COPY main.py requirements.txt* /template/");
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import com.google.cloud.teleport.plugin.YamlDockerfileGenerator;
import com.google.cloud.teleport.plugin.model.ImageSpec;
import com.google.cloud.teleport.plugin.model.TemplateDefinitions;
import com.google.common.base.Strings;
import freemarker.template.TemplateException;
import java.io.File;
import java.io.FileWriter;
Expand Down Expand Up @@ -108,12 +109,15 @@ public class TemplatesStageMojo extends TemplatesBaseMojo {
required = false)
protected String baseContainerImage;

// Keep pythonVersion below in sync with version in image
@Parameter(
name = "basePythonContainerImage",
defaultValue = "gcr.io/dataflow-templates-base/python311-template-launcher-base:latest",
required = false)
protected String basePythonContainerImage;

protected String pythonVersion = "3.11";

@Parameter(defaultValue = "${unifiedWorker}", readonly = true, required = false)
protected boolean unifiedWorker;

Expand Down Expand Up @@ -338,8 +342,7 @@ protected String stageFlexTemplate(
TemplateSpecsGenerator generator = new TemplateSpecsGenerator();

String containerName = definition.getTemplateAnnotation().flexContainerName();
String yamlTemplateName =
definition.getTemplateAnnotation().yamlTemplateName().replace(".yaml", "");
String yamlTemplateFile = definition.getTemplateAnnotation().yamlTemplateFile();
String imagePath = imageSpec.getImage();
LOG.info("Stage image to GCR: {}", imagePath);

Expand Down Expand Up @@ -393,7 +396,7 @@ protected String stageFlexTemplate(
definition, currentTemplateName, imagePath, metadataFile, containerName, templatePath);
} else if (definition.getTemplateAnnotation().type() == TemplateType.YAML) {
stageFlexYamlTemplate(
definition, currentTemplateName, imagePath, metadataFile, yamlTemplateName, templatePath);
definition, currentTemplateName, imagePath, metadataFile, yamlTemplateFile, templatePath);
} else {
throw new IllegalArgumentException(
"Type not known: " + definition.getTemplateAnnotation().type());
Expand Down Expand Up @@ -567,15 +570,43 @@ private void stageFlexYamlTemplate(
String currentTemplateName,
String imagePath,
File metadataFile,
String yamlTemplateName,
String yamlTemplateFile,
String templatePath)
throws IOException, InterruptedException, TemplateException {

// TODO(polber) Use basePythonContainerImage once plugin can parse metadata from YAML Templates
String containerImage = "gcr.io/" + projectId + "/beam-yaml/yaml-template-base:latest";
// extract image properties for Dockerfile
String yamlTemplateName = yamlTemplateFile.replace(".yaml", "");
String beamVersion = project.getProperties().getProperty("beam.version");
List<String> otherFiles = new ArrayList<>();
String filesToCopy = definition.getTemplateAnnotation().filesToCopy();
if (!Strings.isNullOrEmpty(filesToCopy)) {
otherFiles.addAll(List.of(filesToCopy.split(",")));
}
if (!Strings.isNullOrEmpty(yamlTemplateFile)) {
otherFiles.add(yamlTemplateFile);
} else {
yamlTemplateName = definition.getTemplateAnnotation().flexContainerName();
}
YamlDockerfileGenerator.generateDockerfile(
containerImage, yamlTemplateName, outputClassesDirectory);
stageYamlUsingDockerfile(imagePath, yamlTemplateName + "/Dockerfile");
baseContainerImage,
beamVersion,
pythonVersion,
yamlTemplateName,
otherFiles,
outputClassesDirectory);

boolean useRootDirectory = true;
if (new File(outputClassesDirectory.getPath() + "/" + yamlTemplateName + "/main.py").exists()) {
useRootDirectory = false;
} else if (!new File(outputClassesDirectory.getPath() + "/main.py").exists()) {
throw new IllegalStateException(
String.format(
"main.py not found in %s or %s.",
outputClassesDirectory.getPath(),
outputClassesDirectory.getPath() + "/" + yamlTemplateName + "/main.py"));
}

stageYamlUsingDockerfile(imagePath, yamlTemplateName, useRootDirectory);

String[] flexTemplateBuildCmd =
new String[] {
Expand Down Expand Up @@ -664,9 +695,13 @@ private void stageFlexPythonTemplate(
}
}

private void stageYamlUsingDockerfile(String imagePath, String dockerfile)
private void stageYamlUsingDockerfile(
String imagePath, String yamlTemplateName, boolean useRootDirectory)
throws IOException, InterruptedException {
File directory = new File(outputClassesDirectory.getAbsolutePath());
File directory =
new File(
outputClassesDirectory.getAbsolutePath()
+ (useRootDirectory ? "" : "/" + yamlTemplateName));

File cloudbuildFile = File.createTempFile("cloudbuild", ".yaml");
try (FileWriter writer = new FileWriter(cloudbuildFile)) {
Expand All @@ -679,8 +714,8 @@ private void stageYamlUsingDockerfile(String imagePath, String dockerfile)
+ imagePath
+ "\n"
+ " - --dockerfile="
+ dockerfile
+ "\n"
+ (useRootDirectory ? yamlTemplateName + "/" : "")
+ "Dockerfile\n"
+ " - --cache=true\n"
+ " - --cache-ttl=6h\n"
+ " - --compressed-caching=false\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
@Template(
name = "Yaml_Template",
category = TemplateCategory.GET_STARTED,
type = Template.TemplateType.PYTHON,
type = Template.TemplateType.YAML,
displayName = "YAML Template (Experimental)",
description =
"YAML pipeline. Reads YAML from Cloud Storage and dynamically expands YAML into "
Expand Down
Loading

0 comments on commit 2b77b2f

Please sign in to comment.