From 4b315ba57b2c77fd9ddab0286b0642b399b5afbb Mon Sep 17 00:00:00 2001
From: Armando Zhu
Date: Thu, 15 Feb 2024 21:00:05 -0800
Subject: [PATCH] ci: Add hdfs test case for s3 (#4184)

---
 .../hdfs/hdfs_default_on_minio_s3/action.yml | 68 +++++++++++++++++++
 fixtures/hdfs/minio-s3-core-site.xml         | 51 ++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 .github/services/hdfs/hdfs_default_on_minio_s3/action.yml
 create mode 100644 fixtures/hdfs/minio-s3-core-site.xml

diff --git a/.github/services/hdfs/hdfs_default_on_minio_s3/action.yml b/.github/services/hdfs/hdfs_default_on_minio_s3/action.yml
new file mode 100644
index 00000000000..e75b8e41562
--- /dev/null
+++ b/.github/services/hdfs/hdfs_default_on_minio_s3/action.yml
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: hdfs_default_on_minio_s3
+description: 'Behavior test for hdfs default on minio s3'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Setup java env
+      uses: actions/setup-java@v4
+      with:
+        distribution: temurin
+        java-version: "11"
+    - name: Setup MinIO Server
+      shell: bash
+      working-directory: fixtures/s3
+      run: docker compose -f docker-compose-minio.yml up -d --wait
+    - name: Setup test bucket
+      shell: bash
+      env:
+        AWS_ACCESS_KEY_ID: "minioadmin"
+        AWS_SECRET_ACCESS_KEY: "minioadmin"
+        AWS_EC2_METADATA_DISABLED: "true"
+      run: aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://test
+    - name: Setup
+      shell: bash
+      run: |
+        curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner
+
+        export HADOOP_HOME="/home/runner/hadoop-3.3.5"
+
+        curl -LsSf -o ${HADOOP_HOME}/share/hadoop/common/lib/hadoop-aws-3.3.5.jar https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.5/hadoop-aws-3.3.5.jar
+        curl -LsSf -o ${HADOOP_HOME}/share/hadoop/common/lib/aws-java-sdk-bundle-1.12.653.jar https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.653/aws-java-sdk-bundle-1.12.653.jar
+
+        export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob)
+
+        mkdir -p /tmp/hdfs
+
+        pushd ${HADOOP_HOME}
+        ./bin/hadoop credential create fs.s3a.access.key -value minioadmin -provider localjceks://file/tmp/hdfs/s3.jceks
+        ./bin/hadoop credential create fs.s3a.secret.key -value minioadmin -provider localjceks://file/tmp/hdfs/s3.jceks
+        popd
+
+        cp ./fixtures/hdfs/hdfs-site.xml ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml
+        cp ./fixtures/hdfs/minio-s3-core-site.xml ${HADOOP_HOME}/etc/hadoop/core-site.xml
+
+        cat << EOF >> $GITHUB_ENV
+        HADOOP_HOME=${HADOOP_HOME}
+        CLASSPATH=${CLASSPATH}
+        LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${HADOOP_HOME}/lib/native
+        OPENDAL_HDFS_NAME_NODE=s3a://test
+        OPENDAL_HDFS_ENABLE_APPEND=false
+        EOF
diff --git a/fixtures/hdfs/minio-s3-core-site.xml b/fixtures/hdfs/minio-s3-core-site.xml
new file mode 100644
index 00000000000..9a5b79fb3f6
--- /dev/null
+++ b/fixtures/hdfs/minio-s3-core-site.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<configuration>
+  <property>
+    <name>hadoop.security.credential.provider.path</name>
+    <value>localjceks://file/tmp/hdfs/s3.jceks</value>
+    <description>Path to interrogate for protected aws s3 storage credentials.</description>
+  </property>
+  <property>
+    <name>fs.s3a.endpoint</name>
+    <value>http://127.0.0.1:9000</value>
+    <description>AWS S3 endpoint to connect to. An up-to-date list is
+      provided in the AWS Documentation: regions and endpoints. Without this
+      property, the standard region (s3.amazonaws.com) is assumed.
+    </description>
+  </property>
+  <property>
+    <name>fs.s3a.endpoint.region</name>
+    <value>us-east-1</value>
+    <description>AWS S3 region for a bucket, which bypasses the parsing of
+      fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
+      while using privateLink URL and explicitly set the bucket region.
+      If set to a blank string (or 1+ space), falls back to the
+      (potentially brittle) SDK region resolution process.
+    </description>
+  </property>
+</configuration>
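
Note on the wiring this patch sets up: OPENDAL_HDFS_NAME_NODE=s3a://test makes the HDFS behavior tests go through Hadoop's s3a connector to the MinIO-backed bucket instead of a real namenode. Below is a rough local smoke test of that wiring, a sketch only: it assumes the MinIO container from fixtures/s3 is running and that HADOOP_HOME, CLASSPATH, the jceks credential store, and the copied core-site.xml are prepared exactly as in the "Setup" step above; the probe file name is illustrative, not part of the patch.

    # Round-trip a small object through the s3a connector to confirm that
    # the MinIO-backed bucket created by the action ("test") is reachable.
    export HADOOP_HOME="/home/runner/hadoop-3.3.5"
    export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob)

    echo "probe" > /tmp/probe.txt   # illustrative payload
    ${HADOOP_HOME}/bin/hadoop fs -put -f /tmp/probe.txt s3a://test/probe.txt
    ${HADOOP_HOME}/bin/hadoop fs -cat s3a://test/probe.txt
    ${HADOOP_HOME}/bin/hadoop fs -rm s3a://test/probe.txt

If the round trip succeeds, the behavior tests can target the same setup through the environment exported to $GITHUB_ENV by the action.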