From 8dcb19d1f8cb2e60256c15e4d9e70e2c461ccc63 Mon Sep 17 00:00:00 2001 From: Armando Zhu Date: Mon, 5 Feb 2024 03:27:39 +0000 Subject: [PATCH 1/3] ci: Add test case for hdfs over gcs bucket (#3504) --- .../services/hdfs/hdfs_default_gcs/action.yml | 64 +++++++++++++++++++ core/src/services/hdfs/docs.md | 1 + fixtures/hdfs/gcs-core-site.xml | 55 ++++++++++++++++ 3 files changed, 120 insertions(+) create mode 100644 .github/services/hdfs/hdfs_default_gcs/action.yml create mode 100644 fixtures/hdfs/gcs-core-site.xml diff --git a/.github/services/hdfs/hdfs_default_gcs/action.yml b/.github/services/hdfs/hdfs_default_gcs/action.yml new file mode 100644 index 00000000000..e492e25bc03 --- /dev/null +++ b/.github/services/hdfs/hdfs_default_gcs/action.yml @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: hdfs_default_gcs +description: 'Behavior test for hdfs default over gcs' + +runs: + using: "composite" + steps: + - name: Setup java env + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: "11" + - name: Load secrets + uses: 1password/load-secrets-action@v1 + with: + export-env: true + env: + OPENDAL_GCS_ROOT: op://services/gcs/root + OPENDAL_GCS_BUCKET: op://services/gcs/bucket + OPENDAL_GCS_CREDENTIAL: op://services/gcs/credential + - name: Setup + shell: bash + run: | + curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner + + export HADOOP_HOME="/home/runner/hadoop-3.3.5" + export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) + + curl -LsSf -o ${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-hadoop3-2.2.19-shaded.jar https://github.com/GoogleCloudDataproc/hadoop-connectors/releases/download/v2.2.19/gcs-connector-hadoop3-2.2.19-shaded.jar + + cp ./fixtures/hdfs/hdfs-site.xml ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml + cp ./fixtures/hdfs/gcs-core-site.xml ${HADOOP_HOME}/etc/hadoop/core-site.xml + + cat << EOF >> $GITHUB_ENV + HADOOP_HOME=${HADOOP_HOME} + CLASSPATH=${CLASSPATH} + LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${HADOOP_HOME}/lib/native + OPENDAL_HDFS_ROOT=${OPENDAL_GCS_ROOT} + OPENDAL_HDFS_NAME_NODE=gs://${OPENDAL_GCS_BUCKET} + OPENDAL_HDFS_ENABLE_APPEND=false + EOF + + mkdir -p /tmp/hdfs + + cat << EOF > /tmp/hdfs/gcs-credentials.json + `echo ${OPENDAL_GCS_CREDENTIAL} | base64 -d` + EOF + diff --git a/core/src/services/hdfs/docs.md b/core/src/services/hdfs/docs.md index c9289159f5b..7e4d8821128 100644 --- a/core/src/services/hdfs/docs.md +++ b/core/src/services/hdfs/docs.md @@ -121,6 +121,7 @@ async fn main() -> Result<()> { // Create fs backend builder. let mut builder = Hdfs::default(); // Set the name node for hdfs. + // If the string starts with a protocol type such as file://, hdfs://, or gs://, this protocol type will be used. builder.name_node("hdfs://127.0.0.1:9000"); // Set the root for hdfs, all operations will happen under this root. // diff --git a/fixtures/hdfs/gcs-core-site.xml b/fixtures/hdfs/gcs-core-site.xml new file mode 100644 index 00000000000..cd304c7f7ad --- /dev/null +++ b/fixtures/hdfs/gcs-core-site.xml @@ -0,0 +1,55 @@ + + + + + + + + + fs.AbstractFileSystem.gs.impl + com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS + The AbstractFileSystem for 'gs:' URIs. + + + fs.gs.project.id + + + Optional. Google Cloud Project ID with access to GCS buckets. + Required only for list buckets and create bucket operations. + + + + google.cloud.auth.type + SERVICE_ACCOUNT_JSON_KEYFILE + + Authentication type to use for GCS access. + + + + google.cloud.auth.service.account.json.keyfile + /tmp/hdfs/gcs-credentials.json + + The JSON keyfile of the service account used for GCS + access when google.cloud.auth.type is SERVICE_ACCOUNT_JSON_KEYFILE. + + + From b54da6ca462ff1c26f547d19f734731b7eb4eda3 Mon Sep 17 00:00:00 2001 From: Armando Zhu Date: Wed, 7 Feb 2024 07:23:02 +0000 Subject: [PATCH 2/3] Update xml headers --- .github/services/hdfs/hdfs_default_gcs/action.yml | 4 +--- fixtures/hdfs/gcs-core-site.xml | 5 +++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/services/hdfs/hdfs_default_gcs/action.yml b/.github/services/hdfs/hdfs_default_gcs/action.yml index e492e25bc03..c4501716290 100644 --- a/.github/services/hdfs/hdfs_default_gcs/action.yml +++ b/.github/services/hdfs/hdfs_default_gcs/action.yml @@ -58,7 +58,5 @@ runs: mkdir -p /tmp/hdfs - cat << EOF > /tmp/hdfs/gcs-credentials.json - `echo ${OPENDAL_GCS_CREDENTIAL} | base64 -d` - EOF + echo ${OPENDAL_GCS_CREDENTIAL} | base64 -d > /tmp/hdfs/gcs-credentials.json diff --git a/fixtures/hdfs/gcs-core-site.xml b/fixtures/hdfs/gcs-core-site.xml index cd304c7f7ad..e8d7418a29e 100644 --- a/fixtures/hdfs/gcs-core-site.xml +++ b/fixtures/hdfs/gcs-core-site.xml @@ -1,3 +1,5 @@ + + - - + From 305638632df8a8a6f8051824c88707d6b968a5f1 Mon Sep 17 00:00:00 2001 From: Armando Zhu Date: Wed, 7 Feb 2024 07:46:52 +0000 Subject: [PATCH 3/3] Modify CLASSPATH --- .github/services/hdfs/hdfs_default_gcs/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/services/hdfs/hdfs_default_gcs/action.yml b/.github/services/hdfs/hdfs_default_gcs/action.yml index c4501716290..69aabeba75d 100644 --- a/.github/services/hdfs/hdfs_default_gcs/action.yml +++ b/.github/services/hdfs/hdfs_default_gcs/action.yml @@ -40,10 +40,11 @@ runs: curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner export HADOOP_HOME="/home/runner/hadoop-3.3.5" - export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) curl -LsSf -o ${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-hadoop3-2.2.19-shaded.jar https://github.com/GoogleCloudDataproc/hadoop-connectors/releases/download/v2.2.19/gcs-connector-hadoop3-2.2.19-shaded.jar + export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) + cp ./fixtures/hdfs/hdfs-site.xml ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml cp ./fixtures/hdfs/gcs-core-site.xml ${HADOOP_HOME}/etc/hadoop/core-site.xml @@ -59,4 +60,3 @@ runs: mkdir -p /tmp/hdfs echo ${OPENDAL_GCS_CREDENTIAL} | base64 -d > /tmp/hdfs/gcs-credentials.json -