From 9c1e27384b60cc0f9505879045a28c43c0a3b8f5 Mon Sep 17 00:00:00 2001
From: Armando Zhu
Date: Mon, 5 Feb 2024 03:27:39 +0000
Subject: [PATCH] ci: Add test case for hdfs over gcs bucket (#3504)

---
 .github/workflows/service_test_hdfs.yml | 38 ++++++++++++++++++++
 core/src/services/hdfs/docs.md          |  1 +
 fixtures/hdfs/gcs-core-site.xml         | 48 +++++++++++++++++++++++++
 3 files changed, 87 insertions(+)
 create mode 100644 fixtures/hdfs/gcs-core-site.xml

diff --git a/.github/workflows/service_test_hdfs.yml b/.github/workflows/service_test_hdfs.yml
index 1e1dae9f7e6..ce05b65f343 100644
--- a/.github/workflows/service_test_hdfs.yml
+++ b/.github/workflows/service_test_hdfs.yml
@@ -173,3 +173,41 @@ jobs:
           OPENDAL_HDFS_ATOMIC_WRITE_DIR: /tmp/atomic_write_dir/opendal/
           OPENDAL_HDFS_NAME_NODE: default
           OPENDAL_HDFS_ENABLE_APPEND: false
+
+  hdfs-default-gcs:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Rust toolchain
+        uses: ./.github/actions/setup
+        with:
+          need-nextest: true
+
+      - name: Setup java env
+        uses: actions/setup-java@v4
+        with:
+          distribution: temurin
+          java-version: "11"
+      - name: Setup hadoop env
+        shell: bash
+        run: |
+          curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner
+
+      - name: Test
+        shell: bash
+        working-directory: core
+        run: |
+          curl -LsSf -o ${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-hadoop3-2.2.19-shaded.jar https://github.com/GoogleCloudDataproc/hadoop-connectors/releases/download/v2.2.19/gcs-connector-hadoop3-2.2.19-shaded.jar
+          export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob)
+          export LD_LIBRARY_PATH=${{ env.JAVA_HOME }}/lib/server:${{ env.HADOOP_HOME }}/lib/native
+          cp ${{ github.workspace }}/fixtures/hdfs/hdfs-site.xml ${{ env.HADOOP_HOME }}/etc/hadoop/hdfs-site.xml
+          cp ${{ github.workspace }}/fixtures/hdfs/gcs-core-site.xml ${{ env.HADOOP_HOME }}/etc/hadoop/core-site.xml
+
+          cargo test behavior --features tests,services-hdfs
+        env:
+          HADOOP_HOME: "/home/runner/hadoop-3.3.5"
+          OPENDAL_TEST: hdfs
+          OPENDAL_HDFS_ROOT: /tmp/opendal/
+          OPENDAL_HDFS_NAME_NODE: gs://bucket_name
+          OPENDAL_HDFS_ENABLE_APPEND: false
\ No newline at end of file
diff --git a/core/src/services/hdfs/docs.md b/core/src/services/hdfs/docs.md
index c9289159f5b..7e4d8821128 100644
--- a/core/src/services/hdfs/docs.md
+++ b/core/src/services/hdfs/docs.md
@@ -121,6 +121,7 @@ async fn main() -> Result<()> {
     // Create fs backend builder.
     let mut builder = Hdfs::default();
     // Set the name node for hdfs.
+    // If the string starts with a protocol such as file://, hdfs://, or gs://, that protocol will be used.
     builder.name_node("hdfs://127.0.0.1:9000");
     // Set the root for hdfs, all operations will happen under this root.
     //
diff --git a/fixtures/hdfs/gcs-core-site.xml b/fixtures/hdfs/gcs-core-site.xml
new file mode 100644
index 00000000000..18c4516d601
--- /dev/null
+++ b/fixtures/hdfs/gcs-core-site.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<configuration>
+  <property>
+    <name>fs.AbstractFileSystem.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
+    <description>The AbstractFileSystem for 'gs:' URIs.</description>
+  </property>
+  <property>
+    <name>fs.gs.project.id</name>
+    <value></value>
+    <description>
+      Optional. Google Cloud Project ID with access to GCS buckets.
+      Required only for list buckets and create bucket operations.
+    </description>
+  </property>
+  <property>
+    <name>google.cloud.auth.type</name>
+    <value>SERVICE_ACCOUNT_JSON_KEYFILE</value>
+    <description>
+      Authentication type to use for GCS access.
+    </description>
+  </property>
+  <property>
+    <name>google.cloud.auth.service.account.json.keyfile</name>
+    <value>/path/to/json_key_file</value>
+    <description>
+      The JSON keyfile of the service account used for GCS
+      access when google.cloud.auth.type is SERVICE_ACCOUNT_JSON_KEYFILE.
+    </description>
+  </property>
+</configuration>
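Editor's note: the `OPENDAL_HDFS_NAME_NODE: gs://bucket_name` env var above, together with the new docs.md comment, is what routes the HDFS service through the GCS connector. For readers wiring this up outside CI, here is a minimal sketch that points the OpenDAL `Hdfs` builder at a GCS bucket, following the docs.md example in this patch. It assumes the gcs-connector jar and the `gcs-core-site.xml` above are already installed under `$HADOOP_HOME`, that `CLASSPATH`/`LD_LIBRARY_PATH` are exported as in the Test step, and `bucket_name` stays a placeholder:

```rust
use opendal::services::Hdfs;
use opendal::Operator;
use opendal::Result;

#[tokio::main]
async fn main() -> Result<()> {
    // Create the hdfs backend builder, as in core/src/services/hdfs/docs.md.
    let mut builder = Hdfs::default();
    // A name node starting with gs:// makes libhdfs resolve the filesystem
    // through the GCS connector configured in core-site.xml instead of a
    // real HDFS name node. "bucket_name" is a placeholder, as in the workflow.
    builder.name_node("gs://bucket_name");
    // Match OPENDAL_HDFS_ROOT from the workflow's env block.
    builder.root("/tmp/opendal/");

    let op = Operator::new(builder)?.finish();

    // Smoke-test the mount the way the behavior tests do: one write, one read.
    op.write("hello.txt", "Hello, World!").await?;
    let bs = op.read("hello.txt").await?;
    println!("read {} bytes back", bs.len());
    Ok(())
}
```

Note that the workflow pins `OPENDAL_HDFS_ENABLE_APPEND: false`, consistent with GCS objects being immutable, so the sketch avoids append calls as well.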