Skip to content

Commit

Permalink
branch-3.0: [opt](iceberg docker)Use PostgreSQL as the backend for th…
Browse files Browse the repository at this point in the history
…e Iceberg REST server. apache#46289 (apache#46575)

Cherry-picked from apache#46289

Co-authored-by: wuwenchi <[email protected]>
  • Loading branch information
github-actions[bot] and wuwenchi authored Jan 10, 2025
1 parent c095b83 commit f3e570a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 18 deletions.
37 changes: 19 additions & 18 deletions docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,25 @@ start-worker.sh spark://doris--spark-iceberg:7077
# Launch the two helper scripts below before any SQL files are executed.
# NOTE(review): presumably these start the Spark history server and the Spark
# Thrift server — confirm against the base image's scripts.
start-history-server.sh
# --driver-java-options passes -Dderby.system.home=/tmp/derby to the driver JVM,
# pointing the Derby system home at /tmp/derby.
start-thriftserver.sh --driver-java-options "-Dderby.system.home=/tmp/derby"



# Per-file execution of the Iceberg SQL scripts: xargs substitutes each path
# listed by `ls` for {} and starts a separate bash + spark-sql client for it,
# then prints how many seconds that single file took.
# NOTE(review): in this diff view this loop appears to be the pre-change code
# that the batched iceberg_total.sql run replaces — confirm it is not kept
# alongside it, otherwise every script runs twice.
ls /mnt/scripts/create_preinstalled_scripts/iceberg/*.sql | xargs -n 1 -I {} bash -c '
START_TIME=$(date +%s)
spark-sql --master spark://doris--spark-iceberg:7077 --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions -f {}
END_TIME=$(date +%s)
EXECUTION_TIME=$((END_TIME - START_TIME))
echo "Script: {} executed in $EXECUTION_TIME seconds"
'

# Per-file execution of the Paimon SQL scripts: xargs substitutes each path
# listed by `ls` for {} and starts a separate bash + spark-sql client for it
# (with the Paimon session extension), then prints the seconds that file took.
# NOTE(review): in this diff view this loop appears to be the pre-change code
# that the batched paimon_total.sql run replaces — confirm it is not kept
# alongside it, otherwise every script runs twice.
ls /mnt/scripts/create_preinstalled_scripts/paimon/*.sql | xargs -n 1 -I {} bash -c '
START_TIME=$(date +%s)
spark-sql --master spark://doris--spark-iceberg:7077 --conf spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions -f {}
END_TIME=$(date +%s)
EXECUTION_TIME=$((END_TIME - START_TIME))
echo "Script: {} executed in $EXECUTION_TIME seconds"
'

# The creation of a Spark SQL client is time-consuming,
# and reopening a new client for each SQL file execution leads to significant overhead.
# To reduce the time spent on creating clients,
# we group these files together and execute them using a single client.
# This approach can reduce the time from 150s to 40s.

# Run all Iceberg SQL scripts with a single spark-sql client.
# A driver file (iceberg_total.sql, written to the current directory) is
# generated with one `source <path>;` line per script, executed once, and the
# total wall-clock time is reported.
START_TIME1=$(date +%s)
# `find` returns entries in directory order, so sort the paths to keep the
# execution order deterministic across runs and filesystems.
find /mnt/scripts/create_preinstalled_scripts/iceberg -name '*.sql' | sort | sed 's|^|source |; s|$|;|' > iceberg_total.sql
spark-sql --master spark://doris--spark-iceberg:7077 --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions -f iceberg_total.sql
END_TIME1=$(date +%s)
EXECUTION_TIME1=$((END_TIME1 - START_TIME1))
# Name the driver file in the message; the previous text printed a literal "{}"
# left over from the xargs-based version.
echo "Script iceberg total: iceberg_total.sql executed in $EXECUTION_TIME1 seconds"

# Run all Paimon SQL scripts with a single spark-sql client.
# A driver file (paimon_total.sql, written to the current directory) is
# generated with one `source <path>;` line per script, executed once, and the
# total wall-clock time is reported.
START_TIME2=$(date +%s)
# `find` returns entries in directory order, so sort the paths to keep the
# execution order deterministic across runs and filesystems.
find /mnt/scripts/create_preinstalled_scripts/paimon -name '*.sql' | sort | sed 's|^|source |; s|$|;|' > paimon_total.sql
spark-sql --master spark://doris--spark-iceberg:7077 --conf spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions -f paimon_total.sql
END_TIME2=$(date +%s)
EXECUTION_TIME2=$((END_TIME2 - START_TIME2))
# Name the driver file in the message; the previous text printed a literal "{}"
# left over from the xargs-based version.
echo "Script paimon total: paimon_total.sql executed in $EXECUTION_TIME2 seconds"

# Create the marker file /mnt/SUCCESS once the commands above have run.
# It is created unconditionally: the exit status of spark-sql is not checked.
# NOTE(review): presumably polled by the container healthcheck — confirm.
touch /mnt/SUCCESS;

Expand Down
19 changes: 19 additions & 0 deletions docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
version: "3"

services:

spark-iceberg:
image: tabulario/spark-iceberg
container_name: doris--spark-iceberg
Expand Down Expand Up @@ -47,20 +48,38 @@ services:
interval: 5s
timeout: 120s
retries: 120

# PostgreSQL database that stores the Iceberg REST catalog metadata
# (the `rest` service connects to it through CATALOG_URI).
postgres:
  image: postgis/postgis:14-3.3
  container_name: doris--postgres
  environment:
    # Quoted so the all-digit password is a YAML string rather than an integer.
    POSTGRES_PASSWORD: "123456"
    POSTGRES_USER: root
    POSTGRES_DB: iceberg
  volumes:
    - ./data/input/pgdata:/var/lib/postgresql/data
  # Healthy only once the server accepts connections; probed over TCP, the
  # same way the `rest` service reaches it.
  healthcheck:
    test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U root -d iceberg"]
    interval: 5s
    timeout: 5s
    retries: 120
  networks:
    - doris--iceberg

# Iceberg REST catalog server, backed by the `postgres` service via JDBC.
rest:
  image: tabulario/iceberg-rest
  container_name: doris--iceberg-rest
  ports:
    # Quoted so the mapping is always read as a string after substitution.
    - "${REST_CATALOG_PORT}:8181"
  volumes:
    - ./data:/mnt/data
  # Long-form depends_on: wait until PostgreSQL passes its healthcheck, not
  # merely until its container has started, so the JDBC catalog can connect.
  # Requires a Compose implementation that supports `condition`
  # (Compose Specification: docker compose v2 / docker-compose >= 1.27).
  depends_on:
    postgres:
      condition: service_healthy
  environment:
    - AWS_ACCESS_KEY_ID=admin
    - AWS_SECRET_ACCESS_KEY=password
    - AWS_REGION=us-east-1
    - CATALOG_WAREHOUSE=s3a://warehouse/wh/
    - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
    - CATALOG_S3_ENDPOINT=http://minio:9000
    # JDBC connection settings; must match the POSTGRES_* values above.
    - CATALOG_URI=jdbc:postgresql://postgres:5432/iceberg
    - CATALOG_JDBC_USER=root
    - CATALOG_JDBC_PASSWORD=123456
  networks:
    - doris--iceberg
  entrypoint: /bin/bash /mnt/data/input/script/rest_init.sh
Expand Down

0 comments on commit f3e570a

Please sign in to comment.