# generate-tpch-s3.yml
# Generates the TPC-H benchmark files in Parquet and Vortex formats and uploads
# them to S3. Runs on a daily schedule and can also be triggered manually.
name: "Generate TPC-H files to S3"

on:
  # Manual trigger (no inputs).
  workflow_dispatch: {}
  schedule:
    # 5AM UTC
    - cron: "0 5 * * *"

# All runs of this workflow share one concurrency group, so two runs never
# execute at the same time.
concurrency:
  group: ${{ github.workflow }}

# Declaring `permissions` sets every scope that is not listed here to `none`.
permissions:
  # NOTE(review): presumably required by one of the local actions used below
  # (e.g. cleanup) - confirm.
  actions: write
  # Needed to request the OIDC token that is exchanged for the AWS role.
  id-token: write
  # Needed by actions/checkout to read the repository contents.
  contents: read

jobs:
  generate_files:
    name: Generate S3 TPCH files
    # The runner is selected through key=value labels: instance family, image,
    # and spot instances disabled. The first label is keyed on the run id.
    runs-on:
      - runs-on=${{ github.run_id }}
      - family=m7i.2xlarge
      - image=ubuntu24-full-x64
      - spot=false
    env:
      # Destination prefix for the upload step.
      REMOTE_PATH: s3://vortex-bench-dev-eu/tpch-sf1/
      TMPDIR: /work
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/cleanup
      - uses: ./.github/actions/setup-rust
      - name: Setup AWS CLI
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
          # NOTE(review): the bucket name ends in "-eu" while the region here is
          # us-east-1 - confirm this is intentional.
          aws-region: us-east-1
      - name: Generate files locally
        shell: bash
        run: |
          # We run each query once to make sure we don't upload a file if there's a bug that causes a panic.
          cargo run --release --bin tpch -- --formats parquet,vortex -i1
          aws s3 cp --recursive bench-vortex/data/tpch/1 "$REMOTE_PATH"
          # We can now clear generated data
          rm -rf bench-vortex/data/