diff --git a/datafusion-partitioned/benchmark.sh b/datafusion-partitioned/benchmark.sh index e236dbbd2..6226127fc 100755 --- a/datafusion-partitioned/benchmark.sh +++ b/datafusion-partitioned/benchmark.sh @@ -3,4 +3,7 @@ export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-partitioned" export BENCH_DURABLE=yes export BENCH_RESTARTABLE=no +# skip concurrent_qps tests by default as datafusion-cli is configured +# for single user/single process usage +export BENCH_CONCURRENT_DURATION="${BENCH_CONCURRENT_DURATION:-0}" exec ../lib/benchmark-common.sh diff --git a/datafusion-partitioned/load b/datafusion-partitioned/load index 275c2c6ae..357df066a 100755 --- a/datafusion-partitioned/load +++ b/datafusion-partitioned/load @@ -1,9 +1,14 @@ #!/bin/bash # datafusion queries the parquet files via an external table at LOCATION # 'partitioned' (see create.sql). The shared bench_download fetches the -# parquet files into CWD; move them into the expected subdir. +# parquet files into CWD; hard-link them into the expected subdir. +# +# Note: hard-link rather than move, so reruns don't re-download the files set -e mkdir -p partitioned -mv hits_*.parquet partitioned/ 2>/dev/null || true +for f in hits_*.parquet; do + ln -f "$f" "partitioned/$f" +done + sync diff --git a/datafusion/README.md b/datafusion/README.md index b7f6b5dd4..3a4de18ef 100644 --- a/datafusion/README.md +++ b/datafusion/README.md @@ -25,7 +25,7 @@ All with no EBS optimization and no instance store. 2. Wait for the status checks to pass, then ssh to EC2: `ssh ubuntu@{ip}` 3. `git clone https://github.com/ClickHouse/ClickBench` 4. `cd ClickBench/datafusion` -5. `vi benchmark.sh` and modify the following line to target the DataFusion version +5. 
`vi install` and modify the following line to target the DataFusion version ```bash git checkout 53.1.0 diff --git a/datafusion/benchmark.sh b/datafusion/benchmark.sh index 617422ddc..6df1aea0a 100755 --- a/datafusion/benchmark.sh +++ b/datafusion/benchmark.sh @@ -3,4 +3,7 @@ export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-single" export BENCH_DURABLE=yes export BENCH_RESTARTABLE=no +# skip concurrent_qps tests by default as datafusion-cli is configured +# for single user/single process usage +export BENCH_CONCURRENT_DURATION="${BENCH_CONCURRENT_DURATION:-0}" exec ../lib/benchmark-common.sh