Introduce templated benchmarks. (#301)

This PR introduces templated benchmarks for BenchBase. In essence, users can define their own query templates that are parametrized with one or more values at runtime. If multiple parameter combinations are possible, we will continue to cycle through all available combinations. A templated benchmark has the following structure: ```xml <templates> <template name="$QueryTemplateName"> <query>$SQLQuery</query> <types> <type>$ParameterType1</type> <type>$ParameterType2</type> </types> <values> <value>$ParameterValueA1</value> <value>$ParameterValueA2</value> ... </values> <values> <value>$ParameterValueB1</value> <value>$ParameterValueB2</value> ... </values> </template> ... </templates> ``` where `$ParameterType` is the integer `java.sql.Types` value (e.g., `INTEGER`, `VARCHAR`, etc.) and each `values` tag contains the `value` entries for one instantiation of the parameters set in `$SQLQuery`. The SQL query string is read as a `PreparedStatement`, i.e., parameters are defined in the string via a `?` placeholder. An example of a templated benchmark can be found in `data/templated/example.xml`. The file path for the XML template has to be defined in the workload configuration using the `query_templates_file` tag. An example configuration can be found in `config/sqlserver/sample_templated_config.xml`. The example can be executed if a loaded TPC-C instance is used as JDBC endpoint. Templated benchmarks are instantiated using `templated` as benchmark class when running BenchBase via the command line.
cmu-db · Oct 2, 2023 · 611f3d4 · 611f3d4
1 parent 109064e
commit 611f3d4
Show file tree

Hide file tree

Showing 33 changed files with 1,775 additions and 43 deletions.
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
@@ -89,7 +89,7 @@ jobs:
       fail-fast: false
       matrix:
         # BROKEN: tpch
-        benchmark: [ 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     steps:
       - name: Download artifact
         uses: actions/download-artifact@v3
@@ -112,7 +112,16 @@ jobs:
 
       - name: Run benchmark
         run: |
-          java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlite/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+          # For templated benchmarks, we need to preload some data for the test since by design, templated benchmarks do not support the 'load' operation
+          # In this case, we load the tpcc data.
+          if [[ ${{matrix.benchmark}} == templated ]]; then
+            # Disable synchronous mode for sqlite tpcc data loading to save some time.
+            java -jar benchbase.jar -b tpcc -c config/sqlite/sample_tpcc_nosync_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            # Run the templated benchmark.
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlite/sample_${{matrix.benchmark}}_config.xml --create=false --load=false --execute=true --json-histograms results/histograms.json
+          else
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlite/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+          fi
           # FIXME: Reduce the error rate so we don't need these overrides.
           if [ ${{matrix.benchmark}} == auctionmark ]; then
               ERRORS_THRESHOLD=0.02
@@ -134,7 +143,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     services:
       mariadb: # https://hub.docker.com/_/mariadb
         image: mariadb:latest
@@ -176,7 +185,16 @@ jobs:
           MARIADB_PORT: ${{ job.services.mariadb.ports[3306] }}
         run: |
           mysql -h127.0.0.1 -P$MARIADB_PORT -uadmin -ppassword -e "DROP DATABASE IF EXISTS benchbase; CREATE DATABASE benchbase"
-          java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mariadb/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+
+          # For templated benchmarks, we need to preload some data for the test since by design, templated benchmarks do not support the 'load' operation
+          # In this case, we load the tpcc data.
+          if [[ ${{matrix.benchmark}} == templated ]]; then
+            java -jar benchbase.jar -b tpcc -c config/mariadb/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mariadb/sample_${{matrix.benchmark}}_config.xml --create=false --load=false --execute=true --json-histograms results/histograms.json
+          else
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mariadb/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+          fi
+
           # FIXME: Reduce the error rate so we don't need these overrides.
           if [ ${{matrix.benchmark}} == auctionmark ]; then
               ERRORS_THRESHOLD=0.02
@@ -194,7 +212,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     services:
       mysql: # https://hub.docker.com/_/mysql
         image: mysql:latest
@@ -235,7 +253,16 @@ jobs:
           MYSQL_PORT: ${{ job.services.mysql.ports[3306] }}
         run: |
           mysql -h127.0.0.1 -P$MYSQL_PORT -uadmin -ppassword -e "DROP DATABASE IF EXISTS benchbase; CREATE DATABASE benchbase"
-          java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mysql/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+
+          # For templated benchmarks, we need to preload some data for the test since by design, templated benchmarks do not support the 'load' operation
+          # In this case, we load the tpcc data.
+          if [[ ${{matrix.benchmark}} == templated ]]; then
+            java -jar benchbase.jar -b tpcc -c config/mysql/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mysql/sample_${{matrix.benchmark}}_config.xml --create=false --load=false --execute=true --json-histograms results/histograms.json
+          else
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mysql/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+          fi
+
           # FIXME: Reduce the error rate so we don't need these overrides.
           if [ ${{matrix.benchmark}} == auctionmark ]; then
               ERRORS_THRESHOLD=0.02
@@ -253,7 +280,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     services:
       postgres: # https://hub.docker.com/_/postgres
         image: postgres:latest
@@ -292,7 +319,16 @@ jobs:
         run: |
           PGPASSWORD=password dropdb -h localhost -U admin benchbase --if-exists
           PGPASSWORD=password createdb -h localhost -U admin benchbase
-          java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/postgres/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+
+          # For templated benchmarks, we need to preload some data for the test since by design, templated benchmarks do not support the 'load' operation
+          # In this case, we load the tpcc data.
+          if [[ ${{matrix.benchmark}} == templated ]]; then
+            java -jar benchbase.jar -b tpcc -c config/postgres/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/postgres/sample_${{matrix.benchmark}}_config.xml --create=false --load=false --execute=true --json-histograms results/histograms.json
+          else
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/postgres/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+          fi
+
           # FIXME: Reduce the error rate so we don't need these overrides.
           if [ ${{matrix.benchmark}} == auctionmark ]; then
               ERRORS_THRESHOLD=0.02
@@ -362,7 +398,7 @@ jobs:
       matrix:
         # TODO: add more benchmarks
         #benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
-        benchmark: [ 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'sibench', 'smallbank', 'tatp', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'sibench', 'smallbank', 'tatp', 'tpcc', 'templated', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     services:
       sqlserver:
         image: mcr.microsoft.com/mssql/server:latest
@@ -423,7 +459,15 @@ jobs:
       - name: Run benchmark
         # Note: user/pass should match those used in sample configs.
         run: |
-          java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlserver/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+          # For templated benchmarks, we need to preload some data for the test since by design, templated benchmarks do not support the 'load' operation
+          # In this case, we load the tpcc data.
+          if [[ ${{matrix.benchmark}} == templated ]]; then
+            java -jar benchbase.jar -b tpcc -c config/sqlserver/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlserver/sample_${{matrix.benchmark}}_config.xml --create=false --load=false --execute=true --json-histograms results/histograms.json
+          else
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlserver/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+          fi
+
           # FIXME: Reduce the error rate so we don't need these overrides.
           if [ ${{matrix.benchmark}} == tatp ]; then
               ERRORS_THRESHOLD=0.05

diff --git a/config/mariadb/sample_templated_config.xml b/config/mariadb/sample_templated_config.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<parameters>
+
+    <!-- Connection details -->
+    <type>MARIADB</type>
+    <driver>org.mariadb.jdbc.Driver</driver>
+    <url>jdbc:mariadb://localhost:3306/benchbase?useServerPrepStmts</url>
+    <username>admin</username>
+    <password>password</password>
+    <isolation>TRANSACTION_SERIALIZABLE</isolation>
+    <batchsize>128</batchsize>
+
+    <!-- Note: this example template reuses TPC-C queries and data as a demonstration, but others are possible.
+      To test this sample as is, you must first run benchbase with parameters `load=true` and
+      `config=sample_tpcc_config.xml` to load the TPC-C data to the target database.
+      Other templated benchmarks need to ensure that the data is loaded manually prior to running the benchmark.
+    -->
+
+    <query_templates_file>data/templated/example.xml</query_templates_file>
+
+    <!-- The workload -->
+    <terminals>1</terminals>
+    <works>
+        <work>
+            <time>10</time>
+            <rate>100</rate>
+            <weights>30,20,10,30,10</weights>
+        </work>
+    </works>
+
+    <!-- Select all templates that are executed. -->
+    <transactiontypes>
+        <transactiontype>
+            <name>GetOrder</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetCust</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetCustNull</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetWarehouse</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetItemByPrice</name>
+        </transactiontype>
+    </transactiontypes>
+</parameters>
diff --git a/config/mysql/sample_templated_config.xml b/config/mysql/sample_templated_config.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<parameters>
+
+    <!-- Connection details -->
+    <type>MYSQL</type>
+    <driver>com.mysql.cj.jdbc.Driver</driver>
+    <url>jdbc:mysql://localhost:3306/benchbase?rewriteBatchedStatements=true&amp;allowPublicKeyRetrieval=True&amp;sslMode=DISABLED</url>
+    <username>admin</username>
+    <password>password</password>
+    <isolation>TRANSACTION_SERIALIZABLE</isolation>
+    <batchsize>128</batchsize>
+
+    <!-- Note: this example template reuses TPC-C queries and data as a demonstration, but others are possible.
+      To test this sample as is, you must first run benchbase with parameters `load=true` and
+      `config=sample_tpcc_config.xml` to load the TPC-C data to the target database.
+      Other templated benchmarks need to ensure that the data is loaded manually prior to running the benchmark.
+    -->
+
+    <query_templates_file>data/templated/example.xml</query_templates_file>
+
+    <!-- The workload -->
+    <terminals>1</terminals>
+    <works>
+        <work>
+            <time>10</time>
+            <rate>100</rate>
+            <weights>30,20,10,30,10</weights>
+        </work>
+    </works>
+
+    <!-- Select all templates that are executed. -->
+    <transactiontypes>
+        <transactiontype>
+            <name>GetOrder</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetCust</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetCustNull</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetWarehouse</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetItemByPrice</name>
+        </transactiontype>
+    </transactiontypes>
+</parameters>
diff --git a/config/plugin.xml b/config/plugin.xml
@@ -17,4 +17,5 @@
     <plugin name="smallbank">com.oltpbenchmark.benchmarks.smallbank.SmallBankBenchmark</plugin>
     <plugin name="hyadapt">com.oltpbenchmark.benchmarks.hyadapt.HYADAPTBenchmark</plugin>
     <plugin name="otmetrics">com.oltpbenchmark.benchmarks.otmetrics.OTMetricsBenchmark</plugin>
+    <plugin name="templated">com.oltpbenchmark.benchmarks.templated.TemplatedBenchmark</plugin>
 </plugins>
diff --git a/config/postgres/sample_templated_config.xml b/config/postgres/sample_templated_config.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<parameters>
+
+    <!-- Connection details -->
+    <type>POSTGRES</type>
+    <driver>org.postgresql.Driver</driver>
+    <url>jdbc:postgresql://localhost:5432/benchbase?sslmode=disable&amp;ApplicationName=templated&amp;reWriteBatchedInserts=true</url>
+    <username>admin</username>
+    <password>password</password>
+    <isolation>TRANSACTION_SERIALIZABLE</isolation>
+    <batchsize>128</batchsize>
+
+    <!-- Note: this example template reuses TPC-C queries and data as a demonstration, but others are possible.
+      To test this sample as is, you must first run benchbase with parameters `load=true` and
+      `config=sample_tpcc_config.xml` to load the TPC-C data to the target database.
+      Other templated benchmarks need to ensure that the data is loaded manually prior to running the benchmark.
+    -->
+
+    <query_templates_file>data/templated/example.xml</query_templates_file>
+
+    <!-- The workload -->
+    <terminals>1</terminals>
+    <works>
+        <work>
+            <time>10</time>
+            <rate>100</rate>
+            <weights>30,20,10,30,10</weights>
+        </work>
+    </works>
+
+    <!-- Select all templates that are executed. -->
+    <transactiontypes>
+        <transactiontype>
+            <name>GetOrder</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetCust</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetCustNull</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetWarehouse</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetItemByPrice</name>
+        </transactiontype>
+    </transactiontypes>
+</parameters>
diff --git a/config/sqlite/sample_templated_config.xml b/config/sqlite/sample_templated_config.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+<parameters>
+
+    <!-- Connection details -->
+    <type>SQLITE</type>
+    <driver>org.sqlite.JDBC</driver>
+    <url>jdbc:sqlite:tpcc.db</url>
+    <isolation>TRANSACTION_SERIALIZABLE</isolation>
+    <batchsize>128</batchsize>
+
+    <!-- Note: this example template reuses TPC-C queries and data as a demonstration, but others are possible.
+      To test this sample as is, you must first run benchbase with parameters `load=true` and
+      `config=sample_tpcc_config.xml` to load the TPC-C data to the target database.
+      Other templated benchmarks need to ensure that the data is loaded manually prior to running the benchmark.
+    -->
+
+    <query_templates_file>data/templated/example.xml</query_templates_file>
+
+    <!-- The workload -->
+    <terminals>1</terminals>
+    <works>
+        <work>
+            <time>10</time>
+            <rate>100</rate>
+            <weights>30,20,10,30,10</weights>
+        </work>
+    </works>
+
+    <!-- Select all templates that are executed. -->
+    <transactiontypes>
+        <transactiontype>
+            <name>GetOrder</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetCust</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetCustNull</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetWarehouse</name>
+        </transactiontype>
+        <transactiontype>
+            <name>GetItemByPrice</name>
+        </transactiontype>
+    </transactiontypes>
+</parameters>
diff --git a/config/sqlite/sample_tpcc_config.xml b/config/sqlite/sample_tpcc_config.xml
@@ -10,7 +10,7 @@
 
     <!-- Scale factor is the number of warehouses in TPCC -->
     <scalefactor>1</scalefactor>
-    
+
     <!-- SQLITE only supports one writer thread -->
     <loaderThreads>1</loaderThreads>