Skip to content

Commit

Permalink
HIVE-11107 : Support for Performance regression test suite with TPCDS…
Browse files Browse the repository at this point in the history
… (Hari Subramaniyan, reviewed by Ashutosh Chauhan)
  • Loading branch information
hsubramaniyan committed Dec 15, 2015
1 parent e091bc2 commit 09b6f9a
Show file tree
Hide file tree
Showing 130 changed files with 19,403 additions and 3 deletions.
285 changes: 285 additions & 0 deletions data/conf/perf-reg/hive-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<configuration>

<!-- Flags this configuration as a test configuration (see the description below). -->
<property>
<name>hive.in.test</name>
<value>true</value>
<description>Internal marker for test. Used for masking env-dependent values</description>
</property>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
<!-- resource). -->

<!-- Hive Execution Parameters -->
<!-- Temporary and scratch locations in this group are rooted at ${test.tmp.dir}. -->
<property>
<name>hadoop.tmp.dir</name>
<value>${test.tmp.dir}/hadoop-tmp</value>
<description>A base for other temporary directories.</description>
</property>

<!-- NOTE(review): no description was provided for this property. Presumably this is the -->
<!-- Tez container size in megabytes, as in the Hive configuration docs; confirm. -->
<property>
<name>hive.tez.container.size</name>
<value>128</value>
<description></description>
</property>

<property>
<name>hive.merge.tezfiles</name>
<value>false</value>
<description>Merge small files at the end of a Tez DAG</description>
</property>

<!-- Tez uses HiveInputFormat here, while hive.input.format further down is set to -->
<!-- CombineHiveInputFormat. -->
<property>
<name>hive.tez.input.format</name>
<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
<description>The default input format for tez. Tez groups splits in the AM.</description>
</property>

<property>
<name>hive.exec.scratchdir</name>
<value>${test.tmp.dir}/scratchdir</value>
<description>Scratch space for Hive jobs</description>
</property>

<!-- Metastore persistence for the test run: an embedded Derby database created on demand -->
<!-- under ${test.tmp.dir}, with schema auto-creation enabled and schema verification disabled. -->
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>

<property>
<name>datanucleus.fixedDatastore</name>
<value>false</value>
</property>

<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>

<property>
<name>hive.exec.local.scratchdir</name>
<value>${test.tmp.dir}/localscratchdir/</value>
<description>Local scratch space for Hive jobs</description>
</property>

<!-- "create=true" in the JDBC URL asks Derby to create the database if it does not exist. -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:derby:;databaseName=${test.tmp.dir}/junit_metastore_db;create=true</value>
</property>

<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
</property>

<!-- Credentials for the embedded test database only; they are stored in plain text here. -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>APP</value>
</property>

<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>mine</value>
</property>

<!-- Warehouse location is taken from ${test.warehouse.dir}; the metadb directory is a -->
<!-- file:// URI under ${test.tmp.dir}. -->
<property>
<!-- this should eventually be deprecated since the metastore should supply this -->
<name>hive.metastore.warehouse.dir</name>
<value>${test.warehouse.dir}</value>
<description></description>
</property>

<property>
<name>hive.metastore.metadb.dir</name>
<value>file://${test.tmp.dir}/metadb/</value>
<description>
Required by metastore server or if the uris argument below is not supplied
</description>
</property>

<!-- Test log, data and script directories plus the hive-exec jar location. The values are -->
<!-- built from ${test.tmp.dir}, ${hive.root}, ${maven.local.repository} and ${hive.version}, -->
<!-- none of which is defined in this file. -->
<property>
<name>test.log.dir</name>
<value>${test.tmp.dir}/log/</value>
<description></description>
</property>

<property>
<name>test.data.files</name>
<value>${hive.root}/data/files</value>
<description></description>
</property>

<property>
<name>test.data.scripts</name>
<value>${hive.root}/data/scripts</value>
<description></description>
</property>

<!-- Points at the hive-exec artifact inside the local Maven repository layout. -->
<property>
<name>hive.jar.path</name>
<value>${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar</value>
<description></description>
</property>

<property>
<name>hive.metastore.rawstore.impl</name>
<value>org.apache.hadoop.hive.metastore.ObjectStore</value>
<description>Name of the class that implements the org.apache.hadoop.hive.metastore.rawstore interface. This class is used for the storage and retrieval of raw metadata objects such as tables and databases.</description>
</property>

<!-- Query log location and the hooks run around each query in tests. -->
<property>
<name>hive.querylog.location</name>
<value>${test.tmp.dir}/tmp</value>
<description>Location of the structured hive logs</description>
</property>

<!-- Two pre-execute hooks are listed, comma separated: PreExecutePrinter and EnforceReadOnlyTables. -->
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
<description>Pre Execute Hook for Tests</description>
</property>

<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
<description>Post Execute Hook for Tests</description>
</property>

<!-- Locking/concurrency support is switched off for this configuration. -->
<property>
<name>hive.support.concurrency</name>
<value>false</value>
<description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description>
</property>

<!-- Registers the implementation class for the "pfile" file system scheme. -->
<property>
<name>fs.pfile.impl</name>
<value>org.apache.hadoop.fs.ProxyLocalFileSystem</value>
<description>A proxy for local file system used for cross file system testing</description>
</property>

<property>
<name>hive.exec.mode.local.auto</name>
<value>false</value>
<description>
Let hive determine whether to run in local mode automatically
Disabling this for tests so that minimr is not affected
</description>
</property>

<!-- Automatic map-join conversion is disabled and map-join hints are ignored. -->
<property>
<name>hive.auto.convert.join</name>
<value>false</value>
<description>Whether Hive enable the optimization about converting common join into mapjoin based on the input file size</description>
</property>

<property>
<name>hive.ignore.mapjoin.hint</name>
<value>true</value>
<description>Whether Hive ignores the mapjoin hint</description>
</property>

<!-- NOTE(review): presumably the sort buffer size in megabytes; confirm against the Hadoop docs. -->
<property>
<name>io.sort.mb</name>
<value>10</value>
</property>

<!-- NOTE(review): the description below refers to Hadoop 17-20 and looks dated; confirm it is still accurate. -->
<property>
<name>hive.input.format</name>
<value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
<description>The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat. </description>
</property>

<property>
<name>hive.default.rcfile.serde</name>
<value>org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe</value>
<description>The default SerDe hive will use for the rcfile format</description>
</property>

<property>
<name>hive.stats.dbclass</name>
<value>fs</value>
<description>The default storage that stores temporary hive statistics. Currently, fs type is supported</description>
</property>

<!-- Execution engine selection: this configuration runs queries on Tez. -->
<property>
<name>hive.execution.engine</name>
<value>tez</value>
<description>Whether to use MR or Tez</description>
</property>

<!-- Node blacklisting in the Tez application master is turned off. -->
<property>
<name>tez.am.node-blacklisting.enabled</name>
<value>false</value>
</property>

<!-- Container prewarm is enabled with three containers. -->
<property>
<name>hive.prewarm.enabled</name>
<value>true</value>
<description>
Enables container prewarm for tez (hadoop 2 only)
</description>
</property>

<property>
<name>hive.prewarm.numcontainers</name>
<value>3</value>
<description>
Controls the number of containers to prewarm for tez (hadoop 2 only)
</description>
</property>

<property>
<name>hive.in.tez.test</name>
<value>true</value>
<description>
Indicates that we are in tez testing mode.
</description>
</property>

<!-- hive.tez.java.opts and tez.am.launch.cmd-opts carry identical JVM flags that point log4j2 -->
<!-- at tez-container-log4j2.properties. NOTE(review): presumably they are meant to stay in -->
<!-- sync; confirm. The leading and trailing spaces inside the values are part of the values. -->
<property>
<name>hive.tez.java.opts</name>
<value> -Dlog4j.configurationFile=tez-container-log4j2.properties -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
</property>

<property>
<name>tez.am.launch.cmd-opts</name>
<value> -Dlog4j.configurationFile=tez-container-log4j2.properties -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
</property>

<!-- Metastore fast path is enabled. -->
<property>
<name>hive.metastore.fastpath</name>
<value>true</value>
</property>

<!-- hive.metastore.rawstore.impl is not repeated here: it is already declared earlier in -->
<!-- this file with the same value (org.apache.hadoop.hive.metastore.ObjectStore), so a second -->
<!-- declaration was redundant and risked the two copies drifting apart. -->

<!-- Enables the ORC split footer cache setting named below. -->
<property>
<name>hive.orc.splits.ms.footer.cache.enabled</name>
<value>true</value>
</property>

</configuration>
6 changes: 6 additions & 0 deletions data/conf/perf-reg/tez-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Tez site configuration for the perf-reg test setup. -->
<configuration>
<!-- Sets the DAG scheduler class used by the Tez application master. -->
<property>
<name>tez.am.dag.scheduler.class</name>
<value>org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled</value>
</property>
</configuration>
102 changes: 102 additions & 0 deletions data/files/tpcds-perf/metastore_export/csv/TABLE_PARAMS.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
_store_,COLUMN_STATS_ACCURATE,true
_store_,numFiles,1
_store_,numRows,1704
_store_,rawDataSize,3256276
_store_,totalSize,101707
_store_,transient_lastDdlTime,143456_customer_demographics_
_call_center_,COLUMN_STATS_ACCURATE,true
_call_center_,numFiles,1
_call_center_,numRows,60
_call_center_,rawDataSize,122700
_call_center_,totalSize,10347
_call_center_,transient_lastDdlTime,1434561922
_catalog_page_,COLUMN_STATS_ACCURATE,true
_catalog_page_,numFiles,1
_catalog_page_,numRows,46000
_catalog_page_,rawDataSize,21198808
_catalog_page_,totalSize,1576662
_catalog_page_,transient_lastDdlTime,1434561925
_customer_,COLUMN_STATS_ACCURATE,true
_customer_,numFiles,538
_customer_,numRows,80000000
_customer_,rawDataSize,68801615852
_customer_,totalSize,3143935054
_customer_,transient_lastDdlTime,1434561966
_customer_address_,COLUMN_STATS_ACCURATE,true
_customer_address_,numFiles,274
_customer_address_,numRows,40000000
_customer_address_,rawDataSize,40595195284
_customer_address_,totalSize,530195843
_customer_address_,transient_lastDdlTime,1434561994
_customer_demographics_,COLUMN_STATS_ACCURATE,true
_customer_demographics_,numFiles,8
_customer_demographics_,numRows,1920800
_customer_demographics_,rawDataSize,717_income_band_59
_customer_demographics_,totalSize,323062
_customer_demographics_,transient_lastDdlTime,1434562071
_date_dim_,COLUMN_STATS_ACCURATE,true
_date_dim_,numFiles,1
_date_dim_,numRows,73049
_date_dim_,rawDataSize,81741831
_date_dim_,totalSize,362925
_date_dim_,transient_lastDdlTime,1434562075
_household_demographics_,COLUMN_STATS_ACCURATE,true
_household_demographics_,numFiles,1
_household_demographics_,numRows,7200
_household_demographics_,rawDataSize,770400
_household_demographics_,totalSize,901
_household_demographics_,transient_lastDdlTime,1434562078
_income_band_,COLUMN_STATS_ACCURATE,true
_income_band_,numFiles,1
_income_band_,numRows,20
_income_band_,rawDataSize,240
_income_band_,totalSize,399
_income_band_,transient_lastDdlTime,1434562081
_item_,COLUMN_STATS_ACCURATE,true
_item_,numFiles,9
_item_,numRows,462000
_item_,rawDataSize,663560457
_item_,totalSize,29760748
_item_,transient_lastDdlTime,1434562091
_promotion_,COLUMN_STATS_ACCURATE,true
_promotion_,numFiles,1
_promotion_,numRows,2300
_promotion_,rawDataSize,2713420
_promotion_,totalSize,63964
_promotion_,transient_lastDdlTime,1434562093
_reason_,COLUMN_STATS_ACCURATE,true
_reason_,numFiles,1
_reason_,numRows,72
_reason_,rawDataSize,14400
_reason_,totalSize,1024
_reason_,transient_lastDdlTime,1434562095
_ship_mode_,COLUMN_STATS_ACCURATE,true
_ship_mode_,numFiles,0
_ship_mode_,numRows,0
_ship_mode_,rawDataSize,0
_ship_mode_,totalSize,0
_ship_mode_,transient_lastDdlTime,1434562097
_time_dim_,COLUMN_STATS_ACCURATE,true
_time_dim_,numFiles,1
_time_dim_,numRows,86400
_time_dim_,rawDataSize,40694400
_time_dim_,totalSize,133902
_time_dim_,transient_lastDdlTime,1434562099
_warehouse_,COLUMN_STATS_ACCURATE,true
_warehouse_,numFiles,1
_warehouse_,numRows,27
_warehouse_,rawDataSize,27802
_warehouse_,totalSize,2971
_warehouse_,transient_lastDdlTime,1434562102
_web_page_,COLUMN_STATS_ACCURATE,true
_web_page_,numFiles,1
_web_page_,numRows,4602
_web_page_,rawDataSize,2696178
_web_page_,totalSize,50572
_web_page_,transient_lastDdlTime,1434562104
_web_site_,COLUMN_STATS_ACCURATE,true
_web_site_,numFiles,1
_web_site_,numRows,84
_web_site_,rawDataSize,155408
_web_site_,totalSize,11271
_web_site_,transient_lastDdlTime,1434562107
Loading

0 comments on commit 09b6f9a

Please sign in to comment.