From 4a46ddf22df26a6a26f7a2152719dd4e26216f8b Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Tue, 28 Nov 2023 19:11:37 -0800 Subject: [PATCH] Add notebook for using qualification user tools in Databricks (#334) * Add notebook for using qualification user tools Signed-off-by: Partho Sarthi * Rebase branch and fix env setup Signed-off-by: Partho Sarthi --------- Signed-off-by: Partho Sarthi --- ...fication User Tool Notebook Template.ipynb | 2252 +++++++++++++++++ 1 file changed, 2252 insertions(+) create mode 100644 tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template.ipynb diff --git a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template.ipynb b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template.ipynb new file mode 100644 index 000000000..4f1519235 --- /dev/null +++ b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template.ipynb @@ -0,0 +1,2252 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "df33c614-2ecc-47a0-8600-bc891681997f", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Welcome to the Qualification User Tool for the RAPIDS Accelerator for Apache Spark\n", + "To run the user tool, you need to enter a log path that represents the DBFS location for your Spark CPU event logs. Then you can select \"Run all\" to execute the notebook. After the notebook completes, you will see various output tables show up below. 
More options for running the qualification user tool can be found here: https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#running-the-qualification-tool-standalone-for-csp-environments-on-spark-event-logs.\n", + "\n", + "## Summary Output\n", + "The report represents the entire app execution, including unsupported operators and non-SQL operations. By default, the applications and queries are sorted in descending order by the following fields:\n", + "- Recommendation;\n", + "- Estimated GPU Speed-up;\n", + "- Estimated GPU Time Saved; and\n", + "- End Time.\n", + "\n", + "## Stages Output\n", + "For each stage used in SQL operations, the Qualification tool generates the following information:\n", + "1. App ID\n", + "1. Stage ID\n", + "1. Average Speedup Factor: the average estimated speed-up of all the operators in the given stage.\n", + "1. Stage Task Duration: amount of time spent in tasks of SQL Dataframe operations for the given stage.\n", + "1. Unsupported Task Duration: sum of task durations for the unsupported operators. For more details, see Supported Operators.\n", + "1. Stage Estimated: True or False, indicating whether the stage duration had to be estimated.\n", + "\n", + "## Execs Output\n", + "The Qualification tool generates a report of the “Exec” in the “SparkPlan” or “Executor Nodes” along with the estimated acceleration on the GPU. Please refer to the Supported Operators guide for more details on limitations on UDFs and unsupported operators.\n", + "1. App ID\n", + "1. SQL ID\n", + "1. Exec Name: for example, Filter or HashAggregate.\n", + "1. Expression Name\n", + "1. Task Speedup Factor: the average acceleration of the operators, computed as the original CPU duration of the operator divided by its GPU duration. The tool uses historical queries and benchmarks to estimate a speed-up at an individual operator level to calculate how much a specific operator would accelerate on GPU.\n", + "1. 
Exec Duration: wall-clock time measured from when the operator starts until it completes.\n", + "1. SQL Node Id\n", + "1. Exec Is Supported: whether the Exec is supported by RAPIDS or not. Please refer to the Supported Operators section.\n", + "1. Exec Stages: an array of stage IDs\n", + "1. Exec Children\n", + "1. Exec Children Node Ids\n", + "1. Exec Should Remove: whether the Op is removed from the migrated plan." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "811645aa-ec72-4bd9-8b79-fb1cadf768f8", + "showTitle": true, + "title": "Environment Setup" + }, + "jupyter": { + "source_hidden": true + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "Setting up the virtual environment 'databricks_venv'.\nSpark Rapids User Tools installed successfully.\n" + ] + } + ], + "source": [ + "%sh\n", + "VENV=\"databricks_venv\"\n", + "echo \"Setting up the virtual environment '$VENV'.\"\n", + "(apt update && \\\n", + "apt install -y python3-venv && \\\n", + "python3 -m venv $VENV && \\\n", + "source $VENV/bin/activate && \\\n", + "echo \"Installing Spark Rapids User Tools\" && \\\n", + "pip install spark-rapids-user-tools) > /dev/null 2>&1 || \\\n", + "{ echo \"Error: Failed to install Spark Rapids User Tools\"; exit 1; }\n", + "echo \"Spark Rapids User Tools installed successfully.\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "acf401a3-12d3-4236-a6c5-8fe8990b153a", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "dbutils.widgets.text(\"log_path\", \"\")\n", + 
"eventlog_string=dbutils.widgets.get(\"log_path\")\n", + "\n", + "dbutils.widgets.text(\"output_path\", \"\")\n", + "outputpath_string=dbutils.widgets.get(\"output_path\")\n", + "\n", + "dbutils.widgets.dropdown(\"csp\", \"aws\", [\"aws\", \"azure\"])\n", + "csp_string=dbutils.widgets.get(\"csp\")\n", + "\n", + "os.environ[\"EVENTLOG_PATH\"] = eventlog_string\n", + "os.environ[\"OUTPUT_PATH\"] = outputpath_string\n", + "os.environ[\"PLATFORM\"] = f\"databricks-{csp_string}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "693b5ee0-7500-43f3-b3e2-717fd5468aa8", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sh\n", + "source databricks_venv/bin/activate\n", + "spark_rapids_user_tools \"$PLATFORM\" qualification --eventlogs \"$EVENTLOG_PATH\" --local_folder \"$OUTPUT_PATH\" --verbose &> \"$OUTPUT_PATH/qual_debug.log\"" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "05f96ca1-1b08-494c-a12b-7e6cc3dcc546", + "showTitle": true, + "title": "Parse Output" + }, + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "log_path = os.path.join(outputpath_string, \"qual_debug.log\")\n", + "\n", + "try:\n", + " with open(log_path, 'r') as file:\n", + " output_folder = next((line.split(\":\", 1)[1].strip() for line in file if line.startswith(\"Qualification tool output: \")), None)\n", + " if output_folder is None:\n", + " raise ValueError(f\"Cannot find output folder. 
See logs: {log_path}\")\n", + "except FileNotFoundError:\n", + " print(f\"File not found: {log_path}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "bbe50fde-0bd6-4281-95fd-6a1ec6f17ab2", + "showTitle": false, + "title": "" + } + }, + "source": [ + "## Summary Output" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "fb8edb26-e173-47ff-92a1-463baec7c06b", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
App NameApp IDRecommendationEstimated GPU SpeedupEstimated GPU DurationEstimated GPU Time SavedSQL DF DurationSQL Dataframe Task DurationApp DurationGPU OpportunityExecutor CPU Time PercentSQL Ids with FailuresUnsupported Read File Formats and TypesUnsupported Write Data FormatComplex TypesNested Complex TypesPotential ProblemsLongest SQL DurationNONSQL Task Duration Plus OverheadUnsupported Task DurationSupported SQL DF Task DurationTask Speedup FactorApp Duration EstimatedUnsupported ExecsUnsupported ExpressionsEstimated Job Frequency (monthly)
TPC-DS Like Bench q1app-20220209224147-0004Recommended1.422476.589130.41134177598550316071341737.09nullnullnullnullnullnull134123012015075985503.13falseExecute CreateViewCommandnull30
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "TPC-DS Like Bench q1", + "app-20220209224147-0004", + "Recommended", + 1.4, + 22476.58, + 9130.41, + 13417, + 7598550, + 31607, + 13417, + 37.09, + null, + null, + null, + null, + null, + null, + 13412, + 3012015, + 0, + 7598550, + 3.13, + false, + "Execute CreateViewCommand", + null, + 30 + ] + ], + "datasetInfos": [], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": {}, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "App Name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "App ID", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "Recommendation", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "Estimated GPU Speedup", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "Estimated GPU Duration", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "Estimated GPU Time Saved", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "SQL DF Duration", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "SQL Dataframe Task Duration", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "App Duration", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "GPU Opportunity", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "Executor CPU Time Percent", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "SQL Ids with Failures", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "Unsupported Read File Formats and Types", + "type": 
"\"double\"" + }, + { + "metadata": "{}", + "name": "Unsupported Write Data Format", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "Complex Types", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "Nested Complex Types", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "Potential Problems", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "Longest SQL Duration", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "NONSQL Task Duration Plus Overhead", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "Unsupported Task Duration", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "Supported SQL DF Task Duration", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "Task Speedup Factor", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "App Duration Estimated", + "type": "\"boolean\"" + }, + { + "metadata": "{}", + "name": "Unsupported Execs", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "Unsupported Expressions", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "Estimated Job Frequency (monthly)", + "type": "\"long\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "summary_output=pd.read_csv(os.path.join(output_folder, \"rapids_4_spark_qualification_output.csv\"))\n", + "display(summary_output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6756159b-30ca-407a-ab6b-9c29ced01ea6", + "showTitle": false, + "title": "" + } + }, + "source": [ + "## Stages Output" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "cdde6177-db5f-434a-995b-776678a64a3a", + "showTitle": false, + 
"title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
App IDStage IDAverage Speedup FactorStage Task DurationUnsupported Task DurationStage EstimatedNumber of transitions from or to GPU
app-20220209224147-0004374.35953090false0
app-20220209224147-0004322.679690false0
app-20220209224147-0004363.0216310760false0
app-20220209224147-0004394.883790290false0
app-20220209224147-0004332.6911413110false0
app-20220209224147-0004402.452180false0
app-20220209224147-0004312.679140false0
app-20220209224147-0004342.6926254370false0
app-20220209224147-0004352.9311666720false0
app-20220209224147-0004382.99576150false0
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "app-20220209224147-0004", + 37, + 4.3, + 595309, + 0, + false, + 0 + ], + [ + "app-20220209224147-0004", + 32, + 2.67, + 969, + 0, + false, + 0 + ], + [ + "app-20220209224147-0004", + 36, + 3.02, + 1631076, + 0, + false, + 0 + ], + [ + "app-20220209224147-0004", + 39, + 4.88, + 379029, + 0, + false, + 0 + ], + [ + "app-20220209224147-0004", + 33, + 2.69, + 1141311, + 0, + false, + 0 + ], + [ + "app-20220209224147-0004", + 40, + 2.45, + 218, + 0, + false, + 0 + ], + [ + "app-20220209224147-0004", + 31, + 2.67, + 914, + 0, + false, + 0 + ], + [ + "app-20220209224147-0004", + 34, + 2.69, + 2625437, + 0, + false, + 0 + ], + [ + "app-20220209224147-0004", + 35, + 2.93, + 1166672, + 0, + false, + 0 + ], + [ + "app-20220209224147-0004", + 38, + 2.99, + 57615, + 0, + false, + 0 + ] + ], + "datasetInfos": [], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": {}, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "App ID", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "Stage ID", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "Average Speedup Factor", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "Stage Task Duration", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "Unsupported Task Duration", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "Stage Estimated", + "type": "\"boolean\"" + }, + { + "metadata": "{}", + "name": "Number of transitions from or to GPU", + "type": "\"long\"" + } + ], + 
"type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "stages_output=pd.read_csv(os.path.join(output_folder, \"rapids_4_spark_qualification_output_stages.csv\"))\n", + "display(stages_output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "4d7ce219-ae75-4a0c-a78c-4e7f25b8cd6f", + "showTitle": false, + "title": "" + } + }, + "source": [ + "## Execs Output" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "998b0c51-0cb6-408e-a01a-d1f5b1a61e1f", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
App IDSQL IDExec NameExpression NameTask Speedup FactorExec DurationSQL Node IdExec Is SupportedExec StagesExec ChildrenExec Children Node IdsExec Should Remove
app-20220209224147-00048Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424WholeStageCodegen (7)WholeStageCodegen (7)3.2385810046true35HashAggregate:HashAggregate47:48false
app-20220209224147-000424TakeOrderedAndProjectnull2.4501true39:40nullnullfalse
app-20220209224147-000424WholeStageCodegen (9)WholeStageCodegen (9)3.151059true31Project:Filter:ColumnarToRow60:61:62false
app-20220209224147-000424Execute InsertIntoHadoopFsRelationCommand parquetnull2.4500truenullnullnullfalse
app-20220209224147-000424SortMergeJoinnull20.57012true37nullnullfalse
app-20220209224147-00045Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Filternull3.75038true32nullnullfalse
app-20220209224147-000424Scan parquetnull2.451942970true38nullnullfalse
app-20220209224147-000424Exchangenull2.78242345true35:37nullnullfalse
app-20220209224147-000410Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Filternull3.75032true32nullnulltrue
app-20220209224147-000424WholeStageCodegen (6)WholeStageCodegen (6)3.06157938450true34HashAggregate:Project:BroadcastHashJoin:Filter:ColumnarToRow51:52:53:54:55false
app-20220209224147-000424HashAggregatenull3.23047true35nullnullfalse
app-20220209224147-000424ColumnarToRownull1.0039false32nullnulltrue
app-20220209224147-000424Projectnull2.4503truenullnullnullfalse
app-20220209224147-00042Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000412Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424ColumnarToRownull1.0062false31nullnulltrue
app-20220209224147-00049Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Exchangenull2.78347true37:39nullnullfalse
app-20220209224147-00041Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Projectnull2.45037truenullnullnullfalse
app-20220209224147-000416Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424WholeStageCodegen (13)WholeStageCodegen (13)6.11579164true39Sort65false
app-20220209224147-000424SortMergeJoinnull20.5704true39nullnullfalse
app-20220209224147-000424WholeStageCodegen (8)WholeStageCodegen (8)4.3611634141true37Sort:Filter:HashAggregate42:43:44false
app-20220209224147-000424Scan parquetnull2.4554840true32nullnullfalse
app-20220209224147-000424HashAggregatenull3.23044true37nullnullfalse
app-20220209224147-000424WholeStageCodegen (10)WholeStageCodegen (10)7.071794158true37Project:BroadcastHashJoin:Project:SortMergeJoin9:10:11:12false
app-20220209224147-000424HashAggregatenull3.23048true35nullnullfalse
app-20220209224147-000424Filternull3.75068true38nullnullfalse
app-20220209224147-000424Scan parquetnull2.4577446456true34nullnullfalse
app-20220209224147-000424WholeStageCodegen (1)WholeStageCodegen (1)3.156736true32Project:Filter:ColumnarToRow37:38:39false
app-20220209224147-000413Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424BroadcastHashJoinnull2.82023true33nullnullfalse
app-20220209224147-000424Filternull3.75043true37nullnullfalse
app-20220209224147-000424ColumnarToRownull1.0033false32nullnulltrue
app-20220209224147-000417Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Filternull3.75024true33nullnullfalse
app-20220209224147-000424WholeStageCodegen (12)WholeStageCodegen (12)3.753194467true38Filter:ColumnarToRow68:69false
app-20220209224147-000424Scan parquetnull2.4528286426true33nullnullfalse
app-20220209224147-000418Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424HashAggregatenull3.23051true34nullnullfalse
app-20220209224147-000424BroadcastHashJoinnull2.82010true37nullnullfalse
app-20220209224147-000424Sortnull6.11065true39nullnullfalse
app-20220209224147-000424WholeStageCodegen (2)WholeStageCodegen (2)3.0683372720true33HashAggregate:Project:BroadcastHashJoin:Filter:ColumnarToRow21:22:23:24:25false
app-20220209224147-000424WholeStageCodegen (3)WholeStageCodegen (3)3.4975826916true36Filter:HashAggregate17:18false
app-20220209224147-000424BroadcastExchangenull2.45146335truenullnullnullfalse
app-20220209224147-000424SubqueryBroadcastnull2.45148627truenullnullnullfalse
app-20220209224147-000421Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424HashAggregatenull3.23018true36nullnullfalse
app-20220209224147-000419Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Projectnull2.4509truenullnullnullfalse
app-20220209224147-000424Projectnull2.45052truenullnullnullfalse
app-20220209224147-000424Filternull3.75054true34nullnullfalse
app-20220209224147-000424HashAggregatenull3.23021true33nullnullfalse
app-20220209224147-000424ColumnarToRownull1.0025false33nullnulltrue
app-20220209224147-000424BroadcastHashJoinnull2.82053true34nullnullfalse
app-20220209224147-000424BroadcastExchangenull2.45142458truenullnullnullfalse
app-20220209224147-00044Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424ColumnarToRownull1.0069false38nullnulltrue
app-20220209224147-000422Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000415Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Projectnull2.45011truenullnullnullfalse
app-20220209224147-000424WholeStageCodegen (1)WholeStageCodegen (1)1.056730true32Project:Filter:ColumnarToRow31:32:33true
app-20220209224147-000424ReusedExchangenull1.0028truenullnullnulltrue
app-20220209224147-000423Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Scan parquetnull2.4550663true31nullnullfalse
app-20220209224147-000424Projectnull2.45031truenullnullnulltrue
app-20220209224147-000424Exchangenull2.78794015true36:37nullnullfalse
app-20220209224147-000424WholeStageCodegen (14)WholeStageCodegen (14)11.511728102true39Project:SortMergeJoin3:4false
app-20220209224147-00046Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424ColumnarToRownull1.0055false34nullnulltrue
app-20220209224147-000424Sortnull6.11042true37nullnullfalse
app-20220209224147-000424Scan parquetnull2.4554834true32nullnulltrue
app-20220209224147-000424Projectnull2.45022truenullnullnullfalse
app-20220209224147-000411Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424WholeStageCodegen (4)WholeStageCodegen (4)6.1141238013true37Sort14false
app-20220209224147-00047Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000420Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Filternull3.75017true36nullnullfalse
app-20220209224147-000424BroadcastExchangenull2.45146329truenullnullnulltrue
app-20220209224147-000424Exchangenull2.781024449true34:35nullnullfalse
app-20220209224147-000414Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-00040Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Sortnull6.11014true37nullnullfalse
app-20220209224147-00043Execute CreateViewCommandnull1.000falsenullnullnullfalse
app-20220209224147-000424Exchangenull2.7813066true38:39nullnullfalse
app-20220209224147-000424Filternull3.75061true31nullnullfalse
app-20220209224147-000424Sortnull6.1106true39nullnullfalse
app-20220209224147-000424Projectnull2.45060truenullnullnullfalse
app-20220209224147-000424WholeStageCodegen (11)WholeStageCodegen (11)6.112608045true39Sort6false
app-20220209224147-000424SubqueryBroadcastnull2.45148657truenullnullnullfalse
app-20220209224147-000424Exchangenull2.781614819true33:36nullnullfalse
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "app-20220209224147-0004", + 8, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (7)", + "WholeStageCodegen (7)", + 3.23, + 858100, + 46, + true, + "35", + "HashAggregate:HashAggregate", + "47:48", + false + ], + [ + "app-20220209224147-0004", + 24, + "TakeOrderedAndProject", + null, + 2.45, + 0, + 1, + true, + "39:40", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (9)", + "WholeStageCodegen (9)", + 3.1, + 510, + 59, + true, + "31", + "Project:Filter:ColumnarToRow", + "60:61:62", + false + ], + [ + "app-20220209224147-0004", + 24, + "Execute InsertIntoHadoopFsRelationCommand parquet", + null, + 2.45, + 0, + 0, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "SortMergeJoin", + null, + 20.57, + 0, + 12, + true, + "37", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 5, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Filter", + null, + 3.75, + 0, + 38, + true, + "32", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Scan parquet", + null, + 2.45, + 19429, + 70, + true, + "38", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Exchange", + null, + 2.78, + 2423, + 45, + true, + "35:37", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 10, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Filter", + null, + 3.75, + 0, + 32, + true, + 
"32", + null, + null, + true + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (6)", + "WholeStageCodegen (6)", + 3.06, + 1579384, + 50, + true, + "34", + "HashAggregate:Project:BroadcastHashJoin:Filter:ColumnarToRow", + "51:52:53:54:55", + false + ], + [ + "app-20220209224147-0004", + 24, + "HashAggregate", + null, + 3.23, + 0, + 47, + true, + "35", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "ColumnarToRow", + null, + 1.0, + 0, + 39, + false, + "32", + null, + null, + true + ], + [ + "app-20220209224147-0004", + 24, + "Project", + null, + 2.45, + 0, + 3, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 2, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 12, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "ColumnarToRow", + null, + 1.0, + 0, + 62, + false, + "31", + null, + null, + true + ], + [ + "app-20220209224147-0004", + 9, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Exchange", + null, + 2.78, + 34, + 7, + true, + "37:39", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 1, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Project", + null, + 2.45, + 0, + 37, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 16, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (13)", + "WholeStageCodegen (13)", + 6.11, + 5791, + 64, + true, + "39", + "Sort", + "65", + false + ], + [ + "app-20220209224147-0004", + 24, + "SortMergeJoin", + null, + 20.57, + 0, + 4, + true, + "39", + 
null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (8)", + "WholeStageCodegen (8)", + 4.36, + 116341, + 41, + true, + "37", + "Sort:Filter:HashAggregate", + "42:43:44", + false + ], + [ + "app-20220209224147-0004", + 24, + "Scan parquet", + null, + 2.45, + 548, + 40, + true, + "32", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "HashAggregate", + null, + 3.23, + 0, + 44, + true, + "37", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (10)", + "WholeStageCodegen (10)", + 7.07, + 179415, + 8, + true, + "37", + "Project:BroadcastHashJoin:Project:SortMergeJoin", + "9:10:11:12", + false + ], + [ + "app-20220209224147-0004", + 24, + "HashAggregate", + null, + 3.23, + 0, + 48, + true, + "35", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Filter", + null, + 3.75, + 0, + 68, + true, + "38", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Scan parquet", + null, + 2.45, + 774464, + 56, + true, + "34", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (1)", + "WholeStageCodegen (1)", + 3.1, + 567, + 36, + true, + "32", + "Project:Filter:ColumnarToRow", + "37:38:39", + false + ], + [ + "app-20220209224147-0004", + 13, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "BroadcastHashJoin", + null, + 2.82, + 0, + 23, + true, + "33", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Filter", + null, + 3.75, + 0, + 43, + true, + "37", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "ColumnarToRow", + null, + 1.0, + 0, + 33, + false, + "32", + null, + null, + true + ], + [ + "app-20220209224147-0004", + 17, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Filter", + null, 
+ 3.75, + 0, + 24, + true, + "33", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (12)", + "WholeStageCodegen (12)", + 3.75, + 31944, + 67, + true, + "38", + "Filter:ColumnarToRow", + "68:69", + false + ], + [ + "app-20220209224147-0004", + 24, + "Scan parquet", + null, + 2.45, + 282864, + 26, + true, + "33", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 18, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "HashAggregate", + null, + 3.23, + 0, + 51, + true, + "34", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "BroadcastHashJoin", + null, + 2.82, + 0, + 10, + true, + "37", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Sort", + null, + 6.11, + 0, + 65, + true, + "39", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (2)", + "WholeStageCodegen (2)", + 3.06, + 833727, + 20, + true, + "33", + "HashAggregate:Project:BroadcastHashJoin:Filter:ColumnarToRow", + "21:22:23:24:25", + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (3)", + "WholeStageCodegen (3)", + 3.49, + 758269, + 16, + true, + "36", + "Filter:HashAggregate", + "17:18", + false + ], + [ + "app-20220209224147-0004", + 24, + "BroadcastExchange", + null, + 2.45, + 1463, + 35, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "SubqueryBroadcast", + null, + 2.45, + 1486, + 27, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 21, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "HashAggregate", + null, + 3.23, + 0, + 18, + true, + "36", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 19, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + 
false + ], + [ + "app-20220209224147-0004", + 24, + "Project", + null, + 2.45, + 0, + 9, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Project", + null, + 2.45, + 0, + 52, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Filter", + null, + 3.75, + 0, + 54, + true, + "34", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "HashAggregate", + null, + 3.23, + 0, + 21, + true, + "33", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "ColumnarToRow", + null, + 1.0, + 0, + 25, + false, + "33", + null, + null, + true + ], + [ + "app-20220209224147-0004", + 24, + "BroadcastHashJoin", + null, + 2.82, + 0, + 53, + true, + "34", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "BroadcastExchange", + null, + 2.45, + 1424, + 58, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 4, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "ColumnarToRow", + null, + 1.0, + 0, + 69, + false, + "38", + null, + null, + true + ], + [ + "app-20220209224147-0004", + 22, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 15, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Project", + null, + 2.45, + 0, + 11, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (1)", + "WholeStageCodegen (1)", + 1.0, + 567, + 30, + true, + "32", + "Project:Filter:ColumnarToRow", + "31:32:33", + true + ], + [ + "app-20220209224147-0004", + 24, + "ReusedExchange", + null, + 1.0, + 0, + 28, + true, + null, + null, + null, + true + ], + [ + "app-20220209224147-0004", + 23, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, 
+ false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Scan parquet", + null, + 2.45, + 506, + 63, + true, + "31", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Project", + null, + 2.45, + 0, + 31, + true, + null, + null, + null, + true + ], + [ + "app-20220209224147-0004", + 24, + "Exchange", + null, + 2.78, + 7940, + 15, + true, + "36:37", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (14)", + "WholeStageCodegen (14)", + 11.51, + 172810, + 2, + true, + "39", + "Project:SortMergeJoin", + "3:4", + false + ], + [ + "app-20220209224147-0004", + 6, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "ColumnarToRow", + null, + 1.0, + 0, + 55, + false, + "34", + null, + null, + true + ], + [ + "app-20220209224147-0004", + 24, + "Sort", + null, + 6.11, + 0, + 42, + true, + "37", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Scan parquet", + null, + 2.45, + 548, + 34, + true, + "32", + null, + null, + true + ], + [ + "app-20220209224147-0004", + 24, + "Project", + null, + 2.45, + 0, + 22, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 11, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (4)", + "WholeStageCodegen (4)", + 6.11, + 412380, + 13, + true, + "37", + "Sort", + "14", + false + ], + [ + "app-20220209224147-0004", + 7, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 20, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Filter", + null, + 3.75, + 0, + 17, + true, + "36", + null, + null, + false + ], + [ + 
"app-20220209224147-0004", + 24, + "BroadcastExchange", + null, + 2.45, + 1463, + 29, + true, + null, + null, + null, + true + ], + [ + "app-20220209224147-0004", + 24, + "Exchange", + null, + 2.78, + 10244, + 49, + true, + "34:35", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 14, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 0, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Sort", + null, + 6.11, + 0, + 14, + true, + "37", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 3, + "Execute CreateViewCommand", + null, + 1.0, + 0, + 0, + false, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Exchange", + null, + 2.78, + 130, + 66, + true, + "38:39", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Filter", + null, + 3.75, + 0, + 61, + true, + "31", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Sort", + null, + 6.11, + 0, + 6, + true, + "39", + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Project", + null, + 2.45, + 0, + 60, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "WholeStageCodegen (11)", + "WholeStageCodegen (11)", + 6.11, + 260804, + 5, + true, + "39", + "Sort", + "6", + false + ], + [ + "app-20220209224147-0004", + 24, + "SubqueryBroadcast", + null, + 2.45, + 1486, + 57, + true, + null, + null, + null, + false + ], + [ + "app-20220209224147-0004", + 24, + "Exchange", + null, + 2.78, + 16148, + 19, + true, + "33:36", + null, + null, + false + ] + ], + "datasetInfos": [], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": {}, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + 
"yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "App ID", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "SQL ID", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "Exec Name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "Expression Name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "Task Speedup Factor", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "Exec Duration", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "SQL Node Id", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "Exec Is Supported", + "type": "\"boolean\"" + }, + { + "metadata": "{}", + "name": "Exec Stages", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "Exec Children", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "Exec Children Node Ids", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "Exec Should Remove", + "type": "\"boolean\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "execs_output=pd.read_csv(os.path.join(output_folder, \"rapids_4_spark_qualification_output_execs.csv\"))\n", + "display(execs_output)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [ + { + "elements": [], + "globalVars": {}, + "guid": "", + "layoutOption": { + "grid": true, + "stack": true + }, + "nuid": "91c1bfb2-695a-4e5c-8a25-848a433108dc", + "origId": 556902937009666, + "title": "Executive View", + "version": "DashboardViewV1", + "width": 1600 + }, + { + "elements": [], + "globalVars": {}, + "guid": "", + "layoutOption": { + "grid": true, + "stack": true + }, + "nuid": "62243296-4562-4f06-90ac-d7a609f19c16", + "origId": 556902937009667, + "title": "App View", + "version": "DashboardViewV1", + "width": 1920 + } + ], + "language": "python", + "notebookMetadata": { + "mostRecentlyExecutedCommandWithImplicitDF": { + "commandId": 556902937009671, 
+ "dataframes": [ + "_sqldf" + ] + }, + "pythonIndentUnit": 2, + "widgetLayout": [ + { + "breakBefore": false, + "name": "log_path", + "width": 551 + }, + { + "breakBefore": false, + "name": "output_path", + "width": 551 + } + ] + }, + "notebookName": "[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template", + "widgets": { + "csp": { + "currentValue": "aws", + "nuid": "c75636ae-f875-4022-98b5-14dbe4ac9957", + "widgetInfo": { + "widgetType": "dropdown", + "defaultValue": "aws", + "label": null, + "name": "csp", + "options": { + "widgetType": "dropdown", + "choices": [ + "aws", + "azure" + ] + } + } + }, + "log_path": { + "currentValue": "/dbfs/user1/qualification_logs", + "nuid": "88986aa6-6e67-4d09-aeeb-7c96ea1ea8f1", + "widgetInfo": { + "widgetType": "text", + "defaultValue": "", + "label": null, + "name": "log_path", + "options": { + "widgetType": "text", + "validationRegex": null + } + } + }, + "output_path": { + "currentValue": "/tmp", + "nuid": "0b9846bd-5c3e-4ae2-ba00-80bb9da1ee32", + "widgetInfo": { + "widgetType": "text", + "defaultValue": "", + "label": null, + "name": "output_path", + "options": { + "widgetType": "text", + "validationRegex": null + } + } + } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}