Skip to content

Commit

Permalink
Update Databricks Notebook for Tools v24.08.0 (#427)
Browse files Browse the repository at this point in the history
* Update Databricks Notebook for Tools for v24.08

* Sign-off commit

Signed-off-by: Partho Sarthi <[email protected]>

---------

Signed-off-by: Partho Sarthi <[email protected]>
  • Loading branch information
parthosa authored Aug 16, 2024
1 parent 806b355 commit 2b0a724
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 16 deletions.
2 changes: 1 addition & 1 deletion tools/databricks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ top of the notebook. After that, select *Run all* to execute the tools for the
1. Multiple event logs must be comma-separated.
- For example: `/dbfs/path/to/eventlog1,/dbfs/path/to/eventlog2`

**Latest Tools Version Supported** 24.06.1
**Latest Tools Version Supported** 24.08.0
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "df33c614-2ecc-47a0-8600-bc891681997f",
"showTitle": false,
Expand Down Expand Up @@ -50,7 +53,7 @@
},
"outputs": [],
"source": [
"TOOLS_VER = \"24.06.1\"\n",
"TOOLS_VER = \"24.08.0\"\n",
"print(f\"Using Tools Version: {TOOLS_VER}\")"
]
},
Expand Down Expand Up @@ -156,7 +159,10 @@
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "f83af6c8-5a79-4a46-965b-38a4cb621877",
"showTitle": false,
Expand Down Expand Up @@ -380,7 +386,10 @@
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "bbe50fde-0bd6-4281-95fd-6a1ec6f17ab2",
"showTitle": false,
Expand Down Expand Up @@ -455,7 +464,7 @@
"stack": true
},
"nuid": "91c1bfb2-695a-4e5c-8a25-848a433108dc",
"origId": 1075819839476955,
"origId": 2173122769183713,
"title": "Executive View",
"version": "DashboardViewV1",
"width": 1600
Expand All @@ -469,7 +478,7 @@
"stack": true
},
"nuid": "62243296-4562-4f06-90ac-d7a609f19c16",
"origId": 1075819839476956,
"origId": 2173122769183714,
"title": "App View",
"version": "DashboardViewV1",
"width": 1920
Expand All @@ -479,7 +488,7 @@
"language": "python",
"notebookMetadata": {
"mostRecentlyExecutedCommandWithImplicitDF": {
"commandId": 203373918309288,
"commandId": 2173122769183692,
"dataframes": [
"_sqldf"
]
Expand Down Expand Up @@ -507,11 +516,11 @@
"widgetInfo": {
"widgetType": "text",
"defaultValue": "/dbfs/user1/profiling_logs",
"label": null,
"label": "",
"name": "Eventlog Path",
"options": {
"widgetType": "text",
"autoCreated": null,
"autoCreated": false,
"validationRegex": null
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
},
"outputs": [],
"source": [
"TOOLS_VER = \"24.06.1\"\n",
"TOOLS_VER = \"24.08.0\"\n",
"print(f\"Using Tools Version: {TOOLS_VER}\")"
]
},
Expand Down Expand Up @@ -282,6 +282,7 @@
"\n",
"try:\n",
" output_folder, log_file_location = extract_file_info(CONSOLE_OUTPUT_PATH, OUTPUT_PATH)\n",
" jar_output_folder = os.path.join(output_folder, \"rapids_4_spark_qualification_output\")\n",
" print(f\"Output folder detected {output_folder}\")\n",
" copy_logs(output_folder, log_file_location, CONSOLE_OUTPUT_PATH, CONSOLE_ERROR_PATH)\n",
" print(f\"Logs successfully copied to {output_folder}\")\n",
Expand Down Expand Up @@ -424,9 +425,110 @@
"outputs": [],
"source": [
"summary_output=pd.read_csv(os.path.join(output_folder, \"qualification_summary.csv\"))\n",
"summary_output=summary_output.drop(columns=[\"Unnamed: 0\"]).rename_axis('Index').reset_index()\n",
"display(summary_output)"
]
},
{
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "73b5e0b0-3a96-4cc6-8e6c-840e4b0d9d43",
"showTitle": false,
"title": ""
}
},
"source": [
"\n",
"## Application Status\n",
"\n",
"The report shows the status of each event log file that was provided.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "c9ffbfdb-dbb6-4736-b9cb-2ac457cc6714",
"showTitle": true,
"title": "rapids_4_spark_qualification_output_status.csv"
},
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"status_output=pd.read_csv(os.path.join(jar_output_folder, \"rapids_4_spark_qualification_output_status.csv\"))\n",
"display(status_output)"
]
},
{
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "09945d39-f9c2-4f4a-8afd-4f309f24f8e0",
"showTitle": false,
"title": ""
}
},
"source": [
"\n",
"## Metadata for Migration\n",
"\n",
"The report shows the following metadata for each app:\n",
"- Recommended GPU cluster\n",
"- File location of full cluster config recommendations\n",
"- File location of GPU-specific config recommendations only\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "133cf1bd-33b6-4a62-9ae2-5505717092d1",
"showTitle": true,
"title": "app_metadata.json"
},
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"import json\n",
"metadata_file = os.path.join(output_folder, \"app_metadata.json\")\n",
"def camel_to_title(name):\n",
" return re.sub('([a-z])([A-Z])', r'\\1 \\2', name).title()\n",
" \n",
"with open(metadata_file, 'r') as file:\n",
" json_data = json.load(file)\n",
"\n",
"df = pd.DataFrame(json_data)\n",
"df['recommendedGpuCluster'] = df['clusterInfo'].apply(lambda x: x['recommendedCluster'])\n",
"df['sourceCluster'] = df['clusterInfo'].apply(lambda x: x['sourceCluster'])\n",
"df.drop(columns=['clusterInfo'], inplace=True)\n",
"df = df[['appId', 'appName', 'estimatedGpuSpeedupCategory', 'recommendedGpuCluster', 'fullClusterConfigRecommendations', 'gpuConfigRecommendationBreakdown']]\n",
"df.columns = [camel_to_title(col) for col in df.columns]\n",
"display(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
Expand Down Expand Up @@ -474,7 +576,6 @@
},
"outputs": [],
"source": [
"jar_output_folder = os.path.join(output_folder, \"rapids_4_spark_qualification_output\")\n",
"stages_output=pd.read_csv(os.path.join(jar_output_folder, \"rapids_4_spark_qualification_output_stages.csv\"))\n",
"display(stages_output)"
]
Expand Down Expand Up @@ -524,7 +625,7 @@
"inputWidgets": {},
"nuid": "998b0c51-0cb6-408e-a01a-d1f5b1a61e1f",
"showTitle": true,
"title": "rapids_4_spark_qualification_output_execs"
"title": "rapids_4_spark_qualification_output_execs.csv"
},
"jupyter": {
"source_hidden": true
Expand All @@ -549,7 +650,7 @@
"stack": true
},
"nuid": "91c1bfb2-695a-4e5c-8a25-848a433108dc",
"origId": 1075819839476974,
"origId": 2173122769183715,
"title": "Executive View",
"version": "DashboardViewV1",
"width": 1600
Expand All @@ -563,17 +664,31 @@
"stack": true
},
"nuid": "62243296-4562-4f06-90ac-d7a609f19c16",
"origId": 1075819839476975,
"origId": 2173122769183716,
"title": "App View",
"version": "DashboardViewV1",
"width": 1920
},
{
"elements": [],
"globalVars": {},
"guid": "",
"layoutOption": {
"grid": true,
"stack": true
},
"nuid": "854f9c75-5977-42aa-b3dd-c680b8331f19",
"origId": 2173122769183722,
"title": "Untitled",
"version": "DashboardViewV1",
"width": 1024
}
],
"environmentMetadata": null,
"language": "python",
"notebookMetadata": {
"mostRecentlyExecutedCommandWithImplicitDF": {
"commandId": 1075819839476965,
"commandId": 2173122769183704,
"dataframes": [
"_sqldf"
]
Expand Down

0 comments on commit 2b0a724

Please sign in to comment.