From 26f0ab410c81bcecf7a8227d29fd9e6690020f6b Mon Sep 17 00:00:00 2001
From: "Ahmed Hussein (amahussein)"
Date: Fri, 15 Nov 2024 14:52:45 -0600
Subject: [PATCH] Reduce the log noise caused by core report summary
Signed-off-by: Ahmed Hussein (amahussein)
Contributes to #1416
This change sets the log level of the ToolTextFileWriter output-location
message to debug and summarizes the logging into a single message that shows the final output directory.
---
.../com/nvidia/spark/rapids/tool/ToolTextFileWriter.scala | 2 +-
.../com/nvidia/spark/rapids/tool/profiling/Profiler.scala | 1 +
.../spark/rapids/tool/qualification/Qualification.scala | 8 +-------
.../spark/sql/rapids/tool/util/RuntimeReporter.scala | 3 +++
4 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/ToolTextFileWriter.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/ToolTextFileWriter.scala
index 87701419a..a05f7c74a 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/ToolTextFileWriter.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/ToolTextFileWriter.scala
@@ -70,7 +70,7 @@ class ToolTextFileWriter(
// No need to close the outputStream.
// Java should handle nested streams automatically.
utf8Writer.foreach { writer =>
- logInfo(s"$finalLocationText output location: $textOutputLoc")
+ logDebug(s"$finalLocationText output location: $textOutputLoc")
writer.flush()
writer.close()
}
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
index bad5524e3..a69701c64 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
@@ -119,6 +119,7 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea
progressBar.foreach(_.finishAll())
// Write status reports for all event logs to a CSV file
+ logOutputPath()
val reportResults = generateStatusResults(appStatusReporter.asScala.values.toSeq)
ProfileOutputWriter.writeCSVTable("Profiling Status", reportResults, outputDir)
}
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
index 23d0defe7..c50464233 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
@@ -237,13 +237,6 @@ class Qualification(outputPath: String, numRows: Int, hadoopConf: Configuration,
}
}
- /**
- * The outputPath of the current instance of the provider
- */
- def getReportOutputPath: String = {
- s"$outputDir/rapids_4_spark_qualification_output"
- }
-
/**
* Generates a qualification report based on the provided summary information.
*/
@@ -263,6 +256,7 @@ class Qualification(outputPath: String, numRows: Int, hadoopConf: Configuration,
qWriter.writeStageReport(allAppsSum, order)
qWriter.writeUnsupportedOpsSummaryCSVReport(allAppsSum)
val appStatusResult = generateStatusResults(appStatusReporter.asScala.values.toSeq)
+ logOutputPath()
qWriter.writeStatusReport(appStatusResult, order)
if (mlOpsEnabled) {
if (allAppsSum.exists(x => x.mlFunctions.nonEmpty)) {
diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeReporter.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeReporter.scala
index 57a8ad074..0ae7bfa6b 100644
--- a/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeReporter.scala
+++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeReporter.scala
@@ -27,6 +27,9 @@ trait RuntimeReporter extends Logging {
def generateRuntimeReport(hadoopConf: Option[Configuration] = None): Unit = {
RuntimeUtil.generateReport(outputDir, hadoopConf)
}
+ def logOutputPath(): Unit = {
+ logInfo(s"Tools output directory: $outputDir")
+ }
/**
* Updates the status of "SUCCESS" applications to "SKIPPED" if newer attempts with