From 26f0ab410c81bcecf7a8227d29fd9e6690020f6b Mon Sep 17 00:00:00 2001 From: "Ahmed Hussein (amahussein)" Date: Fri, 15 Nov 2024 14:52:45 -0600 Subject: [PATCH] Reduce the log noise caused by core report summary Signed-off-by: Ahmed Hussein (amahussein) Contributes to #1416 This change sets the log level of ToolTextFileWriter to debug and summarizes the logging into a single message to show the final directory --- .../com/nvidia/spark/rapids/tool/ToolTextFileWriter.scala | 2 +- .../com/nvidia/spark/rapids/tool/profiling/Profiler.scala | 1 + .../spark/rapids/tool/qualification/Qualification.scala | 8 +------- .../spark/sql/rapids/tool/util/RuntimeReporter.scala | 3 +++ 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/ToolTextFileWriter.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/ToolTextFileWriter.scala index 87701419a..a05f7c74a 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/ToolTextFileWriter.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/ToolTextFileWriter.scala @@ -70,7 +70,7 @@ class ToolTextFileWriter( // No need to close the outputStream. // Java should handle nested streams automatically. 
utf8Writer.foreach { writer => - logInfo(s"$finalLocationText output location: $textOutputLoc") + logDebug(s"$finalLocationText output location: $textOutputLoc") writer.flush() writer.close() } diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala index bad5524e3..a69701c64 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala @@ -119,6 +119,7 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea progressBar.foreach(_.finishAll()) // Write status reports for all event logs to a CSV file + logOutputPath() val reportResults = generateStatusResults(appStatusReporter.asScala.values.toSeq) ProfileOutputWriter.writeCSVTable("Profiling Status", reportResults, outputDir) } diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala index 23d0defe7..c50464233 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala @@ -237,13 +237,6 @@ class Qualification(outputPath: String, numRows: Int, hadoopConf: Configuration, } } - /** - * The outputPath of the current instance of the provider - */ - def getReportOutputPath: String = { - s"$outputDir/rapids_4_spark_qualification_output" - } - /** * Generates a qualification report based on the provided summary information. 
*/ @@ -263,6 +256,7 @@ class Qualification(outputPath: String, numRows: Int, hadoopConf: Configuration, qWriter.writeStageReport(allAppsSum, order) qWriter.writeUnsupportedOpsSummaryCSVReport(allAppsSum) val appStatusResult = generateStatusResults(appStatusReporter.asScala.values.toSeq) + logOutputPath() qWriter.writeStatusReport(appStatusResult, order) if (mlOpsEnabled) { if (allAppsSum.exists(x => x.mlFunctions.nonEmpty)) { diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeReporter.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeReporter.scala index 57a8ad074..0ae7bfa6b 100644 --- a/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeReporter.scala +++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeReporter.scala @@ -27,6 +27,9 @@ trait RuntimeReporter extends Logging { def generateRuntimeReport(hadoopConf: Option[Configuration] = None): Unit = { RuntimeUtil.generateReport(outputDir, hadoopConf) } + def logOutputPath(): Unit = { + logInfo(s"Tools output directory: $outputDir") + } /** * Updates the status of "SUCCESS" applications to "SKIPPED" if newer attempts with