Skip to content

Commit

Permalink
Move files using NIO if the shuffle dir is mounted as a file system.
Browse files Browse the repository at this point in the history
Signed-off-by: Pascal Spörri <[email protected]>
  • Loading branch information
pspoerri committed Sep 22, 2023
1 parent 0b56e2b commit 1594c8d
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@

package org.apache.spark.shuffle

import org.apache.spark.TaskContext
import org.apache.spark.internal.Logging
import org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter
import org.apache.spark.shuffle.helper.{S3ShuffleDispatcher, S3ShuffleHelper}
import org.apache.spark.storage.ShuffleDataBlockId
import org.apache.spark.util.Utils

import java.io.{File, FileInputStream}
import java.nio.file.{Files, Path}

class S3SingleSpillShuffleMapOutputWriter(shuffleId: Int, mapId: Long) extends SingleSpillShuffleMapOutputWriter {
class S3SingleSpillShuffleMapOutputWriter(shuffleId: Int, mapId: Long) extends SingleSpillShuffleMapOutputWriter with Logging {

private lazy val dispatcher = S3ShuffleDispatcher.get

Expand All @@ -21,12 +24,34 @@ class S3SingleSpillShuffleMapOutputWriter(shuffleId: Int, mapId: Long) extends S
partitionLengths: Array[Long],
checksums: Array[Long]
): Unit = {
val in = new FileInputStream(mapSpillFile)
val block = ShuffleDataBlockId(shuffleId, mapId, IndexShuffleBlockResolver.NOOP_REDUCE_ID)
val out = new S3MeasureOutputStream(dispatcher.createBlock(block), block.name)

// Note: HDFS does not expose a nio-buffered write interface.
Utils.copyStream(in, out, closeStreams = true)
if (dispatcher.rootIsLocal) {
// Use NIO to move the file if the folder is local.
val now = System.nanoTime()
val path = dispatcher.getPath(block)
val fileDestination = path.toUri.getRawPath
val dir = path.getParent
if (!dispatcher.fs.exists(dir)) {
dispatcher.fs.mkdirs(dir)
}
Files.move(mapSpillFile.toPath, Path.of(fileDestination))
val timings = System.nanoTime() - now

val bytes = partitionLengths.sum
val tc = TaskContext.get()
val sId = tc.stageId()
val sAt = tc.stageAttemptNumber()
val t = timings / 1000000
val bw = bytes.toDouble / (t.toDouble / 1000) / (1024 * 1024)
logInfo(s"Statistics: Stage ${sId}.${sAt} TID ${tc.taskAttemptId()} -- " +
s"Writing ${block.name} ${bytes} took ${t} ms (${bw} MiB/s)")
} else {
// Copy using a stream.
val in = new FileInputStream(mapSpillFile)
val out = new S3MeasureOutputStream(dispatcher.createBlock(block), block.name)
Utils.copyStream(in, out, closeStreams = true)
}

if (dispatcher.checksumEnabled) {
S3ShuffleHelper.writeChecksum(shuffleId, mapId, checksums)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class S3ShuffleDispatcher extends Logging {
}
private val rootDir_ = if (useSparkShuffleFetch) fallbackStoragePath else conf.get("spark.shuffle.s3.rootDir", defaultValue = "sparkS3shuffle/")
val rootDir: String = if (rootDir_.endsWith("/")) rootDir_ else rootDir_ + "/"
val rootIsLocal: Boolean = URI.create(rootDir).getScheme == "file"

// Optional
val bufferSize: Int = conf.getInt("spark.shuffle.s3.bufferSize", defaultValue = 8 * 1024 * 1024)
Expand Down

0 comments on commit 1594c8d

Please sign in to comment.