[CELEBORN-1846] Fix the StreamHandler usage in fetching chunk when task attempt is odd #3079
base: main
Changes from 1 commit
```diff
@@ -38,7 +38,9 @@
 import org.apache.celeborn.client.compress.Decompressor;
 import org.apache.celeborn.common.CelebornConf;
 import org.apache.celeborn.common.exception.CelebornIOException;
+import org.apache.celeborn.common.network.client.TransportClient;
 import org.apache.celeborn.common.network.client.TransportClientFactory;
+import org.apache.celeborn.common.network.protocol.TransportMessage;
 import org.apache.celeborn.common.protocol.*;
 import org.apache.celeborn.common.unsafe.Platform;
 import org.apache.celeborn.common.util.ExceptionMaker;
```
```diff
@@ -322,14 +324,10 @@ private boolean isExcluded(PartitionLocation location) {

   private PartitionReader createReaderWithRetry(
       PartitionLocation location, PbStreamHandler pbStreamHandler) throws IOException {
-    // For the first time, the location will be selected according to attemptNumber
-    if (fetchChunkRetryCnt == 0 && attemptNumber % 2 == 1 && location.hasPeer()) {
-      location = location.getPeer();
-      logger.debug("Read peer {} for attempt {}.", location, attemptNumber);
-    }
     Exception lastException = null;
     while (fetchChunkRetryCnt < fetchChunkMaxRetry) {
       try {
+        logger.debug("Create reader for location {}", location);
         if (isExcluded(location)) {
           throw new CelebornIOException("Fetch data from excluded worker! " + location);
         }
```
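For context, this hunk removes the logic that made odd-numbered task attempts start fetching from the peer replica. Per the PR title and the thread at the end of this page, the problem is that a `PbStreamHandler` already obtained for the original location ends up pointing at a different worker than the location actually read. A minimal, self-contained model of the removed selection (the `Location` type here is a hypothetical stand-in for Celeborn's `PartitionLocation`):

```java
// Hypothetical stand-in for Celeborn's PartitionLocation; not the real class.
interface Location {
  boolean hasPeer();
  Location getPeer();
}

final class InitialLocationModel {
  // Pre-PR behavior: on the first fetch (fetchChunkRetryCnt == 0) of an
  // odd-numbered task attempt, start reading from the peer replica.
  static Location select(Location primary, int attemptNumber, int fetchChunkRetryCnt) {
    if (fetchChunkRetryCnt == 0 && attemptNumber % 2 == 1 && primary.hasPeer()) {
      // A stream handle already opened against `primary` now points at a
      // different worker than the location being read; this is the bug.
      return primary.getPeer();
    }
    return primary;
  }
}
```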
```diff
@@ -351,6 +349,28 @@ private PartitionReader createReaderWithRetry(
             location,
             e);
         location = location.getPeer();
+        if (pbStreamHandler != null) {
+          try {
+            TransportClient client =
+                clientFactory.createClient(location.getHost(), location.getFetchPort());
+            TransportMessage bufferStreamEnd =
+                new TransportMessage(
+                    MessageType.BUFFER_STREAM_END,
+                    PbBufferStreamEnd.newBuilder()
+                        .setStreamType(StreamType.ChunkStream)
+                        .setStreamId(pbStreamHandler.getStreamId())
+                        .build()
+                        .toByteArray());
+            client.sendRpc(bufferStreamEnd.toByteBuffer());
+          } catch (InterruptedException | IOException ex) {
+            logger.warn(
+                "Close {} stream {} failed",
+                location.hostAndFetchPort(),
+                pbStreamHandler.getStreamId(),
+                ex);
+          }
+          pbStreamHandler = null;
+        }
       } else {
         logger.warn(
             "CreatePartitionReader failed {}/{} times for location {}, retry the same location",
```

Review comments on this hunk:

- Why do we need to send BUFFER_STREAM_END to the replica location?

- Thanks, fixed.

- I wonder what will happen if we send a BufferStreamEnd message to a Celeborn worker that did not open the stream, because we may reach here when the location is excluded, without having opened a stream first.
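Given the first comment above and the author's "Thanks, fixed", the follow-up presumably ends the stream on the worker that actually opened it, before `location` is switched to its peer. A hedged sketch of that ordering, reusing only the calls already present in this diff; `closeStream` is a hypothetical helper name, and `clientFactory` and `logger` are assumed to be the surrounding class's existing fields:

```java
// Hedged sketch, not the committed follow-up: end the chunk stream on the
// worker that opened it *before* switching `location` to its peer.
private void closeStream(PartitionLocation location, PbStreamHandler pbStreamHandler) {
  try {
    TransportClient client =
        clientFactory.createClient(location.getHost(), location.getFetchPort());
    TransportMessage bufferStreamEnd =
        new TransportMessage(
            MessageType.BUFFER_STREAM_END,
            PbBufferStreamEnd.newBuilder()
                .setStreamType(StreamType.ChunkStream)
                .setStreamId(pbStreamHandler.getStreamId())
                .build()
                .toByteArray());
    client.sendRpc(bufferStreamEnd.toByteBuffer());
  } catch (InterruptedException | IOException ex) {
    logger.warn(
        "Close {} stream {} failed",
        location.hostAndFetchPort(),
        pbStreamHandler.getStreamId(),
        ex);
  }
}

// At the retry site, close against the *current* location first:
//   if (pbStreamHandler != null) {
//     closeStream(location, pbStreamHandler);
//     pbStreamHandler = null;
//   }
//   location = location.getPeer();
```

The second concern in the thread (sending BufferStreamEnd for a stream that was never opened, e.g. when the location is excluded before any open) would still need a guard, such as only sending when a stream id was actually issued.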
```diff
@@ -422,7 +442,6 @@ private PartitionReader createReader(
       int fetchChunkRetryCnt,
       int fetchChunkMaxRetry)
       throws IOException, InterruptedException {
-    logger.debug("Create reader for location {}", location);

     StorageInfo storageInfo = location.getStorageInfo();
     switch (storageInfo.getType()) {
```
Review thread on the removed attemptNumber-based peer selection:

- IMO, it would be better to keep this. Switching peers based on the attemptNumber may avoid a PartitionLocation that already failed in a previous attempt.

- If there is a problem with the primary location, then it has most likely already been switched to the peer in the last task attempt, which still failed; in that case it is not so relevant which replica the new task attempt starts fetching from. If there is no problem with the primary location and the task is retried for other reasons, the starting replica matters even less. So I think we could always fetch chunks starting from the primary location (see the sketch after this thread). WDYT?

- Maybe we should open streams for both the primary and replica locations?

- Sounds reasonable. Also, if we switch the location to its peer here, pbStreamHandler and location become inconsistent when createReader is called, which may cause issues in some shuffle scenarios.

- That would be a bit wasteful, because most tasks do not need to switch to the replica location when the cluster is stable.

- Sounds reasonable.
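To make the "always start from the primary" proposal concrete, here is a minimal, self-contained model of a retry loop under that policy. All names are hypothetical, not Celeborn's API, and failure handling is reduced to a single exception type:

```java
// Hypothetical model: every task attempt begins at the primary location and
// alternates to the peer replica only after a fetch failure.
final class FetchRetryModel {
  interface Location {
    Location peer(); // null when replication is disabled
  }

  interface ReaderFactory {
    AutoCloseable create(Location location) throws Exception;
  }

  static AutoCloseable createReaderWithRetry(
      Location primary, ReaderFactory factory, int maxRetry) throws Exception {
    Location location = primary; // always begin at the primary
    Exception lastException = null;
    for (int retry = 0; retry < maxRetry; retry++) {
      try {
        return factory.create(location);
      } catch (Exception e) {
        lastException = e;
        // After a failure, switch to the peer when one exists;
        // otherwise retry the same location.
        if (location.peer() != null) {
          location = location.peer();
        }
      }
    }
    if (lastException != null) {
      throw lastException;
    }
    throw new IllegalStateException("maxRetry must be at least 1");
  }
}
```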