From 6129c9a56d52ebb060417cb397e0764cdd8791bc Mon Sep 17 00:00:00 2001 From: liferoad Date: Mon, 7 Oct 2024 11:05:34 -0400 Subject: [PATCH 01/14] allow numpy 2.1.x --- sdks/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 721cb4c1a8dd7..6ce7cfdfd556e 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -366,7 +366,7 @@ def get_portability_package_data(): 'jsonpickle>=3.0.0,<4.0.0', # numpy can have breaking changes in minor versions. # Use a strict upper bound. - 'numpy>=1.14.3,<1.27.0', # Update pyproject.toml as well. + 'numpy>=1.14.3,<2.2.0', # Update pyproject.toml as well. 'objsize>=0.6.1,<0.8.0', 'packaging>=22.0', 'pymongo>=3.8.0,<5.0.0', From b8accd2583c6b89acf03fcdeccf13895ae77ef0c Mon Sep 17 00:00:00 2001 From: liferoad Date: Mon, 7 Oct 2024 16:49:37 -0400 Subject: [PATCH 02/14] fixed the mypy --- sdks/python/apache_beam/ml/inference/tensorrt_inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorrt_inference.py b/sdks/python/apache_beam/ml/inference/tensorrt_inference.py index b38947b494c20..9563aa05232a5 100644 --- a/sdks/python/apache_beam/ml/inference/tensorrt_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorrt_inference.py @@ -125,7 +125,7 @@ def __init__(self, engine: trt.ICudaEngine): # TODO(https://github.com/NVIDIA/TensorRT/issues/2557): # Clean up when fixed upstream. try: - _ = np.bool # type: ignore + _ = np.bool except AttributeError: # numpy >= 1.24.0 np.bool = np.bool_ # type: ignore @@ -258,7 +258,7 @@ def __init__( model_copies: The exact number of models that you would like loaded onto your machine. This can be useful if you exactly know your CPU or GPU capacity and want to maximize resource utilization. - max_batch_duration_secs: the maximum amount of time to buffer + max_batch_duration_secs: the maximum amount of time to buffer a batch before emitting; used in streaming contexts. kwargs: Additional arguments like 'engine_path' and 'onnx_path' are currently supported. 'env_vars' can be used to set environment variables From 8a6f248c57eff70ac31956fe512a91f7453efeb6 Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Tue, 8 Oct 2024 19:31:11 -0400 Subject: [PATCH 03/14] Enforce a size limit on StringSetData (#32650) * Enforce a size limit on StringSetData * Make StringSetData set mutable. 
This avoids copy and create new ImutableSet every time * adjust warning log --- .../runners/core/metrics/StringSetCell.java | 9 +- .../runners/core/metrics/StringSetData.java | 102 ++++++++++++--- .../core/metrics/StringSetDataTest.java | 34 ++++- sdks/python/apache_beam/metrics/cells.pxd | 2 +- sdks/python/apache_beam/metrics/cells.py | 116 ++++++++++++++++-- sdks/python/apache_beam/metrics/cells_test.py | 16 ++- sdks/python/apache_beam/metrics/execution.py | 3 +- .../apache_beam/metrics/execution_test.py | 11 +- .../apache_beam/metrics/monitoring_infos.py | 5 +- 9 files changed, 255 insertions(+), 43 deletions(-) diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/StringSetCell.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/StringSetCell.java index 8455f154c0f8f..fc8dcb49894fe 100644 --- a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/StringSetCell.java +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/StringSetCell.java @@ -22,7 +22,6 @@ import org.apache.beam.sdk.metrics.MetricName; import org.apache.beam.sdk.metrics.MetricsContainer; import org.apache.beam.sdk.metrics.StringSet; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; import org.checkerframework.checker.nullness.qual.Nullable; /** @@ -101,11 +100,15 @@ public void add(String value) { if (this.setValue.get().stringSet().contains(value)) { return; } - update(StringSetData.create(ImmutableSet.of(value))); + add(new String[] {value}); } @Override public void add(String... values) { - update(StringSetData.create(ImmutableSet.copyOf(values))); + StringSetData original; + do { + original = setValue.get(); + } while (!setValue.compareAndSet(original, original.addAll(values))); + dirty.afterModification(); } } diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/StringSetData.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/StringSetData.java index 466d4ad46eb6f..4fc5d3beca31e 100644 --- a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/StringSetData.java +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/StringSetData.java @@ -19,25 +19,49 @@ import com.google.auto.value.AutoValue; import java.io.Serializable; +import java.util.Arrays; +import java.util.HashSet; import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; import org.apache.beam.sdk.metrics.StringSetResult; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** - * Data describing the StringSet. The {@link StringSetData} hold an immutable copy of the set from - * which it was initially created. This should retain enough detail that it can be combined with - * other {@link StringSetData}. + * Data describing the StringSet. The {@link StringSetData} hold a copy of the set from which it was + * initially created. This should retain enough detail that it can be combined with other {@link + * StringSetData}. + * + *
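+ * <p>For illustration, a typical interaction with this class might look like the following
+ * (the string values are arbitrary):
+ *
+ * <pre>{@code
+ * StringSetData data = StringSetData.empty();
+ * data = data.addAll("pc_1", "pc_2"); // reuses the mutable backing set where possible
+ * StringSetData merged = data.combine(StringSetData.create(ImmutableSet.of("pc_3")));
+ * }</pre>
+ *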
+ * <p>The underlying set is mutable for the {@link #addAll} operation; otherwise a copy of the
+ * set is generated.
+ *
+ * <p>
The summation of all string length for a {@code StringSetData} cannot exceed 1 MB. Further + * addition of elements are dropped. */ @AutoValue public abstract class StringSetData implements Serializable { + private static final Logger LOG = LoggerFactory.getLogger(StringSetData.class); + // 1 MB + @VisibleForTesting static final long STRING_SET_SIZE_LIMIT = 1_000_000L; public abstract Set stringSet(); + public abstract long stringSize(); + /** Returns a {@link StringSetData} which is made from an immutable copy of the given set. */ public static StringSetData create(Set set) { - return new AutoValue_StringSetData(ImmutableSet.copyOf(set)); + if (set.isEmpty()) { + return empty(); + } + HashSet combined = new HashSet<>(); + long stringSize = addUntilCapacity(combined, 0L, set); + return new AutoValue_StringSetData(combined, stringSize); + } + + /** Returns a {@link StringSetData} which is made from the given set in place. */ + private static StringSetData createInPlace(HashSet set, long stringSize) { + return new AutoValue_StringSetData(set, stringSize); } /** Return a {@link EmptyStringSetData#INSTANCE} representing an empty {@link StringSetData}. */ @@ -45,6 +69,23 @@ public static StringSetData empty() { return EmptyStringSetData.INSTANCE; } + /** + * Add strings into this {@code StringSetData} and return the result {@code StringSetData}. Reuse + * the original StringSetData's set. As a result, current StringSetData will become invalid. + * + *
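+   * <p>For example (illustrative), after {@code StringSetData b = a.addAll("x");} only
+   * {@code b} should be used: {@code a} may now share, and further mutate, the same backing set.
+   *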
>Should only be used by {@link StringSetCell#add}. + */ + public StringSetData addAll(String... strings) { + HashSet combined; + if (this.stringSet() instanceof HashSet) { + combined = (HashSet) this.stringSet(); + } else { + combined = new HashSet<>(this.stringSet()); + } + long stringSize = addUntilCapacity(combined, this.stringSize(), Arrays.asList(strings)); + return StringSetData.createInPlace(combined, stringSize); + } + /** * Combines this {@link StringSetData} with other, both original StringSetData are left intact. */ @@ -54,10 +95,9 @@ public StringSetData combine(StringSetData other) { } else if (other.stringSet().isEmpty()) { return this; } else { - ImmutableSet.Builder combined = ImmutableSet.builder(); - combined.addAll(this.stringSet()); - combined.addAll(other.stringSet()); - return StringSetData.create(combined.build()); + HashSet combined = new HashSet<>(this.stringSet()); + long stringSize = addUntilCapacity(combined, this.stringSize(), other.stringSet()); + return StringSetData.createInPlace(combined, stringSize); } } @@ -65,12 +105,12 @@ public StringSetData combine(StringSetData other) { * Combines this {@link StringSetData} with others, all original StringSetData are left intact. */ public StringSetData combine(Iterable others) { - Set combined = - StreamSupport.stream(others.spliterator(), true) - .flatMap(other -> other.stringSet().stream()) - .collect(Collectors.toSet()); - combined.addAll(this.stringSet()); - return StringSetData.create(combined); + HashSet combined = new HashSet<>(this.stringSet()); + long stringSize = this.stringSize(); + for (StringSetData other : others) { + stringSize = addUntilCapacity(combined, stringSize, other.stringSet()); + } + return StringSetData.createInPlace(combined, stringSize); } /** Returns a {@link StringSetResult} representing this {@link StringSetData}. */ @@ -78,6 +118,31 @@ public StringSetResult extractResult() { return StringSetResult.create(stringSet()); } + /** Add strings into set until reach capacity. Return the all string size of added set. */ + private static long addUntilCapacity( + HashSet combined, long currentSize, Iterable others) { + if (currentSize > STRING_SET_SIZE_LIMIT) { + // already at capacity + return currentSize; + } + for (String string : others) { + if (combined.add(string)) { + currentSize += string.length(); + + // check capacity both before insert and after insert one, so the warning only emit once. + if (currentSize > STRING_SET_SIZE_LIMIT) { + LOG.warn( + "StringSet metrics reaches capacity. Further incoming elements won't be recorded." + + " Current size: {}, last element size: {}.", + currentSize, + string.length()); + break; + } + } + } + return currentSize; + } + /** Empty {@link StringSetData}, representing no values reported and is immutable. */ public static class EmptyStringSetData extends StringSetData { @@ -91,6 +156,11 @@ public Set stringSet() { return ImmutableSet.of(); } + @Override + public long stringSize() { + return 0L; + } + /** Return a {@link StringSetResult#empty()} which is immutable empty set. 
*/ @Override public StringSetResult extractResult() { diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/metrics/StringSetDataTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/metrics/StringSetDataTest.java index 665ce3743c511..534db203ff3c3 100644 --- a/runners/core-java/src/test/java/org/apache/beam/runners/core/metrics/StringSetDataTest.java +++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/metrics/StringSetDataTest.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertTrue; import java.util.Collections; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; import org.junit.Rule; import org.junit.Test; @@ -81,6 +82,14 @@ public void testStringSetDataEmptyIsImmutable() { assertThrows(UnsupportedOperationException.class, () -> empty.stringSet().add("aa")); } + @Test + public void testStringSetDataEmptyCanAdd() { + ImmutableSet contents = ImmutableSet.of("ab", "cd"); + StringSetData stringSetData = StringSetData.empty(); + stringSetData = stringSetData.addAll(contents.toArray(new String[] {})); + assertEquals(stringSetData.stringSet(), contents); + } + @Test public void testEmptyExtract() { assertTrue(StringSetData.empty().extractResult().getStringSet().isEmpty()); @@ -94,9 +103,26 @@ public void testExtract() { } @Test - public void testExtractReturnsImmutable() { - StringSetData stringSetData = StringSetData.create(ImmutableSet.of("ab", "cd")); - // check that immutable copy is returned - assertThrows(UnsupportedOperationException.class, () -> stringSetData.stringSet().add("aa")); + public void testStringSetAddUntilCapacity() { + StringSetData combined = StringSetData.empty(); + @SuppressWarnings("InlineMeInliner") // Inline representation is Java11+ only + String commonPrefix = Strings.repeat("*", 1000); + long stringSize = 0; + for (int i = 0; i < 1000; ++i) { + String s = commonPrefix + i; + stringSize += s.length(); + combined = combined.addAll(s); + } + assertTrue(combined.stringSize() < stringSize); + assertTrue(combined.stringSize() > StringSetData.STRING_SET_SIZE_LIMIT); + } + + @Test + public void testStringSetAddSizeTrackedCorrectly() { + StringSetData combined = StringSetData.empty(); + combined = combined.addAll("a", "b", "c", "b"); + assertEquals(3, combined.stringSize()); + combined = combined.addAll("c", "d", "e"); + assertEquals(5, combined.stringSize()); } } diff --git a/sdks/python/apache_beam/metrics/cells.pxd b/sdks/python/apache_beam/metrics/cells.pxd index a8f4003d89808..98bb5eff09775 100644 --- a/sdks/python/apache_beam/metrics/cells.pxd +++ b/sdks/python/apache_beam/metrics/cells.pxd @@ -45,7 +45,7 @@ cdef class GaugeCell(MetricCell): cdef class StringSetCell(MetricCell): - cdef readonly set data + cdef readonly object data cdef inline bint _update(self, value) except -1 diff --git a/sdks/python/apache_beam/metrics/cells.py b/sdks/python/apache_beam/metrics/cells.py index 407106342fb81..63fc9f3f7cc9e 100644 --- a/sdks/python/apache_beam/metrics/cells.py +++ b/sdks/python/apache_beam/metrics/cells.py @@ -23,11 +23,14 @@ # pytype: skip-file +import logging import threading import time from datetime import datetime from typing import Any +from typing import Iterable from typing import Optional +from typing import Set from typing import SupportsInt try: @@ -47,6 +50,8 @@ class fake_cython: 'GaugeResult' ] +_LOGGER = logging.getLogger(__name__) + class MetricCell(object): """For internal 
use only; no backwards-compatibility guarantees. @@ -297,9 +302,9 @@ def _update(self, value): self.data.add(value) def get_cumulative(self): - # type: () -> set + # type: () -> StringSetData with self._lock: - return set(self.data) + return self.data.get_cumulative() def combine(self, other): # type: (StringSetCell) -> StringSetCell @@ -522,6 +527,98 @@ def singleton(value): return DistributionData(value, 1, value, value) +class StringSetData(object): + """For internal use only; no backwards-compatibility guarantees. + + The data structure that holds data about a StringSet metric. + + StringSet metrics are restricted to set of strings only. + + This object is not thread safe, so it's not supposed to be modified + by other than the StringSetCell that contains it. + + The summation of all string length for a StringSetData cannot exceed 1 MB. + Further addition of elements are dropped. + """ + + _STRING_SET_SIZE_LIMIT = 1_000_000 + + def __init__(self, string_set: Optional[Set] = None, string_size: int = 0): + self.string_set = string_set or set() + if not string_size: + string_size = 0 + for s in self.string_set: + string_size += len(s) + self.string_size = string_size + + def __eq__(self, other: object) -> bool: + if isinstance(other, StringSetData): + return ( + self.string_size == other.string_size and + self.string_set == other.string_set) + else: + return False + + def __hash__(self) -> int: + return hash(self.string_set) + + def __repr__(self) -> str: + return 'StringSetData{}:{}'.format(self.string_set, self.string_size) + + def get_cumulative(self) -> "StringSetData": + return StringSetData(set(self.string_set), self.string_size) + + def add(self, *strings): + """ + Add strings into this StringSetData and return the result StringSetData. + Reuse the original StringSetData's set. + """ + self.string_size = self.add_until_capacity( + self.string_set, self.string_size, strings) + return self + + def combine(self, other: "StringSetData") -> "StringSetData": + """ + Combines this StringSetData with other, both original StringSetData are left + intact. + """ + if other is None: + return self + + combined = set(self.string_set) + string_size = self.add_until_capacity( + combined, self.string_size, other.string_set) + return StringSetData(combined, string_size) + + @classmethod + def add_until_capacity( + cls, combined: set, current_size: int, others: Iterable[str]): + """ + Add strings into set until reach capacity. Return the all string size of + added set. + """ + if current_size > cls._STRING_SET_SIZE_LIMIT: + return current_size + + for string in others: + if string not in combined: + combined.add(string) + current_size += len(string) + if current_size > cls._STRING_SET_SIZE_LIMIT: + _LOGGER.warning( + "StringSet metrics reaches capacity. Further incoming elements " + "won't be recorded. Current size: %d, last element size: %d.", + current_size, + len(string)) + break + return current_size + + @staticmethod + def singleton(value): + # type: (int) -> DistributionData + return DistributionData(value, 1, value, value) + + class MetricAggregator(object): """For internal use only; no backwards-compatibility guarantees. 
@@ -612,17 +709,18 @@ def result(self, x): class StringSetAggregator(MetricAggregator): @staticmethod def identity_element(): - # type: () -> set - return set() + # type: () -> StringSetData + return StringSetData() def combine(self, x, y): - # type: (set, set) -> set - if len(x) == 0: + # type: (StringSetData, StringSetData) -> StringSetData + if len(x.string_set) == 0: return y - elif len(y) == 0: + elif len(y.string_set) == 0: return x else: - return set.union(x, y) + return x.combine(y) def result(self, x): - return x + # type: (StringSetData) -> set + return set(x.string_set) diff --git a/sdks/python/apache_beam/metrics/cells_test.py b/sdks/python/apache_beam/metrics/cells_test.py index 052ff051bf964..d1ee37b8ed820 100644 --- a/sdks/python/apache_beam/metrics/cells_test.py +++ b/sdks/python/apache_beam/metrics/cells_test.py @@ -26,6 +26,7 @@ from apache_beam.metrics.cells import GaugeCell from apache_beam.metrics.cells import GaugeData from apache_beam.metrics.cells import StringSetCell +from apache_beam.metrics.cells import StringSetData from apache_beam.metrics.metricbase import MetricName @@ -176,9 +177,9 @@ def test_not_leak_mutable_set(self): c.add('test') c.add('another') s = c.get_cumulative() - self.assertEqual(s, set(('test', 'another'))) + self.assertEqual(s, StringSetData({'test', 'another'}, 11)) s.add('yet another') - self.assertEqual(c.get_cumulative(), set(('test', 'another'))) + self.assertEqual(c.get_cumulative(), StringSetData({'test', 'another'}, 11)) def test_combine_appropriately(self): s1 = StringSetCell() @@ -190,7 +191,16 @@ def test_combine_appropriately(self): s2.add('3') result = s2.combine(s1) - self.assertEqual(result.data, set(('1', '2', '3'))) + self.assertEqual(result.data, StringSetData({'1', '2', '3'})) + + def test_add_size_tracked_correctly(self): + s = StringSetCell() + s.add('1') + s.add('2') + self.assertEqual(s.data.string_size, 2) + s.add('2') + s.add('3') + self.assertEqual(s.data.string_size, 3) if __name__ == '__main__': diff --git a/sdks/python/apache_beam/metrics/execution.py b/sdks/python/apache_beam/metrics/execution.py index 37007add91638..fa70d3a4d9c01 100644 --- a/sdks/python/apache_beam/metrics/execution.py +++ b/sdks/python/apache_beam/metrics/execution.py @@ -47,6 +47,7 @@ from apache_beam.metrics.cells import DistributionCell from apache_beam.metrics.cells import GaugeCell from apache_beam.metrics.cells import StringSetCell +from apache_beam.metrics.cells import StringSetData from apache_beam.runners.worker import statesampler from apache_beam.runners.worker.statesampler import get_current_tracker @@ -356,7 +357,7 @@ def __init__( counters=None, # type: Optional[Dict[MetricKey, int]] distributions=None, # type: Optional[Dict[MetricKey, DistributionData]] gauges=None, # type: Optional[Dict[MetricKey, GaugeData]] - string_sets=None, # type: Optional[Dict[MetricKey, set]] + string_sets=None, # type: Optional[Dict[MetricKey, StringSetData]] ): # type: (...) 
-> None diff --git a/sdks/python/apache_beam/metrics/execution_test.py b/sdks/python/apache_beam/metrics/execution_test.py index b157aeb20e9ed..38e27f1f3d0c4 100644 --- a/sdks/python/apache_beam/metrics/execution_test.py +++ b/sdks/python/apache_beam/metrics/execution_test.py @@ -110,11 +110,12 @@ def test_get_cumulative_or_updates(self): self.assertEqual( set(all_values), {v.value for _, v in cumulative.gauges.items()}) - self.assertEqual({str(i % 7) - for i in all_values}, - functools.reduce( - set.union, - (v for _, v in cumulative.string_sets.items()))) + self.assertEqual( + {str(i % 7) + for i in all_values}, + functools.reduce( + set.union, + (v.string_set for _, v in cumulative.string_sets.items()))) if __name__ == '__main__': diff --git a/sdks/python/apache_beam/metrics/monitoring_infos.py b/sdks/python/apache_beam/metrics/monitoring_infos.py index a9540f2846adc..09cb350b38268 100644 --- a/sdks/python/apache_beam/metrics/monitoring_infos.py +++ b/sdks/python/apache_beam/metrics/monitoring_infos.py @@ -31,6 +31,7 @@ from apache_beam.metrics.cells import DistributionResult from apache_beam.metrics.cells import GaugeData from apache_beam.metrics.cells import GaugeResult +from apache_beam.metrics.cells import StringSetData from apache_beam.portability import common_urns from apache_beam.portability.api import metrics_pb2 @@ -305,10 +306,12 @@ def user_set_string(namespace, name, metric, ptransform=None): Args: namespace: User-defined namespace of StringSet. name: Name of StringSet. - metric: The set representing the metrics. + metric: The StringSetData representing the metrics. ptransform: The ptransform id used as a label. """ labels = create_labels(ptransform=ptransform, namespace=namespace, name=name) + if isinstance(metric, StringSetData): + metric = metric.string_set if isinstance(metric, set): metric = list(metric) if isinstance(metric, list): From c4b41708ffa68652cbead0a67aae0c0f0f358a3b Mon Sep 17 00:00:00 2001 From: Jeff Kinard Date: Tue, 8 Oct 2024 19:31:35 -0400 Subject: [PATCH 04/14] Force kafka 3.1.2 for expansion-service jar (#32703) Signed-off-by: Jeffrey Kinard --- sdks/java/io/expansion-service/build.gradle | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sdks/java/io/expansion-service/build.gradle b/sdks/java/io/expansion-service/build.gradle index d7fef3d823324..26a001b6ea240 100644 --- a/sdks/java/io/expansion-service/build.gradle +++ b/sdks/java/io/expansion-service/build.gradle @@ -27,6 +27,12 @@ applyJavaNature( shadowClosure: {}, ) +// TODO(https://github.com/apache/beam/pull/32486/) Use library.java.kafka_clients once >=3.1.0 is set as default +configurations.runtimeClasspath { + // Pin kafka-clients version due to <3.1.0 missing auth callback classes + resolutionStrategy.force 'org.apache.kafka:kafka-clients:3.1.2' +} + shadowJar { mergeServiceFiles() } @@ -52,8 +58,7 @@ dependencies { runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:1.4.2") runtimeOnly project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow") - // TODO(https://github.com/apache/beam/pull/32486/) Use library.java.kafka_clients once 3.1.2 is set as default - runtimeOnly ("org.apache.kafka:kafka-clients:3.1.2") + runtimeOnly library.java.kafka_clients runtimeOnly library.java.slf4j_jdk14 } From 14793629dfef2547d4d8d454abee0894da18bbb6 Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Tue, 8 Oct 2024 19:51:38 -0400 Subject: [PATCH 05/14] Report File Lineage on directory (#32662) * Report File Lineage on directory * added comments, restore lineage assert 
in TextIOIT * Report bucket level Lineage for files larger than 100 * fix lint --- .../org/apache/beam/sdk/io/FileBasedSink.java | 16 +++++- .../apache/beam/sdk/io/FileBasedSource.java | 36 +++++++++++- .../org/apache/beam/sdk/io/FileSystem.java | 14 ++++- .../org/apache/beam/sdk/io/FileSystems.java | 29 +++++++++- .../ReadAllViaFileBasedSourceTransform.java | 33 ++++++++++- .../extensions/gcp/storage/GcsFileSystem.java | 12 +++- .../gcp/storage/GcsFileSystemTest.java | 18 ++++++ .../beam/sdk/io/aws/s3/S3FileSystem.java | 12 +++- .../beam/sdk/io/aws/s3/S3FileSystemTest.java | 17 ++++++ .../beam/sdk/io/aws2/s3/S3FileSystem.java | 12 +++- .../beam/sdk/io/aws2/s3/S3FileSystemTest.java | 17 ++++++ .../blobstore/AzureBlobStoreFileSystem.java | 7 ++- .../AzureBlobStoreFileSystemTest.java | 18 ++++++ .../org/apache/beam/sdk/io/text/TextIOIT.java | 13 ++++- .../python/apache_beam/io/aws/s3filesystem.py | 8 ++- .../apache_beam/io/aws/s3filesystem_test.py | 9 +++ .../io/azure/blobstoragefilesystem.py | 9 ++- .../io/azure/blobstoragefilesystem_test.py | 12 ++++ sdks/python/apache_beam/io/filebasedsink.py | 24 +++++++- sdks/python/apache_beam/io/filebasedsource.py | 57 ++++++++++++++++++- sdks/python/apache_beam/io/filesystem.py | 6 +- sdks/python/apache_beam/io/filesystems.py | 26 +++++++-- .../apache_beam/io/gcp/gcsfilesystem.py | 10 +++- .../apache_beam/io/gcp/gcsfilesystem_test.py | 9 +++ 24 files changed, 393 insertions(+), 31 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java index b7523ee12b56a..7eb04519555b2 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java @@ -687,11 +687,25 @@ protected final List, ResourceId>> finalizeDestinati distinctFilenames.get(finalFilename)); distinctFilenames.put(finalFilename, result); outputFilenames.add(KV.of(result, finalFilename)); - FileSystems.reportSinkLineage(finalFilename); } + reportSinkLineage(outputFilenames); return outputFilenames; } + /** + * Report sink Lineage. Report every file if number of files no more than 100, otherwise only + * report at directory level. 
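+   *
+   * <p>For example, a write producing several hundred shards under one output directory
+   * reports just that directory, rather than every shard path.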
+ */ + private void reportSinkLineage(List, ResourceId>> outputFilenames) { + if (outputFilenames.size() <= 100) { + for (KV, ResourceId> kv : outputFilenames) { + FileSystems.reportSinkLineage(kv.getValue()); + } + } else { + FileSystems.reportSinkLineage(outputFilenames.get(0).getValue().getCurrentDirectory()); + } + } + private Collection> createMissingEmptyShards( @Nullable DestinationT dest, @Nullable Integer numShards, diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSource.java index 7ddfde441aedc..8d6e52c64a527 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSource.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSource.java @@ -26,10 +26,12 @@ import java.nio.channels.ReadableByteChannel; import java.nio.channels.SeekableByteChannel; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.ListIterator; import java.util.NoSuchElementException; import java.util.concurrent.atomic.AtomicReference; +import org.apache.beam.sdk.io.FileSystem.LineageLevel; import org.apache.beam.sdk.io.fs.EmptyMatchTreatment; import org.apache.beam.sdk.io.fs.MatchResult; import org.apache.beam.sdk.io.fs.MatchResult.Metadata; @@ -297,9 +299,10 @@ public final List> split( System.currentTimeMillis() - startTime, expandedFiles.size(), splitResults.size()); + + reportSourceLineage(expandedFiles); return splitResults; } else { - FileSystems.reportSourceLineage(getSingleFileMetadata().resourceId()); if (isSplittable()) { @SuppressWarnings("unchecked") List> splits = @@ -315,6 +318,37 @@ public final List> split( } } + /** + * Report source Lineage. Due to the size limit of Beam metrics, report full file name or only dir + * depend on the number of files. + * + *
+   * <p>- Number of files <= 100: report full file paths;
+   *
+   * <p>- Number of directories <= 100: report directory names (one level up);
+   *
+   * <p>
- Otherwise, report top level only. + */ + private static void reportSourceLineage(List expandedFiles) { + if (expandedFiles.size() <= 100) { + for (Metadata metadata : expandedFiles) { + FileSystems.reportSourceLineage(metadata.resourceId()); + } + } else { + HashSet uniqueDirs = new HashSet<>(); + for (Metadata metadata : expandedFiles) { + ResourceId dir = metadata.resourceId().getCurrentDirectory(); + uniqueDirs.add(dir); + if (uniqueDirs.size() > 100) { + FileSystems.reportSourceLineage(dir, LineageLevel.TOP_LEVEL); + return; + } + } + for (ResourceId uniqueDir : uniqueDirs) { + FileSystems.reportSourceLineage(uniqueDir); + } + } + } + /** * Determines whether a file represented by this source is can be split into bundles. * diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystem.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystem.java index 11314a318b256..73caa7284e986 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystem.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystem.java @@ -157,10 +157,20 @@ protected abstract void rename( */ protected abstract String getScheme(); + public enum LineageLevel { + FILE, + TOP_LEVEL + } + + /** Report {@link Lineage} metrics for resource id at file level. */ + protected void reportLineage(ResourceIdT resourceId, Lineage lineage) { + reportLineage(resourceId, lineage, LineageLevel.FILE); + } + /** - * Report {@link Lineage} metrics for resource id. + * Report {@link Lineage} metrics for resource id to a given level. * *
Unless override by FileSystem implementations, default to no-op. */ - protected void reportLineage(ResourceIdT unusedId, Lineage unusedLineage) {} + protected void reportLineage(ResourceIdT unusedId, Lineage unusedLineage, LineageLevel level) {} } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java index a4ca9b80dce37..fb25cac6262f9 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java @@ -39,6 +39,7 @@ import java.util.regex.Pattern; import javax.annotation.Nonnull; import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.io.FileSystem.LineageLevel; import org.apache.beam.sdk.io.fs.CreateOptions; import org.apache.beam.sdk.io.fs.CreateOptions.StandardCreateOptions; import org.apache.beam.sdk.io.fs.EmptyMatchTreatment; @@ -398,12 +399,36 @@ public ResourceId apply(@Nonnull Metadata input) { /** Report source {@link Lineage} metrics for resource id. */ public static void reportSourceLineage(ResourceId resourceId) { - getFileSystemInternal(resourceId.getScheme()).reportLineage(resourceId, Lineage.getSources()); + reportSourceLineage(resourceId, LineageLevel.FILE); } /** Report sink {@link Lineage} metrics for resource id. */ public static void reportSinkLineage(ResourceId resourceId) { - getFileSystemInternal(resourceId.getScheme()).reportLineage(resourceId, Lineage.getSinks()); + reportSinkLineage(resourceId, LineageLevel.FILE); + } + + /** + * Report source {@link Lineage} metrics for resource id at given level. + * + *
+   * <p>Internal API, no backward compatibility guaranteed.
+   */
+  public static void reportSourceLineage(ResourceId resourceId, LineageLevel level) {
+    reportLineage(resourceId, Lineage.getSources(), level);
+  }
+
+  /**
+   * Report sink {@link Lineage} metrics for resource id at given level.
+   *
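+   * <p>An illustrative call, with a hypothetical resource spec, that reports only the
+   * directory-level entry:
+   *
+   * <pre>{@code
+   * ResourceId dir = FileSystems.matchNewResource("gs://bucket/output/", true); // isDirectory
+   * FileSystems.reportSinkLineage(dir, LineageLevel.TOP_LEVEL);
+   * }</pre>
+   *
+   * <p>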
Internal API, no backward compatibility guaranteed. + */ + public static void reportSinkLineage(ResourceId resourceId, LineageLevel level) { + reportLineage(resourceId, Lineage.getSinks(), level); + } + + /** Report {@link Lineage} metrics for resource id at given level to given Lineage container. */ + private static void reportLineage(ResourceId resourceId, Lineage lineage, LineageLevel level) { + FileSystem fileSystem = getFileSystemInternal(resourceId.getScheme()); + fileSystem.reportLineage(resourceId, lineage, level); } private static class FilterResult { diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ReadAllViaFileBasedSourceTransform.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ReadAllViaFileBasedSourceTransform.java index bbac337f2d0fe..843deb5cab320 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ReadAllViaFileBasedSourceTransform.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ReadAllViaFileBasedSourceTransform.java @@ -19,7 +19,9 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.IOException; +import java.util.HashSet; import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.io.FileSystem.LineageLevel; import org.apache.beam.sdk.io.fs.MatchResult; import org.apache.beam.sdk.io.fs.ResourceId; import org.apache.beam.sdk.io.range.OffsetRange; @@ -30,6 +32,7 @@ import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; +import org.checkerframework.checker.nullness.qual.Nullable; public abstract class ReadAllViaFileBasedSourceTransform extends PTransform, PCollection> { @@ -81,6 +84,9 @@ public static class SplitIntoRangesFn extends DoFn> { private final long desiredBundleSizeBytes; + // track unique resourceId met. Access it only inside reportSourceLineage + private transient @Nullable HashSet uniqueIds; + public SplitIntoRangesFn(long desiredBundleSizeBytes) { this.desiredBundleSizeBytes = desiredBundleSizeBytes; } @@ -88,6 +94,7 @@ public SplitIntoRangesFn(long desiredBundleSizeBytes) { @ProcessElement public void process(ProcessContext c) { MatchResult.Metadata metadata = c.element().getMetadata(); + reportSourceLineage(metadata.resourceId()); if (!metadata.isReadSeekEfficient()) { c.output(KV.of(c.element(), new OffsetRange(0, metadata.sizeBytes()))); return; @@ -97,6 +104,31 @@ public void process(ProcessContext c) { c.output(KV.of(c.element(), range)); } } + + /** + * Report source Lineage. Due to the size limit of Beam metrics, report full file name or only + * top level depend on the number of files. + * + *
+     * <p>- Number of files<=100, report full file paths;
+     *
+     * <p>
- Otherwise, report top level only. + */ + @SuppressWarnings("nullness") // only called in processElement, guaranteed to be non-null + private void reportSourceLineage(ResourceId resourceId) { + if (uniqueIds == null) { + uniqueIds = new HashSet<>(); + } else if (uniqueIds.isEmpty()) { + // already at capacity + FileSystems.reportSourceLineage(resourceId, LineageLevel.TOP_LEVEL); + return; + } + uniqueIds.add(resourceId); + FileSystems.reportSourceLineage(resourceId, LineageLevel.FILE); + if (uniqueIds.size() >= 100) { + // avoid reference leak + uniqueIds.clear(); + } + } } public abstract static class AbstractReadFileRangesFn @@ -140,7 +172,6 @@ public void process(ProcessContext c) throws IOException { throw e; } } - FileSystems.reportSourceLineage(resourceId); } } } diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystem.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystem.java index 6332051c0ddc7..32079ebf55a38 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystem.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystem.java @@ -217,9 +217,19 @@ protected String getScheme() { @Override protected void reportLineage(GcsResourceId resourceId, Lineage lineage) { + reportLineage(resourceId, lineage, LineageLevel.FILE); + } + + @Override + protected void reportLineage(GcsResourceId resourceId, Lineage lineage, LineageLevel level) { GcsPath path = resourceId.getGcsPath(); if (!path.getBucket().isEmpty()) { - lineage.add("gcs", ImmutableList.of(path.getBucket(), path.getObject())); + ImmutableList.Builder segments = + ImmutableList.builder().add(path.getBucket()); + if (level != LineageLevel.TOP_LEVEL && !path.getObject().isEmpty()) { + segments.add(path.getObject()); + } + lineage.add("gcs", segments.build()); } else { LOG.warn("Report Lineage on relative path {} is unsupported", path.getObject()); } diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystemTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystemTest.java index 0b79cde1f187d..f2ff7118f95de 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystemTest.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystemTest.java @@ -23,6 +23,9 @@ import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Matchers.eq; import static org.mockito.Matchers.isNull; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import com.google.api.services.storage.model.Objects; @@ -38,6 +41,7 @@ import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath; import org.apache.beam.sdk.io.fs.MatchResult; import org.apache.beam.sdk.io.fs.MatchResult.Status; +import org.apache.beam.sdk.metrics.Lineage; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.FluentIterable; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; @@ 
-235,6 +239,20 @@ public void testMatchNonGlobs() throws Exception { contains(toFilenames(matchResults.get(4)).toArray())); } + @Test + public void testReportLineageOnBucket() { + verifyLineage("gs://testbucket", ImmutableList.of("testbucket")); + verifyLineage("gs://testbucket/", ImmutableList.of("testbucket")); + verifyLineage("gs://testbucket/foo/bar.txt", ImmutableList.of("testbucket", "foo/bar.txt")); + } + + private void verifyLineage(String uri, List expected) { + GcsResourceId path = GcsResourceId.fromGcsPath(GcsPath.fromUri(uri)); + Lineage mockLineage = mock(Lineage.class); + gcsFileSystem.reportLineage(path, mockLineage); + verify(mockLineage, times(1)).add("gcs", expected); + } + private StorageObject createStorageObject(String gcsFilename, long fileSize) { GcsPath gcsPath = GcsPath.fromUri(gcsFilename); // Google APIs will use null for empty files. diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java index 7ed56efa44bda..75d66c46478a7 100644 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java +++ b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java @@ -627,7 +627,17 @@ protected S3ResourceId matchNewResource(String singleResourceSpec, boolean isDir @Override protected void reportLineage(S3ResourceId resourceId, Lineage lineage) { - lineage.add("s3", ImmutableList.of(resourceId.getBucket(), resourceId.getKey())); + reportLineage(resourceId, lineage, LineageLevel.FILE); + } + + @Override + protected void reportLineage(S3ResourceId resourceId, Lineage lineage, LineageLevel level) { + ImmutableList.Builder segments = + ImmutableList.builder().add(resourceId.getBucket()); + if (level != LineageLevel.TOP_LEVEL && !resourceId.getKey().isEmpty()) { + segments.add(resourceId.getKey()); + } + lineage.add("s3", segments.build()); } /** diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java index fbef40f4b5c04..db749d7080e2c 100644 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java +++ b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java @@ -34,6 +34,7 @@ import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.Matchers.anyObject; import static org.mockito.Matchers.notNull; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -74,6 +75,7 @@ import org.apache.beam.sdk.io.aws.options.S3Options; import org.apache.beam.sdk.io.fs.CreateOptions; import org.apache.beam.sdk.io.fs.MatchResult; +import org.apache.beam.sdk.metrics.Lineage; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -1209,6 +1211,21 @@ public void testWriteAndReadWithS3Options() throws IOException { open.close(); } + @Test + public void testReportLineageOnBucket() { + verifyLineage("s3://testbucket", ImmutableList.of("testbucket")); + verifyLineage("s3://testbucket/", ImmutableList.of("testbucket")); + verifyLineage("s3://testbucket/foo/bar.txt", ImmutableList.of("testbucket", "foo/bar.txt")); + } + 
+ private void verifyLineage(String uri, List expected) { + S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("mys3"), client); + S3ResourceId path = S3ResourceId.fromUri(uri); + Lineage mockLineage = mock(Lineage.class); + s3FileSystem.reportLineage(path, mockLineage); + verify(mockLineage, times(1)).add("s3", expected); + } + /** A mockito argument matcher to implement equality on GetObjectMetadataRequest. */ private static class GetObjectMetadataRequestMatcher implements ArgumentMatcher { diff --git a/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystem.java b/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystem.java index 384c8c627ee7f..e851f8333d0b2 100644 --- a/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystem.java +++ b/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystem.java @@ -658,7 +658,17 @@ protected S3ResourceId matchNewResource(String singleResourceSpec, boolean isDir @Override protected void reportLineage(S3ResourceId resourceId, Lineage lineage) { - lineage.add("s3", ImmutableList.of(resourceId.getBucket(), resourceId.getKey())); + reportLineage(resourceId, lineage, LineageLevel.FILE); + } + + @Override + protected void reportLineage(S3ResourceId resourceId, Lineage lineage, LineageLevel level) { + ImmutableList.Builder segments = + ImmutableList.builder().add(resourceId.getBucket()); + if (level != LineageLevel.TOP_LEVEL && !resourceId.getKey().isEmpty()) { + segments.add(resourceId.getKey()); + } + lineage.add("s3", segments.build()); } /** diff --git a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystemTest.java b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystemTest.java index 423176e52a75f..39995b8b31670 100644 --- a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystemTest.java +++ b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystemTest.java @@ -34,6 +34,7 @@ import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.notNull; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -55,6 +56,7 @@ import org.apache.beam.sdk.io.aws2.options.S3Options; import org.apache.beam.sdk.io.fs.CreateOptions; import org.apache.beam.sdk.io.fs.MatchResult; +import org.apache.beam.sdk.metrics.Lineage; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -1068,6 +1070,21 @@ public void testWriteAndRead() throws IOException { open.close(); } + @Test + public void testReportLineageOnBucket() { + verifyLineage("s3://testbucket", ImmutableList.of("testbucket")); + verifyLineage("s3://testbucket/", ImmutableList.of("testbucket")); + verifyLineage("s3://testbucket/foo/bar.txt", ImmutableList.of("testbucket", "foo/bar.txt")); + } + + private void verifyLineage(String uri, List expected) { + S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("mys3"), client); + S3ResourceId path = S3ResourceId.fromUri(uri); + Lineage mockLineage = mock(Lineage.class); + s3FileSystem.reportLineage(path, mockLineage); + verify(mockLineage, times(1)).add("s3", expected); + } + /** A 
mockito argument matcher to implement equality on GetHeadObjectRequest. */ private static class GetHeadObjectRequestMatcher implements ArgumentMatcher { diff --git a/sdks/java/io/azure/src/main/java/org/apache/beam/sdk/io/azure/blobstore/AzureBlobStoreFileSystem.java b/sdks/java/io/azure/src/main/java/org/apache/beam/sdk/io/azure/blobstore/AzureBlobStoreFileSystem.java index 5137eaf9bb2dc..bbb2e22d94ce6 100644 --- a/sdks/java/io/azure/src/main/java/org/apache/beam/sdk/io/azure/blobstore/AzureBlobStoreFileSystem.java +++ b/sdks/java/io/azure/src/main/java/org/apache/beam/sdk/io/azure/blobstore/AzureBlobStoreFileSystem.java @@ -453,7 +453,12 @@ protected AzfsResourceId matchNewResource(String singleResourceSpec, boolean isD @Override protected void reportLineage(AzfsResourceId resourceId, Lineage lineage) { - if (!Strings.isNullOrEmpty(resourceId.getBlob())) { + reportLineage(resourceId, lineage, LineageLevel.FILE); + } + + @Override + protected void reportLineage(AzfsResourceId resourceId, Lineage lineage, LineageLevel level) { + if (level != LineageLevel.TOP_LEVEL && !Strings.isNullOrEmpty(resourceId.getBlob())) { lineage.add( "abs", ImmutableList.of( diff --git a/sdks/java/io/azure/src/test/java/org/apache/beam/sdk/io/azure/blobstore/AzureBlobStoreFileSystemTest.java b/sdks/java/io/azure/src/test/java/org/apache/beam/sdk/io/azure/blobstore/AzureBlobStoreFileSystemTest.java index 545f314688c3c..27a2220c2e447 100644 --- a/sdks/java/io/azure/src/test/java/org/apache/beam/sdk/io/azure/blobstore/AzureBlobStoreFileSystemTest.java +++ b/sdks/java/io/azure/src/test/java/org/apache/beam/sdk/io/azure/blobstore/AzureBlobStoreFileSystemTest.java @@ -25,6 +25,7 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -51,6 +52,7 @@ import org.apache.beam.sdk.io.azure.options.BlobstoreOptions; import org.apache.beam.sdk.io.fs.CreateOptions; import org.apache.beam.sdk.io.fs.MatchResult; +import org.apache.beam.sdk.metrics.Lineage; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.FluentIterable; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; @@ -338,4 +340,20 @@ public void testMatchNonGlobs() throws Exception { blobContainerClient.delete(); } + + @Test + public void testReportLineageOnBucket() { + verifyLineage("azfs://account/container", ImmutableList.of("account", "container")); + verifyLineage("azfs://account/container/", ImmutableList.of("account", "container")); + verifyLineage( + "azfs://account/container/foo/bar.txt", + ImmutableList.of("account", "container", "foo/bar.txt")); + } + + private void verifyLineage(String uri, List expected) { + AzfsResourceId path = AzfsResourceId.fromUri(uri); + Lineage mockLineage = mock(Lineage.class); + azureBlobStoreFileSystem.reportLineage(path, mockLineage); + verify(mockLineage, times(1)).add("abs", expected); + } } diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java index 859c03ed7750d..ecdde5cbc8fe7 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java +++ 
b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java @@ -154,9 +154,16 @@ public void writeThenReadAll() { PipelineResult result = pipeline.run(); PipelineResult.State pipelineState = result.waitUntilFinish(); - assertEquals( - Lineage.query(result.metrics(), Lineage.Type.SOURCE), - Lineage.query(result.metrics(), Lineage.Type.SINK)); + + Set sources = Lineage.query(result.metrics(), Lineage.Type.SOURCE); + Set sinks = Lineage.query(result.metrics(), Lineage.Type.SINK); + if (numShards <= 100) { + // both should be the full files, if supported by the runner + assertEquals(sources, sinks); + } else { + // if supported by runner, both should be non-empty + assertEquals(sources.isEmpty(), sinks.isEmpty()); + } collectAndPublishMetrics(result); // Fail the test if pipeline failed. diff --git a/sdks/python/apache_beam/io/aws/s3filesystem.py b/sdks/python/apache_beam/io/aws/s3filesystem.py index e181beac4a584..ffbce5893a969 100644 --- a/sdks/python/apache_beam/io/aws/s3filesystem.py +++ b/sdks/python/apache_beam/io/aws/s3filesystem.py @@ -315,10 +315,14 @@ def delete(self, paths): if exceptions: raise BeamIOError("Delete operation failed", exceptions) - def report_lineage(self, path, lineage): + def report_lineage(self, path, lineage, level=None): try: - components = s3io.parse_s3_path(path, get_account=True) + components = s3io.parse_s3_path(path, object_optional=True) except ValueError: # report lineage is fail-safe return + if level == FileSystem.LineageLevel.TOP_LEVEL or \ + (len(components) > 1 and components[-1] == ''): + # bucket only + components = components[:-1] lineage.add('s3', *components) diff --git a/sdks/python/apache_beam/io/aws/s3filesystem_test.py b/sdks/python/apache_beam/io/aws/s3filesystem_test.py index 60e6f319b2c96..87403f482bd25 100644 --- a/sdks/python/apache_beam/io/aws/s3filesystem_test.py +++ b/sdks/python/apache_beam/io/aws/s3filesystem_test.py @@ -265,6 +265,15 @@ def test_rename(self, unused_mock_arg): src_dest_pairs = list(zip(sources, destinations)) s3io_mock.rename_files.assert_called_once_with(src_dest_pairs) + def test_lineage(self): + self._verify_lineage("s3://bucket/", ("bucket", )) + self._verify_lineage("s3://bucket/foo/bar.txt", ("bucket", "foo/bar.txt")) + + def _verify_lineage(self, uri, expected_segments): + lineage_mock = mock.MagicMock() + self.fs.report_lineage(uri, lineage_mock) + lineage_mock.add.assert_called_once_with("s3", *expected_segments) + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) diff --git a/sdks/python/apache_beam/io/azure/blobstoragefilesystem.py b/sdks/python/apache_beam/io/azure/blobstoragefilesystem.py index bb56fa09d3703..4495245dc54a3 100644 --- a/sdks/python/apache_beam/io/azure/blobstoragefilesystem.py +++ b/sdks/python/apache_beam/io/azure/blobstoragefilesystem.py @@ -317,10 +317,15 @@ def delete(self, paths): if exceptions: raise BeamIOError("Delete operation failed", exceptions) - def report_lineage(self, path, lineage): + def report_lineage(self, path, lineage, level=None): try: - components = blobstorageio.parse_azfs_path(path, get_account=True) + components = blobstorageio.parse_azfs_path( + path, blob_optional=True, get_account=True) except ValueError: # report lineage is fail-safe return + if level == FileSystem.LineageLevel.TOP_LEVEL \ + or(len(components) > 1 and components[-1] == ''): + # bucket only + components = components[:-1] lineage.add('abs', *components) diff --git a/sdks/python/apache_beam/io/azure/blobstoragefilesystem_test.py 
b/sdks/python/apache_beam/io/azure/blobstoragefilesystem_test.py index cee459f5b8a20..138fe5f78b20c 100644 --- a/sdks/python/apache_beam/io/azure/blobstoragefilesystem_test.py +++ b/sdks/python/apache_beam/io/azure/blobstoragefilesystem_test.py @@ -320,6 +320,18 @@ def test_rename(self, unused_mock_blobstorageio): src_dest_pairs = list(zip(sources, destinations)) blobstorageio_mock.rename_files.assert_called_once_with(src_dest_pairs) + def test_lineage(self): + self._verify_lineage( + "azfs://storageaccount/container/", ("storageaccount", "container")) + self._verify_lineage( + "azfs://storageaccount/container/foo/bar.txt", + ("storageaccount", "container", "foo/bar.txt")) + + def _verify_lineage(self, uri, expected_segments): + lineage_mock = mock.MagicMock() + self.fs.report_lineage(uri, lineage_mock) + lineage_mock.add.assert_called_once_with("abs", *expected_segments) + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) diff --git a/sdks/python/apache_beam/io/filebasedsink.py b/sdks/python/apache_beam/io/filebasedsink.py index c708e117c3a1d..f9d4303c8c785 100644 --- a/sdks/python/apache_beam/io/filebasedsink.py +++ b/sdks/python/apache_beam/io/filebasedsink.py @@ -280,9 +280,31 @@ def _check_state_for_finalize_write(self, writer_results, num_shards): src_files.append(src) dst_files.append(dst) - FileSystems.report_sink_lineage(dst) + + self._report_sink_lineage(dst_glob, dst_files) return src_files, dst_files, delete_files, num_skipped + def _report_sink_lineage(self, dst_glob, dst_files): + """ + Report sink Lineage. Report every file if number of files no more than 100, + otherwise only report at directory level. + """ + if len(dst_files) <= 100: + for dst in dst_files: + FileSystems.report_sink_lineage(dst) + else: + dst = dst_glob + # dst_glob has a wildcard for shard number (see _shard_name_template) + sep = dst_glob.find('*') + if sep > 0: + dst = dst[:sep] + try: + dst, _ = FileSystems.split(dst) + except ValueError: + return # lineage report is fail-safe + + FileSystems.report_sink_lineage(dst) + @check_accessible(['file_path_prefix']) def finalize_write( self, init_result, writer_results, unused_pre_finalize_results): diff --git a/sdks/python/apache_beam/io/filebasedsource.py b/sdks/python/apache_beam/io/filebasedsource.py index efd863810ed75..a02bc6de32c73 100644 --- a/sdks/python/apache_beam/io/filebasedsource.py +++ b/sdks/python/apache_beam/io/filebasedsource.py @@ -39,6 +39,7 @@ from apache_beam.io import range_trackers from apache_beam.io.filesystem import CompressionTypes from apache_beam.io.filesystem import FileMetadata +from apache_beam.io.filesystem import FileSystem from apache_beam.io.filesystems import FileSystems from apache_beam.io.restriction_trackers import OffsetRange from apache_beam.options.value_provider import StaticValueProvider @@ -168,10 +169,38 @@ def _get_concat_source(self) -> concat_source.ConcatSource: min_bundle_size=self._min_bundle_size, splittable=splittable) single_file_sources.append(single_file_source) - FileSystems.report_source_lineage(file_name) + + self._report_source_lineage(files_metadata) self._concat_source = concat_source.ConcatSource(single_file_sources) + return self._concat_source + def _report_source_lineage(self, files_metadata): + """ + Report source Lineage. 
depend on the number of files, report full file + name, only dir, or only top level + """ + if len(files_metadata) <= 100: + for file_metadata in files_metadata: + FileSystems.report_source_lineage(file_metadata.path) + else: + size_track = set() + for file_metadata in files_metadata: + if len(size_track) >= 100: + FileSystems.report_source_lineage( + file_metadata.path, level=FileSystem.LineageLevel.TOP_LEVEL) + return + + try: + base, _ = FileSystems.split(file_metadata.path) + except ValueError: + pass + else: + size_track.add(base) + + for base in size_track: + FileSystems.report_source_lineage(base) + def open_file(self, file_name): return FileSystems.open( file_name, @@ -343,6 +372,7 @@ def __init__( self._min_bundle_size = min_bundle_size self._splittable = splittable self._compression_type = compression_type + self._size_track = None def process(self, element: Union[str, FileMetadata], *args, **kwargs) -> Iterable[Tuple[FileMetadata, OffsetRange]]: @@ -352,7 +382,8 @@ def process(self, element: Union[str, FileMetadata], *args, match_results = FileSystems.match([element]) metadata_list = match_results[0].metadata_list for metadata in metadata_list: - FileSystems.report_source_lineage(metadata.path) + self._report_source_lineage(metadata.path) + splittable = ( self._splittable and _determine_splittability_from_compression_type( metadata.path, self._compression_type)) @@ -366,6 +397,28 @@ def process(self, element: Union[str, FileMetadata], *args, metadata, OffsetRange(0, range_trackers.OffsetRangeTracker.OFFSET_INFINITY)) + def _report_source_lineage(self, path): + """ + Report source Lineage. Due to the size limit of Beam metrics, report full + file name or only top level depend on the number of files. + + * Number of files<=100, report full file paths; + + * Otherwise, report top level only. + """ + if self._size_track is None: + self._size_track = set() + elif len(self._size_track) == 0: + FileSystems.report_source_lineage( + path, level=FileSystem.LineageLevel.TOP_LEVEL) + return + + self._size_track.add(path) + FileSystems.report_source_lineage(path) + + if len(self._size_track) >= 100: + self._size_track.clear() + class _ReadRange(DoFn): def __init__( diff --git a/sdks/python/apache_beam/io/filesystem.py b/sdks/python/apache_beam/io/filesystem.py index bdc25dcf0fe54..840fdf3309e7b 100644 --- a/sdks/python/apache_beam/io/filesystem.py +++ b/sdks/python/apache_beam/io/filesystem.py @@ -934,7 +934,11 @@ def delete(self, paths): """ raise NotImplementedError - def report_lineage(self, path, unused_lineage): + class LineageLevel: + FILE = 'FILE' + TOP_LEVEL = 'TOP_LEVEL' + + def report_lineage(self, path, unused_lineage, level=None): """ Report Lineage metrics for path. diff --git a/sdks/python/apache_beam/io/filesystems.py b/sdks/python/apache_beam/io/filesystems.py index ccbeac640765c..a32b85332b608 100644 --- a/sdks/python/apache_beam/io/filesystems.py +++ b/sdks/python/apache_beam/io/filesystems.py @@ -391,13 +391,27 @@ def get_chunk_size(path): return filesystem.CHUNK_SIZE @staticmethod - def report_source_lineage(path): - """Report source :class:`~apache_beam.metrics.metric.Lineage`.""" + def report_source_lineage(path, level=None): + """ + Report source :class:`~apache_beam.metrics.metric.Lineage`. + + Args: + path: string path to be reported. + level: the level of file path. default to + :class:`~apache_beam.io.filesystem.FileSystem.Lineage`.FILE. 
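+
+    Example (illustrative; the paths are hypothetical)::
+
+      FileSystems.report_source_lineage('gs://bucket/dir/file.txt')
+      FileSystems.report_source_lineage(
+          'gs://bucket/dir/', level=FileSystem.LineageLevel.TOP_LEVEL)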
+ """ filesystem = FileSystems.get_filesystem(path) - filesystem.report_lineage(path, Lineage.sources()) + filesystem.report_lineage(path, Lineage.sources(), level=level) @staticmethod - def report_sink_lineage(path): - """Report sink :class:`~apache_beam.metrics.metric.Lineage`.""" + def report_sink_lineage(path, level=None): + """ + Report sink :class:`~apache_beam.metrics.metric.Lineage`. + + Args: + path: string path to be reported. + level: the level of file path. default to + :class:`~apache_beam.io.filesystem.FileSystem.Lineage`.FILE. + """ filesystem = FileSystems.get_filesystem(path) - filesystem.report_lineage(path, Lineage.sinks()) + filesystem.report_lineage(path, Lineage.sinks(), level=level) diff --git a/sdks/python/apache_beam/io/gcp/gcsfilesystem.py b/sdks/python/apache_beam/io/gcp/gcsfilesystem.py index 053b02d325a5c..325f70ddfd96d 100644 --- a/sdks/python/apache_beam/io/gcp/gcsfilesystem.py +++ b/sdks/python/apache_beam/io/gcp/gcsfilesystem.py @@ -366,10 +366,14 @@ def delete(self, paths): if exceptions: raise BeamIOError("Delete operation failed", exceptions) - def report_lineage(self, path, lineage): + def report_lineage(self, path, lineage, level=None): try: - bucket, blob = gcsio.parse_gcs_path(path) + components = gcsio.parse_gcs_path(path, object_optional=True) except ValueError: # report lineage is fail-safe return - lineage.add('gcs', bucket, blob) + if level == FileSystem.LineageLevel.TOP_LEVEL \ + or(len(components) > 1 and components[-1] == ''): + # bucket only + components = components[:-1] + lineage.add('gcs', *components) diff --git a/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py b/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py index 1206529faf01c..ec7fa94b05fd4 100644 --- a/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py +++ b/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py @@ -375,6 +375,15 @@ def test_delete_error(self, mock_gcsio): self.fs.delete(files) gcsio_mock.delete_batch.assert_called() + def test_lineage(self): + self._verify_lineage("gs://bucket/", ("bucket", )) + self._verify_lineage("gs://bucket/foo/bar.txt", ("bucket", "foo/bar.txt")) + + def _verify_lineage(self, uri, expected_segments): + lineage_mock = mock.MagicMock() + self.fs.report_lineage(uri, lineage_mock) + lineage_mock.add.assert_called_once_with("gcs", *expected_segments) + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) From 42cad40532afea05d1001c4dc7f00714f2af4e0d Mon Sep 17 00:00:00 2001 From: Arun Pandian Date: Wed, 9 Oct 2024 03:44:15 -0700 Subject: [PATCH 06/14] [Dataflow Streaming] Remove call to Thread.setName and track thread name inside Work. (#32715) Thread.setName is expensive and uses upto 4% cpu on jobs with many keys. 
--- .../worker/streaming/ActiveWorkState.java | 4 ++- .../dataflow/worker/streaming/Work.java | 9 +++++ .../worker/util/BoundedQueueExecutor.java | 15 -------- .../processing/StreamingWorkScheduler.java | 2 ++ .../worker/util/BoundedQueueExecutorTest.java | 36 ------------------- 5 files changed, 14 insertions(+), 52 deletions(-) diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkState.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkState.java index c80c3a882e528..4607096dd66af 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkState.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkState.java @@ -338,7 +338,7 @@ synchronized void printActiveWork(PrintWriter writer, Instant now) { ""); writer.println( - ""); + ""); // Use StringBuilder because we are appending in loop. StringBuilder activeWorkStatus = new StringBuilder(); int commitsPendingCount = 0; @@ -364,6 +364,8 @@ synchronized void printActiveWork(PrintWriter writer, Instant now) { activeWorkStatus.append(activeWork.getState()); activeWorkStatus.append("\n"); } diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/Work.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/Work.java index e77823602eda7..03d1e1ae469a3 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/Work.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/Work.java @@ -72,6 +72,7 @@ public final class Work implements RefreshableWork { private final String latencyTrackingId; private TimedState currentState; private volatile boolean isFailed; + private volatile String processingThreadName = ""; private Work( WorkItem workItem, @@ -188,6 +189,14 @@ public void setState(State state) { this.currentState = TimedState.create(state, now); } + public String getProcessingThreadName() { + return processingThreadName; + } + + public void setProcessingThreadName(String processingThreadName) { + this.processingThreadName = processingThreadName; + } + @Override public void setFailed() { this.isFailed = true; diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/BoundedQueueExecutor.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/BoundedQueueExecutor.java index 5e3f293f7d5b6..9286be84ceaa3 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/BoundedQueueExecutor.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/BoundedQueueExecutor.java @@ -22,8 +22,6 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import javax.annotation.concurrent.GuardedBy; -import org.apache.beam.runners.dataflow.worker.streaming.ExecutableWork; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.Monitor; import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.Monitor.Guard; @@ -223,18 +221,10 @@ private void executeMonitorHeld(Runnable work, long workBytes) { try { executor.execute( () -> { - String threadName = Thread.currentThread().getName(); try { - if (work instanceof ExecutableWork) { - String workToken = - debugFormattedWorkToken( - ((ExecutableWork) work).work().getWorkItem().getWorkToken()); - Thread.currentThread().setName(threadName + ":" + workToken); - } work.run(); } finally { decrementCounters(workBytes); - Thread.currentThread().setName(threadName); } }); } catch (RuntimeException e) { @@ -244,11 +234,6 @@ private void executeMonitorHeld(Runnable work, long workBytes) { } } - @VisibleForTesting - public static String debugFormattedWorkToken(long workToken) { - return String.format("%016x", workToken); - } - private void decrementCounters(long workBytes) { monitor.enter(); --elementsOutstanding; diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/work/processing/StreamingWorkScheduler.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/work/processing/StreamingWorkScheduler.java index 965a29126dc27..9a3e6eb6b099a 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/work/processing/StreamingWorkScheduler.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/work/processing/StreamingWorkScheduler.java @@ -225,6 +225,7 @@ private void processWork(ComputationState computationState, Work work) { Windmill.WorkItem workItem = work.getWorkItem(); String computationId = computationState.getComputationId(); ByteString key = workItem.getKey(); + work.setProcessingThreadName(Thread.currentThread().getName()); work.setState(Work.State.PROCESSING); setUpWorkLoggingContext(work.getLatencyTrackingId(), computationId); LOG.debug("Starting processing for {}:\n{}", computationId, work); @@ -288,6 +289,7 @@ private void processWork(ComputationState computationState, Work work) { } resetWorkLoggingContext(work.getLatencyTrackingId()); + work.setProcessingThreadName(""); } } diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/BoundedQueueExecutorTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/BoundedQueueExecutorTest.java index ad77958837a12..7349252899202 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/BoundedQueueExecutorTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/BoundedQueueExecutorTest.java @@ -293,40 +293,4 @@ public void testRenderSummaryHtml() { + "Work Queue Bytes: 0/10000000
/n"; assertEquals(expectedSummaryHtml, executor.summaryHtml()); } - - @Test - public void testExecute_updatesThreadNameForExecutableWork() throws InterruptedException { - CountDownLatch waitForWorkExecution = new CountDownLatch(1); - ExecutableWork executableWork = - createWork( - work -> { - assertTrue( - Thread.currentThread() - .getName() - .contains( - BoundedQueueExecutor.debugFormattedWorkToken( - work.getWorkItem().getWorkToken()))); - waitForWorkExecution.countDown(); - }); - executor.execute(executableWork, executableWork.getWorkItem().getSerializedSize()); - waitForWorkExecution.await(); - } - - @Test - public void testForceExecute_updatesThreadNameForExecutableWork() throws InterruptedException { - CountDownLatch waitForWorkExecution = new CountDownLatch(1); - ExecutableWork executableWork = - createWork( - work -> { - assertTrue( - Thread.currentThread() - .getName() - .contains( - BoundedQueueExecutor.debugFormattedWorkToken( - work.getWorkItem().getWorkToken()))); - waitForWorkExecution.countDown(); - }); - executor.forceExecute(executableWork, executableWork.getWorkItem().getSerializedSize()); - waitForWorkExecution.await(); - } } From b781b82842a11e79396fa6177a8944d0f50c68d5 Mon Sep 17 00:00:00 2001 From: DKPHUONG <82434977+DKER2@users.noreply.github.com> Date: Wed, 9 Oct 2024 21:01:42 +0700 Subject: [PATCH 07/14] [Bug] fix fillna function on a single column fail (#32594) * fix bug all arg add as inputs * fix bug for fillna * Revert "fix bug for fillna" This reverts commit 2a5736c8b4af8ffcac6336a79f759f73da67dad1. * fix bug for fillna * add test for fillna a column * add test for fillna a column * add test for fillna a column * revert add test to frames_test * Move test from transforms to frames --- sdks/python/apache_beam/dataframe/frames_test.py | 11 +++++++++++ sdks/python/apache_beam/dataframe/transforms.py | 6 +++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/dataframe/frames_test.py b/sdks/python/apache_beam/dataframe/frames_test.py index 076ab504addec..55d9fc5f4dfbc 100644 --- a/sdks/python/apache_beam/dataframe/frames_test.py +++ b/sdks/python/apache_beam/dataframe/frames_test.py @@ -1025,6 +1025,17 @@ def test_series_fillna_series_as_value(self): self._run_test(lambda df, df2: df.A.fillna(df2.A), df, df2) + def test_dataframe_column_fillna_constant_as_value(self): + from apache_beam.dataframe import convert + from apache_beam.testing.util import assert_that + from apache_beam.testing.util import equal_to + with beam.Pipeline(None) as p: + pcoll = ( + p | beam.Create([1.0, np.nan, -1.0]) | beam.Select(x=lambda x: x)) + df = convert.to_dataframe(pcoll) + df_new = df['x'].fillna(0) + assert_that(convert.to_pcollection(df_new), equal_to([1.0, 0.0, -1.0])) + @unittest.skipIf(PD_VERSION >= (2, 0), 'append removed in Pandas 2.0') def test_append_verify_integrity(self): df1 = pd.DataFrame({'A': range(10), 'B': range(10)}, index=range(10)) diff --git a/sdks/python/apache_beam/dataframe/transforms.py b/sdks/python/apache_beam/dataframe/transforms.py index 852b49c4e2ed6..d0b5be4eb2a9b 100644 --- a/sdks/python/apache_beam/dataframe/transforms.py +++ b/sdks/python/apache_beam/dataframe/transforms.py @@ -395,7 +395,11 @@ def expr_to_stages(expr): if stage is None: # No stage available, compute this expression as part of a new stage. 
- stage = Stage(expr.args(), expr.requires_partition_by()) + stage = Stage([ + arg for arg in expr.args() + if not isinstance(arg, expressions.ConstantExpression) + ], + expr.requires_partition_by()) for arg in expr.args(): # For each argument, declare that it is also available in # this new stage. From 5f83e1d083d22d6230646348e4385383bc0f952e Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Wed, 9 Oct 2024 10:52:50 -0400 Subject: [PATCH 08/14] Fix assert in TextIOIT (#32717) --- .../src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java index ecdde5cbc8fe7..e50a8aba41624 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java @@ -157,7 +157,7 @@ public void writeThenReadAll() { Set sources = Lineage.query(result.metrics(), Lineage.Type.SOURCE); Set sinks = Lineage.query(result.metrics(), Lineage.Type.SINK); - if (numShards <= 100) { + if (numShards != null && numShards <= 100) { // both should be the full files, if supported by the runner assertEquals(sources, sinks); } else { From 6e570d6e5651a7b1ff42cc035db9954776e5c2f2 Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Wed, 9 Oct 2024 10:55:28 -0400 Subject: [PATCH 09/14] Bump to Dataproc 2.2 and Flink 1.17 for load tests (#32632) --- .../beam_LoadTests_Go_CoGBK_Flink_batch.yml | 4 +- .../beam_LoadTests_Go_Combine_Flink_Batch.yml | 4 +- .../beam_LoadTests_Go_GBK_Flink_Batch.yml | 4 +- .../beam_LoadTests_Go_ParDo_Flink_Batch.yml | 4 +- ...eam_LoadTests_Go_SideInput_Flink_Batch.yml | 4 +- ...eam_LoadTests_Python_CoGBK_Flink_Batch.yml | 4 +- ...m_LoadTests_Python_Combine_Flink_Batch.yml | 4 +- ...adTests_Python_Combine_Flink_Streaming.yml | 4 +- .../beam_LoadTests_Python_GBK_Flink_Batch.yml | 4 +- ...eam_LoadTests_Python_ParDo_Flink_Batch.yml | 4 +- ...LoadTests_Python_ParDo_Flink_Streaming.yml | 4 +- .../beam_Publish_Docker_Snapshots.yml | 2 +- .test-infra/dataproc/flink_cluster.sh | 10 +- .../jenkins/CommonTestProperties.groovy | 2 +- .test-infra/jenkins/Flink.groovy | 120 ------------------ 15 files changed, 29 insertions(+), 149 deletions(-) delete mode 100644 .test-infra/jenkins/Flink.groovy diff --git a/.github/workflows/beam_LoadTests_Go_CoGBK_Flink_batch.yml b/.github/workflows/beam_LoadTests_Go_CoGBK_Flink_batch.yml index fae86961ea27b..a2c347ebddb6e 100644 --- a/.github/workflows/beam_LoadTests_Go_CoGBK_Flink_batch.yml +++ b/.github/workflows/beam_LoadTests_Go_CoGBK_Flink_batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-go-cogbk-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: 
gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-cogbk-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Go_Combine_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Go_Combine_Flink_Batch.yml index e814cc809be24..cdb034edcd272 100644 --- a/.github/workflows/beam_LoadTests_Go_Combine_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Go_Combine_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-go-combine-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-combine-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Go_GBK_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Go_GBK_Flink_Batch.yml index 8c01bc1cf3049..f95e1c831da70 100644 --- a/.github/workflows/beam_LoadTests_Go_GBK_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Go_GBK_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-go-gbk-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-gbk-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Go_ParDo_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Go_ParDo_Flink_Batch.yml index ba7323a8b63cb..89b31e02261d7 100644 --- a/.github/workflows/beam_LoadTests_Go_ParDo_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Go_ParDo_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-go-pardo-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true 
HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-pardo-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Go_SideInput_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Go_SideInput_Flink_Batch.yml index 5440ce968898e..7ab3d837721bb 100644 --- a/.github/workflows/beam_LoadTests_Go_SideInput_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Go_SideInput_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-go-sideinput-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-sideinput-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml index e2afb2e2cfd70..9b0dec2249f6f 100644 --- a/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-cogbk-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-cogbk-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_Combine_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Python_Combine_Flink_Batch.yml index 0f666a0b7db61..6363de044149f 100644 --- a/.github/workflows/beam_LoadTests_Python_Combine_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Python_Combine_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-cmb-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: 
https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-py-cmb-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_Combine_Flink_Streaming.yml b/.github/workflows/beam_LoadTests_Python_Combine_Flink_Streaming.yml index 6f491e6b9fa98..baf950589c8e8 100644 --- a/.github/workflows/beam_LoadTests_Python_Combine_Flink_Streaming.yml +++ b/.github/workflows/beam_LoadTests_Python_Combine_Flink_Streaming.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-cmb-flink-streaming-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-py-cmb-flink-streaming-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_GBK_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Python_GBK_Flink_Batch.yml index c938b284a8660..e058852460909 100644 --- a/.github/workflows/beam_LoadTests_Python_GBK_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Python_GBK_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-gbk-flk-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-py-gbk-flk-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Batch.yml index b6c86e01c2995..8d907cf643bfc 100644 --- a/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: 
us-central1-a CLUSTER_NAME: beam-loadtests-py-pardo-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-pardo-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Streaming.yml b/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Streaming.yml index a6443c0df10b7..142d1b5e2dc2a 100644 --- a/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Streaming.yml +++ b/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Streaming.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-pardo-flink-stream-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-pardo-flink-stream-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_Publish_Docker_Snapshots.yml b/.github/workflows/beam_Publish_Docker_Snapshots.yml index 334fa537be565..e37a202267c4d 100644 --- a/.github/workflows/beam_Publish_Docker_Snapshots.yml +++ b/.github/workflows/beam_Publish_Docker_Snapshots.yml @@ -83,7 +83,7 @@ jobs: - name: run Publish Docker Snapshots script for Flink uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :runners:flink:1.15:job-server-container:dockerPush + gradle-command: :runners:flink:1.17:job-server-container:dockerPush arguments: | -Pdocker-repository-root=gcr.io/apache-beam-testing/beam_portability \ -Pdocker-tag-list=latest \ No newline at end of file diff --git a/.test-infra/dataproc/flink_cluster.sh b/.test-infra/dataproc/flink_cluster.sh index b623e890d08f9..759d7a6fcc38b 100755 --- a/.test-infra/dataproc/flink_cluster.sh +++ b/.test-infra/dataproc/flink_cluster.sh @@ -17,7 +17,7 @@ # Provide the following environment to run this script: # # GCLOUD_ZONE: Google cloud zone. Optional. Default: "us-central1-a" -# DATAPROC_VERSION: Dataproc version. Optional. Default: 2.1 +# DATAPROC_VERSION: Dataproc version. Optional. 
Default: 2.2
 # CLUSTER_NAME: Cluster name
 # GCS_BUCKET: GCS bucket url for Dataproc resources (init actions)
 # HARNESS_IMAGES_TO_PULL: Urls to SDK Harness' images to pull on dataproc workers (optional: 0, 1 or multiple urls for every harness image)
@@ -35,8 +35,8 @@
 # HARNESS_IMAGES_TO_PULL='gcr.io//python:latest gcr.io//java:latest' \
 # JOB_SERVER_IMAGE=gcr.io//job-server-flink:latest \
 # ARTIFACTS_DIR=gs:// \
-# FLINK_DOWNLOAD_URL=https://archive.apache.org/dist/flink/flink-1.12.3/flink-1.12.3-bin-scala_2.11.tgz \
-# HADOOP_DOWNLOAD_URL=https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-9.0/flink-shaded-hadoop-2-uber-2.8.3-9.0.jar \
+# FLINK_DOWNLOAD_URL=https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz \
+# HADOOP_DOWNLOAD_URL=https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar \
 # FLINK_NUM_WORKERS=2 \
 # FLINK_TASKMANAGER_SLOTS=1 \
 # DETACHED_MODE=false \
@@ -46,7 +46,7 @@ set -Eeuxo pipefail
 # GCloud properties
 GCLOUD_ZONE="${GCLOUD_ZONE:=us-central1-a}"
-DATAPROC_VERSION="${DATAPROC_VERSION:=2.1-debian}"
+DATAPROC_VERSION="${DATAPROC_VERSION:=2.2-debian}"
 GCLOUD_REGION=`echo $GCLOUD_ZONE | sed -E "s/(-[a-z])?$//"`
 MASTER_NAME="$CLUSTER_NAME-m"
@@ -133,7 +133,7 @@ function create_cluster() {
 # This is why flink init action is invoked last.
 # TODO(11/11/2022) remove --worker-machine-type and --master-machine-type once N2 CPUs quota relaxed
 # Dataproc 2.1 uses n2-standard-2 by default but there is N2 CPUs=24 quota limit
- gcloud dataproc clusters create $CLUSTER_NAME --region=$GCLOUD_REGION --num-workers=$FLINK_NUM_WORKERS \
+ gcloud dataproc clusters create $CLUSTER_NAME --region=$GCLOUD_REGION --num-workers=$FLINK_NUM_WORKERS --public-ip-address \
 --master-machine-type=n1-standard-2 --worker-machine-type=n1-standard-2 --metadata "${metadata}", \
 --image-version=$image_version --zone=$GCLOUD_ZONE --optional-components=FLINK,DOCKER --quiet
 }
diff --git a/.test-infra/jenkins/CommonTestProperties.groovy b/.test-infra/jenkins/CommonTestProperties.groovy
index c6870dea59a10..0670b96ef47c3 100644
--- a/.test-infra/jenkins/CommonTestProperties.groovy
+++ b/.test-infra/jenkins/CommonTestProperties.groovy
@@ -26,7 +26,7 @@ class CommonTestProperties {
 }
 static String getFlinkVersion() {
- return "1.15"
+ return "1.17"
 }
 static String getSparkVersion() {
diff --git a/.test-infra/jenkins/Flink.groovy b/.test-infra/jenkins/Flink.groovy
deleted file mode 100644
index 34f3b60709c0d..0000000000000
--- a/.test-infra/jenkins/Flink.groovy
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. 
- */ - -class Flink { - private static final String flinkDownloadUrl = 'https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz' - private static final String hadoopDownloadUrl = 'https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar' - private static final String FLINK_DIR = '"$WORKSPACE/src/.test-infra/dataproc"' - private static final String FLINK_SCRIPT = 'flink_cluster.sh' - private def job - private String jobName - - Flink(job, String jobName) { - this.job = job - this.jobName = jobName - } - - /** - * Creates Flink cluster and specifies cleanup steps. - * - * @param sdkHarnessImages - the list of published SDK Harness images tags - * @param workerCount - the initial number of worker nodes - * @param jobServerImage - the Flink job server image tag. If left empty, cluster will be set up without the job server. - * @param slotsPerTaskmanager - the number of slots per Flink task manager - */ - void setUp(List sdkHarnessImages, Integer workerCount, String jobServerImage = '', Integer slotsPerTaskmanager = 1) { - setupFlinkCluster(sdkHarnessImages, workerCount, jobServerImage, slotsPerTaskmanager) - addTeardownFlinkStep() - } - - private void setupFlinkCluster(List sdkHarnessImages, Integer workerCount, String jobServerImage, Integer slotsPerTaskmanager) { - String gcsBucket = 'gs://beam-flink-cluster' - String clusterName = getClusterName() - String artifactsDir = "${gcsBucket}/${clusterName}" - String imagesToPull = sdkHarnessImages.join(' ') - - job.steps { - environmentVariables { - env("GCLOUD_ZONE", "us-central1-a") - env("CLUSTER_NAME", clusterName) - env("GCS_BUCKET", gcsBucket) - env("FLINK_DOWNLOAD_URL", flinkDownloadUrl) - env("HADOOP_DOWNLOAD_URL", hadoopDownloadUrl) - env("FLINK_NUM_WORKERS", workerCount) - env("FLINK_TASKMANAGER_SLOTS", slotsPerTaskmanager) - env("DETACHED_MODE", 'true') - - if(imagesToPull) { - env("HARNESS_IMAGES_TO_PULL", imagesToPull) - } - - if(jobServerImage) { - env("JOB_SERVER_IMAGE", jobServerImage) - env("ARTIFACTS_DIR", artifactsDir) - } - } - - shell('echo Setting up flink cluster') - shell("cd ${FLINK_DIR}; ./${FLINK_SCRIPT} create") - } - } - - /** - * Updates the number of worker nodes in a cluster. 
- * - * @param workerCount - the new number of worker nodes in the cluster - */ - void scaleCluster(Integer workerCount) { - job.steps { - shell("echo Changing number of workers to ${workerCount}") - environmentVariables { - env("FLINK_NUM_WORKERS", workerCount) - } - shell("cd ${FLINK_DIR}; ./${FLINK_SCRIPT} restart") - } - } - - private GString getClusterName() { - return "${jobName.toLowerCase().replace("_", "-")}-\$BUILD_ID" - } - - private void addTeardownFlinkStep() { - job.publishers { - postBuildScript { - buildSteps { - postBuildStep { - stopOnFailure(false) - results([ - 'SUCCESS', - 'UNSTABLE', - 'FAILURE', - 'NOT_BUILT', - 'ABORTED' - ]) - buildSteps { - shell { - command("cd ${FLINK_DIR}; ./${FLINK_SCRIPT} delete") - } - } - } - } - markBuildUnstable(false) - } - } - } -} From c31d81ca875637f8b586050cb3c80ae3f41a255d Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Wed, 9 Oct 2024 10:58:37 -0400 Subject: [PATCH 10/14] Invoke teardown when DoFn throws in portable runners (#32522) * Invoke teardown when DoFn throws in portable runners * update CHANGES.md * adjusted comment and logging --- .../beam_PostCommit_Java_PVR_Flink_Batch.json | 2 +- ...m_PostCommit_Java_PVR_Flink_Streaming.json | 2 +- .../beam_PostCommit_Java_PVR_Samza.json | 2 +- ..._PostCommit_Java_PVR_Spark3_Streaming.json | 2 +- .../beam_PostCommit_Java_PVR_Spark_Batch.json | 2 +- CHANGES.md | 1 + .../flink/job-server/flink_job_server.gradle | 1 - .../google-cloud-dataflow-java/build.gradle | 2 +- runners/samza/job-server/build.gradle | 3 ++- .../spark/job-server/spark_job_server.gradle | 2 -- .../harness/control/ProcessBundleHandler.java | 19 ++++++++++++++++++- 11 files changed, 27 insertions(+), 11 deletions(-) diff --git a/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Batch.json b/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Batch.json index b970762c83970..e3d6056a5de96 100644 --- a/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Batch.json +++ b/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Batch.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run", - "https://github.com/apache/beam/pull/31156": "noting that PR #31156 should run this test" + "modification": 1 } diff --git a/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Streaming.json b/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Streaming.json index b60f5c4cc3c80..e3d6056a5de96 100644 --- a/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Streaming.json +++ b/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Streaming.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run", - "modification": 0 + "modification": 1 } diff --git a/.github/trigger_files/beam_PostCommit_Java_PVR_Samza.json b/.github/trigger_files/beam_PostCommit_Java_PVR_Samza.json index b60f5c4cc3c80..e3d6056a5de96 100644 --- a/.github/trigger_files/beam_PostCommit_Java_PVR_Samza.json +++ b/.github/trigger_files/beam_PostCommit_Java_PVR_Samza.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run", - "modification": 0 + "modification": 1 } diff --git a/.github/trigger_files/beam_PostCommit_Java_PVR_Spark3_Streaming.json b/.github/trigger_files/beam_PostCommit_Java_PVR_Spark3_Streaming.json index b60f5c4cc3c80..e3d6056a5de96 100644 --- a/.github/trigger_files/beam_PostCommit_Java_PVR_Spark3_Streaming.json +++ b/.github/trigger_files/beam_PostCommit_Java_PVR_Spark3_Streaming.json @@ -1,4 +1,4 @@ { "comment": "Modify 
this file in a trivial way to cause this test suite to run", - "modification": 0 + "modification": 1 } diff --git a/.github/trigger_files/beam_PostCommit_Java_PVR_Spark_Batch.json b/.github/trigger_files/beam_PostCommit_Java_PVR_Spark_Batch.json index b60f5c4cc3c80..e3d6056a5de96 100644 --- a/.github/trigger_files/beam_PostCommit_Java_PVR_Spark_Batch.json +++ b/.github/trigger_files/beam_PostCommit_Java_PVR_Spark_Batch.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run", - "modification": 0 + "modification": 1 } diff --git a/CHANGES.md b/CHANGES.md index fcb02d1d996af..b9d5f2c191c91 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -80,6 +80,7 @@ ## Bugfixes * Fixed X (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* (Java) Fixed tearDown not invoked when DoFn throws on Portable Runners ([#18592](https://github.com/apache/beam/issues/18592), [#31381](https://github.com/apache/beam/issues/31381)). ## Security Fixes * Fixed (CVE-YYYY-NNNN)[https://www.cve.org/CVERecord?id=CVE-YYYY-NNNN] (Java/Python/Go) ([#X](https://github.com/apache/beam/issues/X)). diff --git a/runners/flink/job-server/flink_job_server.gradle b/runners/flink/job-server/flink_job_server.gradle index 56a58df4fb093..1c610477a4442 100644 --- a/runners/flink/job-server/flink_job_server.gradle +++ b/runners/flink/job-server/flink_job_server.gradle @@ -171,7 +171,6 @@ def portableValidatesRunnerTask(String name, boolean streaming, boolean checkpoi excludeCategories 'org.apache.beam.sdk.testing.UsesCustomWindowMerging' excludeCategories 'org.apache.beam.sdk.testing.UsesFailureMessage' excludeCategories 'org.apache.beam.sdk.testing.UsesGaugeMetrics' - excludeCategories 'org.apache.beam.sdk.testing.UsesParDoLifecycle' excludeCategories 'org.apache.beam.sdk.testing.UsesMapState' excludeCategories 'org.apache.beam.sdk.testing.UsesMultimapState' excludeCategories 'org.apache.beam.sdk.testing.UsesSetState' diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle index df2270d3b653f..4906d9cf9cb83 100644 --- a/runners/google-cloud-dataflow-java/build.gradle +++ b/runners/google-cloud-dataflow-java/build.gradle @@ -185,7 +185,7 @@ def commonLegacyExcludeCategories = [ 'org.apache.beam.sdk.testing.UsesGaugeMetrics', 'org.apache.beam.sdk.testing.UsesMultimapState', 'org.apache.beam.sdk.testing.UsesTestStream', - 'org.apache.beam.sdk.testing.UsesParDoLifecycle', + 'org.apache.beam.sdk.testing.UsesParDoLifecycle', // doesn't support remote runner 'org.apache.beam.sdk.testing.UsesMetricsPusher', 'org.apache.beam.sdk.testing.UsesBundleFinalizer', ] diff --git a/runners/samza/job-server/build.gradle b/runners/samza/job-server/build.gradle index f972f376e5c8c..6fc8db98a4f9c 100644 --- a/runners/samza/job-server/build.gradle +++ b/runners/samza/job-server/build.gradle @@ -90,7 +90,6 @@ def portableValidatesRunnerTask(String name, boolean docker) { excludeCategories 'org.apache.beam.sdk.testing.UsesCustomWindowMerging' excludeCategories 'org.apache.beam.sdk.testing.UsesFailureMessage' excludeCategories 'org.apache.beam.sdk.testing.UsesGaugeMetrics' - excludeCategories 'org.apache.beam.sdk.testing.UsesParDoLifecycle' excludeCategories 'org.apache.beam.sdk.testing.UsesMapState' excludeCategories 'org.apache.beam.sdk.testing.UsesMultimapState' excludeCategories 'org.apache.beam.sdk.testing.UsesSetState' @@ -127,6 +126,8 @@ def portableValidatesRunnerTask(String name, boolean docker) { excludeTestsMatching 
'org.apache.beam.sdk.transforms.ParDoTest$TimestampTests.testParDoShiftTimestampInvalid' // TODO(https://github.com/apache/beam/issues/21144) excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimestampTests.testParDoShiftTimestampInvalidZeroAllowed' + // TODO(https://github.com/apache/beam/issues/32520) + excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoLifecycleTest.testTeardownCalledAfterExceptionIn*Stateful' // TODO(https://github.com/apache/beam/issues/21145) excludeTestsMatching 'org.apache.beam.sdk.transforms.DeduplicateTest.testEventTime' // TODO(https://github.com/apache/beam/issues/21146) diff --git a/runners/spark/job-server/spark_job_server.gradle b/runners/spark/job-server/spark_job_server.gradle index 6d2d4b2bafbf6..5ed5f4277bf4b 100644 --- a/runners/spark/job-server/spark_job_server.gradle +++ b/runners/spark/job-server/spark_job_server.gradle @@ -118,7 +118,6 @@ def portableValidatesRunnerTask(String name, boolean streaming, boolean docker, excludeCategories 'org.apache.beam.sdk.testing.UsesFailureMessage' excludeCategories 'org.apache.beam.sdk.testing.UsesGaugeMetrics' excludeCategories 'org.apache.beam.sdk.testing.UsesPerKeyOrderedDelivery' - excludeCategories 'org.apache.beam.sdk.testing.UsesParDoLifecycle' excludeCategories 'org.apache.beam.sdk.testing.UsesMapState' excludeCategories 'org.apache.beam.sdk.testing.UsesSetState' excludeCategories 'org.apache.beam.sdk.testing.UsesOrderedListState' @@ -187,7 +186,6 @@ def portableValidatesRunnerTask(String name, boolean streaming, boolean docker, excludeCategories 'org.apache.beam.sdk.testing.UsesGaugeMetrics' excludeCategories 'org.apache.beam.sdk.testing.UsesPerKeyOrderedDelivery' excludeCategories 'org.apache.beam.sdk.testing.UsesPerKeyOrderInBundle' - excludeCategories 'org.apache.beam.sdk.testing.UsesParDoLifecycle' excludeCategories 'org.apache.beam.sdk.testing.UsesMapState' excludeCategories 'org.apache.beam.sdk.testing.UsesMultimapState' excludeCategories 'org.apache.beam.sdk.testing.UsesSetState' diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java index 0d520dcf7f5c3..c91d5ba71b89e 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java @@ -596,7 +596,11 @@ public BeamFnApi.InstructionResponse.Builder processBundle(BeamFnApi.Instruction request.getProcessBundle().getProcessBundleDescriptorId(), bundleProcessor); return BeamFnApi.InstructionResponse.newBuilder().setProcessBundle(response); } catch (Exception e) { - // Make sure we clean-up from the active set of bundle processors. + // Make sure we clean up from the active set of bundle processors. + LOG.debug( + "Discard bundleProcessor for {} after exception: {}", + request.getProcessBundle().getProcessBundleDescriptorId(), + e.getMessage()); bundleProcessorCache.discard(bundleProcessor); throw e; } @@ -1168,6 +1172,18 @@ void discard() { if (this.bundleCache != null) { this.bundleCache.clear(); } + // setupFunctions are invoked in createBundleProcessor. Invoke teardownFunction here as the + // BundleProcessor is already removed from cache and won't be re-used. 
+ for (ThrowingRunnable teardownFunction : Lists.reverse(this.getTearDownFunctions())) { + try { + teardownFunction.run(); + } catch (Throwable e) { + LOG.warn( + "Exceptions are thrown from DoFn.teardown method when trying to discard " + + "ProcessBundleHandler", + e); + } + } getMetricsEnvironmentStateForBundle().discard(); for (BeamFnDataOutboundAggregator aggregator : getOutboundAggregators().values()) { aggregator.discard(); @@ -1175,6 +1191,7 @@ void discard() { } } + // this is called in cachedBundleProcessors removal listener void shutdown() { for (ThrowingRunnable tearDownFunction : getTearDownFunctions()) { LOG.debug("Tearing down function {}", tearDownFunction); From 7177baf717dc9ce080885f8c86cd83403ad96e0d Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Wed, 9 Oct 2024 12:47:17 -0400 Subject: [PATCH 11/14] Support ordered list states in python sdk and fnapi runner (#32326) * Support ordered list state in python sdk and fnapi runner. * Add test to verify integrity of multiple iterators * Add fuzz tests and fix two edge cases. * Add sortedcontainer in package dependency * Code refactoring and add a check for the supported maximum key * regenerate requirments for python images. * Refactor portable runner code for ordered list state * Return continuation tokens in portable runnner for ordered list state * Fix some lints * Apply yapf * Fix lints * Sync base image requirements with master. * Add typing for ordered list state apis. * Add typing to orderedliststate user state. * Fix a typo. * Refactor some code based on the feedback. * Fix lints * Remove the support of int argument type in ordered list state apis. * Fix formats and lints * More lints * Refactor the code to use the continuation token logic. * Fix lints --- .../fn_api_runner/worker_handlers.py | 70 +++- .../runners/worker/bundle_processor.py | 195 ++++++++++- .../runners/worker/bundle_processor_test.py | 314 ++++++++++++++++++ .../apache_beam/transforms/userstate.py | 29 ++ sdks/python/setup.py | 1 + 5 files changed, 604 insertions(+), 5 deletions(-) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py index bcfa965c04692..c5423e167026a 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py @@ -48,7 +48,9 @@ from typing import overload import grpc +from sortedcontainers import SortedSet +from apache_beam import coders from apache_beam.io import filesystems from apache_beam.io.filesystems import CompressionTypes from apache_beam.portability import common_urns @@ -959,7 +961,8 @@ class StateServicer(beam_fn_api_pb2_grpc.BeamFnStateServicer, 'multimap_keys_values_side_input', 'iterable_side_input', 'bag_user_state', - 'multimap_user_state' + 'multimap_user_state', + 'ordered_list_user_state' ]) class CopyOnWriteState(object): @@ -1021,6 +1024,8 @@ def __init__(self): self._checkpoint = None # type: Optional[StateServicer.StateType] self._use_continuation_tokens = False self._continuations = {} # type: Dict[bytes, Tuple[bytes, ...]] + self._ordered_list_keys = collections.defaultdict( + SortedSet) # type: DefaultDict[bytes, SortedSet] def checkpoint(self): # type: () -> None @@ -1050,6 +1055,14 @@ def process_instruction_id(self, unused_instruction_id): # type: (Any) -> Iterator yield + def _get_one_interval_key(self, state_key, start): + # type: (beam_fn_api_pb2.StateKey, int) -> bytes + 
state_key_copy = beam_fn_api_pb2.StateKey() + state_key_copy.CopyFrom(state_key) + state_key_copy.ordered_list_user_state.range.start = start + state_key_copy.ordered_list_user_state.range.end = start + 1 + return self._to_key(state_key_copy) + def get_raw(self, state_key, # type: beam_fn_api_pb2.StateKey continuation_token=None # type: Optional[bytes] @@ -1061,7 +1074,30 @@ def get_raw(self, 'Unknown state type: ' + state_key.WhichOneof('type')) with self._lock: - full_state = self._state[self._to_key(state_key)] + if not continuation_token: + # Compute full_state only when no continuation token is provided. + # If there is continuation token, full_state is already in + # continuation cache. No need to recompute. + full_state = [] # type: List[bytes] + if state_key.WhichOneof('type') == 'ordered_list_user_state': + maybe_start = state_key.ordered_list_user_state.range.start + maybe_end = state_key.ordered_list_user_state.range.end + persistent_state_key = beam_fn_api_pb2.StateKey() + persistent_state_key.CopyFrom(state_key) + persistent_state_key.ordered_list_user_state.ClearField("range") + + available_keys = self._ordered_list_keys[self._to_key( + persistent_state_key)] + + for i in available_keys.irange(maybe_start, + maybe_end, + inclusive=(True, False)): + entries = self._state[self._get_one_interval_key( + persistent_state_key, i)] + full_state.extend(entries) + else: + full_state.extend(self._state[self._to_key(state_key)]) + if self._use_continuation_tokens: # The token is "nonce:index". if not continuation_token: @@ -1087,14 +1123,40 @@ def append_raw( ): # type: (...) -> _Future with self._lock: - self._state[self._to_key(state_key)].append(data) + if state_key.WhichOneof('type') == 'ordered_list_user_state': + coder = coders.TupleCoder([ + coders.VarIntCoder(), + coders.coders.LengthPrefixCoder(coders.BytesCoder()) + ]).get_impl() + + for key, value in coder.decode_all(data): + self._state[self._get_one_interval_key(state_key, key)].append( + coder.encode((key, value))) + self._ordered_list_keys[self._to_key(state_key)].add(key) + else: + self._state[self._to_key(state_key)].append(data) return _Future.done() def clear(self, state_key): # type: (beam_fn_api_pb2.StateKey) -> _Future with self._lock: try: - del self._state[self._to_key(state_key)] + if state_key.WhichOneof('type') == 'ordered_list_user_state': + start = state_key.ordered_list_user_state.range.start + end = state_key.ordered_list_user_state.range.end + persistent_state_key = beam_fn_api_pb2.StateKey() + persistent_state_key.CopyFrom(state_key) + persistent_state_key.ordered_list_user_state.ClearField("range") + available_keys = self._ordered_list_keys[self._to_key( + persistent_state_key)] + + for i in list(available_keys.irange(start, + end, + inclusive=(True, False))): + del self._state[self._get_one_interval_key(persistent_state_key, i)] + available_keys.remove(i) + else: + del self._state[self._to_key(state_key)] except KeyError: # This may happen with the caching layer across bundles. Caching may # skip this storage layer for a blocking_get(key) request. 
Without diff --git a/sdks/python/apache_beam/runners/worker/bundle_processor.py b/sdks/python/apache_beam/runners/worker/bundle_processor.py index fdb13a03bb946..0f1700f524860 100644 --- a/sdks/python/apache_beam/runners/worker/bundle_processor.py +++ b/sdks/python/apache_beam/runners/worker/bundle_processor.py @@ -19,16 +19,21 @@ # pytype: skip-file +from __future__ import annotations + import base64 import bisect import collections import copy +import heapq +import itertools import json import logging import random import threading from dataclasses import dataclass from dataclasses import field +from itertools import chain from typing import TYPE_CHECKING from typing import Any from typing import Callable @@ -50,6 +55,8 @@ from google.protobuf import duration_pb2 from google.protobuf import timestamp_pb2 +from sortedcontainers import SortedDict +from sortedcontainers import SortedList import apache_beam as beam from apache_beam import coders @@ -104,7 +111,8 @@ FnApiUserRuntimeStateTypes = Union['ReadModifyWriteRuntimeState', 'CombiningValueRuntimeState', 'SynchronousSetRuntimeState', - 'SynchronousBagRuntimeState'] + 'SynchronousBagRuntimeState', + 'SynchronousOrderedListRuntimeState'] DATA_INPUT_URN = 'beam:runner:source:v1' DATA_OUTPUT_URN = 'beam:runner:sink:v1' @@ -704,6 +712,180 @@ def commit(self): to_await.get() +class RangeSet: + """For Internal Use only. A simple range set for ranges of [x,y).""" + def __init__(self) -> None: + # The start points and end points are stored separately in order. + self._sorted_starts = SortedList() + self._sorted_ends = SortedList() + + def add(self, start: int, end: int) -> None: + if start >= end: + return + + # ranges[:min_idx] and ranges[max_idx:] is unaffected by this insertion + # the first range whose end point >= the start of the new range + min_idx = self._sorted_ends.bisect_left(start) + # the first range whose start point > the end point of the new range + max_idx = self._sorted_starts.bisect_right(end) + + if min_idx >= len(self._sorted_starts) or max_idx <= 0: + # the new range is beyond any current ranges + new_start = start + new_end = end + else: + # the new range overlaps with ranges[min_idx:max_idx] + new_start = min(start, self._sorted_starts[min_idx]) + new_end = max(end, self._sorted_ends[max_idx - 1]) + + del self._sorted_starts[min_idx:max_idx] + del self._sorted_ends[min_idx:max_idx] + + self._sorted_starts.add(new_start) + self._sorted_ends.add(new_end) + + def __contains__(self, key: int) -> bool: + idx = self._sorted_starts.bisect_left(key) + return (idx < len(self._sorted_starts) and self._sorted_starts[idx] == key + ) or (idx > 0 and self._sorted_ends[idx - 1] > key) + + def __len__(self) -> int: + assert len(self._sorted_starts) == len(self._sorted_ends) + return len(self._sorted_starts) + + def __iter__(self) -> Iterator[Tuple[int, int]]: + return zip(self._sorted_starts, self._sorted_ends) + + def __str__(self) -> str: + return str(list(zip(self._sorted_starts, self._sorted_ends))) + + +class SynchronousOrderedListRuntimeState(userstate.OrderedListRuntimeState): + RANGE_MIN = -(1 << 63) + RANGE_MAX = (1 << 63) - 1 + TIMESTAMP_RANGE_MIN = timestamp.Timestamp(micros=RANGE_MIN) + TIMESTAMP_RANGE_MAX = timestamp.Timestamp(micros=RANGE_MAX) + + def __init__( + self, + state_handler: sdk_worker.CachingStateHandler, + state_key: beam_fn_api_pb2.StateKey, + value_coder: coders.Coder) -> None: + self._state_handler = state_handler + self._state_key = state_key + self._elem_coder = beam.coders.TupleCoder( + 
[coders.VarIntCoder(), coders.coders.LengthPrefixCoder(value_coder)])
+ self._cleared = False
+ self._pending_adds = SortedDict()
+ self._pending_removes = RangeSet()
+
+ def add(self, elem: Tuple[timestamp.Timestamp, Any]) -> None:
+ assert len(elem) == 2
+ key_ts, value = elem
+ key = key_ts.micros
+
+ if key >= self.RANGE_MAX or key < self.RANGE_MIN:
+ raise ValueError("key value %d is out of range" % key)
+ self._pending_adds.setdefault(key, []).append(value)
+
+ def read(self) -> Iterable[Tuple[timestamp.Timestamp, Any]]:
+ return self.read_range(self.TIMESTAMP_RANGE_MIN, self.TIMESTAMP_RANGE_MAX)
+
+ def read_range(
+ self,
+ min_timestamp: timestamp.Timestamp,
+ limit_timestamp: timestamp.Timestamp
+ ) -> Iterable[Tuple[timestamp.Timestamp, Any]]:
+ # convert timestamp to int, as sort keys are stored as int internally.
+ min_key = min_timestamp.micros
+ limit_key = limit_timestamp.micros
+
+ keys_to_add = self._pending_adds.irange(
+ min_key, limit_key, inclusive=(True, False))
+
+ # use a list comprehension here to construct the actual list
+ # of iterators over the selected range.
+ local_items = chain.from_iterable([
+ itertools.islice(
+ zip(itertools.cycle([
+ k,
+ ]), self._pending_adds[k]),
+ len(self._pending_adds[k])) for k in keys_to_add
+ ])
+
+ if not self._cleared:
+ range_query_state_key = beam_fn_api_pb2.StateKey()
+ range_query_state_key.CopyFrom(self._state_key)
+ range_query_state_key.ordered_list_user_state.range.start = min_key
+ range_query_state_key.ordered_list_user_state.range.end = limit_key
+
+ # make a deep copy here because other operations may occur in
+ # the middle of an iteration and change pending_removes
+ pending_removes_snapshot = copy.deepcopy(self._pending_removes)
+ persistent_items = filter(
+ lambda kv: kv[0] not in pending_removes_snapshot,
+ _StateBackedIterable(
+ self._state_handler, range_query_state_key, self._elem_coder))
+
+ return map(
+ lambda x: (timestamp.Timestamp(micros=x[0]), x[1]),
+ heapq.merge(persistent_items, local_items))
+
+ return map(lambda x: (timestamp.Timestamp(micros=x[0]), x[1]), local_items)
+
+ def clear(self) -> None:
+ self._cleared = True
+ self._pending_adds = SortedDict()
+ self._pending_removes = RangeSet()
+ self._pending_removes.add(self.RANGE_MIN, self.RANGE_MAX)
+
+ def clear_range(
+ self,
+ min_timestamp: timestamp.Timestamp,
+ limit_timestamp: timestamp.Timestamp) -> None:
+ min_key = min_timestamp.micros
+ limit_key = limit_timestamp.micros
+
+ # materialize the keys to remove before the actual removal
+ keys_to_remove = list(
+ self._pending_adds.irange(min_key, limit_key, inclusive=(True, False)))
+ for k in keys_to_remove:
+ del self._pending_adds[k]
+
+ if not self._cleared:
+ self._pending_removes.add(min_key, limit_key)
+
+ def commit(self) -> None:
+ futures = []
+ if self._pending_removes:
+ for start, end in self._pending_removes:
+ range_query_state_key = beam_fn_api_pb2.StateKey()
+ range_query_state_key.CopyFrom(self._state_key)
+ range_query_state_key.ordered_list_user_state.range.start = start
+ range_query_state_key.ordered_list_user_state.range.end = end
+ futures.append(self._state_handler.clear(range_query_state_key))
+
+ self._pending_removes = RangeSet()
+
+ if self._pending_adds:
+ items_to_add = []
+ for k in self._pending_adds:
+ items_to_add.extend(zip(itertools.cycle([
+ k,
+ ]), self._pending_adds[k]))
+ futures.append(
+ self._state_handler.extend(
+ self._state_key, self._elem_coder.get_impl(), items_to_add))
+ self._pending_adds = SortedDict()
+
+ if 
len(futures): + # To commit, we need to wait on every state request futures to complete. + for to_await in futures: + to_await.get() + + self._cleared = False + + class OutputTimer(userstate.BaseTimer): def __init__(self, key, @@ -850,6 +1032,17 @@ def _create_state(self, # State keys are expected in nested encoding format key=self._key_coder.encode_nested(key))), value_coder=state_spec.coder) + elif isinstance(state_spec, userstate.OrderedListStateSpec): + return SynchronousOrderedListRuntimeState( + self._state_handler, + state_key=beam_fn_api_pb2.StateKey( + ordered_list_user_state=beam_fn_api_pb2.StateKey. + OrderedListUserState( + transform_id=self._transform_id, + user_state_id=state_spec.name, + window=self._window_coder.encode(window), + key=self._key_coder.encode_nested(key))), + value_coder=state_spec.coder) else: raise NotImplementedError(state_spec) diff --git a/sdks/python/apache_beam/runners/worker/bundle_processor_test.py b/sdks/python/apache_beam/runners/worker/bundle_processor_test.py index dafb4dbd4bf05..0eb4dd9485fd3 100644 --- a/sdks/python/apache_beam/runners/worker/bundle_processor_test.py +++ b/sdks/python/apache_beam/runners/worker/bundle_processor_test.py @@ -18,24 +18,31 @@ """Unit tests for bundle processing.""" # pytype: skip-file +import random import unittest import apache_beam as beam +from apache_beam.coders import StrUtf8Coder from apache_beam.coders.coders import FastPrimitivesCoder from apache_beam.portability import common_urns from apache_beam.portability.api import beam_fn_api_pb2 from apache_beam.runners import common +from apache_beam.runners.portability.fn_api_runner.worker_handlers import StateServicer from apache_beam.runners.worker import bundle_processor from apache_beam.runners.worker import operations from apache_beam.runners.worker.bundle_processor import BeamTransformFactory from apache_beam.runners.worker.bundle_processor import BundleProcessor from apache_beam.runners.worker.bundle_processor import DataInputOperation from apache_beam.runners.worker.bundle_processor import FnApiUserStateContext +from apache_beam.runners.worker.bundle_processor import SynchronousOrderedListRuntimeState from apache_beam.runners.worker.bundle_processor import TimerInfo from apache_beam.runners.worker.data_plane import SizeBasedBufferingClosableOutputStream from apache_beam.runners.worker.data_sampler import DataSampler +from apache_beam.runners.worker.sdk_worker import GlobalCachingStateHandler +from apache_beam.runners.worker.statecache import StateCache from apache_beam.transforms import userstate from apache_beam.transforms.window import GlobalWindow +from apache_beam.utils import timestamp from apache_beam.utils.windowed_value import WindowedValue @@ -422,5 +429,312 @@ def test_user_modified_sdks_need_to_be_installed_in_runtime_env(self): "beam:version:sdk_base:apache/beam_python3.5_sdk:2.1.0-custom")) +class OrderedListStateTest(unittest.TestCase): + class NoStateCache(StateCache): + def __init__(self): + super().__init__(max_weight=0) + + @staticmethod + def _create_state(window=b"my_window", key=b"my_key", coder=StrUtf8Coder()): + state_handler = GlobalCachingStateHandler( + OrderedListStateTest.NoStateCache(), StateServicer()) + state_key = beam_fn_api_pb2.StateKey( + ordered_list_user_state=beam_fn_api_pb2.StateKey.OrderedListUserState( + window=window, key=key)) + return SynchronousOrderedListRuntimeState(state_handler, state_key, coder) + + def setUp(self): + self.state = self._create_state() + + def test_read_range(self): + T0 = 
timestamp.Timestamp.of(0) + T1 = timestamp.Timestamp.of(1) + T2 = timestamp.Timestamp.of(2) + T3 = timestamp.Timestamp.of(3) + T4 = timestamp.Timestamp.of(4) + T5 = timestamp.Timestamp.of(5) + T9 = timestamp.Timestamp.of(9) + A1, B1, A4 = [(T1, "a1"), (T1, "b1"), (T4, "a4")] + self.assertEqual([], list(self.state.read_range(T0, T5))) + + self.state.add(A1) + self.assertEqual([A1], list(self.state.read_range(T0, T5))) + + self.state.add(B1) + self.assertEqual([A1, B1], list(self.state.read_range(T0, T5))) + + self.state.add(A4) + self.assertEqual([A1, B1, A4], list(self.state.read_range(T0, T5))) + + self.assertEqual([], list(self.state.read_range(T0, T1))) + self.assertEqual([], list(self.state.read_range(T5, T9))) + self.assertEqual([A1, B1], list(self.state.read_range(T1, T2))) + self.assertEqual([], list(self.state.read_range(T2, T3))) + self.assertEqual([], list(self.state.read_range(T2, T4))) + self.assertEqual([A4], list(self.state.read_range(T4, T5))) + + def test_read(self): + T1 = timestamp.Timestamp.of(1) + T4 = timestamp.Timestamp.of(4) + A1, B1, A4 = [(T1, "a1"), (T1, "b1"), (T4, "a4")] + self.assertEqual([], list(self.state.read())) + + self.state.add(A1) + self.assertEqual([A1], list(self.state.read())) + + self.state.add(A1) + self.assertEqual([A1, A1], list(self.state.read())) + + self.state.add(B1) + self.assertEqual([A1, A1, B1], list(self.state.read())) + + self.state.add(A4) + self.assertEqual([A1, A1, B1, A4], list(self.state.read())) + + def test_clear_range(self): + T0 = timestamp.Timestamp.of(0) + T1 = timestamp.Timestamp.of(1) + T2 = timestamp.Timestamp.of(2) + T3 = timestamp.Timestamp.of(3) + T4 = timestamp.Timestamp.of(4) + T5 = timestamp.Timestamp.of(5) + A1, B1, A4, A5 = [(T1, "a1"), (T1, "b1"), (T4, "a4"), (T5, "a5")] + self.state.clear_range(T0, T1) + self.assertEqual([], list(self.state.read())) + + self.state.add(A1) + self.state.add(B1) + self.state.add(A4) + self.state.add(A5) + self.assertEqual([A1, B1, A4, A5], list(self.state.read())) + + self.state.clear_range(T0, T1) + self.assertEqual([A1, B1, A4, A5], list(self.state.read())) + + self.state.clear_range(T1, T2) + self.assertEqual([A4, A5], list(self.state.read())) + + # no side effect on clearing the same range twice + self.state.clear_range(T1, T2) + self.assertEqual([A4, A5], list(self.state.read())) + + self.state.clear_range(T3, T4) + self.assertEqual([A4, A5], list(self.state.read())) + + self.state.clear_range(T3, T5) + self.assertEqual([A5], list(self.state.read())) + + def test_add_and_clear_range_after_commit(self): + T1 = timestamp.Timestamp.of(1) + T4 = timestamp.Timestamp.of(4) + T5 = timestamp.Timestamp.of(5) + T6 = timestamp.Timestamp.of(6) + A1, B1, C1, A4, A5, A6 = [(T1, "a1"), (T1, "b1"), (T1, "c1"), + (T4, "a4"), (T5, "a5"), (T6, "a6")] + self.state.add(A1) + self.state.add(B1) + self.state.add(A4) + self.state.add(A5) + self.state.clear_range(T4, T5) + self.assertEqual([A1, B1, A5], list(self.state.read())) + + self.state.commit() + self.assertEqual(len(self.state._pending_adds), 0) + self.assertEqual(len(self.state._pending_removes), 0) + self.assertEqual([A1, B1, A5], list(self.state.read())) + + self.state.add(C1) + self.state.add(A6) + self.assertEqual([A1, B1, C1, A5, A6], list(self.state.read())) + + self.state.clear_range(T5, T6) + self.assertEqual([A1, B1, C1, A6], list(self.state.read())) + + self.state.commit() + self.assertEqual(len(self.state._pending_adds), 0) + self.assertEqual(len(self.state._pending_removes), 0) + self.assertEqual([A1, B1, C1, A6], 
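+                     # elements must read back unchanged after the second commit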
list(self.state.read())) + + def test_clear(self): + T1 = timestamp.Timestamp.of(1) + T4 = timestamp.Timestamp.of(4) + T5 = timestamp.Timestamp.of(5) + T9 = timestamp.Timestamp.of(9) + A1, B1, C1, A4, A5, B5 = [(T1, "a1"), (T1, "b1"), (T1, "c1"), + (T4, "a4"), (T5, "a5"), (T5, "b5")] + self.state.add(A1) + self.state.add(B1) + self.state.add(A4) + self.state.add(A5) + self.state.clear_range(T4, T5) + self.assertEqual([A1, B1, A5], list(self.state.read())) + self.state.commit() + + self.state.add(C1) + self.state.clear_range(T5, T9) + self.assertEqual([A1, B1, C1], list(self.state.read())) + self.state.clear() + self.assertEqual(len(self.state._pending_adds), 0) + self.assertEqual(len(self.state._pending_removes), 1) + + self.state.add(B5) + self.assertEqual([B5], list(self.state.read())) + self.state.commit() + + self.assertEqual(len(self.state._pending_adds), 0) + self.assertEqual(len(self.state._pending_removes), 0) + + self.assertEqual([B5], list(self.state.read())) + + def test_multiple_iterators(self): + T1 = timestamp.Timestamp.of(1) + T3 = timestamp.Timestamp.of(3) + T9 = timestamp.Timestamp.of(9) + A1, B1, A3, B3 = [(T1, "a1"), (T1, "b1"), (T3, "a3"), (T3, "b3")] + self.state.add(A1) + self.state.add(A3) + self.state.commit() + + iter_before_b1 = iter(self.state.read()) + self.assertEqual(A1, next(iter_before_b1)) + + self.state.add(B1) + self.assertEqual(A3, next(iter_before_b1)) + self.assertRaises(StopIteration, lambda: next(iter_before_b1)) + + self.state.add(B3) + iter_before_clear_range = iter(self.state.read()) + self.assertEqual(A1, next(iter_before_clear_range)) + self.state.clear_range(T3, T9) + self.assertEqual(B1, next(iter_before_clear_range)) + self.assertEqual(A3, next(iter_before_clear_range)) + self.assertEqual(B3, next(iter_before_clear_range)) + self.assertRaises(StopIteration, lambda: next(iter_before_clear_range)) + self.assertEqual([A1, B1], list(self.state.read())) + + iter_before_clear = iter(self.state.read()) + self.assertEqual(A1, next(iter_before_clear)) + self.state.clear() + self.assertEqual(B1, next(iter_before_clear)) + self.assertRaises(StopIteration, lambda: next(iter_before_clear)) + + self.assertEqual([], list(self.state.read())) + + def fuzz_test_helper(self, seed=0, lower=0, upper=20): + class NaiveState: + def __init__(self): + self._data = [[] for i in range((upper - lower + 1))] + self._logs = [] + + def add(self, elem): + k, v = elem + k = k.micros + self._data[k - lower].append(v) + self._logs.append("add(%d, %s)" % (k, v)) + + def clear_range(self, lo, hi): + lo = lo.micros + hi = hi.micros + for i in range(lo, hi): + self._data[i - lower] = [] + self._logs.append("clear_range(%d, %d)" % (lo, hi)) + + def clear(self): + for i in range(len(self._data)): + self._data[i] = [] + self._logs.append("clear()") + + def read(self): + self._logs.append("read()") + for i in range(len(self._data)): + for v in self._data[i]: + yield (timestamp.Timestamp(micros=(i + lower)), v) + + random.seed(seed) + + state = self._create_state() + bench_state = NaiveState() + + steps = random.randint(20, 50) + for i in range(steps): + op = random.randint(1, 100) + if 1 <= op < 70: + num = random.randint(lower, upper) + state.add((timestamp.Timestamp(micros=num), "a%d" % num)) + bench_state.add((timestamp.Timestamp(micros=num), "a%d" % num)) + elif 70 <= op < 95: + num1 = random.randint(lower, upper) + num2 = random.randint(lower, upper) + min_time = timestamp.Timestamp(micros=min(num1, num2)) + max_time = timestamp.Timestamp(micros=max(num1, num2)) + 
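+        # apply the identical clear_range to the state under test and to the
+        # naive reference model so that their contents remain comparable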
state.clear_range(min_time, max_time) + bench_state.clear_range(min_time, max_time) + elif op >= 95: + state.clear() + bench_state.clear() + + op = random.randint(1, 10) + if 1 <= op <= 9: + pass + else: + state.commit() + + a = list(bench_state.read()) + b = list(state.read()) + self.assertEqual( + a, + b, + "Mismatch occurred on seed=%d, step=%d, logs=%s" % + (seed, i, ';'.join(bench_state._logs))) + + def test_fuzz(self): + for _ in range(1000): + seed = random.randint(0, 0xffffffffffffffff) + try: + self.fuzz_test_helper(seed=seed) + except Exception as e: + raise RuntimeError("Exception occurred on seed=%d: %s" % (seed, e)) + + def test_min_max(self): + T_MIN = timestamp.Timestamp(micros=(-(1 << 63))) + T_MAX_MINUS_ONE = timestamp.Timestamp(micros=((1 << 63) - 2)) + T_MAX = timestamp.Timestamp(micros=((1 << 63) - 1)) + T0 = timestamp.Timestamp(micros=0) + INT64_MIN, INT64_MAX_MINUS_ONE, INT64_MAX = [(T_MIN, "min"), + (T_MAX_MINUS_ONE, "max"), + (T_MAX, "err")] + self.state.add(INT64_MIN) + self.state.add(INT64_MAX_MINUS_ONE) + self.assertRaises(ValueError, lambda: self.state.add(INT64_MAX)) + + self.assertEqual([INT64_MIN, INT64_MAX_MINUS_ONE], list(self.state.read())) + self.assertEqual([INT64_MIN], list(self.state.read_range(T_MIN, T0))) + self.assertEqual([INT64_MAX_MINUS_ONE], + list(self.state.read_range(T0, T_MAX))) + + def test_continuation_token(self): + T1 = timestamp.Timestamp.of(1) + T2 = timestamp.Timestamp.of(2) + T7 = timestamp.Timestamp.of(7) + T8 = timestamp.Timestamp.of(8) + A1, A2, A7, B7, A8 = [(T1, "a1"), (T2, "a2"), (T7, "a7"), + (T7, "b7"), (T8, "a8")] + self.state._state_handler._underlying._use_continuation_tokens = True + self.assertEqual([], list(self.state.read_range(T1, T8))) + + self.state.add(A1) + self.state.add(A2) + self.state.add(A7) + self.state.add(B7) + self.state.add(A8) + + self.assertEqual([A2, A7, B7], list(self.state.read_range(T2, T8))) + + self.state.commit() + self.assertEqual([A2, A7, B7], list(self.state.read_range(T2, T8))) + + self.assertEqual([A1, A2, A7, B7, A8], list(self.state.read())) + + if __name__ == '__main__': unittest.main() diff --git a/sdks/python/apache_beam/transforms/userstate.py b/sdks/python/apache_beam/transforms/userstate.py index ada0b755bd6c9..cad7335381111 100644 --- a/sdks/python/apache_beam/transforms/userstate.py +++ b/sdks/python/apache_beam/transforms/userstate.py @@ -150,6 +150,17 @@ def to_runner_api( urn=common_urns.user_state.BAG.urn)) +class OrderedListStateSpec(StateSpec): + """Specification for a user DoFn ordered list state cell.""" + def to_runner_api( + self, context: 'PipelineContext') -> beam_runner_api_pb2.StateSpec: + return beam_runner_api_pb2.StateSpec( + ordered_list_spec=beam_runner_api_pb2.OrderedListStateSpec( + element_coder_id=context.coders.get_id(self.coder)), + protocol=beam_runner_api_pb2.FunctionSpec( + urn=common_urns.user_state.ORDERED_LIST.urn)) + + # TODO(BEAM-9562): Update Timer to have of() and clear() APIs. 
Timer = NamedTuple( 'Timer', @@ -372,6 +383,24 @@ class CombiningValueRuntimeState(AccumulatingRuntimeState): """Combining value state interface object passed to user code.""" +class OrderedListRuntimeState(AccumulatingRuntimeState): + """Ordered list state interface object passed to user code.""" + def read(self) -> Iterable[Tuple[Timestamp, Any]]: + raise NotImplementedError(type(self)) + + def add(self, value: Tuple[Timestamp, Any]) -> None: + raise NotImplementedError(type(self)) + + def read_range( + self, min_time_stamp: Timestamp, + limit_time_stamp: Timestamp) -> Iterable[Tuple[Timestamp, Any]]: + raise NotImplementedError(type(self)) + + def clear_range( + self, min_time_stamp: Timestamp, limit_time_stamp: Timestamp) -> None: + raise NotImplementedError(type(self)) + + class UserStateContext(object): """Wrapper allowing user state and timers to be accessed by a DoFnInvoker.""" def get_timer( diff --git a/sdks/python/setup.py b/sdks/python/setup.py index c3189e18d2c81..6eb74e9099c18 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -388,6 +388,7 @@ def get_portability_package_data(): 'redis>=5.0.0,<6', 'regex>=2020.6.8', 'requests>=2.24.0,<3.0.0', + 'sortedcontainers>=2.4.0', 'typing-extensions>=3.7.0', 'zstandard>=0.18.0,<1', # Dynamic dependencies must be specified in a separate list, otherwise From 20d0f6e5a85c6b5738098e13e212337d86f49412 Mon Sep 17 00:00:00 2001 From: Sergei Lilichenko Date: Wed, 9 Oct 2024 09:47:48 -0700 Subject: [PATCH 12/14] Add support for global sequence processing to the "ordered" extension in Java SDK (#32540) * Initial changes to support processing global sequences. * Refactor the DoFns out of the transform and into a class hierarchy. * Next round of implementation of Global Sequence handling. * Added ticker timers in global sequence processing. * Corrected the emission batch logic. * Reworked some tests and fixed the batch output logic. * Pluggable combiner for the global sequence. * First iteration of the efficient merging accumulator * Mostly complete implementation of the accumulator and corresponding tests. * Additional round of test refinements. * Added logic to DQL the records below the global sequence range. * Added providing a global sequence combiner through a handler. * Added SequenceRangeAccumulatorCoder and tests. Improved logic of creating timers. * Fixed logging levels (moved them to "trace") on several transforms. * Round of code improvements and cleanups. * Tests to verify that the the global sequence is correctly produced by the transform. * Added batch processing verification to the global sequence processing. * A round of documentation update and minor clean up. * Fixed the description in CHANGES.md * Polish by "spotless" * Polish by "spotless" * Removed unneeded logging configuration file. * Made ContiguousSequenceRange open ended. * Removed details from 2.60.0 section in CHANGES.md. * Update sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/DefaultSequenceCombiner.java Co-authored-by: Danny McCormick * Fixed spotless related errors. * Added a note about the new functionality to CHANGES.md * Added clarification around the data structure used in the sequence combiner. * Added clarification around the data structure used in the sequence combiner. * Fixed the problem with allowed lateness being set to 0 in the global sequence tracker. * Parameterized the GlobalSequenceTracker with the max number of events to trigger the re-evaluation. Fixed accidentally disabled unit tests. 
* Made the event timer used to wait for the event arrival respect the lateness of the input. * Created new failure reason code - "before initial sequence" --------- Co-authored-by: Danny McCormick --- CHANGES.md | 1 + sdks/java/extensions/ordered/build.gradle | 6 + .../ordered/ContiguousSequenceRange.java | 83 +++ .../sdk/extensions/ordered/EventExaminer.java | 9 +- .../ordered/GlobalSequenceTracker.java | 112 +++ .../ordered/GlobalSequencesProcessorDoFn.java | 276 +++++++ .../ordered/OrderedEventProcessor.java | 685 +++++------------- .../ordered/OrderedEventProcessorResult.java | 37 +- .../ordered/OrderedProcessingHandler.java | 80 ++ .../ordered/OrderedProcessingStatus.java | 17 +- .../ordered/PerKeyTickerGenerator.java | 132 ++++ .../extensions/ordered/ProcessingState.java | 87 ++- .../sdk/extensions/ordered/ProcessorDoFn.java | 427 +++++++++++ .../ordered/SequencePerKeyProcessorDoFn.java | 294 ++++++++ .../extensions/ordered/UnprocessedEvent.java | 3 +- .../combiner/DefaultSequenceCombiner.java | 122 ++++ .../combiner/SequenceRangeAccumulator.java | 296 ++++++++ .../ordered/combiner/package-info.java | 23 + .../sdk/extensions/ordered/package-info.java | 4 +- ...deredEventProcessorGlobalSequenceTest.java | 534 ++++++++++++++ ...eredEventProcessorPerKeySequenceTest.java} | 358 ++------- .../OrderedEventProcessorTestBase.java | 395 ++++++++++ .../StringBufferOrderedProcessingHandler.java | 18 + .../SequenceRangeAccumulatorCoderTest.java | 71 ++ .../SequenceRangeAccumulatorTest.java | 400 ++++++++++ 25 files changed, 3639 insertions(+), 831 deletions(-) create mode 100644 sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ContiguousSequenceRange.java create mode 100644 sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/GlobalSequenceTracker.java create mode 100644 sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/GlobalSequencesProcessorDoFn.java create mode 100644 sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/PerKeyTickerGenerator.java create mode 100644 sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ProcessorDoFn.java create mode 100644 sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/SequencePerKeyProcessorDoFn.java create mode 100644 sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/DefaultSequenceCombiner.java create mode 100644 sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulator.java create mode 100644 sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/package-info.java create mode 100644 sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorGlobalSequenceTest.java rename sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/{OrderedEventProcessorTest.java => OrderedEventProcessorPerKeySequenceTest.java} (71%) create mode 100644 sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorTestBase.java create mode 100644 sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulatorCoderTest.java create mode 100644 sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulatorTest.java diff --git a/CHANGES.md b/CHANGES.md 
index b9d5f2c191c91..774abefcb0661 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -68,6 +68,7 @@ ## New Features / Improvements * X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* Added support for processing events which use a global sequence to "ordered" extension (Java) [#32540](https://github.com/apache/beam/pull/32540) ## Breaking Changes diff --git a/sdks/java/extensions/ordered/build.gradle b/sdks/java/extensions/ordered/build.gradle index 10c9785b9eed6..8bee1901bd3ad 100644 --- a/sdks/java/extensions/ordered/build.gradle +++ b/sdks/java/extensions/ordered/build.gradle @@ -28,6 +28,12 @@ dependencies { implementation library.java.vendored_guava_32_1_2_jre testImplementation library.java.junit testImplementation library.java.hamcrest + testImplementation library.java.slf4j_jdk14 testImplementation project(path: ':sdks:java:core') + testImplementation 'junit:junit:4.13.1' + testImplementation project(path: ':runners:google-cloud-dataflow-java') testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") + testImplementation project(path: ":runners:google-cloud-dataflow-java") + testImplementation project(path: ":sdks:java:extensions:google-cloud-platform-core") + testImplementation project(path: ":sdks:java:io:google-cloud-platform") } \ No newline at end of file diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ContiguousSequenceRange.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ContiguousSequenceRange.java new file mode 100644 index 0000000000000..c16cf9328dcd6 --- /dev/null +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ContiguousSequenceRange.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.ordered; + +import com.google.auto.value.AutoValue; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import org.apache.beam.sdk.coders.CoderException; +import org.apache.beam.sdk.coders.CustomCoder; +import org.apache.beam.sdk.coders.InstantCoder; +import org.apache.beam.sdk.coders.VarLongCoder; +import org.checkerframework.checker.initialization.qual.Initialized; +import org.checkerframework.checker.nullness.qual.NonNull; +import org.checkerframework.checker.nullness.qual.UnknownKeyFor; +import org.joda.time.Instant; + +/** A range of contiguous event sequences and the latest timestamp of the events in the range. 
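+ * The start of the range is inclusive and the end is exclusive.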
*/ +@AutoValue +public abstract class ContiguousSequenceRange { + public static final ContiguousSequenceRange EMPTY = + ContiguousSequenceRange.of( + Long.MIN_VALUE, Long.MIN_VALUE, Instant.ofEpochMilli(Long.MIN_VALUE)); + + /** @return inclusive starting sequence */ + public abstract long getStart(); + + /** @return exclusive end sequence */ + public abstract long getEnd(); + + /** @return latest timestamp of all events in the range */ + public abstract Instant getTimestamp(); + + public static ContiguousSequenceRange of(long start, long end, Instant timestamp) { + return new AutoValue_ContiguousSequenceRange(start, end, timestamp); + } + + static class CompletedSequenceRangeCoder extends CustomCoder { + + private static final CompletedSequenceRangeCoder INSTANCE = new CompletedSequenceRangeCoder(); + + static CompletedSequenceRangeCoder of() { + return INSTANCE; + } + + private CompletedSequenceRangeCoder() {} + + @Override + public void encode( + ContiguousSequenceRange value, @UnknownKeyFor @NonNull @Initialized OutputStream outStream) + throws @UnknownKeyFor @NonNull @Initialized CoderException, @UnknownKeyFor @NonNull + @Initialized IOException { + VarLongCoder.of().encode(value.getStart(), outStream); + VarLongCoder.of().encode(value.getEnd(), outStream); + InstantCoder.of().encode(value.getTimestamp(), outStream); + } + + @Override + public ContiguousSequenceRange decode(@UnknownKeyFor @NonNull @Initialized InputStream inStream) + throws @UnknownKeyFor @NonNull @Initialized CoderException, @UnknownKeyFor @NonNull + @Initialized IOException { + long start = VarLongCoder.of().decode(inStream); + long end = VarLongCoder.of().decode(inStream); + Instant timestamp = InstantCoder.of().decode(inStream); + return ContiguousSequenceRange.of(start, end, timestamp); + } + } +} diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/EventExaminer.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/EventExaminer.java index 1e4fe75655178..b5de67f16ceda 100644 --- a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/EventExaminer.java +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/EventExaminer.java @@ -31,7 +31,8 @@ public interface EventExaminer> extends Serializable { /** - * Is this event the first expected event for the given key and window? + * Is this event the first expected event for the given key and window if the per key sequence is + * used? In case of global sequence it determines the first global sequence event. * * @param sequenceNumber the sequence number of the event as defined by the key of the input * PCollection to {@link OrderedEventProcessor} @@ -41,8 +42,8 @@ public interface EventExaminer> boolean isInitialEvent(long sequenceNumber, EventT event); /** - * If the event was the first event in the sequence, create the state to hold the required data - * needed for processing. This data will be persisted. + * If the event was the first event for a given key, create the state to hold the required data + * needed for processing. This data will be persisted in a Beam state. * * @param event the first event in the sequence. * @return the state to persist. @@ -53,6 +54,8 @@ public interface EventExaminer> /** * Is this event the last expected event for a given key and window? * + *
+   * <p>
Note, this method is not used yet with global sequences. + * * @param sequenceNumber of the event * @param event being processed * @return true if the last event. There are cases where it's impossible to know whether it's the diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/GlobalSequenceTracker.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/GlobalSequenceTracker.java new file mode 100644 index 0000000000000..aa12c30a5317c --- /dev/null +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/GlobalSequenceTracker.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.ordered; + +import org.apache.beam.sdk.extensions.ordered.ContiguousSequenceRange.CompletedSequenceRangeCoder; +import org.apache.beam.sdk.transforms.Combine; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.windowing.AfterFirst; +import org.apache.beam.sdk.transforms.windowing.AfterPane; +import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime; +import org.apache.beam.sdk.transforms.windowing.Repeatedly; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.transforms.windowing.WindowFn; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollectionView; +import org.apache.beam.sdk.values.TimestampedValue; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.joda.time.Duration; + +/** + * PTransform to produce the side input of the maximum contiguous range of sequence numbers. + * + * @param type of event key + * @param type of event + * @param type of processing result + * @param type of state + */ +class GlobalSequenceTracker< + EventKeyT, EventT, ResultT, StateT extends MutableState> + extends PTransform< + PCollection>>>, + PCollectionView> { + + private final Combine.GloballyAsSingletonView< + TimestampedValue>>, ContiguousSequenceRange> + sideInputProducer; + private final @Nullable Duration frequencyOfGeneration; + private final int maxElementsBeforeReevaluatingGlobalSequence; + + /** + * Constructor used in batch pipelines. 
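+   * <p>In batch, the side input is computed once over the whole bounded input,
+   * so no periodic re-evaluation trigger is installed.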
+ * + * @param sideInputProducer + */ + public GlobalSequenceTracker( + Combine.GloballyAsSingletonView< + TimestampedValue>>, ContiguousSequenceRange> + sideInputProducer) { + this.sideInputProducer = sideInputProducer; + this.frequencyOfGeneration = null; + this.maxElementsBeforeReevaluatingGlobalSequence = 0; + } + + public GlobalSequenceTracker( + Combine.GloballyAsSingletonView< + TimestampedValue>>, ContiguousSequenceRange> + sideInputProducer, + Duration globalSequenceGenerationFrequency, + int maxElementsBeforeReevaluatingGlobalSequence) { + this.sideInputProducer = sideInputProducer; + this.frequencyOfGeneration = globalSequenceGenerationFrequency; + this.maxElementsBeforeReevaluatingGlobalSequence = maxElementsBeforeReevaluatingGlobalSequence; + } + + @Override + public PCollectionView expand( + PCollection>>> input) { + input + .getPipeline() + .getCoderRegistry() + .registerCoderForClass(ContiguousSequenceRange.class, CompletedSequenceRangeCoder.of()); + + if (frequencyOfGeneration != null) { + // This branch will only be executed in case of streaming pipelines. + // For batch pipelines the side input should only be computed once. + input = + input.apply( + "Triggering Setup", + // Reproduce the windowing of the input PCollection, but change the triggering + // in order to create a slowing changing side input + Window.>>>into( + (WindowFn>>, ?>) + input.getWindowingStrategy().getWindowFn()) + .accumulatingFiredPanes() + .withAllowedLateness(input.getWindowingStrategy().getAllowedLateness()) + .triggering( + Repeatedly.forever( + AfterFirst.of( + AfterPane.elementCountAtLeast( + maxElementsBeforeReevaluatingGlobalSequence), + AfterProcessingTime.pastFirstElementInPane() + .plusDelayOf(frequencyOfGeneration))))); + } + return input.apply("Create Side Input", sideInputProducer); + } +} diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/GlobalSequencesProcessorDoFn.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/GlobalSequencesProcessorDoFn.java new file mode 100644 index 0000000000000..64c2d119c97d5 --- /dev/null +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/GlobalSequencesProcessorDoFn.java @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.ordered; + +import org.apache.beam.sdk.coders.BooleanCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.extensions.ordered.ProcessingState.ProcessingStateCoder; +import org.apache.beam.sdk.state.OrderedListState; +import org.apache.beam.sdk.state.StateSpec; +import org.apache.beam.sdk.state.StateSpecs; +import org.apache.beam.sdk.state.TimeDomain; +import org.apache.beam.sdk.state.Timer; +import org.apache.beam.sdk.state.TimerSpec; +import org.apache.beam.sdk.state.TimerSpecs; +import org.apache.beam.sdk.state.ValueState; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollectionView; +import org.apache.beam.sdk.values.TupleTag; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.joda.time.Duration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Main Stateful DoFn used to process events in the global sequence mode. + * + * @param + * @param + * @param + * @param + */ +class GlobalSequencesProcessorDoFn< + EventT, EventKeyT, ResultT, StateT extends MutableState> + extends ProcessorDoFn { + + private static final Logger LOG = LoggerFactory.getLogger(GlobalSequencesProcessorDoFn.class); + + private static final String BATCH_EMISSION_TIMER = "batchTimer"; + + @TimerId(BATCH_EMISSION_TIMER) + @SuppressWarnings("unused") + private final TimerSpec batchTimerSpec = TimerSpecs.timer(TimeDomain.EVENT_TIME); + + private static final String BUFFERED_EVENTS = "bufferedEvents"; + + @StateId(BUFFERED_EVENTS) + @SuppressWarnings("unused") + private final StateSpec> bufferedEventsSpec; + + @StateId(PROCESSING_STATE) + @SuppressWarnings("unused") + private final StateSpec>> processingStateSpec; + + @StateId(MUTABLE_STATE) + @SuppressWarnings("unused") + private final StateSpec> mutableStateSpec; + + @StateId(WINDOW_CLOSED) + @SuppressWarnings("unused") + private final StateSpec> windowClosedSpec; + + @TimerId(STATUS_EMISSION_TIMER) + @SuppressWarnings("unused") + private final TimerSpec statusEmissionTimer = TimerSpecs.timer(TimeDomain.PROCESSING_TIME); + + private final PCollectionView latestContiguousRangeSideInput; + + private final Duration maxLateness; + + GlobalSequencesProcessorDoFn( + EventExaminer eventExaminer, + Coder eventCoder, + Coder stateCoder, + Coder keyCoder, + TupleTag> mainOutputTupleTag, + TupleTag> statusTupleTag, + Duration statusUpdateFrequency, + TupleTag>>> unprocessedEventTupleTag, + boolean produceStatusUpdateOnEveryEvent, + long maxNumberOfResultsToProduce, + PCollectionView latestContiguousRangeSideInput, + Duration maxLateness) { + super( + eventExaminer, + mainOutputTupleTag, + statusTupleTag, + statusUpdateFrequency, + unprocessedEventTupleTag, + produceStatusUpdateOnEveryEvent, + maxNumberOfResultsToProduce); + + this.latestContiguousRangeSideInput = latestContiguousRangeSideInput; + this.bufferedEventsSpec = StateSpecs.orderedList(eventCoder); + this.processingStateSpec = StateSpecs.value(ProcessingStateCoder.of(keyCoder)); + this.mutableStateSpec = StateSpecs.value(stateCoder); + this.windowClosedSpec = StateSpecs.value(BooleanCoder.of()); + this.maxLateness = maxLateness; + } + + @Override + boolean checkForFirstOrLastEvent() { + return false; + } + + @Override + boolean checkForSequenceGapInBufferedEvents() { + return false; + } + + @ProcessElement + public void processElement( + ProcessContext context, + @Element KV> eventAndSequence, + @StateId(BUFFERED_EVENTS) 
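+          // out-of-sequence events buffered, in sequence order, until the
+          // batch emission timer fires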
OrderedListState bufferedEventsProxy, + @AlwaysFetched @StateId(PROCESSING_STATE) + ValueState> processingStateProxy, + @StateId(MUTABLE_STATE) ValueState mutableStateProxy, + @TimerId(STATUS_EMISSION_TIMER) Timer statusEmissionTimer, + @TimerId(BATCH_EMISSION_TIMER) Timer batchEmissionTimer, + MultiOutputReceiver outputReceiver, + BoundedWindow window) { + + ContiguousSequenceRange lastContiguousRange = context.sideInput(latestContiguousRangeSideInput); + + EventT event = eventAndSequence.getValue().getValue(); + EventKeyT key = eventAndSequence.getKey(); + long sequence = eventAndSequence.getValue().getKey(); + + if (LOG.isTraceEnabled()) { + LOG.trace(key + ": " + sequence + " lastRange: " + lastContiguousRange); + } + + ProcessingState processingState = processingStateProxy.read(); + + if (processingState == null) { + // This is the first time we see this key/window pair + processingState = new ProcessingState<>(key); + if (statusUpdateFrequency != null) { + // Set up the timer to produce the status of the processing on a regular basis + statusEmissionTimer.offset(statusUpdateFrequency).setRelative(); + } + } + + processingState.updateGlobalSequenceDetails(lastContiguousRange); + + if (event == null) { + // This is a ticker event. We only need to update the state as it relates to the global + // sequence. + processingStateProxy.write(processingState); + + setBatchEmissionTimerIfNeeded(batchEmissionTimer, processingState); + + return; + } + + if (numberOfResultsBeforeBundleStart == null) { + // Per key processing is synchronized by Beam. There is no need to have it here. + numberOfResultsBeforeBundleStart = processingState.getResultCount(); + } + + processingState.eventReceived(); + + StateT state = + processNewEvent( + sequence, + event, + processingState, + mutableStateProxy, + bufferedEventsProxy, + outputReceiver); + + saveStates( + processingStateProxy, + processingState, + mutableStateProxy, + state, + outputReceiver, + window.maxTimestamp()); + + setBatchEmissionTimerIfNeeded(batchEmissionTimer, processingState); + } + + private void setBatchEmissionTimerIfNeeded( + Timer batchEmissionTimer, ProcessingState processingState) { + ContiguousSequenceRange lastCompleteGlobalSequence = processingState.getLastContiguousRange(); + if (lastCompleteGlobalSequence != null + && processingState.thereAreGloballySequencedEventsToBeProcessed()) { + batchEmissionTimer.set(lastCompleteGlobalSequence.getTimestamp().plus(maxLateness)); + } + } + + @OnTimer(BATCH_EMISSION_TIMER) + public void onBatchEmission( + OnTimerContext context, + @StateId(BUFFERED_EVENTS) OrderedListState bufferedEventsState, + @AlwaysFetched @StateId(PROCESSING_STATE) + ValueState> processingStatusState, + @AlwaysFetched @StateId(MUTABLE_STATE) ValueState mutableStateState, + @TimerId(BATCH_EMISSION_TIMER) Timer batchEmissionTimer, + MultiOutputReceiver outputReceiver) { + + // At this point everything in the buffered state is ready to be processed up to the latest + // global sequence. + @Nullable ProcessingState processingState = processingStatusState.read(); + if (processingState == null) { + LOG.warn("Missing the processing state. 
Probably occurred during pipeline drainage"); + return; + } + + StateT state = mutableStateState.read(); + + ContiguousSequenceRange lastContiguousRange = processingState.getLastContiguousRange(); + if (lastContiguousRange == null) { + LOG.warn("Last complete global instance is null."); + return; + } + + Long earliestBufferedSequence = processingState.getEarliestBufferedSequence(); + if (earliestBufferedSequence == null) { + LOG.warn("Earliest buffered sequence is null."); + return; + } + + if (LOG.isTraceEnabled()) { + LOG.trace("Emission timer: " + processingState); + } + + this.numberOfResultsBeforeBundleStart = processingState.getResultCount(); + + state = + processBufferedEventRange( + processingState, + state, + bufferedEventsState, + outputReceiver, + batchEmissionTimer, + lastContiguousRange); + + saveStates( + processingStatusState, + processingState, + mutableStateState, + state, + outputReceiver, + // TODO: validate that this is correct. + context.window().maxTimestamp()); + } + + @OnTimer(STATUS_EMISSION_TIMER) + @SuppressWarnings("unused") + public void onStatusEmission( + MultiOutputReceiver outputReceiver, + @TimerId(STATUS_EMISSION_TIMER) Timer statusEmissionTimer, + @StateId(WINDOW_CLOSED) ValueState windowClosedState, + @StateId(PROCESSING_STATE) ValueState> processingStateState) { + + processStatusTimerEvent( + outputReceiver, statusEmissionTimer, windowClosedState, processingStateState); + } +} diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessor.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessor.java index 935647c0e7e5e..fb23a7c8667af 100644 --- a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessor.java +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessor.java @@ -19,52 +19,44 @@ import com.google.auto.value.AutoValue; import java.util.Arrays; -import java.util.Iterator; import javax.annotation.Nullable; import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.BooleanCoder; import org.apache.beam.sdk.coders.CannotProvideCoderException; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.KvCoder; import org.apache.beam.sdk.coders.VarLongCoder; -import org.apache.beam.sdk.extensions.ordered.ProcessingState.ProcessingStateCoder; -import org.apache.beam.sdk.extensions.ordered.UnprocessedEvent.Reason; +import org.apache.beam.sdk.extensions.ordered.OrderedProcessingHandler.OrderedProcessingGlobalSequenceHandler; import org.apache.beam.sdk.extensions.ordered.UnprocessedEvent.UnprocessedEventCoder; import org.apache.beam.sdk.schemas.NoSuchSchemaException; import org.apache.beam.sdk.schemas.SchemaCoder; import org.apache.beam.sdk.schemas.SchemaRegistry; -import org.apache.beam.sdk.state.OrderedListState; -import org.apache.beam.sdk.state.StateSpec; -import org.apache.beam.sdk.state.StateSpecs; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.state.Timer; -import org.apache.beam.sdk.state.TimerSpec; -import org.apache.beam.sdk.state.TimerSpecs; -import org.apache.beam.sdk.state.ValueState; import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.Flatten; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.values.KV; import 
org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollection.IsBounded; +import org.apache.beam.sdk.values.PCollectionList; import org.apache.beam.sdk.values.PCollectionTuple; +import org.apache.beam.sdk.values.PCollectionView; import org.apache.beam.sdk.values.TimestampedValue; import org.apache.beam.sdk.values.TupleTag; import org.apache.beam.sdk.values.TupleTagList; import org.apache.beam.sdk.values.TypeDescriptor; -import org.joda.time.Duration; import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Transform for processing ordered events. Events are grouped by the key and within each key they * are applied according to the provided sequence. Events which arrive out of sequence are buffered * and processed after all the missing events for a given key have arrived. * - * @param - * @param - * @param + *
+ * <p>
There are two sequencing modes - a sequence per key and a global sequence. See {@link + * OrderedProcessingHandler} for details on how to configure this transform. + * + * @param type of event + * @param type of event key + * @param type of the state */ @AutoValue @SuppressWarnings({"nullness", "TypeNameShadowing"}) @@ -74,6 +66,18 @@ public abstract class OrderedEventProcessor< PCollection>>, OrderedEventProcessorResult> { + public static final String GLOBAL_SEQUENCE_TRACKER = "global_sequence_tracker"; + + /** + * Create the transform. + * + * @param handler provides the configuration of this transform + * @param type of event + * @param type of event key + * @param type of the result object + * @param type of the state to store + * @return the transform + */ public static < EventTypeT, EventKeyTypeT, @@ -129,10 +133,67 @@ public OrderedEventProcessorResult expand( throw new RuntimeException("Unable to get result coder", e); } - PCollectionTuple processingResult = + KvCoder mainOutputCoder = KvCoder.of(keyCoder, resultCoder); + KvCoder processingStatusCoder = + KvCoder.of(keyCoder, getOrderedProcessingStatusCoder(pipeline)); + KvCoder>> unprocessedEventsCoder = + KvCoder.of( + keyCoder, KvCoder.of(VarLongCoder.of(), new UnprocessedEventCoder<>(eventCoder))); + + if (handler instanceof OrderedProcessingGlobalSequenceHandler) { + OrderedProcessingGlobalSequenceHandler + globalSequenceHandler = + (OrderedProcessingGlobalSequenceHandler) handler; + + return expandGlobalSequenceProcessing( + input, + mainOutput, + statusOutput, + unprocessedEventOutput, + handler, + pipeline, + keyCoder, + eventCoder, + stateCoder, + mainOutputCoder, + processingStatusCoder, + unprocessedEventsCoder, + globalSequenceHandler); + } else { + return expandPerKeyProcessing( + input, + mainOutput, + statusOutput, + unprocessedEventOutput, + handler, + pipeline, + keyCoder, + eventCoder, + stateCoder, + mainOutputCoder, + processingStatusCoder, + unprocessedEventsCoder); + } + } + + private OrderedEventProcessorResult expandPerKeyProcessing( + PCollection>> input, + TupleTag> mainOutput, + TupleTag> statusOutput, + TupleTag>>> unprocessedEventOutput, + OrderedProcessingHandler handler, + Pipeline pipeline, + Coder keyCoder, + Coder eventCoder, + Coder stateCoder, + KvCoder mainOutputCoder, + KvCoder processingStatusCoder, + KvCoder>> unprocessedEventsCoder) { + PCollectionTuple processingResult; + processingResult = input.apply( ParDo.of( - new OrderedProcessorDoFn<>( + new SequencePerKeyProcessorDoFn<>( handler.getEventExaminer(), eventCoder, stateCoder, @@ -146,13 +207,6 @@ public OrderedEventProcessorResult expand( .withOutputTags( mainOutput, TupleTagList.of(Arrays.asList(statusOutput, unprocessedEventOutput)))); - - KvCoder mainOutputCoder = KvCoder.of(keyCoder, resultCoder); - KvCoder processingStatusCoder = - KvCoder.of(keyCoder, getOrderedProcessingStatusCoder(pipeline)); - KvCoder>> unprocessedEventsCoder = - KvCoder.of( - keyCoder, KvCoder.of(VarLongCoder.of(), new UnprocessedEventCoder<>(eventCoder))); return new OrderedEventProcessorResult<>( pipeline, processingResult.get(mainOutput).setCoder(mainOutputCoder), @@ -163,6 +217,84 @@ public OrderedEventProcessorResult expand( unprocessedEventOutput); } + private OrderedEventProcessorResult expandGlobalSequenceProcessing( + PCollection>> input, + TupleTag> mainOutput, + TupleTag> statusOutput, + TupleTag>>> unprocessedEventOutput, + OrderedProcessingHandler handler, + Pipeline pipeline, + Coder keyCoder, + Coder eventCoder, + Coder stateCoder, + KvCoder 
mainOutputCoder, + KvCoder processingStatusCoder, + KvCoder>> unprocessedEventsCoder, + OrderedProcessingGlobalSequenceHandler + globalSequenceHandler) { + PCollectionTuple processingResult; + boolean streamingProcessing = input.isBounded() == IsBounded.UNBOUNDED; + + final PCollectionView latestContiguousRange = + input + .apply("Convert to SequenceAndTimestamp", ParDo.of(new ToTimestampedEventConverter<>())) + .apply( + "Global Sequence Tracker", + streamingProcessing + ? new GlobalSequenceTracker<>( + globalSequenceHandler.getGlobalSequenceCombiner(), + globalSequenceHandler.getContiguousSequenceRangeReevaluationFrequency(), + globalSequenceHandler + .getMaxElementCountToTriggerContinuousSequenceRangeReevaluation()) + : new GlobalSequenceTracker<>( + globalSequenceHandler.getGlobalSequenceCombiner())); + + if (streamingProcessing) { + PCollection>> tickers = + input.apply( + "Create Tickers", + new PerKeyTickerGenerator<>( + keyCoder, + eventCoder, + globalSequenceHandler.getContiguousSequenceRangeReevaluationFrequency())); + + input = + PCollectionList.of(input) + .and(tickers) + .apply("Combine Events and Tickers", Flatten.pCollections()) + .setCoder(tickers.getCoder()); + } + processingResult = + input.apply( + ParDo.of( + new GlobalSequencesProcessorDoFn<>( + handler.getEventExaminer(), + eventCoder, + stateCoder, + keyCoder, + mainOutput, + statusOutput, + handler.getStatusUpdateFrequency(), + unprocessedEventOutput, + handler.isProduceStatusUpdateOnEveryEvent(), + handler.getMaxOutputElementsPerBundle(), + latestContiguousRange, + input.getWindowingStrategy().getAllowedLateness())) + .withOutputTags( + mainOutput, + TupleTagList.of(Arrays.asList(statusOutput, unprocessedEventOutput))) + .withSideInput(GLOBAL_SEQUENCE_TRACKER, latestContiguousRange)); + return new OrderedEventProcessorResult<>( + pipeline, + processingResult.get(mainOutput).setCoder(mainOutputCoder), + mainOutput, + processingResult.get(statusOutput).setCoder(processingStatusCoder), + statusOutput, + processingResult.get(unprocessedEventOutput).setCoder(unprocessedEventsCoder), + unprocessedEventOutput, + latestContiguousRange); + } + private static Coder getOrderedProcessingStatusCoder(Pipeline pipeline) { SchemaRegistry schemaRegistry = pipeline.getSchemaRegistry(); Coder result; @@ -179,497 +311,16 @@ private static Coder getOrderedProcessingStatusCoder(Pi return result; } - /** - * Main DoFn for processing ordered events. 
- * - * @param - * @param - * @param - */ - static class OrderedProcessorDoFn< - EventTypeT, - EventKeyTypeT, - ResultTypeT, - StateTypeT extends MutableState> - extends DoFn>, KV> { - - private static final Logger LOG = LoggerFactory.getLogger(OrderedProcessorDoFn.class); - - private static final String PROCESSING_STATE = "processingState"; - private static final String MUTABLE_STATE = "mutableState"; - private static final String BUFFERED_EVENTS = "bufferedEvents"; - private static final String STATUS_EMISSION_TIMER = "statusTimer"; - private static final String LARGE_BATCH_EMISSION_TIMER = "largeBatchTimer"; - private static final String WINDOW_CLOSED = "windowClosed"; - private final EventExaminer eventExaminer; - - @StateId(BUFFERED_EVENTS) - @SuppressWarnings("unused") - private final StateSpec> bufferedEventsSpec; - - @StateId(PROCESSING_STATE) - @SuppressWarnings("unused") - private final StateSpec>> processingStateSpec; - - @SuppressWarnings("unused") - @StateId(MUTABLE_STATE) - private final StateSpec> mutableStateSpec; - - @StateId(WINDOW_CLOSED) - @SuppressWarnings("unused") - private final StateSpec> windowClosedSpec; - - @TimerId(STATUS_EMISSION_TIMER) - @SuppressWarnings("unused") - private final TimerSpec statusEmissionTimer = TimerSpecs.timer(TimeDomain.PROCESSING_TIME); - - @TimerId(LARGE_BATCH_EMISSION_TIMER) - @SuppressWarnings("unused") - private final TimerSpec largeBatchEmissionTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME); - - private final TupleTag> statusTupleTag; - private final Duration statusUpdateFrequency; - - private final TupleTag> mainOutputTupleTag; - private final TupleTag>>> - unprocessedEventsTupleTag; - private final boolean produceStatusUpdateOnEveryEvent; - - private final long maxNumberOfResultsToProduce; - - private Long numberOfResultsBeforeBundleStart; - - /** - * Stateful DoFn to do the bulk of processing. 
- * - * @param eventExaminer - * @param eventCoder - * @param stateCoder - * @param keyCoder - * @param mainOutputTupleTag - * @param statusTupleTag - * @param statusUpdateFrequency - * @param unprocessedEventTupleTag - * @param produceStatusUpdateOnEveryEvent - * @param maxNumberOfResultsToProduce - */ - OrderedProcessorDoFn( - EventExaminer eventExaminer, - Coder eventCoder, - Coder stateCoder, - Coder keyCoder, - TupleTag> mainOutputTupleTag, - TupleTag> statusTupleTag, - Duration statusUpdateFrequency, - TupleTag>>> - unprocessedEventTupleTag, - boolean produceStatusUpdateOnEveryEvent, - long maxNumberOfResultsToProduce) { - this.eventExaminer = eventExaminer; - this.bufferedEventsSpec = StateSpecs.orderedList(eventCoder); - this.mutableStateSpec = StateSpecs.value(stateCoder); - this.processingStateSpec = StateSpecs.value(ProcessingStateCoder.of(keyCoder)); - this.windowClosedSpec = StateSpecs.value(BooleanCoder.of()); - this.mainOutputTupleTag = mainOutputTupleTag; - this.statusTupleTag = statusTupleTag; - this.unprocessedEventsTupleTag = unprocessedEventTupleTag; - this.statusUpdateFrequency = statusUpdateFrequency; - this.produceStatusUpdateOnEveryEvent = produceStatusUpdateOnEveryEvent; - this.maxNumberOfResultsToProduce = maxNumberOfResultsToProduce; - } - - @StartBundle - public void onBundleStart() { - numberOfResultsBeforeBundleStart = null; - } - - @FinishBundle - public void onBundleFinish() { - // This might be necessary because this field is also used in a Timer - numberOfResultsBeforeBundleStart = null; - } + static class ToTimestampedEventConverter + extends DoFn< + KV>, TimestampedValue>>> { @ProcessElement - public void processElement( - @StateId(BUFFERED_EVENTS) OrderedListState bufferedEventsState, - @AlwaysFetched @StateId(PROCESSING_STATE) - ValueState> processingStateState, - @StateId(MUTABLE_STATE) ValueState mutableStateState, - @TimerId(STATUS_EMISSION_TIMER) Timer statusEmissionTimer, - @TimerId(LARGE_BATCH_EMISSION_TIMER) Timer largeBatchEmissionTimer, - @Element KV> eventAndSequence, - MultiOutputReceiver outputReceiver, - BoundedWindow window) { - - EventKeyTypeT key = eventAndSequence.getKey(); - long sequence = eventAndSequence.getValue().getKey(); - EventTypeT event = eventAndSequence.getValue().getValue(); - - ProcessingState processingState = processingStateState.read(); - - if (processingState == null) { - // This is the first time we see this key/window pair - processingState = new ProcessingState<>(key); - if (statusUpdateFrequency != null) { - // Set up the timer to produce the status of the processing on a regular basis - statusEmissionTimer.offset(statusUpdateFrequency).setRelative(); - } - } - - if (numberOfResultsBeforeBundleStart == null) { - // Per key processing is synchronized by Beam. There is no need to have it here. 
- numberOfResultsBeforeBundleStart = processingState.getResultCount(); - } - - processingState.eventReceived(); - - StateTypeT state = - processNewEvent( - sequence, - event, - processingState, - mutableStateState, - bufferedEventsState, - outputReceiver); - - processBufferedEvents( - processingState, state, bufferedEventsState, outputReceiver, largeBatchEmissionTimer); - - saveStates( - processingStateState, - processingState, - mutableStateState, - state, - outputReceiver, - window.maxTimestamp()); - - checkIfProcessingIsCompleted(processingState); - } - - private boolean checkIfProcessingIsCompleted(ProcessingState processingState) { - boolean result = processingState.isProcessingCompleted(); - if (result) { - LOG.info("Processing for key '" + processingState.getKey() + "' is completed."); - } - return result; - } - - private void saveStates( - ValueState> processingStatusState, - ProcessingState processingStatus, - ValueState currentStateState, - StateTypeT state, - MultiOutputReceiver outputReceiver, - Instant windowTimestamp) { - // There is always a change to the processing status - processingStatusState.write(processingStatus); - - // Stored state may not have changes if the element was out of sequence. - if (state != null) { - currentStateState.write(state); - } - - if (produceStatusUpdateOnEveryEvent) { - // During pipeline draining the window timestamp is set to a large value in the future. - // Producing an event before that results in error, that's why this logic exist. - Instant statusTimestamp = windowTimestamp; - - emitProcessingStatus(processingStatus, outputReceiver, statusTimestamp); - } - } - - private void emitProcessingStatus( - ProcessingState processingState, - MultiOutputReceiver outputReceiver, - Instant statusTimestamp) { - outputReceiver - .get(statusTupleTag) - .outputWithTimestamp( - KV.of( - processingState.getKey(), - OrderedProcessingStatus.create( - processingState.getLastOutputSequence(), - processingState.getBufferedEventCount(), - processingState.getEarliestBufferedSequence(), - processingState.getLatestBufferedSequence(), - processingState.getEventsReceived(), - processingState.getResultCount(), - processingState.getDuplicates(), - processingState.isLastEventReceived())), - statusTimestamp); - } - - /** - * Process the just received event. - * - * @return newly created or updated State. If null is returned - the event wasn't processed. - */ - private StateTypeT processNewEvent( - long currentSequence, - EventTypeT currentEvent, - ProcessingState processingState, - ValueState currentStateState, - OrderedListState bufferedEventsState, - MultiOutputReceiver outputReceiver) { - if (currentSequence == Long.MAX_VALUE) { - // OrderedListState can't handle the timestamp based on MAX_VALUE. - // To avoid exceptions, we DLQ this event. 
- outputReceiver - .get(unprocessedEventsTupleTag) - .output( - KV.of( - processingState.getKey(), - KV.of( - currentSequence, - UnprocessedEvent.create( - currentEvent, Reason.sequence_id_outside_valid_range)))); - return null; - } - - if (processingState.hasAlreadyBeenProcessed(currentSequence)) { - outputReceiver - .get(unprocessedEventsTupleTag) - .output( - KV.of( - processingState.getKey(), - KV.of( - currentSequence, UnprocessedEvent.create(currentEvent, Reason.duplicate)))); - return null; - } - - StateTypeT state; - boolean thisIsTheLastEvent = eventExaminer.isLastEvent(currentSequence, currentEvent); - if (eventExaminer.isInitialEvent(currentSequence, currentEvent)) { - // First event of the key/window - // What if it's a duplicate event - it will reset everything. Shall we drop/DLQ anything - // that's before the processingState.lastOutputSequence? - state = eventExaminer.createStateOnInitialEvent(currentEvent); - - processingState.eventAccepted(currentSequence, thisIsTheLastEvent); - - ResultTypeT result = state.produceResult(); - if (result != null) { - outputReceiver.get(mainOutputTupleTag).output(KV.of(processingState.getKey(), result)); - processingState.resultProduced(); - } - - // Nothing else to do. We will attempt to process buffered events later. - return state; - } - - if (processingState.isNextEvent(currentSequence)) { - // Event matches expected sequence - state = currentStateState.read(); - - try { - state.mutate(currentEvent); - } catch (Exception e) { - outputReceiver - .get(unprocessedEventsTupleTag) - .output( - KV.of( - processingState.getKey(), - KV.of(currentSequence, UnprocessedEvent.create(currentEvent, e)))); - return null; - } - - ResultTypeT result = state.produceResult(); - if (result != null) { - outputReceiver.get(mainOutputTupleTag).output(KV.of(processingState.getKey(), result)); - processingState.resultProduced(); - } - processingState.eventAccepted(currentSequence, thisIsTheLastEvent); - - return state; - } - - // Event is not ready to be processed yet - Instant eventTimestamp = Instant.ofEpochMilli(currentSequence); - bufferedEventsState.add(TimestampedValue.of(currentEvent, eventTimestamp)); - processingState.eventBuffered(currentSequence, thisIsTheLastEvent); - - // This will signal that the state hasn't been mutated and we don't need to save it. - return null; - } - - /** Process buffered events. 
*/ - private void processBufferedEvents( - ProcessingState processingState, - StateTypeT state, - OrderedListState bufferedEventsState, - MultiOutputReceiver outputReceiver, - Timer largeBatchEmissionTimer) { - if (state == null) { - // Only when the current event caused a state mutation and the state is passed to this - // method should we attempt to process buffered events - return; - } - - if (!processingState.readyToProcessBufferedEvents()) { - return; - } - - if (reachedMaxResultCountForBundle(processingState, largeBatchEmissionTimer)) { - // No point in trying to process buffered events - return; - } - - Instant startRange = Instant.ofEpochMilli(processingState.getEarliestBufferedSequence()); - Instant endRange = Instant.ofEpochMilli(processingState.getLatestBufferedSequence() + 1); - Instant endClearRange = null; - - // readRange is efficiently implemented and will bring records in batches - Iterable> events = - bufferedEventsState.readRange(startRange, endRange); - - Iterator> bufferedEventsIterator = events.iterator(); - while (bufferedEventsIterator.hasNext()) { - TimestampedValue timestampedEvent = bufferedEventsIterator.next(); - Instant eventTimestamp = timestampedEvent.getTimestamp(); - long eventSequence = eventTimestamp.getMillis(); - - EventTypeT bufferedEvent = timestampedEvent.getValue(); - if (processingState.checkForDuplicateBatchedEvent(eventSequence)) { - outputReceiver - .get(unprocessedEventsTupleTag) - .output( - KV.of( - processingState.getKey(), - KV.of( - eventSequence, - UnprocessedEvent.create(bufferedEvent, Reason.duplicate)))); - continue; - } - - if (eventSequence > processingState.getLastOutputSequence() + 1) { - processingState.foundSequenceGap(eventSequence); - // Records will be cleared up to this element - endClearRange = Instant.ofEpochMilli(eventSequence); - break; - } - - // This check needs to be done after we checked for sequence gap and before we - // attempt to process the next element which can result in a new result. - if (reachedMaxResultCountForBundle(processingState, largeBatchEmissionTimer)) { - endClearRange = Instant.ofEpochMilli(eventSequence); - break; - } - - try { - state.mutate(bufferedEvent); - } catch (Exception e) { - outputReceiver - .get(unprocessedEventsTupleTag) - .output( - KV.of( - processingState.getKey(), - KV.of(eventSequence, UnprocessedEvent.create(bufferedEvent, e)))); - // There is a chance that the next event will have the same sequence number and will - // process successfully. - continue; - } - - ResultTypeT result = state.produceResult(); - if (result != null) { - outputReceiver.get(mainOutputTupleTag).output(KV.of(processingState.getKey(), result)); - processingState.resultProduced(); - } - processingState.processedBufferedEvent(eventSequence); - // Remove this record also - endClearRange = Instant.ofEpochMilli(eventSequence + 1); - } - - bufferedEventsState.clearRange(startRange, endClearRange); - } - - private boolean reachedMaxResultCountForBundle( - ProcessingState processingState, Timer largeBatchEmissionTimer) { - boolean exceeded = - processingState.resultsProducedInBundle(numberOfResultsBeforeBundleStart) - >= maxNumberOfResultsToProduce; - if (exceeded) { - LOG.info( - "Setting the timer to output next batch of events for key '" - + processingState.getKey() - + "'"); - // See GroupIntoBatches for examples on how to hold the timestamp. - // TODO: test that on draining the pipeline all the results are still produced correctly. 
- // See: https://github.com/apache/beam/issues/30781 - largeBatchEmissionTimer.offset(Duration.millis(1)).setRelative(); - } - return exceeded; - } - - @OnTimer(LARGE_BATCH_EMISSION_TIMER) - public void onBatchEmission( - OnTimerContext context, - @StateId(BUFFERED_EVENTS) OrderedListState bufferedEventsState, - @AlwaysFetched @StateId(PROCESSING_STATE) - ValueState> processingStatusState, - @AlwaysFetched @StateId(MUTABLE_STATE) ValueState currentStateState, - @TimerId(LARGE_BATCH_EMISSION_TIMER) Timer largeBatchEmissionTimer, - MultiOutputReceiver outputReceiver) { - ProcessingState processingState = processingStatusState.read(); - if (processingState == null) { - LOG.warn("Processing state is empty. Ignore it if the pipeline is being cancelled."); - return; - } - StateTypeT state = currentStateState.read(); - if (state == null) { - LOG.warn("Mutable state is empty. Ignore it if the pipeline is being cancelled."); - return; - } - - LOG.debug("Starting to process batch for key '" + processingState.getKey() + "'"); - - this.numberOfResultsBeforeBundleStart = processingState.getResultCount(); - - processBufferedEvents( - processingState, state, bufferedEventsState, outputReceiver, largeBatchEmissionTimer); - - saveStates( - processingStatusState, - processingState, - currentStateState, - state, - outputReceiver, - // TODO: validate that this is correct. - context.window().maxTimestamp()); - - checkIfProcessingIsCompleted(processingState); - } - - @OnTimer(STATUS_EMISSION_TIMER) - @SuppressWarnings("unused") - public void onStatusEmission( - MultiOutputReceiver outputReceiver, - @TimerId(STATUS_EMISSION_TIMER) Timer statusEmissionTimer, - @StateId(WINDOW_CLOSED) ValueState windowClosedState, - @StateId(PROCESSING_STATE) - ValueState> processingStateState) { - - ProcessingState currentState = processingStateState.read(); - if (currentState == null) { - // This could happen if the state has been purged already during the draining. - // It means that there is nothing that we can do and we just need to return. 
- LOG.warn( - "Current processing state is null in onStatusEmission() - most likely the pipeline is shutting down."); - return; - } - - emitProcessingStatus(currentState, outputReceiver, Instant.now()); - - Boolean windowClosed = windowClosedState.read(); - if (!currentState.isProcessingCompleted() - // Stop producing statuses if we are finished for a particular key - && (windowClosed == null || !windowClosed)) { - statusEmissionTimer.offset(statusUpdateFrequency).setRelative(); - } - } - - @OnWindowExpiration - public void onWindowExpiration(@StateId(WINDOW_CLOSED) ValueState windowClosedState) { - windowClosedState.write(true); + public void convert( + @Element KV> element, + @Timestamp Instant timestamp, + OutputReceiver>>> outputReceiver) { + outputReceiver.output(TimestampedValue.of(element, timestamp)); } } } diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorResult.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorResult.java index f61df6254b253..48b9fafc99af7 100644 --- a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorResult.java +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorResult.java @@ -18,10 +18,12 @@ package org.apache.beam.sdk.extensions.ordered; import java.util.Map; +import javax.annotation.Nullable; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollectionView; import org.apache.beam.sdk.values.PInput; import org.apache.beam.sdk.values.POutput; import org.apache.beam.sdk.values.PValue; @@ -29,10 +31,15 @@ import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; /** - * The result of the ordered processing. Two PCollections are returned: + * The result of the ordered processing. Three PCollections are returned: *
  • output - the key/value of the mutated states
 + *
  • unprocessedEvents - the key/value of the events that failed to be processed and the failure
 + * reason
  • processingStatuses - the key/value of the status of processing for a particular key
 *
 + *
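To make the shape of the result concrete, here is a minimal consumption sketch. Only output(), unprocessedEvents(), and the new latestContiguousRange() accessor are visible in this diff; MyEvent/MyResult, the upstream wiring, and the logging DoFn are hypothetical stand-ins.

    // Sketch only, not part of the patch: consuming an OrderedEventProcessorResult.
    import org.apache.beam.sdk.transforms.DoFn;
    import org.apache.beam.sdk.transforms.ParDo;
    import org.apache.beam.sdk.values.KV;
    import org.apache.beam.sdk.values.PCollection;
    import org.apache.beam.sdk.values.PCollectionView;

    void consume(OrderedEventProcessorResult<String, MyResult, MyEvent> result) {
      // Mutated states, one element per successfully applied event.
      PCollection<KV<String, MyResult>> outputs = result.output();

      // Events that could not be applied, keyed by sequence number with the failure reason.
      PCollection<KV<String, KV<Long, UnprocessedEvent<MyEvent>>>> failures =
          result.unprocessedEvents();

      // New in this change: null for per-key sequencing, populated for global sequences.
      PCollectionView<ContiguousSequenceRange> rangeView = result.latestContiguousRange();
      if (rangeView != null) {
        outputs.apply(
            "LogRange",
            ParDo.of(
                    new DoFn<KV<String, MyResult>, String>() {
                      @ProcessElement
                      public void process(ProcessContext c) {
                        // The view is an ordinary side input holding the latest range.
                        c.output(c.element().getKey() + " @ " + c.sideInput(rangeView));
                      }
                    })
                .withSideInputs(rangeView));
      }
    }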
    In case of global sequence processing, the result also contains PCollectionView of the + * latest contiguous sequence range + * * @param * @param */ @@ -48,6 +55,8 @@ public class OrderedEventProcessorResult implements POutp unprocessedEventPCollection; private final TupleTag>>> unprocessedEventTupleTag; + private final @Nullable PCollectionView latestContiguousRange; + OrderedEventProcessorResult( Pipeline pipeline, PCollection> outputPCollection, @@ -57,6 +66,27 @@ public class OrderedEventProcessorResult implements POutp PCollection>>> unprocessedEventPCollection, TupleTag>>> unprocessedEventTupleTag) { + this( + pipeline, + outputPCollection, + outputPCollectionTupleTag, + eventProcessingStatusPCollection, + eventProcessingStatusTupleTag, + unprocessedEventPCollection, + unprocessedEventTupleTag, + null); + } + + OrderedEventProcessorResult( + Pipeline pipeline, + PCollection> outputPCollection, + TupleTag> outputPCollectionTupleTag, + PCollection> eventProcessingStatusPCollection, + TupleTag> eventProcessingStatusTupleTag, + PCollection>>> unprocessedEventPCollection, + TupleTag>>> unprocessedEventTupleTag, + @Nullable PCollectionView latestContiguousRange) { + this.pipeline = pipeline; this.outputPCollection = outputPCollection; this.outputPCollectionTupleTag = outputPCollectionTupleTag; @@ -64,6 +94,7 @@ public class OrderedEventProcessorResult implements POutp this.eventProcessingStatusTupleTag = eventProcessingStatusTupleTag; this.unprocessedEventPCollection = unprocessedEventPCollection; this.unprocessedEventTupleTag = unprocessedEventTupleTag; + this.latestContiguousRange = latestContiguousRange; } private final Pipeline pipeline; @@ -104,4 +135,8 @@ public PCollection> output() { public PCollection>>> unprocessedEvents() { return unprocessedEventPCollection; } + + public @Nullable PCollectionView latestContiguousRange() { + return latestContiguousRange; + } } diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedProcessingHandler.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedProcessingHandler.java index 444fdb118091b..d8ad13330a1a9 100644 --- a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedProcessingHandler.java +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedProcessingHandler.java @@ -22,7 +22,11 @@ import org.apache.beam.sdk.coders.CannotProvideCoderException; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.extensions.ordered.combiner.DefaultSequenceCombiner; +import org.apache.beam.sdk.transforms.Combine; +import org.apache.beam.sdk.transforms.Combine.GloballyAsSingletonView; import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.TimestampedValue; import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.Nullable; import org.joda.time.Duration; @@ -30,6 +34,11 @@ /** * Parent class for Ordered Processing configuration handlers. * + *
    There are two types of processing - when the sequence numbers are contiguous per key and these + * sequences per keys are independent of each other, and when there is a global sequence shared by + * all keys. In case of the global sequence processing the custom handler must extend from {@see + * OrderedProcessingGlobalSequenceHandler}. + * * @param type of events to be processed * @param type of keys which will be used to group the events * @param type of internal State which will be used for processing @@ -217,4 +226,75 @@ public int getMaxOutputElementsPerBundle() { public void setMaxOutputElementsPerBundle(int maxOutputElementsPerBundle) { this.maxOutputElementsPerBundle = maxOutputElementsPerBundle; } + + /** + * Parent class for Ordered Processing configuration handlers to handle processing of the events + * where global sequence is used. + * + * @param type of events to be processed + * @param type of keys which will be used to group the events + * @param type of internal State which will be used for processing + * @param type of the result of the processing which will be output + */ + public abstract static class OrderedProcessingGlobalSequenceHandler< + EventT, KeyT, StateT extends MutableState, ResultT> + extends OrderedProcessingHandler { + + public OrderedProcessingGlobalSequenceHandler( + Class eventTClass, + Class keyTClass, + Class stateTClass, + Class resultTClass) { + super(eventTClass, keyTClass, stateTClass, resultTClass); + } + + /** + * Provide the global sequence combiner. Default is to use {@link DefaultSequenceCombiner}. + * + * @return combiner + */ + public GloballyAsSingletonView< + TimestampedValue>>, ContiguousSequenceRange> + getGlobalSequenceCombiner() { + return Combine.globally(new DefaultSequenceCombiner(getEventExaminer())) + .asSingletonView(); + } + + /** + * How frequently the combiner should reevaluate the maximum range? This parameter only affects + * the behaviour of streaming pipelines. + * + *
    This parameter is used together with {@link + * OrderedProcessingGlobalSequenceHandler#getMaxElementCountToTriggerContinuousSequenceRangeReevaluation()}. + * The re-evaluation will occur as soon as the number of new elements exceeds the threshold or + * the time exceeds the frequency. + * + *
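Concretely, a handler subclass can pin both knobs. A sketch under the assumption that MyEvent/MyState/MyResult and MyEventExaminer exist; the overridden method names are the ones declared in this file:

    // Sketch only: refresh the contiguous range every 5 seconds, or as soon as
    // 10,000 new elements arrive, whichever happens first.
    import org.joda.time.Duration;

    public class MyGlobalSequenceHandler
        extends OrderedProcessingHandler.OrderedProcessingGlobalSequenceHandler<
            MyEvent, String, MyState, MyResult> {

      public MyGlobalSequenceHandler() {
        super(MyEvent.class, String.class, MyState.class, MyResult.class);
      }

      @Override
      public EventExaminer<MyEvent, MyState> getEventExaminer() {
        return new MyEventExaminer(); // hypothetical EventExaminer implementation
      }

      @Override
      public Duration getContiguousSequenceRangeReevaluationFrequency() {
        return Duration.standardSeconds(5);
      }

      @Override
      public int getMaxElementCountToTriggerContinuousSequenceRangeReevaluation() {
        return 10_000;
      }
    }

A shorter frequency keeps the side input fresher at the cost of more combiner firings; the element-count trigger covers bursts that arrive between timer firings.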
    Notice that some runners cache the output of side inputs and this parameter might not + * appear to have an effect unless the cache time-to-live is equal or less than this frequency. + * For Dataflow runner, see {@link this + * Dataflow streaming pipeline option} + * + * @return frequency of reevaluating the {@link ContiguousSequenceRange}. Default - every + * second. + * @see + * OrderedProcessingGlobalSequenceHandler#getMaxElementCountToTriggerContinuousSequenceRangeReevaluation() + */ + public Duration getContiguousSequenceRangeReevaluationFrequency() { + return Duration.standardSeconds(1); + } + + /** + * Number of new elements to trigger the re-evaluation. + * + *
    See {@link + * OrderedProcessingGlobalSequenceHandler#getContiguousSequenceRangeReevaluationFrequency()} for + * additional details. + * + * @return batch size. Default - 1000. + */ + public int getMaxElementCountToTriggerContinuousSequenceRangeReevaluation() { + return 1000; + } + } } diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedProcessingStatus.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedProcessingStatus.java index 6659bd2e2b922..7a556de1017b7 100644 --- a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedProcessingStatus.java +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/OrderedProcessingStatus.java @@ -30,16 +30,16 @@ public abstract class OrderedProcessingStatus { public static OrderedProcessingStatus create( - Long lastOutputSequence, + @Nullable Long lastProcessedSequence, long numberOfBufferedEvents, - Long earliestBufferedSequence, - Long latestBufferedSequence, + @Nullable Long earliestBufferedSequence, + @Nullable Long latestBufferedSequence, long numberOfReceivedEvents, long resultCount, long duplicateCount, boolean lastEventReceived) { return new AutoValue_OrderedProcessingStatus.Builder() - .setLastProcessedSequence(lastOutputSequence) + .setLastProcessedSequence(lastProcessedSequence) .setNumberOfBufferedEvents(numberOfBufferedEvents) .setEarliestBufferedSequence(earliestBufferedSequence) .setLatestBufferedSequence(latestBufferedSequence) @@ -55,8 +55,7 @@ public static OrderedProcessingStatus create( * @return Last sequence processed. If null is returned - no elements for the given key and window * have been processed yet. */ - @Nullable - public abstract Long getLastProcessedSequence(); + public abstract @Nullable Long getLastProcessedSequence(); /** @return Number of events received out of sequence and buffered. */ public abstract long getNumberOfBufferedEvents(); @@ -129,13 +128,13 @@ public final int hashCode() { @AutoValue.Builder public abstract static class Builder { - public abstract Builder setLastProcessedSequence(Long value); + public abstract Builder setLastProcessedSequence(@Nullable Long value); public abstract Builder setNumberOfBufferedEvents(long value); - public abstract Builder setEarliestBufferedSequence(Long value); + public abstract Builder setEarliestBufferedSequence(@Nullable Long value); - public abstract Builder setLatestBufferedSequence(Long value); + public abstract Builder setLatestBufferedSequence(@Nullable Long value); public abstract Builder setNumberOfReceivedEvents(long value); diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/PerKeyTickerGenerator.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/PerKeyTickerGenerator.java new file mode 100644 index 0000000000000..a18ba53f5266c --- /dev/null +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/PerKeyTickerGenerator.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.ordered; + +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.NullableCoder; +import org.apache.beam.sdk.coders.VarLongCoder; +import org.apache.beam.sdk.state.StateSpec; +import org.apache.beam.sdk.state.StateSpecs; +import org.apache.beam.sdk.state.TimeDomain; +import org.apache.beam.sdk.state.Timer; +import org.apache.beam.sdk.state.TimerSpec; +import org.apache.beam.sdk.state.TimerSpecs; +import org.apache.beam.sdk.state.ValueState; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.checkerframework.checker.initialization.qual.Initialized; +import org.checkerframework.checker.nullness.qual.NonNull; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.checkerframework.checker.nullness.qual.UnknownKeyFor; +import org.joda.time.Duration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * PTransform to generate per key tickers with certain frequency. + * + * @param + * @param + */ +class PerKeyTickerGenerator + extends PTransform< + PCollection>>, + PCollection>>> { + + private static final Logger LOG = LoggerFactory.getLogger(PerKeyTickerGenerator.class); + + private final Coder eventKeyCoder; + private final Coder eventCoder; + private final Duration tickerFrequency; + + PerKeyTickerGenerator( + Coder eventKeyCoder, Coder eventCoder, Duration tickerFrequency) { + this.eventKeyCoder = eventKeyCoder; + this.eventCoder = eventCoder; + this.tickerFrequency = tickerFrequency; + } + + @Override + public @UnknownKeyFor @NonNull @Initialized PCollection>> expand( + PCollection>> input) { + return input + .apply( + "Generate Tickers", + ParDo.of(new PerKeyTickerGeneratorDoFn<>(eventKeyCoder, tickerFrequency))) + .setCoder( + KvCoder.of(eventKeyCoder, KvCoder.of(VarLongCoder.of(), NullableCoder.of(eventCoder)))); + } + + static class PerKeyTickerGeneratorDoFn + extends DoFn>, KV>> { + + private static final String STATE = "state"; + private static final String TIMER = "timer"; + + @StateId(STATE) + @SuppressWarnings("unused") + private final StateSpec> stateSpec; + + @TimerId(TIMER) + @SuppressWarnings("unused") + private final TimerSpec tickerTimer = TimerSpecs.timer(TimeDomain.PROCESSING_TIME); + + private final Duration tickerFrequency; + + PerKeyTickerGeneratorDoFn(Coder keyCoder, Duration tickerFrequency) { + stateSpec = StateSpecs.value(keyCoder); + this.tickerFrequency = tickerFrequency; + } + + @ProcessElement + public void process( + @Element KV> element, + @AlwaysFetched @StateId(STATE) ValueState state, + @TimerId(TIMER) Timer tickerTimer) { + @Nullable EventKeyT keyValue = state.read(); + if (keyValue != null) { + return; + } + + tickerTimer.offset(tickerFrequency).setRelative(); + + state.write(element.getKey()); + } + + @OnTimer(TIMER) + public void onTimer( + @StateId(STATE) ValueState state, + @TimerId(TIMER) Timer tickerTimer, + 
OutputReceiver>> outputReceiver) { + + @Nullable EventKeyT key = state.read(); + if (key == null) { + LOG.error("Expected to get the key from the state, but got null"); + return; + } + + // Null value will be an indicator to the main transform that the element is a ticker + outputReceiver.output(KV.of(key, KV.of(0L, null))); + tickerTimer.offset(tickerFrequency).setRelative(); + } + } +} diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ProcessingState.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ProcessingState.java index 4b591a37faab8..425eb4444a634 100644 --- a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ProcessingState.java +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ProcessingState.java @@ -51,6 +51,8 @@ class ProcessingState { private long resultCount; + @Nullable private ContiguousSequenceRange lastCompleteGlobalSequence; + private KeyT key; public ProcessingState(KeyT key) { @@ -59,6 +61,7 @@ public ProcessingState(KeyT key) { this.lastOutputSequence = null; this.earliestBufferedSequence = null; this.latestBufferedSequence = null; + this.lastCompleteGlobalSequence = null; } /** @@ -130,6 +133,15 @@ public KeyT getKey() { return key; } + public @Nullable ContiguousSequenceRange getLastContiguousRange() { + return lastCompleteGlobalSequence; + } + + public void setLastCompleteGlobalSequence( + @Nullable ContiguousSequenceRange lastCompleteGlobalSequence) { + this.lastCompleteGlobalSequence = lastCompleteGlobalSequence; + } + /** * Current event matched the sequence and was processed. * @@ -229,6 +241,32 @@ public int hashCode() { key); } + @Override + public String toString() { + return "ProcessingState{" + + "lastOutputSequence=" + + lastOutputSequence + + ", latestBufferedSequence=" + + latestBufferedSequence + + ", earliestBufferedSequence=" + + earliestBufferedSequence + + ", bufferedEventCount=" + + bufferedEventCount + + ", lastEventReceived=" + + lastEventReceived + + ", eventsReceived=" + + eventsReceived + + ", duplicates=" + + duplicates + + ", resultCount=" + + resultCount + + ", lastCompleteGlobalSequence=" + + lastCompleteGlobalSequence + + ", key=" + + key + + '}'; + } + public boolean isProcessingCompleted() { return lastEventReceived && bufferedEventCount == 0; } @@ -274,6 +312,23 @@ public long resultsProducedInBundle(long numberOfResultsBeforeBundleStart) { return resultCount - numberOfResultsBeforeBundleStart; } + public void updateGlobalSequenceDetails(ContiguousSequenceRange updated) { + if (thereAreGloballySequencedEventsToBeProcessed()) { + // We don't update the timer if we can already process events in the onTimer batch. + // Otherwise, it's possible that we will be pushing the timer to later timestamps + // without a chance to run and produce output. + return; + } + this.lastCompleteGlobalSequence = updated; + } + + public boolean thereAreGloballySequencedEventsToBeProcessed() { + return bufferedEventCount > 0 + && lastCompleteGlobalSequence != null + && earliestBufferedSequence != null + && earliestBufferedSequence < lastCompleteGlobalSequence.getEnd(); + } + /** * Coder for the processing status. 
* @@ -287,6 +342,9 @@ static class ProcessingStateCoder extends Coder> { private static final VarIntCoder INTEGER_CODER = VarIntCoder.of(); private static final BooleanCoder BOOLEAN_CODER = BooleanCoder.of(); + private static final NullableCoder SEQUENCE_AND_TIMESTAMP_CODER = + NullableCoder.of(ContiguousSequenceRange.CompletedSequenceRangeCoder.of()); + private Coder keyCoder; private ProcessingStateCoder(Coder keyCoder) { @@ -308,6 +366,7 @@ public void encode(ProcessingState value, OutputStream outStream) throws I LONG_CODER.encode(value.getResultCount(), outStream); BOOLEAN_CODER.encode(value.isLastEventReceived(), outStream); keyCoder.encode(value.getKey(), outStream); + SEQUENCE_AND_TIMESTAMP_CODER.encode(value.getLastContiguousRange(), outStream); } @Override @@ -321,17 +380,23 @@ public ProcessingState decode(InputStream inStream) throws IOException { long resultCount = LONG_CODER.decode(inStream); boolean isLastEventReceived = BOOLEAN_CODER.decode(inStream); KeyT key = keyCoder.decode(inStream); - - return new ProcessingState<>( - key, - lastOutputSequence, - earliestBufferedSequence, - latestBufferedSequence, - bufferedRecordCount, - recordsReceivedCount, - duplicates, - resultCount, - isLastEventReceived); + ContiguousSequenceRange lastCompleteGlobalSequence = + SEQUENCE_AND_TIMESTAMP_CODER.decode(inStream); + + ProcessingState result = + new ProcessingState<>( + key, + lastOutputSequence, + earliestBufferedSequence, + latestBufferedSequence, + bufferedRecordCount, + recordsReceivedCount, + duplicates, + resultCount, + isLastEventReceived); + result.setLastCompleteGlobalSequence(lastCompleteGlobalSequence); + + return result; } @Override diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ProcessorDoFn.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ProcessorDoFn.java new file mode 100644 index 0000000000000..a05b0829074af --- /dev/null +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/ProcessorDoFn.java @@ -0,0 +1,427 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.ordered; + +import java.util.Iterator; +import javax.annotation.Nullable; +import org.apache.beam.sdk.extensions.ordered.UnprocessedEvent.Reason; +import org.apache.beam.sdk.state.OrderedListState; +import org.apache.beam.sdk.state.Timer; +import org.apache.beam.sdk.state.ValueState; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.TimestampedValue; +import org.apache.beam.sdk.values.TupleTag; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Base DoFn for processing ordered events. + * + * @param type of the events to process + * @param event key type + * @param state type + */ +abstract class ProcessorDoFn< + EventT, EventKeyT, ResultT, StateT extends MutableState> + extends DoFn>, KV> { + + private static final Logger LOG = LoggerFactory.getLogger(ProcessorDoFn.class); + + protected static final String PROCESSING_STATE = "processingState"; + protected static final String MUTABLE_STATE = "mutableState"; + + protected static final String STATUS_EMISSION_TIMER = "statusTimer"; + protected static final String WINDOW_CLOSED = "windowClosed"; + protected final EventExaminer eventExaminer; + + private final TupleTag> statusTupleTag; + protected final Duration statusUpdateFrequency; + + protected final TupleTag> mainOutputTupleTag; + protected final TupleTag>>> + unprocessedEventsTupleTag; + private final boolean produceStatusUpdateOnEveryEvent; + + private final long maxNumberOfResultsToProduce; + + protected @Nullable Long numberOfResultsBeforeBundleStart = 0L; + + ProcessorDoFn( + EventExaminer eventExaminer, + TupleTag> mainOutputTupleTag, + TupleTag> statusTupleTag, + Duration statusUpdateFrequency, + TupleTag>>> unprocessedEventTupleTag, + boolean produceStatusUpdateOnEveryEvent, + long maxNumberOfResultsToProduce) { + this.eventExaminer = eventExaminer; + + this.mainOutputTupleTag = mainOutputTupleTag; + this.statusTupleTag = statusTupleTag; + this.unprocessedEventsTupleTag = unprocessedEventTupleTag; + this.statusUpdateFrequency = statusUpdateFrequency; + this.produceStatusUpdateOnEveryEvent = produceStatusUpdateOnEveryEvent; + this.maxNumberOfResultsToProduce = maxNumberOfResultsToProduce; + } + + @StartBundle + public void onBundleStart() { + numberOfResultsBeforeBundleStart = null; + } + + @FinishBundle + public void onBundleFinish() { + // This might be necessary because this field is also used in a Timer + numberOfResultsBeforeBundleStart = null; + } + + /** @return true if each event needs to be examined. */ + abstract boolean checkForFirstOrLastEvent(); + + /** + * Process the just received event. + * + * @return newly created or updated State. If null is returned - the event wasn't processed. + */ + protected @javax.annotation.Nullable StateT processNewEvent( + long currentSequence, + EventT currentEvent, + ProcessingState processingState, + ValueState currentStateState, + OrderedListState bufferedEventsState, + MultiOutputReceiver outputReceiver) { + if (currentSequence == Long.MAX_VALUE) { + // OrderedListState can't handle the timestamp based on MAX_VALUE. + // To avoid exceptions, we DLQ this event. 
+ outputReceiver + .get(unprocessedEventsTupleTag) + .output( + KV.of( + processingState.getKey(), + KV.of( + currentSequence, + UnprocessedEvent.create( + currentEvent, Reason.sequence_id_outside_valid_range)))); + return null; + } + + if (processingState.hasAlreadyBeenProcessed(currentSequence)) { + outputReceiver + .get(unprocessedEventsTupleTag) + .output( + KV.of( + processingState.getKey(), + KV.of(currentSequence, UnprocessedEvent.create(currentEvent, Reason.duplicate)))); + return null; + } + + StateT state; + boolean thisIsTheLastEvent = + checkForFirstOrLastEvent() && eventExaminer.isLastEvent(currentSequence, currentEvent); + if (checkForFirstOrLastEvent() && eventExaminer.isInitialEvent(currentSequence, currentEvent)) { + // First event of the key/window + // What if it's a duplicate event - it will reset everything. Shall we drop/DLQ anything + // that's before the processingState.lastOutputSequence? + state = eventExaminer.createStateOnInitialEvent(currentEvent); + + processingState.eventAccepted(currentSequence, thisIsTheLastEvent); + + ResultT result = state.produceResult(); + if (result != null) { + outputReceiver.get(mainOutputTupleTag).output(KV.of(processingState.getKey(), result)); + processingState.resultProduced(); + } + + // Nothing else to do. We will attempt to process buffered events later. + return state; + } + + if (processingState.isNextEvent(currentSequence)) { + // Event matches expected sequence + state = currentStateState.read(); + if (state == null) { + LOG.warn("Unexpectedly got an empty state. Most likely cause is pipeline drainage."); + return null; + } + + try { + state.mutate(currentEvent); + } catch (Exception e) { + outputReceiver + .get(unprocessedEventsTupleTag) + .output( + KV.of( + processingState.getKey(), + KV.of(currentSequence, UnprocessedEvent.create(currentEvent, e)))); + return null; + } + + ResultT result = state.produceResult(); + if (result != null) { + outputReceiver.get(mainOutputTupleTag).output(KV.of(processingState.getKey(), result)); + processingState.resultProduced(); + } + processingState.eventAccepted(currentSequence, thisIsTheLastEvent); + + return state; + } + + // Event is not ready to be processed yet + bufferEvent( + currentSequence, currentEvent, processingState, bufferedEventsState, thisIsTheLastEvent); + + // This will signal that the state hasn't been mutated. We don't need to save it. + return null; + } + + protected void saveStates( + ValueState> processingStatusState, + ProcessingState processingStatus, + ValueState currentStateState, + @Nullable StateT state, + MultiOutputReceiver outputReceiver, + Instant windowTimestamp) { + // There is always a change to the processing status + processingStatusState.write(processingStatus); + + // Stored state may not have changes if the element was out of sequence. + if (state != null) { + currentStateState.write(state); + } + + if (produceStatusUpdateOnEveryEvent) { + // During pipeline draining the window timestamp is set to a large value in the future. + // Producing an event before that results in error, that's why this logic exist. 
+ Instant statusTimestamp = windowTimestamp; + + emitProcessingStatus(processingStatus, outputReceiver, statusTimestamp); + } + } + + void processStatusTimerEvent( + MultiOutputReceiver outputReceiver, + Timer statusEmissionTimer, + ValueState windowClosedState, + ValueState> processingStateState) { + ProcessingState currentState = processingStateState.read(); + if (currentState == null) { + // This could happen if the state has been purged already during the draining. + // It means that there is nothing that we can do. + LOG.warn( + "Current processing state is null in onStatusEmission() - most likely the pipeline is shutting down."); + return; + } + + emitProcessingStatus(currentState, outputReceiver, Instant.now()); + + Boolean windowClosed = windowClosedState.read(); + if (!currentState.isProcessingCompleted() + // Stop producing statuses if we are finished for a particular key + && (windowClosed == null || !windowClosed)) { + statusEmissionTimer.offset(statusUpdateFrequency).setRelative(); + } + } + + protected void emitProcessingStatus( + ProcessingState processingState, + MultiOutputReceiver outputReceiver, + Instant statusTimestamp) { + if (LOG.isTraceEnabled()) { + LOG.trace("Emitting status for: " + processingState.getKey() + ", " + processingState); + } + outputReceiver + .get(statusTupleTag) + .outputWithTimestamp( + KV.of( + processingState.getKey(), + OrderedProcessingStatus.create( + processingState.getLastOutputSequence(), + processingState.getBufferedEventCount(), + processingState.getEarliestBufferedSequence(), + processingState.getLatestBufferedSequence(), + processingState.getEventsReceived(), + processingState.getResultCount(), + processingState.getDuplicates(), + processingState.isLastEventReceived())), + statusTimestamp); + } + + protected boolean reachedMaxResultCountForBundle( + ProcessingState processingState, Timer largeBatchEmissionTimer) { + boolean exceeded = + processingState.resultsProducedInBundle( + numberOfResultsBeforeBundleStart == null ? 0 : numberOfResultsBeforeBundleStart) + >= maxNumberOfResultsToProduce; + if (exceeded) { + if (LOG.isTraceEnabled()) { + LOG.trace( + "Setting the timer to output next batch of events for key '" + + processingState.getKey() + + "'"); + } + // See GroupIntoBatches for examples on how to hold the timestamp. + // TODO: test that on draining the pipeline all the results are still produced correctly. 
+ // See: https://github.com/apache/beam/issues/30781 + largeBatchEmissionTimer.offset(Duration.millis(1)).setRelative(); + } + return exceeded; + } + + private void bufferEvent( + long currentSequence, + EventT currentEvent, + ProcessingState processingState, + OrderedListState bufferedEventsState, + boolean thisIsTheLastEvent) { + Instant eventTimestamp = fromLong(currentSequence); + bufferedEventsState.add(TimestampedValue.of(currentEvent, eventTimestamp)); + processingState.eventBuffered(currentSequence, thisIsTheLastEvent); + } + + abstract boolean checkForSequenceGapInBufferedEvents(); + + @Nullable + StateT processBufferedEventRange( + ProcessingState processingState, + @Nullable StateT state, + OrderedListState bufferedEventsState, + MultiOutputReceiver outputReceiver, + Timer largeBatchEmissionTimer, + ContiguousSequenceRange contiguousSequenceRange) { + Long earliestBufferedSequence = processingState.getEarliestBufferedSequence(); + Long latestBufferedSequence = processingState.getLatestBufferedSequence(); + if (earliestBufferedSequence == null || latestBufferedSequence == null) { + return state; + } + Instant startRange = fromLong(earliestBufferedSequence); + Instant endRange = fromLong(latestBufferedSequence + 1); + + // readRange is efficiently implemented and will bring records in batches + Iterable> events = bufferedEventsState.readRange(startRange, endRange); + + Instant endClearRange = startRange; // it will get re-adjusted later. + + Iterator> bufferedEventsIterator = events.iterator(); + while (bufferedEventsIterator.hasNext()) { + TimestampedValue timestampedEvent = bufferedEventsIterator.next(); + Instant eventTimestamp = timestampedEvent.getTimestamp(); + long eventSequence = eventTimestamp.getMillis(); + + EventT bufferedEvent = timestampedEvent.getValue(); + boolean skipProcessing = false; + boolean beforeInitialSequence = false; + + if (contiguousSequenceRange != null && eventSequence < contiguousSequenceRange.getStart()) { + // In case of global sequence processing - remove the elements below the range start + skipProcessing = true; + beforeInitialSequence = true; + endClearRange = fromLong(eventSequence); + } + if (processingState.checkForDuplicateBatchedEvent(eventSequence)) { + // There could be multiple events under the same sequence number. Only the first one + // will get processed. The rest are considered duplicates. + skipProcessing = true; + } + + if (skipProcessing) { + outputReceiver + .get(unprocessedEventsTupleTag) + .output( + KV.of( + processingState.getKey(), + KV.of( + eventSequence, + UnprocessedEvent.create( + bufferedEvent, + beforeInitialSequence + ? Reason.before_initial_sequence + : Reason.duplicate)))); + // TODO: When there is a large number of duplicates this can cause a situation where + // we produce too much output and the runner will start throwing unrecoverable errors. + // Need to add counting logic to accumulate both the normal and DLQ outputs. + continue; + } + + Long lastOutputSequence = processingState.getLastOutputSequence(); + boolean currentEventIsNextInSequence = + lastOutputSequence != null && eventSequence == lastOutputSequence + 1; + boolean continueProcessing = + checkForSequenceGapInBufferedEvents() + ? 
currentEventIsNextInSequence + : (eventSequence < contiguousSequenceRange.getEnd() || currentEventIsNextInSequence); + if (!continueProcessing) { + processingState.foundSequenceGap(eventSequence); + // Records will be cleared up to this element + endClearRange = fromLong(eventSequence); + break; + } + + // This check needs to be done after we checked for sequence gap and before we + // attempt to process the next element which can result in a new result. + if (reachedMaxResultCountForBundle(processingState, largeBatchEmissionTimer)) { + endClearRange = fromLong(eventSequence); + break; + } + + // Remove this record also + endClearRange = fromLong(eventSequence + 1); + + try { + if (state == null) { + if (LOG.isTraceEnabled()) { + LOG.trace("Creating a new state: " + processingState.getKey() + " " + bufferedEvent); + } + state = eventExaminer.createStateOnInitialEvent(bufferedEvent); + } else { + if (LOG.isTraceEnabled()) { + LOG.trace("Mutating " + processingState.getKey() + " " + bufferedEvent); + } + state.mutate(bufferedEvent); + } + } catch (Exception e) { + outputReceiver + .get(unprocessedEventsTupleTag) + .output( + KV.of( + processingState.getKey(), + KV.of(eventSequence, UnprocessedEvent.create(bufferedEvent, e)))); + // There is a chance that the next event will have the same sequence number and will + // process successfully. + continue; + } + + ResultT result = state.produceResult(); + if (result != null) { + outputReceiver.get(mainOutputTupleTag).output(KV.of(processingState.getKey(), result)); + processingState.resultProduced(); + } + processingState.processedBufferedEvent(eventSequence); + } + + bufferedEventsState.clearRange(startRange, endClearRange); + + return state; + } + + static Instant fromLong(long value) { + return Instant.ofEpochMilli(value); + } +} diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/SequencePerKeyProcessorDoFn.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/SequencePerKeyProcessorDoFn.java new file mode 100644 index 0000000000000..878a0664ac875 --- /dev/null +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/SequencePerKeyProcessorDoFn.java @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.ordered; + +import javax.annotation.Nullable; +import org.apache.beam.sdk.coders.BooleanCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.extensions.ordered.ProcessingState.ProcessingStateCoder; +import org.apache.beam.sdk.state.OrderedListState; +import org.apache.beam.sdk.state.StateSpec; +import org.apache.beam.sdk.state.StateSpecs; +import org.apache.beam.sdk.state.TimeDomain; +import org.apache.beam.sdk.state.Timer; +import org.apache.beam.sdk.state.TimerSpec; +import org.apache.beam.sdk.state.TimerSpecs; +import org.apache.beam.sdk.state.ValueState; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.TupleTag; +import org.joda.time.Duration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Stateful DoFn to process per key sequences. + * + * @param event type + * @param event key type + * @param result type + * @param state type + */ +class SequencePerKeyProcessorDoFn< + EventTypeT, + EventKeyTypeT, + ResultTypeT, + StateTypeT extends MutableState> + extends ProcessorDoFn { + + private static final Logger LOG = LoggerFactory.getLogger(SequencePerKeyProcessorDoFn.class); + + private static final String LARGE_BATCH_EMISSION_TIMER = "largeBatchTimer"; + protected static final String BUFFERED_EVENTS = "bufferedEvents"; + + @TimerId(LARGE_BATCH_EMISSION_TIMER) + @SuppressWarnings("unused") + private final TimerSpec largeBatchEmissionTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME); + + @StateId(BUFFERED_EVENTS) + @SuppressWarnings("unused") + private final StateSpec> bufferedEventsSpec; + + @SuppressWarnings("unused") + @StateId(MUTABLE_STATE) + private final StateSpec> mutableStateSpec; + + @StateId(WINDOW_CLOSED) + @SuppressWarnings("unused") + private final StateSpec> windowClosedSpec; + + @TimerId(STATUS_EMISSION_TIMER) + @SuppressWarnings("unused") + private final TimerSpec statusEmissionTimer = TimerSpecs.timer(TimeDomain.PROCESSING_TIME); + + @StateId(PROCESSING_STATE) + @SuppressWarnings("unused") + private final StateSpec>> processingStateSpec; + + /** + * Stateful DoFn to do the bulk of processing. 
+ * + * @param eventExaminer + * @param eventCoder + * @param stateCoder + * @param keyCoder + * @param mainOutputTupleTag + * @param statusTupleTag + * @param statusUpdateFrequency + * @param unprocessedEventTupleTag + * @param produceStatusUpdateOnEveryEvent + * @param maxNumberOfResultsToProduce + */ + SequencePerKeyProcessorDoFn( + EventExaminer eventExaminer, + Coder eventCoder, + Coder stateCoder, + Coder keyCoder, + TupleTag> mainOutputTupleTag, + TupleTag> statusTupleTag, + Duration statusUpdateFrequency, + TupleTag>>> unprocessedEventTupleTag, + boolean produceStatusUpdateOnEveryEvent, + long maxNumberOfResultsToProduce) { + super( + eventExaminer, + mainOutputTupleTag, + statusTupleTag, + statusUpdateFrequency, + unprocessedEventTupleTag, + produceStatusUpdateOnEveryEvent, + maxNumberOfResultsToProduce); + this.bufferedEventsSpec = StateSpecs.orderedList(eventCoder); + this.processingStateSpec = StateSpecs.value(ProcessingStateCoder.of(keyCoder)); + this.mutableStateSpec = StateSpecs.value(stateCoder); + this.windowClosedSpec = StateSpecs.value(BooleanCoder.of()); + } + + @Override + boolean checkForFirstOrLastEvent() { + return true; + } + + @Override + boolean checkForSequenceGapInBufferedEvents() { + return true; + } + + @ProcessElement + public void processElement( + @StateId(BUFFERED_EVENTS) OrderedListState bufferedEventsState, + @AlwaysFetched @StateId(PROCESSING_STATE) + ValueState> processingStateState, + @StateId(MUTABLE_STATE) ValueState mutableStateState, + @TimerId(STATUS_EMISSION_TIMER) Timer statusEmissionTimer, + @TimerId(LARGE_BATCH_EMISSION_TIMER) Timer largeBatchEmissionTimer, + @Element KV> eventAndSequence, + MultiOutputReceiver outputReceiver, + BoundedWindow window, + ProcessContext context) { + EventKeyTypeT key = eventAndSequence.getKey(); + long sequence = eventAndSequence.getValue().getKey(); + EventTypeT event = eventAndSequence.getValue().getValue(); + + ProcessingState processingState = processingStateState.read(); + + if (processingState == null) { + // This is the first time we see this key/window pair + processingState = new ProcessingState<>(key); + if (statusUpdateFrequency != null) { + // Set up the timer to produce the status of the processing on a regular basis + statusEmissionTimer.offset(statusUpdateFrequency).setRelative(); + } + } + + if (numberOfResultsBeforeBundleStart == null) { + // Per key processing is synchronized by Beam. There is no need to have it here. + numberOfResultsBeforeBundleStart = processingState.getResultCount(); + } + + processingState.eventReceived(); + + StateTypeT state = + processNewEvent( + sequence, + event, + processingState, + mutableStateState, + bufferedEventsState, + outputReceiver); + + processBufferedEvents( + processingState, state, bufferedEventsState, outputReceiver, largeBatchEmissionTimer); + + saveStates( + processingStateState, + processingState, + mutableStateState, + state, + outputReceiver, + window.maxTimestamp()); + + checkIfProcessingIsCompleted(processingState); + } + + private boolean checkIfProcessingIsCompleted(ProcessingState processingState) { + boolean result = processingState.isProcessingCompleted(); + if (result && LOG.isTraceEnabled()) { + LOG.trace("Processing for key '" + processingState.getKey() + "' is completed."); + } + return result; + } + + /** Process buffered events. 
*/ + private void processBufferedEvents( + ProcessingState processingState, + @Nullable StateTypeT state, + OrderedListState bufferedEventsState, + MultiOutputReceiver outputReceiver, + Timer largeBatchEmissionTimer) { + if (state == null) { + // Only when the current event caused a state mutation and the state is passed to this + // method should we attempt to process buffered events + return; + } + + if (!processingState.readyToProcessBufferedEvents()) { + return; + } + + if (reachedMaxResultCountForBundle(processingState, largeBatchEmissionTimer)) { + // No point in trying to process buffered events + return; + } + + // Technically this block is not needed because these preconditions are checked + // earlier. Included to keep the linter happy. + Long earliestBufferedSequence = processingState.getEarliestBufferedSequence(); + if (earliestBufferedSequence == null) { + return; + } + Long latestBufferedSequence = processingState.getLatestBufferedSequence(); + if (latestBufferedSequence == null) { + return; + } + + processBufferedEventRange( + processingState, + state, + bufferedEventsState, + outputReceiver, + largeBatchEmissionTimer, + ContiguousSequenceRange.EMPTY); + } + + @OnTimer(LARGE_BATCH_EMISSION_TIMER) + public void onBatchEmission( + OnTimerContext context, + @StateId(BUFFERED_EVENTS) OrderedListState bufferedEventsState, + @AlwaysFetched @StateId(PROCESSING_STATE) + ValueState> processingStatusState, + @AlwaysFetched @StateId(MUTABLE_STATE) ValueState currentStateState, + @TimerId(LARGE_BATCH_EMISSION_TIMER) Timer largeBatchEmissionTimer, + MultiOutputReceiver outputReceiver) { + ProcessingState processingState = processingStatusState.read(); + if (processingState == null) { + LOG.warn("Processing state is empty. Ignore it if the pipeline is being cancelled."); + return; + } + StateTypeT state = currentStateState.read(); + if (state == null) { + LOG.warn("Mutable state is empty. Ignore it if the pipeline is being cancelled."); + return; + } + + LOG.debug("Starting to process batch for key '" + processingState.getKey() + "'"); + + this.numberOfResultsBeforeBundleStart = processingState.getResultCount(); + + processBufferedEvents( + processingState, state, bufferedEventsState, outputReceiver, largeBatchEmissionTimer); + + saveStates( + processingStatusState, + processingState, + currentStateState, + state, + outputReceiver, + // TODO: validate that this is correct. 
+ context.window().maxTimestamp()); + + checkIfProcessingIsCompleted(processingState); + } + + @OnTimer(STATUS_EMISSION_TIMER) + @SuppressWarnings("unused") + public void onStatusEmission( + MultiOutputReceiver outputReceiver, + @TimerId(STATUS_EMISSION_TIMER) Timer statusEmissionTimer, + @StateId(WINDOW_CLOSED) ValueState windowClosedState, + @StateId(PROCESSING_STATE) ValueState> processingStateState) { + + processStatusTimerEvent( + outputReceiver, statusEmissionTimer, windowClosedState, processingStateState); + } + + @OnWindowExpiration + public void onWindowExpiration(@StateId(WINDOW_CLOSED) ValueState windowClosedState) { + windowClosedState.write(true); + } +} diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/UnprocessedEvent.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/UnprocessedEvent.java index 2131ef384e22f..d7c599277567c 100644 --- a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/UnprocessedEvent.java +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/UnprocessedEvent.java @@ -72,7 +72,8 @@ public enum Reason { duplicate, buffered, sequence_id_outside_valid_range, - exception_thrown + exception_thrown, + before_initial_sequence }; public abstract EventT getEvent(); diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/DefaultSequenceCombiner.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/DefaultSequenceCombiner.java new file mode 100644 index 0000000000000..32e5cbc36e4e6 --- /dev/null +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/DefaultSequenceCombiner.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.ordered.combiner; + +import java.util.Iterator; +import java.util.function.BiFunction; +import org.apache.beam.sdk.coders.CannotProvideCoderException; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderRegistry; +import org.apache.beam.sdk.extensions.ordered.ContiguousSequenceRange; +import org.apache.beam.sdk.extensions.ordered.EventExaminer; +import org.apache.beam.sdk.extensions.ordered.MutableState; +import org.apache.beam.sdk.extensions.ordered.combiner.SequenceRangeAccumulator.SequenceRangeAccumulatorCoder; +import org.apache.beam.sdk.transforms.Combine.CombineFn; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.TimestampedValue; +import org.checkerframework.checker.initialization.qual.Initialized; +import org.checkerframework.checker.nullness.qual.NonNull; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.checkerframework.checker.nullness.qual.UnknownKeyFor; +import org.joda.time.Instant; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Default global sequence combiner. + * + *
    Produces the largest {@link ContiguousSequenceRange} of contiguous longs which starts from the + * initial event identified by {@link EventExaminer#isInitialEvent(long, EventT)}. + * + *
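For reference, a minimal examiner that anchors the global sequence at zero might look like the sketch below. MyEvent/MyState and the terminal flag are hypothetical; the three methods are the ones this patch calls on EventExaminer:

    // Sketch only: sequence number 0 starts the global sequence.
    class ZeroBasedExaminer implements EventExaminer<MyEvent, MyState> {

      @Override
      public boolean isInitialEvent(long sequenceNumber, MyEvent event) {
        return sequenceNumber == 0;
      }

      @Override
      public MyState createStateOnInitialEvent(MyEvent event) {
        return new MyState(event);
      }

      @Override
      public boolean isLastEvent(long sequenceNumber, MyEvent event) {
        // Not consulted by this combiner (see the note below), but part of the contract.
        return event.isFinal(); // hypothetical terminal marker
      }
    }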
    This combiner currently doesn't use {@link EventExaminer#isLastEvent(long, EventT)}. + * + * @param type of key + * @param type of event + * @param type of state + */ +public class DefaultSequenceCombiner> + extends CombineFn< + TimestampedValue>>, + SequenceRangeAccumulator, + ContiguousSequenceRange> { + + private static final Logger LOG = LoggerFactory.getLogger(DefaultSequenceCombiner.class); + + public static final BiFunction<@NonNull Instant, @Nullable Instant, @Nullable Instant> + OLDEST_TIMESTAMP_SELECTOR = + (instant1, instant2) -> { + if (instant2 == null) { + return instant1; + } + @NonNull Instant nonNullableSecondValue = instant2; + return instant1.isAfter(nonNullableSecondValue) ? instant1 : nonNullableSecondValue; + }; + private final EventExaminer eventExaminer; + + public DefaultSequenceCombiner(EventExaminer eventExaminer) { + this.eventExaminer = eventExaminer; + } + + @Override + public SequenceRangeAccumulator createAccumulator() { + return new SequenceRangeAccumulator(); + } + + @Override + public SequenceRangeAccumulator addInput( + SequenceRangeAccumulator accum, TimestampedValue>> event) { + long sequence = event.getValue().getValue().getKey(); + + accum.add( + sequence, + event.getTimestamp(), + eventExaminer.isInitialEvent(sequence, event.getValue().getValue().getValue())); + + return accum; + } + + @Override + public SequenceRangeAccumulator mergeAccumulators( + Iterable accumulators) { + // There should be at least one accumulator. + Iterator iterator = accumulators.iterator(); + SequenceRangeAccumulator result = iterator.next(); + while (iterator.hasNext()) { + result.merge(iterator.next()); + } + return result; + } + + @Override + public ContiguousSequenceRange extractOutput(SequenceRangeAccumulator accum) { + ContiguousSequenceRange result = accum.largestContinuousRange(); + if (LOG.isTraceEnabled()) { + LOG.trace("Returning completed sequence range: " + result); + } + return result; + } + + @Override + public @UnknownKeyFor @NonNull @Initialized Coder getAccumulatorCoder( + @UnknownKeyFor @NonNull @Initialized CoderRegistry registry, + @UnknownKeyFor @NonNull @Initialized + Coder>>> inputCoder) + throws @UnknownKeyFor @NonNull @Initialized CannotProvideCoderException { + return SequenceRangeAccumulatorCoder.of(); + } +} diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulator.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulator.java new file mode 100644 index 0000000000000..89dc912afc90c --- /dev/null +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulator.java @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.ordered.combiner;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.CustomCoder;
+import org.apache.beam.sdk.coders.NullableCoder;
+import org.apache.beam.sdk.coders.VarIntCoder;
+import org.apache.beam.sdk.coders.VarLongCoder;
+import org.apache.beam.sdk.extensions.ordered.ContiguousSequenceRange;
+import org.apache.commons.lang3.tuple.Pair;
+import org.checkerframework.checker.initialization.qual.Initialized;
+import org.checkerframework.checker.nullness.qual.NonNull;
+import org.checkerframework.checker.nullness.qual.UnknownKeyFor;
+import org.joda.time.Instant;
+
+/** Default accumulator used to combine sequence ranges. */
+public class SequenceRangeAccumulator {
+
+  private static Instant max(Instant a, Instant b) {
+    return a.isAfter(b) ? a : b;
+  }
+
+  /**
+   * The tree contains a set of non-overlapping contiguous ranges, where the key is the lower
+   * inclusive start of the range, the left value of the pair is the inclusive end of the range,
+   * and the right value is the maximum timestamp in the range.
+   *
+   * <p>For example (an illustrative sketch): after add(1), add(2), add(3) and add(5) the tree
+   * contains two entries, {1 -> (3, maxTs of 1..3)} and {5 -> (5, ts of 5)}; a subsequent add(4)
+   * merges them into the single entry {1 -> (5, maxTs of 1..5)}.
+   *
+   * <p>
    The maximum timestamp is critical for the correctness of the ordered processing. During the + * merge process the merged range is assigned the maximum timestamp of the two ranges that created + * this new range. + */ + private final TreeMap> data = new TreeMap<>(); + + private @Nullable Long initialSequence = null; + + public void add(long sequence, Instant timestamp, boolean isInitialSequence) { + if (isInitialSequence && this.initialSequence != null && sequence != this.initialSequence) { + throw new IllegalStateException( + "There are different initial sequences detected: " + + initialSequence + + " and " + + sequence); + } + + if (sequence == Long.MAX_VALUE) { + // This is an invalid value and DoFns will not process this element. This will also allow + // to produce a ContiguousSequenceRange with the exclusive end value. + return; + } + + if (isInitialSequence) { + this.initialSequence = sequence; + clearRangesBelowInitialSequence(sequence, timestamp); + } else if (initialSequence != null && sequence <= initialSequence) { + // No need to add anything lower than the initial sequence to the accumulator. + return; + } + + long lowerBound = sequence, upperBound = sequence; + + Entry> lowerRange = data.floorEntry(sequence); + if (lowerRange != null) { + long inclusiveUpperBoundary = lowerRange.getValue().getLeft(); + if (sequence <= inclusiveUpperBoundary) { + // Duplicate. No need to adjust the timestamp. + return; + } + + if (inclusiveUpperBoundary + 1 == sequence) { + // The new element extends the lower range. Remove the range. + timestamp = max(timestamp, lowerRange.getValue().getValue()); + lowerBound = lowerRange.getKey(); + data.remove(lowerRange.getKey()); + } + } + + long nextSequenceNumber = sequence + 1; + Pair upperRange = data.get(nextSequenceNumber); + if (upperRange != null) { + // The new element will extend the upper range. Remove the range. + timestamp = max(timestamp, upperRange.getRight()); + upperBound = upperRange.getLeft(); + data.remove(nextSequenceNumber); + } + + data.put(lowerBound, Pair.of(upperBound, timestamp)); + } + + private void clearRangesBelowInitialSequence(long sequence, Instant timestamp) { + // First, adjust the current range, if any + Entry> lowerRange = data.floorEntry(sequence); + if (lowerRange != null + && lowerRange.getKey() < sequence + && lowerRange.getValue().getLeft() > sequence) { + // The sequence is in the middle of the range. Adjust it. + data.remove(lowerRange.getKey()); + data.put( + sequence, + Pair.of( + lowerRange.getValue().getKey(), max(timestamp, lowerRange.getValue().getValue()))); + } + data.subMap(Long.MIN_VALUE, sequence).clear(); + } + + public ContiguousSequenceRange largestContinuousRange() { + if (initialSequence == null) { + return ContiguousSequenceRange.EMPTY; + } + + Entry> firstEntry = data.firstEntry(); + if (firstEntry == null) { + throw new IllegalStateException("First entry is null when initial sequence is set."); + } + Long start = firstEntry.getKey(); + Long end = firstEntry.getValue().getLeft(); + Instant latestTimestamp = firstEntry.getValue().getRight(); + // Upper bound is inclusive, but the ContiguousSequenceRange's end is exclusive. + // The numeric overflow is prevented by dropping the value of Long.MAX. 
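+    // Illustrative note: with a single tree entry of {1 -> (3, ts)} this method returns
+    // ContiguousSequenceRange.of(1, 4, ts). Long.MAX_VALUE never enters the tree because add()
+    // drops such values, so "end + 1" below cannot overflow.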
+ return ContiguousSequenceRange.of(start, end + 1, latestTimestamp); + } + + public int numberOfRanges() { + return data.size(); + } + + public void merge(SequenceRangeAccumulator another) { + if (this.initialSequence != null + && another.initialSequence != null + && !this.initialSequence.equals(another.initialSequence)) { + throw new IllegalStateException( + "Two accumulators contain different initial sequences: " + + this.initialSequence + + " and " + + another.initialSequence); + } + + if (another.initialSequence != null) { + long newInitialSequence = another.initialSequence; + this.initialSequence = newInitialSequence; + Entry> firstEntry = another.data.firstEntry(); + if (firstEntry != null) { + Instant timestampOfTheInitialRange = firstEntry.getValue().getRight(); + clearRangesBelowInitialSequence(newInitialSequence, timestampOfTheInitialRange); + } + } + + another + .data + .entrySet() + .forEach( + entry -> { + long lowerBound = entry.getKey(); + long upperBound = entry.getValue().getLeft(); + if (this.initialSequence != null) { + if (upperBound < initialSequence) { + // The whole range is below the initial sequence. Ignore it. + return; + } + if (lowerBound < initialSequence) { + // This will cause pruning of the range up to the initial sequence + lowerBound = this.initialSequence; + } + } + + Entry> lowerRange = this.data.floorEntry(lowerBound); + + if (lowerRange != null) { + if (lowerRange.getValue().getLeft() < lowerBound - 1) { + // Nothing to do. There is a lower non-adjacent range. + } else { + // We found an overlapping range and will replace it with a new one + upperBound = Math.max(upperBound, lowerRange.getValue().getLeft()); + lowerBound = lowerRange.getKey(); + } + } + + Entry> upperRange = this.data.floorEntry(upperBound + 1); + if (upperRange == null + || (lowerRange != null + && Objects.equals(upperRange.getKey(), lowerRange.getKey()))) { + // Nothing to do - either there is no adjacent upper range or it equals the lower + // range + } else { + upperBound = Math.max(upperBound, upperRange.getValue().getLeft()); + } + + Instant latestTimestamp = + removeAllRanges(lowerBound, upperBound, entry.getValue().getRight()); + + this.data.put(lowerBound, Pair.of(upperBound, latestTimestamp)); + }); + } + + private Instant removeAllRanges(long lowerBound, long upperBound, Instant currentTimestamp) { + Instant result = currentTimestamp; + SortedMap> rangesToRemove = data.subMap(lowerBound, upperBound); + for (Pair value : rangesToRemove.values()) { + result = result.isAfter(value.getRight()) ? 
result : value.getRight(); + } + rangesToRemove.clear(); + return result; + } + + @Override + public boolean equals(@Nullable Object o) { + if (this == o) { + return true; + } + if (!(o instanceof SequenceRangeAccumulator)) { + return false; + } + SequenceRangeAccumulator that = (SequenceRangeAccumulator) o; + return data.equals(that.data) && Objects.equals(initialSequence, that.initialSequence); + } + + @Override + public int hashCode() { + return Objects.hash(data, initialSequence); + } + + @Override + public String toString() { + return "SequenceRangeAccumulator{initialSequence=" + initialSequence + ", data=" + data + '}'; + } + + public static class SequenceRangeAccumulatorCoder extends CustomCoder { + + private static final SequenceRangeAccumulatorCoder INSTANCE = + new SequenceRangeAccumulatorCoder(); + + public static SequenceRangeAccumulatorCoder of() { + return INSTANCE; + } + + private SequenceRangeAccumulatorCoder() {} + + private final NullableCoder initialSequenceCoder = NullableCoder.of(VarLongCoder.of()); + private final VarIntCoder numberOfRangesCoder = VarIntCoder.of(); + private final VarLongCoder dataCoder = VarLongCoder.of(); + + @Override + public void encode( + SequenceRangeAccumulator value, @UnknownKeyFor @NonNull @Initialized OutputStream outStream) + throws @UnknownKeyFor @NonNull @Initialized CoderException, @UnknownKeyFor @NonNull + @Initialized IOException { + numberOfRangesCoder.encode(value.numberOfRanges(), outStream); + initialSequenceCoder.encode(value.initialSequence, outStream); + for (Entry> entry : value.data.entrySet()) { + dataCoder.encode(entry.getKey(), outStream); + dataCoder.encode(entry.getValue().getLeft(), outStream); + dataCoder.encode(entry.getValue().getRight().getMillis(), outStream); + } + } + + @Override + public SequenceRangeAccumulator decode( + @UnknownKeyFor @NonNull @Initialized InputStream inStream) + throws @UnknownKeyFor @NonNull @Initialized CoderException, @UnknownKeyFor @NonNull + @Initialized IOException { + SequenceRangeAccumulator result = new SequenceRangeAccumulator(); + int numberOfRanges = numberOfRangesCoder.decode(inStream); + result.initialSequence = initialSequenceCoder.decode(inStream); + for (int i = 0; i < numberOfRanges; i++) { + long key = dataCoder.decode(inStream); + long upperBound = dataCoder.decode(inStream); + long millis = dataCoder.decode(inStream); + result.data.put(key, Pair.of(upperBound, Instant.ofEpochMilli(millis))); + } + return result; + } + } +} diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/package-info.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/package-info.java new file mode 100644 index 0000000000000..0d730d55fb9f8 --- /dev/null +++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/combiner/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Default implementation of the global sequence combiner used by {@link
+ * org.apache.beam.sdk.extensions.ordered.OrderedEventProcessor} when processing events using global
+ * sequences.
+ */
+package org.apache.beam.sdk.extensions.ordered.combiner;
diff --git a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/package-info.java b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/package-info.java
index f9d7e3d67bff1..4cbbca82a8cfd 100644
--- a/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/package-info.java
+++ b/sdks/java/extensions/ordered/src/main/java/org/apache/beam/sdk/extensions/ordered/package-info.java
@@ -16,7 +16,9 @@
  * limitations under the License.
  */
 /**
- * Provides a transform for ordered processing.
+ * Provides a transform for ordered processing. For a detailed reference implementation that uses
+ * this transform, see <a href="https://github.com/GoogleCloudPlatform/dataflow-ordered-processing">dataflow-ordered-processing</a>.
  *
  * @see org.apache.beam.sdk.extensions.ordered.OrderedEventProcessor
  */
diff --git a/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorGlobalSequenceTest.java b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorGlobalSequenceTest.java
new file mode 100644
index 0000000000000..98bc7591f4d7a
--- /dev/null
+++ b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorGlobalSequenceTest.java
@@ -0,0 +1,534 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.beam.sdk.extensions.ordered; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.coders.CannotProvideCoderException; +import org.apache.beam.sdk.extensions.ordered.StringBufferOrderedProcessingHandler.StringBufferOrderedProcessingWithGlobalSequenceHandler; +import org.apache.beam.sdk.extensions.ordered.UnprocessedEvent.Reason; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestStream; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.windowing.FixedWindows; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.TimestampedValue; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Test; + +public class OrderedEventProcessorGlobalSequenceTest extends OrderedEventProcessorTestBase { + + public static final boolean GLOBAL_SEQUENCE = true; + + static { + Logger logger = Logger.getLogger(GlobalSequencesProcessorDoFn.class.getName()); + logger.setLevel(Level.FINEST); + } + + @org.junit.Test + public void testPerfectOrderingProcessing() throws CannotProvideCoderException { + Event[] events = { + Event.create(0, "id-1", "a"), + Event.create(1, "id-1", "b"), + Event.create(2, "id-1", "c"), + Event.create(3, "id-1", "d"), + Event.create(4, "id-2", "a"), + Event.create(5, "id-2", "b") + }; + + Collection> expectedOutput = new ArrayList<>(); + expectedOutput.add(KV.of("id-1", "a")); + expectedOutput.add(KV.of("id-1", "ab")); + expectedOutput.add(KV.of("id-1", "abc")); + expectedOutput.add(KV.of("id-1", "abcd")); + expectedOutput.add(KV.of("id-2", "a")); + expectedOutput.add(KV.of("id-2", "ab")); + + testGlobalSequenceProcessing( + events, + expectedOutput, + EMISSION_FREQUENCY_ON_EVERY_ELEMENT, + INITIAL_SEQUENCE_OF_0, + LARGE_MAX_RESULTS_PER_OUTPUT, + ContiguousSequenceRange.of(0, 6, new Instant())); + } + + @Test + public void testOutOfSequenceProcessing() throws CannotProvideCoderException { + Event[] events = { + Event.create(2, "id-1", "c"), + Event.create(1, "id-1", "b"), + Event.create(0, "id-1", "a"), + Event.create(3, "id-1", "d"), + Event.create(5, "id-2", "b"), + Event.create(6, "id-2", "c"), + Event.create(8, "id-2", "e"), + Event.create(4, "id-2", "a"), + Event.create(7, "id-2", "d") + }; + + Collection> expectedOutput = new ArrayList<>(); + expectedOutput.add(KV.of("id-1", "a")); + expectedOutput.add(KV.of("id-1", "ab")); + expectedOutput.add(KV.of("id-1", "abc")); + expectedOutput.add(KV.of("id-1", "abcd")); + expectedOutput.add(KV.of("id-2", "a")); + expectedOutput.add(KV.of("id-2", "ab")); + expectedOutput.add(KV.of("id-2", "abc")); + expectedOutput.add(KV.of("id-2", "abcd")); + expectedOutput.add(KV.of("id-2", "abcde")); + + testGlobalSequenceProcessing( + events, + expectedOutput, + EMISSION_FREQUENCY_ON_EVERY_ELEMENT, + INITIAL_SEQUENCE_OF_0, + LARGE_MAX_RESULTS_PER_OUTPUT, + ContiguousSequenceRange.of(0, 9, new Instant())); + } + + @Test + public void testHandlingOfDuplicateSequences() throws CannotProvideCoderException { + Event[] events = { + Event.create(3, "id-1", "d"), + Event.create(2, "id-1", "c"), + + // Duplicates + Event.create(3, "id-1", "d"), + Event.create(3, "id-1", "d"), + 
Event.create(0, "id-1", "a"), + Event.create(1, "id-1", "b"), + + // Additional duplicates + Event.create(1, "id-1", "b"), + Event.create(3, "id-1", "d"), + }; + + Collection> expectedOutput = new ArrayList<>(); + expectedOutput.add(KV.of("id-1", "a")); + expectedOutput.add(KV.of("id-1", "ab")); + expectedOutput.add(KV.of("id-1", "abc")); + expectedOutput.add(KV.of("id-1", "abcd")); + + Collection>>> duplicates = new ArrayList<>(); + duplicates.add(KV.of("id-1", KV.of(3L, UnprocessedEvent.create("d", Reason.duplicate)))); + duplicates.add(KV.of("id-1", KV.of(3L, UnprocessedEvent.create("d", Reason.duplicate)))); + duplicates.add(KV.of("id-1", KV.of(1L, UnprocessedEvent.create("b", Reason.duplicate)))); + duplicates.add(KV.of("id-1", KV.of(3L, UnprocessedEvent.create("d", Reason.duplicate)))); + + testGlobalSequenceProcessing( + events, + expectedOutput, + duplicates, + EMISSION_FREQUENCY_ON_EVERY_ELEMENT, + INITIAL_SEQUENCE_OF_0, + LARGE_MAX_RESULTS_PER_OUTPUT, + ContiguousSequenceRange.of(0, 4, new Instant())); + } + + @Test + public void testTreatingSequencesBelowInitialAsDuplicates() throws CannotProvideCoderException { + Event[] events = { + Event.create(3, "id-1", "d"), + Event.create(2, "id-1", "c"), + + // Earlier events + Event.create(-1, "id-1", "early-1"), + Event.create(-2, "id-1", "early-2"), + Event.create(0, "id-1", "a"), + Event.create(1, "id-1", "b") + }; + + Collection> expectedOutput = new ArrayList<>(); + expectedOutput.add(KV.of("id-1", "a")); + expectedOutput.add(KV.of("id-1", "ab")); + expectedOutput.add(KV.of("id-1", "abc")); + expectedOutput.add(KV.of("id-1", "abcd")); + + Collection>>> duplicates = new ArrayList<>(); + duplicates.add( + KV.of( + "id-1", + KV.of(-1L, UnprocessedEvent.create("early-1", Reason.before_initial_sequence)))); + duplicates.add( + KV.of( + "id-1", + KV.of(-2L, UnprocessedEvent.create("early-2", Reason.before_initial_sequence)))); + + testGlobalSequenceProcessing( + events, + expectedOutput, + duplicates, + EMISSION_FREQUENCY_ON_EVERY_ELEMENT, + INITIAL_SEQUENCE_OF_0, + LARGE_MAX_RESULTS_PER_OUTPUT, + ContiguousSequenceRange.of(0, 4, new Instant())); + } + + @Test + public void testHandlingOfCheckedExceptions() throws CannotProvideCoderException { + Event[] events = { + Event.create(0, "id-1", "a"), + Event.create(1, "id-1", "b"), + Event.create(2, "id-1", StringBuilderState.BAD_VALUE), + Event.create(3, "id-1", "c"), + }; + + // This is an interesting case - even though event #2 is not processed it doesn't affect + // the global sequence calculations. It is not considered a gap, and all the subsequent + // events will be processed. + Collection> expectedOutput = new ArrayList<>(); + expectedOutput.add(KV.of("id-1", "a")); + expectedOutput.add(KV.of("id-1", "ab")); + expectedOutput.add(KV.of("id-1", "abc")); + + Collection>>> failedEvents = new ArrayList<>(); + failedEvents.add( + KV.of( + "id-1", + KV.of( + 2L, + UnprocessedEvent.create(StringBuilderState.BAD_VALUE, Reason.exception_thrown)))); + + testGlobalSequenceProcessing( + events, + expectedOutput, + failedEvents, + EMISSION_FREQUENCY_ON_EVERY_ELEMENT, + INITIAL_SEQUENCE_OF_0, + LARGE_MAX_RESULTS_PER_OUTPUT, + // Sequence matcher doesn't know if the element is valid or not. 
+ // That's why the elements that are get rejected in the processor still count when + // calculating the global sequence + ContiguousSequenceRange.of(0, 4, new Instant())); + } + + @Test + public void testProcessingWithEveryOtherResultEmission() throws CannotProvideCoderException { + Event[] events = { + Event.create(2, "id-1", "c"), + Event.create(1, "id-1", "b"), + Event.create(0, "id-1", "a"), + Event.create(3, "id-1", "d"), + Event.create(4, "id-2", "a"), + Event.create(5, "id-2", "b"), + }; + + Collection> expectedOutput = new ArrayList<>(); + expectedOutput.add(KV.of("id-1", "a")); + // Skipped KV.of("id-1", "ab"), + expectedOutput.add(KV.of("id-1", "abc")); + // Skipped KV.of("id-1", "abcd"), + expectedOutput.add(KV.of("id-2", "a")); + // Skipped KV.of("id-2", "ab") + testGlobalSequenceProcessing( + events, + expectedOutput, + EMISSION_FREQUENCY_ON_EVERY_OTHER_EVENT, + INITIAL_SEQUENCE_OF_0, + LARGE_MAX_RESULTS_PER_OUTPUT, + ContiguousSequenceRange.of(0, 6, new Instant())); + } + + @Test + public void testLargeBufferedOutputInTimer() throws CannotProvideCoderException { + int maxResultsPerOutput = 100; + + // Array of sequences starting with 2 and the last element - 1. + // Output will be buffered until the last event arrives + long[] sequences = new long[maxResultsPerOutput * 3]; + for (int i = 0; i < sequences.length - 1; i++) { + sequences[i] = i + 2L; + } + sequences[sequences.length - 1] = 1; + + List events = new ArrayList<>(sequences.length); + Collection> expectedOutput = new ArrayList<>(sequences.length); + + StringBuilder output = new StringBuilder(); + String outputPerElement = "."; + String key = "id-1"; + + for (long sequence : sequences) { + events.add(Event.create(sequence, key, outputPerElement)); + output.append(outputPerElement); + expectedOutput.add(KV.of(key, output.toString())); + } + + testGlobalSequenceProcessing( + events.toArray(new Event[events.size()]), + expectedOutput, + EMISSION_FREQUENCY_ON_EVERY_ELEMENT, + 1L /* This dataset assumes 1 as the starting sequence */, + maxResultsPerOutput, + ContiguousSequenceRange.of(1, sequences.length + 1, new Instant())); + } + + @Test + public void testSequenceGapProcessingInBufferedOutput() throws CannotProvideCoderException { + int maxResultsPerOutput = 3; + + long[] sequences = new long[] {2, 3, 7, 8, 9, 10, 1, 4, 5, 6}; + + List events = new ArrayList<>(sequences.length); + List> expectedOutput = new ArrayList<>(sequences.length); + + String key = "id-1"; + + for (long sequence : sequences) { + events.add(Event.create(sequence, key, sequence + "-")); + } + + StringBuilder output = new StringBuilder(); + Arrays.stream(sequences) + .sorted() + .forEach( + sequence -> { + output.append(sequence + "-"); + expectedOutput.add(KV.of(key, output.toString())); + }); + + testGlobalSequenceProcessing( + events.toArray(new Event[events.size()]), + expectedOutput, + EMISSION_FREQUENCY_ON_EVERY_ELEMENT, + 1L /* This dataset assumes 1 as the starting sequence */, + maxResultsPerOutput, + ContiguousSequenceRange.of(1, 11, new Instant())); + } + + @Test + public void testHandlingOfMaxSequenceNumber() throws CannotProvideCoderException { + Event[] events = { + Event.create(1, "id-1", "b"), + Event.create(0, "id-1", "a"), + Event.create(Long.MAX_VALUE, "id-1", "d"), + Event.create(2, "id-1", "c") + }; + + Collection> expectedOutput = new ArrayList<>(); + expectedOutput.add(KV.of("id-1", "a")); + expectedOutput.add(KV.of("id-1", "ab")); + expectedOutput.add(KV.of("id-1", "abc")); + + Collection>>> unprocessedEvents = + new 
ArrayList<>(); + unprocessedEvents.add( + KV.of( + "id-1", + KV.of( + Long.MAX_VALUE, + UnprocessedEvent.create("d", Reason.sequence_id_outside_valid_range)))); + + testGlobalSequenceProcessing( + events, + expectedOutput, + unprocessedEvents, + EMISSION_FREQUENCY_ON_EVERY_ELEMENT, + INITIAL_SEQUENCE_OF_0, + LARGE_MAX_RESULTS_PER_OUTPUT, + ContiguousSequenceRange.of(0, 3, Instant.now())); + } + + @Test + public void testProcessingOfTheLastInput() throws CannotProvideCoderException { + // TODO: fix the test. Need to see that the resulting status reflects the last input + Event[] events = { + Event.create(0, "id-1", "a"), + Event.create(1, "id-1", "b"), + Event.create(2, "id-1", StringEventExaminer.LAST_INPUT) + }; + + Collection> expectedOutput = new ArrayList<>(); + expectedOutput.add(KV.of("id-1", "a")); + expectedOutput.add(KV.of("id-1", "ab")); + expectedOutput.add(KV.of("id-1", "ab" + StringEventExaminer.LAST_INPUT)); + + testGlobalSequenceProcessing( + events, + expectedOutput, + EMISSION_FREQUENCY_ON_EVERY_ELEMENT, + INITIAL_SEQUENCE_OF_0, + LARGE_MAX_RESULTS_PER_OUTPUT, + ContiguousSequenceRange.of(0, 3, new Instant())); + } + + private void testGlobalSequenceProcessing( + Event[] events, + Collection> expectedOutput, + int emissionFrequency, + long initialSequence, + int maxResultsPerOutput, + ContiguousSequenceRange expectedLastCompleteRange) + throws CannotProvideCoderException { + testGlobalSequenceProcessing( + events, + expectedOutput, + NO_EXPECTED_DLQ_EVENTS, + emissionFrequency, + initialSequence, + maxResultsPerOutput, + expectedLastCompleteRange); + } + + private void testGlobalSequenceProcessing( + Event[] events, + Collection> expectedOutput, + Collection>>> expectedUnprocessedEvents, + int emissionFrequency, + long initialSequence, + int maxResultsPerOutput, + ContiguousSequenceRange expectedLastCompleteRange) + throws CannotProvideCoderException { + // Test a streaming pipeline + doTest( + events, + null /* expectedStatuses */, + expectedOutput, + expectedUnprocessedEvents, + emissionFrequency, + initialSequence, + maxResultsPerOutput, + false /* produceStatusOnEveryEvent */, + STREAMING, + GLOBAL_SEQUENCE, + expectedLastCompleteRange); + + // Test a batch pipeline + if (runTestsOnDataflowRunner()) { + doTest( + events, + null /* expectedStatuses */, + expectedOutput, + expectedUnprocessedEvents, + emissionFrequency, + initialSequence, + maxResultsPerOutput, + false /* produceStatusOnEveryEvent */, + BATCH, + GLOBAL_SEQUENCE, + expectedLastCompleteRange); + } else { + System.err.println( + "Warning - batch tests didn't run. " + + "DirectRunner doesn't work correctly with this transform in batch mode." + + "Run the tests using Dataflow runner to validate."); + } + } + + @Test + public void testWindowedProcessing() throws CannotProvideCoderException { + + Instant base = new Instant(0); + TestStream values = + TestStream.create(streamingPipeline.getCoderRegistry().getCoder(Event.class)) + .advanceWatermarkTo(base) + .addElements( + // Start of first window + TimestampedValue.of( + Event.create(0, "id-1", "a"), base.plus(Duration.standardSeconds(1))), + TimestampedValue.of( + Event.create(1, "id-1", "b"), base.plus(Duration.standardSeconds(2))), + TimestampedValue.of( + Event.create(0, "id-2", "x"), base.plus(Duration.standardSeconds(1))), + TimestampedValue.of( + Event.create(1, "id-2", "y"), base.plus(Duration.standardSeconds(2))), + TimestampedValue.of( + Event.create(2, "id-2", "z"), base.plus(Duration.standardSeconds(2))), + + // Start of second window. 
Numbering must start with 0 again. + TimestampedValue.of( + Event.create(0, "id-1", "c"), base.plus(Duration.standardSeconds(10))), + TimestampedValue.of( + Event.create(1, "id-1", "d"), base.plus(Duration.standardSeconds(11)))) + .advanceProcessingTime(Duration.standardMinutes(15)) + .advanceWatermarkToInfinity(); + + Pipeline pipeline = streamingPipeline; + + PCollection rawInput = pipeline.apply("Create Streaming Events", values); + PCollection>> input = + rawInput.apply("To KV", ParDo.of(new MapEventsToKV())); + + input = input.apply("Window input", Window.into(FixedWindows.of(Duration.standardSeconds(5)))); + + StringBufferOrderedProcessingWithGlobalSequenceHandler handler = + new StringBufferOrderedProcessingWithGlobalSequenceHandler( + EMISSION_FREQUENCY_ON_EVERY_ELEMENT, INITIAL_SEQUENCE_OF_0); + handler.setMaxOutputElementsPerBundle(LARGE_MAX_RESULTS_PER_OUTPUT); + handler.setStatusUpdateFrequency(null); + handler.setProduceStatusUpdateOnEveryEvent(false); + + OrderedEventProcessor orderedEventProcessor = + OrderedEventProcessor.create(handler); + + OrderedEventProcessorResult processingResult = + input.apply("Process Events", orderedEventProcessor); + + IntervalWindow window1 = new IntervalWindow(base, base.plus(Duration.standardSeconds(5))); + PAssert.that("Output matches in window 1", processingResult.output()) + .inWindow(window1) + .containsInAnyOrder( + KV.of("id-1", "a"), + KV.of("id-1", "ab"), + KV.of("id-2", "x"), + KV.of("id-2", "xy"), + KV.of("id-2", "xyz")); + + IntervalWindow window2 = + new IntervalWindow( + base.plus(Duration.standardSeconds(10)), base.plus(Duration.standardSeconds(15))); + PAssert.that("Output matches in window 2", processingResult.output()) + .inWindow(window2) + .containsInAnyOrder(KV.of("id-1", "c"), KV.of("id-1", "cd")); + + // TODO: can we make the status assertions work? 
+ // PAssert.that("Statuses match in window 1", processingResult.processingStatuses()) + // .inWindow(window1) + // .containsInAnyOrder( + //// KV.of("id-1", OrderedProcessingStatus.create(0L, 0, null, null, 1, 1, 0, + // false)), + // KV.of("id-1", OrderedProcessingStatus.create(1L, 0, null, null, 2, 2, 0, false)), + //// KV.of("id-2", OrderedProcessingStatus.create(0L, 0, null, null, 1, 1, 0, + // false)), + //// KV.of("id-2", OrderedProcessingStatus.create(1L, 0, null, null, 2, 2, 0, + // false)), + // KV.of("id-2", OrderedProcessingStatus.create(2L, 0, null, null, 3, 3, 0, false)) + // ); + + // PAssert.that("Statuses match in window 2", processingResult.processingStatuses()) + // .inWindow(window2) + // .containsInAnyOrder( + // KV.of("id-1", OrderedProcessingStatus.create(0L, 0, null, null, 1, 1, 0, false)), + // KV.of("id-1", OrderedProcessingStatus.create(1L, 0, null, null, 2, 2, 0, false))); + + PAssert.that("Unprocessed events match", processingResult.unprocessedEvents()) + .containsInAnyOrder(NO_EXPECTED_DLQ_EVENTS); + + pipeline.run(); + } +} diff --git a/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorTest.java b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorPerKeySequenceTest.java similarity index 71% rename from sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorTest.java rename to sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorPerKeySequenceTest.java index 6a24021ad667d..6909a3bb992c1 100644 --- a/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorTest.java +++ b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorPerKeySequenceTest.java @@ -20,82 +20,24 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.List; -import java.util.Set; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.coders.CannotProvideCoderException; import org.apache.beam.sdk.extensions.ordered.UnprocessedEvent.Reason; import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.SerializableMatcher; -import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.testing.TestStream; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Reshuffle; -import org.apache.beam.sdk.transforms.windowing.AfterWatermark; import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.GlobalWindows; import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.Repeatedly; import org.apache.beam.sdk.transforms.windowing.Window; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.TimestampedValue; -import org.checkerframework.checker.initialization.qual.Initialized; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.UnknownKeyFor; -import org.hamcrest.BaseMatcher; -import org.hamcrest.Description; import org.joda.time.Duration; import org.joda.time.Instant; -import org.junit.Rule; import 
org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; -/** - * Ordered Processing tests use the same testing scenario. Events are sent in or out of sequence. - * Each event is a string for a particular key. The output is a concatenation of all strings. - */ -@RunWith(JUnit4.class) -public class OrderedEventProcessorTest { - - public static final boolean LAST_EVENT_RECEIVED = true; - public static final int EMISSION_FREQUENCY_ON_EVERY_ELEMENT = 1; - public static final int INITIAL_SEQUENCE_OF_0 = 0; - public static final boolean DONT_PRODUCE_STATUS_ON_EVERY_EVENT = false; - public static final int LARGE_MAX_RESULTS_PER_OUTPUT = 1000; - public static final int EMISSION_FREQUENCY_ON_EVERY_OTHER_EVENT = 2; - public static final boolean PRODUCE_STATUS_ON_EVERY_EVENT = true; - public static final boolean STREAMING = true; - public static final boolean BATCH = false; - public static final Set>>> NO_EXPECTED_DLQ_EVENTS = - Collections.emptySet(); - @Rule public final transient TestPipeline streamingPipeline = TestPipeline.create(); - @Rule public final transient TestPipeline batchPipeline = TestPipeline.create(); - - static class MapEventsToKV extends DoFn>> { - - @ProcessElement - public void convert( - @Element Event event, OutputReceiver>> outputReceiver) { - outputReceiver.output(KV.of(event.getKey(), KV.of(event.getSequence(), event.getValue()))); - } - } - - static class MapStringBufferStateToString - extends DoFn, KV> { - - @ProcessElement - public void map( - @Element KV element, - OutputReceiver> outputReceiver) { - outputReceiver.output(KV.of(element.getKey(), element.getValue().toString())); - } - } +public class OrderedEventProcessorPerKeySequenceTest extends OrderedEventProcessorTestBase { @Test public void testPerfectOrderingProcessing() throws CannotProvideCoderException { @@ -142,7 +84,7 @@ public void testPerfectOrderingProcessing() throws CannotProvideCoderException { expectedOutput.add(KV.of("id-2", "a")); expectedOutput.add(KV.of("id-2", "ab")); - testProcessing( + testPerKeySequenceProcessing( events, expectedStatuses, expectedOutput, @@ -203,7 +145,7 @@ public void testOutOfSequenceProcessing() throws CannotProvideCoderException { expectedOutput.add(KV.of("id-2", "abcd")); expectedOutput.add(KV.of("id-2", "abcde")); - testProcessing( + testPerKeySequenceProcessing( events, expectedStatuses, expectedOutput, @@ -235,7 +177,7 @@ public void testUnfinishedProcessing() throws CannotProvideCoderException { expectedOutput.add(KV.of("id-2", "a")); expectedOutput.add(KV.of("id-2", "ab")); - testProcessing(events, expectedStatuses, expectedOutput, 1, 0, 1000, false); + testPerKeySequenceProcessing(events, expectedStatuses, expectedOutput, 1, 0, 1000, false); } @Test @@ -275,7 +217,7 @@ public void testHandlingOfDuplicateSequences() throws CannotProvideCoderExceptio duplicates.add(KV.of("id-1", KV.of(1L, UnprocessedEvent.create("b", Reason.duplicate)))); duplicates.add(KV.of("id-1", KV.of(3L, UnprocessedEvent.create("d", Reason.duplicate)))); - testProcessing( + testPerKeySequenceProcessing( events, expectedStatuses, expectedOutput, @@ -311,7 +253,7 @@ public void testHandlingOfCheckedExceptions() throws CannotProvideCoderException 2L, UnprocessedEvent.create(StringBuilderState.BAD_VALUE, Reason.exception_thrown)))); - testProcessing( + testPerKeySequenceProcessing( events, expectedStatuses, expectedOutput, @@ -346,7 +288,7 @@ public void testProcessingWithEveryOtherResultEmission() throws CannotProvideCod // Skipped KV.of("id-1", "abcd"), 
expectedOutput.add(KV.of("id-2", "a")); // Skipped KV.of("id-2", "ab") - testProcessing( + testPerKeySequenceProcessing( events, expectedStatuses, expectedOutput, @@ -428,7 +370,7 @@ public void testLargeBufferedOutputInTimer() throws CannotProvideCoderException 0, false))); - testProcessing( + testPerKeySequenceProcessing( events.toArray(new Event[events.size()]), expectedStatuses, expectedOutput, @@ -523,7 +465,7 @@ public void testSequenceGapProcessingInBufferedOutput() throws CannotProvideCode OrderedProcessingStatus.create( 10L, 0, null, null, numberOfReceivedEvents, 10L, 0, false))); - testProcessing( + testPerKeySequenceProcessing( events.toArray(new Event[events.size()]), expectedStatuses, expectedOutput, @@ -558,7 +500,7 @@ public void testHandlingOfMaxSequenceNumber() throws CannotProvideCoderException Long.MAX_VALUE, UnprocessedEvent.create("c", Reason.sequence_id_outside_valid_range)))); - testProcessing( + testPerKeySequenceProcessing( events, expectedStatuses, expectedOutput, @@ -589,7 +531,7 @@ public void testProcessingOfTheLastInput() throws CannotProvideCoderException { expectedOutput.add(KV.of("id-1", "ab")); expectedOutput.add(KV.of("id-1", "ab" + StringEventExaminer.LAST_INPUT)); - testProcessing( + testPerKeySequenceProcessing( events, expectedStatuses, expectedOutput, @@ -599,6 +541,65 @@ public void testProcessingOfTheLastInput() throws CannotProvideCoderException { DONT_PRODUCE_STATUS_ON_EVERY_EVENT); } + protected void testPerKeySequenceProcessing( + Event[] events, + Collection> expectedStatuses, + Collection> expectedOutput, + int emissionFrequency, + long initialSequence, + int maxResultsPerOutput, + boolean produceStatusOnEveryEvent) + throws CannotProvideCoderException { + testPerKeySequenceProcessing( + events, + expectedStatuses, + expectedOutput, + NO_EXPECTED_DLQ_EVENTS, + emissionFrequency, + initialSequence, + maxResultsPerOutput, + produceStatusOnEveryEvent); + } + + protected void testPerKeySequenceProcessing( + Event[] events, + Collection> expectedStatuses, + Collection> expectedOutput, + Collection>>> expectedUnprocessedEvents, + int emissionFrequency, + long initialSequence, + int maxResultsPerOutput, + boolean produceStatusOnEveryEvent) + throws CannotProvideCoderException { + // Test a streaming pipeline + doTest( + events, + expectedStatuses, + expectedOutput, + expectedUnprocessedEvents, + emissionFrequency, + initialSequence, + maxResultsPerOutput, + produceStatusOnEveryEvent, + STREAMING, + false, + ContiguousSequenceRange.EMPTY); + + // Test a batch pipeline + doTest( + events, + expectedStatuses, + expectedOutput, + expectedUnprocessedEvents, + emissionFrequency, + initialSequence, + maxResultsPerOutput, + produceStatusOnEveryEvent, + BATCH, + false, + ContiguousSequenceRange.EMPTY); + } + @Test public void testWindowedProcessing() throws CannotProvideCoderException { @@ -684,223 +685,4 @@ public void testWindowedProcessing() throws CannotProvideCoderException { pipeline.run(); } - - private void testProcessing( - Event[] events, - Collection> expectedStatuses, - Collection> expectedOutput, - int emissionFrequency, - long initialSequence, - int maxResultsPerOutput, - boolean produceStatusOnEveryEvent) - throws CannotProvideCoderException { - testProcessing( - events, - expectedStatuses, - expectedOutput, - NO_EXPECTED_DLQ_EVENTS, - emissionFrequency, - initialSequence, - maxResultsPerOutput, - produceStatusOnEveryEvent); - } - - private void testProcessing( - Event[] events, - Collection> expectedStatuses, - Collection> expectedOutput, - 
Collection>>> expectedUnprocessedEvents, - int emissionFrequency, - long initialSequence, - int maxResultsPerOutput, - boolean produceStatusOnEveryEvent) - throws CannotProvideCoderException { - doTest( - events, - expectedStatuses, - expectedOutput, - expectedUnprocessedEvents, - emissionFrequency, - initialSequence, - maxResultsPerOutput, - produceStatusOnEveryEvent, - STREAMING); - doTest( - events, - expectedStatuses, - expectedOutput, - expectedUnprocessedEvents, - emissionFrequency, - initialSequence, - maxResultsPerOutput, - produceStatusOnEveryEvent, - BATCH); - } - - /** - * The majority of the tests use this method. Testing is done in the global window. - * - * @param events - * @param expectedStatuses - * @param expectedOutput - * @param expectedUnprocessedEvents - * @param emissionFrequency - * @param initialSequence - * @param maxResultsPerOutput - * @param produceStatusOnEveryEvent - * @param streaming - * @throws @UnknownKeyFor @NonNull @Initialized CannotProvideCoderException - */ - private void doTest( - Event[] events, - Collection> expectedStatuses, - Collection> expectedOutput, - Collection>>> expectedUnprocessedEvents, - int emissionFrequency, - long initialSequence, - int maxResultsPerOutput, - boolean produceStatusOnEveryEvent, - boolean streaming) - throws @UnknownKeyFor @NonNull @Initialized CannotProvideCoderException { - - Pipeline pipeline = streaming ? streamingPipeline : batchPipeline; - - PCollection rawInput = - streaming - ? createStreamingPCollection(pipeline, events) - : createBatchPCollection(pipeline, events); - PCollection>> input = - rawInput.apply("To KV", ParDo.of(new MapEventsToKV())); - - StringBufferOrderedProcessingHandler handler = - new StringBufferOrderedProcessingHandler(emissionFrequency, initialSequence); - handler.setMaxOutputElementsPerBundle(maxResultsPerOutput); - if (produceStatusOnEveryEvent) { - handler.setProduceStatusUpdateOnEveryEvent(true); - // This disables status updates emitted on timers. - handler.setStatusUpdateFrequency(null); - } else { - handler.setStatusUpdateFrequency( - streaming ? Duration.standardMinutes(5) : Duration.standardSeconds(1)); - } - OrderedEventProcessor orderedEventProcessor = - OrderedEventProcessor.create(handler); - - OrderedEventProcessorResult processingResult = - input.apply("Process Events", orderedEventProcessor); - - PAssert.that("Output matches", processingResult.output()).containsInAnyOrder(expectedOutput); - - if (streaming) { - // Only in streaming the events will arrive in a pre-determined order and the statuses - // will be deterministic. In batch pipelines events can be processed in any order, - // so we skip status verification and rely on the output and unprocessed event matches. - PAssert.that("Statuses match", processingResult.processingStatuses()) - .containsInAnyOrder(expectedStatuses); - } - - // This is a temporary workaround until PAssert changes. 
- boolean unprocessedEventsHaveExceptionStackTrace = false; - for (KV>> event : expectedUnprocessedEvents) { - if (event.getValue().getValue().getReason() == Reason.exception_thrown) { - unprocessedEventsHaveExceptionStackTrace = true; - break; - } - } - - if (unprocessedEventsHaveExceptionStackTrace) { - PAssert.thatSingleton( - "Unprocessed event count", - processingResult - .unprocessedEvents() - .apply( - "Window", - Window.>>>into( - new GlobalWindows()) - .triggering(Repeatedly.forever(AfterWatermark.pastEndOfWindow())) - .discardingFiredPanes()) - .apply("Count", Count.globally())) - .isEqualTo((long) expectedUnprocessedEvents.size()); - } else { - PAssert.that("Unprocessed events match", processingResult.unprocessedEvents()) - .containsInAnyOrder(expectedUnprocessedEvents); - } - pipeline.run(); - } - - private @UnknownKeyFor @NonNull @Initialized PCollection createBatchPCollection( - Pipeline pipeline, Event[] events) { - return pipeline - .apply("Create Batch Events", Create.of(Arrays.asList(events))) - .apply("Reshuffle", Reshuffle.viaRandomKey()); - } - - private @UnknownKeyFor @NonNull @Initialized PCollection createStreamingPCollection( - Pipeline pipeline, Event[] events) - throws @UnknownKeyFor @NonNull @Initialized CannotProvideCoderException { - Instant now = Instant.now().minus(Duration.standardMinutes(20)); - TestStream.Builder messageFlow = - TestStream.create(pipeline.getCoderRegistry().getCoder(Event.class)) - .advanceWatermarkTo(now); - - int delayInMilliseconds = 0; - for (Event e : events) { - messageFlow = - messageFlow - .advanceWatermarkTo(now.plus(Duration.millis(++delayInMilliseconds))) - .addElements(e); - } - - // Needed to force the processing time based timers. - messageFlow = messageFlow.advanceProcessingTime(Duration.standardMinutes(15)); - return pipeline.apply("Create Streaming Events", messageFlow.advanceWatermarkToInfinity()); - } - - /** - * Unprocessed event's explanation contains stacktraces which makes tests very brittle because it - * requires hardcoding the line numbers in the code. We use this matcher to only compare on the - * first line of the explanation. 
- */ - static class UnprocessedEventMatcher - extends BaseMatcher>>> - implements SerializableMatcher>>> { - - private KV>> element; - - public UnprocessedEventMatcher(KV>> element) { - this.element = element; - } - - @Override - public boolean matches(Object actual) { - KV>> toMatch = - (KV>>) actual; - - UnprocessedEvent originalEvent = element.getValue().getValue(); - UnprocessedEvent eventToMatch = toMatch.getValue().getValue(); - - return element.getKey().equals(toMatch.getKey()) - && element.getValue().getKey().equals(toMatch.getValue().getKey()) - && originalEvent.getEvent().equals(eventToMatch.getEvent()) - && originalEvent.getReason() == eventToMatch.getReason() - && normalizeExplanation(originalEvent.getExplanation()) - .equals(normalizeExplanation(eventToMatch.getExplanation())); - } - - @Override - public void describeTo(Description description) { - description.appendText("Just some text..."); - } - - static String normalizeExplanation(String value) { - if (value == null) { - return ""; - } - String firstLine = value.split("\n", 1)[0]; - if (firstLine.contains("Exception")) { - return firstLine; - } - return value; - } - } } diff --git a/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorTestBase.java b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorTestBase.java new file mode 100644 index 0000000000000..fd651b919df1b --- /dev/null +++ b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/OrderedEventProcessorTestBase.java @@ -0,0 +1,395 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.ordered; + +import static org.hamcrest.MatcherAssert.assertThat; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.beam.runners.dataflow.TestDataflowPipelineOptions; +import org.apache.beam.runners.dataflow.TestDataflowRunner; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.coders.CannotProvideCoderException; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.extensions.ordered.StringBufferOrderedProcessingHandler.StringBufferOrderedProcessingWithGlobalSequenceHandler; +import org.apache.beam.sdk.extensions.ordered.UnprocessedEvent.Reason; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.SerializableMatcher; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.testing.TestStream; +import org.apache.beam.sdk.transforms.Count; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.Reshuffle; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.transforms.windowing.AfterWatermark; +import org.apache.beam.sdk.transforms.windowing.GlobalWindows; +import org.apache.beam.sdk.transforms.windowing.Repeatedly; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollection.IsBounded; +import org.apache.beam.sdk.values.PCollectionView; +import org.checkerframework.checker.initialization.qual.Initialized; +import org.checkerframework.checker.nullness.qual.NonNull; +import org.checkerframework.checker.nullness.qual.UnknownKeyFor; +import org.hamcrest.BaseMatcher; +import org.hamcrest.Description; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Rule; + +/** + * Ordered Processing tests use the same testing scenario. Events are sent in or out of sequence. + * Each event is a string for a particular key. The output is a concatenation of all strings. 
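+ *
+ * <p>This base class also hosts the shared doTest() harness, the batch and streaming input
+ * builders, and the matchers used by both OrderedEventProcessorPerKeySequenceTest and
+ * OrderedEventProcessorGlobalSequenceTest.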
+ */ +public class OrderedEventProcessorTestBase { + + public static final boolean LAST_EVENT_RECEIVED = true; + public static final int EMISSION_FREQUENCY_ON_EVERY_ELEMENT = 1; + public static final int INITIAL_SEQUENCE_OF_0 = 0; + public static final boolean DONT_PRODUCE_STATUS_ON_EVERY_EVENT = false; + public static final int LARGE_MAX_RESULTS_PER_OUTPUT = 1000; + public static final int EMISSION_FREQUENCY_ON_EVERY_OTHER_EVENT = 2; + public static final boolean PRODUCE_STATUS_ON_EVERY_EVENT = true; + public static final boolean STREAMING = true; + public static final boolean BATCH = false; + public static final Set>>> NO_EXPECTED_DLQ_EVENTS = + Collections.emptySet(); + @Rule public final transient TestPipeline streamingPipeline = TestPipeline.create(); + @Rule public final transient TestPipeline batchPipeline = TestPipeline.create(); + + protected boolean runTestsOnDataflowRunner() { + return Boolean.getBoolean("run-tests-on-dataflow"); + } + + protected String getSystemProperty(String name) { + String property = System.getProperty(name); + if (property == null) { + throw new IllegalStateException("Unable to find system property '" + name + "'"); + } + return property; + } + + static class MapEventsToKV extends DoFn>> { + + @ProcessElement + public void convert( + @Element Event event, OutputReceiver>> outputReceiver) { + outputReceiver.output(KV.of(event.getKey(), KV.of(event.getSequence(), event.getValue()))); + } + } + + static class MapStringBufferStateToString + extends DoFn, KV> { + + @ProcessElement + public void map( + @Element KV element, + OutputReceiver> outputReceiver) { + outputReceiver.output(KV.of(element.getKey(), element.getValue().toString())); + } + } + + /** + * The majority of the tests use this method. Testing is done in the global window. + * + * @throws @UnknownKeyFor @NonNull @Initialized CannotProvideCoderException + */ + protected void doTest( + Event[] events, + @Nullable Collection> expectedStatuses, + Collection> expectedOutput, + Collection>>> expectedUnprocessedEvents, + int emissionFrequency, + long initialSequence, + int maxResultsPerOutput, + boolean produceStatusOnEveryEvent, + boolean streaming, + boolean isGlobalSequence, + @Nullable ContiguousSequenceRange expectedLastCompletedSequence) + throws @UnknownKeyFor @NonNull @Initialized CannotProvideCoderException { + + Pipeline pipeline = streaming ? streamingPipeline : batchPipeline; + if (runTestsOnDataflowRunner()) { + pipeline.getOptions().setRunner(TestDataflowRunner.class); + TestDataflowPipelineOptions options = + pipeline.getOptions().as(TestDataflowPipelineOptions.class); + options.setExperiments(Arrays.asList("disable_runner_v2")); + options.setTempRoot("gs://" + getSystemProperty("temp_dataflow_bucket")); + } + PCollection rawInput = + streaming + ? createStreamingPCollection(pipeline, events) + : createBatchPCollection(pipeline, events); + PCollection>> input = + rawInput.apply("To KV", ParDo.of(new MapEventsToKV())); + + OrderedProcessingHandler handler = + isGlobalSequence + ? new StringBufferOrderedProcessingWithGlobalSequenceHandler( + emissionFrequency, initialSequence) + : new StringBufferOrderedProcessingHandler(emissionFrequency, initialSequence); + handler.setMaxOutputElementsPerBundle(maxResultsPerOutput); + if (produceStatusOnEveryEvent) { + handler.setProduceStatusUpdateOnEveryEvent(true); + // This disables status updates emitted on timers. + handler.setStatusUpdateFrequency(null); + } else { + handler.setStatusUpdateFrequency( + streaming ? 
Duration.standardMinutes(5) : Duration.standardSeconds(1)); + } + + OrderedEventProcessor orderedEventProcessor = + OrderedEventProcessor.create(handler); + + OrderedEventProcessorResult processingResult = + input.apply("Process Events", orderedEventProcessor); + + PAssert.that("Output matches", processingResult.output()).containsInAnyOrder(expectedOutput); + + if (streaming && expectedStatuses != null) { + // Only in a streaming pipeline will the events arrive in a pre-determined order and the + // statuses be deterministic. In batch pipelines events can be processed in any order, + // so we skip status verification and rely on the output and unprocessed event matches. + PAssert.that("Statuses match", processingResult.processingStatuses()) + .containsInAnyOrder(expectedStatuses); + } + + // Temporary workaround until PAssert changes: explanations that carry exception stack + // traces can't be matched exactly, so for such events only the count is verified. + boolean unprocessedEventsHaveExceptionStackTrace = false; + for (KV<String, KV<Long, UnprocessedEvent<String>>> event : expectedUnprocessedEvents) { + if (event.getValue().getValue().getReason() == Reason.exception_thrown) { + unprocessedEventsHaveExceptionStackTrace = true; + break; + } + } + + if (unprocessedEventsHaveExceptionStackTrace) { + PAssert.thatSingleton( + "Unprocessed event count", + processingResult + .unprocessedEvents() + .apply( + "Window", + Window.<KV<String, KV<Long, UnprocessedEvent<String>>>>into( + new GlobalWindows()) + .triggering(Repeatedly.forever(AfterWatermark.pastEndOfWindow())) + .discardingFiredPanes()) + .apply("Count", Count.globally())) + .isEqualTo((long) expectedUnprocessedEvents.size()); + } else { + PAssert.that("Unprocessed events match", processingResult.unprocessedEvents()) + .containsInAnyOrder(expectedUnprocessedEvents); + } + + if (expectedLastCompletedSequence != null && processingResult.latestContiguousRange() != null) { + PCollection<ContiguousSequenceRange> globalSequences = + rawInput.apply( + "Publish Global Sequences", + new GlobalSequenceRangePublisher( + processingResult.latestContiguousRange(), + handler.getKeyCoder(pipeline, input.getCoder()), + handler.getEventCoder(pipeline, input.getCoder()))); + PAssert.that("CompletedSequenceRange verification", globalSequences) + .satisfies(new LastExpectedGlobalSequenceRangeMatcher(expectedLastCompletedSequence)); + } + pipeline.run(); + }
+ + static class LastExpectedGlobalSequenceRangeMatcher + implements SerializableFunction<Iterable<ContiguousSequenceRange>, Void> { + + private final long expectedStart; + private final long expectedEnd; + + LastExpectedGlobalSequenceRangeMatcher(ContiguousSequenceRange expected) { + this.expectedStart = expected.getStart(); + this.expectedEnd = expected.getEnd(); + } + + @Override + public Void apply(Iterable<ContiguousSequenceRange> input) { + StringBuilder listOfRanges = new StringBuilder("["); + Iterator<ContiguousSequenceRange> iterator = input.iterator(); + ContiguousSequenceRange lastRange = null; + while (iterator.hasNext()) { + lastRange = iterator.next(); + + if (listOfRanges.length() > 1) { + listOfRanges.append(", "); + } + listOfRanges.append(lastRange); + } + listOfRanges.append(']'); + boolean foundExpectedRange = + lastRange != null + && lastRange.getStart() == expectedStart + && lastRange.getEnd() == expectedEnd; + + assertThat( + "Expected range not found: [" + + expectedStart + + '-' + + expectedEnd + + "], received ranges: " + + listOfRanges, + foundExpectedRange); + return null; + } + }
+ + private @UnknownKeyFor @NonNull @Initialized PCollection<Event> createBatchPCollection( + Pipeline pipeline, Event[] events) { + return pipeline + .apply("Create Batch Events", Create.of(Arrays.asList(events))) + .apply("Reshuffle", Reshuffle.viaRandomKey()); + } + + private @UnknownKeyFor @NonNull @Initialized PCollection<Event> createStreamingPCollection( + Pipeline pipeline, Event[] events) + throws @UnknownKeyFor @NonNull @Initialized CannotProvideCoderException { + Instant now = Instant.now().minus(Duration.standardMinutes(20)); + TestStream.Builder<Event> messageFlow = + TestStream.create(pipeline.getCoderRegistry().getCoder(Event.class)) + .advanceWatermarkTo(now); + + int delayInMilliseconds = 0; + for (Event e : events) { + messageFlow = + messageFlow + .advanceWatermarkTo(now.plus(Duration.millis(++delayInMilliseconds))) + .addElements(e); + } + + // Needed to force the processing-time-based timers. + messageFlow = messageFlow.advanceProcessingTime(Duration.standardMinutes(15)); + return pipeline.apply("Create Streaming Events", messageFlow.advanceWatermarkToInfinity()); + }
+ + /** + * An unprocessed event's explanation contains stack traces, which would make tests brittle if + * compared verbatim because line numbers would have to be hardcoded. This matcher therefore + * compares only the first line of the explanation. + */ + static class UnprocessedEventMatcher + extends BaseMatcher<KV<String, KV<Long, UnprocessedEvent<String>>>> + implements SerializableMatcher<KV<String, KV<Long, UnprocessedEvent<String>>>> { + + private KV<String, KV<Long, UnprocessedEvent<String>>> element; + + public UnprocessedEventMatcher(KV<String, KV<Long, UnprocessedEvent<String>>> element) { + this.element = element; + } + + @Override + public boolean matches(Object actual) { + KV<String, KV<Long, UnprocessedEvent<String>>> toMatch = + (KV<String, KV<Long, UnprocessedEvent<String>>>) actual; + + UnprocessedEvent<String> originalEvent = element.getValue().getValue(); + UnprocessedEvent<String> eventToMatch = toMatch.getValue().getValue(); + + return element.getKey().equals(toMatch.getKey()) + && element.getValue().getKey().equals(toMatch.getValue().getKey()) + && originalEvent.getEvent().equals(eventToMatch.getEvent()) + && originalEvent.getReason() == eventToMatch.getReason() + && normalizeExplanation(originalEvent.getExplanation()) + .equals(normalizeExplanation(eventToMatch.getExplanation())); + } + + @Override + public void describeTo(Description description) { + description.appendText("Just some text..."); + } + + static String normalizeExplanation(String value) { + if (value == null) { + return ""; + } + // Limit 2 keeps only the first line; a limit of 1 would return the whole string unsplit. + String firstLine = value.split("\n", 2)[0]; + if (firstLine.contains("Exception")) { + return firstLine; + } + return value; + } + }
+ + static class GlobalSequenceRangePublisher + extends PTransform<PCollection<Event>, PCollection<ContiguousSequenceRange>> { + + private final PCollectionView<ContiguousSequenceRange> lastCompletedSequenceRangeView; + private final Coder<String> keyCoder; + private final Coder<String> eventCoder; + + public GlobalSequenceRangePublisher( + PCollectionView<ContiguousSequenceRange> latestCompletedSequenceRange, + Coder<String> keyCoder, + Coder<String> eventCoder) { + this.lastCompletedSequenceRangeView = latestCompletedSequenceRange; + this.keyCoder = keyCoder; + this.eventCoder = eventCoder; + } + + @Override + public PCollection<ContiguousSequenceRange> expand(PCollection<Event> input) { + PCollection<KV<String, KV<Long, String>>> events = + input + // In production pipelines the global sequence will typically be obtained + // by using GenerateSequence. But GenerateSequence doesn't work well with TestStream; + // that's why we use the input events here. + // .apply("Create Ticker", + // GenerateSequence.from(0).to(2).withRate(1, + // Duration.standardSeconds(5))) + .apply("To KV", ParDo.of(new MapEventsToKV())); + if (input.isBounded() == IsBounded.BOUNDED) { + return events.apply( + "Emit SideInput", + ParDo.of(new SideInputEmitter()) + .withSideInput("lastCompletedSequence", lastCompletedSequenceRangeView)); + } else { + PCollection<KV<String, KV<Long, String>>> tickers = + events.apply( + "Create Tickers", + new PerKeyTickerGenerator<>(keyCoder, eventCoder, Duration.standardSeconds(1))); + return tickers.apply( + "Emit SideInput", + ParDo.of(new SideInputEmitter()) + .withSideInput("lastCompletedSequence", lastCompletedSequenceRangeView)); + } + } + + static class SideInputEmitter + extends DoFn<KV<String, KV<Long, String>>, ContiguousSequenceRange> { + + @ProcessElement + public void produceCompletedRange( + @SideInput("lastCompletedSequence") ContiguousSequenceRange sideInput, + OutputReceiver<ContiguousSequenceRange> outputReceiver) { + outputReceiver.output(sideInput); + } + } + } +}
diff --git a/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/StringBufferOrderedProcessingHandler.java b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/StringBufferOrderedProcessingHandler.java index 72f3a3cf21b68..1da46c3262e4c 100644 --- a/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/StringBufferOrderedProcessingHandler.java +++ b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/StringBufferOrderedProcessingHandler.java @@ -27,6 +27,24 @@ public class StringBufferOrderedProcessingHandler extends OrderedProcessingHandler<String, String, StringBuilderState, String> { + public static class StringBufferOrderedProcessingWithGlobalSequenceHandler + extends OrderedProcessingGlobalSequenceHandler<String, String, StringBuilderState, String> { + + private final EventExaminer<String, StringBuilderState> eventExaminer; + + public StringBufferOrderedProcessingWithGlobalSequenceHandler( + int emissionFrequency, long initialSequence) { + super(String.class, String.class, StringBuilderState.class, String.class); + this.eventExaminer = new StringEventExaminer(initialSequence, emissionFrequency); + } + + @Override + @NonNull + public EventExaminer<String, StringBuilderState> getEventExaminer() { + return eventExaminer; + } + } + private final EventExaminer<String, StringBuilderState> eventExaminer; public StringBufferOrderedProcessingHandler(int emissionFrequency, long initialSequence) {
diff --git a/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulatorCoderTest.java b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulatorCoderTest.java new file mode 100644 index 0000000000000..0e5b0b7c819a5 --- /dev/null +++ b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulatorCoderTest.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.ordered.combiner; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.apache.beam.sdk.extensions.ordered.combiner.SequenceRangeAccumulator.SequenceRangeAccumulatorCoder; +import org.joda.time.Instant; +import org.junit.Test; + +public class SequenceRangeAccumulatorCoderTest { + + private SequenceRangeAccumulatorCoder coder = SequenceRangeAccumulatorCoder.of(); + + @Test + public void testEncodingEmptyAccumulator() throws IOException { + SequenceRangeAccumulator empty = new SequenceRangeAccumulator(); + + doTestEncodingAndDecoding(empty); + } + + @Test + public void testEncodingAccumulatorWithoutInitialSequence() throws IOException { + SequenceRangeAccumulator accumulator = new SequenceRangeAccumulator(); + accumulator.add(1, Instant.now(), false); + accumulator.add(2, Instant.now(), false); + accumulator.add(3, Instant.now(), false); + accumulator.add(5, Instant.now(), false); + accumulator.add(6, Instant.now(), false); + + doTestEncodingAndDecoding(accumulator); + } + + @Test + public void testEncodingAccumulatorWithInitialSequence() throws IOException { + SequenceRangeAccumulator accumulator = new SequenceRangeAccumulator(); + accumulator.add(1, Instant.now(), true); + accumulator.add(2, Instant.now(), false); + accumulator.add(3, Instant.now(), false); + accumulator.add(5, Instant.now(), false); + accumulator.add(6, Instant.now(), false); + + doTestEncodingAndDecoding(accumulator); + } + + private void doTestEncodingAndDecoding(SequenceRangeAccumulator value) throws IOException { + ByteArrayOutputStream output = new ByteArrayOutputStream(); + coder.encode(value, output); + + SequenceRangeAccumulator decoded = coder.decode(new ByteArrayInputStream(output.toByteArray())); + assertEquals("Accumulator", value, decoded); + } +} diff --git a/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulatorTest.java b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulatorTest.java new file mode 100644 index 0000000000000..4082ce6de7585 --- /dev/null +++ b/sdks/java/extensions/ordered/src/test/java/org/apache/beam/sdk/extensions/ordered/combiner/SequenceRangeAccumulatorTest.java @@ -0,0 +1,400 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.ordered.combiner; + +import java.util.Arrays; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.beam.sdk.extensions.ordered.ContiguousSequenceRange; +import org.joda.time.Instant; +import org.junit.Assert; +import org.junit.Test; + +public class SequenceRangeAccumulatorTest { + + // Atomic just in case tests are run in parallel + private static final AtomicLong currentTicker = new AtomicLong(); + + static Instant nextTimestamp() { + return Instant.ofEpochMilli(currentTicker.getAndIncrement()); + } + + static Instant eventTimestamp(Event[] events, long eventSequence) { + for (Event e : events) { + if (e.sequence == eventSequence) { + return e.timestamp; + } + } + throw new IllegalStateException("Unable to find event with sequence " + eventSequence); + } + + static class Event { + + long sequence; + Instant timestamp; + boolean initialEvent; + + Event(long sequence, Instant ts) { + this(sequence, ts, false); + } + + Event(long sequence, Instant ts, boolean initialEvent) { + this.sequence = sequence; + this.timestamp = ts; + this.initialEvent = initialEvent; + } + } + + @Test + public void testSimpleAccumulation() { + Event[] events = + new Event[] { + new Event(1, nextTimestamp(), true), + new Event(2, nextTimestamp()), + new Event(3, nextTimestamp()) + }; + + doTestAccumulation(events, ContiguousSequenceRange.of(1, 4, eventTimestamp(events, 3)), 1); + } + + @Test + public void testReverseArrivalHandling() { + Event[] events = + new Event[] { + new Event(3, nextTimestamp()), + new Event(2, nextTimestamp()), + new Event(1, nextTimestamp(), true) + }; + + Instant timestampOfEventNumber1 = eventTimestamp(events, 1); + doTestAccumulation(events, ContiguousSequenceRange.of(1, 4, timestampOfEventNumber1), 1); + } + + @Test + public void testPartialRangeAccumulation() { + Event[] events = + new Event[] { + new Event(1, nextTimestamp(), true), + new Event(2, nextTimestamp()), + new Event(3, nextTimestamp()), + new Event(5, nextTimestamp()), + new Event(7, nextTimestamp()), + }; + + doTestAccumulation(events, ContiguousSequenceRange.of(1, 4, eventTimestamp(events, 3)), 3); + } + + @Test + public void testMergingRangeAccumulation() { + Event[] events = + new Event[] { + new Event(1, nextTimestamp(), true), + new Event(2, nextTimestamp()), + new Event(3, nextTimestamp()), + new Event(5, nextTimestamp()), + new Event(7, nextTimestamp()), + new Event(6, nextTimestamp()), + }; + + doTestAccumulation(events, ContiguousSequenceRange.of(1, 4, eventTimestamp(events, 3)), 2); + } + + @Test + public void testNoStartEvent() { + Event[] events = + new Event[] { + new Event(2, nextTimestamp()), + new Event(3, nextTimestamp()), + new Event(1, nextTimestamp()), + new Event(5, nextTimestamp()), + }; + + doTestAccumulation(events, ContiguousSequenceRange.EMPTY, 2); + } + + @Test + public void testNoEventsAccumulation() { + Event[] events = new Event[] {}; + + doTestAccumulation(events, ContiguousSequenceRange.EMPTY, 0); + } + + @Test + public void testRemovingRangesBelowInitialSequenceDuringAccumulation() { + Event[] events = + new Event[] { + // First range + new Event(2, nextTimestamp()), + new Event(3, nextTimestamp()), + new Event(1, nextTimestamp()), + + // Second range + new Event(5, nextTimestamp()), + new Event(6, nextTimestamp()), + + // This event should prune everything below + new Event(7, nextTimestamp(), true), + }; + + doTestAccumulation(events, ContiguousSequenceRange.of(7, 8, eventTimestamp(events, 7)), 1); + } + + @Test + public void 
testRemovingElementsBelowInitialSequenceDuringAccumulation() { + + Event[] events = + new Event[] { + // First range + new Event(2, nextTimestamp()), + new Event(3, nextTimestamp()), + new Event(1, nextTimestamp()), + + // Second range + new Event(5, nextTimestamp()), + new Event(6, nextTimestamp()), + new Event(7, nextTimestamp()), + new Event(8, nextTimestamp()), + + // This event should reduce the range. + new Event(7, nextTimestamp(), true), + }; + + Instant timestampOfTheLastEvent = events[events.length - 1].timestamp; + doTestAccumulation(events, ContiguousSequenceRange.of(7, 9, timestampOfTheLastEvent), 1); + } + + private static void doTestAccumulation( + Event[] events, ContiguousSequenceRange expectedResult, int expectedNumberOfRanges) { + SequenceRangeAccumulator accumulator = new SequenceRangeAccumulator(); + Arrays.stream(events).forEach(e -> accumulator.add(e.sequence, e.timestamp, e.initialEvent)); + + Assert.assertEquals( + "Accumulated results", expectedResult, accumulator.largestContinuousRange()); + + Assert.assertEquals("Number of ranges", expectedNumberOfRanges, accumulator.numberOfRanges()); + } + + @Test + public void testEmptyMerge() { + Event[] set1 = new Event[] {}; + Event[] set2 = new Event[] {}; + + ContiguousSequenceRange expectedResult = ContiguousSequenceRange.EMPTY; + int expectedNumberOfRanges = 0; + + doTestMerging(set1, set2, expectedResult, expectedNumberOfRanges); + } + + @Test + public void testMergingNonEmptyWithEmpty() { + Event[] set1 = + new Event[] { + new Event(3, nextTimestamp()), + new Event(2, nextTimestamp()), + new Event(1, nextTimestamp(), true) + }; + Event[] set2 = new Event[] {}; + + ContiguousSequenceRange expectedResult = + ContiguousSequenceRange.of(1, 4, eventTimestamp(set1, 1L)); + int expectedNumberOfRanges = 1; + + doTestMerging(set1, set2, expectedResult, expectedNumberOfRanges); + } + + @Test + public void testMergingWithLowerNonAdjacentRange() { + Event[] set1 = + new Event[] { + new Event(1, nextTimestamp(), true), new Event(2, nextTimestamp()), + }; + Event[] set2 = + new Event[] { + new Event(4, nextTimestamp()), + new Event(5, nextTimestamp()), + new Event(6, nextTimestamp()) + }; + + ContiguousSequenceRange expectedResult = + ContiguousSequenceRange.of(1, 3, eventTimestamp(set1, 2L)); + int expectedNumberOfRanges = 2; + + doTestMerging(set1, set2, expectedResult, expectedNumberOfRanges); + } + + @Test + public void testMergingWithoutAnyInitialEvents() { + Event[] set1 = + new Event[] { + new Event(1, nextTimestamp()), new Event(2, nextTimestamp()), + }; + Event[] set2 = + new Event[] { + new Event(4, nextTimestamp()), + new Event(5, nextTimestamp()), + new Event(6, nextTimestamp()) + }; + + ContiguousSequenceRange expectedResult = ContiguousSequenceRange.EMPTY; + int expectedNumberOfRanges = 2; + + doTestMerging(set1, set2, expectedResult, expectedNumberOfRanges); + } + + @Test + public void testMergingAdjacentRanges() { + Event[] set1 = + new Event[] { + new Event(1, nextTimestamp(), true), new Event(2, nextTimestamp()), + }; + Event[] set2 = + new Event[] { + new Event(3, nextTimestamp()), + new Event(4, nextTimestamp()), + new Event(5, nextTimestamp()), + new Event(6, nextTimestamp()) + }; + + ContiguousSequenceRange expectedResult = + ContiguousSequenceRange.of(1, 7, eventTimestamp(set2, 6L)); + int expectedNumberOfRanges = 1; + + doTestMerging(set1, set2, expectedResult, expectedNumberOfRanges); + } + + @Test + public void testPruningSequencesBelowInitial() { + Event[] set1 = + new Event[] { + new Event(1, 
nextTimestamp()), new Event(2, nextTimestamp()), + }; + Event[] set2 = + new Event[] { + new Event(3, nextTimestamp(), true), + new Event(4, nextTimestamp()), + new Event(5, nextTimestamp()), + new Event(6, nextTimestamp()) + }; + + ContiguousSequenceRange expectedResult = + ContiguousSequenceRange.of(3, 7, eventTimestamp(set2, 6L)); + int expectedNumberOfRanges = 1; + + doTestMerging(set1, set2, expectedResult, expectedNumberOfRanges); + } + + @Test + public void testDuplicateHandling() { + Event[] set1 = + new Event[] { + new Event(1, nextTimestamp(), true), + new Event(2, nextTimestamp()), + new Event(3, nextTimestamp()), + new Event(5, nextTimestamp()), + }; + Event[] set2 = + new Event[] { + new Event(3, nextTimestamp()), + new Event(4, nextTimestamp()), + new Event(5, nextTimestamp()), + new Event(6, nextTimestamp()) + }; + + ContiguousSequenceRange expectedResult = + ContiguousSequenceRange.of(1, 7, eventTimestamp(set2, 6L)); + int expectedNumberOfRanges = 1; + + doTestMerging(set1, set2, expectedResult, expectedNumberOfRanges); + } + + @Test + public void testExceptionThrownIfThereAreDifferentInitialSequences() { + Event[] set1 = + new Event[] { + new Event(1, nextTimestamp(), true), new Event(2, nextTimestamp()), + }; + Event[] set2 = + new Event[] { + new Event(3, nextTimestamp(), true), + new Event(4, nextTimestamp()), + new Event(5, nextTimestamp()), + new Event(6, nextTimestamp()) + }; + + try { + doTestMerging(set1, set2, ContiguousSequenceRange.EMPTY, 0); + Assert.fail("Expected to throw an exception"); + } catch (IllegalStateException e) { + Assert.assertEquals( + "Exception message", + "Two accumulators contain different initial sequences: 1 and 3", + e.getMessage()); + } + } + + @Test + public void testSelectingHighestTimestampWhenMerging() { + Event[] set1 = + new Event[] { + new Event(1, nextTimestamp(), true), + new Event(2, Instant.ofEpochMilli(currentTicker.get() + 10000)), + }; + Event[] set2 = + new Event[] { + new Event(3, nextTimestamp()), + new Event(4, nextTimestamp()), + new Event(5, nextTimestamp()), + new Event(6, nextTimestamp()) + }; + + ContiguousSequenceRange expectedResult = + ContiguousSequenceRange.of(1, 7, eventTimestamp(set1, 2L)); + int expectedNumberOfRanges = 1; + doTestMerging(set1, set2, expectedResult, expectedNumberOfRanges); + } + + private static void doTestMerging( + Event[] set1, + Event[] set2, + ContiguousSequenceRange expectedResult, + int expectedNumberOfRanges) { + // Try to merge both set2 to set1 and set1 to set2 - both must return the same results + mergeAndTest(set1, set2, expectedResult, expectedNumberOfRanges, "set1"); + mergeAndTest(set2, set1, expectedResult, expectedNumberOfRanges, "set2"); + } + + private static void mergeAndTest( + Event[] set1, + Event[] set2, + ContiguousSequenceRange expectedResult, + int expectedNumberOfRanges, + String firstSetName) { + final SequenceRangeAccumulator a1 = new SequenceRangeAccumulator(); + Arrays.stream(set1).forEach(e -> a1.add(e.sequence, e.timestamp, e.initialEvent)); + + final SequenceRangeAccumulator a2 = new SequenceRangeAccumulator(); + Arrays.stream(set2).forEach(e -> a2.add(e.sequence, e.timestamp, e.initialEvent)); + + a1.merge(a2); + + Assert.assertEquals( + "Accumulated results - " + firstSetName, expectedResult, a1.largestContinuousRange()); + + Assert.assertEquals( + "Number of ranges - " + firstSetName, expectedNumberOfRanges, a1.numberOfRanges()); + } +} From c243491254896e039e912662e4cfbe4bd38c766f Mon Sep 17 00:00:00 2001 From: reuvenlax Date: Wed, 9 Oct 2024 11:09:45 
-0700 Subject: [PATCH 13/14] Merge pull request #32705: fix schema inference for parameterized types --- .../java/org/apache/beam/sdk/Pipeline.java | 2 +- .../apache/beam/sdk/coders/CoderRegistry.java | 44 ++++++++++++++----- .../beam/sdk/schemas/SchemaRegistryTest.java | 20 +++++++++ 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java index d3b58dd26bd24..9006035279f32 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java @@ -335,7 +335,7 @@ public PipelineResult run(PipelineOptions options) { /** Returns the {@link CoderRegistry} that this {@link Pipeline} uses. */ public CoderRegistry getCoderRegistry() { if (coderRegistry == null) { - coderRegistry = CoderRegistry.createDefault(); + coderRegistry = CoderRegistry.createDefault(getSchemaRegistry()); } return coderRegistry; } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/CoderRegistry.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/CoderRegistry.java index df64789ac3d27..e404665e4f66d 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/CoderRegistry.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/CoderRegistry.java @@ -42,6 +42,8 @@ import org.apache.beam.sdk.io.fs.MetadataCoder; import org.apache.beam.sdk.io.fs.ResourceId; import org.apache.beam.sdk.io.fs.ResourceIdCoder; +import org.apache.beam.sdk.schemas.NoSuchSchemaException; +import org.apache.beam.sdk.schemas.SchemaRegistry; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.windowing.IntervalWindow; import org.apache.beam.sdk.util.CoderUtils; @@ -195,11 +197,17 @@ public Coder coderFor( * the lexicographically smallest {@link Class#getName() class name} being used. * */ + public static CoderRegistry createDefault(@Nullable SchemaRegistry schemaRegistry) { + return new CoderRegistry(schemaRegistry); + } + + /** Backwards compatible version of createDefault. */ public static CoderRegistry createDefault() { - return new CoderRegistry(); + return new CoderRegistry(null); } - private CoderRegistry() { + private CoderRegistry(@Nullable SchemaRegistry schemaRegistry) { + this.schemaRegistry = schemaRegistry; coderProviders = new ArrayDeque<>(REGISTERED_CODER_FACTORIES); } @@ -590,6 +598,8 @@ private static boolean isNullOrEmpty(Collection c) { /** The list of {@link CoderProvider coder providers} to use to provide Coders. */ private ArrayDeque coderProviders; + private final @Nullable SchemaRegistry schemaRegistry; + /** * Returns a {@link Coder} to use for values of the given type, in a context where the given types * use the given coders. 
@@ -650,16 +660,28 @@ private Coder getCoderFromParameterizedType( List> typeArgumentCoders = new ArrayList<>(); for (Type typeArgument : type.getActualTypeArguments()) { - try { - Coder typeArgumentCoder = - getCoderFromTypeDescriptor(TypeDescriptor.of(typeArgument), typeCoderBindings); - typeArgumentCoders.add(typeArgumentCoder); - } catch (CannotProvideCoderException exc) { - throw new CannotProvideCoderException( - String.format( - "Cannot provide coder for parameterized type %s: %s", type, exc.getMessage()), - exc); + Coder typeArgumentCoder = null; + if (schemaRegistry != null) { + TypeDescriptor typeDescriptor = TypeDescriptor.of(typeArgument); + try { + typeArgumentCoder = schemaRegistry.getSchemaCoder(typeDescriptor); + } catch (NoSuchSchemaException e) { + // No schema. + } + } + + if (typeArgumentCoder == null) { + try { + typeArgumentCoder = + getCoderFromTypeDescriptor(TypeDescriptor.of(typeArgument), typeCoderBindings); + } catch (CannotProvideCoderException exc) { + throw new CannotProvideCoderException( + String.format( + "Cannot provide coder for parameterized type %s: %s", type, exc.getMessage()), + exc); + } } + typeArgumentCoders.add(typeArgumentCoder); } return getCoderFromFactories(TypeDescriptor.of(type), typeArgumentCoders); } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaRegistryTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaRegistryTest.java index 55a16e9faf391..54c80747b13bc 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaRegistryTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaRegistryTest.java @@ -26,6 +26,10 @@ import com.google.auto.service.AutoService; import com.google.auto.value.AutoValue; import java.util.List; +import org.apache.beam.sdk.coders.CannotProvideCoderException; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderRegistry; +import org.apache.beam.sdk.coders.IterableCoder; import org.apache.beam.sdk.schemas.annotations.DefaultSchema; import org.apache.beam.sdk.schemas.utils.TestJavaBeans.SimpleBean; import org.apache.beam.sdk.schemas.utils.TestPOJOs.SimplePOJO; @@ -223,6 +227,22 @@ public void testRegisterPojo() throws NoSuchSchemaException { assertTrue(SIMPLE_POJO_SCHEMA.equivalent(schema)); } + @Test + public void testSchemaTypeParameterInsideCoder() throws CannotProvideCoderException { + SchemaRegistry schemaRegistry = SchemaRegistry.createDefault(); + schemaRegistry.registerPOJO(SimplePOJO.class); + + CoderRegistry coderRegistry = CoderRegistry.createDefault(schemaRegistry); + Coder> coder = + coderRegistry.getCoder(TypeDescriptors.iterables(TypeDescriptor.of(SimplePOJO.class))); + assertTrue(coder instanceof IterableCoder); + assertEquals(1, coder.getCoderArguments().size()); + assertTrue(coder.getCoderArguments().get(0) instanceof SchemaCoder); + assertTrue( + SIMPLE_POJO_SCHEMA.equivalent( + ((SchemaCoder) coder.getCoderArguments().get(0)).getSchema())); + } + @Test public void testRegisterJavaBean() throws NoSuchSchemaException { SchemaRegistry registry = SchemaRegistry.createDefault(); From 2ee6100980b4661a9db88d507c8b2c667f07b1d4 Mon Sep 17 00:00:00 2001 From: Dmitry Ulyumdzhiev <59957689+deadb0d4@users.noreply.github.com> Date: Wed, 9 Oct 2024 20:11:38 +0100 Subject: [PATCH 14/14] Handle Date type in HCatToRow (#32695) * Handle Date type in HCatToRow Some initial notes: - The issue (#20685) deals with java.sql.Date, which I wasn't able to reproduce fully (I can currently write hcatalog 
hadoop.hive Date values) - On this note, 267f76f3c2036c27dcbc94c563ecd1a2d4481f65 changed the code involved so that there's a direct cast to AbstractInstant in RowUtils.java. This doesn't change much, but FYI. * Run: ./gradlew :sdks:java:io:hcatalog:spotlessApply * review cr: castTypes util - s/castHDate/maybeCastHDate/ to be more concise - move the values manipulation to a separate util --- .../beam/sdk/io/hcatalog/HCatToRow.java | 17 +++++++- .../beam/sdk/io/hcatalog/HCatalogIOTest.java | 41 +++++++++++++++++++ .../io/hcatalog/test/HCatalogIOTestUtils.java | 10 +++++ 3 files changed, 67 insertions(+), 1 deletion(-)
diff --git a/sdks/java/io/hcatalog/src/main/java/org/apache/beam/sdk/io/hcatalog/HCatToRow.java b/sdks/java/io/hcatalog/src/main/java/org/apache/beam/sdk/io/hcatalog/HCatToRow.java index 8e29650f3fc3e..e5bdf18ecbcf4 100644 --- a/sdks/java/io/hcatalog/src/main/java/org/apache/beam/sdk/io/hcatalog/HCatToRow.java +++ b/sdks/java/io/hcatalog/src/main/java/org/apache/beam/sdk/io/hcatalog/HCatToRow.java @@ -17,6 +17,8 @@ */ package org.apache.beam.sdk.io.hcatalog; +import java.util.List; +import java.util.stream.Collectors; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; @@ -25,6 +27,7 @@ import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.Row; import org.apache.hive.hcatalog.data.HCatRecord; +import org.joda.time.Instant; /** Utilities to convert {@link HCatRecord HCatRecords} to {@link Row Rows}. */ @SuppressWarnings({ @@ -74,6 +77,18 @@ public PCollection<Row> expand(PBegin input) { private static class HCatToRowFn extends DoFn<HCatRecord, Row> { private final Schema schema; + private Object maybeCastHDate(Object obj) { + if (obj instanceof org.apache.hadoop.hive.common.type.Date) { + return new Instant(((org.apache.hadoop.hive.common.type.Date) obj).toEpochMilli()); + } + return obj; + } + + /** Casts objects of types that aren't supported by {@link Row}. */ + private List<Object> castTypes(List<Object> values) { + return values.stream().map(this::maybeCastHDate).collect(Collectors.toList()); + } + HCatToRowFn(Schema schema) { this.schema = schema; } @@ -81,7 +96,7 @@ private static class HCatToRowFn extends DoFn<HCatRecord, Row> { @ProcessElement public void processElement(ProcessContext c) { HCatRecord hCatRecord = c.element(); - c.output(Row.withSchema(schema).addValues(hCatRecord.getAll()).build()); + c.output(Row.withSchema(schema).addValues(castTypes(hCatRecord.getAll())).build()); } } }
diff --git a/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/HCatalogIOTest.java b/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/HCatalogIOTest.java index 4bb7e1bd70441..3d97a2ccc1d98 100644 --- a/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/HCatalogIOTest.java +++ b/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/HCatalogIOTest.java @@ -22,6 +22,7 @@ import static org.apache.beam.sdk.io.hcatalog.test.HCatalogIOTestUtils.TEST_RECORDS_COUNT; import static org.apache.beam.sdk.io.hcatalog.test.HCatalogIOTestUtils.TEST_TABLE; import static org.apache.beam.sdk.io.hcatalog.test.HCatalogIOTestUtils.buildHCatRecords; +import static org.apache.beam.sdk.io.hcatalog.test.HCatalogIOTestUtils.buildHCatRecordsWithDate; import static org.apache.beam.sdk.io.hcatalog.test.HCatalogIOTestUtils.getConfigPropertiesAsMap; import static org.apache.beam.sdk.io.hcatalog.test.HCatalogIOTestUtils.getExpectedRecords; import static org.apache.beam.sdk.io.hcatalog.test.HCatalogIOTestUtils.getReaderContext; @@ -54,12 +55,14 @@ import org.apache.beam.sdk.testing.SourceTestUtils; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.Distinct; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.Watch; import org.apache.beam.sdk.util.SerializableUtils; import org.apache.beam.sdk.util.UserCodeException; import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.Row; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hive.hcatalog.data.DefaultHCatRecord; @@ -230,6 +233,44 @@ public void processElement(ProcessContext c) { readAfterWritePipeline.run(); } + /** Tests reading a Date-typed column from an HCatalog table. */ + @Test + public void testReadHCatalogDateType() throws Exception { + service.executeQuery("drop table if exists " + TEST_TABLE); + service.executeQuery("create table " + TEST_TABLE + "(mycol1 string, mycol2 date)"); + + defaultPipeline + .apply(Create.of(buildHCatRecordsWithDate(TEST_RECORDS_COUNT))) + .apply( + HCatalogIO.write() + .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf())) + .withDatabase(TEST_DATABASE) + .withTable(TEST_TABLE) + .withPartition(new java.util.HashMap<>())); + defaultPipeline.run().waitUntilFinish(); + + final PCollection<String> output = + readAfterWritePipeline + .apply( + HCatToRow.fromSpec( + HCatalogIO.read() + .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf())) + .withDatabase(TEST_DATABASE) + .withTable(TEST_TABLE) + .withFilter(TEST_FILTER))) + .apply( + ParDo.of( + new DoFn<Row, String>() { + @ProcessElement + public void processElement(ProcessContext c) { + c.output(c.element().getDateTime("mycol2").toString("yyyy-MM-dd HH:mm:ss")); + } + })) + .apply(Distinct.create()); + PAssert.that(output).containsInAnyOrder(ImmutableList.of("2014-01-20 00:00:00")); + readAfterWritePipeline.run(); + } + /** Test of Write to a non-existent table. */ @Test public void testWriteFailureTableDoesNotExist() {
diff --git a/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/test/HCatalogIOTestUtils.java b/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/test/HCatalogIOTestUtils.java index d0d1d850a6cbe..c09c2c906d649 100644 --- a/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/test/HCatalogIOTestUtils.java +++ b/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/test/HCatalogIOTestUtils.java @@ -26,6 +26,7 @@ import java.util.Map.Entry; import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.values.KV; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.data.DefaultHCatRecord; @@ -120,4 +121,13 @@ public static Map<String, String> getConfigPropertiesAsMap(HiveConf hiveConf) { private static DefaultHCatRecord toHCatRecord(int value) { return new DefaultHCatRecord(Arrays.asList("record " + value, value)); } + + /** Returns a list of HCatRecords of the given size, each with a fixed dummy date field. */ + public static List<HCatRecord> buildHCatRecordsWithDate(int size) { + List<HCatRecord> expected = new ArrayList<>(); + for (int i = 0; i < size; i++) { + expected.add(new DefaultHCatRecord(Arrays.asList("record " + i, Date.valueOf("2014-01-20")))); + } + return expected; + } }
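For reference, the date conversion introduced by this last patch can be exercised outside a pipeline. The following is a minimal standalone sketch, not part of the patch: it assumes Hive's org.apache.hadoop.hive.common.type.Date (whose toEpochMilli() yields midnight UTC of the given day, as used by maybeCastHDate above) and Joda-Time on the classpath; the class name HiveDateCastSketch is illustrative.

import org.joda.time.Instant;

public class HiveDateCastSketch {

  // Mirrors the maybeCastHDate logic: Hive Date -> Joda Instant, other values pass through.
  static Object maybeCastHDate(Object obj) {
    if (obj instanceof org.apache.hadoop.hive.common.type.Date) {
      return new Instant(((org.apache.hadoop.hive.common.type.Date) obj).toEpochMilli());
    }
    return obj;
  }

  public static void main(String[] args) {
    Object cast = maybeCastHDate(org.apache.hadoop.hive.common.type.Date.valueOf("2014-01-20"));
    // Prints 2014-01-20T00:00:00.000Z, the instant asserted by testReadHCatalogDateType.
    System.out.println(cast);
  }
}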

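The range semantics exercised by SequenceRangeAccumulatorTest earlier in this series can also be summarized independently of Beam: starting from the initial sequence, the largest contiguous range is the half-open interval covering consecutive sequence numbers. Below is a deliberately simplified sketch of that reading only; the real SequenceRangeAccumulator additionally tracks per-range timestamps, merging, and pruning below the initial sequence, and the names ContiguousRangeSketch and largestContiguousRange are hypothetical.

import java.util.Arrays;
import java.util.TreeSet;

public class ContiguousRangeSketch {

  // Returns {start, endExclusive} for the run of consecutive sequences beginning at
  // initialSequence, or null when the initial sequence was never observed
  // (the analogue of ContiguousSequenceRange.EMPTY in the tests above).
  static long[] largestContiguousRange(TreeSet<Long> sequences, long initialSequence) {
    if (!sequences.contains(initialSequence)) {
      return null;
    }
    long end = initialSequence;
    while (sequences.contains(end + 1)) {
      end++;
    }
    return new long[] {initialSequence, end + 1}; // the end is exclusive, as in the tests
  }

  public static void main(String[] args) {
    TreeSet<Long> seen = new TreeSet<>(Arrays.asList(1L, 2L, 3L, 5L, 7L));
    long[] range = largestContiguousRange(seen, 1L);
    // Prints 1..4, matching testPartialRangeAccumulation's expected range of(1, 4, ...).
    System.out.println(range[0] + ".." + range[1]);
  }
}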