
Commit fed7e01

fpacifici and bmckerry authored

Add an Arroyo Adapter (#68)

* feat(api): add broadcast functionality (#32)
* add unit test for broadcast in pipeline config
* add Broadcast step to API + unit test
* use implicit broadcasting
* use implicit broadcasting
* whoops
* revert user_functions split so tests pass
* add broadcast tests
* add examples/broadcast.py
* fix CI
* address comments
* use unittest ANY
* fix branch test
* fix CI pipeline path
* user_functions was removed
* use test functions for tests
* Add test
* Add the main consumer
* Arroyo Adapter
* Add conftest
* Fix tests
* Add flatmap method

---------
Co-authored-by: Ben McKerry <[email protected]>

1 parent f8a2f3d commit fed7e01

File tree

17 files changed: +936 additions, -12 deletions


.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions

@@ -39,6 +39,7 @@ repos:
         pytest==7.1.2,
         types-requests,
         responses,
+        "sentry-arroyo>=2.18.2",
       ]
       files: ^sentry_streams/.+
 - repo: https://github.com/pycqa/isort

.vscode/settings.json

Lines changed: 4 additions & 1 deletion

@@ -5,5 +5,8 @@
   ],
   "python.testing.unittestEnabled": false,
   "python.testing.pytestEnabled": true,
-  "mypy-type-checker.args": ["--strict"],
+  "mypy-type-checker.args": [
+    "--strict"
+  ],
+  "editor.formatOnSave": true
 }

sentry_flink/pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@ requires-python = ">=3.11,<3.12"
 dependencies = [
     "apache-flink==1.20.0",
     "requests>=2.32.3",
-    "sentry-streams>=0.0.9",
+    "sentry-streams==0.0.9",
     # Figure out a way to get Flink work without setuptools.
     "setuptools==75.8.0",
 ]

sentry_flink/uv.lock

Lines changed: 8 additions & 9 deletions
Generated lockfile; diff not rendered by default.

sentry_streams/pyproject.toml

Lines changed: 1 addition & 0 deletions

@@ -11,6 +11,7 @@ readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
     "requests>=2.32.3",
+    "sentry-arroyo>=2.18.2",
 ]

 [dependency-groups]
New file, 3 additions & 0 deletions (the sentry_streams.adapters.arroyo package __init__, per the imports below):

from sentry_streams.adapters.arroyo.adapter import ArroyoAdapter

__all__ = ["ArroyoAdapter"]
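With this re-export in place, callers can import the adapter from the package root rather than from the adapter module; for example:

# Equivalent to importing from sentry_streams.adapters.arroyo.adapter,
# thanks to the re-export in the package __init__.
from sentry_streams.adapters.arroyo import ArroyoAdapter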
New file, 227 additions & 0 deletions (the sentry_streams.adapters.arroyo.adapter module):

from __future__ import annotations

from typing import Any, Mapping, MutableMapping, TypedDict

from arroyo.backends.kafka.configuration import (
    build_kafka_configuration,
    build_kafka_consumer_configuration,
)
from arroyo.backends.kafka.consumer import KafkaConsumer, KafkaPayload, KafkaProducer
from arroyo.processing.processor import StreamProcessor
from arroyo.types import Topic

from sentry_streams.adapters.arroyo.consumer import (
    ArroyoConsumer,
    ArroyoStreamingFactory,
)
from sentry_streams.adapters.arroyo.routes import Route
from sentry_streams.adapters.arroyo.steps import FilterStep, KafkaSinkStep, MapStep
from sentry_streams.adapters.stream_adapter import PipelineConfig, StreamAdapter
from sentry_streams.pipeline.pipeline import (
    Filter,
    FlatMapStep,
    KafkaSink,
    KafkaSource,
    Map,
    Reduce,
    Sink,
    Source,
)


class KafkaConsumerConfig(TypedDict):
    bootstrap_servers: str
    auto_offset_reset: str
    consumer_group: str
    additional_settings: Mapping[str, Any]


class KafkaProducerConfig(TypedDict):
    bootstrap_servers: str
    additional_settings: Mapping[str, Any]


class KafkaSources:
    def __init__(
        self,
        sources_config: Mapping[str, KafkaConsumerConfig],
        sources_override: Mapping[str, KafkaConsumer] = {},
    ) -> None:
        super().__init__()

        self.__sources_config = sources_config

        # Overrides are for unit testing purposes
        self.__source_topics: MutableMapping[str, Topic] = {}
        self.__sources: MutableMapping[str, KafkaConsumer] = {**sources_override}

    def add_source(self, step: Source) -> None:
        """
        Builds an Arroyo Kafka consumer as a stream source.
        By default it uses the configuration provided to the adapter.

        It is possible to override the configuration by providing an
        instantiated consumer for unit testing purposes.
        """
        # TODO: Provide a better way to get the logical stream name from
        # the Source step. We should not have to assert it is a Kafka source.
        assert isinstance(step, KafkaSource), "Only Kafka Sources are supported"
        source_name = step.name
        if source_name not in self.__sources:
            config = self.__sources_config.get(source_name)
            assert config, f"Config not provided for source {source_name}"
            self.__sources[source_name] = KafkaConsumer(
                build_kafka_consumer_configuration(
                    default_config=config["additional_settings"],
                    bootstrap_servers=config["bootstrap_servers"],
                    auto_offset_reset=config["auto_offset_reset"],
                    group_id=config["consumer_group"],
                )
            )

        self.__source_topics[source_name] = Topic(step.logical_topic)

    def get_topic(self, source: str) -> Topic:
        return self.__source_topics[source]

    def get_consumer(self, source: str) -> KafkaConsumer:
        return self.__sources[source]


class ArroyoAdapter(StreamAdapter[Route, Route]):

    def __init__(
        self,
        sources_config: Mapping[str, KafkaConsumerConfig],
        sinks_config: Mapping[str, KafkaProducerConfig],
        sources_override: Mapping[str, KafkaConsumer] = {},
        sinks_override: Mapping[str, KafkaProducer] = {},
    ) -> None:
        super().__init__()

        self.__sources = KafkaSources(sources_config, sources_override)
        self.__sinks_config = sinks_config

        # Overrides are for unit testing purposes
        self.__sinks: MutableMapping[str, Any] = {**sinks_override}

        self.__consumers: MutableMapping[str, ArroyoConsumer] = {}

    @classmethod
    def build(cls, config: PipelineConfig) -> ArroyoAdapter:
        return cls(
            config["sources_config"],
            config["sinks_config"],
            config.get("sources_override", {}),
            config.get("sinks_override", {}),
        )

    def source(self, step: Source) -> Route:
        """
        Builds an Arroyo Kafka consumer as a stream source.
        By default it uses the configuration provided to the adapter.

        It is possible to override the configuration by providing an
        instantiated consumer for unit testing purposes.
        """
        source_name = step.name
        self.__sources.add_source(step)
        self.__consumers[source_name] = ArroyoConsumer(source_name)

        return Route(source_name, [])

    def sink(self, step: Sink, stream: Route) -> Route:
        """
        Builds an Arroyo Kafka producer as a stream sink.
        By default it uses the configuration provided to the adapter.

        It is possible to override the configuration by providing an
        instantiated producer for unit testing purposes.
        """
        # TODO: Provide a better way to get the logical stream name from
        # the Sink step. We should not have to assert it is a Kafka sink.
        assert isinstance(step, KafkaSink), "Only Kafka Sinks are supported"

        sink_name = step.name
        if sink_name not in self.__sinks:
            config = self.__sinks_config.get(sink_name)
            assert config, f"Config not provided for sink {sink_name}"
            producer = KafkaProducer(
                build_kafka_configuration(
                    default_config=config["additional_settings"],
                    bootstrap_servers=config["bootstrap_servers"],
                )
            )
        else:
            producer = self.__sinks[sink_name]

        assert (
            stream.source in self.__consumers
        ), f"Stream starting at source {stream.source} not found when adding a producer"

        self.__consumers[stream.source].add_step(
            KafkaSinkStep(route=stream, producer=producer, topic_name=step.logical_topic)
        )

        return stream

    def map(self, step: Map, stream: Route) -> Route:
        """
        Builds a map operator for the platform the adapter supports.
        """
        assert (
            stream.source in self.__consumers
        ), f"Stream starting at source {stream.source} not found when adding a map"

        self.__consumers[stream.source].add_step(MapStep(route=stream, pipeline_step=step))
        return stream

    def flat_map(self, step: FlatMapStep, stream: Route) -> Route:
        """
        Builds a flat-map operator for the platform the adapter supports.
        """
        raise NotImplementedError

    def filter(self, step: Filter, stream: Route) -> Route:
        """
        Builds a filter operator for the platform the adapter supports.
        """
        assert (
            stream.source in self.__consumers
        ), f"Stream starting at source {stream.source} not found when adding a filter"

        self.__consumers[stream.source].add_step(FilterStep(route=stream, pipeline_step=step))
        return stream

    def reduce(
        self,
        step: Reduce,
        stream: Route,
    ) -> Route:
        """
        Builds a reduce operator for the platform the adapter supports.
        """
        raise NotImplementedError

    def get_processor(self, source: str) -> StreamProcessor[KafkaPayload]:
        """
        Returns the stream processor for the given source.
        """
        factory = ArroyoStreamingFactory(self.__consumers[source])

        return StreamProcessor(
            consumer=self.__sources.get_consumer(source),
            topic=self.__sources.get_topic(source),
            processor_factory=factory,
        )

    def run(self) -> None:
        """
        Starts the pipeline.
        """
        # TODO: Support multiple consumers
        assert len(self.__consumers) == 1, "Only one consumer is supported"
        source = next(iter(self.__consumers))

        processor = self.get_processor(source)
        processor.run()
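For context, a minimal sketch of driving the adapter by hand. The config shape follows the TypedDicts and build() above; the source and sink names ("ingest", "publish") are hypothetical and would have to match the pipeline's step names, and the pipeline runner that calls source(), map(), and sink() is outside this diff:

from sentry_streams.adapters.arroyo import ArroyoAdapter

# Hypothetical names; the nested keys mirror KafkaConsumerConfig and
# KafkaProducerConfig, and the top-level keys mirror ArroyoAdapter.build().
config = {
    "sources_config": {
        "ingest": {
            "bootstrap_servers": "localhost:9092",
            "auto_offset_reset": "earliest",
            "consumer_group": "example-group",
            "additional_settings": {},
        },
    },
    "sinks_config": {
        "publish": {
            "bootstrap_servers": "localhost:9092",
            "additional_settings": {},
        },
    },
}

adapter = ArroyoAdapter.build(config)
# A pipeline runner would register steps here, e.g.
#   route = adapter.source(...); adapter.map(...); adapter.sink(...)
adapter.run()  # asserts exactly one consumer, then runs its StreamProcessor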
New file, 79 additions & 0 deletions (the sentry_streams.adapters.arroyo.consumer module):

from dataclasses import dataclass, field
from typing import Any, Mapping, MutableSequence

from arroyo.backends.kafka.consumer import KafkaPayload
from arroyo.processing.strategies import CommitOffsets
from arroyo.processing.strategies.abstract import (
    ProcessingStrategy,
    ProcessingStrategyFactory,
)
from arroyo.processing.strategies.run_task import RunTask
from arroyo.types import (
    Commit,
    Message,
    Partition,
)

from sentry_streams.adapters.arroyo.routes import Route, RoutedValue
from sentry_streams.adapters.arroyo.steps import ArroyoStep


@dataclass
class ArroyoConsumer:
    """
    Intermediate representation of a single Arroyo application composed
    of multiple steps.

    Arroyo does not support branches, but the streaming platform does,
    so we need to fake them in Arroyo. This is done by turning the
    branched pipeline into a sequence and making all messages go through
    the steps of all the branches. The route is used to filter out the
    messages that do not belong to a given branch.

    An Arroyo application is built from the last step to the first,
    because every step references the one that follows it. The streaming
    platform lets you define the pipeline in sequence from the first
    step to the last, so this intermediate representation collects the
    pipeline and builds it in reverse order for Arroyo.
    """

    source: str
    steps: MutableSequence[ArroyoStep] = field(default_factory=list)

    def add_step(self, step: ArroyoStep) -> None:
        """
        Append a pipeline step to the Arroyo consumer.
        """
        assert step.route.source == self.source
        self.steps.append(step)

    def build_strategy(self, commit: Commit) -> ProcessingStrategy[Any]:
        """
        Build the Arroyo consumer, wiring up the steps in reverse order.

        It also adds a strategy at the beginning that wraps each payload
        in a RoutedValue carrying the route the message is supposed to
        follow.
        """

        def add_route(message: Message[KafkaPayload]) -> RoutedValue:
            value = message.payload.value
            return RoutedValue(route=Route(source=self.source, waypoints=[]), payload=value)

        strategy: ProcessingStrategy[Any] = CommitOffsets(commit)
        for step in reversed(self.steps):
            strategy = step.build(strategy)

        return RunTask(add_route, strategy)


class ArroyoStreamingFactory(ProcessingStrategyFactory[Any]):
    def __init__(self, consumer: ArroyoConsumer) -> None:
        self.consumer = consumer

    def create_with_partitions(
        self,
        commit: Commit,
        _: Mapping[Partition, int],
    ) -> ProcessingStrategy[Any]:
        return self.consumer.build_strategy(commit)
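The reverse loop in build_strategy exists because each Arroyo strategy is constructed around the strategy that follows it, so the terminal CommitOffsets has to exist first. A self-contained toy model (deliberately not the real ArroyoStep API) showing that wiring in reverse yields execution in pipeline order:

from typing import Callable, List

# Each "strategy" is a callable that forwards to its successor, mimicking
# how step.build(next_strategy) wraps the strategy that follows it.
Strategy = Callable[[str], None]

def make_step(name: str, next_strategy: Strategy) -> Strategy:
    def strategy(msg: str) -> None:
        next_strategy(f"{name}({msg})")
    return strategy

terminal: Strategy = print                    # stands in for CommitOffsets
steps: List[str] = ["map", "filter", "sink"]  # pipeline order

strategy = terminal
for step in reversed(steps):                  # mirrors build_strategy's loop
    strategy = make_step(step, strategy)

strategy("payload")  # prints sink(filter(map(payload))): messages traverse
                     # the steps in pipeline order despite reverse construction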
