honeybadger-io
diff --git a/‎lib/honeybadger.ex
Lines changed: 11 additions & 5 deletions b/‎lib/honeybadger.ex
Lines changed: 11 additions & 5 deletions
diff --git a/‎lib/honeybadger/client.ex
Lines changed: 79 additions & 3 deletions b/‎lib/honeybadger/client.ex
Lines changed: 79 additions & 3 deletions
diff --git a/‎lib/honeybadger/events_worker.ex
Lines changed: 242 additions & 0 deletions b/‎lib/honeybadger/events_worker.ex
Lines changed: 242 additions & 0 deletions
@@ -169,7 +169,7 @@ defmodule Honeybadger do
 
   require Logger
 
-  alias Honeybadger.{Client, Notice}
+  alias Honeybadger.{Client, Notice, EventsWorker}
   alias Honeybadger.Breadcrumbs.{Collector, Breadcrumb}
 
   @type notify_options ::
@@ -206,7 +206,9 @@ defmodule Honeybadger do
       Honeybadger.Breadcrumbs.Telemetry.attach()
     end
 
-    Supervisor.start_link([{Client, [config]}], strategy: :one_for_one)
+    children = [{Client, [config]}, EventsWorker]
+
+    Supervisor.start_link(children, strategy: :one_for_one)
   end
 
   @doc """
@@ -345,9 +347,13 @@ defmodule Honeybadger do
   def event(event_data) do
     ts = DateTime.utc_now() |> DateTime.to_string()
 
-    event_data
-    |> Map.put_new(:ts, ts)
-    |> Client.send_event()
+    data = Map.put_new(event_data, :ts, ts)
+
+    if get_env(:events_worker_enabled) do
+      EventsWorker.push(data)
+    else
+      Client.send_event(data)
+    end
   end
 
   @doc """
 
@@ -90,6 +90,23 @@ defmodule Honeybadger.Client do
     end
   end
 
+  @doc """
+  Posts a list of events through the client process as one batch.
+
+  Looks up the process registered under this module's name and issues a
+  synchronous `{:events, events}` call to it, waiting up to 30 seconds for the
+  reply. When no such process is registered a warning is logged and
+  `{:error, :client_not_running}` is returned.
+  """
+  @spec send_events(list) :: :ok | {:error, reason :: atom()}
+  def send_events(events) when is_list(events) do
+    case Process.whereis(__MODULE__) do
+      nil ->
+        Logger.warning(fn ->
+          "[Honeybadger] Unable to post events, the :honeybadger client isn't running"
+        end)
+
+        {:error, :client_not_running}
+
+      client ->
+        # 30 second ceiling on the synchronous call.
+        GenServer.call(client, {:events, events}, 30_000)
+    end
+  end
+
   @doc """
   Check whether reporting is enabled for the current environment.
 
@@ -148,7 +165,7 @@ defmodule Honeybadger.Client do
           |> Map.get(:hackney_opts)
           |> Keyword.merge(opts)
 
-        post_notice(url, headers, payload, hackney_opts)
+        post_payload(url, headers, payload, hackney_opts)
 
       {:error, %Jason.EncodeError{message: message}} ->
         Logger.warning(fn -> "[Honeybadger] Notice encoding failed: #{message}" end)
@@ -186,7 +203,7 @@ defmodule Honeybadger.Client do
           |> Keyword.merge(opts)
 
         # post logic for events is the same as notices
-        post_notice(event_url, headers, payload, hackney_opts)
+        post_payload(event_url, headers, payload, hackney_opts)
 
       {:error, %Jason.EncodeError{message: message}} ->
         Logger.warning(fn -> "[Honeybadger] Event encoding failed: #{message}" end)
@@ -205,13 +222,72 @@ defmodule Honeybadger.Client do
     {:noreply, state}
   end
 
+  # Batched events
+  #
+  # Replies `:ok` without doing anything when reporting is disabled, and
+  # `{:error, :no_api_key}` when no API key is configured. Otherwise every
+  # event is JSON-encoded, the documents are joined with newlines and posted to
+  # the events URL with an `application/x-ndjson` content type.
+  @impl GenServer
+  def handle_call({:events, _}, _from, %{enabled: false} = state) do
+    {:reply, :ok, state}
+  end
+
+  def handle_call({:events, _}, _from, %{api_key: nil} = state) do
+    {:reply, {:error, :no_api_key}, state}
+  end
+
+  def handle_call({:events, events}, _from, state) do
+    %{event_url: event_url, headers: headers} = state
+
+    results = Enum.map(events, &Honeybadger.JSON.encode/1)
+    encoding_failed? = Enum.any?(results, fn result -> match?({:error, _}, result) end)
+
+    reply =
+      if encoding_failed? do
+        # A single event that cannot be encoded rejects the whole batch.
+        {:error, :encoding_error}
+      else
+        # One JSON document per line.
+        ndjson = Enum.map_join(results, "\n", fn result -> elem(result, 1) end)
+
+        transport_opts =
+          state
+          |> Map.take([:proxy, :proxy_auth])
+          |> Map.put(:pool, __MODULE__)
+          |> Keyword.new()
+
+        hackney_opts = Keyword.merge(Map.get(state, :hackney_opts), transport_opts)
+
+        ndjson_headers =
+          List.keyreplace(headers, "Content-Type", 0, {"Content-Type", "application/x-ndjson"})
+
+        case HTTPAdapter.request(:post, event_url, ndjson, ndjson_headers, hackney_opts) do
+          {:ok, %HTTPResponse{body: body, status: status}} when status in 200..399 ->
+            Logger.debug(fn -> "[Honeybadger] API success: #{inspect(body)}" end)
+            :ok
+
+          # Must stay ahead of the generic 400..599 clause so 429 maps to :throttled.
+          {:ok, %HTTPResponse{status: status}} when status == 429 ->
+            Logger.warning("[Honeybadger] API rate limited:")
+            {:error, :throttled}
+
+          {:ok, %HTTPResponse{status: status}} when status in 400..599 ->
+            Logger.warning("[Honeybadger] API failure")
+            {:error, :api_error}
+
+          {:error, reason} ->
+            Logger.warning(fn -> "[Honeybadger] connection error: #{inspect(reason)}" end)
+            {:error, :connection_error}
+        end
+      end
+
+    {:reply, reply, state}
+  end
+
   # API Integration
 
   defp build_headers(opts) do
     [{"X-API-Key", get_env(opts, :api_key)}] ++ @headers
   end
 
-  defp post_notice(url, headers, payload, hackney_opts) do
+  defp post_payload(url, headers, payload, hackney_opts) do
     case HTTPAdapter.request(:post, url, payload, headers, hackney_opts) do
       {:ok, %HTTPResponse{body: body, status: status}} when status in 200..399 ->
         Logger.debug(fn -> "[Honeybadger] API success: #{inspect(body)}" end)
 
@@ -0,0 +1,242 @@
+defmodule Honeybadger.EventsWorker do
+  @moduledoc """
+  A GenServer that batches and sends events with retry and throttling logic.
+
+  It accumulates events in a queue, forms batches when the batch size is reached or
+  when a flush timeout expires, and then sends these batches to a backend module.
+  If a batch fails to send, it will be retried (up to a configurable maximum) or dropped.
+  In case of throttling (e.g. receiving a 429), the flush delay is increased.
+  """
+
+  @dropped_log_interval 60_000
+
+  use GenServer
+  require Logger
+
+  defmodule State do
+    @moduledoc """
+    Configuration and runtime bookkeeping held by the events worker process.
+    """
+
+    @typedoc """
+    Function that accepts a list of events to be processed.
+    """
+    @type send_events_fn :: ([term()] -> :ok | {:error, :throttled} | {:error, term()})
+
+    @typedoc """
+    State for the event batching GenServer.
+
+    `timeout_started_at` and `last_dropped_log` hold `System.monotonic_time/1`
+    readings in milliseconds. Monotonic time starts at an unspecified point and
+    may be negative, so both are plain integers. `timeout_started_at` uses `0`
+    as the "no flush window open" marker.
+    """
+    @type t :: %__MODULE__{
+            # Configuration
+            send_events_fn: send_events_fn(),
+            batch_size: pos_integer(),
+            max_queue_size: pos_integer(),
+            timeout: pos_integer(),
+            max_batch_retries: non_neg_integer(),
+            throttle_wait: pos_integer(),
+
+            # Internal state
+            timeout_started_at: integer(),
+            throttling: boolean(),
+            dropped_events: non_neg_integer(),
+            last_dropped_log: integer(),
+            # Newest event first; reversed when turned into a batch.
+            queue: [any()],
+            # FIFO of `%{batch: [event], attempts: n}` maps awaiting delivery.
+            batches: :queue.queue()
+          }
+
+    @enforce_keys [
+      :send_events_fn,
+      :batch_size,
+      :max_queue_size,
+      :max_batch_retries
+    ]
+
+    defstruct [
+      :send_events_fn,
+      :batch_size,
+      :max_queue_size,
+      :timeout,
+      :max_batch_retries,
+      :last_dropped_log,
+      timeout_started_at: 0,
+      throttle_wait: 60_000,
+      throttling: false,
+      dropped_events: 0,
+      queue: [],
+      batches: :queue.new()
+    ]
+  end
+
+  @doc """
+  Starts the events worker, or returns `:ignore` when it is switched off.
+
+  The `:events_worker_enabled` setting decides whether a process is started.
+  The `:name` option (default: this module) is used as the registered name and
+  every remaining option is handed to `init/1`.
+  """
+  @spec start_link(Keyword.t()) :: GenServer.on_start()
+  def start_link(opts \\ []) do
+    case Honeybadger.get_env(:events_worker_enabled) do
+      off when off in [nil, false] ->
+        :ignore
+
+      _on ->
+        {name, init_opts} = Keyword.pop(opts, :name, __MODULE__)
+        GenServer.start_link(__MODULE__, init_opts, name: name)
+    end
+  end
+
+  @doc """
+  Hands `event` to the worker for batching.
+
+  This is an asynchronous cast: it returns `:ok` immediately and gives no
+  indication of whether the event was queued or dropped. `server` defaults to
+  the process registered under this module's name.
+  """
+  @spec push(event :: map(), GenServer.server()) :: :ok
+  def push(event, server \\ __MODULE__) do
+    GenServer.cast(server, {:push, event})
+  end
+
+  @doc """
+  Returns the worker's current `State` struct using a synchronous call.
+
+  `server` defaults to the process registered under this module's name.
+  """
+  @spec state(GenServer.server()) :: State.t()
+  def state(server \\ __MODULE__) do
+    GenServer.call(server, {:state})
+  end
+
+  # Builds the initial state. Every setting except `:send_events_fn` is taken
+  # from `opts` when present and from the matching `:events_*` Honeybadger
+  # setting otherwise; `:send_events_fn` defaults to
+  # `Honeybadger.Client.send_events/1`.
+  @impl true
+  def init(opts) do
+    # A GenServer that does not trap exits is killed outright when its supervisor
+    # shuts it down, so `terminate/2` (which flushes unsent events) would never
+    # run. Trapping exits lets the final flush happen on shutdown.
+    Process.flag(:trap_exit, true)
+
+    config = %{
+      send_events_fn: Keyword.get(opts, :send_events_fn, &Honeybadger.Client.send_events/1),
+      batch_size: Keyword.get(opts, :batch_size, Honeybadger.get_env(:events_batch_size)),
+      timeout: Keyword.get(opts, :timeout, Honeybadger.get_env(:events_timeout)),
+      throttle_wait:
+        Keyword.get(opts, :throttle_wait, Honeybadger.get_env(:events_throttle_wait)),
+      max_queue_size:
+        Keyword.get(opts, :max_queue_size, Honeybadger.get_env(:events_max_queue_size)),
+      max_batch_retries:
+        Keyword.get(opts, :max_batch_retries, Honeybadger.get_env(:events_max_batch_retries)),
+      # Start the "dropped events" log interval from now.
+      last_dropped_log: System.monotonic_time(:millisecond)
+    }
+
+    # struct!/2 raises if an enforced key is missing from the config.
+    state = struct!(State, config)
+    {:ok, state}
+  end
+
+  # Replies with the full state struct (used by `state/1`).
+  #
+  # The fourth tuple element re-arms the GenServer timeout with whatever
+  # `current_timeout/1` computes, because handling any message cancels the
+  # previously returned timeout.
+  @impl true
+  def handle_call({:state}, _from, %State{} = state) do
+    {:reply, state, state, current_timeout(state)}
+  end
+
+  # Accepts one pushed event.
+  #
+  # A `timeout_started_at` of 0 means no flush window is open yet, so the first
+  # clause opens one and re-dispatches. The second clause then either counts the
+  # event as dropped (capacity reached), flushes (batch size reached) or just
+  # queues it and keeps waiting for the remaining timeout.
+  @impl true
+  def handle_cast({:push, event}, %State{timeout_started_at: 0} = state) do
+    started = reset_timeout(state)
+    handle_cast({:push, event}, started)
+  end
+
+  def handle_cast({:push, event}, %State{} = state) do
+    cond do
+      total_event_count(state) >= state.max_queue_size ->
+        dropped = %{state | dropped_events: state.dropped_events + 1}
+        {:noreply, dropped, current_timeout(state)}
+
+      # Queue length once this event has been added.
+      length(state.queue) + 1 >= state.batch_size ->
+        flush(%{state | queue: [event | state.queue]})
+
+      true ->
+        {:noreply, %{state | queue: [event | state.queue]}, current_timeout(state)}
+    end
+  end
+
+  # `:timeout` is delivered by GenServer when no message arrived within the
+  # timeout returned from the previous callback; it triggers a flush.
+  @impl true
+  def handle_info(:timeout, state), do: flush(state)
+
+  # Makes one last delivery attempt for everything still held by the worker.
+  #
+  # Note: on a supervisor-initiated shutdown GenServer only invokes this callback
+  # when the process traps exits.
+  @impl true
+  def terminate(_reason, %State{} = state) do
+    Logger.debug("[Honeybadger] Terminating with #{total_event_count(state)} events unsent")
+    # The flush result (a :noreply tuple) is irrelevant here; only the send matters.
+    _ = flush(state)
+    :ok
+  end
+
+  @spec flush(State.t()) :: {:noreply, State.t()} | {:noreply, State.t(), pos_integer()}
+  # Turns the queued events into a batch (if there are any) and attempts to send
+  # all pending batches.
+  #
+  # Returns a plain `{:noreply, state}` (no timeout) when there is nothing left
+  # to send, and `{:noreply, state, timeout}` otherwise.
+  defp flush(state) do
+    cond do
+      state.queue == [] and :queue.is_empty(state.batches) ->
+        # It's all empty so we stop the timeout and reset the
+        # timeout_started_at which will restart on the next push
+        {:noreply, %{state | timeout_started_at: 0}}
+
+      state.queue == [] ->
+        # Only previously failed batches are pending; retry them.
+        attempt_send(state)
+
+      true ->
+        # The queue is built by prepending, so reverse it to restore arrival order.
+        batches = :queue.in(%{batch: Enum.reverse(state.queue), attempts: 0}, state.batches)
+        attempt_send(%{state | queue: [], batches: batches})
+    end
+  end
+
+  @spec attempt_send(State.t()) :: {:noreply, State.t(), pos_integer()}
+  # Sends pending batches oldest first, handling retries and throttling.
+  #
+  # * A batch that is sent successfully is removed.
+  # * A failed batch has its attempt counter bumped; it is kept for a later retry
+  #   while the counter is below `max_batch_retries` and dropped otherwise.
+  # * After a `{:error, :throttled}` result no further batches are sent in this
+  #   pass; they are kept untouched and the next timeout uses `throttle_wait`.
+  #
+  # Afterwards the dropped-events counter is logged (at most once per
+  # @dropped_log_interval) and the flush window is restarted.
+  defp attempt_send(%State{} = state) do
+    # Retained batches are accumulated newest-first (O(1) prepend) and reversed
+    # once below, instead of appending with `++` on every step.
+    {kept_newest_first, throttling} =
+      state.batches
+      |> :queue.to_list()
+      |> Enum.reduce({[], false}, fn
+        # If already throttled, skip sending and retain the batch.
+        pending, {kept, true} ->
+          {[pending | kept], true}
+
+        %{batch: batch, attempts: attempts} = pending, {kept, false} ->
+          case state.send_events_fn.(batch) do
+            :ok ->
+              Logger.debug("[Honeybadger] Sent batch of #{length(batch)} events.")
+              {kept, false}
+
+            {:error, reason} ->
+              throttling = reason == :throttled
+              updated_attempts = attempts + 1
+
+              if throttling do
+                Logger.warning(
+                  "[Honeybadger] Rate limited (429) events - (batch attempt #{updated_attempts}) - waiting for #{state.throttle_wait}ms"
+                )
+              else
+                Logger.debug(
+                  "[Honeybadger] Failed to send events batch (attempt #{updated_attempts}): #{inspect(reason)}"
+                )
+              end
+
+              if updated_attempts < state.max_batch_retries do
+                {[%{pending | attempts: updated_attempts} | kept], throttling}
+              else
+                Logger.debug(
+                  "[Honeybadger] Dropping events batch after #{updated_attempts} attempts."
+                )
+
+                {kept, throttling}
+              end
+          end
+      end)
+
+    current_time = System.monotonic_time(:millisecond)
+
+    # Log dropped events if present and we haven't logged within the last
+    # @dropped_log_interval
+    state =
+      if state.dropped_events > 0 and
+           current_time - state.last_dropped_log >= @dropped_log_interval do
+        Logger.info("[Honeybadger] Dropped #{state.dropped_events} events due to max queue limit")
+        %{state | dropped_events: 0, last_dropped_log: current_time}
+      else
+        state
+      end
+
+    # Restore oldest-first order before rebuilding the FIFO.
+    remaining = kept_newest_first |> Enum.reverse() |> :queue.from_list()
+
+    new_state =
+      %{state | batches: remaining, throttling: throttling}
+      |> reset_timeout()
+
+    {:noreply, new_state, current_timeout(new_state)}
+  end
+
+  @spec total_event_count(State.t()) :: non_neg_integer()
+  # Number of events currently held: the not-yet-batched queue plus every event
+  # inside the pending batches.
+  defp total_event_count(%State{batches: batches, queue: queue}) do
+    batches
+    |> :queue.to_list()
+    |> Enum.reduce(length(queue), fn %{batch: events}, total -> total + length(events) end)
+  end
+
+  # Returns the time remaining until the next flush, for use as a GenServer
+  # timeout.
+  #
+  # `timeout_started_at: 0` marks "no flush window open" (nothing is pending).
+  # Treating that 0 as a real timestamp would measure elapsed time against an
+  # arbitrary origin: monotonic time may be negative, which would yield a timeout
+  # far above the maximum GenServer accepts. Nothing needs flushing in that
+  # state, so wait indefinitely instead.
+  defp current_timeout(%State{timeout_started_at: 0}), do: :infinity
+
+  # While throttled the window is `throttle_wait`, otherwise `timeout`. The
+  # result is clamped to at least 1ms so an overdue flush fires almost at once.
+  defp current_timeout(%State{
+         throttling: throttling,
+         timeout: timeout,
+         throttle_wait: throttle_wait,
+         timeout_started_at: timeout_started_at
+       }) do
+    elapsed = System.monotonic_time(:millisecond) - timeout_started_at
+    window = if throttling, do: throttle_wait, else: timeout
+    max(1, window - elapsed)
+  end
+
+  # Marks "now" (monotonic clock, milliseconds) as the start of the current
+  # flush window.
+  defp reset_timeout(state) do
+    %{state | timeout_started_at: System.monotonic_time(:millisecond)}
+  end
+end