From 9affdd3ec38a81a523df2fd3ef192ae34d68ee1f Mon Sep 17 00:00:00 2001 From: Joaquin Date: Wed, 18 Sep 2024 15:57:34 -0300 Subject: [PATCH 1/2] add time groupping --- README.md | 61 +++++++--- lib/boom_notifier/error_storage.ex | 19 ++- lib/boom_notifier/notification_sender.ex | 69 +++++++---- .../example_app/lib/example_app_web/router.ex | 2 +- test/unit/error_storage_test.exs | 46 +++---- test/unit/notification_sender_test.exs | 114 ++++++++++++++---- 6 files changed, 214 insertions(+), 97 deletions(-) diff --git a/README.md b/README.md index 0c70eee..7d94352 100644 --- a/README.md +++ b/README.md @@ -91,34 +91,38 @@ defmodule YourApp.Router do options: # ... ] ] + + #... +end ``` -## Notification Trigger +## Throttling notifications By default, `BoomNotifier` will send a notification every time an exception is raised. -However, there are different strategies to decide when to send the -notifications using the `:notification_trigger` option with one of the -following values: `:always` and `:exponential`. - -### Always +There are two throttling mechanisms you can use to reduce notification rate. Counting +based and time based throttling. The former allows notifications to be delivered +on exponential error counting and the latter based on time throttling, that is +if a predefined amount of time has passed from the last error. -This option is the default one. It will trigger a notification for every -exception. +### Count Based Throttle ```elixir defmodule YourApp.Router do use Phoenix.Router use BoomNotifier, - notification_trigger: :always, notifiers: [ - # ... - ] + # ... + ], + groupping: :count, + count: :exponential, + time_limit: :timer.minutes(30) +end ``` -### Exponential +#### Exponential It uses a formula of `log2(errors_count)` to determine whether to send a notification, based on the accumulated error count for each specific @@ -132,7 +136,7 @@ defmodule YourApp.Router do use Phoenix.Router use BoomNotifier, - notification_trigger: :exponential, + count: :exponential, notifiers: [ # ... ] @@ -143,24 +147,42 @@ defmodule YourApp.Router do use Phoenix.Router use BoomNotifier, - notification_trigger: [exponential: [limit: 100]] + count: [exponential: [limit: 100]] notifiers: [ # ... ] ``` -### Notification trigger time limit +### Time Based Throttle + +```elixir +defmodule YourApp.Router do + use Phoenix.Router + + use BoomNotifier, + notifiers: [ + # ... + ], + groupping: :time, + throttle: :timer.minutes(1) +end +``` + +### Time Limit -If you've defined a triggering strategy which holds off notification delivering you can define a time limit value -which will be used to deliver the notification after a time limit milliseconds have passed from the last error. The -time counter is reset on new errors and only applies for cases where notifications are not sent. +Both groupping strategies allow you to define a time limit that tells boom to deliver a notification if the amount +of time has passed from the last time a notification was sent or error groupping started. + +If specified, a notification will be triggered even though the groupping strategy does not met the criteria. For +time based throttling this is usefull if errors keep appearing before the throttle time and for count based throttle +to reset (and notify) the grupping after a certain time period. ```elixir defmodule YourApp.Router do use Phoenix.Router use BoomNotifier, - notification_trigger: [:exponential], + count: [:exponential], time_limit: :timer.minutes(30), notifier: CustomNotifier @@ -168,7 +190,6 @@ defmodule YourApp.Router do end ``` - ## Custom data or Metadata By default, `BoomNotifier` will **not** include any custom data from your diff --git a/lib/boom_notifier/error_storage.ex b/lib/boom_notifier/error_storage.ex index f6b3b72..2830f24 100644 --- a/lib/boom_notifier/error_storage.ex +++ b/lib/boom_notifier/error_storage.ex @@ -15,7 +15,8 @@ defmodule BoomNotifier.ErrorStorage do ] @type t :: %__MODULE__{} - @type error_strategy :: :always | :exponential | [exponential: [limit: non_neg_integer()]] + @type error_strategy :: + :always | :none | :exponential | [exponential: [limit: non_neg_integer()]] use Agent, start: {__MODULE__, :start_link, []} @@ -32,8 +33,8 @@ defmodule BoomNotifier.ErrorStorage do occurrences is increased and it also updates the first and last time it happened. """ - @spec store_error(ErrorInfo.t()) :: :ok - def store_error(error_info) do + @spec accumulate(ErrorInfo.t()) :: :ok + def accumulate(error_info) do %{key: error_hash_key} = error_info timestamp = error_info.timestamp || DateTime.utc_now() @@ -137,7 +138,17 @@ defmodule BoomNotifier.ErrorStorage do |> Map.replace!(:last_occurrence, nil) end - @spec do_send_notification?(nil | __MODULE__.t()) :: boolean() + def eleapsed(nil), do: 0 + + def eleapsed(%__MODULE__{} = error_info) do + DateTime.diff( + error_info.last_occurrence, + error_info.first_occurrence, + :millisecond + ) + end + + @spec do_send_notification?(ErrorInfo.t() | nil) :: boolean() defp do_send_notification?(nil), do: false defp do_send_notification?(error_storage_item) do diff --git a/lib/boom_notifier/notification_sender.ex b/lib/boom_notifier/notification_sender.ex index 2ac200b..85c1888 100644 --- a/lib/boom_notifier/notification_sender.ex +++ b/lib/boom_notifier/notification_sender.ex @@ -14,48 +14,49 @@ defmodule BoomNotifier.NotificationSender do GenServer.start_link(__MODULE__, :ok, name: __MODULE__) end - def async_notify(notifier, occurrences, options) do - GenServer.cast(__MODULE__, {:notify, notifier, occurrences, options}) - end - def async_trigger_notify(settings, error_info) do GenServer.cast(__MODULE__, {:trigger_notify, settings, error_info}) end - def notify(notifier, occurrences, options) do - spawn_link(fn -> - notifier.notify(occurrences, options) - end) - end - - def notify_all(settings, error_info) do - notification_trigger = Keyword.get(settings, :notification_trigger, :always) - occurrences = ErrorStorage.reset_stats(error_info, notification_trigger) - error_info = Map.put(error_info, :occurrences, occurrences) + def trigger_notify(settings, error_info) do + ErrorStorage.accumulate(error_info) - BoomNotifier.walkthrough_notifiers( + do_trigger_notify( + Keyword.get(settings, :groupping, :count), settings, - fn notifier, options -> notify(notifier, error_info, options) end + error_info ) end - def trigger_notify(settings, error_info) do - timeout = Keyword.get(settings, :time_limit) - - ErrorStorage.store_error(error_info) + defp do_trigger_notify(:count, settings, error_info) do + time_limit = Keyword.get(settings, :time_limit) if ErrorStorage.send_notification?(error_info) do notify_all(settings, error_info) :ok else - if timeout do - {:schedule, timeout} + if time_limit do + {:schedule, time_limit} else - :ok + :ignored end end end + defp do_trigger_notify(:time, settings, error_info) do + throttle = Keyword.get(settings, :throttle, 100) + time_limit = Keyword.get(settings, :time_limit) + + stats = ErrorStorage.get_stats(error_info) + + if ErrorStorage.eleapsed(stats) >= time_limit do + notify_all(settings, error_info) + :ok + else + {:schedule, throttle} + end + end + # Server callbacks @impl true @@ -78,6 +79,9 @@ defmodule BoomNotifier.NotificationSender do cancel_timer(timer) case trigger_notify(settings, error_info) do + :ignored -> + {:noreply, state} + :ok -> {:noreply, state} @@ -115,6 +119,25 @@ defmodule BoomNotifier.NotificationSender do {:noreply, state |> Map.delete(error_info.key)} end + # Private methods + + defp notify(notifier, occurrences, options) do + spawn_link(fn -> + notifier.notify(occurrences, options) + end) + end + + defp notify_all(settings, error_info) do + count_strategy = Keyword.get(settings, :count) + occurrences = ErrorStorage.reset_stats(error_info, count_strategy) + error_info = Map.put(error_info, :occurrences, occurrences) + + BoomNotifier.walkthrough_notifiers( + settings, + fn notifier, options -> notify(notifier, error_info, options) end + ) + end + defp cancel_timer(nil), do: nil defp cancel_timer(timer), do: Process.cancel_timer(timer) end diff --git a/test/example_app/lib/example_app_web/router.ex b/test/example_app/lib/example_app_web/router.ex index 4a8b8f2..59ece0a 100644 --- a/test/example_app/lib/example_app_web/router.ex +++ b/test/example_app/lib/example_app_web/router.ex @@ -2,7 +2,7 @@ defmodule ExampleAppWeb.Router do use ExampleAppWeb, :router use BoomNotifier, - notification_trigger: [exponential: [limit: 8]], + count: [exponential: [limit: 8]], custom_data: [:assigns, :logger], ignore_exceptions: [IgnoreExceptionError], notifiers: [ diff --git a/test/unit/error_storage_test.exs b/test/unit/error_storage_test.exs index bf24ad6..09d65c9 100644 --- a/test/unit/error_storage_test.exs +++ b/test/unit/error_storage_test.exs @@ -24,7 +24,7 @@ defmodule ErrorStorageTest do clear_error_storage() end - describe "store_error/1" do + describe "accumulate/1" do test "groups errors by type" do another_timestamp = DateTime.utc_now() @@ -41,9 +41,9 @@ defmodule ErrorStorageTest do %{key: another_error_hash} = another_error_info - ErrorStorage.store_error(@error_info) - ErrorStorage.store_error(@error_info) - ErrorStorage.store_error(another_error_info) + ErrorStorage.accumulate(@error_info) + ErrorStorage.accumulate(@error_info) + ErrorStorage.accumulate(another_error_info) %{@error_hash => error_stat_1, ^another_error_hash => error_stat_2} = Agent.get(:boom_notifier, & &1) @@ -66,8 +66,8 @@ defmodule ErrorStorageTest do describe "get_stats/1" do test "returns the errors for the proper error kind" do - ErrorStorage.store_error(@error_info) - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) + ErrorStorage.accumulate(@error_info) assert ErrorStorage.get_stats(@error_info) == %ErrorStorage{ @@ -85,7 +85,7 @@ defmodule ErrorStorageTest do timestamp: another_timestamp } - ErrorStorage.store_error(another_error_info) + ErrorStorage.accumulate(another_error_info) assert ErrorStorage.get_stats(another_error_info) == %ErrorStorage{ @@ -104,20 +104,20 @@ defmodule ErrorStorageTest do describe "send_notification?/1" do test "returns false when count is smaller than the error length" do # increase the max capacity to 2 - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) ErrorStorage.reset_stats(@error_info, :exponential) - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) refute ErrorStorage.send_notification?(@error_info) end test "returns true when error length is greater or equal than count" do # creates the error key - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) # increase the max capacity to 2 ErrorStorage.reset_stats(@error_info, :exponential) - ErrorStorage.store_error(@error_info) - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) + ErrorStorage.accumulate(@error_info) another_error_info = %ErrorInfo{ reason: "Another error information", @@ -126,12 +126,12 @@ defmodule ErrorStorageTest do } # creates the error key - ErrorStorage.store_error(another_error_info) + ErrorStorage.accumulate(another_error_info) # increase the max capacity to 2 ErrorStorage.reset_stats(another_error_info, :exponential) - ErrorStorage.store_error(another_error_info) - ErrorStorage.store_error(another_error_info) - ErrorStorage.store_error(another_error_info) + ErrorStorage.accumulate(another_error_info) + ErrorStorage.accumulate(another_error_info) + ErrorStorage.accumulate(another_error_info) assert ErrorStorage.send_notification?(@error_info) assert ErrorStorage.send_notification?(another_error_info) @@ -144,7 +144,7 @@ defmodule ErrorStorageTest do describe "reset_stats/2" do test "it returns the error storage info" do - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) error_storage_item = ErrorStorage.reset_stats(@error_info) assert %{accumulated_occurrences: 1} = error_storage_item @@ -159,7 +159,7 @@ defmodule ErrorStorageTest do end test "increases the counter when notification trigger is :exponential" do - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) ErrorStorage.reset_stats(@error_info, :exponential) [error_stat] = Agent.get(:boom_notifier, fn state -> state end) |> Map.values() @@ -192,8 +192,8 @@ defmodule ErrorStorageTest do } end - test "increases the counter when notification trigger is :exponential and :limit is set" do - ErrorStorage.store_error(@error_info) + test "increases the counter when count is :exponential and :limit is set" do + ErrorStorage.accumulate(@error_info) ErrorStorage.reset_stats(@error_info, exponential: [limit: 5]) [error_stat] = Agent.get(:boom_notifier, fn state -> state end) |> Map.values() @@ -227,7 +227,7 @@ defmodule ErrorStorageTest do end test "does not increase the counter when notification_trigger is :always" do - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) ErrorStorage.reset_stats(@error_info, :always) [error_stat] = Agent.get(:boom_notifier, fn state -> state end) |> Map.values() @@ -266,8 +266,8 @@ defmodule ErrorStorageTest do %{key: another_error_hash} = another_error_info - ErrorStorage.store_error(@error_info) - ErrorStorage.store_error(another_error_info) + ErrorStorage.accumulate(@error_info) + ErrorStorage.accumulate(another_error_info) ErrorStorage.reset_stats(@error_info, :exponential) diff --git a/test/unit/notification_sender_test.exs b/test/unit/notification_sender_test.exs index 5f032b1..c83d1b3 100644 --- a/test/unit/notification_sender_test.exs +++ b/test/unit/notification_sender_test.exs @@ -10,6 +10,7 @@ defmodule BoomNotifier.NotificationSenderTest do } @time_limit 500 + @throttle 500 @receive_timeout 100 @settings_basic [ @@ -17,11 +18,16 @@ defmodule BoomNotifier.NotificationSenderTest do options: [pid_name: BoomNotifier.TestMessageProxy] ] - @settings_groupping @settings_basic ++ - [ - time_limit: @time_limit, - notification_trigger: :exponential - ] + @settings_groupping_count @settings_basic ++ + [ + time_limit: @time_limit, + count: :exponential + ] + @settings_groupping_time @settings_basic ++ + [ + groupping: :time, + throttle: @throttle + ] defmodule NotificationSenderTestNotifier do def notify(error_info, opts) do @@ -45,6 +51,11 @@ defmodule BoomNotifier.NotificationSenderTest do %{error_info: error_info} end + def notification_sent(error_info) do + ErrorStorage.accumulate(error_info) + ErrorStorage.reset_stats(error_info, :exponential) + end + setup do clear_error_storage() @@ -56,7 +67,7 @@ defmodule BoomNotifier.NotificationSenderTest do setup :build_error_info - describe "with default notification trigger (always)" do + describe "with default notification count (none)" do test "sends a notification", %{error_info: error_info} do NotificationSender.trigger_notify(@settings_basic, error_info) @@ -65,19 +76,19 @@ defmodule BoomNotifier.NotificationSenderTest do end end - describe "sync call with exponential notification trigger" do + describe "sync call with exponential notification count" do test "sends a notification", %{error_info: error_info} do - NotificationSender.trigger_notify(@settings_groupping, error_info) + NotificationSender.trigger_notify(@settings_groupping_count, error_info) {_, rcv_error_info} = assert_receive({:notify_called, _}, @receive_timeout) assert Map.delete(rcv_error_info, :occurrences) == Map.delete(error_info, :occurrences) end test "does not send a second notification", %{error_info: error_info} do - ErrorStorage.store_error(error_info) - ErrorStorage.reset_stats(error_info, :exponential) + notification_sent(error_info) - trigger_notify_resp = NotificationSender.trigger_notify(@settings_groupping, error_info) + trigger_notify_resp = + NotificationSender.trigger_notify(@settings_groupping_count, error_info) refute_receive({:notify_called, _}, @receive_timeout) assert {:schedule, @time_limit} = trigger_notify_resp @@ -85,7 +96,7 @@ defmodule BoomNotifier.NotificationSenderTest do test "sends notification occurrences along error info", %{error_info: error_info} do for _ <- 1..7 do - NotificationSender.trigger_notify(@settings_groupping, error_info) + NotificationSender.trigger_notify(@settings_groupping_count, error_info) end assert_receive({:notify_called, %{occurrences: %{accumulated_occurrences: 1}}}) @@ -94,33 +105,30 @@ defmodule BoomNotifier.NotificationSenderTest do end end - describe "async call with exponential notification trigger" do + describe "async call with exponential notification count" do test "sends a notification", %{error_info: error_info} do - NotificationSender.async_trigger_notify(@settings_groupping, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) assert_receive({:notify_called, _}, @receive_timeout) end end - describe "repeated async call with exponential notification trigger" do + describe "repeated async call with exponential notification count" do setup(%{error_info: error_info}) do - ErrorStorage.store_error(error_info) - ErrorStorage.reset_stats(error_info, :exponential) + notification_sent(error_info) :ok end test "sends a second notification after a timeout", %{error_info: error_info} do - NotificationSender.async_trigger_notify(@settings_groupping, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) assert_receive({:notify_called, _}, @time_limit + @receive_timeout) - - assert error_info |> ErrorStorage.get_stats() |> Map.get(:accumulated_occurrences) == - 0 + assert error_info |> ErrorStorage.get_stats() |> Map.get(:accumulated_occurrences) == 0 end test "does not send a second notification before a timeout", %{error_info: error_info} do - NotificationSender.async_trigger_notify(@settings_groupping, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) refute_receive({:notify_called, _}, @time_limit - 50) @@ -131,9 +139,9 @@ defmodule BoomNotifier.NotificationSenderTest do "it does not sends a scheduled notification if another error happens", %{error_info: error_info} ) do - NotificationSender.async_trigger_notify(@settings_groupping, error_info) - NotificationSender.async_trigger_notify(@settings_groupping, error_info) - NotificationSender.async_trigger_notify(@settings_groupping, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) notification_sender_state = NotificationSender @@ -150,7 +158,7 @@ defmodule BoomNotifier.NotificationSenderTest do "it does not schedule a notification if time_limit is not specified", %{error_info: error_info} ) do - settings = Keyword.delete(@settings_groupping, :time_limit) + settings = Keyword.delete(@settings_groupping_count, :time_limit) NotificationSender.async_trigger_notify(settings, error_info) NotificationSender.async_trigger_notify(settings, error_info) @@ -161,4 +169,58 @@ defmodule BoomNotifier.NotificationSenderTest do assert notification_sender_state |> Map.keys() |> length() == 0 end end + + describe "with time groupping" do + setup :build_error_info + + test "it returns a schedule action", %{error_info: error_info} do + result = NotificationSender.trigger_notify(@settings_groupping_time, error_info) + + assert {:schedule, @throttle} = result + end + + test "it does not send the notification before throttle", %{error_info: error_info} do + NotificationSender.async_trigger_notify(@settings_groupping_time, error_info) + + refute_receive({:notify_called, _}, @throttle - 50) + end + + test "it sends the notification after throttle", %{error_info: error_info} do + NotificationSender.async_trigger_notify(@settings_groupping_time, error_info) + + assert_receive({:notify_called, _}, @throttle + 50) + end + + test "it throttles repeated error notifications", %{error_info: error_info} do + for _ <- 1..20 do + NotificationSender.async_trigger_notify(@settings_groupping_time, error_info) + end + + state = :sys.get_state(NotificationSender) + assert_receive({:notify_called, _}, @throttle + 50) + error_key = error_info.key + assert %{^error_key => _} = state + assert state |> Map.keys() |> length() == 1 + end + + test "it sends a throttle notification if time limit is present", %{error_info: error_info} do + settings = + @settings_groupping_time + |> Keyword.merge( + throttle: 400, + time_limit: 100 + ) + + for _ <- 1..3 do + NotificationSender.async_trigger_notify( + settings, + error_info |> Map.put(:timestamp, DateTime.utc_now()) + ) + + :timer.sleep(100) + end + + assert_received({:notify_called, _}) + end + end end From 51cfb59b1de40f8fed6bfb2db2686026b83706e6 Mon Sep 17 00:00:00 2001 From: Joaquin Date: Thu, 19 Sep 2024 10:38:44 -0300 Subject: [PATCH 2/2] change notification_trigger occurrences to count Fix test that was not detecting this --- test/unit/error_storage_test.exs | 2 +- test/unit/notifier_test.exs | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/test/unit/error_storage_test.exs b/test/unit/error_storage_test.exs index 09d65c9..80f8a54 100644 --- a/test/unit/error_storage_test.exs +++ b/test/unit/error_storage_test.exs @@ -226,7 +226,7 @@ defmodule ErrorStorageTest do } end - test "does not increase the counter when notification_trigger is :always" do + test "does not increase the counter when count is :always" do ErrorStorage.accumulate(@error_info) ErrorStorage.reset_stats(@error_info, :always) diff --git a/test/unit/notifier_test.exs b/test/unit/notifier_test.exs index f857b5d..93987d3 100644 --- a/test/unit/notifier_test.exs +++ b/test/unit/notifier_test.exs @@ -118,7 +118,7 @@ defmodule NotifierTest do defmodule PlugErrorWithExponentialTriggerNotifier do use BoomNotifier, notifier: FakeNotifier, - notification_trigger: :exponential, + count: :exponential, options: [ subject: "BOOM error caught", sender_pid_name: TestMessageProxy @@ -132,7 +132,7 @@ defmodule NotifierTest do defmodule PlugErrorWithExponentialTriggerWithLimitNotifier do use BoomNotifier, notifier: FakeNotifier, - notification_trigger: [exponential: [limit: 3]], + count: [exponential: [limit: 3]], options: [ subject: "BOOM error caught", sender_pid_name: TestMessageProxy @@ -229,7 +229,7 @@ defmodule NotifierTest do assert_receive(%{exception: %{subject: "BOOM error caught: thrown error"}}, @receive_timeout) end - test "reports exception in groups when :notification_trigger setting is :exponential" do + test "reports exception in groups when :count setting is :exponential" do conn = conn(:get, "/") catch_error(PlugErrorWithExponentialTriggerNotifier.call(conn, [])) @@ -239,11 +239,10 @@ defmodule NotifierTest do catch_error(PlugErrorWithExponentialTriggerNotifier.call(conn, [])) assert_receive(%{exception: _}, @receive_timeout) - {:message_queue_len, exceptions} = Process.info(self(), :message_queue_len) - assert exceptions == 0 + refute_receive(%{exception: _}, @receive_timeout) end - test "reports exception in groups when :notification_trigger setting is :exponential with limit" do + test "reports exception in groups when :count setting is :exponential with limit" do conn = conn(:get, "/") catch_error(PlugErrorWithExponentialTriggerWithLimitNotifier.call(conn, [])) @@ -263,11 +262,10 @@ defmodule NotifierTest do catch_error(PlugErrorWithExponentialTriggerWithLimitNotifier.call(conn, [])) assert_receive(%{exception: _}, @receive_timeout) - {:message_queue_len, exceptions} = Process.info(self(), :message_queue_len) - assert exceptions == 0 + refute_receive(%{exception: _}, @receive_timeout) end - test "reports every exception when :notification_trigger setting is not set" do + test "reports every exception when :count setting is not set" do conn = conn(:get, "/") catch_error(PlugErrorWithSingleNotifier.call(conn, []))