diff --git a/.gitignore b/.gitignore index 57b2186..861d4b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,12 @@ out/bin/* +out/profiled/* tools/bin/* tmp/* target/* .make/* .tasty-rerun-log **/.stack-work/* +**/.stack-work_* *.cabal /tutorial.md -.dir-locals.el \ No newline at end of file +.dir-locals.el diff --git a/.tools.stack.yaml b/.tools.stack.yaml index be5c4b2..79f3052 100644 --- a/.tools.stack.yaml +++ b/.tools.stack.yaml @@ -1,7 +1,2 @@ -resolver: lts-10.0 -extra-deps: -- czipwith-1.0.0.0 -- data-tree-print-0.1.0.0 -- deque-0.2 -- monad-memo-0.4.1 -- butcher-1.2.1.0 +resolver: lts-10.2 +extra-deps: [] diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ed24c6..e883ef5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,37 @@ The change log is available [on GitHub][2]. [1]: http://semver.org/spec/v2.0.0.html [2]: https://github.com/roman/capataz/releases +## v0.1.0.0 Who supervises the supervisor? + +**BREAKING CHANGES** + +* Introduction of the `Process` type which is composed of both `Supervisor` and + `Worker` types +* Replace `defWorkerSpec` in favor of `workerSpec` and `workerSpecWithDefaults` + to build static workers +* Replace of `defWorkerOptions` in favor of `buildWorkerOptions` and + `buildWorkerOptionsWithDefaults` to build dynamic workers +* Replace `terminateWorker` in favor of `terminateProcess` +* Add `supervisorSpec`, `supervisorSpecWithDefaults` to build static supervision + trees +* Add `forkSupervisor`, `buildSupervisorOptions` and + `buildSupervisorOptionsWithDefaults` to build dynamic supervision trees +* Replace usage of default records semantics in favor of Lenses +* Add `joinCapatazThread` to avoid providing direct access to async of root + supervision tree +* Add `getSupervisorProcessId` to access the `ProcessId` of a given `Supervisor` + record (for dynamic termination) +* Add `getSupervisorAsync` to access the `Async ()` record of a supervisor + process thread +* Add `getCapatazTeardown` to access the `Teardown` record of 
the capataz system +* Move `CapatazEvent` records to new module `Control.Concurrent.Capataz.Event` + to avoid requiring `DuplicateRecordFields` extension on API users +* Remove `WorkerAction` alias as it is used for library development + documentation +* Add capataz-repo-watcher example to showcase static supervision trees +* Update capataz-simple-example unix-process example +* `forkCapataz` signature now requires name for root supervisor + ## v0.0.0.2 * Bump bounds of `tasty` dependency diff --git a/Makefile b/Makefile index dc9d559..6f06546 100644 --- a/Makefile +++ b/Makefile @@ -7,34 +7,37 @@ ################################################################################ ## VARIABLE +PROJECT_NAME:=$(shell cat package.yaml | grep 'name:' | awk '{print $$2}') PROJECT_VERSION:=$(shell cat package.yaml | grep -v '\#' | grep version | awk '{print $$2}' | sed -e "s;'\(.*\)';\1;") RESOLVER ?= $(shell cat stack.yaml | grep -v '\#' | grep resolver | awk '{print $$2}') +PROJECT_SETUP_FILE=./.make/setup_done -FIND_HASKELL_SOURCES=find . -name "*.hs" -not -path '*.stack-work*' -HASKELL_FILES:=$(shell $(FIND_HASKELL_SOURCES) | grep 'src\|test') +FIND_HASKELL_FILES=find . 
-name "*.hs" -not -path '*.stack-work*' +HASKELL_FILES:=$(shell $(FIND_HASKELL_FILES) | grep 'src\|test') -BIN_DIR:=./out/bin +PROJECT_BIN_DIR:=./out/bin -STACK_DIST_DIR:=$(shell stack path --dist-dir) -SDIST_DIR_NAME:=capataz-$(PROJECT_VERSION) -INTERNAL_SDIST_TAR:=$(STACK_DIST_DIR)/$(SDIST_DIR_NAME).tar.gz +SDIST_DIR_NAME:=$(PROJECT_NAME)-$(PROJECT_VERSION) +INTERNAL_SDIST_TAR:=$(shell stack path --dist-dir)/$(SDIST_DIR_NAME).tar.gz +PROJECT_SDIST_TAR=target/$(SDIST_DIR_NAME).tar.gz -TOOLS_DIR=./tools/bin -BRITTANY_BIN:=$(TOOLS_DIR)/brittany -STYLISH_BIN:=$(TOOLS_DIR)/stylish-haskell -HLINT_BIN:=$(TOOLS_DIR)/hlint -PPSH_BIN:=$(TOOLS_DIR)/ppsh +PROJECT_TOOLS_DIR=./tools/bin +BRITTANY_BIN:=$(PROJECT_TOOLS_DIR)/brittany +STYLISH_BIN:=$(PROJECT_TOOLS_DIR)/stylish-haskell +HLINT_BIN:=$(PROJECT_TOOLS_DIR)/hlint +PPSH_BIN:=$(PROJECT_TOOLS_DIR)/ppsh +REFACTOR_BIN:=$(PROJECT_TOOLS_DIR)/refactor -EXAMPLE1_BIN=$(BIN_DIR)/example1 -EXAMPLE2_BIN=$(BIN_DIR)/example2 +EXAMPLE1_BIN=$(PROJECT_BIN_DIR)/example1 +EXAMPLE2_BIN=$(PROJECT_BIN_DIR)/example2 -BRITTANY=$(BRITTANY_BIN) --config-file .brittany.yml --write-mode inplace {} \; -STYLISH=$(STYLISH_BIN) -i {} \; -HLINT=$(HLINT_BIN) --refactor --refactor-options -i {} \; +BRITTANY_FIND_EXEC=$(BRITTANY_BIN) --config-file .brittany.yml --write-mode inplace {} \; +STYLISH_FIND_EXEC=$(STYLISH_BIN) -i {} \; +HLINT_FIND_EXEC=$(HLINT_BIN) --with-refactor=$$(pwd)/$(REFACTOR_BIN) --refactor --refactor-options -i {} \; STACK:=stack --resolver $(RESOLVER) --install-ghc --local-bin-path ./target/bin NIGHTLY_STACK:=stack --resolver nightly --install-ghc -TOOLS_STACK:=stack --stack-yaml .tools.stack.yaml --install-ghc --local-bin-path $(TOOLS_DIR) +TOOLS_STACK:=stack --stack-yaml .tools.stack.yaml --install-ghc --local-bin-path $(PROJECT_TOOLS_DIR) ################################################################################ @@ -43,29 +46,25 @@ help: ## Display this message 
################################################################################ -$(HLINT_BIN): - $(TOOLS_STACK) install hlint - -$(STYLISH_BIN): - $(TOOLS_STACK) install stylish-haskell +$(EXAMPLE1_BIN): $(HASKELL_FILES) + $(STACK) build --copy-bins --local-bin-path $(PROJECT_BIN_DIR) --test --no-run-tests --haddock --no-haddock-deps --pedantic -$(BRITTANY_BIN): - $(TOOLS_STACK) install brittany +$(EXAMPLE2_BIN) : $(EXAMPLE1_BIN) -$(PPSH_BIN): - $(STACK) install pretty-show +$(INTERNAL_SDIST_TAR): + @mkdir -p target + $(NIGHTLY_STACK) sdist . --pvp-bounds both -$(EXAMPLE1_BIN): $(HASKELL_FILES) - $(STACK) build --copy-bins --local-bin-path $(BIN_DIR) --test --no-run-tests --haddock --no-haddock-deps --pedantic +$(PROJECT_SDIST_TAR): $(INTERNAL_SDIST_TAR) + cp $(INTERNAL_SDIST_TAR) target -$(EXAMPLE2_BIN) : $(EXAMPLE1_BIN) -.make/setup_done: - $(TOOLS_STACK) install hlint stylish-haskell pretty-show brittany refactor +$(PROJECT_SETUP_FILE): + $(TOOLS_STACK) install hlint stylish-haskell pretty-show brittany apply-refact chmod -R go-w .stack-work chmod go-w .ghci @mkdir -p .make - @touch .make/setup_done + @touch $(PROJECT_SETUP_FILE) ################################################################################ @@ -74,12 +73,9 @@ build: $(EXAMPLE1_BIN) ## Build library and example binaries test: $(EXAMPLE1_BIN) ## Execute test suites $(STACK) test --dump-logs -sdist: clean ## Build a release - @mkdir -p target - $(NIGHTLY_STACK) sdist . 
--pvp-bounds both - cp $(INTERNAL_SDIST_TAR) target +sdist: $(PROJECT_SDIST_TAR) ## Build a release -untar-sdist: sdist +untar-sdist: $(INTERNAL_SDIST_TAR) @mkdir -p tmp tar xzf $(INTERNAL_SDIST_TAR) @rm -rf tmp/$(SDIST_DIR_NAME) || true @@ -88,24 +84,22 @@ untar-sdist: sdist test-sdist: untar-sdist cd tmp/$(SDIST_DIR_NAME) && $(NIGHTLY_STACK) init --force && $(NIGHTLY_STACK) build --test --bench --haddock --no-run-benchmarks -format: $(BRITTANY_BIN) $(STYLISH_BIN) ## Normalize style of source files - $(FIND_HASKELL_SOURCES) -exec $(BRITTANY) -exec $(STYLISH) && git diff --exit-code +format: $(PROJECT_SETUP_FILE) ## Normalize style of source files + $(FIND_HASKELL_FILES) -exec $(BRITTANY_FIND_EXEC) -exec $(STYLISH_FIND_EXEC) && git diff --exit-code -lint: $(HLINT_BIN) ## Execute linter - $(HLINT_BIN) $$($(FIND_HASKELL_SOURCES)) +lint: $(PROJECT_SETUP_FILE) ## Execute linter + $(FIND_HASKELL_FILES) -exec $(HLINT_FIND_EXEC) && git diff --exit-code -repl: $(PPSH_BIN) ## Start project's repl - @chmod go-w -R .stack-work - @chmod go-w .ghci +repl: $(PROJECT_SETUP_FILE) ## Start project's repl stack ghci clean: ## Clean built artifacts - rm -f $(BIN_DIR)/* + rm -f $(PROJECT_BIN_DIR)/* rm -f target/* rm -rf tmp/* stack clean -dev-setup: .make/setup_done ## Install development dependencies +dev-setup: $(PROJECT_SETUP_FILE) ## Install development dependencies ################################################################################ ## Demo tasks diff --git a/README.md b/README.md index cadb207..163e626 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ * [Documentation](#documentation) * [Development](#development) -## Raison d'etre +## Raison d'être As time progresses, I've come to love developing concurrent applications in Haskell, its API (STM, MVars, etc.) and light threading RTS bring a lot to the @@ -29,16 +29,16 @@ provides a simple Supervisor API. 
This library is intended to be a drop-in replacement to `forkIO` invocations throughout your codebase, the difference being, you'll need to do a bit more of setup specifying supervision rules, and also pass along a reference of a -capataz descriptor to every thread fork. +supervisor for every thread you fork. ### Why not [distributed-process](https://hackage.haskell.org/package/distributed-process)? `distributed-process` is an impressive library, and brings many great utilities -if you need to develop applications that are reliable. However, it is a -heavyweight solution that will enforce serious changes to your application. It -also optimizes its implementation around the *distributed* part of its name. -This library is intended to provide some benefits of `distributed-process` , -without the baggage. +if you need to develop applications that need to be distributed and reliable. +However, it is a heavyweight solution that will enforce serious changes to your +application. This library is intended to provide the reliability benefits of +`distributed-process`, without the constraints imposed by the *distributed* +part. ### Why not a complete actor system? 
@@ -91,12 +91,15 @@ dependencies: ## Development [![Build Status](https://travis-ci.org/roman/Haskell-capataz.svg?branch=master)](https://travis-ci.org/roman/Haskell-capataz) -[![Github](https://img.shields.io/github/commits-since/roman/haskell-capataz/v0.0.0.2.svg)](https://img.shields.io/github/commits-since/roman/haskell-capataz/v0.0.0.2.svg) +[![Github](https://img.shields.io/github/commits-since/roman/haskell-capataz/v0.1.0.0.svg)](https://img.shields.io/github/commits-since/roman/haskell-capataz/v0.1.0.0.svg) [![Hackage Dependencies](https://img.shields.io/hackage-deps/v/capataz.svg)](http://packdeps.haskellers.com/feed?needle=capataz) Follow the [developer guidelines](https://romanandreg.gitbooks.io/capataz/content/developer-guidelines.html) -## In next release +## In future releases -* Add support for supervising supervisors +* Replace Protolude in favor of RIO +* Documentation of performance analysis +* Documentation improvements +* capataz-dashboard package that provides web-ui with Supervisor statistics * Ensure unit tests always finish on all concurrent scenarios (dejafu experiment) diff --git a/docs/developer-guidelines.md b/docs/CONTRIBUTING.md similarity index 55% rename from docs/developer-guidelines.md rename to docs/CONTRIBUTING.md index 19b9aeb..1967905 100644 --- a/docs/developer-guidelines.md +++ b/docs/CONTRIBUTING.md @@ -1,36 +1,29 @@ # Developer Guidelines [![Build Status](https://travis-ci.org/roman/Haskell-capataz.svg?branch=master)](https://travis-ci.org/roman/Haskell-capataz) -[![Github](https://img.shields.io/github/commits-since/roman/haskell-capataz/v0.0.0.2.svg)](https://img.shields.io/github/commits-since/roman/haskell-capataz/v0.0.0.2.svg) +[![Github](https://img.shields.io/github/commits-since/roman/haskell-capataz/v0.1.0.0.svg)](https://img.shields.io/github/commits-since/roman/haskell-capataz/v0.1.0.0.svg) [![Hackage Dependencies](https://img.shields.io/hackage-deps/v/capataz.svg)](https://img.shields.io/hackage/v/capataz.svg) 
## Dependencies -You'll need to install [Stack](https://github.com/commercialhaskell/stack), once installed, you can execute the `make` command. +You'll need to install [Stack](https://github.com/commercialhaskell/stack), once installed, you can execute the `make` command and learn tasks supported in the project. You'll need to make sure you invoke `make format` and `make lint` when pushing changes, otherwise the Pull Request builder will fail. ## General Overview -This project heavily relies in two (2) Haskell extensions, [`NamedFieldPuns`]() and [`DuplicateRecordFields`](). +This project heavily relies in two (2) Haskell extensions, [`NamedFieldPuns`](https://downloads.haskell.org/~ghc/8.2.1/docs/html/users_guide/glasgow_exts.html#record-puns) and [`DuplicateRecordFields`](https://downloads.haskell.org/~ghc/8.2.1/docs/html/users_guide/glasgow_exts.html#duplicate-record-fields). -You can tell that _many_ records share the same fields, this is because these fields represent the same data in different contexts. This makes IMO the code more readable because we don't use different names (say, with a prefixes) to represent the same piece of information. +You can tell that many records share the same fields, this is because these fields represent the same data in different contexts. This makes IMO the code more readable because we don't use different names (say, with a prefixes) to represent the same piece of information. However, this has the unfortunate side-effect that when using the field name as a function, we get ambiguous symbol errors from the compiler. To alliviate this, we only access the fields through _field pun_ notation. +Also, to avoid the requirement to use the `DuplicateRecordFields` extension on clients of the API, we provide lenses of the public API fields. + The code has been throughly documented, if you believe the documentation could be better, please create a ticket in Github with suggestions. 
## Notes on testsuite -All tests related to this API are in a single module `Control.Concurrent.CapatazTest`, in this file we have defined: - -* Assertion functions to get attributes from a `CapatazEvent` - -* Helpers to run the test (reduce boilerplate) - -* Actual tests - -The module contains documentation for all the helper functions, and hopefully the test descriptions should be -enough to understand what is being tested. +The library is tested through integration tests that collect `CapatazEvent` from the system and assert they happen, this approach works great to avoid testing internal parts of the code that can change, however, the test-suite is not stable between executions because of timing. If you have strong opinions about this testing approach, please reach out, I'm trying to validate if this is a good idea or not. diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index cd3d2e0..db9f86f 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -1,7 +1,11 @@ * Tutorials - * [Basic Tutorial using Unix Processes](tutorial.md) + * v0.0 + * [Basic Tutorial using Unix Processes](v0.0/tutorial.md) + * v0.1 + * [Unix Processes](v0.1/unix-process-tutorial.md) + * [Git Repository Synchronizer](v0.1/git-synchronizer-tutorial.md) * Guide * [Supervisor Theory](supervisor-theory.md) -* [Developer Guidelines](developer-guidelines.md) +* [Developer Guidelines](CONTRIBUTING.md) diff --git a/docs/rationale.md b/docs/rationale.md deleted file mode 100644 index bad9307..0000000 --- a/docs/rationale.md +++ /dev/null @@ -1 +0,0 @@ -# What this library is about diff --git a/docs/tutorial.md b/docs/v0.0/tutorial.md similarity index 97% rename from docs/tutorial.md rename to docs/v0.0/tutorial.md index fd0bf8e..19e5b73 100644 --- a/docs/tutorial.md +++ b/docs/v0.0/tutorial.md @@ -1,5 +1,10 @@ # Basic Tutorial using Unix Processes +> NOTE: This tutorial is for version v0.0.0.2 of the capataz library, follow +> [this +> 
link](https://romanandreg.gitbooks.io/capataz/content/v0.1/unix-process-tutorial.md) +> to read this tutorial with the newest version + In this tutorial, we will build a small CLI application that spawns processes through Haskell's Unix Process API. We will keep all our threads running smoothly despite having one of the threads killing the spawned Unix processes through Unix `SIGTERM` signals. We will implement two (2) different versions of our CLI program, one using standard Haskell threading utilities, and another using the Capataz library. @@ -17,7 +22,7 @@ If you are not familiar with the topics above, we recommend following along and Our CLI program will receive an input parameter `procNumber` that will be used to spawn some green threads, each of them generating a Unix process. Each Unix process executes a simple bash script that echoes a number and increments it in an infinite while loop. Our Haskell program will also run a Haskell thread that will kill one of the many bash script executions. -You can find the code for this tutorial in the [`examples`](https://github.com/roman/Haskell-capataz/tree/examples/examples) directory of the project's Github repository. +You can find the code for this tutorial in the [`examples`](https://github.com/roman/Haskell-capataz/tree/v0.0.0.2/examples/capataz-example) directory of the project's Github repository. ## Setting up the stage - A trivial library for Processes @@ -205,7 +210,7 @@ import Control.Concurrent.Capataz -- (0) ( WorkerOptions(..) , CapatazOptions(..) , WorkerRestartStrategy(..) - , CapatazRestartStrategy(..) + , SupervisorRestartStrategy(..) 
, forkCapataz , forkWorker , defWorkerOptions diff --git a/docs/v0.1/git-synchronizer-tutorial.md b/docs/v0.1/git-synchronizer-tutorial.md new file mode 100644 index 0000000..2aef814 --- /dev/null +++ b/docs/v0.1/git-synchronizer-tutorial.md @@ -0,0 +1,388 @@ +# Git repository synchronizer + +We will build a CLI application that automatically synchronizes files in a git repository with its remote server whenever they are modified. To do this, we will keep track of file modifications using the iNotify UNIX API. + +For this tutorial, we assume the reader is familiar with: + +* [Stack](https://docs.haskellstack.org/en/stable/README/) projects +* [GHC Extensions](https://downloads.haskell.org/~ghc/8.2.2/docs/html/users_guide/glasgow_exts.html) +* [Shelly library](https://hackage.haskell.org/package/shelly) +* [Haskell concurrency](http://chimera.labs.oreilly.com/books/1230000000929/pt02.html) (threads and STM) + +If you are not familiar with the topics above, we recommend reading the tutorial while looking information from the given links. + +## What you'll learn: + +* How to create supervision trees using Capataz +* How to use the `ComponentM` monad from the [teardown library](https://stackage.org/packages/teardown) +* How to use the shelly library to run commands +* How to use RIO for logging + +## Summary of what our program will do + +Our CLI program will receive various strings as CLI arguments; each argument represents a file path that points to a git repository which we want to synchronize with a remote repository automatically; by just saving a file a commit should be created; also, our repository will sync with its remote host in a regular basis. + +You can find the code for this tutorial in the [`examples`](https://github.com/roman/Haskell-capataz/tree/master/examples/) directory of the project's Github repository. 
+ +## Setting up the stage - Modeling our domain problem as a supervisor tree + +We first define a diagram of how our supervision tree is going to look like: + +```text +capataz-system/ +├── logger-worker -- ^ performs logging to a console +└── repository-supervisor(*) + ├── git-worker -- ^ performs all git operations on a repository + ├── repo-file-watcher -- ^ monitors changes on files + └── sync-interval-worker -- ^ notifies every interval of time an event +``` + +Our Capataz' root supervisor tree is composed of a worker green thread responsible for logging, and at least one (1) `repository-supervisor` that monitors two (2) worker threads, one responsible for sync interval (to schedule git pull/push commands) and one to execute git commands. + +Why this organization? We want to make sure that each repository is self-contained, if one of them is faulty, we wouldn't want to restart the whole application, only the workers related to that repository. We can go as far as to add an `AllForOne` strategy at the `repository-supervisor` level. + +## How do our workers communicate? + +Given Capataz does not enforce a message passing communication scheme, we can use whatever we want to communicate our workers, from third-party services like SQS to external processes like RabbitMQ or Redis, to memory STM Queues and MVars. + +Given we don't care about messages getting lost in case of catastrophic failure, in this example we are going to use `STM` channels (`TChan`) to communicate threads between them. 
+ +## Implementing our supervision tree + +```haskell +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} +module Lib where + +import qualified Prelude +import RIO -- (1) +import qualified RIO.Logger as Logger + +import Control.Monad.Component (ComponentM) + +import Control.Concurrent (threadDelay) +import Control.Concurrent.Capataz (Capataz, ProcessSpec) +import Control.Concurrent.STM (orElse) + +import System.Environment (getArgs) + +import qualified RIO.Text as Text + +import qualified Control.Monad.Component as Component +import qualified Control.Concurrent.STM.TChan as TChan +import qualified Control.Concurrent.Capataz as Capataz + +import qualified Shelly -- (1) +import qualified System.INotify as INotify -- (1) + +``` + +`(1)` For this example we use [`RIO`](https://github.com/commercialhaskell/rio) as our custom Prelude, we use [`shelly`](https://hackage.haskell.org/package/shelly) to execute CLI commands from Haskell, and we use [`hinotify`](https://hackage.haskell.org/package/hinotify) to keep track of file changes. + +```haskell +-------------------------------------------------------------------------------- + +-- | An instance of HasLogFunc for a LogFunc, I create this instance given +-- there is no obvious way to use a LogFunc in a reader. I'm confident there +-- must be a better way to do this. For the sake of the tutorial, this is a +-- non-essential implementation detail. +instance Logger.HasLogFunc Logger.LogFunc where + logFuncL = id + +-- | Utility function that runs a component builder and uses its result +-- in a callback in a safe way.
+withComponent :: Text -> ComponentM a -> (a -> IO ()) -> IO () +withComponent !appDesc !buildComponent !f = mask $ \restore -> do + component <- Component.runComponentM appDesc buildComponent + (restore . f $ Component.fromComponent component) + `finally` + (Component.teardown component >>= Prelude.print) + +-------------------------------------------------------------------------------- + +-- | A combination of both file modification events and git repository +-- synchronization. +data WatcherMsg -- (2) + -- | Message triggered when monitored file changes + = FileChanged !FilePath + -- | Message triggered when a sync request is made + | SyncRequested +``` + +`(2)` We use a sum type that helps us notify threads when an interval period is met (`SyncRequested`) or when a monitored file has changed (`FileChanged`, triggered by `hinotify`). + +```haskell +-------------------------------------------------------------------------------- + +-- | Returns an INotify descriptor, necessary to build watches for a directory +buildINotify :: ComponentM INotify.INotify +buildINotify = Component.buildComponentWithCleanup $ do + inotify <- INotify.initINotify + return (inotify, ("inotify descriptor", INotify.killINotify inotify)) + +-- | Registers a watch that notifies every time a file in the given directory +-- is modified; returns an IO sub-routine that removes that watch.
+buildFileWatcher + :: INotify.INotify + -> (FilePath -> IO ()) + -> FilePath -- ^ Directory where changes are tracked + -> IO (IO ()) +buildFileWatcher inotify notifyFileChange !dir = do + -- (3) + fileWatch <- INotify.addWatch inotify [INotify.CloseWrite, INotify.Modify] dir $ \ev -> do + case ev of + INotify.Modified {INotify.isDirectory, INotify.maybeFilePath} + -- we ignore all changes that happen to a Directory + | isDirectory -> return () + | otherwise -> + maybe (return ()) notifyFileChange maybeFilePath + _ -> return () + + return (INotify.removeWatch fileWatch) +``` + +`(3)` Using the `INotify.addWatch` function, we can register what files we are interested in receiving events on modification; we also need to provide a callback: ours ignores directory events and hands the path of every modified file to the given `notifyFileChange` function, which `buildRepoFileWatcher` later implements as a write to a `TChan`. + +```haskell +-- | Returns both an STM sub-routine that blocks until a given period +-- has passed, and a "ProcessSpec" for supervision of this interval thread. +buildIntervalWorker + :: Text -- ^ Name of the worker process + -> Int -- ^ Number of seconds between notifications + -> ComponentM (STM (), ProcessSpec) +buildIntervalWorker !workerName !delaySeconds = Component.buildComponent $ do + -- (4) + intervalChan <- TChan.newTChanIO + + let + triggerEvent :: IO () + triggerEvent = forever $ do + threadDelay (delaySeconds * 1000000) -- threadDelay takes microseconds + atomically $ TChan.writeTChan intervalChan () + + -- (5) + intervalSpec :: Capataz.ProcessSpec + intervalSpec = + Capataz.workerSpec workerName triggerEvent + (set Capataz.workerRestartStrategyL Capataz.Permanent) + + return (TChan.readTChan intervalChan, intervalSpec) +``` + +`(4)` We use a `TChan` to communicate to other interested threads that a git synchronization should happen. + +`(5)` We build a `ProcessSpec` using the `workerSpec` function; this worker emits a signal every time the given number of seconds has elapsed. Note, we use the `workerRestartStrategyL` lens to override the default options of a new worker.
+ +```haskell +-- | Builds a "ProcessSpec" that monitors a green thread that receives messages +-- from the given "WatcherMsg" notifier, it receives a path where the git +-- repository is. +buildGitWorker + :: FilePath -- ^ Location of git repository where changes are kept + -> IO WatcherMsg -- ^ An IO sub-routine that gets triggered everytime a + -- WatcherMsg happens + -> ProcessSpec +buildGitWorker !repoPath !getWatcherMsg = + let + executeCmd :: IO () + executeCmd = forever $ do + msg <- getWatcherMsg + case msg of + FileChanged {} -> + Shelly.shelly + $ Shelly.chdir (Shelly.fromText $ Text.pack repoPath) + $ do Shelly.run_ "git" ["add", "."] + Shelly.run_ "git" ["commit", "-m", "file changes"] + + SyncRequested -> do + Shelly.shelly + $ Shelly.chdir (Shelly.fromText $ Text.pack repoPath) + $ do Shelly.run_ "git" ["pull", "-r", "origin", "master"] + Shelly.run_ "git" ["push", "origin", "master"] + in + -- (6) + Capataz.workerSpec "git-worker" executeCmd + (set Capataz.workerRestartStrategyL Capataz.Permanent) +``` + +`(6)` We build a `ProcessSpec` using the `workerSpec` function; this worker receives notifications and performs bash operations using the `Shelly` API. + +```haskell +-- | Returns both an utility function for logging and a "ProcessSpec" to +-- supervise a thread that receives log messages and displays them to stdout. 
+buildEventLogger :: ComponentM (DisplayBuilder -> IO (), ProcessSpec) +buildEventLogger = Component.buildComponent $ do + logChan <- TChan.newTChanIO + let + logOptions :: Logger.LogOptions + logOptions = + Logger.LogOptions + { + Logger.logMinLevel = Logger.LevelDebug + , Logger.logVerboseFormat = True + , Logger.logTerminal = True + , Logger.logUseTime = True + , Logger.logUseColor = True + , Logger.logUseUnicode = True + } + + logLoop :: IO () + logLoop = Logger.withStickyLogger logOptions $ \logger -> do + flip runReaderT logger $ forever $ do + bs <- liftIO $ atomically $ TChan.readTChan logChan + Logger.logDebug bs + + return ( + atomically . TChan.writeTChan logChan + -- (7) + , Capataz.workerSpec "logger" logLoop + (set Capataz.workerRestartStrategyL Capataz.Permanent) + ) +``` + +`(7)` As with the previous examples, we create a `ProcessSpec` with the `workerSpec` function; this worker listens to a channel for messages to print to `stdout`. + +```haskell +-- | Creates a RepoWatcher supervisor, which is composed by: +-- +-- * A file watcher +-- * An interval worker +-- * A git worker +-- +-- NOTE: when we restart our repo file watcher, we need to make sure that our +-- watch gets restarted as well. +buildRepoFileWatcher :: INotify.INotify -> FilePath -> ComponentM ProcessSpec +buildRepoFileWatcher !inotify !repoDir = do + -- We create functions that workers will use to communicate between each + -- other + changesChan <- liftIO $ TChan.newTChanIO + + + let notifyFileChange = atomically . 
TChan.writeTChan changesChan + onFileChange = TChan.readTChan changesChan + + fileWatchCleanupRef <- liftIO $ do + fileWatchCleanup <- buildFileWatcher inotify notifyFileChange repoDir + newIORef fileWatchCleanup + + (onSync, syncIntervalSpec) <- buildIntervalWorker "git-sync-interval" (60 * 2) + + let + -- We compose both Sync interval requests and file changes notifications + onMsg :: IO WatcherMsg + onMsg = + atomically + $ (FileChanged <$> onFileChange) + `orElse` (onSync $> SyncRequested) + + cleanupWatch :: IO () + cleanupWatch = + -- Invokes the `IO ()` operation contained inside our `IORef` + join (readIORef fileWatchCleanupRef) + + -- We restart the inotify watch when supervisor fails; We mask to make sure + -- that our ref is not corrupted with async exceptions + onRepoWatcherFailure :: IO () + onRepoWatcherFailure = mask $ \unmask -> do + unmask cleanupWatch + fileWatchCleanup <- buildFileWatcher inotify notifyFileChange repoDir + writeIORef fileWatchCleanupRef fileWatchCleanup + + gitWorkerSpec :: ProcessSpec + gitWorkerSpec = + buildGitWorker repoDir onMsg + + -- (8) + Component.buildComponentWithCleanup + $ return + $ ( + -- (9) + Capataz.supervisorSpec ("repo-file-watcher:" <> Text.pack repoDir) + ( set Capataz.supervisorRestartStrategyL Capataz.AllForOne -- (10) + . set Capataz.supervisorOnFailureL (const $ onRepoWatcherFailure) -- (11) + . set Capataz.supervisorProcessSpecListL [gitWorkerSpec, syncIntervalSpec] + ) + , ("repo-file-watcher:" <> Text.pack repoDir, cleanupWatch) + ) +``` + +`(8)` The `Component.buildComponentWithCleanup` allows us to allocate resources, returning a value from this allocation (say, a supervisor spec) and a named `IO ()` sub-routine that gets composed with other cleanup tasks by the `ComponentM` monad. + + +`(9)` With the `supervisorSpec` function, we create a supervisor `ProcessSpec`. 
Note we use the `supervisorProcessSpecListL` lens to create a _static_ supervision tree; we use the `ProcessSpec` created on steps `(5)` and `(6)`. This supervisor will start a `git-worker` and a `git-sync-interval` process on startup, and it will monitor and restart workers when they fail. + +`(10)` We use an `AllForOne` strategy, meaning, if either `gitWorkerSpec` or `syncIntervalSpec` fails, the other is going to be restarted. + +`(11)` We use the `supervisorOnFailureL` lens to override the repo supervisor failure callback, in here, we make sure we restart the `INotify` watch of the repository. + +```haskell +-- | Creates a Capataz supervision tree which contains a RepoWatcher +-- supervisor per repository path +createRepoWatcherSystem :: [FilePath] -> ComponentM Capataz +createRepoWatcherSystem repoPathList = do + (logFn, loggerProcessSpec) <- buildEventLogger + inotify <- buildINotify + repoProcessSpecList <- mapM (buildRepoFileWatcher inotify) repoPathList + + let + procList = + loggerProcessSpec:repoProcessSpecList + + Component.buildComponentWithTeardown $ mask $ \_ -> do + -- (12) + capataz <- Capataz.forkCapataz "repo-watcher-capataz" + ( set Capataz.onSystemEventL (logFn . displayShow) + . set Capataz.supervisorProcessSpecListL procList + ) + + -- (13) + return (capataz, Capataz.getCapatazTeardown capataz) +``` + +`(12)` We connect all our components using the `forkCapataz` function. The logger notification function is used as our `onSystemEventL` callback so that we have proper logging around what our Capataz system is doing. + +`(13)` We return our Capataz record and a [`teardown`](https://hackage.haskell.org/package/teardown) sub-routine. + +```haskell +main :: IO () +main = do + input <- getArgs + case input of + [] -> + error "Expecting repository paths as inputs; got nothing."
+ repoPaths -> + -- (14) + withComponent ("repo-watcher-system") + (createRepoWatcherSystem repoPaths) + Capataz.joinCapatazThread -- (15) + +``` + +`(14)` We execute our `ComponentM` sub-routine and provide a callback that receives the result from that sub-routine, also guaranteeing that all resources are cleaned up (even in the case of failure). + +`(15)` We use the `joinCapatazThread` function, which joins our current thread with the root supervisor thread, blocking the current thread until the Capataz root supervisor finishes its execution. + +## What have we accomplished + +By now we have a pretty reliable program that will stay running smoothly even in error scenarios like: + +* We monitor a directory that is _not_ a git repository +* The repository is not configured to have a remote host +* The repository gets deleted + +## Try it out! + +1) Clone the [capataz repository](https://github.com/roman/Haskell-capataz) + +2) Run `make build` + +3) Run `./out/bin/repo-watcher <path-to-repository>` + +4) Try modifying a file in the monitored repo and see how it performs a commit automatically. diff --git a/docs/v0.1/unix-process-tutorial.md b/docs/v0.1/unix-process-tutorial.md new file mode 100644 index 0000000..1aca58a --- /dev/null +++ b/docs/v0.1/unix-process-tutorial.md @@ -0,0 +1,313 @@ +# Unix Processes + +We will build a small CLI application that spawns processes through Haskell's Unix Process API. We will keep all our threads running smoothly despite one of the threads killing the spawned Unix processes through Unix `SIGTERM` signals. + +We will implement two (2) different versions of our CLI program, one using standard Haskell threading utilities, and another using the Capataz library. 
+ +For this tutorial, we assume the reader is familiar with: + +* [GHC Extensions](https://downloads.haskell.org/~ghc/8.2.2/docs/html/users_guide/glasgow_exts.html) +* [Stack](https://docs.haskellstack.org/en/stable/README/) projects +* [OptParse Generic library](https://hackage.haskell.org/package/optparse-generic) for CLI programs +* [Turtle library](https://hackage.haskell.org/package/turtle) +* [Haskell concurrency](http://chimera.labs.oreilly.com/books/1230000000929/pt02.html) (threads and STM) + +If you are not familiar with the topics above, we recommend reading the tutorial while looking up information in the given links. + +## What you'll learn: + +* How to add workers to a running supervision tree in a dynamic fashion +* How to spawn Unix processes from Haskell using the `process` library +* How to kill Unix processes from Haskell using Turtle + +## Summary of what our program will do + +Our CLI program will receive an input parameter `procNumber` that will be used to spawn some green threads, each of them generating a Unix process. Each Unix process executes a simple bash script that echoes a number and increments it in an infinite while loop. Our Haskell program will also run a Haskell thread that sends a Unix signal to one of the processes spawned by our program. + +You can find the code for this tutorial in the [`examples`](https://github.com/roman/Haskell-capataz/tree/master/examples/) directory of the project's GitHub repository. 
+ +## Setting up the stage - A trivial library for Processes + +Let's start by explaining the `Lib` module; it contains utility functions to spawn and kill Unix processes, first the header: + +```haskell +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeOperators #-} +module Lib where + +import qualified Data.ByteString.Char8 as C +import Data.List ((!!)) +import qualified Data.Text as T +import Options.Generic (ParseRecord) +import Protolude +import System.IO (hGetLine, hIsEOF) +import qualified System.Process as Process +import qualified System.Random as Random +import qualified Turtle + +-- (0) +newtype Cli = + Cli { procNumber :: Int } + deriving (Generic, Show) + +instance ParseRecord Cli + +-- (1) +data SimpleProcess = + SimpleProcess { readStdOut :: !(IO (Either ExitCode ByteString)) + , terminateProcess :: !(IO ()) + , waitProcess :: !(IO ExitCode) + } + +-- (1) +spawnSimpleProcess :: Text -> [Text] -> IO SimpleProcess +spawnSimpleProcess program args = do + let processSpec = (Process.proc (T.unpack program) (fmap T.unpack args)) + { Process.std_out = Process.CreatePipe + } + + (_, Just hout, _, procHandle) <- Process.createProcess processSpec + + let readStdOut :: IO (Either ExitCode ByteString) + readStdOut = do + isEof <- hIsEOF hout + if not isEof + then (Right . C.pack) <$> hGetLine hout + else Left <$> Process.waitForProcess procHandle + + terminateProcess :: IO () + terminateProcess = Process.terminateProcess procHandle + + waitProcess :: IO ExitCode + waitProcess = Process.waitForProcess procHandle + + return SimpleProcess {readStdOut , terminateProcess , waitProcess } +``` + +`(0)` We have a `Cli` record that we use to gather values for our CLI program. Using the [optparse-generic](https://hackage.haskell.org/package/optparse-generic) library, this becomes a trivial affair. 
We make this work by adding an instance for `Generic` and `ParseRecord`. + +`(1)` We create a `SimpleProcess` record. This record contains the logic to read the `stdout` of the spawned process and provides sub-routines for _terminating_ or _waiting_ for termination of the Unix process. This utility record limits the scope of the Haskell Unix process API to our small use case. + +Next, we implement the function that will spawn a Unix process that performs an `echo` of numbers from 1 to infinity in bash: + +```haskell +spawnNumbersProcess + :: (Int -> IO ()) -- ^ sub-routine that writes number to other resource + -> IO () +spawnNumbersProcess writeNumber = do + -- We are going to execute a while loop that echoes numbers to stdout + proc' <- + spawnSimpleProcess + "/bin/bash" + ["-c" + , "COUNTER=1; while [ $COUNTER -gt 0 ]; do " + <> "echo $COUNTER; sleep 1; let COUNTER=COUNTER+1; " + <> "done" + ] + + let loop = do + -- Read a line from stdout and try to parse it as a number; a Left value is + -- the exit code of the finished process (a non-successful one is re-thrown + -- as an exception), a Right value is the parse result of the line read + eInput <- fmap (readMaybe . C.unpack) <$> readStdOut proc' + case eInput of + Left exitCode + | exitCode == ExitSuccess -> return () + | otherwise -> throwIO exitCode + Right Nothing -> do + putText "didn't get a number?" + loop + Right (Just number) -> do + writeNumber number + loop + + -- Make sure we terminate the process if we stop the loop using + -- an async exception + loop `finally` terminateProcess proc' +``` + +Now, let's have another `IO` sub-routine that lists the PIDs of Unix processes and picks one of them at random to send a `SIGTERM` signal. We use the [`Turtle` library](https://hackage.haskell.org/package/turtle) to run bash commands, in particular, the function (`procStrict`) which returns the `stdout` and `exitCode` of a process. 
+ +```haskell +processKiller + :: Text -- ^ Search processes with given name + -> IO () +processKiller processName = do + (_exitCode, pgrepOutput) <- + Turtle.procStrict "pgrep" ["-f", processName] Turtle.empty + -- `pgrep -f` prints one pid per line for every process matching the name + + -- Split the output in lines, each line holding one pid + let procNumbers = T.lines pgrepOutput + case procNumbers of + [] -> return () + _ -> do + -- pick a random index into the (non-empty) list of process identifiers + theOneToKill <- Random.randomRIO (0, pred $ length procNumbers) + + putText $ "Process running: " <> show procNumbers + putText $ "Killing: " <> (procNumbers !! theOneToKill) + + void $ Turtle.procStrict "kill" [procNumbers !! theOneToKill] Turtle.empty +``` + +## Example 1 - Running program without supervision + +Once we have the API that spawns Unix processes, we implement a concurrent application that generates Haskell threads and calls this API; we build each thread using the standard [async](https://hackage.haskell.org/package/async) package: + +```haskell +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE NoImplicitPrelude #-} +module Main where + +import Protolude -- (0) +import Options.Generic (getRecord) +import Control.Concurrent.Async (async) +import Lib (Cli(..), SimpleProcess(..), spawnNumbersProcess, killNumberProcess) + +main :: IO () +main = do + n <- getRecord "Counter spawner" -- (1) + + let numberWriter i a = print (i, a) + delayMicros = 5000100 + + asyncList <- forM [1..procNumber n] $ \i -> + async $ spawnNumbersProcess (numberWriter i) -- (2) + + killerAsync <- + async $ forever $ threadDelay delayMicros >> killNumberProcess -- (3) + + wait killerAsync `finally` mapM_ cancel asyncList +``` + +`(0)` We start by removing the default `Prelude` and use the batteries included [`protolude`](https://hackage.haskell.org/package/protolude) library, this provides most of the used functions from Haskell and some extra useful libraries. 
+ +`(1)` We use the [`optparse-generic`](https://hackage.haskell.org/package/optparse-generic) library to get a quick CLI optparser that provides us with the number of processes to run. + +`(2)` For each process number we spawn an [async](https://hackage.haskell.org/package/async) (thread) that executes the `spawnNumbersProcess` sub-routine. + +`(3)` We spawn another thread that kills Unix processes. + +When we run the previous program, it will fail slowly, removing the output of each of the threads that stop working after receiving an asynchronous exception. The operating system throws this exception, but it originates from the execution of the `killNumberProcess` sub-routine which sends a `SIGTERM` signal to the spawned process on each of the running threads. + +The next example will show a project that uses the same functions, but relies on the Capataz API, which restarts threads in case of failure from external factors (in this case a `SIGTERM` Unix signal). + +## Example 2 - Running program with supervision + +Now, instead of using async, let's build the Haskell threads using a capataz instance that monitors both a group of threads that execute the `spawnNumbersProcess` sub-routine and a thread that terminates a randomly chosen Unix process. + +```haskell +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +module Main where + +import Control.Concurrent.Capataz -- (0) + ( SupervisorRestartStrategy (..) + , WorkerRestartStrategy (..) 
+ , buildWorkerOptions + , buildWorkerOptionsWithDefaults + , joinCapatazThread + , forkCapataz + , forkWorker + , onSystemEventL + , set + , supervisorRestartStrategyL + , teardown + , workerRestartStrategyL + ) +import Lib (Cli (..), killNumberProcess, spawnNumbersProcess) +import Options.Generic (getRecord) +import Protolude +import Text.Show.Pretty (pPrint) + +main :: IO () +main = do + n <- getRecord "Counter spawner" + + capataz <- + forkCapataz "unix-process-capataz" -- (1) + ( set supervisorRestartStrategyL OneForOne -- (2) + . set onSystemEventL pPrint -- (3) + ) + + let numberWriter i a = print (i, a) + delayMicros = 5000100 + + _workerIdList <- forM [1 .. procNumber n] $ \i -> do + let + counterWorkerOptions = + buildWorkerOptions -- (4) + ("Worker (" <> show i <> ")") + (spawnNumbersProcess (numberWriter i)) -- (5) + (set workerRestartStrategyL Permanent) -- (6) + + forkWorker -- (7) + counterWorkerOptions + capataz + + let + workerKillerOptions = + buildWorkerOptionsWithDefaults -- (8) + "worker-killer" + (forever $ threadDelay delayMicros >> killNumberProcess) + + -- ignore returned ProcessId, as we won't use it in our example + void $ forkWorker workerKillerOptions capataz + + joinCapatazThread capataz -- (9) + `finally` + (teardown capataz >>= print) -- (10) +``` + +We start the `main` sub-routine building a capataz instance, using the `forkCapataz` function. + +`(0)` We import many symbols from our Capataz library; we need to provide some settings that will determine the restart mechanisms for the threads we want to keep running despite any errors that could happen. + +`(1)` We use the `forkCapataz` function to start the capataz system; this call returns a (root) supervisor that can be used to dynamically create a supervision tree with supervisors and/or workers. + +`(2)` We use the `supervisorRestartStrategyL` lens to override the default restart strategy. 
Some of the possible values are: + +* `OneForOne` -- if a monitored sub-routine thread fails, the supervisor will only restart the failing thread. + +* `AllForOne` -- if a monitored sub-routine thread fails, the supervisor will restart all sub-routines that are monitored by it. + +`(3)` We use the `onSystemEventL` lens to override the default callback. This callback gets called every time something happens inside the supervisor; in this simple example, we are using the `pPrint` (pretty print) function to debug the capataz instance execution. + +`(4)` We use the `buildWorkerOptions` function to create a `WorkerOptions` record; it requires three (3) arguments: + +1. The worker name, which is used on the events triggered to the `onSystemEventL` callback. + +2. The `IO ()` sub-routine to be executed on a supervised green thread. + +3. A function that allows us to modify the default settings of `WorkerOptions` + +`(5)` We pass our `spawnNumbersProcess` IO sub-routine to execute it in a supervised green thread. This approach is no different from using a `forkIO`. Note the supervisor instance created in step `(1)` is the last parameter to the `forkWorker` function. + +`(6)` We use the `workerRestartStrategyL` lens to override the default worker restart strategy; possible values are: + +* `Permanent` -- The worker thread will _always_ get restarted, even if it finishes without any errors. This strategy is ideal to monitor long-running servers. + +* `Transient` -- The worker thread gets restarted if and only if it fails with an error; if it completes without any errors, the supervisor will drop the worker thread from its supervision. This strategy is ideal to monitor one-time execution `IO ()` sub-routines. + +* `Temporary` -- The worker thread will not be restarted, even in the case of failure; used for non-important sub-routines. + +`(7)` We continue the example by spawning a few _workers_. 
For this, we use the `forkWorker` function which receives the `WorkerOptions` record created on step `(4)`. + +`(8)` We create another `WorkerOptions` record with the `buildWorkerOptionsWithDefaults` function, which is similar to `buildWorkerOptions` but doesn't allow overrides to the default options. + +`(9)` We join the current thread with the capataz' root supervisor thread. + +`(10)` We make sure that we clean up the capataz system and supervised sub-routine threads using the [`teardown`](https://hackage.haskell.org/package/teardown) API. + +## Try it out! + +1) Clone the [capataz repository](https://github.com/roman/Haskell-capataz) + +2) Run `make run-example1` + +3) Run `make run-example2` diff --git a/examples/capataz-example/src/example2/Main.hs b/examples/capataz-example/src/example2/Main.hs deleted file mode 100644 index 04c92b0..0000000 --- a/examples/capataz-example/src/example2/Main.hs +++ /dev/null @@ -1,45 +0,0 @@ -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE OverloadedStrings #-} -module Main where - -import Control.Concurrent.Capataz - ( CapatazOptions (..) - , CapatazRestartStrategy (..) - , WorkerOptions (..) - , WorkerRestartStrategy (..) - , capatazToAsync - , defCapatazOptions - , defWorkerOptions - , forkCapataz - , forkWorker - , teardown - ) -import Lib (Cli (..), killNumberProcess, spawnNumbersProcess) -import Options.Generic (getRecord) -import Protolude -import Text.Show.Pretty (pPrint) - - -main :: IO () -main = do - n <- getRecord "Counter spawner" - capataz <- forkCapataz defCapatazOptions { capatazName = "Example Capataz" - , capatazRestartStrategy = OneForOne - , notifyEvent = pPrint - } - - let numberWriter i a = print (i, a) - delayMicros = 5000100 - - _workerIdList <- forM [1 .. 
procNumber n] $ \i -> forkWorker - defWorkerOptions { workerName = "Worker (" <> show i <> ")" - , workerRestartStrategy = Permanent - } - (spawnNumbersProcess (numberWriter i)) - capataz - - void $ forkWorker defWorkerOptions { workerName = "Worker Killer" } - (forever $ threadDelay delayMicros >> killNumberProcess) - capataz - - wait (capatazToAsync capataz) `finally` (teardown capataz >>= print) diff --git a/examples/capataz-repo-watcher/LICENSE b/examples/capataz-repo-watcher/LICENSE new file mode 100644 index 0000000..e204f1c --- /dev/null +++ b/examples/capataz-repo-watcher/LICENSE @@ -0,0 +1,18 @@ +Copyright (c) 2018, TODO: + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/examples/capataz-repo-watcher/README.md b/examples/capataz-repo-watcher/README.md new file mode 100644 index 0000000..83f712a --- /dev/null +++ b/examples/capataz-repo-watcher/README.md @@ -0,0 +1,9 @@ +# capataz-repo-watcher + +This is an example that showcases how the `capataz` static supervision tree API +works. Please refer to the [documentation +site](https://romanandreg.gitbooks.io/capataz/content/v0.1/git-synchronizer-tutorial) +for a detailed explanation on how this code works. + +For more information about `capataz` API and how to use it, see +the [`capataz` homepage](https://github.com/roman/Haskell-capataz). diff --git a/examples/capataz-example/Setup.hs b/examples/capataz-repo-watcher/Setup.hs similarity index 100% rename from examples/capataz-example/Setup.hs rename to examples/capataz-repo-watcher/Setup.hs diff --git a/examples/capataz-repo-watcher/app/Main.hs b/examples/capataz-repo-watcher/app/Main.hs new file mode 100644 index 0000000..551dae4 --- /dev/null +++ b/examples/capataz-repo-watcher/app/Main.hs @@ -0,0 +1,6 @@ +module Main where + +import qualified Lib + +main :: IO () +main = Lib.main diff --git a/examples/capataz-repo-watcher/package.yaml b/examples/capataz-repo-watcher/package.yaml new file mode 100644 index 0000000..4ac40c7 --- /dev/null +++ b/examples/capataz-repo-watcher/package.yaml @@ -0,0 +1,44 @@ +name: capataz-repo-watcher +version: '0.0.0.0' +category: TODO +author: Roman Gonzalez +maintainer: Roman Gonzalez +copyright: © 2018 Roman Gonzalez +stability: alpha (experimental) +license: MIT +license-file: LICENSE +github: /Haskell-capataz +tested-with: GHC==8.2.1 +extra-source-files: +- README.md +- CHANGELOG.md + +ghc-options: +- -Wall +# as recommended in: +# https://functor.tokyo/blog/2017-07-28-ghc-warnings-you-should-enable +- -Wincomplete-uni-patterns +- -Wincomplete-record-updates + +dependencies: +- base +- capataz +- hinotify +- rio +- safe-exceptions +- shelly +- stm +- teardown + +library: + source-dirs: 
src + exposed-modules: + - Lib + +executables: + repo-watcher: + source-dirs: app + main: Main.hs + dependencies: + - optparse-generic + - capataz-repo-watcher diff --git a/examples/capataz-repo-watcher/src/Lib.hs b/examples/capataz-repo-watcher/src/Lib.hs new file mode 100644 index 0000000..eb4dd03 --- /dev/null +++ b/examples/capataz-repo-watcher/src/Lib.hs @@ -0,0 +1,251 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} +module Lib where + +import qualified Prelude +import RIO +import qualified RIO.Logger as Logger + +import Control.Monad.Component (ComponentM) + +import Control.Concurrent (threadDelay) +import Control.Concurrent.Capataz (Capataz, ProcessSpec) +import Control.Concurrent.STM (orElse) + +import System.Environment (getArgs) + +import qualified RIO.Text as Text + +import qualified Control.Concurrent.Capataz as Capataz +import qualified Control.Concurrent.STM.TChan as TChan +import qualified Control.Monad.Component as Component + +import qualified Shelly +import qualified System.INotify as INotify + +-------------------------------------------------------------------------------- + +-- | An instance of HasLogFunc for a LogFunc, did this given it was not provided +-- by RIO, there must be a better way to do this. +instance Logger.HasLogFunc Logger.LogFunc where + logFuncL = id + +-- | A combination of both file modification events and git repository +-- synchronization. 
+data WatcherMsg + -- | Message triggered when a monitored file changes + = FileChanged !FilePath + -- | Message triggered when a sync request is made + | SyncRequested + +-------------------------------------------------------------------------------- + +-- | Runs the given component builder, applies the callback to the built value +-- and always tears the component down afterwards, printing the teardown result. +withComponent :: Text -> ComponentM a -> (a -> IO ()) -> IO () +withComponent !appDesc !buildComponent !f = mask $ \restore -> do + component <- Component.runComponentM appDesc buildComponent + (restore . f $ Component.fromComponent component) + `finally` (Component.teardown component >>= Prelude.print) + +-------------------------------------------------------------------------------- + +-- | Returns an INotify descriptor, necessary to build watches for a directory +buildINotify :: ComponentM INotify.INotify +buildINotify = Component.buildComponentWithCleanup $ do + inotify <- INotify.initINotify + return (inotify, ("inotify descriptor", INotify.killINotify inotify)) + +-- | Registers an inotify watch on the given directory that reports modified +-- file paths to the given callback; returns an action that removes the watch. 
+buildFileWatcher + :: INotify.INotify -- ^ Descriptor where the watch is registered + -> (FilePath -> IO ()) -- ^ Callback invoked with the path of a modified file + -> FilePath -- ^ Directory where changes are tracked + -> IO (IO ()) +buildFileWatcher inotify notifyFileChange !dir = do + fileWatch <- + INotify.addWatch inotify [INotify.CloseWrite, INotify.Modify] dir $ \ev -> + + case ev of + INotify.Modified { INotify.isDirectory, INotify.maybeFilePath } + | + -- we ignore all changes that happen to a Directory + isDirectory -> return () + | otherwise -> maybe (return ()) notifyFileChange maybeFilePath + _ -> return () + + return (INotify.removeWatch fileWatch) + +-- | Returns both an STM sub-routine that blocks until a given period of time +-- has passed, and a "ProcessSpec" for supervision of this interval thread. 
+buildIntervalWorker + :: Text -- ^ Name of the worker process + -> Int -- ^ Number of seconds between notifications + -> ComponentM (STM (), ProcessSpec) +buildIntervalWorker !workerName !delaySeconds = Component.buildComponent $ do + intervalChan <- TChan.newTChanIO + + let triggerEvent :: IO () + triggerEvent = forever $ do + threadDelay (delaySeconds * 1000000) -- threadDelay takes microseconds + atomically $ TChan.writeTChan intervalChan () + + intervalSpec :: Capataz.ProcessSpec + intervalSpec = Capataz.workerSpec + workerName + triggerEvent + (set Capataz.workerRestartStrategyL Capataz.Permanent) + + return (TChan.readTChan intervalChan, intervalSpec) + +-- | Builds a "ProcessSpec" for a worker that loops forever reading "WatcherMsg" +-- values: a file change runs git add and commit, a sync request runs git pull +-- (rebase) and push; both are executed inside the given repository path. +buildGitWorker + :: FilePath -- ^ Location of git repository where changes are kept + -> IO WatcherMsg -- ^ An IO sub-routine that returns the next + -- WatcherMsg to process + -> ProcessSpec +buildGitWorker !repoPath !getWatcherMsg = + let + executeCmd :: IO () + executeCmd = forever $ do + msg <- getWatcherMsg + case msg of + FileChanged{} -> + Shelly.shelly + $ Shelly.chdir (Shelly.fromText $ Text.pack repoPath) + $ do + Shelly.run_ "git" ["add", "."] + Shelly.run_ "git" ["commit", "-m", "file changes"] + + SyncRequested -> + Shelly.shelly + $ Shelly.chdir (Shelly.fromText $ Text.pack repoPath) + $ do + Shelly.run_ "git" ["pull", "-r", "origin", "master"] + Shelly.run_ "git" ["push", "origin", "master"] + in + Capataz.workerSpec "git-worker" + executeCmd + (set Capataz.workerRestartStrategyL Capataz.Permanent) + +-- | Returns both an utility function for logging and a "ProcessSpec" to +-- supervise a thread that receives log messages and displays them to stdout. 
+buildEventLogger :: ComponentM (DisplayBuilder -> IO (), ProcessSpec) +buildEventLogger = Component.buildComponent $ do + logChan <- TChan.newTChanIO + let logOptions :: Logger.LogOptions + logOptions = Logger.LogOptions + { Logger.logMinLevel = Logger.LevelDebug + , Logger.logVerboseFormat = True + , Logger.logTerminal = True + , Logger.logUseTime = True + , Logger.logUseColor = True + , Logger.logUseUnicode = True + } + + logLoop :: IO () + logLoop = Logger.withStickyLogger logOptions $ \logger -> + flip runReaderT logger $ forever $ do + bs <- liftIO $ atomically $ TChan.readTChan logChan + Logger.logDebug bs + + return + ( atomically . TChan.writeTChan logChan + , Capataz.workerSpec + "logger" + logLoop + (set Capataz.workerRestartStrategyL Capataz.Permanent) + ) + +-- | Creates a RepoWatcher supervisor, which is composed by: +-- +-- * A file watcher +-- * An interval worker +-- * A git worker +-- +-- NOTE: when we restart our repo file watcher, we need to make sure that our +-- watch gets restarted as well. +buildRepoFileWatcher :: INotify.INotify -> FilePath -> ComponentM ProcessSpec +buildRepoFileWatcher !inotify !repoDir = do + -- We create functions that workers will use to communicate between each + -- other + changesChan <- liftIO TChan.newTChanIO + let notifyFileChange = atomically . 
TChan.writeTChan changesChan + onFileChange = TChan.readTChan changesChan + + fileWatchCleanupRef <- liftIO $ do + fileWatchCleanup <- buildFileWatcher inotify notifyFileChange repoDir + newIORef fileWatchCleanup + + (onSync, syncIntervalSpec) <- buildIntervalWorker "git-sync-interval" (60 * 2) + + let + -- We compose both Sync interval requests and file changes notifications + onMsg :: IO WatcherMsg + onMsg = + atomically + $ (FileChanged <$> onFileChange) + `orElse` (onSync $> SyncRequested) + + cleanupWatch :: IO () + cleanupWatch = join (readIORef fileWatchCleanupRef) + + -- We restart the inotify watch when supervisor fails; We mask to make sure + -- that our ref is not corrupted with async exceptions + onRepoWatcherFailure :: IO () + onRepoWatcherFailure = mask $ \unmask -> do + unmask cleanupWatch + fileWatchCleanup <- buildFileWatcher inotify notifyFileChange repoDir + writeIORef fileWatchCleanupRef fileWatchCleanup + + gitWorkerSpec :: ProcessSpec + gitWorkerSpec = buildGitWorker repoDir onMsg + + Component.buildComponentWithCleanup $ return + ( Capataz.supervisorSpec + ("repo-file-watcher:" <> Text.pack repoDir) + ( set Capataz.supervisorRestartStrategyL Capataz.AllForOne + . set Capataz.supervisorOnFailureL (const onRepoWatcherFailure) + . set Capataz.supervisorProcessSpecListL + [gitWorkerSpec, syncIntervalSpec] + ) + , ("repo-file-watcher:" <> Text.pack repoDir, cleanupWatch) + ) + +-- | Creates a Capataz supervision tree which contains a RepoWatcher +-- supervisor per repository path +createRepoWatcherSystem :: [FilePath] -> ComponentM Capataz +createRepoWatcherSystem repoPathList = do + (logFn, loggerProcessSpec) <- buildEventLogger + inotify <- buildINotify + repoProcessSpecList <- mapM (buildRepoFileWatcher inotify) repoPathList + + let procList = loggerProcessSpec : repoProcessSpecList + + Component.buildComponentWithTeardown $ do + capataz <- Capataz.forkCapataz + "repo-watcher-capataz" + ( set Capataz.onSystemEventL (logFn . displayShow) + . 
set Capataz.supervisorProcessSpecListL procList + ) + + return (capataz, Capataz.getCapatazTeardown capataz) + + +main :: IO () +main = do + input <- getArgs + case input of + [] -> error "Expecting repository paths as inputs; got nothing" + repoPaths -> withComponent "repo-watcher" + (createRepoWatcherSystem repoPaths) + Capataz.joinCapatazThread diff --git a/examples/capataz-example/README.md b/examples/capataz-simple-example/README.md similarity index 88% rename from examples/capataz-example/README.md rename to examples/capataz-simple-example/README.md index 36596a2..e17c678 100644 --- a/examples/capataz-example/README.md +++ b/examples/capataz-simple-example/README.md @@ -1,4 +1,4 @@ -# capataz-example +# capataz-simple-example This is an example that showcases how the `capataz` API works diff --git a/examples/capataz-simple-example/Setup.hs b/examples/capataz-simple-example/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/examples/capataz-simple-example/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/examples/capataz-example/package.yaml b/examples/capataz-simple-example/package.yaml similarity index 82% rename from examples/capataz-example/package.yaml rename to examples/capataz-simple-example/package.yaml index 51a0082..08e156a 100644 --- a/examples/capataz-example/package.yaml +++ b/examples/capataz-simple-example/package.yaml @@ -1,4 +1,4 @@ -name: capataz-example +name: capataz-simple-example version: '0.0.0.1' category: TODO author: Roman Gonzalez @@ -29,20 +29,16 @@ executables: source-dirs: src/example1 ghc-options: - -threaded - - -rtsopts - - -with-rtsopts=-N - -O2 dependencies: - async - - capataz-example + - capataz-simple-example example2: main: Main.hs source-dirs: src/example2 ghc-options: - -threaded - - -rtsopts - - -with-rtsopts=-N - -O2 dependencies: - capataz - - capataz-example + - capataz-simple-example diff --git a/examples/capataz-example/src/example1/Main.hs 
b/examples/capataz-simple-example/src/example1/Main.hs similarity index 100% rename from examples/capataz-example/src/example1/Main.hs rename to examples/capataz-simple-example/src/example1/Main.hs diff --git a/examples/capataz-simple-example/src/example2/Main.hs b/examples/capataz-simple-example/src/example2/Main.hs new file mode 100644 index 0000000..95685bf --- /dev/null +++ b/examples/capataz-simple-example/src/example2/Main.hs @@ -0,0 +1,53 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +module Main where + +import Control.Concurrent.Capataz + ( SupervisorRestartStrategy (..) + , WorkerRestartStrategy (..) + , buildWorkerOptions + , buildWorkerOptionsWithDefaults + , forkCapataz + , forkWorker + , joinCapatazThread + , onSystemEventL + , set + , supervisorRestartStrategyL + , teardown + , workerRestartStrategyL + ) +import Lib (Cli (..), killNumberProcess, spawnNumbersProcess) +import Options.Generic (getRecord) +import Protolude +import Text.Show.Pretty (pPrint) + +main :: IO () +main = do + n <- getRecord "Counter spawner" + + capataz <- forkCapataz + "unix-process-capataz" -- (1) + (set supervisorRestartStrategyL OneForOne -- (2) + . set onSystemEventL pPrint) -- (3) + + let numberWriter i a = print (i, a) + delayMicros = 5000100 + + _workerIdList <- forM [1 .. 
procNumber n] $ \i -> do + let counterWorkerOptions = buildWorkerOptions -- (4) + ("Worker (" <> show i <> ")") + (spawnNumbersProcess (numberWriter i)) -- (5) + (set workerRestartStrategyL Permanent) -- (6) + + forkWorker -- (7) + counterWorkerOptions capataz + + let workerKillerOptions = buildWorkerOptionsWithDefaults -- (8) + "worker-killer" + (forever $ threadDelay delayMicros >> killNumberProcess) + + -- ignore returned ProcessId, as we won't use it in our example + void $ forkWorker workerKillerOptions capataz + + joinCapatazThread capataz -- (9) + `finally` (teardown capataz >>= print) -- (10) diff --git a/examples/capataz-example/src/lib/Lib.hs b/examples/capataz-simple-example/src/lib/Lib.hs similarity index 100% rename from examples/capataz-example/src/lib/Lib.hs rename to examples/capataz-simple-example/src/lib/Lib.hs diff --git a/package.yaml b/package.yaml index b18555e..c0b7def 100644 --- a/package.yaml +++ b/package.yaml @@ -1,5 +1,5 @@ name: capataz -version: '0.0.0.2' +version: '0.1.0.0' synopsis: OTP-like supervision trees in Haskell description: | `capataz` enhances the reliability of your concurrent applications by offering @@ -21,7 +21,7 @@ description: | category: Control, Concurrency author: Roman Gonzalez -maintainer: capataz@roman-gonzalez.info +maintainer: open-source@roman-gonzalez.info copyright: © 2018 Roman Gonzalez license: MIT license-file: LICENSE @@ -40,23 +40,32 @@ ghc-options: dependencies: - base -- protolude - async +- bytestring +- data-default +- microlens +- protolude - safe-exceptions +- stm +- teardown - text -- bytestring - time -- teardown -- uuid - unordered-containers +- uuid - vector -- data-default -- stm library: source-dirs: src exposed-modules: - Control.Concurrent.Capataz + - Control.Concurrent.Capataz.Event + - Control.Concurrent.Capataz.Lens + - Control.Concurrent.Capataz.Internal.Core + - Control.Concurrent.Capataz.Internal.Types + - Control.Concurrent.Capataz.Internal.Types.Lens + - 
Control.Concurrent.Capataz.Internal.Supervisor + - Control.Concurrent.Capataz.Internal.Util + - Control.Concurrent.Capataz.Internal.Worker tests: capataz-test: @@ -64,8 +73,6 @@ tests: source-dirs: test/testsuite ghc-options: - -threaded - - -rtsopts - - -with-rtsopts=-N dependencies: - tasty - tasty-hunit diff --git a/src/Control/Concurrent/Capataz.hs b/src/Control/Concurrent/Capataz.hs index 0ecd623..9eb3a83 100644 --- a/src/Control/Concurrent/Capataz.hs +++ b/src/Control/Concurrent/Capataz.hs @@ -8,51 +8,79 @@ module Control.Concurrent.Capataz ( -- * Types - CallbackType (..) -, WorkerAction -, WorkerError (..) -, WorkerOptions (..) -, WorkerRestartStrategy (..) -, WorkerSpec (..) -, WorkerTerminationOrder (..) -, WorkerTerminationPolicy (..) -, Capataz (..) -, CapatazEvent (..) -, CapatazOptions (..) -, CapatazRestartStrategy (..) -, CapatazStatus (..) -, defWorkerOptions -, defWorkerSpec -, defCapatazOptions + Control.Concurrent.Capataz.Internal.Core.HasSupervisor (..) +, Control.Concurrent.Capataz.Internal.Types.CallbackType (..) + +, Control.Concurrent.Capataz.Internal.Types.WorkerId +, Control.Concurrent.Capataz.Internal.Types.WorkerRestartStrategy (..) +, Control.Concurrent.Capataz.Internal.Types.WorkerTerminationPolicy (..) +, Control.Concurrent.Capataz.Internal.Types.WorkerOptions + +, Control.Concurrent.Capataz.Internal.Types.ProcessId +, Control.Concurrent.Capataz.Internal.Types.ProcessSpec (..) +, Control.Concurrent.Capataz.Internal.Types.ProcessType (..) +, Control.Concurrent.Capataz.Internal.Types.ProcessTerminationOrder (..) +, Control.Concurrent.Capataz.Internal.Types.ProcessError (..) + +, Control.Concurrent.Capataz.Internal.Types.SupervisorId +, Control.Concurrent.Capataz.Internal.Types.Supervisor +, Control.Concurrent.Capataz.Internal.Types.SupervisorRestartStrategy (..) +, Control.Concurrent.Capataz.Internal.Types.SupervisorStatus (..) 
+, Control.Concurrent.Capataz.Internal.Types.SupervisorOptions + +, Control.Concurrent.Capataz.Internal.Types.CapatazOptions + +, Control.Concurrent.Capataz.Internal.Types.Capataz + +-- * Default Options for Capataz Processes +, Control.Concurrent.Capataz.Internal.Types.buildSupervisorOptions +, Control.Concurrent.Capataz.Internal.Types.buildSupervisorOptionsWithDefaults +, Control.Concurrent.Capataz.Internal.Types.buildWorkerOptions +, Control.Concurrent.Capataz.Internal.Types.buildWorkerOptionsWithDefaults +, Control.Concurrent.Capataz.Internal.Types.supervisorSpec +, Control.Concurrent.Capataz.Internal.Types.supervisorSpecWithDefaults +, Control.Concurrent.Capataz.Internal.Types.workerSpec +, Control.Concurrent.Capataz.Internal.Types.workerSpecWithDefaults + +-- * Lenses to modify Option Records +, Control.Concurrent.Capataz.Lens.onSystemEventL +, Control.Concurrent.Capataz.Lens.supervisorIntensityL +, Control.Concurrent.Capataz.Lens.supervisorPeriodSecondsL +, Control.Concurrent.Capataz.Lens.supervisorRestartStrategyL +, Control.Concurrent.Capataz.Lens.supervisorProcessSpecListL +, Control.Concurrent.Capataz.Lens.supervisorProcessTerminationOrderL +, Control.Concurrent.Capataz.Lens.supervisorOnIntensityReachedL +, Control.Concurrent.Capataz.Lens.supervisorOnFailureL +, Control.Concurrent.Capataz.Lens.workerOnFailureL +, Control.Concurrent.Capataz.Lens.workerOnCompletionL +, Control.Concurrent.Capataz.Lens.workerOnTerminationL +, Control.Concurrent.Capataz.Lens.workerTerminationPolicyL +, Control.Concurrent.Capataz.Lens.workerRestartStrategyL + -- * Core functionality -, forkWorker -, forkCapataz -, terminateWorker +, Control.Concurrent.Capataz.Internal.Core.forkWorker +, Control.Concurrent.Capataz.Internal.Core.forkSupervisor +, Control.Concurrent.Capataz.Internal.Core.forkCapataz +, Control.Concurrent.Capataz.Internal.Core.terminateProcess + -- * Utility functions -, capatazToAsync +, Control.Concurrent.Capataz.Internal.Core.joinCapatazThread +, 
Control.Concurrent.Capataz.Internal.Core.getSupervisorProcessId +, Control.Concurrent.Capataz.Internal.Core.getSupervisorAsync +, Control.Concurrent.Capataz.Internal.Core.getCapatazTeardown + -- * Teardown (re-exported) -, teardown +, Control.Teardown.teardown + +-- * Lens (re-exported) +, (.~) +, (&) +, Control.Concurrent.Capataz.Lens.set ) where -import Control.Concurrent.Internal.Capataz.Core (forkCapataz, forkWorker, terminateWorker) -import Control.Concurrent.Internal.Capataz.Types - ( CallbackType (..) - , Capataz (..) - , CapatazEvent (..) - , CapatazOptions (..) - , CapatazRestartStrategy (..) - , CapatazStatus (..) - , WorkerAction - , WorkerError (..) - , WorkerOptions (..) - , WorkerRestartStrategy (..) - , WorkerSpec (..) - , WorkerTerminationOrder (..) - , WorkerTerminationPolicy (..) - , defCapatazOptions - , defWorkerOptions - , defWorkerSpec - ) -import Control.Concurrent.Internal.Capataz.Util (capatazToAsync) -import Control.Teardown (teardown) +import qualified Control.Concurrent.Capataz.Internal.Core +import qualified Control.Concurrent.Capataz.Internal.Types +import Control.Concurrent.Capataz.Lens ((&), (.~)) +import qualified Control.Concurrent.Capataz.Lens +import qualified Control.Teardown diff --git a/src/Control/Concurrent/Capataz/Event.hs b/src/Control/Concurrent/Capataz/Event.hs new file mode 100644 index 0000000..9cd9495 --- /dev/null +++ b/src/Control/Concurrent/Capataz/Event.hs @@ -0,0 +1,5 @@ +module Control.Concurrent.Capataz.Event + (CapatazEvent(..)) + where + +import Control.Concurrent.Capataz.Internal.Types (CapatazEvent (..)) diff --git a/src/Control/Concurrent/Capataz/Internal/Core.hs b/src/Control/Concurrent/Capataz/Internal/Core.hs new file mode 100644 index 0000000..ef5fd54 --- /dev/null +++ b/src/Control/Concurrent/Capataz/Internal/Core.hs @@ -0,0 +1,205 @@ +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes 
#-}
+
+{-| This module contains:
+
+* Functions exported on the public API
+* High level message handlers of the supervisor thread loop (here, the
+  callback that translates monitor events of the root supervisor into
+  capataz events; see 'forkCapataz')
+
+-}
+module Control.Concurrent.Capataz.Internal.Core
+(
+  HasSupervisor(..)
+, forkWorker
+, forkSupervisor
+, forkCapataz
+, terminateProcess
+, joinCapatazThread
+, getSupervisorProcessId
+, getSupervisorAsync
+, getCapatazTeardown
+)
+where
+
+import Protolude
+
+import Control.Concurrent.MVar (newEmptyMVar, takeMVar)
+import Control.Teardown (Teardown, newTeardown)
+import Data.Time.Clock (getCurrentTime)
+
+import qualified Data.UUID.V4 as UUID (nextRandom)
+
+import qualified Control.Concurrent.Capataz.Internal.Supervisor as Supervisor
+
+import Control.Concurrent.Capataz.Internal.Types
+import qualified Control.Concurrent.Capataz.Internal.Util as Util
+
+--------------------------------------------------------------------------------
+
+-- | Utility typeclass to call the public supervision API with any type
+-- that contains a supervisor (e.g. the 'Capataz' record).
+class HasSupervisor a where
+  -- | Fetches the 'Supervisor' stored inside the given record.
+  getSupervisor :: a -> Supervisor
+
+-- | A 'Capataz' exposes its root supervisor.
+instance HasSupervisor Capataz where
+  getSupervisor Capataz {capatazSupervisor} = capatazSupervisor
+
+-- | A 'Supervisor' is trivially its own supervisor.
+instance HasSupervisor Supervisor where
+  getSupervisor = identity
+
+-- | Creates a 'Capataz' record, which holds both a root supervisor and a
+-- 'Teardown' to shut down the system. The root supervisor monitors failures on
+-- process threads defined with @supervisorProcessSpecList@ or created
+-- dynamically using 'forkWorker' or 'forkSupervisor'.
+forkCapataz :: Text -> (CapatazOptions -> CapatazOptions) -> IO Capataz
+forkCapataz capatazName modOptionsFn = do
+  -- Two distinct identifiers are generated: capatazId is used by the synthetic
+  -- parent environment built below, supervisorId by the root supervisor itself.
+  capatazId <- UUID.nextRandom
+  supervisorId <- UUID.nextRandom
+  let
+    capatazOptions@CapatazOptions { notifyEvent } =
+      defCapatazOptions capatazName modOptionsFn
+    supervisorOptions@SupervisorOptions { supervisorName } =
+      Util.capatazOptionsToSupervisorOptions capatazOptions
+    -- The root supervisor has no real parent supervisor; this environment
+    -- stands in for one and turns the monitor events of the root supervisor
+    -- into capataz-level events.
+    parentSupervisorEnv = ParentSupervisorEnv
+      { supervisorId = capatazId
+      , supervisorName = "capataz-root"
+      , supervisorNotify = \supervisorEvent -> do
+        eventTime <- getCurrentTime
+        case supervisorEvent of
+          -- NOTE: the punned supervisorId/supervisorName used in the events
+          -- below are the outer bindings (root supervisor id and the name
+          -- taken from supervisorOptions), not the fields of this record.
+          MonitorEvent ProcessFailed' { processError } -> notifyEvent
+            CapatazFailed
+              { supervisorId
+              , supervisorName
+              , eventTime
+              , supervisorError = processError
+              }
+
+          MonitorEvent ProcessTerminated'{} -> notifyEvent CapatazTerminated
+            { supervisorId
+            , supervisorName
+            , eventTime
+            }
+
+          -- The remaining messages are treated as implementation errors and
+          -- abort via panic.
+          MonitorEvent ProcessCompleted'{} ->
+            panic "Capataz completed; this should never happen"
+
+          MonitorEvent ProcessForcedRestart{} ->
+            panic
+              "Capataz was restarted from a OneForAll strategy; this should never happen"
+
+          ControlAction{} ->
+            panic "Capataz received a ControlAction message; bad implementation"
+      , notifyEvent
+      }
+
+  -- Start the root supervisor with a restart count of zero.
+  capatazSupervisor@Supervisor { supervisorEnv } <- Supervisor.supervisorMain
+    parentSupervisorEnv
+    supervisorOptions
+    supervisorId
+    0
+
+  -- The teardown sub-routine halts the root supervisor and then emits a
+  -- CapatazTerminated event stamped with the time the halt finished.
+  capatazTeardown <- newTeardown
+    "capataz"
+    ( do
+      Supervisor.haltSupervisor "capataz system shutdown" supervisorEnv
+      eventTime <- getCurrentTime
+      notifyEvent CapatazTerminated {supervisorId , supervisorName , eventTime }
+    )
+
+  return Capataz {capatazSupervisor , capatazTeardown }
+
+-- | Creates a green thread from an @IO ()@ sub-routine. Depending on options
+-- defined in the 'WorkerOptions' record, it will automatically restart this
+-- sub-routine in case of failures.
+--
+-- Related option builders:
+--
+-- * 'buildWorkerOptionsWithDefaults'
+-- * 'buildWorkerOptions'
+--
+forkWorker
+  :: HasSupervisor supervisor
+  => WorkerOptions -- ^ Settings of the worker (restart, name, callbacks, etc)
+  -> supervisor -- ^ Value holding the 'Supervisor' that will own the worker
+  -> IO WorkerId -- ^ Identifier usable to terminate the worker later on
+forkWorker workerOptions sup = do
+  replyVar <- newEmptyMVar
+  -- Ask the supervisor thread to fork the worker, then block until it hands
+  -- back the identifier of the new worker.
+  notifySupervisor $ ControlAction ForkWorker
+    { workerOptions
+    , returnWorkerId = putMVar replyVar
+    }
+  takeMVar replyVar
+  where
+    Supervisor { supervisorNotify = notifySupervisor } = getSupervisor sup
+
+-- | Spawns a green thread that watches other green threads for failures and
+-- restarts them following the settings found in 'SupervisorOptions'.
+--
+-- Related option builders:
+--
+-- * 'buildSupervisorOptionsWithDefaults'
+-- * 'buildSupervisorOptions'
+--
+forkSupervisor
+  :: HasSupervisor parentSupervisor
+  => SupervisorOptions -- ^ Settings of the new supervisor
+  -> parentSupervisor -- ^ Value holding the parent 'Supervisor'
+  -> IO Supervisor -- ^ Record used to dynamically create and supervise
+                   -- other processes
+forkSupervisor supervisorOptions parentSup = do
+  replyVar <- newEmptyMVar
+  -- Same request/reply protocol as 'forkWorker', answered with a 'Supervisor'.
+  notifyParent $ ControlAction ForkSupervisor
+    { supervisorOptions
+    , returnSupervisor = putMVar replyVar
+    }
+  takeMVar replyVar
+  where
+    Supervisor { supervisorNotify = notifyParent } = getSupervisor parentSup
+
+-- | Stops the execution of a green thread that is supervised by the given
+-- supervisor.
+--
+-- NOTE: If the 'ProcessId' maps to a worker that is configured with a
+-- 'Permanent' worker restart strategy, the worker green thread __will be
+-- restarted again__.
+--
+terminateProcess
+  :: HasSupervisor supervisor => Text -> ProcessId -> supervisor -> IO Bool
+terminateProcess processTerminationReason processId supervisor = do
+  outcomeVar <- newEmptyMVar
+  -- Send the termination request to the supervisor thread and wait for it to
+  -- report the outcome as a Bool.
+  notifySupervisor $ ControlAction TerminateProcess
+    { processId
+    , processTerminationReason
+    , notifyProcessTermination = putMVar outcomeVar
+    }
+  takeMVar outcomeVar
+  where
+    Supervisor { supervisorNotify = notifySupervisor } = getSupervisor supervisor
+
+-- | Blocks the current thread until the thread of the root supervisor of the
+-- given capataz system finishes.
+joinCapatazThread :: Capataz -> IO ()
+joinCapatazThread Capataz { capatazSupervisor = rootSupervisor } =
+  wait rootThread
+  where
+    Supervisor { supervisorAsync = rootThread } = rootSupervisor
+
+-- | Returns the 'Teardown' record of this capataz system.
+getCapatazTeardown :: Capataz -> Teardown
+getCapatazTeardown Capataz { capatazTeardown = systemTeardown } =
+  systemTeardown
+
+-- | Returns the 'Async' of a Supervisor thread.
+--
+-- NOTE: There is no dedicated accessor for the 'Async' value of the root
+-- supervisor; this is to avoid error scenarios.
+getSupervisorAsync :: Supervisor -> Async ()
+getSupervisorAsync Supervisor { supervisorAsync = supervisorThread } =
+  supervisorThread
+
+-- | Returns the process identifier of a Supervisor; normally used for
+-- termination.
+getSupervisorProcessId :: Supervisor -> ProcessId
+getSupervisorProcessId Supervisor { supervisorId = identifier } = identifier
diff --git a/src/Control/Concurrent/Capataz/Internal/Process.hs b/src/Control/Concurrent/Capataz/Internal/Process.hs
new file mode 100644
index 0000000..dd80212
--- /dev/null
+++ b/src/Control/Concurrent/Capataz/Internal/Process.hs
@@ -0,0 +1,379 @@
+{-# LANGUAGE DuplicateRecordFields #-}
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- | This module contains functionality that works both for Supervisor and
+-- Worker process types.
+module Control.Concurrent.Capataz.Internal.Process where
+
+import Protolude
+
+import Control.Concurrent.Capataz.Internal.Types
+import Control.Concurrent.Capataz.Internal.Util
+  (readProcessMap, sortProcessesByTerminationOrder)
+import Data.Time.Clock (UTCTime, getCurrentTime)
+
+-- | Returns the 'Async' that runs the given 'Process'.
+getProcessAsync :: Process -> Async ()
+getProcessAsync (WorkerProcess Worker { workerAsync }) = workerAsync
+getProcessAsync (SupervisorProcess Supervisor { supervisorAsync }) =
+  supervisorAsync
+
+-- | Returns the 'ThreadId' of the thread behind the given 'Process'.
+getProcessThreadId :: Process -> ThreadId
+getProcessThreadId process = asyncThreadId (getProcessAsync process)
+
+-- | Returns the 'ProcessId' of the given 'Process'.
+getProcessId :: Process -> ProcessId
+getProcessId (WorkerProcess Worker { workerId }) = workerId
+getProcessId (SupervisorProcess Supervisor { supervisorId }) = supervisorId
+
+-- | Returns the 'ProcessName' declared in the given 'ProcessSpec'.
+getProcessName :: ProcessSpec -> ProcessName
+getProcessName (WorkerSpec WorkerOptions { workerName }) = workerName
+getProcessName (SupervisorSpec SupervisorOptions { supervisorName }) =
+  supervisorName
+
+-- | Tells whether a 'ProcessSpec' describes a worker or a supervisor.
+getProcessType :: ProcessSpec -> ProcessType
+getProcessType WorkerSpec{} = WorkerType
+getProcessType SupervisorSpec{} = SupervisorType
+
+-- | Rebuilds the 'ProcessSpec' a running 'Process' was created from.
+getProcessSpec :: Process -> ProcessSpec
+getProcessSpec (WorkerProcess Worker { workerOptions }) =
+  WorkerSpec workerOptions
+getProcessSpec (SupervisorProcess Supervisor { supervisorOptions }) =
+  SupervisorSpec supervisorOptions
+
+-- | Utility function that notifies the capataz system callback that a
+-- Process failed.
+notifyProcessFailed :: SupervisorEnv -> Process -> SomeException -> IO ()
+notifyProcessFailed SupervisorEnv { supervisorId, supervisorName, notifyEvent } process processError
+  = getCurrentTime >>= \eventTime -> notifyEvent ProcessFailed
+    { supervisorId
+    , supervisorName
+    , processId = getProcessId process
+    , processName = getProcessName procSpec
+    , processType = getProcessType procSpec
+    , processThreadId = getProcessThreadId process
+    , processError
+    , eventTime
+    }
+  where
+    -- The spec is only needed to derive the name and type of the process.
+    procSpec = getProcessSpec process
+
+-- | Utility function that notifies the capataz system callback that a
+-- Process was terminated, together with the reason of the termination.
+notifyProcessTerminated :: SupervisorEnv -> Process -> Text -> IO ()
+notifyProcessTerminated SupervisorEnv { supervisorId, supervisorName, notifyEvent } process terminationReason
+  = getCurrentTime >>= \eventTime -> notifyEvent ProcessTerminated
+    { supervisorId
+    , supervisorName
+    , processId = getProcessId process
+    , processName = getProcessName procSpec
+    , processType = getProcessType procSpec
+    , processThreadId = getProcessThreadId process
+    , terminationReason
+    , eventTime
+    }
+  where
+    procSpec = getProcessSpec process
+
+-- | Utility function that notifies the capataz system callback that a
+-- Process was started or restarted.
+notifyProcessStarted
+  :: Maybe (ProcessId, RestartCount) -> ParentSupervisorEnv -> Process -> IO ()
+notifyProcessStarted mRestartInfo ParentSupervisorEnv { supervisorId, supervisorName, notifyEvent } process
+  = do
+    eventTime <- getCurrentTime
+    -- Restart information present means a restart; otherwise a first start.
+    notifyEvent $ case mRestartInfo of
+      Nothing -> ProcessStarted
+        { supervisorId
+        , supervisorName
+        , processId
+        , processName
+        , processType
+        , processThreadId
+        , eventTime
+        }
+      Just (_previousId, processRestartCount) -> ProcessRestarted
+        { supervisorId
+        , supervisorName
+        , processId
+        , processName
+        , processType
+        , processThreadId
+        , processRestartCount
+        , eventTime
+        }
+  where
+    procSpec = getProcessSpec process
+    processId = getProcessId process
+    processName = getProcessName procSpec
+    processType = getProcessType procSpec
+    processThreadId = getProcessThreadId process
+
+-- | Utility function that notifies that the sub-routine of a Process
+-- finished without errors; the event time is supplied by the caller.
+notifyProcessCompleted :: SupervisorEnv -> Process -> UTCTime -> IO ()
+notifyProcessCompleted SupervisorEnv { supervisorId, supervisorName, notifyEvent } process eventTime
+  = notifyEvent ProcessCompleted
+    { supervisorId
+    , supervisorName
+    , processId = getProcessId process
+    , processName = getProcessName procSpec
+    , processType = getProcessType procSpec
+    , processThreadId = getProcessThreadId process
+    , eventTime
+    }
+  where
+    procSpec = getProcessSpec process
+
+
+-- | Runs the onCompletion sub-routine of a Process; only worker specs carry
+-- one, any other spec is a no-op.
+callProcessOnCompletion :: ProcessSpec -> IO ()
+callProcessOnCompletion (WorkerSpec WorkerOptions { workerOnCompletion }) =
+  workerOnCompletion
+callProcessOnCompletion _ = return ()
+
+-- | Utility function to execute a Process onFailure sub-routine.
+callProcessOnFailure :: ProcessSpec -> SomeException -> IO ()
+callProcessOnFailure procSpec err = case procSpec of
+  WorkerSpec WorkerOptions { workerOnFailure } -> workerOnFailure err
+  SupervisorSpec SupervisorOptions { supervisorOnFailure } ->
+    supervisorOnFailure err
+
+-- | Utility function to execute a Process onTermination sub-routine. Only
+-- worker specs carry an onTermination callback; any other spec is a no-op.
+callProcessOnTermination :: ProcessSpec -> IO ()
+callProcessOnTermination procSpec = case procSpec of
+  WorkerSpec WorkerOptions { workerOnTermination } -> workerOnTermination
+  _ -> return ()
+
+-- | Handles errors produced - or thrown to - a process thread, and translates
+-- them into the 'MonitorEvent' that describes what happened to the process.
+--
+-- Depending on the exception, the matching callback of the process is executed
+-- (wrapped with the given unmask function) and a @ProcessCallbackExecuted@
+-- event is emitted; a failing callback turns the result into a @ProcessFailed'@
+-- that wraps a @ProcessCallbackFailed@ error.
+handleProcessException
+  :: (IO () -> IO a) -- ^ Function used to run callbacks unmasked
+  -> ParentSupervisorEnv -- ^ Environment of the supervisor that is notified
+  -> ProcessSpec -- ^ Spec of the process that received the exception
+  -> ProcessId -- ^ Identifier of the process
+  -> RestartCount -- ^ Restart count reported in the resulting event
+  -> SomeException -- ^ Exception received by the process thread
+  -> IO MonitorEvent
+handleProcessException unmask ParentSupervisorEnv { supervisorId, supervisorName, notifyEvent } procSpec processId restartCount err
+  = do
+    let processName = getProcessName procSpec
+    -- Thread that executes this handler; presumably the process thread itself
+    -- (TODO confirm against callers).
+    processThreadId <- myThreadId
+    monitorEventTime <- getCurrentTime
+    case fromException err of
+      -- Forced restart: no callback is executed and no event is emitted here.
+      Just RestartProcessException -> return ProcessForcedRestart
+        { processId
+        , processName
+        , monitorEventTime
+        }
+
+      -- Termination request: run the onTermination callback.
+      Just TerminateProcessException { processTerminationReason } -> do
+        eErrResult <- try $ unmask $ callProcessOnTermination procSpec
+
+        notifyEvent ProcessCallbackExecuted
+          { supervisorId
+          , supervisorName
+          , processThreadId
+          , processId
+          , processName
+          , processType = getProcessType procSpec
+          , processCallbackError = either Just (const Nothing) eErrResult
+          , processCallbackType = OnTermination
+          , eventTime = monitorEventTime
+          }
+
+        case eErrResult of
+          -- A failing callback turns the termination into a failure.
+          Left processCallbackError -> return ProcessFailed'
+            { processId
+            , processName
+            , processError = toException ProcessCallbackFailed
+              { processId
+              , processCallbackError
+              , processCallbackType = OnTermination
+              , processError = Just err
+              }
+            , processRestartCount = restartCount
+            , monitorEventTime
+            }
+          Right _ -> return ProcessTerminated'
+            { processId
+            , processName
+            , monitorEventTime
+            , processTerminationReason
+            , processRestartCount = restartCount
+            }
+
+      -- Brutal termination: reported as terminated without running callbacks.
+      Just BrutallyTerminateProcessException { processTerminationReason } ->
+        return ProcessTerminated'
+          { processId
+          , processName
+          , monitorEventTime
+          , processTerminationReason
+          , processRestartCount = restartCount
+          }
+
+      -- This exception was an error from the given sub-routine
+      _ -> do
+        eErrResult <- try $ unmask $ callProcessOnFailure procSpec err
+
+        notifyEvent ProcessCallbackExecuted
+          { supervisorId
+          , supervisorName
+          , processId
+          , processName
+          , processType = getProcessType procSpec
+          , processThreadId
+          , processCallbackError = either Just (const Nothing) eErrResult
+          , processCallbackType = OnFailure
+          , eventTime = monitorEventTime
+          }
+
+        case eErrResult of
+          -- The callback failed as well: report the callback failure and keep
+          -- the original error inside of it.
+          Left processCallbackError -> return ProcessFailed'
+            { processId
+            , processName
+            , monitorEventTime
+            , processRestartCount = restartCount
+            , processError = toException ProcessCallbackFailed
+              { processId
+              , processCallbackError
+              , processCallbackType = OnFailure
+              , processError = Just err
+              }
+            }
+          Right _ -> return ProcessFailed'
+            { processId
+            , processName
+            , processError = err
+            , processRestartCount = restartCount
+            , monitorEventTime
+            }
+
+-- | Handles completion of a Process sub-routine.
+handleProcessCompletion
+  :: (IO () -> IO a)
+  -> ParentSupervisorEnv
+  -> ProcessSpec
+  -> ProcessId
+  -> RestartCount
+  -> IO MonitorEvent
+handleProcessCompletion unmask ParentSupervisorEnv { supervisorId, supervisorName, notifyEvent } procSpec processId restartCount
+  = do
+    processThreadId <- myThreadId
+    monitorEventTime <- getCurrentTime
+    -- Run the onCompletion callback and keep only its error, if any.
+    callbackResult <- try $ unmask $ callProcessOnCompletion procSpec
+    let processCallbackError = either Just (const Nothing) callbackResult
+
+    notifyEvent ProcessCallbackExecuted
+      { supervisorId
+      , supervisorName
+      , processId
+      , processName
+      , processType = getProcessType procSpec
+      , processThreadId
+      , processCallbackError
+      , processCallbackType = OnCompletion
+      , eventTime = monitorEventTime
+      }
+
+    -- A failing callback turns the completion into a failure.
+    return $ case processCallbackError of
+      Just callbackFailure -> ProcessFailed'
+        { processId
+        , processName
+        , processError = toException ProcessCallbackFailed
+          { processId
+          , processCallbackError = callbackFailure
+          , processError = Nothing
+          , processCallbackType = OnCompletion
+          }
+        , processRestartCount = restartCount
+        , monitorEventTime
+        }
+      Nothing ->
+        ProcessCompleted' {processName , processId , monitorEventTime }
+  where
+    processName = getProcessName procSpec
+
+-- | Internal utility function that triggers the termination of a Process and
+-- then notifies about it.
+--
+-- NOTE: Unlike the function of the same name found on the public API, this
+-- one gets executed on the thread of the supervisor.
+--
+terminateProcess
+  :: Text -- ^ Description that indicates _why_ there is a termination
+  -> SupervisorEnv
+  -> Process
+  -> IO ()
+terminateProcess processTerminationReason env process =
+  stopProcess >> notifyProcessTerminated env process processTerminationReason
+  where
+    stopProcess = case process of
+      WorkerProcess worker -> terminateWorker processTerminationReason worker
+      SupervisorProcess supervisor ->
+        terminateSupervisor processTerminationReason supervisor
+
+-- | Internal utility function that executes the termination policy of a
+-- Worker.
+terminateWorker :: Text -> Worker -> IO ()
+terminateWorker processTerminationReason Worker { workerId = processId, workerOptions = WorkerOptions { workerTerminationPolicy }, workerAsync }
+  = case workerTerminationPolicy of
+    Infinity -> cancelGracefully
+
+    BrutalTermination -> cancelBrutally
+
+    -- Race the graceful cancellation against a timer that, once expired,
+    -- cancels the worker with the brutal exception instead.
+    TimeoutMillis millis ->
+      race_ (threadDelay (millis * 1000) >> cancelBrutally) cancelGracefully
+  where
+    cancelGracefully = cancelWith
+      workerAsync
+      TerminateProcessException {processId , processTerminationReason }
+
+    cancelBrutally = cancelWith
+      workerAsync
+      BrutallyTerminateProcessException
+        { processId
+        , processTerminationReason
+        }
+
+-- | Internal utility function that executes the termination of a Supervisor:
+-- its thread is cancelled with a 'TerminateProcessException'.
+terminateSupervisor :: Text -> Supervisor -> IO ()
+terminateSupervisor processTerminationReason Supervisor { supervisorId, supervisorAsync }
+  = cancelWith supervisorAsync terminationSignal
+  where
+    terminationSignal = TerminateProcessException
+      { processId = supervisorId
+      , processTerminationReason
+      }
+
+-- | Internal sub-routine that terminates the processes of a supervisor, used
+-- when a supervisor instance is terminated.
+terminateProcessMap :: Text -> SupervisorEnv -> IO ()
+terminateProcessMap terminationReason env@SupervisorEnv { supervisorId, supervisorName, supervisorProcessTerminationOrder, notifyEvent }
+  = do
+    startTime <- getCurrentTime
+    processMap <- readProcessMap env
+
+    -- Processes are terminated one by one, following the termination order
+    -- configured on the supervisor.
+    let processList = sortProcessesByTerminationOrder
+          supervisorProcessTerminationOrder
+          processMap
+
+    notifyEvent ProcessTerminationStarted
+      { supervisorId
+      , supervisorName
+      , terminationReason
+      , eventTime = startTime
+      }
+
+    forM_ processList (terminateProcess terminationReason env)
+
+    -- Take a fresh timestamp: terminating the processes may take a while, and
+    -- reusing the start time would make the finished event look instantaneous.
+    finishTime <- getCurrentTime
+    notifyEvent ProcessTerminationFinished
+      { supervisorId
+      , supervisorName
+      , terminationReason
+      , eventTime = finishTime
+      }
diff --git a/src/Control/Concurrent/Capataz/Internal/Supervisor.hs b/src/Control/Concurrent/Capataz/Internal/Supervisor.hs
new file mode 100644
index 0000000..54f38f0
--- /dev/null
+++ b/src/Control/Concurrent/Capataz/Internal/Supervisor.hs
@@ -0,0 +1,569 @@
+{-# LANGUAGE DuplicateRecordFields #-}
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+module Control.Concurrent.Capataz.Internal.Supervisor where
+
+import Control.Concurrent.Async (asyncWithUnmask)
+import Control.Concurrent.STM.TQueue (newTQueueIO, readTQueue, writeTQueue)
+import Control.Concurrent.STM.TVar (newTVarIO)
+import Data.IORef (newIORef)
+import Data.Time.Clock (NominalDiffTime, UTCTime, diffUTCTime, getCurrentTime)
+import Protolude
+
+import qualified Data.HashMap.Strict as HashMap
+import qualified Data.UUID.V4 as UUID
+
+import Control.Concurrent.Capataz.Internal.Types
+
+import qualified Control.Concurrent.Capataz.Internal.Process as Process
+import qualified Control.Concurrent.Capataz.Internal.Util as Util
+import qualified Control.Concurrent.Capataz.Internal.Worker as Worker
+
+-- | Internal function that forks a supervisor thread; note this is different
+-- from the public
@forkSupervisor@ function which sends a message to the
+-- supervisor loop.
+forkSupervisor
+  :: ParentSupervisorEnv
+  -> SupervisorOptions
+  -> Maybe (ProcessId, RestartCount)
+  -> IO Supervisor
+forkSupervisor parentEnv supervisorOptions mRestartInfo = do
+  -- A restart keeps the previous identifier and restart count; a first start
+  -- gets a new identifier and a restart count of zero.
+  (supervisorId, restartCount) <- maybe freshIdentity pure mRestartInfo
+
+  supervisor <- supervisorMain parentEnv
+                               supervisorOptions
+                               supervisorId
+                               restartCount
+
+  Process.notifyProcessStarted mRestartInfo
+                               parentEnv
+                               (SupervisorProcess supervisor)
+  return supervisor
+  where
+    freshIdentity = (,) <$> UUID.nextRandom <*> pure 0
+
+-- | Assembles the 'SupervisorEnv' record that is threaded through all the
+-- internal APIs of the supervision logic. The record is filled through
+-- record wildcards, so every binding in scope here is named after a field.
+buildSupervisorEnv
+  :: (CapatazEvent -> IO ())
+  -> (SupervisorMessage -> IO ())
+  -> STM SupervisorMessage
+  -> SupervisorId
+  -> SupervisorOptions
+  -> IO SupervisorEnv
+buildSupervisorEnv notifyEvent supervisorNotify supervisorGetNotification supervisorId supervisorOptions@SupervisorOptions {..}
+  = do
+    -- A new supervisor starts in the Initializing status with no processes.
+    supervisorStatusVar <- newTVarIO Initializing
+    supervisorProcessMap <- newIORef mempty
+    return SupervisorEnv {..}
+
+-- | Handles an event produced by one of the processes this supervisor monitors.
+handleMonitorEvent :: SupervisorEnv -> MonitorEvent -> IO Bool
+handleMonitorEvent env monitorEv = processEvent >> return True
+  where
+    processEvent = case monitorEv of
+      -- Nothing to do here, the restart is handled on the restartWorkers
+      -- sub-routine
+      ProcessForcedRestart{} -> return ()
+
+      ProcessCompleted' { processId, monitorEventTime } ->
+        handleProcessCompleted env processId monitorEventTime
+
+      ProcessFailed' { processId, processError, processRestartCount } ->
+        handleProcessFailed env processId processError processRestartCount
+
+      ProcessTerminated' { processId, processRestartCount, processTerminationReason }
+        -> handleProcessTerminated env
+                                   processId
+                                   processTerminationReason
+                                   processRestartCount
+
+-- | Handles an action triggered through the public Capataz API. Every action
+-- answers its caller through the callback it carries; the result is always
+-- 'True'.
+handleControlAction :: SupervisorEnv -> ControlAction -> IO Bool
+handleControlAction env controlAction = do
+  case controlAction of
+    ForkWorker { workerOptions, returnWorkerId } -> do
+      worker@Worker { workerId } <- Worker.forkWorker parentEnv
+                                                      workerOptions
+                                                      Nothing
+      Util.appendProcessToMap env (WorkerProcess worker)
+      returnWorkerId workerId
+
+    ForkSupervisor { supervisorOptions, returnSupervisor } -> do
+      supervisor <- forkSupervisor parentEnv supervisorOptions Nothing
+      Util.appendProcessToMap env (SupervisorProcess supervisor)
+      returnSupervisor supervisor
+
+    TerminateProcess { processId, processTerminationReason, notifyProcessTermination }
+      -> do
+        mProcess <- Util.fetchProcess env processId
+        case mProcess of
+          -- Unknown process identifier: report that nothing was terminated.
+          Nothing -> notifyProcessTermination False
+          Just process -> do
+            Process.terminateProcess processTerminationReason env process
+            notifyProcessTermination True
+  return True
+  where
+    parentEnv = Util.toParentSupervisorEnv env
+
+-- | Executes the shutdown operation of a Supervisor, including the termination
+-- of its supervised processes.
+haltSupervisor :: Text -> SupervisorEnv -> IO ()
+haltSupervisor reason env = do
+  -- The status goes Halting -> Halted around the termination of the processes;
+  -- the process map is emptied once they are terminated.
+  Util.writeSupervisorStatus env Halting
+  Process.terminateProcessMap reason env
+  Util.resetProcessMap env (const HashMap.empty)
+  Util.writeSupervisorStatus env Halted
+
+
+-- | Handles all messages that a Supervisor can receive from its monitored
+-- processes or from the public API.
+handleSupervisorMessage :: SupervisorEnv -> SupervisorMessage -> IO Bool
+handleSupervisorMessage env message = case message of
+  ControlAction controlAction -> handleControlAction env controlAction
+  MonitorEvent monitorEvent -> handleMonitorEvent env monitorEvent
+
+-- | This sub-routine executes the main thread loop of a 'Supervisor' instance.
+--
+-- On each iteration it reads the supervisor status together with the next
+-- message, and acts depending on the status. An exception raised while waiting
+-- for (or while handling) a message halts the supervisor and is reported to
+-- the parent supervisor as a monitor event.
+supervisorLoop
+  :: (forall b . IO b -> IO b) -- ^ Function used to run actions unmasked
+  -> ParentSupervisorEnv -- ^ Environment of the parent supervisor
+  -> SupervisorEnv -- ^ Environment of this supervisor
+  -> RestartCount -- ^ Restart count reported when this supervisor fails
+  -> IO ()
+-- NOTE: supervisorId and supervisorName are bound from the /parent/
+-- environment, while processId and processName are the identifier and name of
+-- this supervisor.
+supervisorLoop unmask parentEnv@ParentSupervisorEnv { supervisorId, supervisorName, supervisorNotify = notifyParentSupervisor } env@SupervisorEnv { supervisorId = processId, supervisorName = processName, supervisorOptions, supervisorStatusVar, supervisorGetNotification, notifyEvent } restartCount
+  = do
+    processThreadId <- myThreadId
+    -- Status and next message are read in a single STM transaction.
+    loopResult <-
+      unmask
+      $ try
+      $ atomically
+      $ (,)
+      <$> Util.readSupervisorStatusSTM supervisorStatusVar
+      <*> supervisorGetNotification
+
+    case loopResult of
+      -- An exception arrived while waiting for a message: halt and let the
+      -- parent supervisor know what happened.
+      Left supervisorError -> do
+        haltSupervisor (show supervisorError) env
+        result <- Process.handleProcessException
+          unmask
+          parentEnv
+          (SupervisorSpec supervisorOptions)
+          processId
+          restartCount
+          supervisorError
+        notifyParentSupervisor (MonitorEvent result)
+
+      Right (status, message) -> case status of
+        -- NOTE(review): the message read above is not handled on this branch;
+        -- presumably readSupervisorStatusSTM never yields Initializing and
+        -- this is a defensive branch -- confirm against Util.
+        Initializing -> do
+          eventTime <- getCurrentTime
+          notifyEvent InvalidSupervisorStatusReached
+            { supervisorId
+            , supervisorName
+            , eventTime
+            }
+          supervisorLoop unmask parentEnv env restartCount
+
+        Running -> do
+          eContinueLoop <- try $ unmask $ handleSupervisorMessage env message
+          case eContinueLoop of
+            -- Same failure path as above, for errors raised by a handler.
+            Left supervisorError -> do
+              haltSupervisor (show supervisorError) env
+              result <- Process.handleProcessException
+                unmask
+                parentEnv
+                (SupervisorSpec supervisorOptions)
+                processId
+                restartCount
+                supervisorError
+              notifyParentSupervisor (MonitorEvent result)
+
+            Right continueLoop
+              | continueLoop -> supervisorLoop unmask parentEnv env restartCount
+              -- A handler asked to stop: report the normal termination of
+              -- this supervisor and leave the loop.
+              | otherwise -> do
+                eventTime <- getCurrentTime
+                notifyEvent ProcessTerminated
+                  { supervisorId
+                  , supervisorName
+                  , eventTime
+                  , processId
+                  , processName
+                  , processThreadId
+                  , processType = SupervisorType
+                  , terminationReason = "Supervisor normal termination"
+                  }
+
+        Halting ->
+          -- Discard messages when halting
+          return ()
+
+        Halted ->
+          -- Discard messages when halted
+          return ()
+
+-- | This sub-routine starts a Supervisor thread and initializes its
+-- processList.
+--
+-- The loop thread is created first, then every process of
+-- @supervisorProcessSpecList@ is forked and registered, and only then the
+-- supervisor status is set to @Running@.
+supervisorMain
+  :: ParentSupervisorEnv -- ^ Environment of the parent supervisor
+  -> SupervisorOptions -- ^ Settings of the supervisor to start
+  -> SupervisorId -- ^ Identifier assigned to the supervisor
+  -> RestartCount -- ^ Restart count handed to the supervisor loop
+  -> IO Supervisor
+supervisorMain parentEnv@ParentSupervisorEnv { notifyEvent } supervisorOptions@SupervisorOptions { supervisorName, supervisorProcessSpecList } supervisorId restartCount
+  = do
+    supervisorCreationTime <- getCurrentTime
+
+    -- Messages for this supervisor travel through a dedicated queue.
+    supervisorQueue <- newTQueueIO
+    let supervisorNotify = atomically . writeTQueue supervisorQueue
+        supervisorGetNotification = readTQueue supervisorQueue
+
+    supervisorEnv@SupervisorEnv{} <- buildSupervisorEnv
+      notifyEvent
+      supervisorNotify
+      supervisorGetNotification
+      supervisorId
+      supervisorOptions
+
+    supervisorAsync <- asyncWithUnmask $ \unmask -> do
+      Util.setProcessThreadName supervisorId supervisorName
+      supervisorLoop unmask parentEnv supervisorEnv restartCount
+
+    -- Fork the statically declared processes; none of them is a restart, hence
+    -- the Nothing restart information.
+    forM_
+      supervisorProcessSpecList
+      ( \processSpec -> case processSpec of
+        WorkerSpec workerOptions -> do
+          worker <- Worker.forkWorker
+            (Util.toParentSupervisorEnv supervisorEnv)
+            workerOptions
+            Nothing
+          Util.appendProcessToMap supervisorEnv (WorkerProcess worker)
+
+        SupervisorSpec childSupervisorOptions -> do
+          supervisor <- forkSupervisor
+            (Util.toParentSupervisorEnv supervisorEnv)
+            childSupervisorOptions
+            Nothing
+          Util.appendProcessToMap supervisorEnv (SupervisorProcess supervisor)
+      )
+
+    Util.writeSupervisorStatus supervisorEnv Running
+
+    return Supervisor
+      { supervisorId
+      , supervisorName
+      , supervisorAsync
+      , supervisorOptions
+      , supervisorEnv
+      , supervisorNotify
+      , supervisorCreationTime
+      }
+
+--------------------------------------------------------------------------------
+
+-- | Returns the time elapsed between the given timestamp and the current
+-- time, so that we keep track of a Supervisor error intensity.
+calcDiffSeconds :: UTCTime -> IO NominalDiffTime
+calcDiffSeconds creationTime = do
+  currentTime <- getCurrentTime
+  return $ diffUTCTime currentTime creationTime
+
+-- | Checks restart counts and worker start times to assess if the Supervisor
+-- error intensity has been breached, see 'ProcessRestartAction' for possible
+-- outcomes.
+calcRestartAction
+  :: SupervisorEnv -> Int -> NominalDiffTime -> ProcessRestartAction
+calcRestartAction SupervisorEnv { supervisorIntensity, supervisorPeriodSeconds } restartCount diffSeconds
+  | insidePeriod && intensityReached = HaltSupervisor
+  | diffSeconds > supervisorPeriodSeconds = ResetRestartCount
+  | otherwise = IncreaseRestartCount
+ where
+  -- The process lived for less than the supervisor's intensity period.
+  insidePeriod = diffSeconds < supervisorPeriodSeconds
+  -- The restart count is at (or above) the configured intensity.
+  intensityReached = restartCount >= supervisorIntensity
+
+-- | Applies the "SupervisorRestartStrategy" configured on the supervisor to a
+-- process that has to be restarted:
+--
+-- * "AllForOne" restarts the whole process list and replaces the process map
+--   with the restarted processes.
+--
+-- * "OneForOne" replaces only the given process in the process map.
+execCapatazRestartStrategy
+  :: SupervisorEnv -> ProcessId -> ProcessSpec -> Int -> IO ()
+execCapatazRestartStrategy supervisorEnv@SupervisorEnv { supervisorRestartStrategy } processId processSpec processRestartCount
+  = case supervisorRestartStrategy of
+    AllForOne -> restartEverything
+    OneForOne -> restartSingle
+ where
+  restartEverything = do
+    restarted <- restartProcessList supervisorEnv processId processRestartCount
+    let restartedMap =
+          HashMap.fromList [ (Util.getProcessId p, p) | p <- restarted ]
+    Util.resetProcessMap supervisorEnv (const restartedMap)
+
+  restartSingle = do
+    Util.removeProcessFromMap supervisorEnv processId
+    replacement <- case processSpec of
+      WorkerSpec workerOptions ->
+        restartWorker supervisorEnv workerOptions processId processRestartCount
+
+      SupervisorSpec supervisorOptions -> restartSupervisor
+        (Util.toParentSupervisorEnv supervisorEnv)
+        supervisorOptions
+        processId
+        processRestartCount
+    Util.appendProcessToMap supervisorEnv replacement
+
+-- | Executes a restart action returned from the invocation of
+-- "calcRestartAction".
+execRestartAction
+  :: SupervisorEnv
+  -> ProcessId
+  -> ProcessSpec
+  -> Text
+  -> UTCTime
+  -> Int
+  -> IO ()
+execRestartAction supervisorEnv@SupervisorEnv { supervisorOnIntensityReached } processId processSpec processName processCreationTime processRestartCount
+  = do
+    elapsed <- calcDiffSeconds processCreationTime
+    case calcRestartAction supervisorEnv processRestartCount elapsed of
+      HaltSupervisor -> do
+        -- The callback is best-effort: whatever it throws is ignored so the
+        -- intensity error below is always the one that gets raised.
+        (_ :: Either SomeException ()) <- try supervisorOnIntensityReached
+        throwIO SupervisorIntensityReached
+          { processId
+          , processName
+          , processRestartCount
+          }
+
+      ResetRestartCount    -> restartWith 0
+
+      IncreaseRestartCount -> restartWith (succ processRestartCount)
+ where
+  -- Runs the supervisor restart strategy with the given restart count.
+  restartWith = execCapatazRestartStrategy supervisorEnv processId processSpec
+
+
+--------------------------------------------------------------------------------
+
+-- | Restarts _all_ processes that are supervised by Supervisor, invoked when
+-- one worker green thread fails and causes sibling process threads to get
+-- restarted as well (e.g. "AllForOne" supervisor restart strategy).
+restartProcessList :: SupervisorEnv -> ProcessId -> RestartCount -> IO [Process]
+restartProcessList supervisorEnv@SupervisorEnv { supervisorProcessTerminationOrder } failingProcessId restartCount
+  = do
+    processMap <- Util.readProcessMap supervisorEnv
+    let orderedProcesses = Util.sortProcessesByTerminationOrder
+          supervisorProcessTerminationOrder
+          processMap
+    -- Temporary workers yield 'Nothing' and are therefore left out of the
+    -- returned list.
+    catMaybes <$> forM orderedProcesses restartOne
+ where
+  -- Cancels (when needed) and restarts a single process. The failing id may
+  -- belong to a worker or to a supervisor, hence "ProcessId" in the signature.
+  restartOne process = do
+    -- Only siblings of the failing process are cancelled here; the failing
+    -- process itself is skipped (presumably its thread already finished).
+    unless (failingProcessId == Process.getProcessId process)
+      $ forceRestartProcess supervisorEnv process
+
+    case process of
+      WorkerProcess Worker { workerId, workerOptions } -> do
+        let WorkerOptions { workerRestartStrategy } = workerOptions
+        case workerRestartStrategy of
+          Temporary -> return Nothing
+          _ ->
+            Just
+              <$> restartWorker supervisorEnv
+                                workerOptions
+                                workerId
+                                restartCount
+
+      SupervisorProcess Supervisor { supervisorId, supervisorOptions } ->
+        Just
+          <$> restartSupervisor (Util.toParentSupervisorEnv supervisorEnv)
+                                supervisorOptions
+                                supervisorId
+                                restartCount
+
+-- | Sub-routine that is used when there is a restart request sent to a Process
+-- caused by an "AllForOne" restart from a failing sibling process.
+--
+-- Emits the process-terminated notification with the reason
+-- @"forced restart"@ and then cancels the process thread with
+-- "RestartProcessException".
+forceRestartProcess :: SupervisorEnv -> Process -> IO ()
+forceRestartProcess env process = do
+  Process.notifyProcessTerminated env process "forced restart"
+  cancelWith (Process.getProcessAsync process) RestartProcessException
+
+-- | Starts a new worker thread taking into account an existing "WorkerId" and
+-- keeping a "RestartCount" to manage the Supervisor error intensity.
+restartWorker
+  :: SupervisorEnv -> WorkerOptions -> WorkerId -> RestartCount -> IO Process
+restartWorker supervisorEnv workerOptions workerId restartCount = do
+  worker <- Worker.forkWorker parentEnv
+                              workerOptions
+                              (Just (workerId, restartCount))
+  return (WorkerProcess worker)
+  where parentEnv = Util.toParentSupervisorEnv supervisorEnv
+
+-- | Forks a Supervisor thread again, reusing the id of the previous instance
+-- and carrying the "RestartCount" the parent Supervisor uses for its error
+-- intensity check.
+restartSupervisor
+  :: ParentSupervisorEnv
+  -> SupervisorOptions
+  -> ProcessId
+  -> RestartCount
+  -> IO Process
+restartSupervisor parentEnv supervisorOptions processId restartCount = do
+  supervisor <- forkSupervisor parentEnv
+                               supervisorOptions
+                               (Just (processId, restartCount))
+  return (SupervisorProcess supervisor)
+
+
+--------------------------------------------------------------------------------
+
+-- | Decides what happens to a worker whose sub-routine finished without
+-- errors: "Permanent" workers are restarted, any other worker is removed from
+-- the process map.
+handleWorkerCompleted :: SupervisorEnv -> Worker -> IO ()
+handleWorkerCompleted env Worker { workerId, workerOptions, workerCreationTime }
+  = case workerRestartStrategy of
+    -- NOTE: A completion is never accounted as an error of the supervised
+    -- thread, so the restart always starts from a restart count of zero.
+    --
+    -- TODO: Notify a warning around having a workerRestartStrategy different
+    -- than Temporary on workers that may complete.
+    Permanent -> execRestartAction env
+                                   workerId
+                                   (WorkerSpec workerOptions)
+                                   workerName
+                                   workerCreationTime
+                                   0
+
+    _ -> Util.removeProcessFromMap env workerId
+ where
+  WorkerOptions { workerName, workerRestartStrategy } = workerOptions
+
+-- | Executes restart strategy for when a process finishes its execution because
+-- of a completion (e.g. worker sub-routine finished without any errors).
+handleProcessCompleted :: SupervisorEnv -> ProcessId -> UTCTime -> IO ()
+handleProcessCompleted env processId completionTime =
+  -- An id that is not in the process map is silently ignored.
+  Util.fetchProcess env processId >>= maybe (return ()) onCompleted
+ where
+  onCompleted process = do
+    Process.notifyProcessCompleted env process completionTime
+    case process of
+      WorkerProcess worker -> handleWorkerCompleted env worker
+      -- Only workers are expected to complete.
+      _ ->
+        panic
+          $  "ERROR: Supervisor ("
+          <> show (Process.getProcessId process)
+          <> ") should never complete"
+
+-- | Decides what happens to a worker that finished with a failure:
+-- "Temporary" workers are removed from the process map, every other worker
+-- goes through the restart action.
+handleWorkerFailed :: SupervisorEnv -> Worker -> Int -> IO ()
+handleWorkerFailed env Worker { workerId, workerCreationTime, workerOptions } restartCount
+  = case workerRestartStrategy of
+    Temporary -> Util.removeProcessFromMap env workerId
+    _         -> execRestartAction env
+                                   workerId
+                                   (WorkerSpec workerOptions)
+                                   workerName
+                                   workerCreationTime
+                                   restartCount
+ where
+  WorkerOptions { workerName, workerRestartStrategy } = workerOptions
+
+-- | Runs the restart action for a child supervisor that finished with a
+-- failure; supervisors are always restarted.
+handleSupervisorFailed :: SupervisorEnv -> Supervisor -> Int -> IO ()
+handleSupervisorFailed env Supervisor { supervisorId, supervisorCreationTime, supervisorOptions } restartCount
+  = execRestartAction env
+                      supervisorId
+                      (SupervisorSpec supervisorOptions)
+                      supervisorName
+                      supervisorCreationTime
+                      restartCount
+  where SupervisorOptions { supervisorName } = supervisorOptions
+
+-- | Executes restart strategy for when a process finishes its execution because
+-- of a failure.
+handleProcessFailed
+  :: SupervisorEnv -> ProcessId -> SomeException -> Int -> IO ()
+handleProcessFailed env processId processError restartCount =
+  -- The id may refer to a worker or to a supervisor, hence "ProcessId" (as in
+  -- the sibling completed/terminated handlers). Unknown ids are ignored.
+  Util.fetchProcess env processId >>= maybe (return ()) onFailed
+ where
+  onFailed process = do
+    Process.notifyProcessFailed env process processError
+    case process of
+      WorkerProcess worker -> handleWorkerFailed env worker restartCount
+
+      SupervisorProcess supervisor ->
+        handleSupervisorFailed env supervisor restartCount
+
+-- | Decides what happens to a worker that was terminated by its supervisor:
+-- "Permanent" workers go through the restart action, every other worker is
+-- removed from the process map.
+handleWorkerTerminated :: SupervisorEnv -> Worker -> Int -> IO ()
+handleWorkerTerminated env Worker { workerId, workerCreationTime, workerOptions } restartCount
+  = case workerRestartStrategy of
+    Permanent -> execRestartAction env
+                                   workerId
+                                   (WorkerSpec workerOptions)
+                                   workerName
+                                   workerCreationTime
+                                   restartCount
+
+    _ -> Util.removeProcessFromMap env workerId
+ where
+  WorkerOptions { workerName, workerRestartStrategy } = workerOptions
+
+-- | Runs the restart action for a child supervisor that was terminated by its
+-- parent supervisor; supervisors are always restarted.
+handleSupervisorTerminated :: SupervisorEnv -> Supervisor -> Int -> IO ()
+handleSupervisorTerminated env Supervisor { supervisorId, supervisorCreationTime, supervisorOptions } restartCount
+  = execRestartAction env
+                      supervisorId
+                      (SupervisorSpec supervisorOptions)
+                      supervisorName
+                      supervisorCreationTime
+                      restartCount
+  where SupervisorOptions { supervisorName } = supervisorOptions
+
+-- | Executes restart strategy for when a process finishes its execution because
+-- of a termination from its supervisor.
+handleProcessTerminated :: SupervisorEnv -> ProcessId -> Text -> Int -> IO ()
+handleProcessTerminated env processId terminationReason restartCount = do
+  mProcess <- Util.fetchProcess env processId
+  case mProcess of
+    -- An id that is not in the process map is silently ignored.
+    Nothing -> return ()
+    Just process -> do
+      -- Emit the termination notification before applying the restart logic.
+      Process.notifyProcessTerminated env process terminationReason
+      case process of
+        WorkerProcess worker -> handleWorkerTerminated env worker restartCount
+
+        SupervisorProcess supervisor ->
+          handleSupervisorTerminated env supervisor restartCount
diff --git a/src/Control/Concurrent/Capataz/Internal/Types.hs b/src/Control/Concurrent/Capataz/Internal/Types.hs
new file mode 100644
index 0000000..3ce4e9e
--- /dev/null
+++ b/src/Control/Concurrent/Capataz/Internal/Types.hs
@@ -0,0 +1,691 @@
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DuplicateRecordFields #-}
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+{-| This module contains all the types used across all the other modules -}
+module Control.Concurrent.Capataz.Internal.Types where
+
+import Protolude
+
+import Control.Concurrent.STM.TVar (TVar)
+import Control.Teardown (ITeardown (..), Teardown)
+import Data.Default (Default (..))
+import Data.HashMap.Strict (HashMap)
+import Data.IORef (IORef)
+import Data.Time.Clock (NominalDiffTime, UTCTime)
+import Data.UUID (UUID)
+
+-- NOTE: every id alias below is a plain synonym of 'UUID', so the compiler
+-- does not tell a "WorkerId" apart from a "SupervisorId" or a "ProcessId".
+type CapatazId = UUID
+type WorkerId = UUID
+type SupervisorId = UUID
+type ProcessId = UUID
+type WorkerAction = IO ()
+type ProcessThreadId = ThreadId
+type ProcessName = Text
+type CapatazName = Text
+type SupervisorName = Text
+type WorkerName = Text
+type RestartCount = Int
+type ProcessMap = HashMap ProcessId Process
+type ParentSupervisor = Supervisor
+
+-- | Event delivered to the "notifyEvent" callback sub-routine; these events can
+-- be used to monitor the capataz system and track what it is doing, providing
+-- high levels of telemetry for all supervisors and workers of a capataz system,
+-- ergo, should be used for logging, monitoring and testing
+-- purposes.
+data CapatazEvent
+  = InvalidSupervisorStatusReached {
+    supervisorId :: !SupervisorId
+  , supervisorName :: !SupervisorName
+  , eventTime :: !UTCTime
+  }
+  | SupervisorStatusChanged {
+    supervisorId :: !SupervisorId
+  , supervisorName :: !SupervisorName
+  , prevSupervisorStatus :: !SupervisorStatus
+  , newSupervisorStatus :: !SupervisorStatus
+  , eventTime :: !UTCTime
+  }
+  | ProcessTerminated {
+    supervisorId :: !SupervisorId
+  , supervisorName :: !SupervisorName
+  , processThreadId :: !ProcessThreadId
+  , processId :: !ProcessId
+  , processName :: !ProcessName
+  , processType :: !ProcessType
+  , terminationReason :: !Text
+  , eventTime :: !UTCTime
+  }
+  | ProcessStarted {
+    supervisorId :: !SupervisorId
+  , supervisorName :: !SupervisorName
+  , processThreadId :: !ProcessThreadId
+  , processId :: !ProcessId
+  , processName :: !ProcessName
+  , processType :: !ProcessType
+  , eventTime :: !UTCTime
+  }
+  | ProcessRestarted {
+    supervisorId :: !SupervisorId
+  , supervisorName :: !SupervisorName
+  , processThreadId :: !ProcessThreadId
+  , processId :: !ProcessId
+  , processName :: !ProcessName
+  , processType :: !ProcessType
+  , processRestartCount :: !Int
+  , eventTime :: !UTCTime
+  }
+  | ProcessCompleted {
+    supervisorId :: !SupervisorId
+  , supervisorName :: !SupervisorName
+  , processThreadId :: !ProcessThreadId
+  , processId :: !ProcessId
+  , processName :: !ProcessName
+  , processType :: !ProcessType
+  , eventTime :: !UTCTime
+  }
+  | ProcessFailed {
+    supervisorName :: !SupervisorName
+  , supervisorId :: !SupervisorId
+  , processThreadId :: !ProcessThreadId
+  , processId :: !ProcessId
+  , processName :: !ProcessName
+  , processType :: !ProcessType
+  , processError :: !SomeException
+  , eventTime :: !UTCTime
+  }
+  | ProcessCallbackExecuted {
+    supervisorId :: !SupervisorId
+  , supervisorName :: !SupervisorName
+  , processThreadId :: !ProcessThreadId
+  , processId :: !ProcessId
+  , processName :: !ProcessName
+  , processType :: !ProcessType
+  , processCallbackError :: !(Maybe SomeException)
+  , processCallbackType :: !CallbackType
+  , eventTime :: !UTCTime
+  }
+  | ProcessTerminationStarted {
+    supervisorName :: !SupervisorName
+  , supervisorId :: !SupervisorId
+  , terminationReason :: !Text
+  , eventTime :: !UTCTime
+  }
+  | ProcessTerminationFinished {
+    supervisorName :: !SupervisorName
+  , supervisorId :: !SupervisorId
+  , terminationReason :: !Text
+  , eventTime :: !UTCTime
+  }
+  | CapatazFailed {
+    supervisorId :: !SupervisorId
+  , supervisorName :: !SupervisorName
+  , supervisorError :: !SomeException
+  , eventTime :: !UTCTime
+  }
+  | CapatazTerminated {
+    supervisorName :: !SupervisorName
+  , supervisorId :: !SupervisorId
+  , eventTime :: !UTCTime
+  }
+  deriving (Generic, Show)
+
+-- | Defines how a "Worker" process termination should be handled by its
+-- supervisor.
+data WorkerTerminationPolicy
+  -- | Supervisor waits until infinity for the worker termination callback to
+  -- finish execution.
+  = Infinity
+
+  -- | Supervisor terminates worker process without a chance to call its
+  -- termination callback.
+  | BrutalTermination
+
+  -- | Supervisor allows a number of milliseconds for the worker termination
+  -- callback to complete; if not completed by the specified milliseconds the
+  -- termination is cancelled via a "BrutalTermination" signal.
+  | TimeoutMillis !Int
+  deriving (Generic, Show, Eq, Ord)
+
+instance Default WorkerTerminationPolicy where
+  -- | Default worker termination is a timeout of three (3) seconds.
+  def = TimeoutMillis 3000
+
+instance NFData WorkerTerminationPolicy
+
+-- | Internal helper record that assesses if a Supervisor error intensity has
+-- been breached.
+data ProcessRestartAction
+  -- | Indicates a Supervisor to restart a failed process _and_ reset the
+  -- restart count given this Supervisor's intensity period timeout has passed.
+  = ResetRestartCount
+
+  -- | Indicates a Supervisor to restart the failed process _and_ increase the restart
+  -- count (normal operation) of the supervised process.
+  | IncreaseRestartCount
+
+  -- | Indicates a Supervisor to stop executing given the error intensity has
+  -- been breached.
+  | HaltSupervisor
+  deriving (Generic, Show, Eq)
+
+instance NFData ProcessRestartAction
+
+-- | Specifies the order in which supervised processes should be terminated by
+-- a Supervisor in case of a restart or shutdown.
+data ProcessTerminationOrder
+  -- | Supervisor terminates supervised processes from most recent to oldest.
+  = NewestFirst
+  -- | Supervisor terminates supervised processes from oldest to most recent.
+  | OldestFirst
+  deriving (Generic, Show, Eq, Ord)
+
+instance Default ProcessTerminationOrder where
+  -- | default is "OldestFirst".
+  def = OldestFirst
+
+instance NFData ProcessTerminationOrder
+
+-- | Specifies how a Supervisor restarts a failing process.
+data SupervisorRestartStrategy
+  -- | Supervisor terminates all sibling supervised processes that didn't fail,
+  -- and then restarts all of them together. This strategy serves best when all
+  -- processes depend upon each other.
+  = AllForOne
+
+  -- | Supervisor only restarts the supervised process that failed.
+  | OneForOne
+  deriving (Generic, Show, Eq, Ord)
+
+instance Default SupervisorRestartStrategy where
+  -- | Default restart strategy is "OneForOne".
+  def = OneForOne
+
+instance NFData SupervisorRestartStrategy
+
+-- | Allows to:
+--
+-- * Specify options for the root supervisor of a capataz system.
+--
+-- * Provide a "notifyEvent" callback to monitor or log a capataz system.
+--
+data CapatazOptions
+  = CapatazOptions {
+    supervisorName :: !SupervisorName
+  , supervisorIntensity :: !Int
+  , supervisorPeriodSeconds :: !NominalDiffTime
+  , supervisorRestartStrategy :: !SupervisorRestartStrategy
+  , supervisorProcessSpecList :: ![ProcessSpec]
+  , supervisorProcessTerminationOrder :: !ProcessTerminationOrder
+  , supervisorOnIntensityReached :: !(IO ())
+    -- | Callback sub-routine that gets executed when the root supervisor fails.
+  , supervisorOnFailure :: !(SomeException -> IO ())
+    -- | Callback used for telemetry purposes.
+  , notifyEvent :: !(CapatazEvent -> IO ())
+  }
+
+
+-- | Specifies how a Supervisor deals with the lifecycle of a worker process in
+-- case of completion without errors and failure.
+data WorkerRestartStrategy
+  -- | Supervisor will __always__ restart a worker process, in both completion
+  -- and failure scenarios.
+  = Permanent
+
+  -- | Supervisor will __only__ restart worker process if it has a failure in
+  -- execution.
+  | Transient
+
+  -- | Supervisor will __never__ restart a worker, even on failure.
+  | Temporary
+
+  deriving (Generic, Show, Eq)
+
+instance NFData WorkerRestartStrategy
+instance Default WorkerRestartStrategy where
+  -- | A worker default restart strategy is "Transient".
+  def = Transient
+
+-- | Specifies all options that can be used to create a Worker Process. You may
+-- create a record of this type via the smart constructor "buildWorkerOptions".
+data WorkerOptions
+  = WorkerOptions {
+    -- | An @IO ()@ sub-routine that will be executed when the worker
+    -- thread is created; this attribute is deliberately lazy (no bang), which
+    -- presumably defers its evaluation to the worker thread environment.
+    workerAction :: WorkerAction
+    -- | Name of the Worker (present on "CapatazEvent" records)
+  , workerName :: !WorkerName
+    -- | Callback used when the worker fails with an error
+  , workerOnFailure :: !(SomeException -> IO ())
+    -- | Callback used when the worker completes execution without error
+  , workerOnCompletion :: !(IO ())
+    -- | Callback used when the worker is terminated
+  , workerOnTermination :: !(IO ())
+    -- | Indicates how a worker should be terminated
+  , workerTerminationPolicy :: !WorkerTerminationPolicy
+    -- | Indicates how a worker should be restarted
+  , workerRestartStrategy :: !WorkerRestartStrategy
+  }
+  deriving (Generic)
+
+-- | Record that contains the "Async" record (thread reference) of a worker
+data Worker
+  = Worker {
+    -- | Unique identifier for a worker that is executing
+    workerId :: !WorkerId
+    -- | "Async" thread of a worker, this Async executes the @IO ()@ sub-routine
+  , workerAsync :: !(Async ())
+    -- | Time where this worker was created (used for error intensity checks)
+  , workerCreationTime :: !UTCTime
+    -- | Name of the Worker (present on "CapatazEvent" records)
+  , workerName :: !WorkerName
+    -- | "WorkerOptions" contains all the options around restart and termination
+    -- policies
+  , workerOptions :: !WorkerOptions
+  }
+
+-- | Record bundling the id, name, "Async", creation time and restart strategy
+-- of a process.
+data ProcessEnv
+  = ProcessEnv {
+    processId :: !ProcessId
+  , processName :: !ProcessName
+  , processAsync :: !(Async ())
+  , processCreationTime :: !UTCTime
+  , processRestartStrategy :: !WorkerRestartStrategy
+  }
+
+-- | Specifies all options that can be used to create a Supervisor process; see
+-- "buildSupervisorOptions" for the smart constructor.
+data SupervisorOptions
+  = SupervisorOptions {
+    -- | Name of the Supervisor (present on "CapatazEvent" records)
+    supervisorName :: Text
+    -- | How many errors the Supervisor is able to handle; check:
+    -- http://erlang.org/doc/design_principles/sup_princ.html#max_intensity
+  , supervisorIntensity :: !Int
+    -- | Period of time where the Supervisor can receive "supervisorIntensity" amount
+    -- of errors
+  , supervisorPeriodSeconds :: !NominalDiffTime
+    -- | What is the "SupervisorRestartStrategy" for this Capataz
+  , supervisorRestartStrategy :: !SupervisorRestartStrategy
+    -- | Static set of workers that start as soon as the "Capataz" is created
+  , supervisorProcessSpecList :: ![ProcessSpec]
+    -- | In which order the "Supervisor" record is going to terminate its workers
+  , supervisorProcessTerminationOrder :: !ProcessTerminationOrder
+    -- | Callback used when the error intensity is reached
+  , supervisorOnIntensityReached :: !(IO ())
+    -- | Callback that receives the error of a failure
+    -- (NOTE(review): presumably invoked when this supervisor fails -- confirm)
+  , supervisorOnFailure :: !(SomeException -> IO ())
+  }
+
+-- | Runtime record of a supervisor: its id and name, the options it was built
+-- from, its creation time, the "Async" of its thread, the sub-routine used to
+-- send it messages and its "SupervisorEnv".
+data Supervisor
+  = Supervisor {
+    supervisorId :: !SupervisorId
+  , supervisorName :: !SupervisorName
+  , supervisorOptions :: !SupervisorOptions
+  , supervisorCreationTime :: !UTCTime
+  , supervisorAsync :: !(Async ())
+  , supervisorNotify :: SupervisorMessage -> IO ()
+  , supervisorEnv :: !SupervisorEnv
+  }
+
+-- | Internal record that represents an action being sent from threads using
+-- the Capataz public API.
+data ControlAction
+  = ForkWorker {
+    workerOptions :: !WorkerOptions
+  , returnWorkerId :: !(WorkerId -> IO ())
+  }
+  | ForkSupervisor {
+    supervisorOptions :: !SupervisorOptions
+  , returnSupervisor :: !(Supervisor -> IO ())
+  }
+  | TerminateProcess {
+    processId :: !ProcessId
+  , processTerminationReason :: !Text
+  , notifyProcessTermination :: !(Bool -> IO ())
+  }
+  deriving (Generic)
+
+-- | Internal exception thrown to the Capataz loop to indicate termination of
+-- execution
+data CapatazSignal
+  = CapatazFailure
+  | RestartProcessException
+  | TerminateProcessException {
+    processId :: !ProcessId
+  , processTerminationReason :: !Text
+  }
+  | BrutallyTerminateProcessException {
+    processId :: !ProcessId
+  , processTerminationReason :: !Text
+  }
+  deriving (Generic, Show)
+
+instance Exception CapatazSignal
+instance NFData CapatazSignal
+
+-- | Internal exception triggered when a process violates the error intensity
+-- specification of its supervisor
+data CapatazError
+  = SupervisorIntensityReached {
+    processId :: !ProcessId
+  , processName :: !ProcessName
+  , processRestartCount :: !Int
+  }
+  deriving (Generic, Show)
+
+instance Exception CapatazError
+instance NFData CapatazError
+
+-- | Internal record that indicates what type of callback function is being
+-- invoked; this is used for telemetry purposes
+data CallbackType
+  = OnCompletion
+  | OnFailure
+  | OnTermination
+  deriving (Generic, Show, Eq)
+
+-- | Tells whether a process referenced by a "CapatazEvent" is a supervisor or
+-- a worker.
+data ProcessType
+  = SupervisorType
+  | WorkerType
+  deriving (Show, Eq)
+
+-- | Internal exception triggered when a callback of a Worker fails
+data ProcessError
+  = ProcessCallbackFailed {
+    processId :: !WorkerId
+  , processError :: !(Maybe SomeException)
+  , processCallbackError :: !SomeException
+  , processCallbackType :: !CallbackType
+  }
+  deriving (Generic, Show)
+
+instance Exception ProcessError
+
+-- | Internal event delivered from Worker threads to the Capataz thread to
+-- indicate completion, failure or termination
+data MonitorEvent
+  = ProcessTerminated' {
+    processId :: !ProcessId
+  , processName :: !ProcessName
+  , processRestartCount :: !RestartCount
+  , processTerminationReason :: !Text
+  , monitorEventTime :: !UTCTime
+  }
+  | ProcessFailed' {
+    processId :: !WorkerId
+  , processName :: !WorkerName
+  , processRestartCount :: !RestartCount
+  , processError :: !SomeException
+  , monitorEventTime :: !UTCTime
+  }
+  | ProcessCompleted' {
+    processId :: !ProcessId
+  , processName :: !ProcessName
+  , monitorEventTime :: !UTCTime
+  }
+  | ProcessForcedRestart {
+    processId :: !ProcessId
+  , processName :: !ProcessName
+  , monitorEventTime :: !UTCTime
+  }
+  deriving (Show)
+
+-- | Internal record used as a state machine, indicating the state of a
+-- supervisor process
+data SupervisorStatus
+  -- | This state is set when the process is created and it starts spawning its
+  -- static process list.
+  = Initializing
+  -- | This state is set when the supervisor process starts listening to both
+  -- "ControlAction" and "MonitorEvent" messages.
+  | Running
+  -- | This state is set when the supervisor process is terminating its
+  -- assigned workers
+  | Halting
+  -- | This state is set when the supervisor process is finished
+  | Halted
+  deriving (Generic, Show, Eq)
+
+instance NFData SupervisorStatus
+
+-- | Internal message delivered to a supervisor process that can either be a
+-- call from public API or an event from its monitored worker process.
+data SupervisorMessage
+  -- | Represents a request made to the supervisor thread from another
+  -- thread using the public API
+  = ControlAction !ControlAction
+  -- | Represents an event (failure, completion, etc) from a monitored worker
+  -- process to the supervisor
+  | MonitorEvent !MonitorEvent
+  deriving (Generic)
+
+-- | Internal Type to manage both Worker and Supervisor processes
+data Process
+  = WorkerProcess Worker
+  | SupervisorProcess Supervisor
+
+-- | Record used to specify how to __build__ a runtime "Process" in a static
+-- supervision tree; to create values of this type, you must use:
+--
+-- * "workerSpec" or "workerSpecWithDefaults" to build a worker process
+--
+-- * "supervisorSpec" or "supervisorSpecWithDefaults" to build a supervisor
+--   process
+--
+data ProcessSpec
+  = WorkerSpec WorkerOptions
+  | SupervisorSpec SupervisorOptions
+
+-- | Record that contains the environment of a capataz monitor, this is used as
+-- the main record to create workers and to stop the supervisor thread.
+data Capataz
+  = Capataz {
+    capatazSupervisor :: !Supervisor
+  , capatazTeardown :: !Teardown
+  }
+
+-- Tearing down a "Capataz" delegates to the "Teardown" record it carries.
+instance ITeardown Capataz where
+  teardown Capataz {capatazTeardown} =
+    teardown capatazTeardown
+
+-- | Internal utility record used to hold part of the runtime information of a
+-- supervisor that acts as a parent of another supervisor.
+data ParentSupervisorEnv
+  = ParentSupervisorEnv {
+    -- | Id of the parent supervisor
+    supervisorId :: !SupervisorId
+    -- | Name of the parent supervisor
+  , supervisorName :: !SupervisorName
+    -- | Sub-routine used to send a message to the parent supervisor
+  , supervisorNotify :: !(SupervisorMessage -> IO ())
+    -- | Callback used for telemetry purposes
+  , notifyEvent :: !(CapatazEvent -> IO ())
+  }
+
+-- | Convenience internal utility record that contains all values related to a
+-- supervisor process.
+data SupervisorEnv
+  = SupervisorEnv {
+    supervisorId :: !SupervisorId
+  , supervisorName :: !SupervisorName
+    -- | Sub-routine used to send a message to this supervisor
+  , supervisorNotify :: !(SupervisorMessage -> IO ())
+    -- | STM action that yields the next message for this supervisor
+  , supervisorGetNotification :: !(STM SupervisorMessage)
+    -- | Processes currently tracked by this supervisor
+  , supervisorProcessMap :: !(IORef ProcessMap)
+    -- | Current "SupervisorStatus" of this supervisor
+  , supervisorStatusVar :: !(TVar SupervisorStatus)
+  , supervisorOptions :: !SupervisorOptions
+  , supervisorIntensity :: !Int
+    -- ^ http://erlang.org/doc/design_principles/sup_princ.html#max_intensity
+  , supervisorPeriodSeconds :: !NominalDiffTime
+  , supervisorRestartStrategy :: !SupervisorRestartStrategy
+  , supervisorProcessTerminationOrder :: !ProcessTerminationOrder
+    -- | Callback used when the error intensity is reached
+  , supervisorOnIntensityReached :: !(IO ())
+    -- | Failure callback; this field was declared a second time as
+    -- "supervisorOnIntensityReached", which is a duplicate field within one
+    -- constructor. It is named after the field of the same type found in
+    -- "SupervisorOptions" and "CapatazOptions".
+  , supervisorOnFailure :: !(SomeException -> IO ())
+    -- | Callback used for telemetry purposes
+  , notifyEvent :: !(CapatazEvent -> IO ())
+  }
+
+-- | Builds a "CapatazOptions" record with defaults on how to create a capataz
+-- root supervisor, these defaults are:
+--
+-- * A "supervisorIntensity" of 2 and a "supervisorPeriodSeconds" of 5 seconds
+--   (NOTE(review): this used to read "1 error every 5 seconds"; confirm which
+--   of the two is intended)
+--
+-- * A "SupervisorRestartStrategy" of "OneForOne"
+--
+-- * A "ProcessTerminationOrder" of "OldestFirst"
+--
+-- This function is intended to be used in combination with "forkCapataz".
+--
+defCapatazOptions
+  :: Text
+  -> (CapatazOptions -> CapatazOptions) -- ^ Function to modify root supervisor
+  -> CapatazOptions
+defCapatazOptions supervisorName modFn = modFn defaults
+ where
+  -- Root supervisor options before the caller's overrides are applied.
+  defaults = CapatazOptions
+    { supervisorName
+    , supervisorIntensity               = 2
+    , supervisorPeriodSeconds           = 5
+    , supervisorRestartStrategy         = def
+    , supervisorProcessSpecList         = []
+    , supervisorProcessTerminationOrder = def
+    , supervisorOnIntensityReached      = pure ()
+    , supervisorOnFailure               = const (pure ())
+    , notifyEvent                       = const (pure ())
+    }
+
+-- | Wraps the "SupervisorOptions" produced by "buildSupervisorOptions" in a
+-- "ProcessSpec"; the given function is applied on top of the default options
+-- (e.g. using lenses).
+--
+-- Use it to declare a supervisor branch of a static supervision tree.
+--
+supervisorSpec
+  :: SupervisorName -- ^ Name used for telemetry purposes
+  -> (SupervisorOptions -> SupervisorOptions) -- ^ Function to modify default
+                                              -- supervisor options
+  -> ProcessSpec
+supervisorSpec sName = SupervisorSpec . buildSupervisorOptions sName
+{-# INLINE supervisorSpec #-}
+
+-- | Same as "supervisorSpec", leaving every default of
+-- "buildSupervisorOptionsWithDefaults" untouched.
+--
+-- Use it to declare a supervisor branch of a static supervision tree.
+--
+supervisorSpecWithDefaults
+  :: SupervisorName -- ^ Name used for telemetry purposes
+  -> ProcessSpec
+supervisorSpecWithDefaults = flip supervisorSpec identity
+{-# INLINE supervisorSpecWithDefaults #-}
+
+-- | Builds a "ProcessSpec" record for a worker process with defaults from
+-- "workerSpecWithDefaults". This function allows overrides of these
+-- defaults using lenses.
+--
+-- This function is used when building a worker in a static supervision tree.
+--
+workerSpec
+  :: WorkerName -- ^ Name used for telemetry purposes
+  -> IO () -- ^ IO sub-routine to be supervised
+  -> (WorkerOptions -> WorkerOptions) -- ^ Function to modify default worker
+                                      -- options
+  -> ProcessSpec
+workerSpec wName wAction = WorkerSpec . buildWorkerOptions wName wAction
+{-# INLINE workerSpec #-}
+
+-- | Builds a "ProcessSpec" record for a worker process, keeping every default
+-- from "buildWorkerOptionsWithDefaults".
+--
+-- Use it to declare a worker of a static supervision tree.
+--
+workerSpecWithDefaults
+  :: WorkerName -- ^ Name used for telemetry purposes
+  -> IO () -- ^ IO sub-routine to be supervised
+  -> ProcessSpec
+workerSpecWithDefaults wName wAction =
+  WorkerSpec (buildWorkerOptions wName wAction identity)
+{-# INLINE workerSpecWithDefaults #-}
+
+-- | Builds a "SupervisorOptions" record with defaults from
+-- "buildSupervisorOptionsWithDefaults". This function allows overrides of these
+-- defaults using lenses.
+--
+-- This function is intended to be used in combination with "forkSupervisor".
+-- +buildSupervisorOptions + :: SupervisorName -- ^ Name used for telemetry purposes + -> (SupervisorOptions -> SupervisorOptions) -- ^ Function to modify default + -- supervisor options + -> SupervisorOptions +buildSupervisorOptions supervisorName modFn = modFn SupervisorOptions + { supervisorName + , supervisorIntensity = 2 + , supervisorPeriodSeconds = 5 + , supervisorRestartStrategy = def + , supervisorProcessSpecList = [] + , supervisorProcessTerminationOrder = OldestFirst + , supervisorOnIntensityReached = return () + , supervisorOnFailure = const $ return () + } +{-# INLINE buildSupervisorOptions #-} + +-- | Builds a "SupervisorOptions" record with defaults to create a supervisor +-- process, these defaults are: +-- +-- * Intensity error tolerance is set to 1 error every 5 seconds +-- +-- * A "SupervisorRestartStrategy" of "OneForOne" +-- +-- * A "ProcessTerminationOrder" of "OldestFirst" +-- +-- This function is intended to be used in combination with "forkSupervisor". +-- +buildSupervisorOptionsWithDefaults + :: SupervisorName -- ^ Name used for telemetry purposes + -> SupervisorOptions +buildSupervisorOptionsWithDefaults = flip buildSupervisorOptions identity +{-# INLINE buildSupervisorOptionsWithDefaults #-} + +-- | Builds a "WorkerOptions" record, keeps the defaults from +-- "buildWorkerOptionsWithDefaults" but allows overrides using lenses. +-- +-- This function is intended to be used in combination with "forkWorker". See +-- the ... example in the examples directory for a demonstration. 
+-- +buildWorkerOptions + :: WorkerName -- ^ Name used for telemetry purposes + -> IO () -- ^ IO sub-routine to be supervised + -> (WorkerOptions -> WorkerOptions) -- ^ Function to modify default worker + -- options + -> WorkerOptions +buildWorkerOptions workerName workerAction f = f WorkerOptions + { workerName + , workerAction + , workerOnFailure = const $ return () + , workerOnCompletion = return () + , workerOnTermination = return () + , workerTerminationPolicy = def + , workerRestartStrategy = def + } +{-# INLINE buildWorkerOptions #-} + +-- | Builds a "WorkerOptions" record with defaults to create a worker process, +-- the defaults are: +-- +-- * A "Transient" "WorkerRestartStrategy" +-- +-- * A "WorkerTerminationPolicy" of a 3 seconds timeout +-- +-- * A _completion_ callback that just returns unit +-- +-- * A _termination_ callback that just returns unit +-- +-- * A _failure_ callback that just returns unit +-- +-- This function is intended to be used in combination with "forkWorker", for +-- creating a worker in a static supervision tree, use "workerSpecWithDefaults" +-- instead. See the ... example for a demonstration.
+-- +buildWorkerOptionsWithDefaults + :: WorkerName -- ^ Name used for telemetry purposes + -> IO () -- ^ IO sub-routine to be supervised + -> WorkerOptions +buildWorkerOptionsWithDefaults wName wAction = + buildWorkerOptions wName wAction identity +{-# INLINE buildWorkerOptionsWithDefaults #-} diff --git a/src/Control/Concurrent/Capataz/Internal/Types/Lens.hs b/src/Control/Concurrent/Capataz/Internal/Types/Lens.hs new file mode 100644 index 0000000..a9467c5 --- /dev/null +++ b/src/Control/Concurrent/Capataz/Internal/Types/Lens.hs @@ -0,0 +1,233 @@ +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE RecordWildCards #-} +module Control.Concurrent.Capataz.Internal.Types.Lens where + +import Control.Concurrent.Capataz.Internal.Types +import Data.Time.Clock (NominalDiffTime) +import Protolude + +-------------------------------------------------------------------------------- + +class HasSupervisorIntensity s where + -- | Specifies how many errors is a supervisor able to handle; check: + -- http://erlang.org/doc/design_principles/sup_princ.html#max_intensity. + supervisorIntensityL + :: Functor f + => (Int -> f Int) + -> s + -> f s + +class HasSupervisorPeriodSeconds s where + -- | Specifies period of time in which a supervisor can receive a number of + -- errors specified in "supervisorIntensityL". + supervisorPeriodSecondsL + :: Functor f + => (NominalDiffTime -> f NominalDiffTime) + -> s + -> f s + +class HasSupervisorRestartStrategy s where + -- | Specifies the "SupervisorRestartStrategy" for a root supervisor. + supervisorRestartStrategyL + :: Functor f + => (SupervisorRestartStrategy -> f SupervisorRestartStrategy) + -> s + -> f s + +class HasSupervisorProcessSpecList s where + -- | Specifies a static list of processes that start automatically with a + -- supervisor. 
+ supervisorProcessSpecListL + :: Functor f + => ([ProcessSpec] -> f [ProcessSpec]) + -> s + -> f s + +class HasSupervisorProcessTerminationOrder s where + -- | Specifies order in which a supervisor is going to terminate its + -- supervised processes. + supervisorProcessTerminationOrderL + :: Functor f + => (ProcessTerminationOrder -> f ProcessTerminationOrder) + -> s + -> f s + +class HasSupervisorIntensityReachedCallback s where + -- | Specifies a callback sub-routine that gets executed when there is a + -- breach in a supervisor's error intensity. + supervisorOnIntensityReachedL + :: Functor f + => (IO () -> f (IO ())) + -> s + -> f s + +class HasSupervisorFailureCallback s where + -- | Specifies callback sub-routine that gets executed when a supervisor + -- fails. + supervisorOnFailureL + :: Functor f + => ((SomeException -> IO ()) -> f (SomeException -> IO ())) + -> s + -> f s + + +instance HasSupervisorIntensity SupervisorOptions where + supervisorIntensityL k SupervisorOptions {supervisorIntensity, ..} = + fmap (\newSupIntensity -> SupervisorOptions { supervisorIntensity = newSupIntensity, .. }) + (k supervisorIntensity) + +instance HasSupervisorPeriodSeconds SupervisorOptions where + supervisorPeriodSecondsL k SupervisorOptions {supervisorPeriodSeconds, ..} = + fmap (\newSupPeriodSeconds -> SupervisorOptions { supervisorPeriodSeconds = newSupPeriodSeconds, .. }) + (k supervisorPeriodSeconds) + +instance HasSupervisorRestartStrategy SupervisorOptions where + supervisorRestartStrategyL k SupervisorOptions {supervisorRestartStrategy, ..} = + fmap (\newSupRestartStrategy -> + SupervisorOptions { supervisorRestartStrategy = newSupRestartStrategy, .. }) + (k supervisorRestartStrategy) + +instance HasSupervisorProcessSpecList SupervisorOptions where + supervisorProcessSpecListL k SupervisorOptions {supervisorProcessSpecList, ..} = + fmap (\newSupProcessSpecList -> + SupervisorOptions { supervisorProcessSpecList = newSupProcessSpecList, .. 
}) + (k supervisorProcessSpecList) + +instance HasSupervisorProcessTerminationOrder SupervisorOptions where + supervisorProcessTerminationOrderL k SupervisorOptions {supervisorProcessTerminationOrder, ..} = + fmap (\newSupProcessTerminationOrder -> + SupervisorOptions { supervisorProcessTerminationOrder = newSupProcessTerminationOrder, .. }) + (k supervisorProcessTerminationOrder) + +instance HasSupervisorIntensityReachedCallback SupervisorOptions where + supervisorOnIntensityReachedL k SupervisorOptions {supervisorOnIntensityReached, ..} = + fmap (\newSupOnIntensityReached -> SupervisorOptions { supervisorOnIntensityReached = newSupOnIntensityReached, .. }) + (k supervisorOnIntensityReached) + +instance HasSupervisorFailureCallback SupervisorOptions where + supervisorOnFailureL k SupervisorOptions {supervisorOnFailure, ..} = + fmap (\newSupOnFailure -> SupervisorOptions { supervisorOnFailure = newSupOnFailure, .. }) + (k supervisorOnFailure) + +-------------------- + +instance HasSupervisorIntensity CapatazOptions where + supervisorIntensityL k CapatazOptions {supervisorIntensity, ..} = + fmap (\newSupIntensity -> CapatazOptions { supervisorIntensity = newSupIntensity, .. }) + (k supervisorIntensity) + +instance HasSupervisorPeriodSeconds CapatazOptions where + supervisorPeriodSecondsL k CapatazOptions {supervisorPeriodSeconds, ..} = + fmap (\newSupPeriodSeconds -> CapatazOptions { supervisorPeriodSeconds = newSupPeriodSeconds, .. }) + (k supervisorPeriodSeconds) + +instance HasSupervisorRestartStrategy CapatazOptions where + supervisorRestartStrategyL k CapatazOptions {supervisorRestartStrategy, ..} = + fmap (\newSupRestartStrategy -> + CapatazOptions { supervisorRestartStrategy = newSupRestartStrategy, .. 
}) + (k supervisorRestartStrategy) + +instance HasSupervisorProcessSpecList CapatazOptions where + supervisorProcessSpecListL k CapatazOptions {supervisorProcessSpecList, ..} = + fmap (\newSupProcessSpecList -> + CapatazOptions { supervisorProcessSpecList = newSupProcessSpecList, .. }) + (k supervisorProcessSpecList) + +instance HasSupervisorProcessTerminationOrder CapatazOptions where + supervisorProcessTerminationOrderL k CapatazOptions {supervisorProcessTerminationOrder, ..} = + fmap (\newSupProcessTerminationOrder -> + CapatazOptions { supervisorProcessTerminationOrder = newSupProcessTerminationOrder, .. }) + (k supervisorProcessTerminationOrder) + +instance HasSupervisorIntensityReachedCallback CapatazOptions where + supervisorOnIntensityReachedL k CapatazOptions {supervisorOnIntensityReached, ..} = + fmap (\newSupOnIntensityReached -> CapatazOptions { supervisorOnIntensityReached = newSupOnIntensityReached, .. }) + (k supervisorOnIntensityReached) + +instance HasSupervisorFailureCallback CapatazOptions where + supervisorOnFailureL k CapatazOptions {supervisorOnFailure, ..} = + fmap (\newSupOnFailure -> CapatazOptions { supervisorOnFailure = newSupOnFailure, .. }) + (k supervisorOnFailure) + + -- | Specifies a callback sub-routine that gets triggered everytime something + -- important happens on the capataz system. This callback should be used for + -- telemetry purposes (e.g. logging, monitoring, etc). +onSystemEventL + :: Functor f + => ((CapatazEvent -> IO ()) -> f (CapatazEvent -> IO ())) + -> CapatazOptions + -> f CapatazOptions +onSystemEventL k CapatazOptions { notifyEvent, ..} = fmap + (\newNotifyEvent -> CapatazOptions {notifyEvent = newNotifyEvent, ..}) + (k notifyEvent) + +-------------------- + +-- | Specifies callback that gets executed when worker sub-routine has runtime +-- error. +-- +-- NOTE: the given sub-routine execution may be interrupted depending on the +-- worker "WorkerTerminationPolicy". 
+-- +workerOnFailureL + :: Functor f + => ((SomeException -> IO ()) -> f (SomeException -> IO ())) + -> WorkerOptions + -> f WorkerOptions +workerOnFailureL k WorkerOptions { workerOnFailure, ..} = fmap + (\newWorkerAction -> WorkerOptions {workerOnFailure = newWorkerAction, ..}) + (k workerOnFailure) + +-- | Specifies callback that gets executed when worker sub-routine completes +-- with no errors. +-- +-- NOTE: the given sub-routine execution may be interrupted depending on the +-- worker "WorkerTerminationPolicy". +-- +workerOnCompletionL + :: Functor f => (IO () -> f (IO ())) -> WorkerOptions -> f WorkerOptions +workerOnCompletionL k WorkerOptions { workerOnCompletion, ..} = fmap + (\newWorkerAction -> WorkerOptions {workerOnCompletion = newWorkerAction, ..}) + (k workerOnCompletion) + +-- | Specifies callback that gets executed when worker sub-routine is terminated +-- by its supervisor; this may happen in case of a capataz system shutdown or +-- when there is an "AllForOne" restart policy in place. +-- +-- NOTE: the given sub-routine execution may be interrupted depending on the +-- worker "WorkerTerminationPolicy". +-- +workerOnTerminationL + :: Functor f => (IO () -> f (IO ())) -> WorkerOptions -> f WorkerOptions +workerOnTerminationL k WorkerOptions { workerOnTermination, ..} = fmap + ( \newWorkerAction -> + WorkerOptions {workerOnTermination = newWorkerAction, ..} + ) + (k workerOnTermination) + +-- | Specifies how to handle a worker termination. See "WorkerTerminationPolicy" +-- documentation for more details. +workerTerminationPolicyL + :: Functor f + => (WorkerTerminationPolicy -> f WorkerTerminationPolicy) + -> WorkerOptions + -> f WorkerOptions +workerTerminationPolicyL k WorkerOptions { workerTerminationPolicy, ..} = fmap + ( \newWorkerAction -> + WorkerOptions {workerTerminationPolicy = newWorkerAction, ..} + ) + (k workerTerminationPolicy) + +-- | Specifies how supervisor should deal with an error when worker fails or +-- completes. 
See "WorkerRestartStrategy" documentation for more details. +workerRestartStrategyL + :: Functor f + => (WorkerRestartStrategy -> f WorkerRestartStrategy) + -> WorkerOptions + -> f WorkerOptions +workerRestartStrategyL k WorkerOptions { workerRestartStrategy, ..} = fmap + ( \newWorkerAction -> + WorkerOptions {workerRestartStrategy = newWorkerAction, ..} + ) + (k workerRestartStrategy) diff --git a/src/Control/Concurrent/Capataz/Internal/Util.hs b/src/Control/Concurrent/Capataz/Internal/Util.hs new file mode 100644 index 0000000..f687a41 --- /dev/null +++ b/src/Control/Concurrent/Capataz/Internal/Util.hs @@ -0,0 +1,173 @@ +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-| This module contains: + +* Functions to manipulate the state of the Supervisor record +* Utility functions used for communication between threads +* Public API utility functions + +-} +module Control.Concurrent.Capataz.Internal.Util where + +import Protolude + +import Control.Concurrent.STM (STM, atomically, retry) +import Control.Concurrent.STM.TVar (TVar, readTVar, writeTVar) +import Data.IORef (atomicModifyIORef', readIORef) +import qualified Data.Text as T +import Data.Time.Clock (getCurrentTime) + +import qualified Data.HashMap.Strict as HashMap +import GHC.Conc (labelThread) + +import Control.Concurrent.Capataz.Internal.Types + +-- | Returns only the number of the ThreadId. +getTidNumber :: ThreadId -> Maybe Text +getTidNumber tid = case T.words $ show tid of + (_:tidNumber:_) -> Just tidNumber + _ -> Nothing + +-------------------------------------------------------------------------------- + +-- | Internal functions that overwrites the GHC thread name, for increasing +-- traceability on GHC internals. 
+setProcessThreadName :: WorkerId -> WorkerName -> IO () +setProcessThreadName workerId workerName = do + tid <- myThreadId + let workerIdentifier = + T.unpack workerName <> "_" <> show workerId <> "_" <> maybe + "" + T.unpack + (getTidNumber tid) + labelThread tid workerIdentifier + +-- | Gets the "ProcessId" of either a Worker or a Supervisor process. +getProcessId :: Process -> ProcessId +getProcessId process = case process of + WorkerProcess Worker { workerId } -> workerId + SupervisorProcess Supervisor { supervisorId } -> supervisorId + +-- | Gets a supervised "Process" from a "Supervisor" instance. +fetchProcess :: SupervisorEnv -> ProcessId -> IO (Maybe Process) +fetchProcess SupervisorEnv { supervisorProcessMap } processId = do + processMap <- readIORef supervisorProcessMap + case HashMap.lookup processId processMap of + Just process -> return $ Just process + _ -> return Nothing + +-- | Appends a new "Process" to the "Supervisor" existing process map. +appendProcessToMap :: SupervisorEnv -> Process -> IO () +appendProcessToMap SupervisorEnv { supervisorProcessMap } process = + atomicModifyIORef' supervisorProcessMap + (\processMap -> (appendProcess processMap, ())) + where + appendProcess = HashMap.alter (const $ Just process) (getProcessId process) + +-- | Removes a "Process" from a "Supervisor" existing process map. +removeProcessFromMap :: SupervisorEnv -> ProcessId -> IO () +removeProcessFromMap SupervisorEnv { supervisorProcessMap } processId = + atomicModifyIORef' + supervisorProcessMap + ( \processMap -> maybe (processMap, ()) + (const (HashMap.delete processId processMap, ())) + (HashMap.lookup processId processMap) + ) + +-- | Function to modify a "Supervisor" process map using a pure function.
+resetProcessMap :: SupervisorEnv -> (ProcessMap -> ProcessMap) -> IO () +resetProcessMap SupervisorEnv { supervisorProcessMap } processMapFn = + atomicModifyIORef' supervisorProcessMap + (\processMap -> (processMapFn processMap, ())) + +-- | Function to get a snapshot of a "Supervisor" process map. +readProcessMap :: SupervisorEnv -> IO ProcessMap +readProcessMap SupervisorEnv { supervisorProcessMap } = + readIORef supervisorProcessMap + +-- | Returns all processes of a "Supervisor" by "ProcessTerminationOrder". This +-- is used on "AllForOne" restarts and shutdown operations. +sortProcessesByTerminationOrder + :: ProcessTerminationOrder -> ProcessMap -> [Process] +sortProcessesByTerminationOrder terminationOrder processMap = + case terminationOrder of + OldestFirst -> workers + NewestFirst -> reverse workers + where + -- NOTE: disambiguates workerCreationTime field + processCreationTime (WorkerProcess Worker { workerCreationTime }) = + workerCreationTime + processCreationTime (SupervisorProcess Supervisor { supervisorCreationTime }) + = supervisorCreationTime + + workers = sortBy (comparing processCreationTime) (HashMap.elems processMap) + +-------------------------------------------------------------------------------- + +-- | Returns the "SupervisorStatus", this sub-routine will retry transaction +-- until its associated "Supervisor" has a status different from "Initializing". +readSupervisorStatusSTM :: TVar SupervisorStatus -> STM SupervisorStatus +readSupervisorStatusSTM statusVar = do + status <- readTVar statusVar + if status == Initializing then retry else return status + +-- | Executes transaction that returns the "SupervisorStatus". +readSupervisorStatus :: SupervisorEnv -> IO SupervisorStatus +readSupervisorStatus SupervisorEnv { supervisorStatusVar } = + atomically $ readTVar supervisorStatusVar + +-- | Modifies the "Supervisor" status.
+-- +-- IMPORTANT: this is the only function that should be used for this purpose +-- given it has the side-effect of notifying a status change via the +-- "notifyEvent" sub-routine, originally given in the "CapatazOptions" record. +writeSupervisorStatus :: SupervisorEnv -> SupervisorStatus -> IO () +writeSupervisorStatus SupervisorEnv { supervisorId, supervisorName, supervisorStatusVar, notifyEvent } newSupervisorStatus + = do + + prevSupervisorStatus <- atomically $ do + prevStatus <- readTVar supervisorStatusVar + writeTVar supervisorStatusVar newSupervisorStatus + return prevStatus + + eventTime <- getCurrentTime + notifyEvent SupervisorStatusChanged + { supervisorId = supervisorId + , supervisorName = supervisorName + , prevSupervisorStatus + , newSupervisorStatus + , eventTime + } + +-- | Used from public API functions to send "ControlAction" messages to a +-- Supervisor thread loop. +sendControlMsg :: SupervisorEnv -> ControlAction -> IO () +sendControlMsg SupervisorEnv { supervisorNotify } ctrlMsg = + supervisorNotify (ControlAction ctrlMsg) + +-- | Used from public API functions to send ControlAction messages to a +-- Supervisor thread loop, it receives an IO sub-routine that expects an IO +-- operation that blocks a thread until the message is done. +sendSyncControlMsg + :: SupervisorEnv + -> (IO () -> ControlAction) -- ^ Blocking sub-routine used from the caller + -> IO () +sendSyncControlMsg SupervisorEnv { supervisorNotify } mkCtrlMsg = do + result <- newEmptyMVar + supervisorNotify (ControlAction $ mkCtrlMsg (putMVar result ())) + takeMVar result + +-- | Utility function to transform a "CapatazOptions" record to a +-- "SupervisorOptions" record.
+capatazOptionsToSupervisorOptions :: CapatazOptions -> SupervisorOptions +capatazOptionsToSupervisorOptions CapatazOptions {..} = SupervisorOptions {..} + +-- | Utility function to transform a "SupervisorEnv" record to a +-- "ParentSupervisorEnv" record; used on functions where supervision of +-- supervisors is managed. +toParentSupervisorEnv :: SupervisorEnv -> ParentSupervisorEnv +toParentSupervisorEnv SupervisorEnv { supervisorId, supervisorName, supervisorNotify, notifyEvent } + = ParentSupervisorEnv {..} diff --git a/src/Control/Concurrent/Capataz/Internal/Worker.hs b/src/Control/Concurrent/Capataz/Internal/Worker.hs new file mode 100644 index 0000000..87d23a8 --- /dev/null +++ b/src/Control/Concurrent/Capataz/Internal/Worker.hs @@ -0,0 +1,76 @@ +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NoImplicitPrelude #-} + +{-| This module contains all logic related to error handling when spawning threads + to execute Worker sub-routines +-} +module Control.Concurrent.Capataz.Internal.Worker where + +import Protolude + +import Control.Concurrent.Async (asyncWithUnmask) +import Data.Time.Clock (getCurrentTime) + +import qualified Data.UUID.V4 as UUID + +import qualified Control.Concurrent.Capataz.Internal.Process as Process +import qualified Control.Concurrent.Capataz.Internal.Util as Util + +import Control.Concurrent.Capataz.Internal.Types + +-- | Decorates the given @IO ()@ sub-routine with failure handling +workerMain + :: ParentSupervisorEnv + -> WorkerOptions + -> WorkerId + -> RestartCount + -> IO Worker +workerMain env@ParentSupervisorEnv { supervisorNotify } workerOptions@WorkerOptions { workerName, workerAction } workerId restartCount + = do + workerCreationTime <- getCurrentTime + workerAsync <- asyncWithUnmask $ \unmask -> do + + eResult <- try $ do + Util.setProcessThreadName workerId workerName + unmask workerAction + + resultEvent <- case eResult of + Left err -> Process.handleProcessException unmask + env + 
(WorkerSpec workerOptions) + workerId + restartCount + err + Right _ -> Process.handleProcessCompletion unmask + env + (WorkerSpec workerOptions) + workerId + restartCount + + supervisorNotify (MonitorEvent resultEvent) + + return Worker + { workerId + , workerName + , workerAsync + , workerCreationTime + , workerOptions + } + +-- | Internal function that forks a worker thread on the Capataz thread; note +-- this is different from the public @forkWorker@ function which sends a message +-- to the capataz loop +forkWorker + :: ParentSupervisorEnv + -> WorkerOptions + -> Maybe (WorkerId, RestartCount) + -> IO Worker +forkWorker env workerOptions mRestartInfo = do + (workerId, restartCount) <- case mRestartInfo of + Just (workerId, restartCount) -> pure (workerId, restartCount) + Nothing -> (,) <$> UUID.nextRandom <*> pure 0 + + worker <- workerMain env workerOptions workerId restartCount + Process.notifyProcessStarted mRestartInfo env (WorkerProcess worker) + return worker diff --git a/src/Control/Concurrent/Capataz/Lens.hs b/src/Control/Concurrent/Capataz/Lens.hs new file mode 100644 index 0000000..7738a4b --- /dev/null +++ b/src/Control/Concurrent/Capataz/Lens.hs @@ -0,0 +1,21 @@ +module Control.Concurrent.Capataz.Lens + ( module X ) + where + +import Control.Concurrent.Capataz.Internal.Types.Lens as X + ( onSystemEventL + , supervisorIntensityL + , supervisorOnFailureL + , supervisorOnIntensityReachedL + , supervisorPeriodSecondsL + , supervisorProcessSpecListL + , supervisorProcessTerminationOrderL + , supervisorRestartStrategyL + , workerOnCompletionL + , workerOnFailureL + , workerOnTerminationL + , workerRestartStrategyL + , workerTerminationPolicyL + ) +import Lens.Micro as X (set, (&), (.~), (^.)) +import Lens.Micro.Extras as X (view) diff --git a/src/Control/Concurrent/Internal/Capataz/Core.hs b/src/Control/Concurrent/Internal/Capataz/Core.hs deleted file mode 100644 index 89f5579..0000000 --- a/src/Control/Concurrent/Internal/Capataz/Core.hs +++ /dev/null 
@@ -1,259 +0,0 @@ -{-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE RecordWildCards #-} -{-| This module contains: - -* Functions exported on the public API -* The supervisor thread loop -* High level message handlers of the supervisor thread loop - --} -module Control.Concurrent.Internal.Capataz.Core where - -import Protolude - -import Control.Concurrent.Async (asyncWithUnmask) -import Control.Concurrent.MVar (newEmptyMVar, takeMVar) -import Control.Concurrent.STM (atomically) -import Control.Concurrent.STM.TQueue (newTQueueIO, readTQueue, writeTQueue) -import Control.Concurrent.STM.TVar (newTVarIO) -import Control.Teardown (newTeardown) -import Data.IORef (newIORef) -import Data.Time.Clock (getCurrentTime) - -import qualified Data.HashMap.Strict as HashMap -import qualified Data.UUID.V4 as UUID (nextRandom) - -import qualified Control.Concurrent.Internal.Capataz.Restart as Restart -import qualified Control.Concurrent.Internal.Capataz.Worker as Worker - -import Control.Concurrent.Internal.Capataz.Types -import Control.Concurrent.Internal.Capataz.Util - ( appendWorkerToMap - , capatazToEnv - , fetchWorker - , readCapatazStatus - , readCapatazStatusSTM - , resetWorkerMap - , sendSyncControlMsg - , workerOptionsToSpec - , writeCapatazStatus - ) - --------------------------------------------------------------------------------- - --- | Executes the shutdown operation of a Capataz, including the termination of --- Workers being supervised by it. 
-haltCapataz :: CapatazEnv -> IO () -haltCapataz env = do - writeCapatazStatus env Halting - Worker.terminateWorkers "capataz shutdown" env - resetWorkerMap env (const HashMap.empty) - writeCapatazStatus env Halted - --- | Handles an event produced by one of the workers this capataz monitors -handleMonitorEvent :: CapatazEnv -> MonitorEvent -> IO Bool -handleMonitorEvent env monitorEv = do - case monitorEv of - WorkerForcedRestart{} -> - -- We do nothing, as restart is being handled on restartWorkers - -- sub-routine - return () - - WorkerCompleted' { workerId, monitorEventTime } -> - Restart.handleWorkerCompleted env workerId monitorEventTime - - WorkerFailed' { workerId, workerError, workerRestartCount } -> - Restart.handleWorkerFailed env workerId workerError workerRestartCount - - WorkerTerminated' { workerId, workerRestartCount, workerTerminationReason } - -> Restart.handleWorkerTerminated env - workerId - workerTerminationReason - workerRestartCount - - - return True - --- | Handles an action triggered by the public API -handleControlAction :: CapatazEnv -> ControlAction -> IO Bool -handleControlAction env controlAction = case controlAction of - ForkWorker { workerSpec, returnWorkerId } -> do - worker@Worker { workerId } <- Worker.forkWorker env workerSpec Nothing - appendWorkerToMap env worker - returnWorkerId workerId - return True - - TerminateWorker { terminationReason, workerId, notifyWorkerTermination } -> - do - mWorker <- fetchWorker env workerId - case mWorker of - Nothing -> return True - Just worker -> do - Worker.terminateWorker terminationReason env worker - -- removeWorkerFromMap env workerId - notifyWorkerTermination - return True - - TerminateCapataz { notifyCapatazTermination } -> do - haltCapataz env - notifyCapatazTermination - return False - --- | Handles all messages that a capataz instance can receive -handleCapatazMessage :: CapatazEnv -> CapatazMessage -> IO Bool -handleCapatazMessage env message = case message of - ControlAction 
controlAction -> handleControlAction env controlAction - MonitorEvent monitorEvent -> handleMonitorEvent env monitorEvent - --- | Handles errors caused by the execution of the "runCapatazLoop" sub-routine -handleCapatazException :: CapatazEnv -> SomeException -> IO () -handleCapatazException env@CapatazEnv { capatazId, capatazName, notifyEvent } capatazError - = do - eventTime <- getCurrentTime - notifyEvent CapatazFailed - { capatazId - , capatazName - , capatazError - , eventTime - } - haltCapataz env - throwIO capatazError - --- | This is the main thread loop of a "Capataz" instance -runCapatazLoop :: (forall b . IO b -> IO b) -> CapatazEnv -> IO () -runCapatazLoop unmask env@CapatazEnv { capatazId, capatazName, capatazStatusVar, capatazQueue, notifyEvent } - = do - loopResult <- - unmask - $ try - $ atomically - $ (,) - <$> readCapatazStatusSTM capatazStatusVar - <*> readTQueue capatazQueue - - case loopResult of - Left capatazError -> handleCapatazException env capatazError - - Right (status, message) -> case status of - Initializing -> do - eventTime <- getCurrentTime - notifyEvent InvalidCapatazStatusReached - { capatazId - , capatazName - , eventTime - } - runCapatazLoop unmask env - - Running -> do - eContinueLoop <- try $ unmask $ handleCapatazMessage env message - case eContinueLoop of - Left capatazError -> handleCapatazException env capatazError - - Right continueLoop - | continueLoop -> runCapatazLoop unmask env - | otherwise -> do - eventTime <- getCurrentTime - notifyEvent CapatazTerminated - { capatazId - , capatazName - , eventTime - } - - Halting -> - -- Discard messages when halting - return () - - Halted -> panic "TODO: Pending halted state" - --- | Builds a record that contains runtime values of a "Capataz" (id, queue, status, etc.) 
-buildCapatazRuntime :: CapatazOptions -> IO CapatazRuntime -buildCapatazRuntime capatazOptions = do - capatazId <- UUID.nextRandom - capatazQueue <- newTQueueIO - capatazStatusVar <- newTVarIO Initializing - capatazWorkerMap <- newIORef HashMap.empty - return CapatazRuntime {..} - --- | Creates a Capataz record, which represents a supervision thread which --- monitors failure on worker threads defined in the "CapatazOptions" or worker --- threads that are created dynamically using "forkWorker". -forkCapataz :: CapatazOptions -> IO Capataz -forkCapataz capatazOptions@CapatazOptions { capatazName, capatazWorkerSpecList, notifyEvent } - = do - capatazRuntime <- buildCapatazRuntime capatazOptions - - let capatazEnv@CapatazEnv { capatazId } = capatazToEnv capatazRuntime - - capatazAsync <- asyncWithUnmask - $ \unmask -> runCapatazLoop unmask capatazEnv - - forM_ - capatazWorkerSpecList - ( \workerSpec -> do - worker <- Worker.forkWorker capatazEnv workerSpec Nothing - appendWorkerToMap capatazEnv worker - ) - - writeCapatazStatus capatazEnv Running - - capatazTeardown <- newTeardown - ("capataz[" <> capatazName <> "]") - ( do - status <- readCapatazStatus capatazEnv - case status of - Halted -> return () - Halting -> return () - _ -> do - eventTime <- getCurrentTime - notifyEvent CapatazShutdownInvoked - { capatazId - , capatazName - , eventTime - } - sendSyncControlMsg capatazEnv TerminateCapataz - ) - - return Capataz {..} - --- | Creates a worker green thread "IO ()" sub-routine, and depending in options --- defined in the "WorkerOptions" record, it will restart the Worker sub-routine --- in case of failures -forkWorker - :: WorkerOptions -- ^ Worker options (restart, name, callbacks, etc) - -> IO () -- ^ IO sub-routine that will be executed on worker thread - -> Capataz -- ^ "Capataz" instance that supervises the worker - -> IO WorkerId -- ^ An identifier that can be used to terminate the "Worker" -forkWorker workerOptions workerAction Capataz { capatazEnv } = do - 
let workerSpec = workerOptionsToSpec workerOptions workerAction - CapatazEnv { capatazQueue } = capatazEnv - - workerIdVar <- newEmptyMVar - atomically $ writeTQueue - capatazQueue - ( ControlAction ForkWorker - { workerSpec - , returnWorkerId = putMVar workerIdVar - } - ) - takeMVar workerIdVar - --- | Stops the execution of a worker green thread being supervised by the given --- "Capataz" instance, if the WorkerId does not belong to the Capataz, the --- operation does not perform any side-effect. --- --- Note: If your worker has a "Permanent" worker restart strategy, the worker --- thread __will be restarted again__; so use a "Transient" restart strategy --- instead. -terminateWorker :: Text -> WorkerId -> Capataz -> IO () -terminateWorker terminationReason workerId Capataz { capatazEnv } = - sendSyncControlMsg - capatazEnv - ( \notifyWorkerTermination -> TerminateWorker - { terminationReason - , workerId - , notifyWorkerTermination - } - ) diff --git a/src/Control/Concurrent/Internal/Capataz/Restart.hs b/src/Control/Concurrent/Internal/Capataz/Restart.hs deleted file mode 100644 index 420fc28..0000000 --- a/src/Control/Concurrent/Internal/Capataz/Restart.hs +++ /dev/null @@ -1,219 +0,0 @@ -{-# LANGUAGE DuplicateRecordFields #-} -{-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-| This module contains all logic related to the restart of workers -} -module Control.Concurrent.Internal.Capataz.Restart where - -import Data.Time.Clock (NominalDiffTime, UTCTime, diffUTCTime, getCurrentTime) - -import Protolude - -import Control.Concurrent.Internal.Capataz.Types -import Control.Concurrent.Internal.Capataz.Util - ( appendWorkerToMap - , fetchWorkerEnv - , readWorkerMap - , removeWorkerFromMap - , resetWorkerMap - , sortWorkersByTerminationOrder - ) -import qualified Control.Concurrent.Internal.Capataz.Worker as Worker -import qualified Data.HashMap.Strict as HashMap - 
--------------------------------------------------------------------------------- - --- | Function used to track difference between two timestamps to track capataz --- error intensity -calcDiffSeconds :: UTCTime -> IO NominalDiffTime -calcDiffSeconds creationTime = do - currentTime <- getCurrentTime - return $ diffUTCTime currentTime creationTime - --- | Function that checks restart counts and worker start time to assess if the --- capataz error intensity has been breached, see "WorkerRestartAction" for --- possible outcomes. -calcRestartAction :: CapatazEnv -> Int -> NominalDiffTime -> WorkerRestartAction -calcRestartAction CapatazEnv { capatazIntensity, capatazPeriodSeconds } restartCount diffSeconds - = case () of - _ - | diffSeconds < capatazPeriodSeconds && restartCount > capatazIntensity - -> HaltCapataz - | diffSeconds > capatazPeriodSeconds - -> ResetRestartCount - | otherwise - -> IncreaseRestartCount - --- | Sub-routine responsible of executing a "CapatazRestartStrategy" -execCapatazRestartStrategy :: CapatazEnv -> WorkerEnv -> Int -> IO () -execCapatazRestartStrategy capatazEnv@CapatazEnv { capatazRestartStrategy } WorkerEnv { workerId, workerSpec } workerRestartCount - = case capatazRestartStrategy of - AllForOne -> do - newWorkers <- restartWorkers capatazEnv workerId workerRestartCount - let newWorkersMap = - newWorkers - & fmap (\worker@Worker { workerId = cid } -> (cid, worker)) - & HashMap.fromList - resetWorkerMap capatazEnv (const newWorkersMap) - - OneForOne -> do - removeWorkerFromMap capatazEnv workerId - newWorker <- restartWorker capatazEnv - workerSpec - workerId - workerRestartCount - appendWorkerToMap capatazEnv newWorker - --- | Executes a restart action returned from the invokation of "calcRestartAction" -execRestartAction :: CapatazEnv -> WorkerEnv -> Int -> IO () -execRestartAction capatazEnv@CapatazEnv { onCapatazIntensityReached } workerEnv@WorkerEnv { workerId, workerName, workerCreationTime } workerRestartCount - = do - 
restartAction <- calcRestartAction capatazEnv workerRestartCount - <$> calcDiffSeconds workerCreationTime - - case restartAction of - HaltCapataz -> do - -- skip exceptions on callback - (_ :: Either SomeException ()) <- try onCapatazIntensityReached - throwIO CapatazIntensityReached - { workerId - , workerName - , workerRestartCount = succ workerRestartCount - } - - ResetRestartCount -> execCapatazRestartStrategy capatazEnv workerEnv 0 - - IncreaseRestartCount -> execCapatazRestartStrategy - capatazEnv - workerEnv - (succ workerRestartCount) - --------------------------------------------------------------------------------- - --- | Restarts _all_ the worker green thread of a Capataz, invoked when one --- worker green thread fails and causes sibling worker threads to get restarted --- as well -restartWorkers :: CapatazEnv -> WorkerId -> RestartCount -> IO [Worker] -restartWorkers capatazEnv@CapatazEnv { capatazWorkerTerminationOrder } failingWorkerId restartCount - = do - workerMap <- readWorkerMap capatazEnv - - let workers = - sortWorkersByTerminationOrder capatazWorkerTerminationOrder workerMap - - newWorkers <- forM workers $ \worker@Worker { workerId, workerSpec } -> do - unless (failingWorkerId == workerId) - $ forceRestartWorker capatazEnv worker - - let WorkerSpec { workerRestartStrategy } = workerSpec - case workerRestartStrategy of - Temporary -> return Nothing - _ -> Just <$> restartWorker capatazEnv workerSpec workerId restartCount - - return $ catMaybes newWorkers - --- | Sub-routine that is used when there is a restart request to a Worker caused --- by an "AllForOne" restart from a failing sibling worker. 
-forceRestartWorker :: CapatazEnv -> Worker -> IO () -forceRestartWorker CapatazEnv { capatazName, capatazId, notifyEvent } Worker { workerId, workerName, workerAsync } - = do - eventTime <- getCurrentTime - notifyEvent WorkerTerminated - { capatazName - , capatazId - , workerId - , workerName - , eventTime - , workerThreadId = asyncThreadId workerAsync - , terminationReason = "forced restart" - } - cancelWith workerAsync RestartWorkerException - --- | Starts a new worker thread taking into account an existing "WorkerId" and --- keeping a "RestartCount" to manage Capataz error intensity. -restartWorker - :: CapatazEnv -> WorkerSpec -> WorkerId -> RestartCount -> IO Worker -restartWorker capatazEnv workerSpec workerId restartCount = - Worker.forkWorker capatazEnv workerSpec (Just (workerId, restartCount)) - --------------------------------------------------------------------------------- - --- | This sub-routine is responsible of the restart strategies execution when a --- supervised worker finishes it execution because of a completion (e.g. worker --- sub-routine finished without any errors). -handleWorkerCompleted :: CapatazEnv -> WorkerId -> UTCTime -> IO () -handleWorkerCompleted env@CapatazEnv { capatazName, capatazId, notifyEvent } workerId eventTime - = do - mWorkerEnv <- fetchWorkerEnv env workerId - case mWorkerEnv of - Nothing -> return () - Just workerEnv@WorkerEnv { workerName, workerAsync, workerRestartStrategy } - -> do - notifyEvent WorkerCompleted - { capatazId - , capatazName - , workerId - , workerName - , eventTime - , workerThreadId = asyncThreadId workerAsync - } - case workerRestartStrategy of - Permanent -> do - -- NOTE: Completed workers should never account as errors happening on - -- a supervised thread, ergo, they should be restarted every time. - - -- TODO: Notify a warning around having a workerRestartStrategy different - -- than Temporary on workers that may complete. 
- let restartCount = 0 - execRestartAction env workerEnv restartCount - - _ -> removeWorkerFromMap env workerId - --- | This sub-routine is responsible of the restart strategies execution when a --- supervised worker finishes it execution because of a failure. -handleWorkerFailed :: CapatazEnv -> WorkerId -> SomeException -> Int -> IO () -handleWorkerFailed env@CapatazEnv { capatazName, capatazId, notifyEvent } workerId workerError restartCount - = do - mWorkerEnv <- fetchWorkerEnv env workerId - case mWorkerEnv of - Nothing -> return () - Just workerEnv@WorkerEnv { workerName, workerAsync, workerRestartStrategy } - -> do - eventTime <- getCurrentTime - notifyEvent WorkerFailed - { capatazName - , capatazId - , workerId - , workerName - , workerError - , workerThreadId = asyncThreadId workerAsync - , eventTime - } - case workerRestartStrategy of - Temporary -> removeWorkerFromMap env workerId - _ -> execRestartAction env workerEnv restartCount - --- | This sub-routine is responsible of the restart strategies execution when a --- supervised worker finishes it execution because of a termination. 
-handleWorkerTerminated :: CapatazEnv -> WorkerId -> Text -> Int -> IO () -handleWorkerTerminated env@CapatazEnv { capatazName, capatazId, notifyEvent } workerId terminationReason workerRestartCount - = do - mWorkerEnv <- fetchWorkerEnv env workerId - case mWorkerEnv of - Nothing -> return () - Just workerEnv@WorkerEnv { workerName, workerAsync, workerRestartStrategy } - -> do - eventTime <- getCurrentTime - notifyEvent WorkerTerminated - { capatazName - , capatazId - , workerId - , workerName - , eventTime - , terminationReason - , workerThreadId = asyncThreadId workerAsync - } - case workerRestartStrategy of - Permanent -> execRestartAction env workerEnv workerRestartCount - _ -> removeWorkerFromMap env workerId diff --git a/src/Control/Concurrent/Internal/Capataz/Types.hs b/src/Control/Concurrent/Internal/Capataz/Types.hs deleted file mode 100644 index 4dd92e2..0000000 --- a/src/Control/Concurrent/Internal/Capataz/Types.hs +++ /dev/null @@ -1,519 +0,0 @@ -{-# LANGUAGE DeriveGeneric #-} -{-# LANGUAGE DuplicateRecordFields #-} -{-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE OverloadedStrings #-} -{-| This module contains all the types used across all the other modules -} -module Control.Concurrent.Internal.Capataz.Types where - -import Protolude - -import Control.Concurrent.STM.TQueue (TQueue) -import Control.Concurrent.STM.TVar (TVar) -import Control.Teardown (ITeardown (..), Teardown) -import Data.Default (Default (..)) -import Data.HashMap.Strict (HashMap) -import Data.IORef (IORef) -import Data.Time.Clock (NominalDiffTime, UTCTime) -import Data.UUID (UUID) - -type CapatazId = UUID -type WorkerId = UUID -type WorkerAction = IO () -type WorkerThreadId = ThreadId -type CapatazName = Text -type WorkerName = Text -type RestartCount = Int -type WorkerMap = HashMap WorkerId Worker - --- | Event passed to the "notifyEvent" callback sub-routine, this events can be --- used to monitor the capataz system and understanding what is doing. 
This --- provides high levels of telemetry for the Capataz instance, so is mainly used --- for logging, monitoring and testing purposes. -data CapatazEvent - = InvalidCapatazStatusReached { - capatazId :: !CapatazId - , capatazName :: !CapatazName - , eventTime :: !UTCTime - } - | CapatazStatusChanged { - capatazId :: !CapatazId - , capatazName :: !CapatazName - , prevCapatazStatus :: !CapatazStatus - , newCapatazStatus :: !CapatazStatus - , eventTime :: !UTCTime - } - | WorkerTerminated { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , workerThreadId :: !WorkerThreadId - , workerId :: !WorkerId - , workerName :: !WorkerName - , terminationReason :: !Text - , eventTime :: !UTCTime - } - | WorkerStarted { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , workerThreadId :: !WorkerThreadId - , workerId :: !WorkerId - , workerName :: !WorkerName - , eventTime :: !UTCTime - } - | WorkerRestarted { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , workerThreadId :: !WorkerThreadId - , workerId :: !WorkerId - , workerName :: !WorkerName - , workerRestartCount :: !Int - , eventTime :: !UTCTime - } - | WorkerCompleted { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , workerThreadId :: !WorkerThreadId - , workerId :: !WorkerId - , workerName :: !WorkerName - , eventTime :: !UTCTime - } - | WorkerFailed { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , workerThreadId :: !WorkerThreadId - , workerId :: !WorkerId - , workerName :: !WorkerName - , workerError :: !SomeException - , eventTime :: !UTCTime - } - | WorkerCallbackExecuted { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , workerThreadId :: !WorkerThreadId - , workerId :: !WorkerId - , workerName :: !WorkerName - , workerCallbackError :: !(Maybe SomeException) - , callbackType :: !CallbackType - , eventTime :: !UTCTime - } - | WorkersTerminationStarted { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , terminationReason :: !Text 
- , eventTime :: !UTCTime - } - | WorkersTerminationFinished { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , terminationReason :: !Text - , eventTime :: !UTCTime - } - | CapatazFailed { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , capatazError :: !SomeException - , eventTime :: !UTCTime - } - | CapatazTerminated { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , eventTime :: !UTCTime - } - | CapatazShutdownInvoked { - capatazName :: !CapatazName - , capatazId :: !CapatazId - , eventTime :: !UTCTime - } - deriving (Generic, Show) - --- | Defines how a "Worker" termination should be handled, default --- "WorkerTerminationPolicy" is 3 seconds -data WorkerTerminationPolicy - -- | Waits until infinity for the worker to terminate - = Infinity - - -- | Worker is terminated wihtout a chance to call its callback - | BrutalTermination - - -- | Allows n milliseconds for worker termination callback to be - -- executed, otherwise "BrutalTermination occurs" - | TimeoutMillis !Int - deriving (Generic, Show, Eq, Ord) - -instance Default WorkerTerminationPolicy where - def = TimeoutMillis 3000 - -instance NFData WorkerTerminationPolicy - --- | Helper record to assess if the capataz error intensity has been breached -data WorkerRestartAction - -- | The capataz will restart the failed worker and reset the restart count - -- given intensity period has passed - = ResetRestartCount - - -- | The capataz will restart the failed worker and increase the restart count - | IncreaseRestartCount - - -- | The error intensity has been reached - | HaltCapataz - deriving (Generic, Show, Eq) - -instance NFData WorkerRestartAction - --- | Specifies how order in which workers should be terminated by a Capataz in --- case of restart or shutdown; default is "OldestFirst" -data WorkerTerminationOrder - -- | Terminate worker threads from most recent to oldest - = NewestFirst - -- | Terminate worker threads from oldest to most recent - | OldestFirst - deriving 
(Generic, Show, Eq, Ord) - -instance Default WorkerTerminationOrder where - def = OldestFirst - -instance NFData WorkerTerminationOrder - --- | Specifies how a Capataz should restart a failing worker. Default is --- "OneForOne" -data CapatazRestartStrategy - -- | Terminate all workers threads when one fails and restart them all - = AllForOne - - -- | Only restart worker thread that failed - | OneForOne - deriving (Generic, Show, Eq, Ord) - -instance Default CapatazRestartStrategy where - def = OneForOne - -instance NFData CapatazRestartStrategy - --- | Utility record used to specify options to a "Capataz" instance -data CapatazOptions - = CapatazOptions { - -- | Name of the Capataz (present on "CapatazEvent" records) - capatazName :: Text - -- | How many errors is the Capataz be able to handle; check: - -- http://erlang.org/doc/design_principles/sup_princ.html#max_intensity - , capatazIntensity :: !Int - -- | Period of time where the Capataz can receive "capatazIntensity" amount - -- of errors - , capatazPeriodSeconds :: !NominalDiffTime - -- | What is the "CapatazRestartStrategy" for this Capataz - , capatazRestartStrategy :: !CapatazRestartStrategy - -- | Static set of workers that start as soon as the "Capataz" is created - , capatazWorkerSpecList :: ![WorkerSpec] - -- | In which order the "Capataz" record is going to terminate it's workers - , capatazWorkerTerminationOrder :: !WorkerTerminationOrder - -- | Callback used when the error intensity is reached - , onCapatazIntensityReached :: !(IO ()) - -- | Callback used for telemetry purposes - , notifyEvent :: !(CapatazEvent -> IO ()) - } - - --- | Utility record used to specify options to a "Worker" instance -data WorkerOptions - = WorkerOptions { - -- | Name of the Worker (present on "CapatazEvent" records) - workerName :: !WorkerName - -- | Callback used when the worker fails with an error - , workerOnFailure :: !(SomeException -> IO ()) - -- | Callback used when the worker completes execution without error - 
, workerOnCompletion :: !(IO ()) - -- | Callback used when the worker is terminated - , workerOnTermination :: !(IO ()) - -- | Indicates how a worker should be terminated - , workerTerminationPolicy :: !WorkerTerminationPolicy - -- | Indicates how a worker should be restarted - , workerRestartStrategy :: !WorkerRestartStrategy - } - deriving (Generic) - --- | Specifies how a "Worker" should restart on failure. Default is "Transient" -data WorkerRestartStrategy - -- | Worker thread is __always__ restarted - = Permanent - - -- | Worker thread is restarted only if it failed - | Transient - - -- | Worker thread is __never__ restarted - | Temporary - - deriving (Generic, Show, Eq) - -instance NFData WorkerRestartStrategy -instance Default WorkerRestartStrategy where - def = Transient - --- | WorkerSpec is a representation of the "WorkerOptions" record that embeds --- the @"IO" ()@ sub-routine of the worker thread. This record is used when we --- want to bound worker threads to a "Capataz" instance -data WorkerSpec - = WorkerSpec { - -- | An @IO ()@ sub-routine that will be executed when the worker - -- thread is created, this attribute is lazy given we want to this - -- value on a worker thread environment. 
- workerAction :: WorkerAction - -- | Name of the Worker (present on "CapatazEvent" records) - , workerName :: !WorkerName - -- | Callback used when the worker fails with an error - , workerOnFailure :: !(SomeException -> IO ()) - -- | Callback used when the worker completes execution without error - , workerOnCompletion :: !(IO ()) - -- | Callback used when the worker is terminated - , workerOnTermination :: !(IO ()) - -- | Indicates how a worker should be terminated - , workerTerminationPolicy :: !WorkerTerminationPolicy - -- | Indicates how a worker should be restarted - , workerRestartStrategy :: !WorkerRestartStrategy - } - deriving (Generic) - --- | Record that contains the "Async" record (thread reference) of a worker -data Worker - = Worker { - -- | Unique identifier for a worker that is executing - workerId :: !WorkerId - -- | "Async" thread of a worker, this Async executes the @IO ()@ sub-routine - , workerAsync :: !(Async ()) - -- | Time where this worker was created (used for error intensity checks) - , workerCreationTime :: !UTCTime - -- | Name of the Worker (present on "CapatazEvent" records) - , workerName :: !WorkerName - -- | "WorkerSpec" contains all the options around restart and termination - -- policies - , workerSpec :: !WorkerSpec - } - --- | Convenience utility record that contains all values related to a "Worker"; --- this is used on internal functions of the Capataz library. -data WorkerEnv - = WorkerEnv { - workerAction :: WorkerAction - , workerId :: !WorkerId - , workerAsync :: !(Async ()) - , workerCreationTime :: !UTCTime - , workerName :: !WorkerName - , workerSpec :: !WorkerSpec - , workerOnFailure :: !(SomeException -> IO ()) - , workerOnCompletion :: !(IO ()) - , workerOnTermination :: !(IO ()) - , workerRestartStrategy :: !WorkerRestartStrategy - } - --- | Internal record that represents an action being sent from threads using --- the Capataz public API. 
-data ControlAction - = ForkWorker { - workerSpec :: !WorkerSpec - , returnWorkerId :: !(WorkerId -> IO ()) - } - | TerminateWorker { - workerId :: !WorkerId - , terminationReason :: !Text - , notifyWorkerTermination :: !(IO ()) - } - | TerminateCapataz { - notifyCapatazTermination :: !(IO ()) - } - deriving (Generic) - --- | Internal exception thrown to the Capataz loop to indicate termination of --- execution -data CapatazSignal - = RestartWorkerException - | TerminateWorkerException { - workerId :: !WorkerId - , workerTerminationReason :: !Text - } - | BrutallyTerminateWorkerException { - workerId :: !WorkerId - , workerTerminationReason :: !Text - } - deriving (Generic, Show) - -instance Exception CapatazSignal -instance NFData CapatazSignal - --- | Internal exception triggered when a Worker violates error intensity --- specification -data CapatazError - = CapatazIntensityReached { - workerId :: !WorkerId - , workerName :: !WorkerName - , workerRestartCount :: !Int - } - deriving (Generic, Show) - -instance Exception CapatazError -instance NFData CapatazError - --- | Internal record that indicates what type of callback function is being --- invoked; this is used for telemetry purposes -data CallbackType - = OnCompletion - | OnFailure - | OnTermination - deriving (Generic, Show, Eq) - --- | Internal exception triggered when a callback of a Worker fails -data WorkerError - = WorkerCallbackFailed { - workerId :: !WorkerId - , workerActionError :: !(Maybe SomeException) - , callbackType :: !CallbackType - , workerCallbackError :: !SomeException - } - deriving (Generic, Show) - -instance Exception WorkerError - --- | Internal event delivered from Worker threads to the Capataz thread to --- indicate completion, failure or termination -data MonitorEvent - = WorkerTerminated' { - workerId :: !WorkerId - , workerName :: !WorkerName - , workerRestartCount :: !RestartCount - , workerTerminationReason :: !Text - , monitorEventTime :: !UTCTime - } - | WorkerFailed' { - 
workerId :: !WorkerId - , workerName :: !WorkerName - , workerRestartCount :: !RestartCount - , workerError :: !SomeException - , monitorEventTime :: !UTCTime - } - | WorkerCompleted' { - workerId :: !WorkerId - , workerName :: !WorkerName - , monitorEventTime :: !UTCTime - } - | WorkerForcedRestart { - workerId :: !WorkerId - , workerName :: !WorkerName - , monitorEventTime :: !UTCTime - } - deriving (Show) - --- | Internal state machine record that indicates the state of a Capataz -data CapatazStatus - -- | This state is set when Worker is created and it spawn static worker - -- threads - = Initializing - -- | This state is set when the Capataz thread is listenting to both - -- "ControlAction" and "MonitorEvent" messages - | Running - -- | This state is set when the Capataz thread is terminating it's assigned - -- worker - | Halting - -- | The Capataz thread is done - | Halted - deriving (Generic, Show, Eq) - -instance NFData CapatazStatus - --- | Internal message delivered to a Capataz thread that can either be a call --- from public API or an event from a monitored Worker -data CapatazMessage - = ControlAction !ControlAction - | MonitorEvent !MonitorEvent - deriving (Generic) - --- | Record that contains the environment of a capataz monitor, this is used as --- the main record to create workers and to stop the supervisor thread. 
-data Capataz - = Capataz { - capatazRuntime :: !CapatazRuntime - , capatazEnv :: !CapatazEnv - , capatazAsync :: !(Async ()) - , capatazTeardown :: !Teardown - } - -instance ITeardown Capataz where - teardown Capataz {capatazTeardown} = - teardown capatazTeardown - --- | Internal record used to hold part of the runtime information of a "Capataz" --- record -data CapatazRuntime - = CapatazRuntime { - capatazId :: !CapatazId - , capatazQueue :: !(TQueue CapatazMessage) - , capatazWorkerMap :: !(IORef (HashMap WorkerId Worker)) - , capatazStatusVar :: !(TVar CapatazStatus) - , capatazOptions :: !CapatazOptions - } - --- | Convenience utility record that contains all values related to a "Capataz"; --- this is used on internal functions of the Capataz library. -data CapatazEnv - = CapatazEnv { - capatazId :: !CapatazId - , capatazName :: !CapatazName - , capatazQueue :: !(TQueue CapatazMessage) - , capatazWorkerMap :: !(IORef (HashMap WorkerId Worker)) - , capatazStatusVar :: !(TVar CapatazStatus) - , capatazOptions :: !CapatazOptions - , capatazRuntime :: !CapatazRuntime - , capatazIntensity :: !Int - -- ^ http://erlang.org/doc/design_principles/sup_princ.html#max_intensity - , capatazPeriodSeconds :: !NominalDiffTime - , capatazRestartStrategy :: !CapatazRestartStrategy - , capatazWorkerTerminationOrder :: !WorkerTerminationOrder - , onCapatazIntensityReached :: !(IO ()) - , notifyEvent :: !(CapatazEvent -> IO ()) - } - --- | Default options to easily create capataz instances: --- * name defaults to \"default-capataz\" --- * intensity error tolerance is set to 1 error every 5 seconds --- * has a "OneForOne " capataz restart strategy --- * has a termination order of "OldestFirst" -defCapatazOptions :: CapatazOptions -defCapatazOptions = CapatazOptions - { capatazName = "default-capataz" - - -- One (1) restart every five (5) seconds - , capatazIntensity = 1 - , capatazPeriodSeconds = 5 - , capatazRestartStrategy = def - , capatazWorkerSpecList = [] - , 
capatazWorkerTerminationOrder = OldestFirst - , onCapatazIntensityReached = return () - , notifyEvent = const $ return () - } - --- | Default options to easily create worker instances: --- * name defaults to \"default-worker\" --- * has a "Transient" worker restart strategy --- * has a termination policy of three (3) seconds -defWorkerOptions :: WorkerOptions -defWorkerOptions = WorkerOptions - { workerName = "default-worker" - , workerOnFailure = const $ return () - , workerOnCompletion = return () - , workerOnTermination = return () - , workerTerminationPolicy = def - , workerRestartStrategy = def - } - --- | Default spec to easily create worker instances: --- * @IO ()@ sub-routine simply returns unit --- * name defaults to \"default-worker\" --- * has a "Transient" worker restart strategy --- * has a termination policy of three (3) seconds -defWorkerSpec :: WorkerSpec -defWorkerSpec = WorkerSpec - { workerName = "default-worker" - , workerAction = return () - , workerOnFailure = const $ return () - , workerOnCompletion = return () - , workerOnTermination = return () - , workerTerminationPolicy = def - , workerRestartStrategy = def - } diff --git a/src/Control/Concurrent/Internal/Capataz/Util.hs b/src/Control/Concurrent/Internal/Capataz/Util.hs deleted file mode 100644 index 8086ffa..0000000 --- a/src/Control/Concurrent/Internal/Capataz/Util.hs +++ /dev/null @@ -1,165 +0,0 @@ -{-# LANGUAGE DuplicateRecordFields #-} -{-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE RecordWildCards #-} -{-| This module contains: - -* Functions to manipulate the state of the Capataz record -* Utility functions used for communication between threads -* Public API utility functions - --} -module Control.Concurrent.Internal.Capataz.Util where - -import Protolude - -import Control.Concurrent.STM (STM, atomically, retry) -import Control.Concurrent.STM.TQueue (writeTQueue) -import Control.Concurrent.STM.TVar (TVar, readTVar, writeTVar) -import Data.IORef 
(atomicModifyIORef', readIORef) -import qualified Data.Text as T -import Data.Time.Clock (getCurrentTime) - -import qualified Data.HashMap.Strict as HashMap - -import Control.Concurrent.Internal.Capataz.Types - --- | Returns only the number of the ThreadId -getTidNumber :: ThreadId -> Maybe Text -getTidNumber tid = case T.words $ show tid of - (_:tidNumber:_) -> Just tidNumber - _ -> Nothing - --------------------------------------------------------------------------------- - --- | Fetches a "Worker" from the "Capataz" instance environment -fetchWorker :: CapatazEnv -> WorkerId -> IO (Maybe Worker) -fetchWorker CapatazEnv { capatazWorkerMap } workerId = - HashMap.lookup workerId <$> readIORef capatazWorkerMap - --- | Fetches a "WorkerEnv" from the "Capataz" instance environment -fetchWorkerEnv :: CapatazEnv -> WorkerId -> IO (Maybe WorkerEnv) -fetchWorkerEnv CapatazEnv { capatazWorkerMap } workerId = - ((workerToEnv <$>) . HashMap.lookup workerId) <$> readIORef capatazWorkerMap - --- | Appends a new "Worker" to the "Capataz" existing worker map. -appendWorkerToMap :: CapatazEnv -> Worker -> IO () -appendWorkerToMap CapatazEnv { capatazWorkerMap } worker@Worker { workerId } = - atomicModifyIORef' capatazWorkerMap - (\workerMap -> (appendWorker workerMap, ())) - where appendWorker = HashMap.alter (const $ Just worker) workerId - --- | Removes a "Worker" from the "Capataz" existing worker map. -removeWorkerFromMap :: CapatazEnv -> WorkerId -> IO () -removeWorkerFromMap CapatazEnv { capatazWorkerMap } workerId = - atomicModifyIORef' - capatazWorkerMap - ( \workerMap -> maybe (workerMap, ()) - (const (HashMap.delete workerId workerMap, ())) - (HashMap.lookup workerId workerMap) - ) - --- | Function to modify a "Capataz" worker map using a pure function. 
-resetWorkerMap :: CapatazEnv -> (WorkerMap -> WorkerMap) -> IO () -resetWorkerMap CapatazEnv { capatazWorkerMap } workerMapFn = atomicModifyIORef' - capatazWorkerMap - (\workerMap -> (workerMapFn workerMap, ())) - --- | Function to get a snapshot of the "Capataz"' worker map -readWorkerMap :: CapatazEnv -> IO WorkerMap -readWorkerMap CapatazEnv { capatazWorkerMap } = readIORef capatazWorkerMap - --- | Returns all worker's of a "Capataz" by "WorkerTerminationOrder". This is --- used "AllForOne" restarts and shutdown operations. -sortWorkersByTerminationOrder :: WorkerTerminationOrder -> WorkerMap -> [Worker] -sortWorkersByTerminationOrder terminationOrder workerMap = - case terminationOrder of - OldestFirst -> workers - NewestFirst -> reverse workers - where - -- NOTE: dissambiguates workerCreationTime field - workerCreationTime' Worker { workerCreationTime } = workerCreationTime - - workers = sortBy (comparing workerCreationTime') (HashMap.elems workerMap) - --------------------------------------------------------------------------------- - --- | Sub-routine that returns the "CapatazStatus", this sub-routine will block --- until the "Capataz" has a status different from "Initializing". -readCapatazStatusSTM :: TVar CapatazStatus -> STM CapatazStatus -readCapatazStatusSTM statusVar = do - status <- readTVar statusVar - if status == Initializing then retry else return status - --- | Sub-routine that returns the "CapatazStatus" on the IO monad -readCapatazStatus :: CapatazEnv -> IO CapatazStatus -readCapatazStatus CapatazEnv { capatazStatusVar } = - atomically $ readTVar capatazStatusVar - --- | Modifes the "Capataz" status, this is the only function that should be used --- to this end given it has the side-effect of notifying a status change via the --- "notifyEvent" sub-routine, given via an attribute of the "CapatazOption" --- record. 
-writeCapatazStatus :: CapatazEnv -> CapatazStatus -> IO () -writeCapatazStatus CapatazEnv { capatazId, capatazName, capatazStatusVar, notifyEvent } newCapatazStatus - = do - - prevCapatazStatus <- atomically $ do - prevStatus <- readTVar capatazStatusVar - writeTVar capatazStatusVar newCapatazStatus - return prevStatus - - eventTime <- getCurrentTime - notifyEvent CapatazStatusChanged - { capatazId - , capatazName - , prevCapatazStatus - , newCapatazStatus - , eventTime - } - - --- | Used from public API functions to send a ControlAction to the Capataz --- supervisor thread loop -sendControlMsg :: CapatazEnv -> ControlAction -> IO () -sendControlMsg CapatazEnv { capatazQueue } ctrlMsg = - atomically $ writeTQueue capatazQueue (ControlAction ctrlMsg) - --- | Used from public API functions to send a ControlAction to the Capataz --- supervisor thread loop, it receives an IO sub-routine that expects an IO --- operation that blocks a thread until the message is done. -sendSyncControlMsg - :: CapatazEnv - -> (IO () -> ControlAction) -- ^ Blocking sub-routine used from the caller - -> IO () -sendSyncControlMsg CapatazEnv { capatazQueue } mkCtrlMsg = do - result <- newEmptyMVar - atomically - $ writeTQueue capatazQueue (ControlAction $ mkCtrlMsg (putMVar result ())) - takeMVar result - --- | Utility function to transform a "CapatazRuntime" into a "CapatazEnv" -capatazToEnv :: CapatazRuntime -> CapatazEnv -capatazToEnv capatazRuntime@CapatazRuntime {..} = - let CapatazOptions {..} = capatazOptions in CapatazEnv {..} - --- | Utility function to transform a "Worker" into a "WorkerEnv" -workerToEnv :: Worker -> WorkerEnv -workerToEnv Worker {..} = - let - WorkerSpec { workerAction, workerOnFailure, workerOnCompletion, workerOnTermination, workerRestartStrategy } - = workerSpec - in - WorkerEnv {..} - --- | Utility function to transform a "WorkerEnv" into a "Worker" -envToWorker :: WorkerEnv -> Worker -envToWorker WorkerEnv {..} = Worker {..} - --- | Utility function to 
transform a "WorkerOptions" into a "WorkerSpec" -workerOptionsToSpec :: WorkerOptions -> IO () -> WorkerSpec -workerOptionsToSpec WorkerOptions {..} workerAction = WorkerSpec {..} - --- | Utility function to transform a "Capataz" into an @"Async" ()@ -capatazToAsync :: Capataz -> Async () -capatazToAsync = capatazAsync diff --git a/src/Control/Concurrent/Internal/Capataz/Worker.hs b/src/Control/Concurrent/Internal/Capataz/Worker.hs deleted file mode 100644 index 411a2b9..0000000 --- a/src/Control/Concurrent/Internal/Capataz/Worker.hs +++ /dev/null @@ -1,312 +0,0 @@ -{-# LANGUAGE DuplicateRecordFields #-} -{-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE OverloadedStrings #-} -{-| This module contains all logic related to error handling when spawning threads - to execute Worker sub-routines --} -module Control.Concurrent.Internal.Capataz.Worker where - -import Protolude - -import Control.Concurrent.Async (asyncWithUnmask) -import Control.Concurrent.STM.TQueue (writeTQueue) -import Data.Time.Clock (getCurrentTime) -import GHC.Conc (labelThread) - -import qualified Data.Text as T -import qualified Data.UUID.V4 as UUID - -import Control.Concurrent.Internal.Capataz.Types -import Control.Concurrent.Internal.Capataz.Util - (getTidNumber, readWorkerMap, sortWorkersByTerminationOrder) - --- | Internal functions that overwrites the GHC thread name, for increasing --- traceability on GHC internals -setWorkerThreadName :: WorkerId -> WorkerName -> IO () -setWorkerThreadName workerId workerName = do - tid <- myThreadId - let workerIdentifier = - T.unpack workerName <> "_" <> show workerId <> "_" <> maybe - "" - T.unpack - (getTidNumber tid) - labelThread tid workerIdentifier - --- | Handles errors caused by the execution of the "workerMain" sub-routine -handleWorkerException - :: (IO () -> IO a) - -> CapatazEnv - -> WorkerSpec - -> WorkerId - -> RestartCount - -> SomeException - -> IO MonitorEvent -handleWorkerException unmask CapatazEnv { 
capatazId, capatazName, notifyEvent } WorkerSpec { workerName, workerOnFailure, workerOnTermination } workerId restartCount err - = do - workerThreadId <- myThreadId - monitorEventTime <- getCurrentTime - case fromException err of - Just RestartWorkerException -> - return WorkerForcedRestart {workerId , workerName , monitorEventTime } - - Just TerminateWorkerException { workerTerminationReason } -> do - eErrResult <- try $ unmask workerOnTermination - - notifyEvent WorkerCallbackExecuted - { capatazId - , capatazName - , workerId - , workerName - , workerThreadId - , workerCallbackError = either Just (const Nothing) eErrResult - , callbackType = OnTermination - , eventTime = monitorEventTime - } - - case eErrResult of - Left workerCallbackError -> return WorkerFailed' - { workerName - , workerId - , monitorEventTime - , workerError = toException WorkerCallbackFailed - { workerId - , workerCallbackError - , callbackType = OnTermination - , workerActionError = Just err - } - , workerRestartCount = restartCount - } - Right _ -> return WorkerTerminated' - { workerId - , workerName - , monitorEventTime - , workerTerminationReason - , workerRestartCount = restartCount - } - - Just BrutallyTerminateWorkerException { workerTerminationReason } -> - return WorkerTerminated' - { workerId - , workerName - , monitorEventTime - , workerTerminationReason - , workerRestartCount = restartCount - } - - -- This exception was an error from the given sub-routine - Nothing -> do - eErrResult <- try $ unmask $ workerOnFailure err - - notifyEvent WorkerCallbackExecuted - { capatazId - , capatazName - , workerId - , workerName - , workerThreadId - , workerCallbackError = either Just (const Nothing) eErrResult - , callbackType = OnFailure - , eventTime = monitorEventTime - } - - case eErrResult of - Left workerCallbackError -> return WorkerFailed' - { workerName - , workerId - , monitorEventTime - , workerRestartCount = restartCount - , workerError = toException WorkerCallbackFailed - { 
workerId - , workerCallbackError - , callbackType = OnFailure - , workerActionError = Just err - } - } - Right _ -> return WorkerFailed' - { workerName - , workerId - , monitorEventTime - , workerError = err - , workerRestartCount = restartCount - } - --- | Handles completion of the "workerMain" sub-routine -handleWorkerCompletion - :: (IO () -> IO a) - -> CapatazEnv - -> WorkerSpec - -> WorkerId - -> RestartCount - -> IO MonitorEvent -handleWorkerCompletion unmask CapatazEnv { capatazId, capatazName, notifyEvent } WorkerSpec { workerName, workerOnCompletion } workerId restartCount - = do - workerThreadId <- myThreadId - monitorEventTime <- getCurrentTime - eCompResult <- try $ unmask workerOnCompletion - - notifyEvent WorkerCallbackExecuted - { capatazId - , capatazName - , workerId - , workerName - , workerThreadId - , workerCallbackError = either Just (const Nothing) eCompResult - , callbackType = OnCompletion - , eventTime = monitorEventTime - } - - case eCompResult of - Left err -> return WorkerFailed' - { workerName - , workerId - , monitorEventTime - , workerError = toException WorkerCallbackFailed - { workerId - , workerCallbackError = err - , callbackType = OnCompletion - , workerActionError = Nothing - } - , workerRestartCount = restartCount - } - Right _ -> - return WorkerCompleted' {workerName , workerId , monitorEventTime } - --- | Decorates the given @IO ()@ sub-routine with failure handling -workerMain :: CapatazEnv -> WorkerSpec -> WorkerId -> RestartCount -> IO Worker -workerMain env@CapatazEnv { capatazQueue } workerSpec@WorkerSpec { workerName, workerAction } workerId restartCount - = do - workerCreationTime <- getCurrentTime - workerAsync <- asyncWithUnmask $ \unmask -> do - - eResult <- try $ do - setWorkerThreadName workerId workerName - unmask workerAction - - resultEvent <- case eResult of - Left err -> - handleWorkerException unmask env workerSpec workerId restartCount err - Right _ -> - handleWorkerCompletion unmask env workerSpec workerId 
restartCount - - atomically $ writeTQueue capatazQueue (MonitorEvent resultEvent) - - return Worker - { workerId - , workerName - , workerAsync - , workerCreationTime - , workerSpec - } - --- | Internal function used to send a proper "CapatazEvent" to the "notifyEvent" --- callback, this event can either be a @WorkerStarted@ or a @WorkerRestarted@ -notifyWorkerStarted :: Maybe (WorkerId, Int) -> CapatazEnv -> Worker -> IO () -notifyWorkerStarted mRestartInfo CapatazEnv { capatazId, capatazName, notifyEvent } Worker { workerId, workerName, workerAsync } - = do - eventTime <- getCurrentTime - case mRestartInfo of - Just (_workerId, workerRestartCount) -> notifyEvent WorkerRestarted - { capatazId - , capatazName - , workerId - , workerName - , workerRestartCount - , workerThreadId = asyncThreadId workerAsync - , eventTime - } - Nothing -> notifyEvent WorkerStarted - { capatazId - , capatazName - , workerId - , workerName - , eventTime - , workerThreadId = asyncThreadId workerAsync - } - --- | Internal function that forks a worker thread on the Capataz thread; note --- this is different from the public @forkWorker@ function which sends a message --- to the capataz loop -forkWorker - :: CapatazEnv -> WorkerSpec -> Maybe (WorkerId, RestartCount) -> IO Worker -forkWorker env workerSpec mRestartInfo = do - (workerId, restartCount) <- case mRestartInfo of - Just (workerId, restartCount) -> pure (workerId, restartCount) - Nothing -> (,) <$> UUID.nextRandom <*> pure 0 - - worker <- workerMain env workerSpec workerId restartCount - notifyWorkerStarted mRestartInfo env worker - return worker - --- | Internal function that forks a worker thread on the Capataz thread; note --- this is different from the public @forkWorker@ function which sends a message --- to the capataz loop -terminateWorker - :: Text -- ^ Text that indicates why there is a termination - -> CapatazEnv - -> Worker - -> IO () -terminateWorker workerTerminationReason CapatazEnv { capatazId, capatazName, 
notifyEvent } Worker { workerId, workerName, workerSpec, workerAsync } - = do - let WorkerSpec { workerTerminationPolicy } = workerSpec - case workerTerminationPolicy of - Infinity -> cancelWith - workerAsync - TerminateWorkerException {workerId , workerTerminationReason } - - BrutalTermination -> cancelWith - workerAsync - BrutallyTerminateWorkerException {workerId , workerTerminationReason } - - TimeoutMillis millis -> race_ - ( do - threadDelay (millis * 1000) - cancelWith - workerAsync - BrutallyTerminateWorkerException - { workerId - , workerTerminationReason - } - ) - ( cancelWith - workerAsync - TerminateWorkerException {workerId , workerTerminationReason } - ) - - eventTime <- getCurrentTime - notifyEvent WorkerTerminated - { capatazId - , capatazName - , eventTime - , workerId - , workerName - , workerThreadId = asyncThreadId workerAsync - , terminationReason = workerTerminationReason - } - - --- | Internal sub-routine that terminates workers of a Capataz, used when a --- Capataz instance is terminated -terminateWorkers :: Text -> CapatazEnv -> IO () -terminateWorkers terminationReason env@CapatazEnv { capatazName, capatazId, capatazWorkerTerminationOrder, notifyEvent } - = do - eventTime <- getCurrentTime - workerMap <- readWorkerMap env - - let workers = - sortWorkersByTerminationOrder capatazWorkerTerminationOrder workerMap - - notifyEvent WorkersTerminationStarted - { capatazName - , capatazId - , terminationReason - , eventTime - } - - forM_ workers (terminateWorker terminationReason env) - - notifyEvent WorkersTerminationFinished - { capatazName - , capatazId - , terminationReason - , eventTime - } diff --git a/stack.yaml b/stack.yaml index 795f3ba..b827cce 100644 --- a/stack.yaml +++ b/stack.yaml @@ -15,7 +15,7 @@ # resolver: # name: custom-snapshot # location: "./custom-snapshot.yaml" -resolver: lts-10.2 +resolver: lts-10.3 # User packages to be built. # Various formats can be used as shown in the example below. 
@@ -37,11 +37,14 @@ resolver: lts-10.2 # will not be run. This is useful for tweaking upstream packages. packages: - . -- examples/capataz-example +- examples/capataz-simple-example +- examples/capataz-repo-watcher # Dependency packages to be pulled from upstream that are not in the resolver # (e.g., acme-missiles-0.3) extra-deps: - teardown-0.3.0.0 +- rio-0.0.0.0 +- unliftio-0.2.4.0 # Override default flag values for local packages and extra-deps flags: {} diff --git a/test/testsuite/Control/Concurrent/Capataz/SupervisorTest.hs b/test/testsuite/Control/Concurrent/Capataz/SupervisorTest.hs new file mode 100644 index 0000000..3c70d4c --- /dev/null +++ b/test/testsuite/Control/Concurrent/Capataz/SupervisorTest.hs @@ -0,0 +1,147 @@ +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +module Control.Concurrent.Capataz.SupervisorTest where + +import Protolude + +import Control.Concurrent.Capataz (set) +import qualified Control.Concurrent.Capataz as SUT +import Test.Util + +import Test.Tasty (TestTree, testGroup) +import Test.Tasty.HUnit (testCase) + +tests :: TestTree +tests = testGroup + "supervision trees" + [ testCase "initialize and teardown of supervision tree works as expected" + $ testCapatazStreamWithOptions + ( & set + SUT.supervisorProcessSpecListL + [ SUT.supervisorSpec + "tree-1" + ( set + SUT.supervisorProcessSpecListL + [ SUT.workerSpecWithDefaults "1-A" + (forever $ threadDelay 10001000) + , SUT.workerSpecWithDefaults "1-B" + (forever $ threadDelay 10001000) + ] + ) + , SUT.supervisorSpec + "tree-2" + ( set + SUT.supervisorProcessSpecListL + [ SUT.workerSpecWithDefaults "2-A" + (forever $ threadDelay 10001000) + , SUT.workerSpecWithDefaults "2-B" + (forever $ threadDelay 10001000) + ] + ) + ] + ) + [ andP [assertSupervisorName "tree-1", assertWorkerStarted "1-A"] + , andP [assertSupervisorName "tree-1", assertWorkerStarted "1-B"] + , andP + [ assertSupervisorName "tree-1" + , assertEventType 
SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Initializing SUT.Running + ] + , andP [assertSupervisorName "tree-2", assertWorkerStarted "2-A"] + , andP [assertSupervisorName "tree-2", assertWorkerStarted "2-B"] + , andP + [ assertSupervisorName "tree-2" + , assertEventType SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Initializing SUT.Running + ] + , andP + [ assertRootSupervisor + , assertEventType SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Initializing SUT.Running + ] + ] + (const $ return ()) + [] + [ andP + [ assertRootSupervisor + , assertEventType SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Running SUT.Halting + ] + , andP [assertRootSupervisor, assertEventType ProcessTerminationStarted] + , andP [assertSupervisorName "tree-1", assertWorkerTerminated "1-A"] + , andP [assertSupervisorName "tree-1", assertWorkerTerminated "1-B"] + , andP [assertSupervisorName "tree-2", assertWorkerTerminated "2-A"] + , andP [assertSupervisorName "tree-2", assertWorkerTerminated "2-B"] + , andP + [assertRootSupervisor, assertEventType ProcessTerminationFinished] + , andP + [ assertRootSupervisor + , assertEventType SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Halting SUT.Halted + ] + ] + Nothing + , testCase "supervision sub-tree gets restarted on failure" $ do + failingAction <- mkFailingSubRoutine 2 + testCapatazStreamWithOptions + ( set SUT.supervisorIntensityL 3 . set + SUT.supervisorProcessSpecListL + [ SUT.supervisorSpec + "tree-1" + ( set SUT.supervisorIntensityL 1 + . set SUT.supervisorPeriodSecondsL 10 + . 
set + SUT.supervisorProcessSpecListL + [SUT.workerSpecWithDefaults "failing-worker" failingAction] + ) + ] + ) + [] + (const $ threadDelay 1000) + [ assertWorkerFailed "failing-worker" + , assertSupervisorFailed "tree-1" + , assertWorkerStarted "failing-worker" + , assertSupervisorRestarted "tree-1" + ] + [] + Nothing + , testCase "AllForOne strategy restarts sibling supervision tree" $ do + failingAction <- mkFailingSubRoutine 2 + testCapatazStreamWithOptions + ( set SUT.supervisorIntensityL 3 + . set SUT.supervisorRestartStrategyL SUT.AllForOne + . set + SUT.supervisorProcessSpecListL + [ SUT.supervisorSpec + "tree-1" + ( set SUT.supervisorIntensityL 1 + . set SUT.supervisorPeriodSecondsL 10 + . set + SUT.supervisorProcessSpecListL + [SUT.workerSpecWithDefaults "failing-worker" failingAction] + ) + , SUT.supervisorSpec + "tree-2" + ( set + SUT.supervisorProcessSpecListL + [ SUT.workerSpec "stable-worker" + (forever $ threadDelay 1000100) + identity + ] + ) + ] + ) + [] + (const $ threadDelay 9000) + [ assertWorkerFailed "failing-worker" + , assertWorkerStarted "stable-worker" + , assertSupervisorFailed "tree-1" + , assertSupervisorRestarted "tree-1" + , assertSupervisorTerminated "tree-2" + , assertWorkerStarted "stable-worker" + , assertSupervisorRestarted "tree-2" + ] + [] + Nothing + ] diff --git a/test/testsuite/Control/Concurrent/CapatazTest.hs b/test/testsuite/Control/Concurrent/CapatazTest.hs index 36edf2a..83d4a29 100644 --- a/test/testsuite/Control/Concurrent/CapatazTest.hs +++ b/test/testsuite/Control/Concurrent/CapatazTest.hs @@ -22,522 +22,202 @@ module Control.Concurrent.CapatazTest (tests) where import Protolude -import qualified Data.Text as T - -import Data.IORef (atomicModifyIORef', newIORef, readIORef, writeIORef) -import Text.Show.Pretty (ppShow) - import Test.Tasty (TestTree, testGroup) -import Test.Tasty.HUnit (assertBool, assertFailure, testCase) +import Test.Tasty.HUnit (testCase) -import Control.Concurrent.STM.TQueue (newTQueueIO, 
readTQueue, writeTQueue) -import Control.Concurrent.STM.TVar (modifyTVar', newTVarIO, readTVar) +import Control.Concurrent.STM.TVar (modifyTVar', newTVarIO, readTVar) +import Control.Concurrent.Capataz (set) import qualified Control.Concurrent.Capataz as SUT --------------------------------------------------------------------------------- --- Util - --- | Utility function that gets the type name of a Record through it's Show --- output. -fetchRecordName :: Show a => a -> Text -fetchRecordName = T.takeWhile (/= ' ') . show - --- | Composes two predicate functions together with a boolean AND -andP :: [a -> Bool] -> a -> Bool -andP predList a = all ($ a) predList - --------------------------------------------------------------------------------- --- Assertions and Testers - --- | This record duplicate the same event names as the ones found in the --- "CapatazEvent" type, we use this to avoid using Text comparisons on assertion --- helper functions. The "CapatazEvent" record is imported qualified, so there --- is no conflict happening. 
-data EventType - = InvalidCapatazStatusReached - | CapatazStatusChanged - | WorkerTerminated - | WorkerStarted - | WorkerRestarted - | WorkerCompleted - | WorkerFailed - | WorkerCallbackExecuted - | WorkersTerminationStarted - | WorkersTerminationFinished - | CapatazFailed - | CapatazTerminated - deriving (Show) - --- | Predicate function to assert "CapatazEvent" types -assertEventType :: EventType -> SUT.CapatazEvent -> Bool -assertEventType evType ev = fetchRecordName ev == show evType - --- | Predicate function to assert "CapatazEvent" worker name -assertWorkerName :: Text -> SUT.CapatazEvent -> Bool -assertWorkerName workerName' ev = case ev of - SUT.WorkerRestarted { workerName } -> workerName' == workerName - SUT.WorkerFailed { workerName } -> workerName' == workerName - SUT.WorkerTerminated { workerName } -> workerName' == workerName - SUT.WorkerStarted { workerName } -> workerName' == workerName - _ -> False - --- | Predicate function to assert type of an error inside a "CapatazEvent" -assertErrorType :: Text -> SUT.CapatazEvent -> Bool -assertErrorType errType ev = case ev of - SUT.WorkerFailed { workerError } -> fetchRecordName workerError == errType - SUT.CapatazFailed { capatazError } -> fetchRecordName capatazError == errType - SUT.WorkerCallbackExecuted { workerCallbackError } -> - case workerCallbackError of - Nothing -> False - Just originalError -> fetchRecordName originalError == errType - _ -> False - --- | Predicate function to assert type of callback executed inside a --- "CapatazEvent" -assertCallbackType :: SUT.CallbackType -> SUT.CapatazEvent -> Bool -assertCallbackType cbType ev = case ev of - SUT.WorkerFailed { workerError } -> case fromException workerError of - Just SUT.WorkerCallbackFailed { callbackType } -> cbType == callbackType - _ -> False - SUT.WorkerCallbackExecuted { callbackType } -> cbType == callbackType - _ -> False - --- | Predicate function to assert restart count inside a "CapatazEvent" -assertRestartCount :: (Int -> 
Bool) -> SUT.CapatazEvent -> Bool -assertRestartCount predFn ev = case ev of - SUT.WorkerRestarted { workerRestartCount } -> predFn workerRestartCount - _ -> False - --- | Predicate function to assert a Capataz status change -assertCapatazStatusChanged - :: SUT.CapatazStatus -> SUT.CapatazStatus -> SUT.CapatazEvent -> Bool -assertCapatazStatusChanged fromEv toEv ev = case ev of - SUT.CapatazStatusChanged { prevCapatazStatus, newCapatazStatus } -> - fromEv == prevCapatazStatus && toEv == newCapatazStatus - _ -> False - --- | Predicate function to assert a worker was started -assertWorkerStarted :: Text -> SUT.CapatazEvent -> Bool -assertWorkerStarted workerName = - andP [assertEventType WorkerStarted, assertWorkerName workerName] - --- | Predicate function to assert a worker was terminated -assertWorkerTerminated :: Text -> SUT.CapatazEvent -> Bool -assertWorkerTerminated workerName = - andP [assertEventType WorkerTerminated, assertWorkerName workerName] - --- | Predicate function to assert a capataz thread failed with error type -assertCapatazFailedWith :: Text -> SUT.CapatazEvent -> Bool -assertCapatazFailedWith errorName = - andP [assertEventType CapatazFailed, assertErrorType errorName] - --------------------------------------------------------------------------------- - --- | Exception used to test failures inside Worker sub-routines -data RestartingWorkerError - = RestartingWorkerError - deriving (Show) - -instance Exception RestartingWorkerError - --- | Exception used to test failures inside Worker callback sub-routines -data TimeoutError - = TimeoutError - deriving (Show) - -instance Exception TimeoutError - --- | Utility function to create a Worker sub-routine that fails at least a --- number of times -mkFailingSubRoutine - :: Int -- ^ Number of times the Worker sub-routine will fail - -> IO (IO ()) -- ^ Sub-routine used on worker creation -mkFailingSubRoutine failCount = do - countRef <- newIORef failCount - let subRoutine = do - shouldFail <- 
atomicModifyIORef' countRef - (\count -> (pred count, count > 0)) - when shouldFail (throwIO RestartingWorkerError) - - return subRoutine - --- | A sub-routine that will complete for `initCount` amount of times. This --- function works great when testing `Permanent` strategies, as you would like --- to assert restart events once (if it keeps completing it will fill up the log --- with restart events) -mkCompletingBeforeNRestartsSubRoutine :: Int -> IO (IO ()) -mkCompletingBeforeNRestartsSubRoutine initCount = do - countRef <- newIORef initCount - let subRoutine = do - shouldStop <- atomicModifyIORef' countRef - (\count -> (pred count, count > 0)) - if shouldStop then return () else forever $ threadDelay 1000100 - return subRoutine - --- | A sub-routine that will complete once. This function works great when --- testing `Permanent` strategies, as you would like to assert restart events --- once (if it keeps completing it will fill up the log with restart events) -mkCompletingOnceSubRoutine :: IO (IO ()) -mkCompletingOnceSubRoutine = mkCompletingBeforeNRestartsSubRoutine 1 - --- | Utility function to build a test environment for a Capataz execution. --- It is composed by: --- --- * List of assertions that represent events that should be triggered by the --- capataz instance in order --- --- * A function to modify the default "CapatazOptions", this utility function injects --- a special @notifyEvent@ callback to execute given assertions. 
-testCapatazStreamWithOptions - :: [SUT.CapatazEvent -> Bool] -- ^ Assertions happening before setup function - -- is called - -> (SUT.CapatazOptions -> SUT.CapatazOptions) -- ^ Function to modify default - -- @CapatazOptions@ - -> (SUT.Capataz -> IO ()) -- ^ Function used to test public the supervisor - -- public API (a.k.a setup function) - -> [SUT.CapatazEvent -> Bool] -- ^ Assertions happening after the setup - -- function - -> [SUT.CapatazEvent -> Bool] -- ^ Assertions happening after the capataz - -- record is terminated - -> Maybe (SUT.CapatazEvent -> Bool) -- ^ An assertion checked across all - -- @CapatazEvents@ that happened in a - -- test, great when testing that an event - -- __did not__ happen - -> IO () -testCapatazStreamWithOptions preSetupAssertion optionModFn setupFn postSetupAssertions postTeardownAssertions mAllEventsAssertion - = do - - eventStream <- newTQueueIO - accRef <- newIORef [] - pendingCountVar <- newIORef - ( sum $ fmap - length - [preSetupAssertion, postSetupAssertions, postTeardownAssertions] - ) - - capataz <- SUT.forkCapataz $ (optionModFn SUT.defCapatazOptions) - { SUT.notifyEvent = trackEvent accRef eventStream - } - - -- We check preSetup assertions are met before we execute the setup - -- function. 
This serves to test initialization of capataz instance - runAssertions "PRE-SETUP" - (eventStream, accRef) - pendingCountVar - preSetupAssertion - capataz - - -- We execute the setup sub-routine, which is going to use the Capataz public - -- API to assert events - setupResult <- try (setupFn capataz) - - case setupResult of - -- If the sub-routine fails, show exception - Left err -> assertFailure (show (err :: SomeException)) - Right _ -> do - -- We now run post-setup assertions - runAssertions "POST-SETUP" - (eventStream, accRef) - pendingCountVar - postSetupAssertions - capataz - - -- We now shutdown the capataz instance - void $ SUT.teardown capataz - - -- We run assertions for after the capataz has been shut down - runAssertions "POST-TEARDOWN" - (eventStream, accRef) - pendingCountVar - postTeardownAssertions - capataz - - -- Lastly, we check if there is a function that we want to execute - -- across all events that happened in the test, this is to assert the - -- absence of an event - case mAllEventsAssertion of - Nothing -> return () - Just allEventsAssertion -> do - events <- reverse <$> readIORef accRef - assertBool - ( "On AFTER-TEST, expected all events to match predicate, but didn't (" - <> show (length events) - <> " events tried)\n" - <> ppShow (zip ([0 ..] 
:: [Int]) events) - ) - (all allEventsAssertion events) - where - -- Utility functions that runs the readEventLoop function with a timeout - -- of a second, this way we can guarantee assertions are met without having - -- to add @threadDelays@ to the test execution - runAssertions stageName (eventStream, accRef) pendingCountVar assertions capataz - = do - raceResult <- race - (threadDelay 1000100) - (readEventLoop eventStream pendingCountVar assertions) - case raceResult of - Left _ -> do - events <- reverse <$> readIORef accRef - pendingCount <- readIORef pendingCountVar - void $ SUT.teardown capataz - assertFailure - ( "On " - <> stageName - <> " stage, expected all assertions to match, but didn't (" - <> show pendingCount - <> " assertions remaining, " - <> show (length events) - <> " events tried)\n" - <> ppShow (zip ([0 ..] :: [Int]) events) - ) - Right _ -> return () - - - -- Sub-routine that accumulates all events that have happened in the Capataz - -- instance so far - trackEvent accRef eventStream event = do - atomicModifyIORef' accRef (\old -> (event : old, ())) - atomically $ writeTQueue eventStream event - - -- Sub-routine that reads the event stream, and ensures that all assertions - -- are executed, this loop won't stop until all assertions are met - readEventLoop eventStream pendingCount assertions = do - writeIORef pendingCount (length assertions) - case assertions of - [] -> return () - (assertionFn:assertions1) -> do - event <- atomically $ readTQueue eventStream - if assertionFn event - then readEventLoop eventStream pendingCount assertions1 - else readEventLoop eventStream pendingCount assertions - - --- | A version of "testCapatazStreamWithOptions" that does not receive the --- function that modifies a "CapatazOptions" record. 
-testCapatazStream - :: [SUT.CapatazEvent -> Bool] -- ^ Assertions happening before setup function - -- is called - -> (SUT.Capataz -> IO ()) -- ^ Function used to test public the supervisor - -- public API (a.k.a setup function) - -> [SUT.CapatazEvent -> Bool] -- ^ Assertions happening after the setup - -- function - -> [SUT.CapatazEvent -> Bool] -- ^ Assertions happening after the capataz - -- record is terminated - -> Maybe (SUT.CapatazEvent -> Bool) -- ^ An assertion checked across all - -- @CapatazEvents@ that happened in a - -- test, great when testing that an event - -- __did not__ happen - -> IO () -testCapatazStream preSetupAssertions = - testCapatazStreamWithOptions preSetupAssertions identity +import Test.Util -------------------------------------------------------------------------------- -- Actual Tests -tests :: [TestTree] -tests - = [ testGroup - "capataz without workerSpecList" - [ testCase "initialize and teardown works as expected" $ testCapatazStream - [ andP - [ assertEventType CapatazStatusChanged - , assertCapatazStatusChanged SUT.Initializing SUT.Running - ] +tests :: TestTree +tests = testGroup + "capataz core" + [ testGroup + "capataz without workerOptionsList" + [ testCase "initialize and teardown works as expected" $ testCapatazStream + [ andP + [ assertEventType SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Initializing SUT.Running + ] + ] + (const $ return ()) + [] + [ andP + [ assertEventType SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Running SUT.Halting + ] + , andP + [ assertEventType SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Halting SUT.Halted + ] + ] + Nothing + ] + , testGroup + "capataz with processSpecList" + [ testCase "initialize and teardown of workers works as expected" + $ testCapatazStreamWithOptions + ( set + SUT.supervisorProcessSpecListL + [ SUT.workerSpec "A" (forever $ threadDelay 10001000) identity + , SUT.workerSpec "B" (forever $ threadDelay 10001000) identity 
+ ] + ) + [ assertWorkerStarted "A" + , assertWorkerStarted "B" + , andP + [ assertEventType SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Initializing SUT.Running + ] ] (const $ return ()) [] [ andP - [ assertEventType CapatazStatusChanged - , assertCapatazStatusChanged SUT.Running SUT.Halting + [ assertEventType SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Running SUT.Halting ] + , assertEventType ProcessTerminationStarted + , assertWorkerTerminated "A" + , assertWorkerTerminated "B" + , assertEventType ProcessTerminationFinished , andP - [ assertEventType CapatazStatusChanged - , assertCapatazStatusChanged SUT.Halting SUT.Halted + [ assertEventType SupervisorStatusChanged + , assertSupervisorStatusChanged SUT.Halting SUT.Halted ] ] Nothing - ] - , testGroup - "capataz with workerSpecList" - [ testCase "initialize and teardown works as expected" - $ testCapatazStreamWithOptions - [ assertWorkerStarted "A" - , assertWorkerStarted "B" - , andP - [ assertEventType CapatazStatusChanged - , assertCapatazStatusChanged SUT.Initializing SUT.Running - ] - ] - ( \supOptions -> supOptions - { SUT.capatazWorkerSpecList = [ SUT.defWorkerSpec - { SUT.workerName = "A" - , SUT.workerAction = forever - (threadDelay 10001000) - } - , SUT.defWorkerSpec - { SUT.workerName = "B" - , SUT.workerAction = forever - (threadDelay 10001000) - } - ] - } - ) - (const $ return ()) - [] - [ andP - [ assertEventType CapatazStatusChanged - , assertCapatazStatusChanged SUT.Running SUT.Halting - ] - , assertEventType WorkersTerminationStarted - , assertWorkerTerminated "A" - , assertWorkerTerminated "B" - , assertEventType WorkersTerminationFinished - , andP - [ assertEventType CapatazStatusChanged - , assertCapatazStatusChanged SUT.Halting SUT.Halted - ] - ] - Nothing - ] - , testCase "reports error when capataz thread receives async exception" - $ testCapatazStream - [ andP - [ assertEventType CapatazStatusChanged - , assertCapatazStatusChanged SUT.Initializing 
SUT.Running - ] - ] - ( \SUT.Capataz { capatazAsync } -> do - threadDelay 100 -- leave enough room for capataz to start - cancelWith capatazAsync (ErrorCall "async exception") - ) - [assertEventType CapatazFailed] - [] - Nothing , testCase "reports error when worker retries violate restart intensity" $ do lockVar <- newEmptyMVar let (signalIntensityReached, waitTillIntensityReached) = (putMVar lockVar (), takeMVar lockVar) testCapatazStreamWithOptions + (set SUT.supervisorOnIntensityReachedL signalIntensityReached) [] - ( \supOptions -> supOptions - { SUT.onCapatazIntensityReached = signalIntensityReached - } - ) ( \capataz -> do - _workerId <- SUT.forkWorker SUT.defWorkerOptions - (throwIO RestartingWorkerError) - capataz + _workerId <- SUT.forkWorker + ( SUT.buildWorkerOptions "test-worker" + (throwIO RestartingWorkerError) + identity + ) + capataz waitTillIntensityReached ) - [ assertEventType WorkerFailed - , assertEventType WorkerFailed - , assertEventType WorkerFailed - , assertCapatazFailedWith "CapatazIntensityReached" + [ assertEventType ProcessFailed + , assertEventType ProcessFailed + , assertEventType ProcessFailed + , assertCapatazFailedWith "SupervisorIntensityReached" ] [] Nothing , testGroup - "single supervised IO sub-routine" + "single supervised worker" [ testGroup "callbacks" [ testGroup "workerOnCompletion" - [ testCase "does execute callback when sub-routine is completed" + [ testCase + "does execute callback when worker sub-routine is completed" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - } + ( SUT.buildWorkerOptions + "test-worker" + (return ()) + (set SUT.workerRestartStrategyL SUT.Temporary) ) - (return ()) capataz return () ) [ andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnCompletion ] - , assertEventType WorkerCompleted + , assertEventType ProcessCompleted ] [] Nothing - 
, testCase "does not execute callback when sub-routine fails" + , testCase "does not execute callback when worker sub-routine fails" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - } + ( SUT.buildWorkerOptions "test-worker" + (throwIO RestartingWorkerError) + identity ) - (throwIO RestartingWorkerError) capataz return () ) [ andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnFailure ] - , assertEventType WorkerFailed + , assertEventType ProcessFailed ] [assertEventType CapatazTerminated] ( Just $ not . andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnCompletion ] ) - , testCase "does not execute callback when sub-routine is terminated" + , testCase + "does not execute callback when worker sub-routine is terminated" $ testCapatazStream [] ( \capataz -> do workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - } + ( SUT.buildWorkerOptions + "test-worker" + (forever $ threadDelay 1000100) + (set SUT.workerRestartStrategyL SUT.Temporary) ) - (forever $ threadDelay 1000100) capataz - _workerId <- SUT.terminateWorker + _workerId <- void $ SUT.terminateProcess "testing onCompletion callback" workerId capataz return () ) - [assertEventType WorkerTerminated] + [assertEventType ProcessTerminated] [assertEventType CapatazTerminated] ( Just $ not . 
andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnCompletion ] ) - , testCase "treats as sub-routine failed if callback fails" + , testCase "treats as worker sub-routine failed if callback fails" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - , SUT.workerOnCompletion = throwIO TimeoutError - } + ( SUT.buildWorkerOptions + "test-worker" + (return ()) + ( set SUT.workerRestartStrategyL SUT.Temporary + . set SUT.workerOnCompletionL (throwIO TimeoutError) + ) ) - (return ()) capataz return () ) [ andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnCompletion , assertErrorType "TimeoutError" ] , andP - [ assertEventType WorkerFailed - , assertErrorType "WorkerCallbackFailed" + [ assertEventType ProcessFailed + , assertErrorType "ProcessCallbackFailed" ] ] [] @@ -545,102 +225,107 @@ tests ] , testGroup "workerOnFailure" - [ testCase "does execute callback when sub-routine fails" + [ testCase "does execute callback when worker sub-routine fails" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - } + ( SUT.buildWorkerOptions + "test-worker" + (throwIO RestartingWorkerError) + (set SUT.workerRestartStrategyL SUT.Temporary) ) - (throwIO RestartingWorkerError) capataz return () ) [ andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnFailure ] - , assertEventType WorkerFailed + , assertEventType ProcessFailed ] [assertEventType CapatazTerminated] Nothing - , testCase "does not execute callback when sub-routine is completed" + , testCase + "does not execute callback when worker sub-routine is completed" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { 
SUT.workerRestartStrategy = SUT.Temporary - } + ( SUT.buildWorkerOptions + "test-worker" + (return ()) + (set SUT.workerRestartStrategyL SUT.Temporary) ) - (return ()) capataz return () ) [ andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnCompletion ] - , assertEventType WorkerCompleted + , assertEventType ProcessCompleted ] [] ( Just $ not . andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnFailure ] ) - , testCase "does not execute callback when sub-routine is terminated" + , testCase + "does not execute callback when worker sub-routine is terminated" $ testCapatazStream [] ( \capataz -> do workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - } + ( SUT.buildWorkerOptions + "test-worker" + (forever $ threadDelay 1000100) + (set SUT.workerRestartStrategyL SUT.Temporary) ) - (forever $ threadDelay 1000100) capataz - SUT.terminateWorker "testing onFailure callback" - workerId - capataz + void $ SUT.terminateProcess "testing onFailure callback" + workerId + capataz ) - [assertEventType WorkerTerminated] + [assertEventType ProcessTerminated] [] ( Just $ not . andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnFailure ] ) - , testCase "treats as sub-routine failed if callback fails" + , testCase "treats as worker sub-routine failed if callback fails" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - , SUT.workerOnFailure = const $ throwIO TimeoutError - } + ( SUT.buildWorkerOptions + "test-worker" + (throwIO RestartingWorkerError) + ( set SUT.workerRestartStrategyL SUT.Temporary + . 
set + SUT.workerOnFailureL + (const $ throwIO TimeoutError) + ) ) - (throwIO RestartingWorkerError) capataz return () ) [ andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnFailure , assertErrorType "TimeoutError" ] , andP - [ assertEventType WorkerFailed - , assertErrorType "WorkerCallbackFailed" + [ assertEventType ProcessFailed + , assertErrorType "ProcessCallbackFailed" ] ] [] @@ -654,121 +339,129 @@ tests [] ( \capataz -> do workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - , SUT.workerTerminationPolicy = SUT.TimeoutMillis 1 - , SUT.workerOnTermination = forever $ threadDelay 100100 - } + ( SUT.buildWorkerOptions + "test-worker" + (forever $ threadDelay 10001000) + ( set SUT.workerRestartStrategyL SUT.Temporary + . set SUT.workerTerminationPolicyL (SUT.TimeoutMillis 1) + . set SUT.workerOnTerminationL + (forever $ threadDelay 100100) + ) ) - (forever $ threadDelay 10001000) capataz - SUT.terminateWorker "testing workerOnTermination callback" - workerId - capataz + void $ SUT.terminateProcess + "testing workerOnTermination callback" + workerId + capataz ) [ andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnTermination - , assertErrorType "BrutallyTerminateWorkerException" + , assertErrorType "BrutallyTerminateProcessException" ] , andP - [ assertEventType WorkerFailed - , assertErrorType "WorkerCallbackFailed" + [ assertEventType ProcessFailed , assertCallbackType SUT.OnTermination + , assertErrorType "ProcessCallbackFailed" ] ] [] Nothing - , testCase "does execute callback when sub-routine is terminated" + , testCase + "does execute callback when worker sub-routine is terminated" $ testCapatazStream [] ( \capataz -> do workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - } + ( SUT.buildWorkerOptions + "test-worker" + (forever $ 
threadDelay 1000100) + (set SUT.workerRestartStrategyL SUT.Temporary) ) - (forever $ threadDelay 1000100) capataz - SUT.terminateWorker "testing workerOnTermination callback" - workerId - capataz + void $ SUT.terminateProcess + "testing workerOnTermination callback" + workerId + capataz ) [ andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnTermination ] - , assertEventType WorkerTerminated + , assertEventType ProcessTerminated ] [assertEventType CapatazTerminated] Nothing - , testCase "does not execute callback when sub-routine is completed" + , testCase + "does not execute callback when worker sub-routine is completed" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - } + ( SUT.buildWorkerOptions + "test-worker" + (return ()) + (set SUT.workerRestartStrategyL SUT.Temporary) ) - (return ()) capataz return () ) - [assertEventType WorkerCompleted] + [assertEventType ProcessCompleted] [] ( Just $ not . andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnTermination ] ) - , testCase "does not execute callback when sub-routine fails" + , testCase "does not execute callback when worker sub-routine fails" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - } + ( SUT.buildWorkerOptions + "test-worker" + (throwIO (ErrorCall "surprise!")) + (set SUT.workerRestartStrategyL SUT.Temporary) ) - (throwIO (ErrorCall "surprise!")) capataz return () ) - [assertEventType WorkerFailed] + [assertEventType ProcessFailed] [] ( Just $ not . 
andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnTermination ] ) - , testCase "treats as sub-routine failed if callback fails" + , testCase "treats as worker sub-routine failed if callback fails" $ testCapatazStream [] ( \capataz -> do workerId <- SUT.forkWorker - ( SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Temporary - , SUT.workerOnTermination = throwIO TimeoutError - } + ( SUT.buildWorkerOptions + "test-worker" + (forever $ threadDelay 10001000) + ( set SUT.workerRestartStrategyL SUT.Temporary + . set SUT.workerOnTerminationL (throwIO TimeoutError) + ) ) - (forever $ threadDelay 10001000) capataz - SUT.terminateWorker "testing workerOnTermination callback" - workerId - capataz + void $ SUT.terminateProcess + "testing workerOnTermination callback" + workerId + capataz ) [ andP - [ assertEventType WorkerCallbackExecuted + [ assertEventType ProcessCallbackExecuted , assertCallbackType SUT.OnTermination , assertErrorType "TimeoutError" ] , andP - [ assertEventType WorkerFailed - , assertErrorType "WorkerCallbackFailed" + [ assertEventType ProcessFailed + , assertErrorType "ProcessCallbackFailed" ] ] [] @@ -777,304 +470,346 @@ tests ] , testGroup "with transient strategy" - [ testCase "does not restart on completion" $ testCapatazStream + [ testCase "does not restart worker on completion" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerRestartStrategy = SUT.Transient } - (return ()) + ( SUT.buildWorkerOptions + "test-worker" + (return ()) + (set SUT.workerRestartStrategyL SUT.Transient) + ) capataz return () ) - [assertEventType WorkerStarted, assertEventType WorkerCompleted] + [assertEventType ProcessStarted, assertEventType ProcessCompleted] [assertEventType CapatazTerminated] - (Just $ not . assertEventType WorkerRestarted) - , testCase "does not restart on termination" $ testCapatazStream + (Just $ not . 
assertEventType ProcessRestarted) + , testCase "does not restart worker on termination" $ testCapatazStream [] ( \capataz -> do workerId <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerRestartStrategy = SUT.Transient } - (forever $ threadDelay 1000100) + ( SUT.buildWorkerOptions + "test-worker" + (forever $ threadDelay 1000100) + (set SUT.workerRestartStrategyL SUT.Transient) + ) capataz - SUT.terminateWorker "termination test (1)" workerId capataz + void $ SUT.terminateProcess "termination test (1)" workerId capataz ) - [assertEventType WorkerTerminated] + [assertEventType ProcessTerminated] [assertEventType CapatazTerminated] - (Just $ not . assertEventType WorkerRestarted) + (Just $ not . assertEventType ProcessRestarted) , testCase "does restart on failure" $ testCapatazStream [] ( \capataz -> do subRoutineAction <- mkFailingSubRoutine 1 _workerId <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerRestartStrategy = SUT.Transient } - subRoutineAction + ( SUT.buildWorkerOptions + "test-worker" + subRoutineAction + (set SUT.workerRestartStrategyL SUT.Transient) + ) capataz return () ) - [ assertEventType WorkerStarted - , assertEventType WorkerFailed - , andP [assertEventType WorkerRestarted, assertRestartCount (== 1)] + [ assertEventType ProcessStarted + , assertEventType ProcessFailed + , andP [assertEventType ProcessRestarted, assertRestartCount (== 1)] ] [] Nothing - , testCase "does increase restart count on multiple failures" + , testCase "does increase restart count on multiple worker failures" $ testCapatazStream [] ( \capataz -> do subRoutineAction <- mkFailingSubRoutine 2 _workerId <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Transient - } - subRoutineAction + ( SUT.buildWorkerOptions + "test-worker" + subRoutineAction + (set SUT.workerRestartStrategyL SUT.Transient) + ) capataz return () ) [ andP - [assertEventType WorkerRestarted, assertRestartCount (== 1)] + [assertEventType ProcessRestarted, assertRestartCount 
(== 1)] , andP - [assertEventType WorkerRestarted, assertRestartCount (== 2)] + [assertEventType ProcessRestarted, assertRestartCount (== 2)] ] [] Nothing ] , testGroup "with permanent strategy" - [ testCase "does restart on completion" $ testCapatazStream + [ testCase "does restart worker on completion" $ testCapatazStream [] ( \capataz -> do subRoutineAction <- mkCompletingOnceSubRoutine _workerId <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerRestartStrategy = SUT.Permanent } - subRoutineAction + ( SUT.buildWorkerOptions + "test-worker" + subRoutineAction + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz return () ) - [ assertEventType WorkerStarted - , assertEventType WorkerCompleted - , assertEventType WorkerRestarted + [ assertEventType ProcessStarted + , assertEventType ProcessCompleted + , assertEventType ProcessRestarted ] [assertEventType CapatazTerminated] Nothing - , testCase "does not increase restart count on multiple completions" + , testCase + "does not increase restart count on multiple worker completions" $ testCapatazStream [] ( \capataz -> do - -- Note the number is two (2) given the assertion list has two `WorkerRestarted` assertions + -- Note the number is two (2) given the assertion list has two `ProcessRestarted` assertions let expectedRestartCount = 2 subRoutineAction <- mkCompletingBeforeNRestartsSubRoutine expectedRestartCount _workerId <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Permanent - } - subRoutineAction + ( SUT.buildWorkerOptions + "test-worker" + subRoutineAction + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz return () ) [ andP - [assertEventType WorkerRestarted, assertRestartCount (== 1)] + [assertEventType ProcessRestarted, assertRestartCount (== 1)] , andP - [assertEventType WorkerRestarted, assertRestartCount (== 1)] + [assertEventType ProcessRestarted, assertRestartCount (== 1)] ] [] Nothing - , testCase "does restart on termination" $ testCapatazStream + , 
testCase "does restart on worker termination" $ testCapatazStream [] ( \capataz -> do workerId <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerRestartStrategy = SUT.Permanent } - (forever $ threadDelay 10001000) + ( SUT.buildWorkerOptions + "test-worker" + (forever $ threadDelay 10001000) + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz - SUT.terminateWorker "testing termination (1)" workerId capataz + void $ SUT.terminateProcess "testing termination (1)" + workerId + capataz ) - [assertEventType WorkerTerminated, assertEventType WorkerRestarted] + [assertEventType ProcessTerminated, assertEventType ProcessRestarted] [] Nothing - , testCase "does increase restart count on multiple terminations" $ do - terminationCountVar <- newTVarIO (0 :: Int) - let signalWorkerTermination = - atomically (modifyTVar' terminationCountVar (+ 1)) - waitWorkerTermination i = atomically $ do - n <- readTVar terminationCountVar - when (n /= i) retry - testCapatazStream - [] - ( \capataz -> do - workerId <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Permanent - , SUT.workerOnTermination = signalWorkerTermination - } - (forever $ threadDelay 10001000) - capataz + , testCase "does increase restart count on multiple worker terminations" + $ do + terminationCountVar <- newTVarIO (0 :: Int) + let signalWorkerTermination = + atomically (modifyTVar' terminationCountVar (+ 1)) + waitWorkerTermination i = atomically $ do + n <- readTVar terminationCountVar + when (n /= i) retry + testCapatazStream + [] + ( \capataz -> do + workerId <- SUT.forkWorker + ( SUT.buildWorkerOptions + "test-worker" + (forever $ threadDelay 10001000) + ( set SUT.workerRestartStrategyL SUT.Permanent + . 
set SUT.workerOnTerminationL signalWorkerTermination + ) + ) + capataz - SUT.terminateWorker "testing termination (1)" workerId capataz - waitWorkerTermination 1 - SUT.terminateWorker "testing termination (2)" workerId capataz - waitWorkerTermination 2 - ) - [ assertEventType WorkerTerminated - , andP [assertEventType WorkerRestarted, assertRestartCount (== 1)] - , assertEventType WorkerTerminated - , andP [assertEventType WorkerRestarted, assertRestartCount (== 2)] - ] - [] - Nothing - , testCase "does restart on failure" $ testCapatazStream + void $ SUT.terminateProcess "testing termination (1)" + workerId + capataz + waitWorkerTermination 1 + void $ SUT.terminateProcess "testing termination (2)" + workerId + capataz + waitWorkerTermination 2 + ) + [ assertEventType ProcessTerminated + , andP + [assertEventType ProcessRestarted, assertRestartCount (== 1)] + , assertEventType ProcessTerminated + , andP + [assertEventType ProcessRestarted, assertRestartCount (== 2)] + ] + [] + Nothing + , testCase "does restart on worker failure" $ testCapatazStream [] ( \capataz -> do subRoutineAction <- mkFailingSubRoutine 1 _workerId <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerRestartStrategy = SUT.Permanent } - subRoutineAction + ( SUT.buildWorkerOptions + "test-worker" + subRoutineAction + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz return () ) - [ assertEventType WorkerStarted - , assertEventType WorkerFailed - , andP [assertEventType WorkerRestarted, assertRestartCount (== 1)] + [ assertEventType ProcessStarted + , assertEventType ProcessFailed + , andP [assertEventType ProcessRestarted, assertRestartCount (== 1)] ] [] Nothing - , testCase "does increase restart count on multiple failures" + , testCase "does increase restart count on multiple worker failures" $ testCapatazStream [] ( \capataz -> do subRoutineAction <- mkFailingSubRoutine 2 _workerId <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerRestartStrategy = SUT.Permanent - } - 
subRoutineAction + ( SUT.buildWorkerOptions + "test-worker" + subRoutineAction + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz return () ) [ andP - [assertEventType WorkerRestarted, assertRestartCount (== 1)] + [assertEventType ProcessRestarted, assertRestartCount (== 1)] , andP - [assertEventType WorkerRestarted, assertRestartCount (== 2)] + [assertEventType ProcessRestarted, assertRestartCount (== 2)] ] [] Nothing ] , testGroup "with temporary strategy" - [ testCase "does not restart on completion" $ testCapatazStream + [ testCase "does not restart on worker completion" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerRestartStrategy = SUT.Temporary } - (return ()) + ( SUT.buildWorkerOptions + "test-worker" + (return ()) + (set SUT.workerRestartStrategyL SUT.Temporary) + ) capataz return () ) - [assertEventType WorkerStarted, assertEventType WorkerCompleted] + [assertEventType ProcessStarted, assertEventType ProcessCompleted] [assertEventType CapatazTerminated] - (Just $ not . assertEventType WorkerRestarted) - , testCase "does not restart on termination" $ testCapatazStream + (Just $ not . assertEventType ProcessRestarted) + , testCase "does not restart on worker termination" $ testCapatazStream [] ( \capataz -> do workerId <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerRestartStrategy = SUT.Temporary } - (forever $ threadDelay 1000100) + ( SUT.buildWorkerOptions + "test-worker" + (forever $ threadDelay 1000100) + (set SUT.workerRestartStrategyL SUT.Temporary) + ) capataz - SUT.terminateWorker "termination test (1)" workerId capataz + void $ SUT.terminateProcess "termination test (1)" workerId capataz threadDelay 100 ) - [assertEventType WorkerStarted, assertEventType WorkerTerminated] + [assertEventType ProcessStarted, assertEventType ProcessTerminated] [assertEventType CapatazTerminated] - (Just $ not . 
assertEventType WorkerRestarted) - , testCase "does not restart on failure" $ testCapatazStream + (Just $ not . assertEventType ProcessRestarted) + , testCase "does not restart on worker failure" $ testCapatazStream [] ( \capataz -> do _workerId <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerRestartStrategy = SUT.Temporary } - (panic "worker failed!") + ( SUT.buildWorkerOptions + "failing-worker" + (panic "worker failed!") + (set SUT.workerRestartStrategyL SUT.Temporary) + ) capataz threadDelay 100 ) - [assertEventType WorkerStarted, assertEventType WorkerFailed] + [assertEventType ProcessStarted, assertEventType ProcessFailed] [assertEventType CapatazTerminated] - (Just $ not . assertEventType WorkerRestarted) + (Just $ not . assertEventType ProcessRestarted) ] ] , testGroup "multiple supervised IO sub-routines" - [ testCase "terminates all supervised sub-routines on teardown" + [ testCase "terminates all supervised worker sub-routines on teardown" $ testCapatazStream [] ( \capataz -> do _workerA <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerName = "A" - , SUT.workerRestartStrategy = SUT.Permanent - } - (forever $ threadDelay 1000100) + ( SUT.buildWorkerOptions + "A" + (forever $ threadDelay 1000100) + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz _workerB <- SUT.forkWorker - SUT.defWorkerOptions { SUT.workerName = "B" - , SUT.workerRestartStrategy = SUT.Permanent - } - (forever $ threadDelay 1000100) + ( SUT.buildWorkerOptions + "B" + (forever $ threadDelay 1000100) + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz return () ) - [ andP [assertEventType WorkerStarted, assertWorkerName "A"] - , andP [assertEventType WorkerStarted, assertWorkerName "B"] + [ andP [assertEventType ProcessStarted, assertProcessName "A"] + , andP [assertEventType ProcessStarted, assertProcessName "B"] ] - [ andP [assertEventType WorkerTerminated, assertWorkerName "A"] - , andP [assertEventType WorkerTerminated, assertWorkerName "B"] + [ andP 
[assertEventType ProcessTerminated, assertProcessName "A"] + , andP [assertEventType ProcessTerminated, assertProcessName "B"] , assertEventType CapatazTerminated ] Nothing , testGroup - "with one for one capataz restart strategy" - [ testCase "restarts failing sub-routine only" + "with one for one supervisor restart strategy" + [ testCase "restarts failing worker sub-routine only" $ testCapatazStreamWithOptions + (set SUT.supervisorRestartStrategyL SUT.OneForOne) [] - ( \supOptions -> - supOptions { SUT.capatazRestartStrategy = SUT.OneForOne } - ) ( \capataz -> do _workerA <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "A" - , SUT.workerRestartStrategy = SUT.Temporary - } - (forever $ threadDelay 1000100) + ( SUT.buildWorkerOptions + "A" + (forever $ threadDelay 1000100) + (set SUT.workerRestartStrategyL SUT.Temporary) + ) capataz ioB <- mkFailingSubRoutine 1 _workerB <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "B" - , SUT.workerRestartStrategy = SUT.Permanent - } - (forever $ ioB >> threadDelay 1000100) + ( SUT.buildWorkerOptions + "B" + (forever $ ioB >> threadDelay 1000100) + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz return () ) - [andP [assertEventType WorkerRestarted, assertWorkerName "B"]] + [andP [assertEventType ProcessRestarted, assertProcessName "B"]] [] ( Just $ not . 
andP - [assertEventType WorkerRestarted, assertWorkerName "A"] + [assertEventType ProcessRestarted, assertProcessName "A"] ) ] , testGroup - "with all for one capataz restart strategy with newest first order" + "with all for one supervisor restart strategy with newest first order" [ testCase "does terminate all other workers that did not fail" $ testCapatazStreamWithOptions - [] - ( \supOptions -> supOptions - { SUT.capatazRestartStrategy = SUT.AllForOne - , SUT.capatazWorkerTerminationOrder = SUT.OldestFirst - } + ( \supOptions -> + supOptions + & set SUT.supervisorRestartStrategyL SUT.AllForOne + & set SUT.supervisorProcessTerminationOrderL SUT.OldestFirst ) + [] ( \capataz -> do -- This lockVar guarantees that workerB executes before workerA lockVar <- newEmptyMVar @@ -1082,120 +817,120 @@ tests ioA <- mkFailingSubRoutine 1 _workerA <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "A" - , SUT.workerRestartStrategy = SUT.Permanent - } - (forever $ readMVar lockVar >> ioA) + ( SUT.buildWorkerOptions + "A" + (forever $ readMVar lockVar >> ioA) + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz _workerB <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "B" - , SUT.workerRestartStrategy = SUT.Permanent - } - (putMVar lockVar () >> forever (threadDelay 10)) + ( SUT.buildWorkerOptions + "B" + (putMVar lockVar () >> forever (threadDelay 10)) + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz return () ) - [ andP [assertEventType WorkerStarted, assertWorkerName "A"] - , andP [assertEventType WorkerStarted, assertWorkerName "B"] - , andP [assertEventType WorkerFailed, assertWorkerName "A"] - , andP [assertEventType WorkerRestarted, assertWorkerName "A"] - , andP [assertEventType WorkerTerminated, assertWorkerName "B"] - , andP [assertEventType WorkerRestarted, assertWorkerName "B"] + [ andP [assertEventType ProcessStarted, assertProcessName "A"] + , andP [assertEventType ProcessStarted, assertProcessName "B"] + , andP 
[assertEventType ProcessFailed, assertProcessName "A"] + , andP [assertEventType ProcessRestarted, assertProcessName "A"] + , andP [assertEventType ProcessTerminated, assertProcessName "B"] + , andP [assertEventType ProcessRestarted, assertProcessName "B"] ] [] Nothing - , testCase "does not restart sub-routines that are temporary" + , testCase "does not restart workers that are temporary" $ testCapatazStreamWithOptions - [] - ( \supOptions -> supOptions - { SUT.capatazRestartStrategy = SUT.AllForOne - , SUT.capatazWorkerTerminationOrder = SUT.OldestFirst - } + ( \supOptions -> + supOptions + & set SUT.supervisorRestartStrategyL SUT.AllForOne + & set SUT.supervisorProcessTerminationOrderL SUT.OldestFirst ) + [] ( \capataz -> do lockVar <- newEmptyMVar ioA <- mkFailingSubRoutine 1 _workerA <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "A" - , SUT.workerRestartStrategy = SUT.Permanent - } - (forever $ readMVar lockVar >> ioA) + ( SUT.buildWorkerOptions + "A" + (forever $ readMVar lockVar >> ioA) + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz _workerB <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "B" - , SUT.workerRestartStrategy = SUT.Temporary - } - (putMVar lockVar () >> forever (threadDelay 10)) + ( SUT.buildWorkerOptions + "B" + (putMVar lockVar () >> forever (threadDelay 10)) + (set SUT.workerRestartStrategyL SUT.Temporary) + ) capataz return () ) - [ andP [assertEventType WorkerStarted, assertWorkerName "A"] - , andP [assertEventType WorkerStarted, assertWorkerName "B"] - , andP [assertEventType WorkerFailed, assertWorkerName "A"] - , andP [assertEventType WorkerRestarted, assertWorkerName "A"] - , andP [assertEventType WorkerTerminated, assertWorkerName "B"] + [ andP [assertEventType ProcessStarted, assertProcessName "A"] + , andP [assertEventType ProcessStarted, assertProcessName "B"] + , andP [assertEventType ProcessFailed, assertProcessName "A"] + , andP [assertEventType ProcessRestarted, assertProcessName "A"] 
+ , andP [assertEventType ProcessTerminated, assertProcessName "B"] ] [] ( Just $ not . andP - [assertEventType WorkerRestarted, assertWorkerName "B"] + [assertEventType ProcessRestarted, assertProcessName "B"] ) - , testCase "restarts sub-routines that are not temporary" + , testCase "restarts workers that are not temporary" $ testCapatazStreamWithOptions - [] - ( \supOptions -> supOptions - { SUT.capatazRestartStrategy = SUT.AllForOne - , SUT.capatazWorkerTerminationOrder = SUT.NewestFirst - } + ( \supOptions -> + supOptions + & set SUT.supervisorRestartStrategyL SUT.AllForOne + & set SUT.supervisorProcessTerminationOrderL SUT.NewestFirst ) + [] ( \capataz -> do ioA <- mkFailingSubRoutine 1 lockVar <- newEmptyMVar _workerA <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "A" - , SUT.workerRestartStrategy = SUT.Permanent - } - (forever $ readMVar lockVar >> ioA) + ( SUT.buildWorkerOptions + "A" + (forever $ readMVar lockVar >> ioA) + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz _workerB <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "B" - , SUT.workerRestartStrategy = SUT.Transient - } - (putMVar lockVar () >> forever (threadDelay 10)) + ( SUT.buildWorkerOptions + "B" + (putMVar lockVar () >> forever (threadDelay 10)) + (set SUT.workerRestartStrategyL SUT.Transient) + ) capataz return () ) - [ andP [assertEventType WorkerRestarted, assertWorkerName "B"] - , andP [assertEventType WorkerRestarted, assertWorkerName "A"] + [ andP [assertEventType ProcessRestarted, assertProcessName "B"] + , andP [assertEventType ProcessRestarted, assertProcessName "A"] ] [] Nothing ] , testGroup "with all for one capataz restart strategy with oldest first order" - [ testCase "does not restart sub-routines that are temporary" + [ testCase "does not restart workers that are temporary" $ testCapatazStreamWithOptions - [] - ( \supOptions -> supOptions - { SUT.capatazRestartStrategy = SUT.AllForOne - , SUT.capatazWorkerTerminationOrder = 
SUT.OldestFirst - } + ( \supOptions -> + supOptions + & set SUT.supervisorRestartStrategyL SUT.AllForOne + & set SUT.supervisorProcessTerminationOrderL SUT.OldestFirst ) + [] ( \capataz -> do ioA <- mkFailingSubRoutine 1 @@ -1203,36 +938,36 @@ tests lockVar <- newEmptyMVar _workerA <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "A" - , SUT.workerRestartStrategy = SUT.Permanent - } - (forever $ readMVar lockVar >> ioA) + ( SUT.buildWorkerOptions + "A" + (forever $ readMVar lockVar >> ioA) + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz _workerB <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "B" - , SUT.workerRestartStrategy = SUT.Temporary - } - (putMVar lockVar () >> forever (threadDelay 10)) + ( SUT.buildWorkerOptions + "B" + (putMVar lockVar () >> forever (threadDelay 10)) + (set SUT.workerRestartStrategyL SUT.Temporary) + ) capataz return () ) - [andP [assertEventType WorkerRestarted, assertWorkerName "A"]] + [andP [assertEventType ProcessRestarted, assertProcessName "A"]] [] ( Just $ not . 
andP - [assertEventType WorkerRestarted, assertWorkerName "B"] + [assertEventType ProcessRestarted, assertProcessName "B"] ) - , testCase "restarts sub-routines that are not temporary" + , testCase "restarts workers that are not temporary" $ testCapatazStreamWithOptions - [] - ( \supOptions -> supOptions - { SUT.capatazRestartStrategy = SUT.AllForOne - , SUT.capatazWorkerTerminationOrder = SUT.OldestFirst - } + ( \supOptions -> + supOptions + & set SUT.supervisorRestartStrategyL SUT.AllForOne + & set SUT.supervisorProcessTerminationOrderL SUT.OldestFirst ) + [] ( \capataz -> do ioA <- mkFailingSubRoutine 1 @@ -1240,28 +975,29 @@ tests lockVar <- newEmptyMVar _workerA <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "A" - , SUT.workerRestartStrategy = SUT.Permanent - } - (forever $ readMVar lockVar >> ioA) + ( SUT.buildWorkerOptions + "A" + (forever $ readMVar lockVar >> ioA) + (set SUT.workerRestartStrategyL SUT.Permanent) + ) capataz _workerB <- SUT.forkWorker - SUT.defWorkerOptions - { SUT.workerName = "B" - , SUT.workerRestartStrategy = SUT.Transient - } - (putMVar lockVar () >> forever (threadDelay 10)) + ( SUT.buildWorkerOptions + "B" + (putMVar lockVar () >> forever (threadDelay 10)) + (set SUT.workerRestartStrategyL SUT.Transient) + ) capataz return () ) - [ andP [assertEventType WorkerRestarted, assertWorkerName "A"] - , andP [assertEventType WorkerRestarted, assertWorkerName "B"] + [ andP [assertEventType ProcessRestarted, assertProcessName "A"] + , andP [assertEventType ProcessRestarted, assertProcessName "B"] ] [] Nothing ] ] ] + ] diff --git a/test/testsuite/Main.hs b/test/testsuite/Main.hs index 5417cab..40dfb5d 100644 --- a/test/testsuite/Main.hs +++ b/test/testsuite/Main.hs @@ -7,12 +7,15 @@ module Main where import Protolude -import Control.Concurrent.CapatazTest -import Test.Tasty (defaultMainWithIngredients, testGroup) -import Test.Tasty.Ingredients.Rerun (rerunningTests) -import Test.Tasty.Runners (consoleTestReporter, listingTests) 
+import qualified Control.Concurrent.Capataz.SupervisorTest as Supervisor +import qualified Control.Concurrent.CapatazTest as Capataz +import Test.Tasty + (defaultMainWithIngredients, testGroup) +import Test.Tasty.Ingredients.Rerun (rerunningTests) +import Test.Tasty.Runners + (consoleTestReporter, listingTests) main :: IO () main = defaultMainWithIngredients [rerunningTests [listingTests, consoleTestReporter]] - (testGroup "capataz" tests) + (testGroup "capataz" [Capataz.tests, Supervisor.tests]) diff --git a/test/testsuite/Test/Util.hs b/test/testsuite/Test/Util.hs new file mode 100644 index 0000000..c34575f --- /dev/null +++ b/test/testsuite/Test/Util.hs @@ -0,0 +1,417 @@ +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +module Test.Util where + +import Protolude + +import Control.Concurrent.STM.TQueue (newTQueueIO, readTQueue, writeTQueue) +import Data.IORef (atomicModifyIORef', newIORef, readIORef, writeIORef) +import Test.Tasty.HUnit (assertBool, assertFailure) +import Text.Show.Pretty (ppShow) + +import qualified Data.Text as T + +import qualified Control.Concurrent.Capataz as SUT +import qualified Control.Concurrent.Capataz.Event as SUT + +-------------------------------------------------------------------------------- +-- Util + +-- | Utility function that gets the type name of a Record through it's Show +-- output. +fetchRecordName :: Show a => a -> Text +fetchRecordName = T.takeWhile (/= ' ') . show + +-- | Composes two predicate functions together with a boolean AND +andP :: [a -> Bool] -> a -> Bool +andP predList a = all ($ a) predList + +-------------------------------------------------------------------------------- +-- Assertions and Testers + +-- | This record duplicate the same event names as the ones found in the +-- "CapatazEvent" type, we use this to avoid using Text comparisons on assertion +-- helper functions. 
The "CapatazEvent" record is imported qualified, so there +-- is no conflict happening. +data EventType + = InvalidCapatazStatusReached + | SupervisorStatusChanged + | ProcessTerminated + | ProcessStarted + | ProcessRestarted + | ProcessCompleted + | ProcessFailed + | ProcessCallbackExecuted + | ProcessTerminationStarted + | ProcessTerminationFinished + | CapatazFailed + | CapatazTerminated + deriving (Show, Eq) + +toEventType :: SUT.CapatazEvent -> EventType +toEventType ev = case ev of + SUT.InvalidSupervisorStatusReached{} -> InvalidCapatazStatusReached + SUT.SupervisorStatusChanged{} -> SupervisorStatusChanged + SUT.ProcessTerminated{} -> ProcessTerminated + SUT.ProcessStarted{} -> ProcessStarted + SUT.ProcessRestarted{} -> ProcessRestarted + SUT.ProcessCompleted{} -> ProcessCompleted + SUT.ProcessFailed{} -> ProcessFailed + SUT.ProcessCallbackExecuted{} -> ProcessCallbackExecuted + SUT.ProcessTerminationStarted{} -> ProcessTerminationStarted + SUT.ProcessTerminationFinished{} -> ProcessTerminationFinished + SUT.CapatazFailed{} -> CapatazFailed + SUT.CapatazTerminated{} -> CapatazTerminated + +-- | Predicate function to assert "CapatazEvent" types +assertEventType :: EventType -> SUT.CapatazEvent -> Bool +assertEventType evType ev = toEventType ev == evType + +-- | Predicate function to assert "CapatazEvent" process type +assertProcessType :: SUT.ProcessType -> SUT.CapatazEvent -> Bool +assertProcessType processTy ev = case ev of + SUT.ProcessFailed { processType } -> processTy == processType + SUT.ProcessTerminated { processType } -> processTy == processType + SUT.ProcessStarted { processType } -> processTy == processType + SUT.ProcessCallbackExecuted { processType } -> processTy == processType + SUT.ProcessRestarted { processType } -> processTy == processType + _ -> False + +-- | Predicate function to assert "CapatazEvent" worker name +assertProcessName :: Text -> SUT.CapatazEvent -> Bool +assertProcessName processName' ev = case ev of + SUT.ProcessRestarted 
{ processName } -> processName' == processName + SUT.ProcessFailed { processName } -> processName' == processName + SUT.ProcessTerminated { processName } -> processName' == processName + SUT.ProcessStarted { processName } -> processName' == processName + _ -> False + +-- | Predicate function to assert type of an error inside a "CapatazEvent" +assertErrorType :: Text -> SUT.CapatazEvent -> Bool +assertErrorType errType ev = case ev of + SUT.CapatazFailed { supervisorError } -> + fetchRecordName supervisorError == errType + SUT.ProcessFailed { processError } -> fetchRecordName processError == errType + SUT.ProcessCallbackExecuted { processCallbackError } -> + case processCallbackError of + Nothing -> False + Just originalError -> fetchRecordName originalError == errType + _ -> False + +-- | Predicate function to assert type of callback executed inside a +-- "CapatazEvent" +assertCallbackType :: SUT.CallbackType -> SUT.CapatazEvent -> Bool +assertCallbackType cbType ev = case ev of + SUT.ProcessFailed { processError } -> case fromException processError of + Just SUT.ProcessCallbackFailed { processCallbackType } -> + cbType == processCallbackType + _ -> False + SUT.ProcessCallbackExecuted { processCallbackType } -> + cbType == processCallbackType + _ -> False + +-- | Predicate function to assert restart count inside a "CapatazEvent" +assertRestartCount :: (Int -> Bool) -> SUT.CapatazEvent -> Bool +assertRestartCount predFn ev = case ev of + SUT.ProcessRestarted { processRestartCount } -> predFn processRestartCount + _ -> False + +-- | Predicate function to assert a Capataz status change +assertSupervisorStatusChanged + :: SUT.SupervisorStatus -> SUT.SupervisorStatus -> SUT.CapatazEvent -> Bool +assertSupervisorStatusChanged fromEv toEv ev = case ev of + SUT.SupervisorStatusChanged { prevSupervisorStatus, newSupervisorStatus } -> + fromEv == prevSupervisorStatus && toEv == newSupervisorStatus + _ -> False + +-- | Predicate function to assert process was supervised by a 
supervisor +-- indicated by name +assertSupervisorName :: Text -> SUT.CapatazEvent -> Bool +assertSupervisorName supervisorName' ev = case ev of + SUT.SupervisorStatusChanged { supervisorName } -> + supervisorName' == supervisorName + SUT.ProcessStarted { supervisorName } -> supervisorName' == supervisorName + SUT.ProcessTerminated { supervisorName } -> supervisorName' == supervisorName + SUT.ProcessRestarted { supervisorName } -> supervisorName' == supervisorName + SUT.ProcessCompleted { supervisorName } -> supervisorName' == supervisorName + SUT.ProcessCallbackExecuted { supervisorName } -> + supervisorName' == supervisorName + SUT.ProcessTerminationStarted { supervisorName } -> + supervisorName' == supervisorName + SUT.ProcessTerminationFinished { supervisorName } -> + supervisorName' == supervisorName + _ -> False + +-- | Predicate function to assert a supervisor was started +assertSupervisorStarted :: Text -> SUT.CapatazEvent -> Bool +assertSupervisorStarted supervisorName = andP + [ assertEventType ProcessStarted + , assertProcessType SUT.SupervisorType + , assertProcessName supervisorName + ] + +-- | Predicate function to assert a supervisor was terminated +assertSupervisorTerminated :: Text -> SUT.CapatazEvent -> Bool +assertSupervisorTerminated supervisorName = andP + [ assertEventType ProcessTerminated + , assertProcessType SUT.SupervisorType + , assertProcessName supervisorName + ] + +assertSupervisorFailed :: Text -> SUT.CapatazEvent -> Bool +assertSupervisorFailed supervisorName = andP + [ assertEventType ProcessFailed + , assertProcessType SUT.SupervisorType + , assertProcessName supervisorName + ] + +assertSupervisorRestarted :: Text -> SUT.CapatazEvent -> Bool +assertSupervisorRestarted supervisorName = andP + [ assertEventType ProcessRestarted + , assertProcessType SUT.SupervisorType + , assertProcessName supervisorName + ] + +-- | Predicate function to assert a worker was started +assertWorkerStarted :: Text -> SUT.CapatazEvent -> Bool
+assertWorkerStarted workerName = andP + [ assertEventType ProcessStarted + , assertProcessType SUT.WorkerType + , assertProcessName workerName + ] + +-- | Predicate function to assert a worker was terminated +assertWorkerTerminated :: Text -> SUT.CapatazEvent -> Bool +assertWorkerTerminated workerName = andP + [ assertEventType ProcessTerminated + , assertProcessType SUT.WorkerType + , assertProcessName workerName + ] + +assertWorkerFailed :: Text -> SUT.CapatazEvent -> Bool +assertWorkerFailed workerName = andP + [ assertEventType ProcessFailed + , assertProcessType SUT.WorkerType + , assertProcessName workerName + ] + +assertWorkerRestarted :: Text -> SUT.CapatazEvent -> Bool +assertWorkerRestarted workerName = andP + [ assertEventType ProcessRestarted + , assertProcessType SUT.WorkerType + , assertProcessName workerName + ] + +-- | Predicate function to assert a capataz thread failed with error type +assertCapatazFailedWith :: Text -> SUT.CapatazEvent -> Bool +assertCapatazFailedWith errorName = + andP [assertEventType CapatazFailed, assertErrorType errorName] + +rootSupervisorName :: Text +rootSupervisorName = "capataz-root-supervisor" + +assertRootSupervisor :: SUT.CapatazEvent -> Bool +assertRootSupervisor = assertSupervisorName rootSupervisorName + + +-------------------------------------------------------------------------------- + +-- | Exception used to test failures inside Worker sub-routines +data RestartingWorkerError + = RestartingWorkerError + deriving (Show) + +instance Exception RestartingWorkerError + +-- | Exception used to test failures inside Worker callback sub-routines +data TimeoutError + = TimeoutError + deriving (Show) + +instance Exception TimeoutError + +-- | Utility function to create a Worker sub-routine that fails at least a +-- number of times +mkFailingSubRoutine + :: Int -- ^ Number of times the Worker sub-routine will fail + -> IO (IO ()) -- ^ Sub-routine used on worker creation +mkFailingSubRoutine failCount = do + countRef <- 
newIORef failCount + let subRoutine = do + shouldFail <- atomicModifyIORef' countRef + (\count -> (pred count, count > 0)) + when shouldFail (throwIO RestartingWorkerError) + + return subRoutine + +-- | A sub-routine that will complete for `initCount` amount of times. This +-- function works great when testing `Permanent` strategies, as you would like +-- to assert restart events once (if it keeps completing it will fill up the log +-- with restart events) +mkCompletingBeforeNRestartsSubRoutine :: Int -> IO (IO ()) +mkCompletingBeforeNRestartsSubRoutine initCount = do + countRef <- newIORef initCount + let subRoutine = do + shouldStop <- atomicModifyIORef' countRef + (\count -> (pred count, count > 0)) + if shouldStop then return () else forever $ threadDelay 1000100 + return subRoutine + +-- | A sub-routine that will complete once. This function works great when +-- testing `Permanent` strategies, as you would like to assert restart events +-- once (if it keeps completing it will fill up the log with restart events) +mkCompletingOnceSubRoutine :: IO (IO ()) +mkCompletingOnceSubRoutine = mkCompletingBeforeNRestartsSubRoutine 1 + +-- | Utility function to build a test environment for a Capataz execution. +-- It is composed by: +-- +-- * List of assertions that represent events that should be triggered by the +-- capataz instance in order +-- +-- * A function to modify the default "CapatazOptions", this utility function injects +-- a special @notifyEvent@ callback to execute given assertions. 
+testCapatazStreamWithOptions + :: (SUT.CapatazOptions -> SUT.CapatazOptions) -- ^ Function to modify default + -- @CapatazOptions@ + -> [SUT.CapatazEvent -> Bool] -- ^ Assertions happening before setup function + -- is called + -> (SUT.Capataz -> IO ()) -- ^ Function used to test the supervisor + -- public API (a.k.a setup function) + -> [SUT.CapatazEvent -> Bool] -- ^ Assertions happening after the setup + -- function + -> [SUT.CapatazEvent -> Bool] -- ^ Assertions happening after the capataz + -- record is terminated + -> Maybe (SUT.CapatazEvent -> Bool) -- ^ An assertion checked across all + -- @CapatazEvents@ that happened in a + -- test, great when testing that an event + -- __did not__ happen + -> IO () +testCapatazStreamWithOptions optionModFn preSetupAssertion setupFn postSetupAssertions postTeardownAssertions mAllEventsAssertion + = do + + eventStream <- newTQueueIO + accRef <- newIORef [] + pendingCountVar <- newIORef + ( sum $ fmap + length + [preSetupAssertion, postSetupAssertions, postTeardownAssertions] + ) + + capataz <- SUT.forkCapataz + rootSupervisorName + (SUT.set SUT.onSystemEventL (trackEvent accRef eventStream) . optionModFn) + + -- We check preSetup assertions are met before we execute the setup + -- function.
This serves to test initialization of capataz instance + runAssertions "PRE-SETUP" + (eventStream, accRef) + pendingCountVar + preSetupAssertion + capataz + + -- We execute the setup sub-routine, which is going to use the Capataz public + -- API to assert events + setupResult <- try (setupFn capataz) + + case setupResult of + -- If the sub-routine fails, show exception + Left err -> assertFailure (show (err :: SomeException)) + Right _ -> do + -- We now run post-setup assertions + runAssertions "POST-SETUP" + (eventStream, accRef) + pendingCountVar + postSetupAssertions + capataz + + -- We now shutdown the capataz instance + void $ SUT.teardown capataz + + -- We run assertions for after the capataz has been shut down + runAssertions "POST-TEARDOWN" + (eventStream, accRef) + pendingCountVar + postTeardownAssertions + capataz + + -- Lastly, we check if there is a function that we want to execute + -- across all events that happened in the test, this is to assert the + -- absence of an event + case mAllEventsAssertion of + Nothing -> return () + Just allEventsAssertion -> do + events <- reverse <$> readIORef accRef + assertBool + ( "On AFTER-TEST, expected all events to match predicate, but didn't (" + <> show (length events) + <> " events tried)\n" + <> ppShow (zip ([0 ..] 
:: [Int]) events) + ) + (all allEventsAssertion events) + where + -- Utility functions that runs the readEventLoop function with a timeout + -- of a second, this way we can guarantee assertions are met without having + -- to add @threadDelays@ to the test execution + runAssertions stageName (eventStream, accRef) pendingCountVar assertions capataz + = do + raceResult <- race + (threadDelay 1000100) + (readEventLoop eventStream pendingCountVar assertions) + case raceResult of + Left _ -> do + events <- reverse <$> readIORef accRef + pendingCount <- readIORef pendingCountVar + void $ SUT.teardown capataz + assertFailure + ( "On " + <> stageName + <> " stage, expected all assertions to match, but didn't (" + <> show pendingCount + <> " assertions remaining, " + <> show (length events) + <> " events tried)\n" + <> ppShow (zip ([0 ..] :: [Int]) events) + ) + Right _ -> return () + + + -- Sub-routine that accumulates all events that have happened in the Capataz + -- instance so far + trackEvent accRef eventStream event = do + atomicModifyIORef' accRef (\old -> (event : old, ())) + atomically $ writeTQueue eventStream event + + -- Sub-routine that reads the event stream, and ensures that all assertions + -- are executed, this loop won't stop until all assertions are met + readEventLoop eventStream pendingCount assertions = do + writeIORef pendingCount (length assertions) + case assertions of + [] -> return () + (assertionFn:assertions1) -> do + event <- atomically $ readTQueue eventStream + if assertionFn event + then readEventLoop eventStream pendingCount assertions1 + else readEventLoop eventStream pendingCount assertions + + +-- | A version of "testCapatazStreamWithOptions" that does not receive the +-- function that modifies a "CapatazOptions" record. 
+testCapatazStream + :: [SUT.CapatazEvent -> Bool] -- ^ Assertions happening before setup function + -- is called + -> (SUT.Capataz -> IO ()) -- ^ Function used to test the supervisor + -- public API (a.k.a setup function) + -> [SUT.CapatazEvent -> Bool] -- ^ Assertions happening after the setup + -- function + -> [SUT.CapatazEvent -> Bool] -- ^ Assertions happening after the capataz + -- record is terminated + -> Maybe (SUT.CapatazEvent -> Bool) -- ^ An assertion checked across all + -- @CapatazEvents@ that happened in a + -- test, great when testing that an event + -- __did not__ happen + -> IO () +testCapatazStream preSetupAssertions = + testCapatazStreamWithOptions identity preSetupAssertions