From 86ca3e483624005c5588dd73e264379b134c8f98 Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Fri, 7 Jul 2023 15:07:42 +0900 Subject: [PATCH] refactor: first-pass at runtime interface, signals, shutdown, and more (#180) This work precedes subsequent work on running workflows from the Homestar runner/server/runtime but sets everything into gear. Includes: * overall refactoring of events and event handling, streamlining how to work with senders, etc * introduce a runner which can - interact with task/batch specific-abort handles introduced in joinsets within a worker - interact with commands, which will happen through the CLI interface, i.e. star, run-workflow, akin to something like temporal-server - start-up event handler, websocket server, takes in db and can mock via implementation * lots of cleanup(s) --- .envrc | 2 +- Cargo.lock | 453 ++++++++++++---- Cargo.toml | 4 +- flake.lock | 18 +- flake.nix | 3 +- homestar-core/Cargo.toml | 5 +- homestar-core/src/test_utils/cid.rs | 23 + homestar-core/src/test_utils/mod.rs | 2 + homestar-core/src/workflow/input.rs | 13 +- .../src/workflow/receipt/metadata.rs | 4 +- homestar-runtime/Cargo.toml | 11 +- homestar-runtime/fixtures/settings.toml | 8 + homestar-runtime/src/cli.rs | 30 +- homestar-runtime/src/db.rs | 2 +- homestar-runtime/src/event_handler.rs | 106 ++++ homestar-runtime/src/event_handler/channel.rs | 31 ++ homestar-runtime/src/event_handler/event.rs | 232 ++++++++ .../src/event_handler/swarm_event.rs | 282 ++++++++++ homestar-runtime/src/lib.rs | 13 +- homestar-runtime/src/logger.rs | 2 - homestar-runtime/src/main.rs | 104 ++-- homestar-runtime/src/network/eventloop.rs | 499 ------------------ homestar-runtime/src/network/ipfs.rs | 13 +- homestar-runtime/src/network/mod.rs | 11 +- homestar-runtime/src/network/pubsub.rs | 19 +- homestar-runtime/src/network/swarm.rs | 50 +- homestar-runtime/src/network/ws.rs | 127 +++-- homestar-runtime/src/runner.rs | 336 ++++++++++++ homestar-runtime/src/runtime.rs | 18 - 
homestar-runtime/src/scheduler.rs | 84 +-- homestar-runtime/src/settings.rs | 255 +++++---- homestar-runtime/src/tasks/wasm.rs | 3 + homestar-runtime/src/test_utils/event.rs | 6 + homestar-runtime/src/test_utils/mod.rs | 2 + homestar-runtime/src/worker.rs | 482 ++++++++++------- homestar-runtime/src/workflow/info.rs | 29 +- homestar-wasm/Cargo.toml | 6 +- homestar-wasm/src/wasmtime/ipld.rs | 8 +- 38 files changed, 2130 insertions(+), 1166 deletions(-) create mode 100644 homestar-core/src/test_utils/cid.rs create mode 100644 homestar-runtime/fixtures/settings.toml create mode 100644 homestar-runtime/src/event_handler.rs create mode 100644 homestar-runtime/src/event_handler/channel.rs create mode 100644 homestar-runtime/src/event_handler/event.rs create mode 100644 homestar-runtime/src/event_handler/swarm_event.rs delete mode 100644 homestar-runtime/src/network/eventloop.rs create mode 100644 homestar-runtime/src/runner.rs delete mode 100644 homestar-runtime/src/runtime.rs create mode 100644 homestar-runtime/src/test_utils/event.rs diff --git a/.envrc b/.envrc index c36e651a..5a4b3d00 100644 --- a/.envrc +++ b/.envrc @@ -1,5 +1,5 @@ use_flake -export RUST_LOG=homestar_runtime=debug,atuin_client=warn,libp2p=info,libp2p_gossipsub::behaviour=debug +export RUST_LOG=homestar_runtime=debug,libp2p=info,libp2p_gossipsub::behaviour=debug export RUST_BACKTRACE=full export RUSTFLAGS="--cfg tokio_unstable" diff --git a/Cargo.lock b/Cargo.lock index b058caf9..b517bbb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -381,18 +381,18 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] name = "async-trait" -version = "0.1.68" +version = "0.1.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" +checksum = "a564d521dd56509c4c47480d00b80ee55f7e385ae48db5744c67ad50c92d2ebf" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -713,6 +713,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "771fe0050b883fcc3ea2359b1a96bcfbc090b7116eae7c3c512c7a083fdf23d3" +[[package]] +name = "bs58" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5353f36341f7451062466f0b755b96ac3a9547e4d7f6b70d603fc721a7d7896" +dependencies = [ + "tinyvec", +] + [[package]] name = "bumpalo" version = "3.12.2" @@ -971,9 +980,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.3.8" +version = "4.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9394150f5b4273a1763355bd1c2ec54cc5a2593f790587bcd6b2c947cfa9211" +checksum = "1640e5cc7fb47dbb8338fd471b105e7ed6c3cb2aeb00c2e067127ffd3764a05d" dependencies = [ "clap_builder", "clap_derive", @@ -982,13 +991,12 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.3.8" +version = "4.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a78fbdd3cc2914ddf37ba444114bc7765bbdcb55ec9cbe6fa054f0137400717" +checksum = "98c59138d527eeaf9b53f35a77fcc1fad9d883116070c63d5de1c7dc7b00c72b" dependencies = [ "anstream", "anstyle", - "bitflags 1.3.2", "clap_lex", "strsim", ] @@ -1002,7 +1010,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -1087,9 +1095,9 @@ dependencies = [ [[package]] name = "console-subscriber" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57ab2224a0311582eb03adba4caaf18644f7b1f10a760803a803b9b605187fc7" +checksum = "d4cf42660ac07fcebed809cfe561dd8730bcd35b075215e6479c516bcd0d11cb" dependencies = [ "console-api", "crossbeam-channel", @@ -1171,7 +1179,7 @@ version = "0.95.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1277fbfa94bc82c8ec4af2ded3e639d49ca5f7f3c7eeab2c66accd135ece4e70" dependencies = [ - "cranelift-entity", + "cranelift-entity 0.95.1", ] [[package]] @@ -1184,7 +1192,7 @@ dependencies = [ "cranelift-bforest", "cranelift-codegen-meta", "cranelift-codegen-shared", - "cranelift-entity", + "cranelift-entity 0.95.1", "cranelift-isle", "gimli", "hashbrown 0.13.2", @@ -1218,6 +1226,15 @@ dependencies = [ "serde", ] +[[package]] +name = "cranelift-entity" +version = "0.97.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6565198b5684367371e2b946ceca721eb36965e75e3592fad12fc2e15f65d7b" +dependencies = [ + "serde", +] + [[package]] name = "cranelift-frontend" version = "0.95.1" @@ -1254,13 +1271,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff3220489a3d928ad91e59dd7aeaa8b3de18afb554a6211213673a71c90737ac" dependencies = [ "cranelift-codegen", - "cranelift-entity", + "cranelift-entity 0.95.1", "cranelift-frontend", - "itertools", + "itertools 0.10.5", "log", "smallvec", "wasmparser 0.102.0", - "wasmtime-types", + "wasmtime-types 8.0.1", ] [[package]] @@ -1299,7 +1316,7 @@ dependencies = [ "clap", "criterion-plot", "is-terminal", - "itertools", + "itertools 0.10.5", "num-traits", "once_cell", "oorandom", @@ -1320,7 +1337,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] @@ -1541,7 +1558,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -1563,7 +1580,20 @@ checksum = "29a358ff9f12ec09c3e61fef9b5a9902623a695a46a917b07f269bff1445611a" dependencies = [ "darling_core 0.20.1", "quote", - "syn 2.0.16", + "syn 2.0.23", +] + +[[package]] +name = "dashmap" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" +dependencies = [ + "cfg-if", + "hashbrown 0.12.3", + "lock_api", + "once_cell", + "parking_lot_core 0.9.7", ] [[package]] @@ -1592,6 +1622,15 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid", +] + [[package]] name = "der" version = "0.6.1" @@ -1694,7 +1733,7 @@ dependencies = [ "diesel_table_macro_syntax", "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -1714,7 +1753,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc5557efc453706fed5e4fa85006fe9817c224c3f480a34c7e5959fd700921c5" dependencies = [ - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -1786,7 +1825,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -1912,7 +1951,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -1972,6 +2011,17 @@ version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" +[[package]] +name = "evmap" +version = "10.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e3ea06a83f97d3dc2eb06e51e7a729b418f0717a5558a5c870e3d5156dc558d" +dependencies = [ + "hashbag", + "slab", + "smallvec", +] + [[package]] name = "exr" version = "1.6.3" @@ -2181,7 +2231,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -2240,6 +2290,19 @@ dependencies = [ "byteorder", ] +[[package]] +name = "fxprof-processed-profile" +version = "0.6.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27d12c0aed7f1e24276a241aadc4cb8ea9f83000f34bc062b7cc2d51e3b0fabd" +dependencies = [ + "bitflags 2.3.1", + "debugid", + "fxhash", + "serde", + "serde_json", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -2361,6 +2424,12 @@ dependencies = [ "crunchy", ] +[[package]] +name = "hashbag" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d44c238cb72d3e8993a30c32e97b2b2c2c1a12388603f28c4f19a44c4396bc" + [[package]] name = "hashbrown" version = "0.12.3" @@ -2524,6 +2593,7 @@ dependencies = [ "libipld", "libsqlite3-sys", "proptest", + "rand 0.8.5", "serde", "signature 2.1.0", "thiserror", @@ -2558,10 +2628,13 @@ dependencies = [ "criterion", "crossbeam", "dagga", + "dashmap", "diesel", "diesel_migrations", "dotenvy", "enum-assoc", + "evmap", + "fnv", "futures", "headers", "homestar-core", @@ -2571,11 +2644,11 @@ dependencies = [ "indexmap 2.0.0", "ipfs-api", "ipfs-api-backend-hyper", - "itertools", "json", "libipld", "libp2p", "libsqlite3-sys", + "nix 0.26.2", "openssl", "proptest", "rand 0.8.5", @@ -2608,7 +2681,7 @@ dependencies = [ "heck", "homestar-core", "image", - "itertools", + "itertools 0.11.0", "libipld", "rust_decimal", "serde_ipld_dagcbor", @@ -2618,8 +2691,8 @@ dependencies = [ "tracing", "wasi-common", "wasmparser 0.104.0", - "wasmtime", - "wasmtime-component-util", + "wasmtime 8.0.1", + "wasmtime-component-util 10.0.1", "wat", "wit-component", ] @@ -3024,6 +3097,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.6" @@ -3123,9 +3205,9 @@ checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" [[package]] name = "libc" -version = "0.2.144" 
+version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libipld" @@ -3390,7 +3472,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e2d584751cecb2aabaa56106be6be91338a60a0f4e420cf2af639204f596fc1" dependencies = [ "asn1_der", - "bs58", + "bs58 0.4.0", "ed25519-dalek", "libsecp256k1", "log", @@ -3878,6 +3960,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + [[package]] name = "memoffset" version = "0.8.0" @@ -4169,6 +4260,20 @@ dependencies = [ "memoffset 0.6.5", ] +[[package]] +name = "nix" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", + "memoffset 0.7.1", + "pin-utils", + "static_assertions", +] + [[package]] name = "nohash-hasher" version = "0.2.0" @@ -4318,7 +4423,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -4526,7 +4631,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -4557,7 +4662,7 @@ checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -4820,7 +4925,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" dependencies = [ "anyhow", - 
"itertools", + "itertools 0.10.5", "proc-macro2", "quote", "syn 1.0.109", @@ -4943,9 +5048,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.27" +version = "1.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" +checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" dependencies = [ "proc-macro2", ] @@ -5309,7 +5414,7 @@ dependencies = [ "log", "netlink-packet-route", "netlink-proto", - "nix", + "nix 0.24.3", "thiserror", "tokio", ] @@ -5638,7 +5743,7 @@ checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -5719,7 +5824,7 @@ dependencies = [ "darling 0.20.1", "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -5956,6 +6061,12 @@ dependencies = [ "der 0.7.7", ] +[[package]] +name = "sptr" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a" + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -5995,15 +6106,15 @@ checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" [[package]] name = "strum_macros" -version = "0.24.3" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232" dependencies = [ "heck", "proc-macro2", "quote", "rustversion", - "syn 1.0.109", + "syn 2.0.23", ] [[package]] @@ -6053,9 +6164,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.16" +version = "2.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" +checksum = 
"59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" dependencies = [ "proc-macro2", "quote", @@ -6165,7 +6276,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -6243,11 +6354,12 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.28.2" +version = "1.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2" +checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da" dependencies = [ "autocfg", + "backtrace", "bytes", "libc", "mio", @@ -6279,7 +6391,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -6477,7 +6589,7 @@ checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -6671,15 +6783,15 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "ucan" -version = "0.3.2" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2d1efb030f1b24acd7f6a84ef7346d4b6f390000c5304f1d4db11428b9ac5bb" +checksum = "b3722c8cba706d28123758300ca0738852b5132b37a7c656f59b9484ac8f2435" dependencies = [ "anyhow", "async-recursion", "async-trait", "base64 0.21.0", - "bs58", + "bs58 0.5.0", "cid 0.10.1", "futures", "getrandom 0.2.9", @@ -6836,9 +6948,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.3.4" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa2982af2eec27de306107c027578ff7f423d65f7250e40ce0fea8f45248b81" +checksum = 
"d023da39d1fde5a8a3fe1f3e01ca9632ada0a63e9797de55a879d6e2236277be" dependencies = [ "getrandom 0.2.9", "rand 0.8.5", @@ -6926,9 +7038,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi-common" -version = "8.0.1" +version = "10.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "008136464e438c5049a614b6ea1bae9f6c4d354ce9ee2b4d9a1ac6e73f31aafc" +checksum = "3b422ae2403cae9ca603864272a402cf5001dd6fef8632e090e00c4fb475741b" dependencies = [ "anyhow", "bitflags 1.3.2", @@ -6936,12 +7048,12 @@ dependencies = [ "cap-std", "io-extras", "log", - "rustix 0.36.13", + "rustix 0.37.19", "thiserror", "tracing", - "wasmtime", + "wasmtime 10.0.1", "wiggle", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -6965,7 +7077,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", "wasm-bindgen-shared", ] @@ -6999,7 +7111,7 @@ checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -7095,6 +7207,16 @@ dependencies = [ "url", ] +[[package]] +name = "wasmparser" +version = "0.107.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29e3ac9b780c7dda0cac7a52a5d6d2d6707cc6e3451c9db209b6c758f40d7acb" +dependencies = [ + "indexmap 1.9.3", + "semver", +] + [[package]] name = "wasmprinter" version = "0.2.57" @@ -7129,16 +7251,44 @@ dependencies = [ "wasmparser 0.102.0", "wasmtime-cache", "wasmtime-component-macro", - "wasmtime-component-util", + "wasmtime-component-util 8.0.1", "wasmtime-cranelift", - "wasmtime-environ", + "wasmtime-environ 8.0.1", "wasmtime-fiber", - "wasmtime-jit", - "wasmtime-runtime", + "wasmtime-jit 8.0.1", + "wasmtime-runtime 8.0.1", "wat", "windows-sys 0.45.0", ] +[[package]] +name = "wasmtime" +version = "10.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd02b992d828b91efaf2a7499b21205fe4ab3002e401e3fe0f227aaeb4001d93" +dependencies = [ + "anyhow", + "bincode", + "bumpalo", + "cfg-if", + "fxprof-processed-profile", + "indexmap 1.9.3", + "libc", + "log", + "object", + "once_cell", + "paste", + "psm", + "serde", + "serde_json", + "target-lexicon", + "wasmparser 0.107.0", + "wasmtime-environ 10.0.1", + "wasmtime-jit 10.0.1", + "wasmtime-runtime 10.0.1", + "windows-sys 0.48.0", +] + [[package]] name = "wasmtime-asm-macros" version = "8.0.1" @@ -7148,6 +7298,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "wasmtime-asm-macros" +version = "10.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284466ef356ce2d909bc0ad470b60c4d0df5df2de9084457e118131b3c779b92" +dependencies = [ + "cfg-if", +] + [[package]] name = "wasmtime-cache" version = "8.0.1" @@ -7178,7 +7337,7 @@ dependencies = [ "proc-macro2", "quote", "syn 1.0.109", - "wasmtime-component-util", + "wasmtime-component-util 8.0.1", "wasmtime-wit-bindgen", "wit-parser 0.6.4", ] @@ -7189,6 +7348,12 @@ version = "8.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74e02ca7a4a3c69d72b88f26f0192e333958df6892415ac9ab84dcc42c9000c2" +[[package]] +name = "wasmtime-component-util" +version = "10.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f20a5135ec5ef01080e674979b02d6fa5eebaa2b0c2d6660513ee9956a1bf624" + [[package]] name = "wasmtime-cranelift" version = "8.0.1" @@ -7197,7 +7362,7 @@ checksum = "b1cefde0cce8cb700b1b21b6298a3837dba46521affd7b8c38a9ee2c869eee04" dependencies = [ "anyhow", "cranelift-codegen", - "cranelift-entity", + "cranelift-entity 0.95.1", "cranelift-frontend", "cranelift-native", "cranelift-wasm", @@ -7208,7 +7373,7 @@ dependencies = [ "thiserror", "wasmparser 0.102.0", "wasmtime-cranelift-shared", - "wasmtime-environ", + "wasmtime-environ 8.0.1", ] [[package]] @@ -7223,7 +7388,7 
@@ dependencies = [ "gimli", "object", "target-lexicon", - "wasmtime-environ", + "wasmtime-environ 8.0.1", ] [[package]] @@ -7233,7 +7398,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a990198cee4197423045235bf89d3359e69bd2ea031005f4c2d901125955c949" dependencies = [ "anyhow", - "cranelift-entity", + "cranelift-entity 0.95.1", "gimli", "indexmap 1.9.3", "log", @@ -7244,8 +7409,27 @@ dependencies = [ "wasm-encoder 0.25.0", "wasmparser 0.102.0", "wasmprinter", - "wasmtime-component-util", - "wasmtime-types", + "wasmtime-component-util 8.0.1", + "wasmtime-types 8.0.1", +] + +[[package]] +name = "wasmtime-environ" +version = "10.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f9e58e0ee7d43ff13e75375c726b16bce022db798d3a099a65eeaa7d7a544b" +dependencies = [ + "anyhow", + "cranelift-entity 0.97.1", + "gimli", + "indexmap 1.9.3", + "log", + "object", + "serde", + "target-lexicon", + "thiserror", + "wasmparser 0.107.0", + "wasmtime-types 10.0.1", ] [[package]] @@ -7257,7 +7441,7 @@ dependencies = [ "cc", "cfg-if", "rustix 0.36.13", - "wasmtime-asm-macros", + "wasmtime-asm-macros 8.0.1", "windows-sys 0.45.0", ] @@ -7279,13 +7463,37 @@ dependencies = [ "rustc-demangle", "serde", "target-lexicon", - "wasmtime-environ", - "wasmtime-jit-debug", - "wasmtime-jit-icache-coherence", - "wasmtime-runtime", + "wasmtime-environ 8.0.1", + "wasmtime-jit-debug 8.0.1", + "wasmtime-jit-icache-coherence 8.0.1", + "wasmtime-runtime 8.0.1", "windows-sys 0.45.0", ] +[[package]] +name = "wasmtime-jit" +version = "10.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0f2eaeb01bb67266416507829bd8e0bb60278444e4cbd048e280833ebeaa02" +dependencies = [ + "addr2line", + "anyhow", + "bincode", + "cfg-if", + "cpp_demangle", + "gimli", + "log", + "object", + "rustc-demangle", + "rustix 0.37.19", + "serde", + "target-lexicon", + "wasmtime-environ 10.0.1", + "wasmtime-jit-icache-coherence 10.0.1", + 
"wasmtime-runtime 10.0.1", + "windows-sys 0.48.0", +] + [[package]] name = "wasmtime-jit-debug" version = "8.0.1" @@ -7297,6 +7505,15 @@ dependencies = [ "rustix 0.36.13", ] +[[package]] +name = "wasmtime-jit-debug" +version = "10.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f42e59d62542bfb73ce30672db7eaf4084a60b434b688ac4f05b287d497de082" +dependencies = [ + "once_cell", +] + [[package]] name = "wasmtime-jit-icache-coherence" version = "8.0.1" @@ -7308,6 +7525,17 @@ dependencies = [ "windows-sys 0.45.0", ] +[[package]] +name = "wasmtime-jit-icache-coherence" +version = "10.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b49ceb7e2105a8ebe5614d7bbab6f6ef137a284e371633af60b34925493081f" +dependencies = [ + "cfg-if", + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "wasmtime-runtime" version = "8.0.1" @@ -7327,25 +7555,62 @@ dependencies = [ "paste", "rand 0.8.5", "rustix 0.36.13", - "wasmtime-asm-macros", - "wasmtime-environ", + "wasmtime-asm-macros 8.0.1", + "wasmtime-environ 8.0.1", "wasmtime-fiber", - "wasmtime-jit-debug", + "wasmtime-jit-debug 8.0.1", "windows-sys 0.45.0", ] +[[package]] +name = "wasmtime-runtime" +version = "10.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a5de4762421b0b2b19e02111ca403632852b53e506e03b4b227ffb0fbfa63c2" +dependencies = [ + "anyhow", + "cc", + "cfg-if", + "indexmap 1.9.3", + "libc", + "log", + "mach", + "memfd", + "memoffset 0.8.0", + "paste", + "rand 0.8.5", + "rustix 0.37.19", + "sptr", + "wasmtime-asm-macros 10.0.1", + "wasmtime-environ 10.0.1", + "wasmtime-jit-debug 10.0.1", + "windows-sys 0.48.0", +] + [[package]] name = "wasmtime-types" version = "8.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4f6fffd2a1011887d57f07654dd112791e872e3ff4a2e626aee8059ee17f06f" dependencies = [ - "cranelift-entity", + "cranelift-entity 0.95.1", "serde", "thiserror", "wasmparser 
0.102.0", ] +[[package]] +name = "wasmtime-types" +version = "10.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb7c138f797192f46afdd3ec16f85ef007c3bb45fa8e5174031f17b0be4c4a" +dependencies = [ + "cranelift-entity 0.97.1", + "serde", + "thiserror", + "wasmparser 0.107.0", +] + [[package]] name = "wasmtime-wit-bindgen" version = "8.0.1" @@ -7628,7 +7893,7 @@ dependencies = [ "lazy_static", "libc", "log", - "nix", + "nix 0.24.3", "rand 0.8.5", "thiserror", "tokio", @@ -7649,24 +7914,24 @@ checksum = "17882f045410753661207383517a6f62ec3dbeb6a4ed2acce01f0728238d1983" [[package]] name = "wiggle" -version = "8.0.1" +version = "10.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b16a7462893c46c6d3dd2a1f99925953bdbb921080606e1a4c9344864492fa4" +checksum = "ea93d31f59f2b2fa4196990b684771500072d385eaac12587c63db2bc185d705" dependencies = [ "anyhow", "async-trait", "bitflags 1.3.2", "thiserror", "tracing", - "wasmtime", + "wasmtime 10.0.1", "wiggle-macro", ] [[package]] name = "wiggle-generate" -version = "8.0.1" +version = "10.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "489499e186ab24c8ac6d89e9934c54ced6f19bd473730e6a74f533bd67ecd905" +checksum = "7df96ee6bea595fabf0346c08c553f684b08e88fad6fdb125e6efde047024f7b" dependencies = [ "anyhow", "heck", @@ -7679,9 +7944,9 @@ dependencies = [ [[package]] name = "wiggle-macro" -version = "8.0.1" +version = "10.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9142e7fce24a4344c85a43c8b719ef434fc6155223bade553e186cb4183b6cc" +checksum = "8649011a011ecca6197c4db6ee630735062ba20595ea56ce58529b3b1c20aa2f" dependencies = [ "proc-macro2", "quote", @@ -8009,7 +8274,7 @@ checksum = "42c131da5d2ba7746908e1401d474640371c31ad05281528c2a9e945a87d19be" dependencies = [ "anyhow", "proc-macro2", - "syn 2.0.16", + "syn 2.0.23", "wit-bindgen-core", "wit-bindgen-rust", "wit-component", @@ -8207,7 +8472,7 @@ 
checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d1a62b88..499b9bbb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,10 +23,12 @@ byte-unit = { version = "4.0", default-features = false } enum-assoc = " 1.1" enum-as-inner = "0.6" libipld = { version = "0.16", features = ["serde-codec"] } +rand = "0.8" serde_ipld_dagcbor = "0.3" thiserror = "1.0" -tokio = { version = "1.28", features = ["fs", "io-util", "io-std", "macros", "rt", "rt-multi-thread", "tracing"] } +tokio = { version = "1.29", features = ["fs", "io-util", "io-std", "macros", "rt", "rt-multi-thread", "signal", "tracing"] } tracing = "0.1" +ucan = "0.4" # Speedup build on macOS # See https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#splitting-debug-information diff --git a/flake.lock b/flake.lock index 5a7c0391..0dd70562 100644 --- a/flake.lock +++ b/flake.lock @@ -5,11 +5,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1685518550, - "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=", + "lastModified": 1687709756, + "narHash": "sha256-Y5wKlQSkgEK2weWdOu4J3riRd+kV/VCgHsqLNTTWQ/0=", "owner": "numtide", "repo": "flake-utils", - "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef", + "rev": "dbabf0ca0c0c4bce6ea5eaf65af5cb694d2082c7", "type": "github" }, "original": { @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1686331006, - "narHash": "sha256-hElRDWUNG655aqF0awu+h5cmDN+I/dQcChRt2tGuGGU=", + "lastModified": 1688389917, + "narHash": "sha256-RKiK1QeommEsjQ8fLgxt4831x9O6n2gD7wAhVZTrr8M=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "85bcb95aa83be667e562e781e9d186c57a07d757", + "rev": "aed4b19d312525ae7ca9bceb4e1efe3357d0e2eb", "type": "github" }, "original": { @@ -50,11 +50,11 @@ ] }, "locked": { - "lastModified": 1686537156, - "narHash": "sha256-mJD80brS6h6P4jzwdKID0S9RvfyiruxgJbXvPPIDqF0=", + 
"lastModified": 1688438033, + "narHash": "sha256-wOmpZis06pVKTR+5meGwhrW10/buf98lnA26uQLaqek=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "e75da5cfc7da874401decaa88f4ccb3b4d64d20d", + "rev": "c3e43223dece545cfe06ddd92fd782adc73d56c3", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index b1f69717..35bc4f6d 100644 --- a/flake.nix +++ b/flake.nix @@ -102,7 +102,7 @@ && cargo test --doc --all-features" ''; - xFuncTestNoDefault = pkgs.writeScriptBin "x-test-all" '' + xFuncTestNoDefault = pkgs.writeScriptBin "x-test-0" '' cargo watch -c -s "cargo nextest run --no-default-features --nocapture \ && cargo test --doc --no-default-features" ''; @@ -160,7 +160,6 @@ nightly-rustfmt rust-toolchain rust-analyzer - rustup pkg-config pre-commit protobuf diff --git a/homestar-core/Cargo.toml b/homestar-core/Cargo.toml index fe385557..7a4e9e21 100644 --- a/homestar-core/Cargo.toml +++ b/homestar-core/Cargo.toml @@ -31,13 +31,14 @@ indexmap = "2.0" libipld = { workspace = true } libsqlite3-sys = { version = "0.26", features = ["bundled"] } proptest = { version = "1.2", optional = true } +rand = { workspace = true } serde = { version = "1.0", features = ["derive"] } signature = "2.0" thiserror = { workspace = true } tracing = { workspace = true } -ucan = "0.3" +ucan = { workspace = true } url = { version = "2.3", features = ["serde"] } -uuid = { version = "1.3", default-features = false, features = ["v4", "fast-rng"] } +uuid = { version = "1.4", default-features = false, features = ["v4", "fast-rng"] } xid = "1.0" [dev-dependencies] diff --git a/homestar-core/src/test_utils/cid.rs b/homestar-core/src/test_utils/cid.rs new file mode 100644 index 00000000..9c2076f3 --- /dev/null +++ b/homestar-core/src/test_utils/cid.rs @@ -0,0 +1,23 @@ +//! CID test and generation utilities. 
+ +use libipld::{Cid, Multihash}; +use rand::RngCore; + +fn get_random_bytes(rng: &mut impl RngCore) -> [u8; N] { + let mut bytes = [0u8; N]; + rng.fill_bytes(&mut bytes); + bytes +} + +/// Generate a random [Cid] with a `0x55` prefix. +pub fn generate_cid(rng: &mut impl RngCore) -> Cid { + let bytes = { + let mut tmp = [0u8; 10]; + let (a, b) = tmp.split_at_mut(2); + a.copy_from_slice(&[0x55, 0x08]); + b.copy_from_slice(&get_random_bytes::<8>(rng)); + tmp + }; + + Cid::new_v1(0x55, Multihash::from_bytes(&bytes).unwrap()) +} diff --git a/homestar-core/src/test_utils/mod.rs b/homestar-core/src/test_utils/mod.rs index 991f3bab..cb6162ab 100644 --- a/homestar-core/src/test_utils/mod.rs +++ b/homestar-core/src/test_utils/mod.rs @@ -1,5 +1,7 @@ //! Test Utilities. +#[cfg(feature = "test_utils")] +pub mod cid; /// Random value generator for sampling data. #[cfg(feature = "test_utils")] mod rvg; diff --git a/homestar-core/src/workflow/input.rs b/homestar-core/src/workflow/input.rs index 28711e5f..4fc83d55 100644 --- a/homestar-core/src/workflow/input.rs +++ b/homestar-core/src/workflow/input.rs @@ -67,9 +67,8 @@ where /// [resolving Ipld links]: resolve_links pub fn resolve(self, lookup_fn: F) -> Result where - F: Fn(Cid) -> Result, ResolveError> + Clone, + F: FnMut(Cid) -> Result, ResolveError> + Clone, Ipld: From, - T: Clone, { let inputs = resolve_args(self.0, lookup_fn); Ok(Args(inputs)) @@ -145,9 +144,9 @@ impl Input { /// [awaited promises]: Await /// [inputs]: Input /// [resolving Ipld links]: resolve_links - pub fn resolve(self, lookup_fn: F) -> Input + pub fn resolve(self, mut lookup_fn: F) -> Input where - F: Fn(Cid) -> Result, ResolveError> + Clone, + F: FnMut(Cid) -> Result, ResolveError> + Clone, Ipld: From, { match self { @@ -232,7 +231,7 @@ where fn resolve_args(args: Vec>, lookup_fn: F) -> Vec> where - F: Fn(Cid) -> Result, ResolveError> + Clone, + F: FnMut(Cid) -> Result, ResolveError> + Clone, Ipld: From, { let args = args.into_iter().map(|v| 
v.resolve(lookup_fn.clone())); @@ -242,9 +241,9 @@ where /// Resolve [awaited promises] for *only* [Ipld] data, given a lookup function. /// /// [awaited promises]: Await -pub fn resolve_links(ipld: Ipld, lookup_fn: F) -> Ipld +pub fn resolve_links(ipld: Ipld, mut lookup_fn: F) -> Ipld where - F: Fn(Cid) -> Result, ResolveError> + Clone, + F: FnMut(Cid) -> Result, ResolveError> + Clone, Ipld: From, { match ipld { diff --git a/homestar-core/src/workflow/receipt/metadata.rs b/homestar-core/src/workflow/receipt/metadata.rs index ff72d9ae..3698b87d 100644 --- a/homestar-core/src/workflow/receipt/metadata.rs +++ b/homestar-core/src/workflow/receipt/metadata.rs @@ -1,4 +1,6 @@ -//! Metadata related to [Receipt]s. +//! Metadata related to [receipts]. +//! +//! [receipts]: crate::workflow::Receipt /// Metadata key for an operation or function name. pub const OP_KEY: &str = "op"; diff --git a/homestar-runtime/Cargo.toml b/homestar-runtime/Cargo.toml index 2465f29f..babc13b5 100644 --- a/homestar-runtime/Cargo.toml +++ b/homestar-runtime/Cargo.toml @@ -31,7 +31,7 @@ ansi_term = { version = "0.12", optional = true, default-features = false } # https://github.com/DevinR528/cargo-sort/issues/47 anyhow = { workspace = true } async-trait = "0.1" -axum = { version = "0.6", features = ["ws", "headers"] } +axum = { version = "0.6", features = ["ws", "headers"], optional = true } byte-unit = { workspace = true } clap = { version = "4.3", features = ["derive", "color", "help"] } concat-in-place = "1.1" @@ -39,9 +39,12 @@ config = "0.13" console-subscriber = { version = "0.1", default-features = false, features = [ "parking_lot" ], optional = true } crossbeam = "0.8" dagga = "0.2" +dashmap = "5.4" diesel = { version = "2.1", features = ["sqlite", "r2d2", "returning_clauses_for_sqlite_3_35"] } dotenvy = "0.15" enum-assoc = { workspace = true } +evmap = "10.0" +fnv = "1.0" futures = "0.3" headers = "0.3" homestar-core = { version = "0.1", path = "../homestar-core" } @@ -51,7 +54,6 @@ 
http-serde = "1.1" indexmap = "2.0" ipfs-api = { version = "0.17", optional = true } ipfs-api-backend-hyper = { version = "0.6", features = ["with-builder", "with-send-sync"], optional = true } -itertools = "0.10" libipld = { workspace = true } libp2p = { version = "0.51", default-features = false, features = ["kad", "request-response", "macros", "identify", "mdns", "gossipsub", "tokio", "dns", "mplex", "tcp", "noise", "yamux", "websocket", "ed25519", "secp256k1"] } libsqlite3-sys = { version = "0.26", features = ["bundled"] } @@ -78,14 +80,17 @@ criterion = "0.5" diesel_migrations = "2.1" homestar-core = { version = "0.1", path = "../homestar-core", features = [ "test_utils" ] } json = "0.12" +nix = "0.26" +rand = { workspace = true } tokio-tungstenite = "0.19" [features] -default = ["ipfs"] +default = ["ipfs", "websocket-server"] ansi-logs = ["ansi_term"] console = ["console-subscriber"] ipfs = ["ipfs-api", "ipfs-api-backend-hyper"] test_utils = ["proptest"] +websocket-server = ["axum"] [package.metadata.docs.rs] all-features = true diff --git a/homestar-runtime/fixtures/settings.toml b/homestar-runtime/fixtures/settings.toml new file mode 100644 index 00000000..7f471e51 --- /dev/null +++ b/homestar-runtime/fixtures/settings.toml @@ -0,0 +1,8 @@ +[monitoring] +process_collector_interval = 10 + +[node] + +[node.network] +events_buffer_len = 1000 +websocket_port = 9999 diff --git a/homestar-runtime/src/cli.rs b/homestar-runtime/src/cli.rs index 4bbbc416..feabaee9 100644 --- a/homestar-runtime/src/cli.rs +++ b/homestar-runtime/src/cli.rs @@ -13,24 +13,24 @@ USAGE: /// CLI arguments. #[derive(Parser, Debug)] #[command(author, version, about, long_about = None, help_template = HELP_TEMPLATE)] -pub struct Args { - /// Ipvm-specific [Argument]. +pub struct Cli { + /// TODO + #[arg( + short = 'c', + long = "config", + value_name = "CONFIG", + help = "runtime configuration file" + )] + pub runtime_config: Option, + + /// Homestar [Command]. 
#[clap(subcommand)] - pub argument: Argument, + pub command: Command, } /// CLI Argument types. #[derive(Debug, Parser)] -pub enum Argument { - /// Run a workflow given a file. - Run { - /// Configuration file for *homestar* node settings. - #[arg( - short = 'c', - long = "config", - value_name = "CONFIG", - help = "runtime configuration file" - )] - runtime_config: Option, - }, +pub enum Command { + /// Start the Runtime with the Homestar runner. + Start, } diff --git a/homestar-runtime/src/db.rs b/homestar-runtime/src/db.rs index cb3a79ed..db92ce04 100644 --- a/homestar-runtime/src/db.rs +++ b/homestar-runtime/src/db.rs @@ -69,7 +69,7 @@ impl Db { /// /// [pool]: Pool /// [connection]: Connection -pub trait Database { +pub trait Database: Send + Clone { /// Establish a pooled connection to Sqlite database. fn setup_connection_pool(settings: &settings::Node) -> Result where diff --git a/homestar-runtime/src/event_handler.rs b/homestar-runtime/src/event_handler.rs new file mode 100644 index 00000000..673bd287 --- /dev/null +++ b/homestar-runtime/src/event_handler.rs @@ -0,0 +1,106 @@ +//! [EventHandler] implementation for handling network events and messages. 
+ +#[cfg(feature = "ipfs")] +use crate::network::IpfsCli; +use crate::{db::Database, network::swarm::ComposedBehaviour, settings}; +use anyhow::Result; +use async_trait::async_trait; +use fnv::FnvHashMap; +use libp2p::{futures::StreamExt, kad::QueryId, swarm::Swarm}; +use std::sync::Arc; +use tokio::{select, sync::mpsc}; + +pub(crate) mod channel; +pub(crate) mod event; +pub(crate) mod swarm_event; + +pub(crate) use event::Event; + +type P2PSender = channel::BoundedChannelSender; + +#[async_trait] +pub(crate) trait Handler +where + DB: Database, +{ + #[cfg(not(feature = "ipfs"))] + async fn handle_event(self, event_loop: &mut EventHandler); + #[cfg(feature = "ipfs")] + async fn handle_event(self, event_handler: &mut EventHandler, ipfs: IpfsCli); +} + +/// Event loop handler for [libp2p] network events and commands. +#[allow(dead_code)] +#[allow(missing_debug_implementations)] +pub struct EventHandler { + db: DB, + sender: Arc>, + receiver: mpsc::Receiver, + receipt_quorum: usize, + swarm: Swarm, + workflow_quorum: usize, + worker_swarm_senders: FnvHashMap, +} + +impl EventHandler +where + DB: Database, +{ + fn setup_channel(settings: &settings::Node) -> (mpsc::Sender, mpsc::Receiver) { + mpsc::channel(settings.network.events_buffer_len) + } + + /// Create an [EventHandler] with channel sender/receiver defaults. + pub(crate) fn new(swarm: Swarm, db: DB, settings: &settings::Node) -> Self { + let (sender, receiver) = Self::setup_channel(settings); + Self { + db, + sender: Arc::new(sender), + receiver, + receipt_quorum: settings.network.receipt_quorum, + swarm, + workflow_quorum: settings.network.workflow_quorum, + worker_swarm_senders: FnvHashMap::default(), + } + } + + /// Sequence for shutting down [EventHandler]. + pub(crate) async fn shutdown(&mut self) { + self.receiver.close(); + self.sender.closed().await + } + + /// Get a [Arc]'ed copy of the [EventHandler] channel sender. 
+ pub(crate) fn sender(&self) -> Arc> { + self.sender.clone() + } + + /// Start [EventHandler] that matches on swarm and pubsub [events]. + /// + /// [events]: libp2p::swarm::SwarmEvent + #[cfg(not(feature = "ipfs"))] + pub(crate) async fn start(mut self) -> Result<()> { + loop { + select! { + swarm_event = self.swarm.select_next_some() => + swarm_event.handle_event(&mut self).await, + runtime_event = self.receiver.recv() => + if let Some(ev) = runtime_event { ev.handle_event(&mut self).await }, + } + } + } + /// Start [EventHandler] that matches on swarm and pubsub [events]. + /// + /// [events]: libp2p::swarm::SwarmEvent + #[cfg(feature = "ipfs")] + pub(crate) async fn start(mut self, ipfs: IpfsCli) -> Result<()> { + loop { + select! { + swarm_event = self.swarm.select_next_some() => + swarm_event.handle_event(&mut self, ipfs.clone()).await, + runtime_event = self.receiver.recv() => + if let Some(ev) = runtime_event { ev.handle_event(&mut self, ipfs.clone()).await }, + } + } + } +} diff --git a/homestar-runtime/src/event_handler/channel.rs b/homestar-runtime/src/event_handler/channel.rs new file mode 100644 index 00000000..ca2b8392 --- /dev/null +++ b/homestar-runtime/src/event_handler/channel.rs @@ -0,0 +1,31 @@ +//! Wrapper around [crossbeam::channel] to provide a common interface for +//! bounded and non-tokio "oneshot" channels. + +use crossbeam::channel; + +/// Sender for a bounded [crossbeam::channel]. +pub(crate) type BoundedChannelSender = channel::Sender; +/// Receiver for a bounded [crossbeam::channel]. +#[allow(dead_code)] +pub(crate) type BoundedChannelReceiver = channel::Receiver; + +/// A bounded [crossbeam::channel] with a sender and receiver. +#[derive(Debug, Clone)] +pub(crate) struct BoundedChannel { + pub(crate) tx: channel::Sender, + pub(crate) rx: channel::Receiver, +} + +impl BoundedChannel { + /// Create a new [BoundedChannel] with a given capacity. 
+ pub(crate) fn new(capacity: usize) -> Self { + let (tx, rx) = channel::bounded(capacity); + Self { tx, rx } + } + + /// Create a oneshot (1) [BoundedChannel]. + pub(crate) fn oneshot() -> Self { + let (tx, rx) = channel::bounded(1); + Self { tx, rx } + } +} diff --git a/homestar-runtime/src/event_handler/event.rs b/homestar-runtime/src/event_handler/event.rs new file mode 100644 index 00000000..f76d3833 --- /dev/null +++ b/homestar-runtime/src/event_handler/event.rs @@ -0,0 +1,232 @@ +#[cfg(feature = "ipfs")] +use crate::network::IpfsCli; +use crate::{ + db::{Connection, Database, Db}, + event_handler::{Handler, P2PSender}, + network::{pubsub, swarm::TopicMessage}, + workflow, EventHandler, Receipt, +}; +use anyhow::{anyhow, Result}; +use async_trait::async_trait; +use homestar_core::workflow::Receipt as InvocationReceipt; +use libipld::Cid; +use libp2p::kad::{record::Key, Quorum, Record}; +use std::{num::NonZeroUsize, sync::Arc}; +use tokio::sync::oneshot; +use tracing::{error, info}; + +/// A [Receipt] captured (inner) event. +#[derive(Debug, Clone)] +pub struct Captured { + pub(crate) receipt: Receipt, + pub(crate) workflow: Arc, +} + +/// A structured query for finding a [Record] in the DHT and +/// returning to a [P2PSender]. +#[derive(Debug, Clone)] +pub struct QueryRecord { + pub(crate) cid: Cid, + pub(crate) sender: P2PSender, +} + +/// Internal events to capture. +#[derive(Debug)] +pub enum Event { + /// [Receipt] captured event. + CapturedReceipt(Captured), + /// General shutdown event. + Shutdown(oneshot::Sender<()>), + /// Find a [Record] in the DHT, e.g. a [Receipt]. 
+ /// + /// [Record]: libp2p::kad::Record + /// [Receipt]: homestar_core::workflow::Receipt + FindRecord(QueryRecord), + /// TODO + RemoveRecord(QueryRecord), +} + +impl Event { + async fn handle_info(self, event_handler: &mut EventHandler) -> Result<()> + where + DB: Database, + { + match self { + Event::CapturedReceipt(captured) => { + let mut conn = event_handler.db.conn()?; + let (cid, _bytes) = captured.store(event_handler, &mut conn)?; + info!( + cid = cid.to_string(), + "record replicated with quorum {}", event_handler.receipt_quorum + ); + } + Event::Shutdown(tx) => { + event_handler.shutdown().await; + let _ = tx.send(()); + } + Event::FindRecord(record) => record.find(event_handler), + Event::RemoveRecord(record) => record.remove(event_handler), + } + Ok(()) + } +} + +impl Captured { + /// `Captured` structure, containing a [Receipt] and [workflow::Info]. + pub fn with(receipt: Receipt, workflow: Arc) -> Self { + Self { receipt, workflow } + } + + fn store( + mut self, + event_handler: &mut EventHandler, + conn: &mut Connection, + ) -> Result<(Cid, Vec)> + where + DB: Database, + { + let receipt_cid = self.receipt.cid(); + let invocation_receipt = InvocationReceipt::from(&self.receipt); + let instruction_bytes = self.receipt.instruction_cid_as_bytes(); + match event_handler.swarm.behaviour_mut().gossip_publish( + pubsub::RECEIPTS_TOPIC, + TopicMessage::CapturedReceipt(self.receipt), + ) { + Ok(msg_id) => info!( + "message {msg_id} published on {} for receipt with cid: {receipt_cid}", + pubsub::RECEIPTS_TOPIC + ), + Err(err) => { + error!( + error=?err, "message not published on {} for receipt with cid: {receipt_cid}", + pubsub::RECEIPTS_TOPIC + ) + } + } + + let receipt_quorum = if event_handler.receipt_quorum > 0 { + unsafe { Quorum::N(NonZeroUsize::new_unchecked(event_handler.receipt_quorum)) } + } else { + Quorum::One + }; + + let workflow_quorum = if event_handler.workflow_quorum > 0 { + unsafe { 
Quorum::N(NonZeroUsize::new_unchecked(event_handler.receipt_quorum)) } + } else { + Quorum::One + }; + + if let Ok(receipt_bytes) = Receipt::invocation_capsule(invocation_receipt) { + let _id = event_handler + .swarm + .behaviour_mut() + .kademlia + .put_record( + Record::new(instruction_bytes, receipt_bytes.to_vec()), + receipt_quorum, + ) + .map_err(anyhow::Error::msg)?; + + // Store workflow_receipt join information. + let _ = Db::store_workflow_receipt(self.workflow.cid, receipt_cid, conn); + Arc::make_mut(&mut self.workflow).increment_progress(receipt_cid); + + let wf_cid_bytes = self.workflow.cid_as_bytes(); + let wf_bytes = self.workflow.capsule()?; + + let _id = event_handler + .swarm + .behaviour_mut() + .kademlia + .put_record(Record::new(wf_cid_bytes, wf_bytes), workflow_quorum) + .map_err(anyhow::Error::msg)?; + + // TODO: Handle Workflow Complete / Num of Tasks finished. + + Ok((receipt_cid, receipt_bytes.to_vec())) + } else { + Err(anyhow!("cannot convert receipt {receipt_cid} to bytes")) + } + } +} + +impl QueryRecord { + /// Create a new [QueryRecord] with a [Cid] and [P2PSender]. 
+ pub fn with(cid: Cid, sender: P2PSender) -> Self { + Self { cid, sender } + } + + fn find(self, event_handler: &mut EventHandler) + where + DB: Database, + { + let id = event_handler + .swarm + .behaviour_mut() + .kademlia + .get_record(Key::new(&self.cid.to_bytes())); + event_handler.worker_swarm_senders.insert(id, self.sender); + } + + fn remove(self, event_handler: &mut EventHandler) + where + DB: Database, + { + event_handler + .swarm + .behaviour_mut() + .kademlia + .remove_record(&Key::new(&self.cid.to_bytes())); + } +} + +#[async_trait] +impl Handler<(), DB> for Event +where + DB: Database, +{ + #[cfg(not(feature = "ipfs"))] + async fn handle_event(self, event_handler: &mut EventHandler) { + if let Err(err) = self.handle_info(event_handler).await { + error!(error=?err, "error storing event") + } + } + + #[cfg(feature = "ipfs")] + async fn handle_event(self, event_handler: &mut EventHandler, ipfs: IpfsCli) { + match self { + Event::CapturedReceipt(captured) => { + if let Err(err) = event_handler.db.conn().map(|mut conn| { + captured.store(event_handler, &mut conn).map(|(cid, bytes)| { + info!( + cid = cid.to_string(), + "record replicated with quorum {}", event_handler.receipt_quorum + ); + + // Spawn client call in background, without awaiting. 
+ tokio::spawn(async move { + match ipfs.put_receipt_bytes(bytes.to_vec()).await { + Ok(put_cid) => { + info!(cid = put_cid, "IPLD DAG node stored"); + + #[cfg(debug_assertions)] + debug_assert_eq!(put_cid, cid.to_string()); + } + Err(err) => { + info!(error=?err, cid=cid.to_string(), "Failed to store IPLD DAG node") + } + } + }); + }) + }) { + error!(error=?err, "error storing event") + } + } + event => { + if let Err(err) = event.handle_info(event_handler).await { + error!(error=?err, "error storing event") + } + } + } + } +} diff --git a/homestar-runtime/src/event_handler/swarm_event.rs b/homestar-runtime/src/event_handler/swarm_event.rs new file mode 100644 index 00000000..ad649c07 --- /dev/null +++ b/homestar-runtime/src/event_handler/swarm_event.rs @@ -0,0 +1,282 @@ +#[cfg(feature = "ipfs")] +use crate::network::IpfsCli; +use crate::{ + db::Database, + event_handler::Handler, + network::swarm::ComposedEvent, + receipt::{RECEIPT_TAG, VERSION_KEY}, + workflow, + workflow::WORKFLOW_TAG, + Db, EventHandler, Receipt, +}; +use anyhow::{anyhow, Result}; +use async_trait::async_trait; +use homestar_core::{ + consts, + workflow::{Pointer, Receipt as InvocationReceipt}, +}; +use libipld::{Cid, Ipld}; +use libp2p::{ + gossipsub, + kad::{ + AddProviderOk, BootstrapOk, GetProvidersOk, GetRecordOk, KademliaEvent, PeerRecord, + PutRecordOk, QueryResult, + }, + mdns, + multiaddr::Protocol, + swarm::SwarmEvent, +}; +use std::fmt; +use tracing::{debug, error, info}; + +/// Internal events within the [SwarmEvent] context related to finding results +/// on the DHT. +#[derive(Debug, Clone, PartialEq)] +pub enum FoundEvent { + /// Found [Receipt] on the DHT. + Receipt(Receipt), + /// Found [workflow::Info] on the DHT. + Workflow(workflow::Info), +} + +/// Trait for handling [PeerRecord]s found on the DHT. 
+pub(crate) trait FoundRecord { + fn found_record(&self) -> Result; +} + +impl FoundRecord for PeerRecord { + fn found_record(&self) -> Result { + let key_cid = Cid::try_from(self.record.key.as_ref())?; + match serde_ipld_dagcbor::de::from_reader(&*self.record.value) { + Ok(Ipld::Map(mut map)) => match map.pop_first() { + Some((code, Ipld::Map(mut rest))) if code == RECEIPT_TAG => { + if rest.remove(VERSION_KEY) + == Some(Ipld::String(consts::INVOCATION_VERSION.to_string())) + { + let invocation_receipt = InvocationReceipt::try_from(Ipld::Map(rest))?; + let receipt = + Receipt::try_with(Pointer::new(key_cid), &invocation_receipt)?; + Ok(FoundEvent::Receipt(receipt)) + } else { + Err(anyhow!( + "record version mismatch, current version: {}", + consts::INVOCATION_VERSION + )) + } + } + Some((code, Ipld::Map(rest))) if code == WORKFLOW_TAG => { + let workflow_info = workflow::Info::try_from(Ipld::Map(rest))?; + Ok(FoundEvent::Workflow(workflow_info)) + } + Some((code, _)) => Err(anyhow!("decode mismatch: {code} is not known")), + None => Err(anyhow!("invalid record value")), + }, + Ok(ipld) => Err(anyhow!( + "decode mismatch: expected an Ipld map, got {ipld:#?}", + )), + Err(err) => { + error!(error=?err, "error deserializing record value"); + Err(anyhow!("error deserializing record value")) + } + } + } +} + +#[async_trait] +impl Handler for SwarmEvent +where + THandlerErr: fmt::Debug + Send, + DB: Database, +{ + #[cfg(feature = "ipfs")] + async fn handle_event(self, event_handler: &mut EventHandler, _ipfs: IpfsCli) { + handle_swarm_event(self, event_handler).await + } + + #[cfg(not(feature = "ipfs"))] + async fn handle_event(self, event_handler: &mut EventHandler) { + handle_swarm_event(self, event_handler).await + } +} + +async fn handle_swarm_event( + event: SwarmEvent, + event_handler: &mut EventHandler, +) { + match event { + SwarmEvent::Behaviour(ComposedEvent::Gossipsub(gossipsub::Event::Message { + message, + propagation_source, + message_id, + })) => match 
Receipt::try_from(message.data) { + Ok(receipt) => { + info!( + "got message: {receipt} from {propagation_source} with message id: {message_id}" + ); + + // Store gossiped receipt. + let _ = event_handler + .db + .conn() + .as_mut() + .map(|conn| Db::store_receipt(receipt, conn)); + } + Err(err) => info!(err=?err, "cannot handle incoming event message"), + }, + SwarmEvent::Behaviour(ComposedEvent::Gossipsub(gossipsub::Event::Subscribed { + peer_id, + topic, + })) => { + debug!("{peer_id} subscribed to topic {topic} over gossipsub") + } + SwarmEvent::Behaviour(ComposedEvent::Gossipsub(_)) => {} + SwarmEvent::Behaviour(ComposedEvent::Kademlia( + KademliaEvent::OutboundQueryProgressed { id, result, .. }, + )) => match result { + QueryResult::Bootstrap(Ok(BootstrapOk { peer, .. })) => { + debug!("successfully bootstrapped peer: {peer}") + } + QueryResult::GetProviders(Ok(GetProvidersOk::FoundProviders { + key, + providers, + .. + })) => { + for peer in providers { + debug!("peer {peer} provides key: {key:#?}"); + } + } + QueryResult::GetProviders(Err(err)) => { + error!("error retrieving outbound query providers: {err}") + } + QueryResult::GetRecord(Ok(GetRecordOk::FoundRecord(peer_record))) => { + debug!( + "found record {:#?}, published by {:?}", + peer_record.record.key, peer_record.record.publisher + ); + match peer_record.found_record() { + Ok(event) => { + info!("event: {event:#?}"); + if let Some(sender) = event_handler.worker_swarm_senders.remove(&id) { + let _ = sender.send(event); + } else { + error!("error converting key {:#?} to cid", peer_record.record.key) + } + } + Err(err) => error!(err=?err, "error retrieving record"), + } + } + QueryResult::GetRecord(Ok(_)) => {} + QueryResult::GetRecord(Err(err)) => { + error!("error retrieving record: {err}"); + } + QueryResult::PutRecord(Ok(PutRecordOk { key })) => { + debug!("successfully put record {key:#?}"); + } + QueryResult::PutRecord(Err(err)) => { + error!("error putting record: {err}") + } + 
QueryResult::StartProviding(Ok(AddProviderOk { key })) => { + debug!("successfully put provider record {key:#?}"); + } + QueryResult::StartProviding(Err(err)) => { + error!("error putting provider record: {err}"); + } + _ => {} + }, + SwarmEvent::Behaviour(ComposedEvent::Mdns(mdns::Event::Discovered(list))) => { + for (peer_id, _multiaddr) in list { + info!("mDNS discovered a new peer: {peer_id}"); + + event_handler + .swarm + .behaviour_mut() + .gossipsub + .add_explicit_peer(&peer_id); + } + } + SwarmEvent::Behaviour(ComposedEvent::Mdns(mdns::Event::Expired(list))) => { + for (peer_id, _multiaddr) in list { + info!("mDNS discover peer has expired: {peer_id}"); + + event_handler + .swarm + .behaviour_mut() + .gossipsub + .remove_explicit_peer(&peer_id); + } + } + SwarmEvent::NewListenAddr { address, .. } => { + let local_peer_id = *event_handler.swarm.local_peer_id(); + info!( + "local node is listening on {:?}", + address.with(Protocol::P2p(local_peer_id.into())) + ); + } + SwarmEvent::IncomingConnection { .. 
} => {} + _ => {} + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{test_utils, workflow}; + use homestar_core::{ + ipld::DagCbor, + test_utils::workflow as workflow_test_utils, + workflow::{config::Resources, instruction::RunInstruction, prf::UcanPrf, Task}, + Workflow, + }; + use homestar_wasm::io::Arg; + use libp2p::{kad::Record, PeerId}; + + #[test] + fn found_receipt_record() { + let (invocation_receipt, receipt) = test_utils::receipt::receipts(); + let instruction_bytes = receipt.instruction_cid_as_bytes(); + let bytes = Receipt::invocation_capsule(invocation_receipt).unwrap(); + let record = Record::new(instruction_bytes, bytes); + let peer_record = PeerRecord { + record, + peer: Some(PeerId::random()), + }; + if let FoundEvent::Receipt(found_receipt) = peer_record.found_record().unwrap() { + assert_eq!(found_receipt, receipt); + } else { + panic!("Incorrect event type") + } + } + + #[test] + fn found_workflow_record() { + let config = Resources::default(); + let (instruction1, instruction2, _) = + workflow_test_utils::related_wasm_instructions::(); + let task1 = Task::new( + RunInstruction::Expanded(instruction1.clone()), + config.clone().into(), + UcanPrf::default(), + ); + let task2 = Task::new( + RunInstruction::Expanded(instruction2), + config.into(), + UcanPrf::default(), + ); + + let workflow = Workflow::new(vec![task1.clone(), task2.clone()]); + let workflow_info = + workflow::Info::default(workflow.clone().to_cid().unwrap(), workflow.len()); + let workflow_cid_bytes = workflow_info.cid_as_bytes(); + let bytes = workflow_info.capsule().unwrap(); + let record = Record::new(workflow_cid_bytes, bytes); + let peer_record = PeerRecord { + record, + peer: Some(PeerId::random()), + }; + if let FoundEvent::Workflow(found_workflow) = peer_record.found_record().unwrap() { + assert_eq!(found_workflow, workflow_info); + } else { + panic!("Incorrect event type") + } + } +} diff --git a/homestar-runtime/src/lib.rs b/homestar-runtime/src/lib.rs index 
d054bb31..f7111c74 100644 --- a/homestar-runtime/src/lib.rs +++ b/homestar-runtime/src/lib.rs @@ -19,10 +19,11 @@ pub mod cli; pub mod db; +mod event_handler; pub mod logger; -pub mod network; +mod network; mod receipt; -mod runtime; +pub mod runner; pub mod scheduler; mod settings; pub mod tasks; @@ -30,12 +31,12 @@ mod worker; pub mod workflow; pub use db::Db; -#[cfg(feature = "ipfs")] -pub use network::ipfs::IpfsCli; +pub use event_handler::{event::Event, EventHandler}; +#[cfg(feature = "websocket-server")] +pub use network::ws; pub use receipt::{Receipt, RECEIPT_TAG, VERSION_KEY}; -pub use runtime::*; +pub use runner::Runner; pub use settings::Settings; -pub use worker::Worker; /// Test utilities. #[cfg(any(test, feature = "test_utils"))] diff --git a/homestar-runtime/src/logger.rs b/homestar-runtime/src/logger.rs index 9c63aad0..0580ad43 100644 --- a/homestar-runtime/src/logger.rs +++ b/homestar-runtime/src/logger.rs @@ -18,7 +18,6 @@ pub fn init(writer: tracing_appender::non_blocking::NonBlocking) -> Result<()> { let filter = EnvFilter::try_from_default_env() .unwrap_or_else(|_| { EnvFilter::new("info") - .add_directive("atuin_client=warn".parse().expect(DIRECTIVE_EXPECT)) .add_directive("libp2p=info".parse().expect(DIRECTIVE_EXPECT)) .add_directive( "libp2p_gossipsub::behaviour=debug" @@ -32,7 +31,6 @@ pub fn init(writer: tracing_appender::non_blocking::NonBlocking) -> Result<()> { #[cfg(any(not(feature = "console"), not(tokio_unstable)))] let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| { EnvFilter::new("info") - .add_directive("atuin_client=warn".parse().expect(DIRECTIVE_EXPECT)) .add_directive("libp2p=info".parse().expect(DIRECTIVE_EXPECT)) .add_directive( "libp2p_gossipsub::behaviour=debug" diff --git a/homestar-runtime/src/main.rs b/homestar-runtime/src/main.rs index 90d22412..f137cbd3 100644 --- a/homestar-runtime/src/main.rs +++ b/homestar-runtime/src/main.rs @@ -1,53 +1,89 @@ use anyhow::Result; use clap::Parser; -#[cfg(feature = 
"ipfs")] -use homestar_runtime::network::ipfs::IpfsCli; +#[cfg(feature = "websocket-server")] +use homestar_runtime::ws; use homestar_runtime::{ - cli::{Args, Argument}, - db::{Database, Db}, - logger, - network::{eventloop::EventLoop, swarm, ws::WebSocket}, - Settings, + cli::{Cli, Command}, + db::Database, + logger, Db, Runner, Settings, }; -use std::sync::Arc; +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, +}; +use tokio::{runtime, select, time}; +use tracing::info; -#[tokio::main(flavor = "multi_thread")] -async fn main() -> Result<()> { +fn main() { let (stdout_writer, _stdout_guard) = tracing_appender::non_blocking(std::io::stdout()); - logger::init(stdout_writer)?; + logger::init(stdout_writer).expect("Failed to initialize logger"); + + let cli = Cli::parse(); + + let settings = if let Some(file) = cli.runtime_config { + Settings::load_from_file(file) + } else { + Settings::load() + } + .expect("Failed to load settings"); - let opts = Args::parse(); + info!("starting with settings: {:?}", settings,); - #[cfg(feature = "ipfs")] - let ipfs = IpfsCli::default(); + let runtime = runtime::Builder::new_multi_thread() + .enable_all() + .thread_name_fn(|| { + static ATOMIC_ID: AtomicUsize = AtomicUsize::new(0); + let id = ATOMIC_ID.fetch_add(1, Ordering::SeqCst); + format!("runtime-{}", id) + }) + .build() + .expect("Failed to start multi-threaded runtime"); - match opts.argument { - Argument::Run { runtime_config } => { - let settings = if let Some(file) = runtime_config { - Settings::load_from_file(file) - } else { - Settings::load() - }?; + let db = Db::setup_connection_pool(settings.node()) + .expect("Failed to setup database connection pool"); - let db = Db::setup_connection_pool(settings.node())?; - let swarm = swarm::new(settings.node()).await?; + match cli.command { + Command::Start => { + runtime + .block_on(runner(Arc::new(settings), db)) + .expect("Failed to run initialization"); + } + } - let (_tx, rx) = 
EventLoop::setup_channel(settings.node()); - // instantiate and start event-loop for events - let eventloop = EventLoop::new(swarm, rx, settings.node()); + drop(runtime); +} - #[cfg(not(feature = "ipfs"))] - tokio::spawn(eventloop.run(db)); +async fn runner(settings: Arc, db: impl Database + 'static) -> Result<()> { + let mut runner = Runner::start(settings.clone(), db).await?; - #[cfg(feature = "ipfs")] - tokio::spawn(eventloop.run(db, ipfs)); + loop { + select! { + biased; + Ok(_event) = runner.command_receiver() => info!("Connected to the Network"), + _ = Runner::shutdown_signal() => { + info!("gracefully shutting down runner"); + let drain_timeout = time::Instant::now() + settings.node().shutdown_timeout(); - let (ws_tx, ws_rx) = WebSocket::setup_channel(settings.node()); - let ws_sender = Arc::new(ws_tx); - let ws_receiver = Arc::new(ws_rx); - WebSocket::start_server(ws_sender, ws_receiver, settings.node()).await?; + select! { + Ok(()) = runner.shutdown() => { + #[cfg(feature = "websocket-server")] + match runner.ws_receiver().recv() { + Ok(ws::WsMessage::GracefulShutdown) => (), + Err(err) => info!(error=?err, "runner shutdown complete, but with error"), + } + info!("runner shutdown complete"); + break; + }, + _ = time::sleep_until(drain_timeout) => { + info!("shutdown timeout reached, shutting down runner anyway"); + break; + } + } + } } } + //drop(db); + Ok(()) } diff --git a/homestar-runtime/src/network/eventloop.rs b/homestar-runtime/src/network/eventloop.rs deleted file mode 100644 index 1fe235f4..00000000 --- a/homestar-runtime/src/network/eventloop.rs +++ /dev/null @@ -1,499 +0,0 @@ -//! [EventLoop] implementation for handling network events and messages, as well -//! as commands for the running [libp2p] node. 
- -#[cfg(feature = "ipfs")] -use crate::IpfsCli; -use crate::{ - db::{Connection, Database, Db}, - network::swarm::{ComposedBehaviour, ComposedEvent, TopicMessage}, - receipt::{RECEIPT_TAG, VERSION_KEY}, - settings, workflow, - workflow::WORKFLOW_TAG, - Receipt, -}; -use anyhow::{anyhow, Result}; -use crossbeam::channel; -use homestar_core::{ - consts, - workflow::{Pointer, Receipt as InvocationReceipt}, -}; -use libipld::{Cid, Ipld}; -use libp2p::{ - futures::StreamExt, - gossipsub, - kad::{ - record::Key, AddProviderOk, BootstrapOk, GetProvidersOk, GetRecordOk, KademliaEvent, - PeerRecord, PutRecordOk, QueryId, QueryResult, Quorum, Record, - }, - mdns, - multiaddr::Protocol, - swarm::{Swarm, SwarmEvent}, -}; -use std::{collections::HashMap, fmt, num::NonZeroUsize, str}; -use tokio::sync::mpsc; - -/// [Receipt]-related topic for pub(gossip)sub. -/// -/// [Receipt]: homestar_core::workflow::receipt -pub const RECEIPTS_TOPIC: &str = "receipts"; - -type WorkerSender = channel::Sender<(Cid, FoundEvent)>; - -/// Event loop handler for [libp2p] network events and commands. -#[allow(missing_debug_implementations)] -pub struct EventLoop { - receiver: mpsc::Receiver, - receipt_quorum: usize, - workflow_quorum: usize, - swarm: Swarm, - worker_senders: HashMap, -} - -impl EventLoop { - /// Setup bounded, MPSC channel for runtime to send and receive internal - /// events with workers. - pub fn setup_channel( - settings: &settings::Node, - ) -> (mpsc::Sender, mpsc::Receiver) { - mpsc::channel(settings.network.events_buffer_len) - } - - /// Create an [EventLoop] with channel sender/receiver defaults. - pub fn new( - swarm: Swarm, - receiver: mpsc::Receiver, - settings: &settings::Node, - ) -> Self { - Self { - receiver, - receipt_quorum: settings.network.receipt_quorum, - workflow_quorum: settings.network.workflow_quorum, - worker_senders: HashMap::new(), - swarm, - } - } - - /// Loop and select over swarm and pubsub [events] and client [commands]. 
- /// - /// [events]: SwarmEvent - #[cfg(not(feature = "ipfs"))] - pub async fn run(mut self, db: Db) -> Result<()> { - loop { - tokio::select! { - swarm_event = self.swarm.select_next_some() => self.handle_event(swarm_event, db.clone()).await, - runtime_event = self.receiver.recv() => if let Some(ev) = runtime_event { self.handle_runtime_event(ev, db.clone()).await }, - } - } - } - - /// Loop and select over swarm and pubsub [events]. - /// - /// [events]: SwarmEvent - #[cfg(feature = "ipfs")] - pub async fn run(mut self, db: Db, ipfs: IpfsCli) -> Result<()> { - loop { - tokio::select! { - swarm_event = self.swarm.select_next_some() => self.handle_event(swarm_event, db.clone()).await, - runtime_event = self.receiver.recv() => if let Some(ev) = runtime_event { self.handle_runtime_event(ev, db.clone(), ipfs.clone()).await }, - } - } - } - - #[cfg(not(feature = "ipfs"))] - async fn handle_runtime_event(&mut self, event: Event, db: impl Database) { - match event { - Event::CapturedReceipt(receipt, workflow_info) => { - if let Ok(conn) = db.conn().as_mut() { - match self.on_capture(receipt, workflow_info, conn) { - Ok((cid, _bytes)) => { - tracing::debug!( - cid = cid.to_string(), - "record replicated with quorum {}", - self.receipt_quorum - ) - } - - Err(err) => { - tracing::error!(error=?err, "error putting record on DHT with quorum {}", self.receipt_quorum) - } - } - } else { - tracing::error!("database connection not available") - } - } - Event::FindReceipt(cid, sender) => self.on_find_receipt(cid, sender), - Event::FindWorkflow(cid, sender) => self.on_find_workflow(cid, sender), - } - } - - #[cfg(feature = "ipfs")] - async fn handle_runtime_event(&mut self, event: Event, db: impl Database, ipfs: IpfsCli) { - match event { - Event::CapturedReceipt(receipt, workflow_info) => { - if let Ok(conn) = db.conn().as_mut() { - match self.on_capture(receipt, workflow_info, conn) { - Ok((cid, bytes)) => { - tracing::debug!( - cid = cid.to_string(), - "record replicated with 
quorum {}", - self.receipt_quorum - ); - - // Spawn client call in background, without awaiting. - tokio::spawn(async move { - match ipfs.put_receipt_bytes(bytes.to_vec()).await { - Ok(put_cid) => { - tracing::info!(cid = put_cid, "IPLD DAG node stored"); - - #[cfg(debug_assertions)] - debug_assert_eq!(put_cid, cid.to_string()); - } - Err(err) => { - tracing::info!(error=?err, cid=cid.to_string(), "Failed to store IPLD DAG node") - } - } - }); - } - Err(err) => { - tracing::error!(error=?err, "error putting record(s) on DHT with quorum {}", self.receipt_quorum) - } - } - } else { - tracing::error!("database connection not available") - } - } - Event::FindReceipt(cid, sender) => self.on_find_receipt(cid, sender), - Event::FindWorkflow(cid, sender) => self.on_find_workflow(cid, sender), - } - } - - fn on_capture( - &mut self, - receipt: Receipt, - mut workflow_info: workflow::Info, - conn: &mut Connection, - ) -> Result<(Cid, Vec)> { - let receipt_cid = receipt.cid(); - let invocation_receipt = InvocationReceipt::from(&receipt); - let instruction_bytes = receipt.instruction_cid_as_bytes(); - match self.swarm.behaviour_mut() - .gossip_publish(RECEIPTS_TOPIC, TopicMessage::CapturedReceipt(receipt)) { - Ok(msg_id) => - tracing::info!("message {msg_id} published on {RECEIPTS_TOPIC} for receipt with cid: {receipt_cid}"), - Err(err) => tracing::error!(error=?err, "message not published on {RECEIPTS_TOPIC} for receipt with cid: {receipt_cid}") - } - - let receipt_quorum = if self.receipt_quorum > 0 { - unsafe { Quorum::N(NonZeroUsize::new_unchecked(self.receipt_quorum)) } - } else { - Quorum::One - }; - - let workflow_quorum = if self.workflow_quorum > 0 { - unsafe { Quorum::N(NonZeroUsize::new_unchecked(self.receipt_quorum)) } - } else { - Quorum::One - }; - - if let Ok(receipt_bytes) = Receipt::invocation_capsule(invocation_receipt) { - let _id = self - .swarm - .behaviour_mut() - .kademlia - .put_record( - Record::new(instruction_bytes, receipt_bytes.to_vec()), - 
receipt_quorum, - ) - .map_err(anyhow::Error::msg)?; - - // Store workflow_receipt join information. - let _ = Db::store_workflow_receipt(workflow_info.cid, receipt_cid, conn); - workflow_info.increment_progress(receipt_cid); - - let wf_cid_bytes = workflow_info.cid_as_bytes(); - let wf_bytes = workflow_info.capsule()?; - - let _id = self - .swarm - .behaviour_mut() - .kademlia - .put_record(Record::new(wf_cid_bytes, wf_bytes), workflow_quorum) - .map_err(anyhow::Error::msg)?; - - Ok((receipt_cid, receipt_bytes.to_vec())) - } else { - Err(anyhow!("cannot convert receipt {receipt_cid} to bytes")) - } - } - - fn on_find_receipt(&mut self, instruction_cid: Cid, sender: WorkerSender) { - let id = self - .swarm - .behaviour_mut() - .kademlia - .get_record(Key::new(&instruction_cid.to_bytes())); - self.worker_senders.insert(id, sender); - } - - fn on_find_workflow(&mut self, workflow_cid: Cid, sender: WorkerSender) { - let id = self - .swarm - .behaviour_mut() - .kademlia - .get_record(Key::new(&workflow_cid.to_bytes())); - self.worker_senders.insert(id, sender); - } - - fn on_found_record(key_cid: Cid, value: Vec) -> Result { - match serde_ipld_dagcbor::de::from_reader(&*value) { - Ok(Ipld::Map(mut map)) => match map.pop_first() { - Some((code, Ipld::Map(mut rest))) if code == RECEIPT_TAG => { - if rest.remove(VERSION_KEY) - == Some(Ipld::String(consts::INVOCATION_VERSION.to_string())) - { - let invocation_receipt = InvocationReceipt::try_from(Ipld::Map(rest))?; - let receipt = - Receipt::try_with(Pointer::new(key_cid), &invocation_receipt)?; - Ok(FoundEvent::Receipt(receipt)) - } else { - Err(anyhow!( - "record version mismatch, current version: {}", - consts::INVOCATION_VERSION - )) - } - } - Some((code, Ipld::Map(rest))) if code == WORKFLOW_TAG => { - let workflow_info = workflow::Info::try_from(Ipld::Map(rest))?; - Ok(FoundEvent::Workflow(workflow_info)) - } - Some((code, _)) => Err(anyhow!("decode mismatch: {code} is not known")), - None => Err(anyhow!("invalid 
record value")), - }, - Ok(ipld) => Err(anyhow!( - "decode mismatch: expected an Ipld map, got {ipld:#?}", - )), - Err(err) => { - tracing::error!(error=?err, "error deserializing record value"); - Err(anyhow!("error deserializing record value")) - } - } - } - - async fn handle_event( - &mut self, - event: SwarmEvent, - db: impl Database, - ) { - match event { - SwarmEvent::Behaviour(ComposedEvent::Gossipsub(gossipsub::Event::Message { - message, - propagation_source, - message_id, - })) => match Receipt::try_from(message.data) { - Ok(receipt) => { - tracing::info!( - "got message: {receipt} from {propagation_source} with message id: {message_id}" - ); - - // Store gossiped receipt. - let _ = db - .conn() - .as_mut() - .map(|conn| Db::store_receipt(receipt, conn)); - } - Err(err) => tracing::info!(err=?err, "cannot handle incoming event message"), - }, - SwarmEvent::Behaviour(ComposedEvent::Gossipsub(gossipsub::Event::Subscribed { - peer_id, - topic, - })) => { - tracing::debug!("{peer_id} subscribed to topic {topic} over gossipsub") - } - SwarmEvent::Behaviour(ComposedEvent::Gossipsub(_)) => {} - SwarmEvent::Behaviour(ComposedEvent::Kademlia( - KademliaEvent::OutboundQueryProgressed { id, result, .. }, - )) => match result { - QueryResult::Bootstrap(Ok(BootstrapOk { peer, .. })) => { - tracing::debug!("successfully bootstrapped peer: {peer}") - } - QueryResult::GetProviders(Ok(GetProvidersOk::FoundProviders { - key, - providers, - .. - })) => { - for peer in providers { - tracing::debug!("peer {peer} provides key: {key:#?}"); - } - } - QueryResult::GetProviders(Err(err)) => { - tracing::error!("error retrieving outbound query providers: {err}") - } - QueryResult::GetRecord(Ok(GetRecordOk::FoundRecord(PeerRecord { - record: - Record { - key, - value, - publisher, - .. - }, - .. 
- }))) => { - tracing::debug!("found record {key:#?}, published by {publisher:?}"); - if let Ok(cid) = Cid::try_from(key.as_ref()) { - match Self::on_found_record(cid, value) { - Ok(FoundEvent::Receipt(receipt)) => { - tracing::info!("found receipt: {receipt}"); - if let Some(sender) = self.worker_senders.remove(&id) { - let _ = sender.send((cid, FoundEvent::Receipt(receipt))); - } else { - tracing::error!("error converting key {key:#?} to cid") - } - } - Ok(FoundEvent::Workflow(wf)) => { - tracing::info!("found workflow info: {wf:?}"); - if let Some(sender) = self.worker_senders.remove(&id) { - let _ = sender.send((cid, FoundEvent::Workflow(wf))); - } else { - tracing::error!("error converting key {key:#?} to cid") - } - } - Err(err) => tracing::error!(err=?err, "error retrieving record"), - } - } - } - QueryResult::GetRecord(Ok(_)) => {} - QueryResult::GetRecord(Err(err)) => { - tracing::error!("error retrieving record: {err}"); - } - QueryResult::PutRecord(Ok(PutRecordOk { key })) => { - tracing::debug!("successfully put record {key:#?}"); - } - QueryResult::PutRecord(Err(err)) => { - tracing::error!("error putting record: {err}") - } - QueryResult::StartProviding(Ok(AddProviderOk { key })) => { - tracing::debug!("successfully put provider record {key:#?}"); - } - QueryResult::StartProviding(Err(err)) => { - tracing::error!("error putting provider record: {err}"); - } - _ => {} - }, - SwarmEvent::Behaviour(ComposedEvent::Mdns(mdns::Event::Discovered(list))) => { - for (peer_id, _multiaddr) in list { - tracing::info!("mDNS discovered a new peer: {peer_id}"); - - self.swarm - .behaviour_mut() - .gossipsub - .add_explicit_peer(&peer_id); - } - } - SwarmEvent::Behaviour(ComposedEvent::Mdns(mdns::Event::Expired(list))) => { - for (peer_id, _multiaddr) in list { - tracing::info!("mDNS discover peer has expired: {peer_id}"); - - self.swarm - .behaviour_mut() - .gossipsub - .remove_explicit_peer(&peer_id); - } - } - SwarmEvent::NewListenAddr { address, .. 
} => { - let local_peer_id = *self.swarm.local_peer_id(); - tracing::info!( - "local node is listening on {:?}", - address.with(Protocol::P2p(local_peer_id.into())) - ); - } - SwarmEvent::IncomingConnection { .. } => {} - _ => {} - } - } -} - -/// Internal events to capture. -#[derive(Debug, Clone)] -pub enum Event { - /// [Receipt] stored and captured event. - CapturedReceipt(Receipt, workflow::Info), - /// Find a [Receipt] stored in the DHT. - /// - /// [Receipt]: InvocationReceipt - FindReceipt(Cid, WorkerSender), - /// Find a [Workflow], stored as [workflow::Info], in the DHT. - /// - /// [Workflow]: homestar_core::Workflow - FindWorkflow(Cid, WorkerSender), -} - -/// Internal events related to finding results on the DHT. -#[derive(Debug, Clone, PartialEq)] -pub enum FoundEvent { - /// Found [Receipt] on the DHT. - Receipt(Receipt), - /// Found [workflow::Info] on the DHT. - Workflow(workflow::Info), -} - -#[cfg(test)] -mod test { - use super::*; - use crate::{test_utils, workflow}; - use homestar_core::{ - ipld::DagCbor, - test_utils::workflow as workflow_test_utils, - workflow::{config::Resources, instruction::RunInstruction, prf::UcanPrf, Task}, - Workflow, - }; - use homestar_wasm::io::Arg; - - #[test] - fn found_receipt_record() { - let (invocation_receipt, receipt) = test_utils::receipt::receipts(); - let instruction_bytes = receipt.instruction_cid_as_bytes(); - let bytes = Receipt::invocation_capsule(invocation_receipt).unwrap(); - let record = Record::new(instruction_bytes, bytes); - let record_value = record.value; - if let FoundEvent::Receipt(found_receipt) = - EventLoop::on_found_record(Cid::try_from(receipt.instruction()).unwrap(), record_value) - .unwrap() - { - assert_eq!(found_receipt, receipt); - } else { - panic!("Incorrect event type") - } - } - - #[test] - fn found_workflow_record() { - let config = Resources::default(); - let (instruction1, instruction2, _) = - workflow_test_utils::related_wasm_instructions::(); - let task1 = Task::new( - 
RunInstruction::Expanded(instruction1.clone()), - config.clone().into(), - UcanPrf::default(), - ); - let task2 = Task::new( - RunInstruction::Expanded(instruction2), - config.into(), - UcanPrf::default(), - ); - - let workflow = Workflow::new(vec![task1.clone(), task2.clone()]); - let workflow_info = - workflow::Info::default(workflow.clone().to_cid().unwrap(), workflow.len()); - let workflow_cid_bytes = workflow_info.cid_as_bytes(); - let bytes = workflow_info.capsule().unwrap(); - let record = Record::new(workflow_cid_bytes, bytes); - let record_value = record.value; - if let FoundEvent::Workflow(found_workflow) = - EventLoop::on_found_record(workflow.to_cid().unwrap(), record_value).unwrap() - { - assert_eq!(found_workflow, workflow_info); - } else { - panic!("Incorrect event type") - } - } -} diff --git a/homestar-runtime/src/network/ipfs.rs b/homestar-runtime/src/network/ipfs.rs index fb4267cc..9f75b598 100644 --- a/homestar-runtime/src/network/ipfs.rs +++ b/homestar-runtime/src/network/ipfs.rs @@ -21,7 +21,7 @@ const SHA3_256: &str = "sha3-256"; /// [IpfsClient]-wrapper. #[allow(missing_debug_implementations)] -pub struct IpfsCli(Arc); +pub(crate) struct IpfsCli(Arc); impl Clone for IpfsCli { fn clone(&self) -> Self { @@ -37,14 +37,14 @@ impl Default for IpfsCli { impl IpfsCli { /// Retrieve content from a IPFS [Url]. - pub async fn get_resource(&self, url: &Url) -> Result> { + pub(crate) async fn get_resource(&self, url: &Url) -> Result> { let cid = Cid::try_from(url.to_string())?; self.get_cid(cid).await } /// Retrieve content from a [Cid]. #[cfg(not(test))] - pub async fn get_cid(&self, cid: Cid) -> Result> { + pub(crate) async fn get_cid(&self, cid: Cid) -> Result> { self.0 .cat(&cid.to_string()) .map_ok(|chunk| chunk.to_vec()) @@ -55,7 +55,7 @@ impl IpfsCli { /// Load known content from a [Cid]. 
#[cfg(test)] - pub async fn get_cid(&self, _cid: Cid) -> Result> { + pub(crate) async fn get_cid(&self, _cid: Cid) -> Result> { let path = PathBuf::from(format!( "{}/../homestar-wasm/fixtures/homestar_guest_wasm.wasm", env!("CARGO_MANIFEST_DIR") @@ -64,13 +64,14 @@ impl IpfsCli { } /// Put/Write [Receipt] into IPFS. - pub async fn put_receipt(&self, receipt: Receipt) -> Result { + #[allow(dead_code)] + pub(crate) async fn put_receipt(&self, receipt: Receipt) -> Result { let receipt_bytes: Vec = receipt.try_into()?; self.put_receipt_bytes(receipt_bytes).await } /// Put/Write [Receipt], as bytes, into IPFS. - pub async fn put_receipt_bytes(&self, receipt_bytes: Vec) -> Result { + pub(crate) async fn put_receipt_bytes(&self, receipt_bytes: Vec) -> Result { let dag_builder = DagPut::builder() .store_codec(DagCodec::Cbor) .input_codec(DagCodec::Cbor) diff --git a/homestar-runtime/src/network/mod.rs b/homestar-runtime/src/network/mod.rs index f01a0e2c..646f8495 100644 --- a/homestar-runtime/src/network/mod.rs +++ b/homestar-runtime/src/network/mod.rs @@ -4,11 +4,12 @@ //! [websocket]: ws //! [ipfs]: ipfs -pub mod eventloop; #[cfg(feature = "ipfs")] -pub mod ipfs; -pub mod pubsub; -pub mod swarm; +pub(crate) mod ipfs; +pub(crate) mod pubsub; +pub(crate) mod swarm; +#[cfg(feature = "websocket-server")] pub mod ws; -pub use eventloop::EventLoop; +#[cfg(feature = "ipfs")] +pub(crate) use ipfs::IpfsCli; diff --git a/homestar-runtime/src/network/pubsub.rs b/homestar-runtime/src/network/pubsub.rs index 1b898731..27bac555 100644 --- a/homestar-runtime/src/network/pubsub.rs +++ b/homestar-runtime/src/network/pubsub.rs @@ -1,5 +1,6 @@ //! [gossipsub] initializer for PubSub across connected peers. 
+use crate::settings; use anyhow::Result; use libp2p::{ gossipsub::{self, ConfigBuilder, Message, MessageAuthenticity, MessageId, ValidationMode}, @@ -8,13 +9,15 @@ use libp2p::{ use std::{ collections::hash_map::DefaultHasher, hash::{Hash, Hasher}, - time::Duration, }; -use crate::settings; +/// [Receipt]-related topic for pub(gossip)sub. +/// +/// [Receipt]: homestar_core::workflow::receipt +pub(crate) const RECEIPTS_TOPIC: &str = "receipts"; /// Setup [gossipsub] mesh protocol with default configuration. -pub fn new(keypair: Keypair, settings: &settings::Node) -> Result { +pub(crate) fn new(keypair: Keypair, settings: &settings::Node) -> Result { // To content-address message, we can take the hash of message and use it as an ID. let message_id_fn = |message: &Message| { let mut s = DefaultHasher::new(); @@ -23,10 +26,8 @@ pub fn new(keypair: Keypair, settings: &settings::Node) -> Result Result Result> { +pub(crate) async fn new(settings: &settings::Node) -> Result> { let keypair = settings .network .keypair_config .keypair() .with_context(|| "Failed to generate/import keypair for libp2p".to_string())?; + let peer_id = keypair.public().to_peer_id(); let transport = tcp::tokio::Transport::new(tcp::Config::default().nodelay(true)) .upgrade(upgrade::Version::V1Lazy) .authenticate(noise::Config::new(&keypair)?) .multiplex(yamux::Config::default()) - .timeout(Duration::from_secs( - settings.network.transport_connection_timeout_secs, - )) + .timeout(settings.network.transport_connection_timeout) .boxed(); let mut swarm = SwarmBuilder::with_tokio_executor( @@ -49,14 +44,16 @@ pub async fn new(settings: &settings::Node) -> Result> swarm.listen_on(settings.network.listen_address.to_string().parse()?)?; // subscribe to `receipts` topic - swarm.behaviour_mut().gossip_subscribe(RECEIPTS_TOPIC)?; + swarm + .behaviour_mut() + .gossip_subscribe(pubsub::RECEIPTS_TOPIC)?; Ok(swarm) } /// Custom event types to listen for and respond to. 
#[derive(Debug)] -pub enum ComposedEvent { +pub(crate) enum ComposedEvent { /// [gossipsub::Event] event. Gossipsub(gossipsub::Event), /// [KademliaEvent] event. @@ -65,26 +62,9 @@ pub enum ComposedEvent { Mdns(mdns::Event), } -/// Message topic. -#[derive(Debug)] -pub struct Topic(String); - -impl fmt::Display for Topic { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl Topic { - /// Make a [Topic] from a [String]. - pub fn new(s: String) -> Self { - Topic(s) - } -} - /// Message types to deliver on a topic. #[derive(Debug)] -pub enum TopicMessage { +pub(crate) enum TopicMessage { /// Receipt topic, wrapping [Receipt]. CapturedReceipt(Receipt), } @@ -93,24 +73,24 @@ pub enum TopicMessage { #[allow(missing_debug_implementations)] #[derive(NetworkBehaviour)] #[behaviour(out_event = "ComposedEvent")] -pub struct ComposedBehaviour { +pub(crate) struct ComposedBehaviour { /// [gossipsub::Behaviour] behaviour. - pub gossipsub: gossipsub::Behaviour, + pub(crate) gossipsub: gossipsub::Behaviour, /// In-memory [kademlia: Kademlia] behaviour. - pub kademlia: Kademlia, + pub(crate) kademlia: Kademlia, /// [mdns::tokio::Behaviour] behaviour. - pub mdns: mdns::tokio::Behaviour, + pub(crate) mdns: mdns::tokio::Behaviour, } impl ComposedBehaviour { /// Subscribe to [gossipsub] topic. - pub fn gossip_subscribe(&mut self, topic: &str) -> Result { + pub(crate) fn gossip_subscribe(&mut self, topic: &str) -> Result { let topic = gossipsub::IdentTopic::new(topic); self.gossipsub.subscribe(&topic) } /// Serialize [TopicMessage] and publish to [gossipsub] topic. - pub fn gossip_publish(&mut self, topic: &str, msg: TopicMessage) -> Result { + pub(crate) fn gossip_publish(&mut self, topic: &str, msg: TopicMessage) -> Result { let id_topic = gossipsub::IdentTopic::new(topic); // Make this a match once we have other topics. 
let TopicMessage::CapturedReceipt(receipt) = msg; diff --git a/homestar-runtime/src/network/ws.rs b/homestar-runtime/src/network/ws.rs index 9a233370..105d2e0a 100644 --- a/homestar-runtime/src/network/ws.rs +++ b/homestar-runtime/src/network/ws.rs @@ -1,7 +1,7 @@ //! Sets up a websocket server for sending and receiving messages from browser //! clients. -use crate::settings; +use crate::{event_handler::channel::BoundedChannelSender, runner::Runner, settings}; use anyhow::{anyhow, Result}; use axum::{ extract::{ @@ -20,36 +20,53 @@ use std::{ sync::Arc, }; use tokio::sync::broadcast; +use tracing::{debug, info}; + +/// Type alias for websocket sender. +pub(crate) type WsSender = Arc>; + +/// Message type for messages sent back from the +/// [websocket server] to the [runner] for example. +/// +/// [websocket server]: WebSocketServer +/// [runner]: crate::Runner +#[derive(Debug, Clone, PartialEq)] +pub enum WsMessage { + /// Notify the listener that the websocket server is shutting down + /// gracefully. + GracefulShutdown, +} /// WebSocket state information. -#[allow(dead_code)] -#[derive(Clone, Debug)] -pub struct WebSocket { +#[allow(dead_code, missing_debug_implementations)] +#[derive(Clone)] +pub(crate) struct WebSocketServer { addr: SocketAddr, - sender: Arc>, - receiver: Arc>, + msg_sender: WsSender, + runner_sender: Arc>, } -impl WebSocket { +impl WebSocketServer { /// Setup bounded, MPMC channel for runtime to send and received messages /// through the websocket connection(s). - pub fn setup_channel( - settings: &settings::Node, + pub(crate) fn setup_channel( + capacity: usize, ) -> (broadcast::Sender, broadcast::Receiver) { - broadcast::channel(settings.network.websocket_capacity) + broadcast::channel(capacity) } /// Start the websocket server given settings. 
- pub async fn start_server( - sender: Arc>, - receiver: Arc>, - settings: &settings::Node, + pub(crate) async fn start( + settings: settings::Network, + ws_sender: WsSender, + runner_sender: Arc>, ) -> Result<()> { - let host = IpAddr::from_str(&settings.network.websocket_host.to_string())?; - let addr = if port_available(host, settings.network.websocket_port) { - SocketAddr::from((host, settings.network.websocket_port)) + let host = IpAddr::from_str(&settings.websocket_host.to_string())?; + let port_setting = settings.websocket_port; + let addr = if port_available(host, port_setting) { + SocketAddr::from((host, port_setting)) } else { - let port = (settings.network.websocket_port..settings.network.websocket_port + 1000) + let port = (port_setting..port_setting + 1000) .find(|port| port_available(host, *port)) .ok_or_else(|| anyhow!("no free TCP ports available"))?; SocketAddr::from((host, port)) @@ -57,19 +74,22 @@ impl WebSocket { let ws_state = Self { addr, - sender, - receiver, + msg_sender: ws_sender, + runner_sender: runner_sender.clone(), }; - let app = Router::new().route("/", get(ws_handler).with_state(ws_state)); + let app = Router::new().route("/", get(ws_handler).with_state(ws_state.clone())); - tokio::spawn(async move { - axum::Server::bind(&addr) - .serve(app.into_make_service_with_connect_info::()) - .await - .expect("Websocket server to start"); - }); + info!("websocket server listening on {}", addr); - tracing::info!("websocket server starting on {addr}"); + axum::Server::bind(&addr) + .serve(app.into_make_service_with_connect_info::()) + .with_graceful_shutdown(async { + let _ = Runner::shutdown_signal().await; + info!("websocket server shutting down"); + drop(ws_state.msg_sender); + let _ = runner_sender.send(WsMessage::GracefulShutdown); + }) + .await?; Ok(()) } @@ -78,7 +98,7 @@ impl WebSocket { async fn ws_handler( ws: WebSocketUpgrade, user_agent: Option>, - State(state): State, + State(state): State, ConnectInfo(addr): ConnectInfo, ) -> 
impl IntoResponse { let user_agent = if let Some(TypedHeader(user_agent)) = user_agent { @@ -86,22 +106,22 @@ async fn ws_handler( } else { String::from("Unknown browser") }; - tracing::info!("`{user_agent}` at {addr} connected."); + info!("`{user_agent}` at {addr} connected."); // Finalize the upgrade process by returning upgrade callback. // We can customize the callback by sending additional info such as address. ws.on_upgrade(move |socket| handle_socket(socket, state)) } -async fn handle_socket(mut socket: ws::WebSocket, state: WebSocket) { +async fn handle_socket(mut socket: ws::WebSocket, state: WebSocketServer) { let addr = state.addr; // Send a ping (unsupported by some browsers) just to kick things off and // get a response. if socket.send(Message::Ping(vec![1, 2, 3])).await.is_ok() { - tracing::debug!("Pinged {}...", addr); + debug!("Pinged {}...", addr); } else { - tracing::info!("Could not send ping {}!", addr); + info!("Could not send ping {}!", addr); // no Error here since the only thing we can do is to close the connection. // If we can not send messages, there is no way to salvage the statemachine anyway. return; @@ -118,14 +138,14 @@ async fn handle_socket(mut socket: ws::WebSocket, state: WebSocket) { return; } } else { - tracing::info!("client {} abruptly disconnected", state.addr); + info!("client {} abruptly disconnected", state.addr); return; } } // By splitting socket we can send and receive at the same time. let (mut socket_sender, mut socket_receiver) = socket.split(); - let mut subscribed_rx = state.sender.subscribe(); + let mut subscribed_rx = state.msg_sender.subscribe(); let mut send_task = tokio::spawn(async move { while let Ok(msg) = subscribed_rx.recv().await { @@ -157,7 +177,7 @@ async fn handle_socket(mut socket: ws::WebSocket, state: WebSocket) { _ = (&mut recv_task) => send_task.abort(), }; - tracing::info!("Websocket context {} destroyed", addr); + info!("Websocket context {} destroyed", addr); } /// Process [messages]. 
@@ -166,33 +186,31 @@ async fn handle_socket(mut socket: ws::WebSocket, state: WebSocket) { async fn process_message(msg: Message, addr: SocketAddr) -> ControlFlow<(), ()> { match msg { Message::Text(t) => { - tracing::info!(">>> {} sent str: {:?}", addr, t); + info!(">>> {} sent str: {:?}", addr, t); } Message::Binary(d) => { - tracing::info!(">>> {} sent {} bytes: {:?}", addr, d.len(), d); + info!(">>> {} sent {} bytes: {:?}", addr, d.len(), d); } Message::Close(c) => { if let Some(cf) = c { - tracing::info!( + info!( ">>> {} sent close with code {} and reason `{}`", - addr, - cf.code, - cf.reason + addr, cf.code, cf.reason ); } else { - tracing::info!(">>> {} somehow sent close message without CloseFrame", addr); + info!(">>> {} somehow sent close message without CloseFrame", addr); } return ControlFlow::Break(()); } Message::Pong(v) => { - tracing::info!(">>> {} sent pong with {:?}", addr, v); + info!(">>> {} sent pong with {:?}", addr, v); } // You should never need to manually handle Message::Ping, as axum's websocket library // will do so for you automagically by replying with Pong and copying the v according to // spec. But if you need the contents of the pings you can see them here. 
Message::Ping(v) => { - tracing::info!(">>> {} sent ping with {:?}", addr, v); + info!(">>> {} sent ping with {:?}", addr, v); } } ControlFlow::Continue(()) @@ -204,20 +222,19 @@ fn port_available(host: IpAddr, port: u16) -> bool { #[cfg(test)] mod test { - use crate::settings::Settings; - use super::*; + use crate::{event_handler::channel::BoundedChannel, settings::Settings}; #[tokio::test] async fn ws_connect() { - let (tx, rx) = broadcast::channel(1); - let sender = Arc::new(tx); - let receiver = Arc::new(rx); - let settings = Settings::load().unwrap(); - - WebSocket::start_server(Arc::clone(&sender), Arc::clone(&receiver), settings.node()) - .await - .unwrap(); + let settings = Arc::new(Settings::load().unwrap()); + let (tx, _rx) = WebSocketServer::setup_channel(10); + let ch = BoundedChannel::oneshot(); + tokio::spawn(WebSocketServer::start( + settings.node().network().clone(), + tx.into(), + ch.tx.into(), + )); tokio_tungstenite::connect_async("ws://localhost:1337".to_string()) .await diff --git a/homestar-runtime/src/runner.rs b/homestar-runtime/src/runner.rs new file mode 100644 index 00000000..bc8f3b24 --- /dev/null +++ b/homestar-runtime/src/runner.rs @@ -0,0 +1,336 @@ +//! General [Runner] interface for working across multiple workers +//! and executing workflows. + +#[cfg(feature = "ipfs")] +use crate::network::IpfsCli; +use crate::{db::Database, network::swarm, Event, EventHandler, Settings}; +#[cfg(feature = "websocket-server")] +use crate::{ + event_handler::channel::{BoundedChannel, BoundedChannelReceiver}, + network::ws::{self, WebSocketServer}, +}; +use anyhow::Result; +use dashmap::DashMap; +use libipld::Cid; +use std::sync::Arc; +use tokio::{ + select, + signal::unix::{signal, SignalKind}, + sync::{mpsc, oneshot}, + task::AbortHandle, +}; +use tracing::info; + +/// Type alias for a [DashMap] containing running task information. +pub type RunningSet = DashMap>; + +/// Trait for managing a [DashMap] of running task information. 
+pub trait ModifiedSet { + /// Append or insert a new [AbortHandle] into the [RunningSet]. + fn append_or_insert(&mut self, cid: Cid, handles: Vec); +} + +impl ModifiedSet for RunningSet { + fn append_or_insert(&mut self, cid: Cid, mut handles: Vec) { + self.entry(cid) + .and_modify(|prev_handles| { + prev_handles.append(&mut handles); + }) + .or_insert_with(|| handles); + } +} + +/// Runner interface. +/// Used to manage [Workers] and execute/run [Workflows]. +/// +/// [Workers]: crate::Worker +/// [Workflows]: homestar_core::Workflow +#[cfg(feature = "websocket-server")] +#[derive(Debug)] +pub struct Runner { + command_sender: oneshot::Sender, + command_receiver: oneshot::Receiver, + event_sender: Arc>, + running_set: RunningSet, + #[allow(dead_code)] + ws_sender: ws::WsSender, + ws_receiver: BoundedChannelReceiver, +} + +/// Runner interface. +/// Used to manage [Workers] and execute/run [Workflows]. +/// +/// [Workers]: crate::Worker +/// [Workflows]: homestar_core::Workflow +#[cfg(not(feature = "websocket-server"))] +#[derive(Debug)] +pub struct Runner { + command_sender: oneshot::Sender, + command_receiver: oneshot::Receiver, + event_sender: Arc>, + running_set: RunningSet, +} + +impl Runner { + /// Start the Homestar runner context. + pub async fn start(settings: Arc, db: impl Database + 'static) -> Result { + let (command_sender, command_receiver) = oneshot::channel(); + let map = DashMap::new(); + let swarm = swarm::new(settings.node()).await?; + + let event_handler = EventHandler::new(swarm, db.clone(), settings.node()); + let event_sender = event_handler.sender(); + + #[cfg(feature = "ipfs")] + tokio::spawn({ + let ipfs = IpfsCli::default(); + event_handler.start(ipfs) + }); + + #[cfg(not(feature = "ipfs"))] + tokio::spawn(event_handler.start()); + + #[cfg(feature = "websocket-server")] + { + // Setup websocket communication. 
+ let (tx, _rx) = + WebSocketServer::setup_channel(settings.node().network().websocket_capacity); + let ws_tx = Arc::new(tx); + let ws_channel = BoundedChannel::oneshot(); + let oneshot_sender = ws_channel.tx; + let oneshot_receiver = ws_channel.rx; + + tokio::spawn({ + let settings = settings.node().network().clone(); + WebSocketServer::start(settings, ws_tx.clone(), oneshot_sender.into()) + }); + + Ok(Self { + command_sender, + command_receiver, + event_sender, + running_set: map, + ws_sender: ws_tx, + ws_receiver: oneshot_receiver, + }) + } + + #[cfg(not(feature = "websocket-server"))] + Ok(Self { + command_sender, + command_receiver, + event_sender, + running_set: map, + }) + } + + /// Sequence for shutting down a [Runner], including: + /// a) [EventHandler] channels, + /// b) Running workers + /// c) [Runner] channels. + /// + /// [EventHandler]: crate::EventHandler + pub async fn shutdown(&mut self) -> Result<()> { + let (shutdown_sender, shutdown_receiver) = oneshot::channel(); + self.event_sender + .send(Event::Shutdown(shutdown_sender)) + .await?; + + shutdown_receiver.await?; + + // TODO: shutdown workers + + info!("shutting down runner's channels"); + self.command_receiver.close(); + self.command_sender.closed().await; + Ok(()) + } + + /// Captures shutdown signals for [Runner] and other sub-processes like + /// the [webSocket server]. + /// + /// [websocket server]: WebSocketServer + pub async fn shutdown_signal() -> Result<()> { + let mut sigint = signal(SignalKind::interrupt())?; + let mut sigterm = signal(SignalKind::terminate())?; + + select! { + _ = tokio::signal::ctrl_c() => info!("CTRL-C received, shutting down"), + _ = sigint.recv() => info!("SIGINT received, shutting down"), + _ = sigterm.recv() => info!("SIGTERM received, shutting down"), + } + + Ok(()) + } + + /// Garbage-collect task [AbortHandle]s in the [RunningSet]. 
+ pub fn gc(&mut self) { + self.running_set.retain(|_cid, handles| { + handles.retain(|handle| !handle.is_finished()); + !handles.is_empty() + }); + } + + /// Garbage-collect task [AbortHandle]s in the [RunningSet] for a specific + /// [Worker]-workflow [Cid]. + /// + /// [Worker]: crate::Worker + pub fn gc_worker(&mut self, cid: Cid) { + if let Some(mut handles) = self.running_set.get_mut(&cid) { + handles.retain(|handle| !handle.is_finished()); + } + self.running_set.retain(|_cid, handles| !handles.is_empty()); + } + + /// Abort all [Workers]. + /// + /// [Workers]: crate::Worker + pub fn abort_all_tasks(&mut self) { + self.running_set.iter_mut().for_each(|handles| { + for abort_handle in &*handles { + abort_handle.abort(); + } + }); + } + + /// Abort a specific [Worker]'s tasks given a [Cid]. + /// + /// [Worker]: crate::Worker + pub fn abort_worker_tasks(&mut self, cid: Cid) { + if let Some(handles) = self.running_set.get_mut(&cid) { + for abort_handle in &*handles { + abort_handle.abort(); + } + } + } + + /// [mpsc::Sender] of the [EventHandler]. + /// + /// [EventHandler]: crate::EventHandler + pub fn event_sender(&self) -> Arc> { + self.event_sender.clone() + } + + /// [tokio::broadcast::Sender] for sending messages through the + /// [webSocket server] to subscribers. + /// + /// [websocket server]: WebSocketServer + #[cfg(feature = "websocket-server")] + pub fn ws_sender(&self) -> &ws::WsSender { + &self.ws_sender + } + + /// [BoundedChannel] for receiving [messages] back from the + /// [webSocket server]. + /// + /// [messages]: ws::WsMessage + /// [websocket server]: WebSocketServer + #[cfg(feature = "websocket-server")] + pub fn ws_receiver(&mut self) -> &mut BoundedChannelReceiver { + &mut self.ws_receiver + } + + /// [oneshot::Sender] for sending commands to the [Runner]. + pub fn command_sender(&self) -> &oneshot::Sender { + &self.command_sender + } + + /// [oneshot::Receiver] for Runner to receive commands. 
+ pub fn command_receiver(&mut self) -> &mut oneshot::Receiver { + &mut self.command_receiver + } +} + +#[cfg(test)] +mod test { + use super::*; + use homestar_core::test_utils; + use rand::thread_rng; + use std::{ + sync::atomic::{AtomicUsize, Ordering}, + time::Duration, + }; + + static ATOMIC_PORT: AtomicUsize = AtomicUsize::new(1338); + + async fn setup() -> Runner { + let mut settings = Settings::load().unwrap(); + settings.node.network.websocket_port = ATOMIC_PORT.fetch_add(1, Ordering::SeqCst) as u16; + let db = crate::test_utils::db::MemoryDb::setup_connection_pool( + Settings::load().unwrap().node(), + ) + .unwrap(); + + Runner::start(settings.into(), db).await.unwrap() + } + + #[tokio::test] + async fn shutdown() { + let mut runner = setup().await; + + tokio::spawn(async move { + tokio::time::sleep(Duration::from_millis(100)).await; + // Send SIGINT signal + let _ = nix::sys::signal::kill(nix::unistd::getpid(), nix::sys::signal::Signal::SIGINT); + }); + + select! { + result = Runner::shutdown_signal() => { + assert!(result.is_ok()); + select! 
{ + Ok(()) = runner.shutdown() => { + assert!(runner.command_sender().is_closed()); + #[cfg(feature = "websocket-server")] + assert_eq!(runner.ws_receiver().recv().unwrap(), ws::WsMessage::GracefulShutdown); + } + } + } + } + } + + #[tokio::test] + async fn abort_all_tasks() { + let mut runner = setup().await; + + let mut set = tokio::task::JoinSet::new(); + + for i in 0..3 { + let handle = set.spawn(async move { i }); + runner.running_set.append_or_insert( + test_utils::cid::generate_cid(&mut thread_rng()), + vec![handle], + ); + } + + runner.abort_all_tasks(); + assert!(!runner.running_set.is_empty()); + + while set.join_next().await.is_some() {} + runner.gc(); + assert!(runner.running_set.is_empty()); + } + + #[tokio::test] + async fn abort_one_task() { + let mut runner = setup().await; + + let mut set = tokio::task::JoinSet::new(); + let mut cids = vec![]; + + for i in 0..3 { + let handle = set.spawn(async move { i }); + let cid = test_utils::cid::generate_cid(&mut thread_rng()); + runner.running_set.append_or_insert(cid, vec![handle]); + cids.push(cid); + } + + runner.abort_worker_tasks(cids[0]); + assert!(runner.running_set.len() == 3); + + while set.join_next().await.is_some() {} + + runner.gc_worker(cids[0]); + + assert!(runner.running_set.len() == 2); + } +} diff --git a/homestar-runtime/src/runtime.rs b/homestar-runtime/src/runtime.rs deleted file mode 100644 index 11bdad2d..00000000 --- a/homestar-runtime/src/runtime.rs +++ /dev/null @@ -1,18 +0,0 @@ -//! General [Runtime] for working across multiple workers -//! and workflows. -//! -//! TODO: Fill this out. - -use homestar_wasm::io::Arg; -use tokio::task::JoinSet; - -/// Runtime for starting workers on workflows. 
-#[allow(dead_code)] -#[derive(Debug)] -pub struct Runtime { - /// The set of [workers] for [workflows] - /// - /// [workers]: crate::Worker - /// [workflows]: crate::Workflow - pub(crate) workers: JoinSet>, -} diff --git a/homestar-runtime/src/scheduler.rs b/homestar-runtime/src/scheduler.rs index fdd66d04..f4505237 100644 --- a/homestar-runtime/src/scheduler.rs +++ b/homestar-runtime/src/scheduler.rs @@ -5,12 +5,11 @@ use crate::{ db::{Connection, Database}, - network::eventloop::{Event, FoundEvent}, + event_handler::{channel::BoundedChannel, event::QueryRecord, swarm_event::FoundEvent}, workflow::{self, Builder, Resource, Vertex}, - Db, + Db, Event, }; use anyhow::Result; -use crossbeam::channel; use dagga::Node; use futures::future::BoxFuture; use homestar_core::{ @@ -27,6 +26,7 @@ use std::{ time::{Duration, Instant}, }; use tokio::sync::mpsc; +use tracing::info; type Schedule<'a> = Vec, usize>>>; @@ -53,7 +53,7 @@ pub struct ExecutionGraph<'a> { #[derive(Debug, Clone, Default)] pub struct TaskScheduler<'a> { /// In-memory map of task/instruction results. - pub(crate) linkmap: LinkMap>, + pub(crate) linkmap: Arc>>, /// [ExecutionGraph] of what's been run so far for a [Workflow] of `batched` /// [Tasks]. 
/// @@ -84,7 +84,7 @@ impl<'a> TaskScheduler<'a> { /// [Receipt]: crate::Receipt pub async fn init( workflow: Workflow<'a, Arg>, - settings: &'a workflow::Settings, + settings: Arc, event_sender: Arc>, conn: &mut Connection, fetch_fn: F, @@ -131,22 +131,21 @@ impl<'a> TaskScheduler<'a> { } } Err(_) => { - tracing::info!("receipt not available in the database"); - let (sender, receiver) = channel::bounded(pointers_len); + info!("receipt not available in the database"); + let channel = BoundedChannel::new(pointers_len); for ptr in &pointers { - let _ = event_sender - .blocking_send(Event::FindReceipt(ptr.cid(), sender.clone())); + let _ = event_sender.blocking_send(Event::FindRecord( + QueryRecord::with(ptr.cid(), channel.tx.clone()), + )); } let mut linkmap = LinkMap::>::new(); let mut counter = 0; - while let Ok((found_cid, FoundEvent::Receipt(found))) = receiver - .recv_deadline( - Instant::now() - + Duration::from_secs(settings.p2p_check_timeout_secs), - ) - { - if pointers.contains(&Pointer::new(found_cid)) { + while let Ok(FoundEvent::Receipt(found)) = channel.rx.recv_deadline( + Instant::now() + + Duration::from_secs(settings.p2p_check_timeout_secs), + ) { + if pointers.contains(&Pointer::new(found.cid())) { if let Ok(cid) = found.instruction().try_into() { let _ = linkmap.insert(cid, found.output_as_arg()); counter += 1; @@ -178,7 +177,7 @@ impl<'a> TaskScheduler<'a> { }; Ok(Self { - linkmap, + linkmap: Arc::new(linkmap), ran: Some(schedule), run: pivot, resume_step: step, @@ -186,7 +185,7 @@ impl<'a> TaskScheduler<'a> { }) } _ => Ok(Self { - linkmap: LinkMap::>::new(), + linkmap: Arc::new(LinkMap::>::new()), ran: None, run: schedule, resume_step: None, @@ -199,9 +198,7 @@ impl<'a> TaskScheduler<'a> { #[cfg(test)] mod test { use super::*; - use crate::{ - db::Database, network::EventLoop, settings::Settings, test_utils, workflow as wf, Receipt, - }; + use crate::{db::Database, settings::Settings, test_utils, workflow as wf, Receipt}; use futures::FutureExt; 
use homestar_core::{ ipld::DagCbor, @@ -237,12 +234,17 @@ mod test { let workflow_settings = wf::Settings::default(); let fetch_fn = |_rscs: Vec| { async { Ok(IndexMap::default()) } }.boxed(); - let (tx, mut _rx) = EventLoop::setup_channel(settings.node()); + let (tx, mut _rx) = test_utils::event::setup_channel(settings); - let scheduler = - TaskScheduler::init(workflow, &workflow_settings, tx.into(), &mut conn, fetch_fn) - .await - .unwrap(); + let scheduler = TaskScheduler::init( + workflow, + workflow_settings.into(), + tx.into(), + &mut conn, + fetch_fn, + ) + .await + .unwrap(); assert!(scheduler.linkmap.is_empty()); assert!(scheduler.ran.is_none()); @@ -293,12 +295,17 @@ mod test { let workflow_settings = wf::Settings::default(); let fetch_fn = |_rscs: Vec| { async { Ok(IndexMap::default()) } }.boxed(); - let (tx, mut _rx) = EventLoop::setup_channel(settings.node()); + let (tx, mut _rx) = test_utils::event::setup_channel(settings); - let scheduler = - TaskScheduler::init(workflow, &workflow_settings, tx.into(), &mut conn, fetch_fn) - .await - .unwrap(); + let scheduler = TaskScheduler::init( + workflow, + workflow_settings.into(), + tx.into(), + &mut conn, + fetch_fn, + ) + .await + .unwrap(); let ran = scheduler.ran.as_ref().unwrap(); @@ -369,12 +376,17 @@ mod test { let workflow_settings = wf::Settings::default(); let fetch_fn = |_rscs: Vec| { async { Ok(IndexMap::default()) } }.boxed(); - let (tx, mut _rx) = EventLoop::setup_channel(settings.node()); + let (tx, mut _rx) = test_utils::event::setup_channel(settings); - let scheduler = - TaskScheduler::init(workflow, &workflow_settings, tx.into(), &mut conn, fetch_fn) - .await - .unwrap(); + let scheduler = TaskScheduler::init( + workflow, + workflow_settings.into(), + tx.into(), + &mut conn, + fetch_fn, + ) + .await + .unwrap(); let ran = scheduler.ran.as_ref().unwrap(); diff --git a/homestar-runtime/src/settings.rs b/homestar-runtime/src/settings.rs index 718259f7..6483ca8c 100644 --- 
a/homestar-runtime/src/settings.rs +++ b/homestar-runtime/src/settings.rs @@ -7,31 +7,107 @@ use libp2p::{identity, identity::secp256k1}; use rand::{Rng, SeedableRng}; use sec1::der::Decode; use serde::Deserialize; -use serde_with::{base64::Base64, serde_as}; +use serde_with::{base64::Base64, serde_as, DurationSeconds}; use std::{ io::Read, path::{Path, PathBuf}, + time::Duration, }; use tracing::info; +/// Application settings. +#[derive(Clone, Debug, Deserialize, PartialEq)] +pub struct Settings { + pub(crate) monitoring: Monitoring, + pub(crate) node: Node, +} + +impl Settings { + /// Monitoring settings getter. + pub fn monitoring(&self) -> &Monitoring { + &self.monitoring + } + + /// Node + pub fn node(&self) -> &Node { + &self.node + } +} + +/// Process monitoring settings. +#[derive(Clone, Debug, Deserialize, PartialEq)] +pub struct Monitoring { + /// Monitoring collection interval. + #[allow(dead_code)] + process_collector_interval: u64, +} + /// Server settings. -#[derive(Clone, Debug, Deserialize)] +#[serde_as] +#[derive(Clone, Debug, Default, Deserialize, PartialEq)] pub struct Node { #[serde(default)] pub(crate) network: Network, #[serde(default)] pub(crate) db: Database, + #[serde_as(as = "DurationSeconds")] + #[serde(default = "default_shutdown_timeout")] + pub(crate) shutdown_timeout: Duration, } -/// Process monitoring settings. -#[derive(Clone, Debug, Deserialize)] -pub struct Monitoring { - /// Monitoring collection interval. - #[allow(dead_code)] - process_collector_interval: u64, +/// Network-related settings for a homestar node. +#[serde_as] +#[derive(Clone, Debug, Deserialize, PartialEq)] +#[serde(default)] +pub struct Network { + /// Buffer-length for events channel. + pub(crate) events_buffer_len: usize, + /// Address for [Swarm] to listen on. + /// + /// [Swarm]: libp2p::swarm::Swarm + #[serde(with = "http_serde::uri")] + pub(crate) listen_address: Uri, + /// Pub/sub duplicate cache time. 
+ #[serde_as(as = "DurationSeconds")] + pub(crate) pubsub_duplication_cache_time: Duration, + /// Pub/sub heartbeat interval for mesh configuration. + #[serde_as(as = "DurationSeconds")] + pub(crate) pubsub_heartbeat: Duration, + /// Pub/sub idle timeout + #[serde_as(as = "DurationSeconds")] + pub(crate) pubsub_idle_timeout: Duration, + /// Quorum for receipt records on the DHT. + pub(crate) receipt_quorum: usize, + /// Transport connection timeout. + #[serde_as(as = "DurationSeconds")] + pub(crate) transport_connection_timeout: Duration, + /// Websocket-server host address. + #[serde(with = "http_serde::uri")] + pub(crate) websocket_host: Uri, + /// Websocket-server port. + pub(crate) websocket_port: u16, + /// Number of *bounded* clients to send messages to, used for a + /// [tokio::sync::broadcast::channel] + pub(crate) websocket_capacity: usize, + /// Quorum for [workflow::Info] records on the DHT. + /// + /// [workflow::Info]: crate::workflow::Info + pub(crate) workflow_quorum: usize, + /// Pubkey setup configuration + pub(crate) keypair_config: PubkeyConfig, +} + +/// Database-related settings for a homestar node. +#[derive(Clone, Debug, Deserialize, PartialEq)] +#[serde(default)] +pub(crate) struct Database { + /// Maximum number of connections managed by the [pool]. 
+ /// + /// [pool]: crate::db::Pool + pub(crate) max_pool_size: u32, } -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, PartialEq)] /// Configure how the Network keypair is generated or using an existing one pub(crate) enum PubkeyConfig { #[serde(rename = "random")] @@ -45,7 +121,7 @@ pub(crate) enum PubkeyConfig { } /// Supported key types of homestar -#[derive(Clone, Debug, Default, Deserialize)] +#[derive(Clone, Debug, Default, Deserialize, PartialEq)] pub(crate) enum KeyType { #[default] #[serde(rename = "ed25519")] @@ -56,7 +132,7 @@ pub(crate) enum KeyType { /// Seed material for RNG generated keys #[serde_as] -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, PartialEq)] pub(crate) struct PupkeyRNGSeed { #[serde(default)] key_type: KeyType, @@ -65,13 +141,53 @@ pub(crate) struct PupkeyRNGSeed { } /// Info on where and what the Key file is -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, PartialEq)] pub(crate) struct ExistingKeyPath { #[serde(default)] key_type: KeyType, path: String, } +impl Default for Database { + fn default() -> Self { + Self { max_pool_size: 100 } + } +} + +impl Default for Network { + fn default() -> Self { + Self { + events_buffer_len: 100, + listen_address: Uri::from_static("/ip4/0.0.0.0/tcp/0"), + pubsub_duplication_cache_time: Duration::new(1, 0), + pubsub_heartbeat: Duration::new(60, 0), + pubsub_idle_timeout: Duration::new(60 * 60 * 24, 0), + receipt_quorum: 2, + transport_connection_timeout: Duration::new(20, 0), + websocket_host: Uri::from_static("127.0.0.1"), + websocket_port: 1337, + websocket_capacity: 100, + workflow_quorum: 3, + keypair_config: PubkeyConfig::Random, + } + } +} + +impl Node { + /// Network settings. + pub fn network(&self) -> &Network { + &self.network + } + /// Node shutdown timeout. 
+ pub fn shutdown_timeout(&self) -> Duration { + self.shutdown_timeout + } +} + +fn default_shutdown_timeout() -> Duration { + Duration::new(20, 0) +} + impl PubkeyConfig { /// Produce a Keypair using the given configuration. /// Calling this function will access the filesystem if configured to import a key. @@ -151,96 +267,6 @@ impl PubkeyConfig { } } -#[derive(Debug, Deserialize)] -/// Application settings. -pub struct Settings { - monitoring: Monitoring, - node: Node, -} - -impl Settings { - /// Monitoring settings getter. - pub fn monitoring(&self) -> &Monitoring { - &self.monitoring - } - - /// Node - pub fn node(&self) -> &Node { - &self.node - } -} - -/// Network-related settings for a homestar node. -#[derive(Clone, Debug, Deserialize)] -#[serde(default)] -pub(crate) struct Network { - /// - pub(crate) events_buffer_len: usize, - /// Address for [Swarm] to listen on. - /// - /// [Swarm]: libp2p::swarm::Swarm - #[serde(with = "http_serde::uri")] - pub(crate) listen_address: Uri, - /// Pub/sub duplicate cache time. - pub(crate) pubsub_duplication_cache_secs: u64, - /// Pub/sub hearbeat interval for mesh configuration. - pub(crate) pubsub_heartbeat_secs: u64, - /// Pub/sub idle timeout - pub(crate) pubsub_idle_timeout_secs: u64, - /// Quorum for receipt records on the DHT. - pub(crate) receipt_quorum: usize, - /// Transport connection timeout. - pub(crate) transport_connection_timeout_secs: u64, - /// Websocket-server host address. - #[serde(with = "http_serde::uri")] - pub(crate) websocket_host: Uri, - /// Websocket-server port. - pub(crate) websocket_port: u16, - /// Number of *bounded* clients to send messages to, used for a - /// [tokio::sync::broadcast::channel] - pub(crate) websocket_capacity: usize, - /// Quorum for [workflow::Info] records on the DHT. 
- /// - /// [workflow::Info]: crate::workflow::Info - pub(crate) workflow_quorum: usize, - /// Pubkey setup configuration - pub(crate) keypair_config: PubkeyConfig, -} - -/// Database-related settings for a homestar node. -#[derive(Clone, Debug, Deserialize)] -pub(crate) struct Database { - /// Maximum number of connections managed by the [pool]. - /// - /// [pool]: crate::db::Pool - pub(crate) max_pool_size: u32, -} - -impl Default for Network { - fn default() -> Self { - Self { - events_buffer_len: 100, - listen_address: Uri::from_static("/ip4/0.0.0.0/tcp/0"), - pubsub_duplication_cache_secs: 1, - pubsub_heartbeat_secs: 60, - pubsub_idle_timeout_secs: 60 * 60 * 24, - receipt_quorum: 2, - transport_connection_timeout_secs: 20, - websocket_host: Uri::from_static("127.0.0.1"), - websocket_port: 1337, - websocket_capacity: 100, - workflow_quorum: 3, - keypair_config: PubkeyConfig::Random, - } - } -} - -impl Default for Database { - fn default() -> Self { - Self { max_pool_size: 100 } - } -} - impl Settings { /// Load settings. 
pub fn load() -> Result { @@ -269,8 +295,33 @@ impl Settings { #[cfg(test)] mod test { + use super::*; use crate::Settings; + #[test] + fn test_defaults() { + let settings = Settings::load().unwrap(); + let node_settings = settings.node(); + + let default_settings = Node { + shutdown_timeout: Duration::from_secs(20), + ..Default::default() + }; + + assert_eq!(node_settings, &default_settings); + } + + #[test] + fn test_defaults_with_modification() { + let settings = Settings::build("fixtures/settings.toml".into()).unwrap(); + let mut default_modded_settings = Node::default(); + + default_modded_settings.network.events_buffer_len = 1000; + default_modded_settings.network.websocket_port = 9999; + default_modded_settings.shutdown_timeout = Duration::from_secs(20); + assert_eq!(settings.node(), &default_modded_settings); + } + #[test] fn import_existing_key() { let settings = Settings::build("fixtures/settings-import-ed25519.toml".into()) diff --git a/homestar-runtime/src/tasks/wasm.rs b/homestar-runtime/src/tasks/wasm.rs index f0583a41..657ed507 100644 --- a/homestar-runtime/src/tasks/wasm.rs +++ b/homestar-runtime/src/tasks/wasm.rs @@ -6,12 +6,14 @@ use homestar_wasm::{ wasmtime::{world::Env, Error as WasmRuntimeError, State, World}, }; +#[allow(dead_code)] #[allow(missing_debug_implementations)] pub(crate) struct WasmContext { env: Env, } impl WasmContext { + #[allow(dead_code)] pub(crate) fn new(data: State) -> Result { Ok(Self { env: World::default(data)?, @@ -19,6 +21,7 @@ impl WasmContext { } /// Instantiate environment via [World] and execute on [Args]. 
+ #[allow(dead_code)] pub(crate) async fn run<'a>( &mut self, bytes: Vec, diff --git a/homestar-runtime/src/test_utils/event.rs b/homestar-runtime/src/test_utils/event.rs new file mode 100644 index 00000000..e84c9ba0 --- /dev/null +++ b/homestar-runtime/src/test_utils/event.rs @@ -0,0 +1,6 @@ +use crate::{settings::Settings, Event}; +use tokio::sync::mpsc; + +pub fn setup_channel(settings: Settings) -> (mpsc::Sender, mpsc::Receiver) { + mpsc::channel(settings.node().network.events_buffer_len) +} diff --git a/homestar-runtime/src/test_utils/mod.rs b/homestar-runtime/src/test_utils/mod.rs index d041ac17..4be4e9e7 100644 --- a/homestar-runtime/src/test_utils/mod.rs +++ b/homestar-runtime/src/test_utils/mod.rs @@ -1,4 +1,6 @@ #[cfg(test)] pub mod db; #[cfg(test)] +pub mod event; +#[cfg(test)] pub mod receipt; diff --git a/homestar-runtime/src/worker.rs b/homestar-runtime/src/worker.rs index 62884465..e23d58e1 100644 --- a/homestar-runtime/src/worker.rs +++ b/homestar-runtime/src/worker.rs @@ -1,67 +1,87 @@ #[cfg(feature = "ipfs")] -use crate::workflow::settings::BackoffStrategy; +use crate::network::IpfsCli; #[cfg(feature = "ipfs")] -use crate::IpfsCli; +use crate::workflow::settings::BackoffStrategy; use crate::{ db::{Connection, Database}, - network::eventloop::{Event, FoundEvent}, + event_handler::{ + channel::BoundedChannel, + event::{Captured, QueryRecord}, + swarm_event::FoundEvent, + Event, + }, + runner::{ModifiedSet, RunningSet}, scheduler::TaskScheduler, tasks::{RegisteredTasks, WasmContext}, workflow::{self, Resource}, Db, Receipt, }; use anyhow::{anyhow, Result}; -use crossbeam::channel; use futures::FutureExt; #[cfg(feature = "ipfs")] use futures::StreamExt; use homestar_core::{ + bail, workflow::{ error::ResolveError, prf::UcanPrf, receipt::metadata::{OP_KEY, WORKFLOW_KEY}, - InstructionResult, Pointer, Receipt as InvocationReceipt, + InstructionResult, LinkMap, Pointer, Receipt as InvocationReceipt, }, Workflow, }; -use homestar_wasm::{io::Arg, 
wasmtime::State}; +use homestar_wasm::{ + io::{Arg, Output}, + wasmtime::State, +}; use indexmap::IndexMap; use libipld::{Cid, Ipld}; use std::{ collections::BTreeMap, sync::Arc, + thread, time::{Duration, Instant}, + vec, }; use tokio::{sync::mpsc, task::JoinSet}; +use tracing::{debug, error}; #[cfg(feature = "ipfs")] use tryhard::RetryFutureConfig; +/// [JoinSet] of tasks run by a [Worker]. +#[allow(dead_code)] +pub(crate) type TaskSet = JoinSet>; + /// Worker that operates over a given [TaskScheduler]. +#[allow(dead_code)] #[derive(Debug)] -pub struct Worker<'a> { +pub(crate) struct Worker<'a> { pub(crate) scheduler: TaskScheduler<'a>, pub(crate) event_sender: Arc>, - pub(crate) workflow_info: &'a mut workflow::Info, - pub(crate) workflow_settings: &'a workflow::Settings, + pub(crate) workflow_info: Arc, + pub(crate) workflow_settings: Arc, } impl<'a> Worker<'a> { /// Instantiate a new [Worker] for a [Workflow]. #[cfg(not(feature = "ipfs"))] - pub async fn new( + #[allow(dead_code)] + pub(crate) async fn new( workflow: Workflow<'a, Arg>, - workflow_info: &'a mut workflow::Info, - workflow_settings: &'a workflow::Settings, + workflow_info: Arc, + workflow_settings: Arc, event_sender: Arc>, mut conn: Connection, ) -> Result> { + let workflow_settings_scheduler = workflow_settings.clone(); + let workflow_settings_worker = workflow_settings.clone(); let fetch_fn = |rscs: Vec| { async { Self::get_resources(rscs, workflow_settings).await }.boxed() }; let scheduler = TaskScheduler::init( - workflow.clone(), - workflow_settings, + workflow, + workflow_settings_scheduler, event_sender.clone(), &mut conn, fetch_fn, @@ -70,30 +90,33 @@ impl<'a> Worker<'a> { Ok(Self { scheduler, - workflow_info, event_sender, - workflow_settings, + workflow_info, + workflow_settings: workflow_settings_worker, }) } /// Instantiate a new [Worker] for a [Workflow]. 
#[cfg(feature = "ipfs")] #[cfg_attr(docsrs, doc(cfg(feature = "ipfs")))] - pub async fn new( + #[allow(dead_code)] + pub(crate) async fn new( workflow: Workflow<'a, Arg>, - workflow_info: &'a mut workflow::Info, - workflow_settings: &'a workflow::Settings, + workflow_info: Arc, + workflow_settings: Arc, event_sender: Arc>, mut conn: Connection, ipfs: &'a IpfsCli, ) -> Result> { + let workflow_settings_scheduler = workflow_settings.clone(); + let workflow_settings_worker = workflow_settings.clone(); let fetch_fn = |rscs: Vec| { async { Self::get_resources(rscs, workflow_settings, ipfs).await }.boxed() }; let scheduler = TaskScheduler::init( workflow, - workflow_settings, + workflow_settings_scheduler, event_sender.clone(), &mut conn, fetch_fn, @@ -104,21 +127,197 @@ impl<'a> Worker<'a> { scheduler, event_sender, workflow_info, - workflow_settings, + workflow_settings: workflow_settings_worker, }) } /// Run [Worker]'s tasks in task-queue with access to the [Db] object /// to use a connection from the Database pool per run. - pub async fn run(self, db: impl Database) -> Result<()> { - self.run_queue(db).await + #[allow(dead_code)] + pub(crate) async fn run( + self, + db: impl Database + Sync, + running_set: &mut RunningSet, + ) -> Result<()> { + self.run_queue(db, running_set).await + } + + async fn run_queue( + mut self, + db: impl Database + Sync, + running_set: &mut RunningSet, + ) -> Result<()> { + fn insert_into_map(mut map: Arc>, key: Cid, value: T) + where + T: Clone, + { + Arc::make_mut(&mut map) + .entry(key) + .or_insert_with(|| value.clone()); + } + + fn resolve_cid( + cid: Cid, + workflow_settings: Arc, + linkmap: &Arc>>, + db: &impl Database, + event_sender: &Arc>, + ) -> Result, ResolveError> { + if let Some(result) = linkmap.get(&cid) { + Ok(result.to_owned()) + } else { + match Db::find_instruction(Pointer::new(cid), &mut db.conn()?) 
{ + Ok(found) => Ok(found.output_as_arg()), + Err(_) => { + debug!("no related instruction receipt found in the DB"); + let channel = BoundedChannel::oneshot(); + event_sender + .blocking_send(Event::FindRecord(QueryRecord::with(cid, channel.tx))) + .map_err(|err| ResolveError::TransportError(err.to_string()))?; + + let found = match channel.rx.recv_deadline( + Instant::now() + + Duration::from_secs(workflow_settings.p2p_timeout_secs), + ) { + Ok(FoundEvent::Receipt(found)) => found, + Ok(_) => bail!(ResolveError::UnresolvedCidError( + "wrong or unexpected event message received".to_string(), + )), + Err(err) => bail!(ResolveError::UnresolvedCidError(format!( + "timeout deadline reached for invocation receipt @ {cid}: {err}", + ))), + }; + + let found_result = found.output_as_arg(); + // Store the result in the linkmap for use in next iterations. + insert_into_map(Arc::clone(linkmap), cid, found_result.clone()); + Ok(found_result) + } + } + } + } + + for batch in self.scheduler.run.into_iter() { + let (mut task_set, handles) = batch.into_iter().try_fold( + (TaskSet::new(), vec![]), + |(mut task_set, mut handles), node| { + let vertice = node.into_inner(); + let invocation_ptr = vertice.invocation; + let instruction = vertice.instruction; + let rsc = instruction.resource(); + let parsed = vertice.parsed; + let fun = parsed.fun().ok_or_else(|| anyhow!("no function defined"))?; + + let args = parsed.into_args(); + let meta = Ipld::Map(BTreeMap::from([ + (OP_KEY.into(), fun.to_string().into()), + (WORKFLOW_KEY.into(), self.workflow_info.cid().into()), + ])); + + match RegisteredTasks::ability(&instruction.op().to_string()) { + Some(RegisteredTasks::WasmRun) => { + let wasm = self + .scheduler + .resources + .get(&Resource::Url(rsc.to_owned())) + .ok_or_else(|| anyhow!("resource not available"))? 
+ .to_owned(); + + let instruction_ptr = Pointer::try_from(instruction)?; + let state = State::default(); + let mut wasm_ctx = WasmContext::new(state)?; + + let resolved = args.resolve(|cid| { + // Resolve Cid in a separate native thread, + // under a `std::thread::scope`. + thread::scope(|scope| { + let handle = scope.spawn(|| { + resolve_cid( + cid, + self.workflow_settings.clone(), + &self.scheduler.linkmap, + &db, + &self.event_sender, + ) + }); + + handle.join().map_err(|_| { + anyhow!("failed to join thread for resolving Cid: {cid}") + })? + }) + })?; + + let handle = task_set.spawn(async move { + match wasm_ctx.run(wasm, &fun, resolved).await { + Ok(output) => { + Ok((output, instruction_ptr, invocation_ptr, meta)) + } + Err(e) => Err(anyhow!("cannot execute wasm module: {e}")), + } + }); + handles.push(handle); + } + None => error!( + "no valid task/instruction-type referenced by operation: {}", + instruction.op() + ), + }; + + Ok::<_, anyhow::Error>((task_set, handles)) + }, + )?; + + // Concurrently add handles to Runner's running set. + running_set.append_or_insert(self.workflow_info.cid(), handles); + + while let Some(res) = task_set.join_next().await { + let (executed, instruction_ptr, invocation_ptr, meta) = res??; + let output_to_store = Ipld::try_from(executed)?; + + let invocation_receipt = InvocationReceipt::new( + invocation_ptr, + InstructionResult::Ok(output_to_store), + Ipld::Null, + None, + UcanPrf::default(), + ); + + let mut receipt = Receipt::try_with(instruction_ptr, &invocation_receipt)?; + Arc::make_mut(&mut Arc::clone(&self.scheduler.linkmap)).insert( + Cid::try_from(receipt.instruction())?, + receipt.output_as_arg(), + ); + + // set receipt metadata + receipt.set_meta(meta); + // modify workflow info before progress update, in case + // that we timed out getting info from the network, but later + // recovered where we last started from. 
+ if let Some(step) = self.scheduler.resume_step { + let current_progress_count = self.workflow_info.progress_count; + Arc::make_mut(&mut self.workflow_info) + .set_progress_count(std::cmp::max(current_progress_count, step as u32)) + }; + + let stored_receipt = Db::store_receipt(receipt, &mut db.conn()?)?; + + // send internal event + self.event_sender + .send(Event::CapturedReceipt(Captured::with( + stored_receipt, + self.workflow_info.clone(), + ))) + .await?; + } + } + Ok(()) } #[cfg(feature = "ipfs")] #[cfg_attr(docsrs, doc(cfg(feature = "ipfs")))] async fn get_resources( resources: Vec, - settings: &'a workflow::Settings, + settings: Arc, ipfs: &'a IpfsCli, ) -> Result>> { /// TODO: http(s) calls @@ -158,6 +357,7 @@ impl<'a> Worker<'a> { } } let num_requests = resources.len(); + let settings = settings.as_ref(); futures::stream::iter(resources.into_iter().map(|rsc| async move { // Have to enumerate configs here, as type variants are different // and cannot be matched on. @@ -228,156 +428,21 @@ impl<'a> Worker<'a> { /// TODO: Client calls (only) over http(s). 
#[cfg(not(feature = "ipfs"))] + #[allow(dead_code)] async fn get_resources( _resources: Vec, - _settings: &'a workflow::Settings, + _settings: Arc, ) -> Result> { Ok(IndexMap::default()) } - - async fn run_queue(mut self, db: impl Database) -> Result<()> { - for batch in self.scheduler.run.into_iter() { - let (mut set, _handles) = batch.into_iter().try_fold( - (JoinSet::new(), vec![]), - |(mut set, mut handles), node| { - let vertice = node.into_inner(); - let invocation_ptr = vertice.invocation; - let instruction = vertice.instruction; - let rsc = instruction.resource(); - let parsed = vertice.parsed; - let fun = parsed.fun().ok_or_else(|| anyhow!("no function defined"))?; - - let args = parsed.into_args(); - let meta = Ipld::Map(BTreeMap::from([ - (OP_KEY.into(), fun.to_string().into()), - (WORKFLOW_KEY.into(), self.workflow_info.cid().into()) - ])); - - match RegisteredTasks::ability(&instruction.op().to_string()) { - Some(RegisteredTasks::WasmRun) => { - let wasm = self - .scheduler - .resources - .get(&Resource::Url(rsc.to_owned())) - .ok_or_else(|| anyhow!("resource not available"))? 
- .to_owned(); - - let instruction_ptr = Pointer::try_from(instruction)?; - let state = State::default(); - let mut wasm_ctx = WasmContext::new(state)?; - let resolved = - args.resolve(|cid| if let Some(result) = self.scheduler.linkmap.get(&cid) { - Ok(result.to_owned()) - } else { - match Db::find_instruction( - Pointer::new(cid), - &mut db.conn()?, - ) { - Ok(found) => Ok(found.output_as_arg()), - Err(_e) => { - tracing::debug!( - "no related instruction receipt found in the DB" - ); - let (sender, receiver) = channel::bounded(1); - self.event_sender.blocking_send(Event::FindReceipt( - cid, - sender, - )).map_err(|err| ResolveError::TransportError(err.to_string()))?; - - let found = match receiver.recv_deadline( - Instant::now() + Duration::from_secs(self.workflow_settings.p2p_timeout_secs), - ) { - Ok((found_cid, FoundEvent::Receipt(found))) if found_cid == cid => { - found - } - Ok(_) => - homestar_core::bail!( - ResolveError::UnresolvedCidError( - "wrong or unexpected event message received".to_string()) - ), - Err(err) => - homestar_core::bail!(ResolveError::UnresolvedCidError( - format!("timeout deadline reached for invocation receipt @ {cid}: {err}")) - ), - }; - - Ok(found.output_as_arg()) - } - } - })?; - - let handle = set.spawn(async move { - match wasm_ctx.run(wasm, &fun, resolved).await { - Ok(output) => { - Ok((output, instruction_ptr, invocation_ptr, meta)) - } - Err(e) => Err(anyhow!("cannot execute wasm module: {e}")), - } - }); - handles.push(handle); - } - None => tracing::error!( - "no valid task/instruction-type referenced by operation: {}", - instruction.op() - ), - }; - - Ok::<_, anyhow::Error>((set, handles)) - }, - )?; - - while let Some(res) = set.join_next().await { - let (executed, instruction_ptr, invocation_ptr, meta) = res??; - let output_to_store = Ipld::try_from(executed)?; - - let invocation_receipt = InvocationReceipt::new( - invocation_ptr, - InstructionResult::Ok(output_to_store), - Ipld::Null, - None, - UcanPrf::default(), - 
); - - let mut receipt = Receipt::try_with(instruction_ptr, &invocation_receipt)?; - self.scheduler.linkmap.insert( - Cid::try_from(receipt.instruction())?, - receipt.output_as_arg(), - ); - - // set receipt metadata - receipt.set_meta(meta); - // modify workflow info before progress update, in case - // that we timed out getting info from the network, but later - // recovered where we last started from. - if let Some(step) = self.scheduler.resume_step { - self.workflow_info.set_progress_count(std::cmp::max( - self.workflow_info.progress_count, - step as u32, - )) - }; - - let stored_receipt = Db::store_receipt(receipt, &mut db.conn()?)?; - - // send internal event - self.event_sender - .send(Event::CapturedReceipt( - stored_receipt, - self.workflow_info.clone(), - )) - .await?; - } - } - Ok(()) - } } #[cfg(test)] mod test { use super::*; + use crate::{db::Database, test_utils, workflow as wf, Settings}; #[cfg(feature = "ipfs")] - use crate::IpfsCli; - - use crate::{db::Database, network::EventLoop, settings::Settings, test_utils, workflow as wf}; + use dashmap::DashMap; use homestar_core::{ ipld::DagCbor, test_utils::workflow as workflow_test_utils, @@ -408,21 +473,21 @@ mod test { let workflow = Workflow::new(vec![task1.clone(), task2.clone()]); let workflow_cid = workflow.clone().to_cid().unwrap(); - let workflow_settings = wf::Settings::default(); + let workflow_settings = Arc::new(wf::Settings::default()); let settings = Settings::load().unwrap(); #[cfg(feature = "ipfs")] - let (tx, mut rx) = EventLoop::setup_channel(settings.node()); + let (tx, mut rx) = test_utils::event::setup_channel(settings); #[cfg(not(feature = "ipfs"))] - let (tx, mut _rx) = EventLoop::setup_channel(settings.node()); + let (tx, mut _rx) = test_utils::event::setup_channel(settings); #[cfg(feature = "ipfs")] let ipfs = IpfsCli::default(); - let mut workflow_info = wf::Info::gather( + let workflow_info = wf::Info::gather( workflow.clone(), - &workflow_settings, - &tx.clone().into(), + 
workflow_settings.clone(), + tx.clone().into(), &mut conn, ) .await @@ -431,8 +496,8 @@ mod test { #[cfg(feature = "ipfs")] let worker = Worker::new( workflow, - &mut workflow_info, - &workflow_settings, + workflow_info.into(), + workflow_settings, tx.into(), conn, &ipfs, @@ -442,8 +507,8 @@ mod test { #[cfg(not(feature = "ipfs"))] let worker = Worker::new( workflow, - &mut workflow_info, - &workflow_settings, + workflow_info.into(), + workflow_settings.clone(), tx.into(), conn, ) @@ -459,8 +524,12 @@ mod test { #[cfg(feature = "ipfs")] { + let mut running_set = DashMap::new(); let worker_workflow_cid = worker.workflow_info.cid; - worker.run(db.clone()).await.unwrap(); + worker.run(db.clone(), &mut running_set).await.unwrap(); + assert_eq!(running_set.len(), 1); + assert!(running_set.contains_key(&worker_workflow_cid)); + assert_eq!(running_set.get(&worker_workflow_cid).unwrap().len(), 2); // first time check DHT for workflow info let workflow_info_event = rx.recv().await.unwrap(); @@ -469,12 +538,15 @@ mod test { let next_next_run_receipt = rx.recv().await.unwrap(); match workflow_info_event { - Event::FindWorkflow(cid, _) => assert_eq!(cid, worker_workflow_cid), + Event::FindRecord(QueryRecord { cid, .. }) => assert_eq!(cid, worker_workflow_cid), _ => panic!("Wrong event type"), }; let (next_receipt, _wf_info) = match next_run_receipt { - Event::CapturedReceipt(next_receipt, _) => { + Event::CapturedReceipt(Captured { + receipt: next_receipt, + .. + }) => { let mut conn = db.conn().unwrap(); let _ = Db::store_workflow_receipt(workflow_cid, next_receipt.cid(), &mut conn); let mut info = workflow::Info::default(workflow_cid, 2); @@ -486,7 +558,10 @@ mod test { }; let (_next_next_receipt, wf_info) = match next_next_run_receipt { - Event::CapturedReceipt(next_next_receipt, _) => { + Event::CapturedReceipt(Captured { + receipt: next_next_receipt, + .. 
+ }) => { let mut conn = db.conn().unwrap(); let _ = Db::store_workflow_receipt( workflow_cid, @@ -554,7 +629,7 @@ mod test { let workflow = Workflow::new(vec![task1.clone(), task2.clone()]); let workflow_cid = workflow.clone().to_cid().unwrap(); - let workflow_settings = wf::Settings::default(); + let workflow_settings = Arc::new(wf::Settings::default()); let settings = Settings::load().unwrap(); // already have stored workflow information (from a previous run) @@ -571,17 +646,17 @@ mod test { .unwrap(); #[cfg(feature = "ipfs")] - let (tx, mut rx) = EventLoop::setup_channel(settings.node()); + let (tx, mut rx) = test_utils::event::setup_channel(settings); #[cfg(not(feature = "ipfs"))] - let (tx, mut _rx) = EventLoop::setup_channel(settings.node()); + let (tx, mut _rx) = test_utils::event::setup_channel(settings); #[cfg(feature = "ipfs")] let ipfs = IpfsCli::default(); - let mut workflow_info = wf::Info::gather( + let workflow_info = wf::Info::gather( workflow.clone(), - &workflow_settings, - &tx.clone().into(), + workflow_settings.clone(), + tx.clone().into(), &mut conn, ) .await @@ -590,8 +665,8 @@ mod test { #[cfg(feature = "ipfs")] let worker = Worker::new( workflow, - &mut workflow_info, - &workflow_settings, + workflow_info.into(), + workflow_settings, tx.into(), conn, &ipfs, @@ -601,8 +676,8 @@ mod test { #[cfg(not(feature = "ipfs"))] let worker = Worker::new( workflow, - &mut workflow_info, - &workflow_settings, + workflow_info.into(), + workflow_settings.clone(), tx.into(), conn, ) @@ -622,13 +697,21 @@ mod test { #[cfg(feature = "ipfs")] { - worker.run(db.clone()).await.unwrap(); + let mut running_set = DashMap::new(); + let worker_workflow_cid = worker.workflow_info.cid; + worker.run(db.clone(), &mut running_set).await.unwrap(); + assert_eq!(running_set.len(), 1); + assert!(running_set.contains_key(&worker_workflow_cid)); + assert_eq!(running_set.get(&worker_workflow_cid).unwrap().len(), 1); // we should have received 1 receipt let next_run_receipt = 
rx.recv().await.unwrap(); let (_next_receipt, wf_info) = match next_run_receipt { - Event::CapturedReceipt(next_receipt, _) => { + Event::CapturedReceipt(Captured { + receipt: next_receipt, + .. + }) => { let mut conn = db.conn().unwrap(); let _ = Db::store_workflow_receipt(workflow_cid, next_receipt.cid(), &mut conn); let mut info = workflow::Info::default(workflow_cid, 2); @@ -713,7 +796,7 @@ mod test { let workflow = Workflow::new(vec![task1.clone(), task2.clone()]); let workflow_cid = workflow.clone().to_cid().unwrap(); - let workflow_settings = wf::Settings::default(); + let workflow_settings = Arc::new(wf::Settings::default()); let settings = Settings::load().unwrap(); // already have stored workflow information (from a previous run) @@ -735,18 +818,15 @@ mod test { ) .unwrap(); - #[cfg(feature = "ipfs")] - let (tx, mut rx) = EventLoop::setup_channel(settings.node()); - #[cfg(not(feature = "ipfs"))] - let (tx, mut rx) = EventLoop::setup_channel(settings.node()); + let (tx, mut rx) = test_utils::event::setup_channel(settings); #[cfg(feature = "ipfs")] let ipfs = IpfsCli::default(); - let mut workflow_info = wf::Info::gather( + let workflow_info = wf::Info::gather( workflow.clone(), - &workflow_settings, - &tx.clone().into(), + workflow_settings.clone(), + tx.clone().into(), &mut conn, ) .await @@ -755,8 +835,8 @@ mod test { #[cfg(feature = "ipfs")] let worker = Worker::new( workflow, - &mut workflow_info, - &workflow_settings, + workflow_info.into(), + workflow_settings, tx.into(), conn, &ipfs, @@ -766,8 +846,8 @@ mod test { #[cfg(not(feature = "ipfs"))] let worker = Worker::new( workflow, - &mut workflow_info, - &workflow_settings, + workflow_info.into(), + workflow_settings, tx.into(), conn, ) diff --git a/homestar-runtime/src/workflow/info.rs b/homestar-runtime/src/workflow/info.rs index a13d2843..3e704caa 100644 --- a/homestar-runtime/src/workflow/info.rs +++ b/homestar-runtime/src/workflow/info.rs @@ -1,10 +1,9 @@ use crate::{ db::{Connection, Database}, 
- network::eventloop::{Event, FoundEvent}, + event_handler::{channel::BoundedChannel, event::QueryRecord, swarm_event::FoundEvent, Event}, Db, Receipt, }; use anyhow::{anyhow, bail, Result}; -use crossbeam::channel; use diesel::{Associations, Identifiable, Insertable, Queryable, Selectable}; use homestar_core::{ipld::DagCbor, workflow::Pointer, Workflow}; use homestar_wasm::io::Arg; @@ -15,6 +14,7 @@ use std::{ time::{Duration, Instant}, }; use tokio::sync::mpsc; +use tracing::info; /// [Workflow Info] header tag, for sharing over libp2p. /// @@ -157,8 +157,8 @@ impl Info { /// [workflow settings]: super::Settings pub async fn gather<'a>( workflow: Workflow<'_, Arg>, - workflow_settings: &'a super::Settings, - event_sender: &'a Arc>, + workflow_settings: Arc, + event_sender: Arc>, conn: &mut Connection, ) -> Result { let workflow_len = workflow.len(); @@ -167,18 +167,19 @@ impl Info { let workflow_info = match Db::join_workflow_with_receipts(workflow_cid, conn) { Ok((wf_info, receipts)) => Info::new(workflow_cid, receipts, wf_info.num_tasks as u32), Err(_err) => { - tracing::info!("workflow information not available in the database"); - let (sender, receiver) = channel::bounded(1); + info!("workflow information not available in the database"); + let channel = BoundedChannel::oneshot(); event_sender - .send(Event::FindWorkflow(workflow_cid, sender)) + .send(Event::FindRecord(QueryRecord::with( + workflow_cid, + channel.tx, + ))) .await?; - match receiver.recv_deadline( + match channel.rx.recv_deadline( Instant::now() + Duration::from_secs(workflow_settings.p2p_timeout_secs), ) { - Ok((found_cid, FoundEvent::Workflow(workflow_info))) - if found_cid == workflow_cid => - { + Ok(FoundEvent::Workflow(workflow_info)) => { // store workflow from info Db::store_workflow( Stored::new( @@ -190,11 +191,11 @@ impl Info { workflow_info } - Ok((found_cid, event)) => { - bail!("received unexpected event {event:?} for workflow {found_cid}") + Ok(event) => { + bail!("received 
unexpected event {event:?} for workflow {workflow_cid}") } Err(err) => { - tracing::info!(error=?err, "no information found for {workflow_cid}, setting default"); + info!(error=?err, "no information found for {workflow_cid}, setting default"); let workflow_info = Info::default(workflow_cid, workflow_len); // store workflow from info Db::store_workflow( diff --git a/homestar-wasm/Cargo.toml b/homestar-wasm/Cargo.toml index d6ba6cad..76290671 100644 --- a/homestar-wasm/Cargo.toml +++ b/homestar-wasm/Cargo.toml @@ -27,16 +27,16 @@ atomic_refcell = "0.1" enum-as-inner = { workspace = true } heck = "0.4" homestar-core = { version = "0.1", path = "../homestar-core" } -itertools = "0.10" +itertools = "0.11" libipld = { workspace = true } rust_decimal = "1.30" stacker = "0.1" thiserror = { workspace = true } tracing = { workspace = true } -wasi-common = "8.0" +wasi-common = "10.0" wasmparser = "0.104" wasmtime = { version = "8.0", features = ["async", "component-model", "default"] } -wasmtime-component-util = "8.0" +wasmtime-component-util = "10.0" wat = "1.0" wit-component = "0.8" diff --git a/homestar-wasm/src/wasmtime/ipld.rs b/homestar-wasm/src/wasmtime/ipld.rs index 6ad31767..38d101b1 100644 --- a/homestar-wasm/src/wasmtime/ipld.rs +++ b/homestar-wasm/src/wasmtime/ipld.rs @@ -203,8 +203,8 @@ impl RuntimeVal { .enumerate() .with_position() .fold_while(Ok((Ok(Val::Bool(false)), Tags::default())), |acc, pos| { - let is_last = matches!(pos, Position::Last(_) | Position::Only(_)); - let (idx, (ty, elem)) = pos.into_inner(); + let is_last = matches!(pos.0, Position::Last | Position::Only); + let (idx, (ty, elem)) = pos.1; match RuntimeVal::try_from(elem, &InterfaceType::TypeRef(&ty)) { Ok(RuntimeVal(value, tags)) => { if value.ty() == ty { @@ -247,8 +247,8 @@ impl RuntimeVal { .enumerate() .with_position() .fold_while(Ok(Val::Bool(false)), |acc, pos| { - let is_last = matches!(pos, Position::Last(_) | Position::Only(_)); - let (idx, (ty, elem)) = pos.into_inner(); + let 
is_last = matches!(pos.0, Position::Last | Position::Only); + let (idx, (ty, elem)) = pos.1; match RuntimeVal::try_from(elem, &InterfaceType::TypeRef(&ty)) { Ok(RuntimeVal(value, _tags)) => { if value.ty() == ty {