From a1af099035fca435af59d6eafd2a435a77ab8401 Mon Sep 17 00:00:00 2001 From: Vladimir Petrzhikovskii Date: Tue, 7 May 2024 21:06:52 +0200 Subject: [PATCH] docs: add contributing, git, and styleguide docs --- CHANGELOG.md | 9 + README.md | 8 + docs/contributing.md | 32 ++ docs/git.md | 374 +++++++++++++++++++++++ docs/styleguide.md | 690 +++++++++++++++++++++++++++++++++++++++++++ justfile | 8 +- simulator/README.md | 2 +- 7 files changed, 1121 insertions(+), 2 deletions(-) create mode 100644 CHANGELOG.md create mode 100644 docs/contributing.md create mode 100644 docs/git.md create mode 100644 docs/styleguide.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..0c7ef7bab --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,9 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres +to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] diff --git a/README.md b/README.md index 1cce74a67..d2c4b7955 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,13 @@ To interactivly choose what to run: just ``` +To format code: + +```bash +just fmt +``` + + ## Prebuilt RocksDB By default, we compile RocksDB (a C++ project) from source during the build. @@ -49,3 +56,4 @@ git checkout v8.10.0 make -j 10 static_lib export ROCKSDB_LIB_DIR=/path/to/rocksdb ``` + diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 000000000..cd1f24534 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,32 @@ +## Pull Requests + +All contributions to `tycho` are made via Pull Requests (PRs). Please follow +these steps when creating a PR: + +- **Fork the Repository:** Fork the repository or create a new branch in the + repository if you have write access. +- **Commit Structure:** We review each commit separately, and we do not use the + squash-merge strategy. 
Please manually combine any fixup commits before + sending them for review. Each commit should ideally focus on a single task. + For example, if you need to refactor a function to add a new feature cleanly, + place the refactoring in one commit and the new feature in another. If the + refactoring itself consists of many parts, separate these into individual + commits. Include tests and documentation in the same commit as the code they + test and document. The commit message should describe the changes in the + commit; the PR description can be brief or even empty, but feel free to + include a personal message. +- **Testing:** Ensure that the changes are thoroughly tested. If the PR + introduces a new feature, it should also contain tests for that feature. +- **Draft PRs:** Feel free to submit draft PRs to get early feedback and to + ensure you are on the right track. +- **Commit Naming Convention:** Each commit should follow [Conventional + Commits](https://www.conventionalcommits.org/en/v1.0.0/). This helps us + automatically generate changelogs and version bumps. (todo: implement this) +- **Documentation of Changes:** If your PR introduces a user-observable change ( + e.g., a new protocol feature, new configuration option, new Prometheus metric, + etc.), please document it in [CHANGELOG.md](../CHANGELOG.md) in + the `[unreleased]` section. +- **No Merge Policy:** The tycho repository uses + a [no merge policy](./git.md#no-merge-policy). Please rebase your branch on + top of the `master` branch before creating a PR. + diff --git a/docs/git.md b/docs/git.md new file mode 100644 index 000000000..b2a7bcc45 --- /dev/null +++ b/docs/git.md @@ -0,0 +1,374 @@ +## Standard Process + +Below is the normal procedure that you're likely to use for most minor changes +and PRs: + +1. Ensure that you're making your changes on top of master: + `git checkout master`. +2. Get the latest changes from the Rust + repo: `git pull upstream master --ff-only`. 
+ (see [No-Merge Policy][no-merge-policy] for more info about this). +3. Make a new branch for your change: `git checkout -b issue-12345-fix`. +4. Make some changes to the repo and test them. +5. Stage your changes via `git add src/changed/file.rs src/another/change.rs` + and then commit them with `git commit`. Of course, making intermediate + commits + may be a good idea as well. Avoid `git add .`, as it makes it too easy to + unintentionally commit changes that should not be committed, such as + submodule + updates. You can use `git status` to check if there are any files you forgot + to stage. +6. Push your changes to your + fork: `git push --set-upstream origin issue-12345-fix` + (After adding commits, you can use `git push` and after rebasing or + pulling-and-rebasing, you can use `git push --force-with-lease`). +7. [Open a PR][ghpullrequest] from your fork to `broxus/tycho`'s master + branch. + +[ghpullrequest]: https://guides.github.com/activities/forking/#making-a-pull-request + +If your reviewer requests changes, the procedure for those changes looks much +the same, with some steps skipped: + +1. Ensure that you're making changes to the most recent version of your code: + `git checkout issue-12345-fix`. +2. Make, stage, and commit your additional changes just like before. +3. Push those changes to your fork: `git push`. + +[no-merge-policy]: #keeping-things-up-to-date + +## Troubleshooting git issues + +You don't need to clone `broxus/tycho` from scratch if it's out of date! +Even if you think you've messed it up beyond repair, there are ways to fix +the git state that don't require downloading the whole repository again. +Here are some common issues you might run into: + +### I made a merge commit by accident. + +Git has two ways to update your branch with the newest changes: merging and +rebasing. +Rust [uses rebasing][no-merge-policy]. If you make a merge commit, it's not too +hard to fix: +`git rebase -i upstream/master`. 
+
+See [Rebasing](#rebasing) for more about rebasing.
+
+### I see "error: cannot rebase" when I try to rebase
+
+These are two common errors to see when rebasing:
+
+```
+error: cannot rebase: Your index contains uncommitted changes.
+error: Please commit or stash them.
+```
+
+```
+error: cannot rebase: You have unstaged changes.
+error: Please commit or stash them.
+```
+
+(The first complains about staged (index) changes, the second about unstaged
+changes in the working tree.)
+
+This means you have made changes since the last time you made a commit. To be
+able to rebase, either
+commit your changes, or make a temporary commit called a "stash" to have them
+still not be committed
+when you finish rebasing. You may want to configure git to make this "stash"
+automatically, which
+will prevent the "cannot rebase" error in nearly all cases:
+
+```
+git config --global rebase.autostash true
+```
+
+See <https://git-scm.com/book/en/v2/Git-Tools-Stashing-and-Cleaning> for more
+info about stashing.
+
+### failed to push some refs
+
+`git push` will not work properly and say something like this:
+
+```
+ ! [rejected] issue-xxxxx -> issue-xxxxx (non-fast-forward)
+error: failed to push some refs to 'https://github.com/username/tycho.git'
+hint: Updates were rejected because the tip of your current branch is behind
+hint: its remote counterpart. Integrate the remote changes (e.g.
+hint: 'git pull ...') before pushing again.
+hint: See the 'Note about fast-forwards' in 'git push --help' for details.
+```
+
+The advice this gives is incorrect! Because of this repo's
+["no-merge" policy](#no-merge-policy) the merge commit created by `git pull`
+will not be allowed in the final PR, in addition to defeating the point of the
+rebase! Use `git push --force-with-lease` instead.
+
+### Git is trying to rebase commits I didn't write?
+
+If you see many commits in your rebase list, or merge commits, or commits by
+other people that you
+didn't write, it likely means you're trying to rebase over the wrong branch.
For +example, you may +have a `broxus/tycho` remote `upstream`, but ran `git rebase origin/master` +instead of `git rebase upstream/master`. The fix is to abort the rebase and use the correct branch +instead: + +``` +git rebase --abort +git rebase -i upstream/master +``` + +## Rebasing and Conflicts + +When you edit your code locally, you are making changes to the version of +broxus/tycho that existed when you created your feature branch. As such, when +you submit your PR it is possible that some of the changes that have been made +to broxus/tycho since then are in conflict with the changes you've made. +When this happens, you need to resolve the conflicts before your changes can be +merged. To do that, you need to rebase your work on top of broxus/tycho. + +### Rebasing + +To rebase your feature branch on top of the newest version of the master branch +of broxus/tycho, checkout your branch, and then run this command: + +``` +git pull --rebase https://github.com/broxus/tycho.git master +``` + +> If you are met with the following error: +> +> ``` +> error: cannot pull with rebase: Your index contains uncommitted changes. +> error: please commit or stash them. +> ``` +> +> it means that you have some uncommitted work in your working tree. In that +> case, run `git stash` before rebasing, and then `git stash pop` after you +> have rebased and fixed all conflicts. + +When you rebase a branch on master, all the changes on your branch are +reapplied to the most recent version of master. In other words, Git tries to +pretend that the changes you made to the old version of master were instead +made to the new version of master. During this process, you should expect to +encounter at least one "rebase conflict." This happens when Git's attempt to +reapply the changes fails because your changes conflicted with other changes +that have been made. 
You can tell that this happened because you'll see
+lines in the output that look like
+
+```
+CONFLICT (content): Merge conflict in file.rs
+```
+
+When you open these files, you'll see sections of the form
+
+```
+<<<<<<< HEAD
+Original code
+=======
+Your code
+>>>>>>> 8fbf656... Commit fixes 12345
+```
+
+This represents the lines in the file that Git could not figure out how to
+rebase. The section between `<<<<<<< HEAD` and `=======` has the code from
+master, while the other side has your version of the code. You'll need to
+decide how to deal with the conflict. You may want to keep your changes,
+keep the changes on master, or combine the two.
+
+Generally, resolving the conflict consists of two steps: First, fix the
+particular conflict. Edit the file to make the changes you want and remove the
+`<<<<<<<`, `=======` and `>>>>>>>` lines in the process. Second, check the
+surrounding code. If there was a conflict, it's likely there are some logical
+errors lying around too! It's a good idea to run `cargo check` here to make sure
+there are no glaring errors.
+
+Once you're all done fixing the conflicts, you need to stage the files that had
+conflicts in them via `git add`. Afterwards, run `git rebase --continue` to let
+Git know that you've resolved the conflicts and it should finish the rebase.
+
+Once the rebase has succeeded, you'll want to update the associated branch on
+your fork with `git push --force-with-lease`.
+
+### Keeping things up to date
+
+The above section on [Rebasing](#rebasing) is a specific
+guide on rebasing work and dealing with merge conflicts.
+Here is some general advice about how to keep your local repo
+up-to-date with upstream changes:
+
+Using `git pull upstream master` while on your local master branch regularly
+will keep it up-to-date. You will also want to rebase your feature branches
+up-to-date as well.
After pulling, you can checkout the feature branches +and rebase them: + +``` +git checkout master +git pull upstream master --ff-only # to make certain there are no merge commits +git rebase master feature_branch +git push --force-with-lease # (set origin to be the same as local) +``` + +To avoid merges as per the [No-Merge Policy](#no-merge-policy), you may want to +use +`git config pull.ff only` (this will apply the config only to the local repo) +to ensure that Git doesn't create merge commits when `git pull`ing, without +needing to pass `--ff-only` or `--rebase` every time. + +You can also `git push --force-with-lease` from master to keep your fork's +master in sync with +upstream. + +## Advanced Rebasing + +### Squash your commits + +If your branch contains multiple consecutive rewrites of the same code, or if +the rebase conflicts are extremely severe, you can use +`git rebase --interactive master` to gain more control over the process. This +allows you to choose to skip commits, edit the commits that you do not skip, +change the order in which they are applied, or "squash" them into each other. + +Alternatively, you can sacrifice the commit history like this: + +``` +# squash all the changes into one commit so you only have to worry about conflicts once +git rebase -i $(git merge-base master HEAD) # and squash all changes along the way +git rebase master +# fix all merge conflicts +git rebase --continue +``` + +"Squashing" commits into each other causes them to be merged into a single +commit. Both the upside and downside of this is that it simplifies the history. +On the one hand, you lose track of the steps in which changes were made, but +the history becomes easier to work with. + +You also may want to squash just the last few commits together, possibly +because they only represent "fixups" and not real changes. For example, +`git rebase --interactive HEAD~2` will allow you to edit the two commits only. 
+ +### `git range-diff` + +After completing a rebase, and before pushing up your changes, you may want to +review the changes between your old branch and your new one. You can do that +with `git range-diff master @{upstream} HEAD`. + +The first argument to `range-diff`, `master` in this case, is the base revision +that you're comparing your old and new branch against. The second argument is +the old version of your branch; in this case, `@upstream` means the version that +you've pushed to GitHub, which is the same as what people will see in your pull +request. Finally, the third argument to `range-diff` is the _new_ version of +your branch; in this case, it is `HEAD`, which is the commit that is currently +checked-out in your local repo. + +Note that you can also use the equivalent, abbreviated form `git range-diff master @{u} HEAD`. + +Unlike in regular Git diffs, you'll see a `-` or `+` next to another `-` or `+` +in the range-diff output. The marker on the left indicates a change between the +old branch and the new branch, and the marker on the right indicates a change +you've committed. So, you can think of a range-diff as a "diff of diffs" since +it shows you the differences between your old diff and your new diff. + +Here's an example of `git range-diff` output (taken from [Git's +docs][range-diff-example-docs]): + +``` +-: ------- > 1: 0ddba11 Prepare for the inevitable! +1: c0debee = 2: cab005e Add a helpful message at the start +2: f00dbal ! 3: decafe1 Describe a bug + @@ -1,3 +1,3 @@ + Author: A U Thor + + -TODO: Describe a bug + +Describe a bug + @@ -324,5 +324,6 + This is expected. + + -+What is unexpected is that it will also crash. + ++Unexpectedly, it also crashes. This is a bug, and the jury is + ++still out there how to fix it best. See ticket #314 for details. + + Contact +3: bedead < -: ------- TO-UNDO +``` + +(Note that `git range-diff` output in your terminal will probably be easier to +read than in this example because it will have colors.) 
+
+Another feature of `git range-diff` is that, unlike `git diff`, it will also
+diff commit messages. This feature can be useful when amending several commit
+messages so you can make sure you changed the right parts.
+
+`git range-diff` is a very useful command, but note that it can take some time
+to get used to its output format. You may also find Git's documentation on the
+command useful, especially their ["Examples" section][range-diff-example-docs].
+
+[range-diff-example-docs]: https://git-scm.com/docs/git-range-diff#_examples
+
+## No-Merge Policy
+
+The broxus/tycho repo uses what is known as a "rebase workflow." This means
+that merge commits in PRs are not accepted. As a result, if you are running
+`git merge` locally, chances are good that you should be rebasing instead. Of
+course, this is not always true; if your merge will just be a fast-forward,
+like the merges that `git pull` usually performs, then no merge commit is
+created and you have nothing to worry about. Running `git config merge.ff only`
+(this will apply the config to the local repo)
+once will ensure that all the merges you perform are of this type, so that you
+cannot make a mistake.
+
+There are a number of reasons for this decision and like all others, it is a
+tradeoff. The main advantage is the generally linear commit history. This
+greatly simplifies bisecting and makes the history and commit log much easier
+to follow and understand.
+
+## Tips for reviewing
+
+**NOTE**: This section is for _reviewing_ PRs, not authoring them.
+
+### Fetching PRs
+
+To checkout PRs locally, you can
+use `git fetch upstream pull/NNNNN/head && git checkout FETCH_HEAD`.
+
+You can also use GitHub's CLI tool. GitHub shows a button on PRs where you can
+copy-paste the
+command to check it out locally. See <https://cli.github.com/> for more info.
+ +### Moving large sections of code + +Git and Github's default diff view for large moves _within_ a file is quite +poor; it will show each +line as deleted and each line as added, forcing you to compare each line +yourself. Git has an option +to show moved lines in a different color: + +``` +git log -p --color-moved=dimmed-zebra --color-moved-ws=allow-indentation-change +``` + +See [the docs for `--color-moved`](https://git-scm.com/docs/git-diff#Documentation/git-diff.txt---color-movedltmodegt) +for more info. + +### range-diff + +See [the relevant section for PR authors](#git-range-diff). This can be useful +for comparing code +that was force-pushed to make sure there are no unexpected changes. + +### Ignoring changes to specific files + +Many large files in the repo are autogenerated. To view a diff that ignores +changes to those files, +you can use the following syntax (e.g. Cargo.lock): + +``` +git log -p ':!Cargo.lock' +``` + +Arbitrary patterns are supported (e.g. `:!compiler/*`). Patterns use the same +syntax as +`.gitignore`, with `:` prepended to indicate a pattern. diff --git a/docs/styleguide.md b/docs/styleguide.md new file mode 100644 index 000000000..6d40761ac --- /dev/null +++ b/docs/styleguide.md @@ -0,0 +1,690 @@ +## Code Style + +### Idiomatic Rust + +- _Consistency_: there's usually only one idiomatic solution amidst many + non-idiomatic ones. +- _Predictability_: you can use the APIs without consulting documentation. +- _Performance, ergonomics and correctness_: language idioms usually reflect learned truths, which might not be immediately obvious. +- _Normalization_: reduce the number of possible invariants by using the type system and logic. +- _Awareness_: if in the process of writing code you thought about some little thing that could break something - either handle it or leave a TODO comment. + +### Standard Naming + +- Use `-` rather than `_` in crate names and in corresponding folder names. 
+- Avoid single-letter variable names especially in long functions. Common `i`, `j` etc. loop variables are somewhat of an exception but since Rust encourages use of iterators those cases aren’t that common anyway.
+- Follow standard [Rust naming patterns](https://rust-lang.github.io/api-guidelines/naming.html) such as:
+  - Don’t use `get_` prefix for getter methods. A getter method is one which returns (a reference to) a field of an object.
+  - Use `set_` prefix for setter methods. An exception is builder objects, which may use a different naming style.
+  - Use `into_` prefix for methods which consume `self` and `to_` prefix for methods which don’t.
+
+### Deriving traits (apply only to libraries)
+
+Derive `Debug`, `Clone`, `Copy`, `PartialEq`, `Eq`, and `Hash` for public types when possible (in this order).
+
+**Rationale:** these traits can be useful for users and can be implemented for most types.
+
+Derive `Default` when there is a reasonable default value for the type.
+
+### Full paths for logging
+
+Always write `tracing::<level>!(...)` instead of importing `use tracing::<level>;` and invoking `<level>!(...)`.
+
+```rust
+// GOOD
+tracing::warn!("Everything is on fire");
+
+// BAD
+use tracing::warn;
+
+warn!("Everything is on fire");
+```
+
+**Rationale:**
+
+- Less polluted import blocks
+- Uniformity
+
+### [Tracing](https://tracing.rs)
+
+When emitting events and spans with `tracing` prefer adding variable data via [`tracing`'s field mechanism](https://docs.rs/tracing/latest/tracing/#recording-fields).
+ +```rust +// GOOD +debug!( + target: "client", + validator_id = self.client.validator_signer.as_ref().map(|vs| { + tracing::field::display(vs.validator_id()) + }), + %hash, + "block.previous_hash" = %block.header().prev_hash(), + "block.height" = block.header().height(), + %peer_id, + was_requested, + "received block", +); +``` + +Most apparent violation of this rule will be when the event message utilizes any form of formatting, as seen in the following example: + +```rust +// BAD +debug!( + target: "client", + "{:?} Received block {} <- {} at {} from {}, requested: {}", + self.client.validator_signer.as_ref().map(|vs| vs.validator_id()), + hash, + block.header().prev_hash(), + block.header().height(), + peer_id, + was_requested +); +``` + +Always specify the `target` explicitly. A good default value to use is the crate name, or the module path (e.g. `chain::client`) so that events and spans common to a topic can be grouped together. This grouping can later be used for customizing which events to output. + +**Rationale:** This makes the events structured – one of the major value add propositions of the tracing ecosystem. Structured events allow for immediately actionable data without additional post-processing, especially when using some of the more advanced tracing subscribers. Of particular interest would be those that output events as JSON, or those that publish data to distributed event collection systems such as opentelemetry. Maintaining this rule will also usually result in faster execution (when logs at the relevant level are enabled.) + +### Spans + +Use the [spans](https://docs.rs/tracing/latest/tracing/#spans) to introduce context and grouping to and between events instead of manually adding such information as part of the events themselves. Most of the subscribers ingesting spans also provide a built-in timing facility, so prefer using spans for measuring the amount of time a section of code needs to execute. 
+
+Give spans simple names that make them both easy to trace back to code and to find a particular span in logs or other tools ingesting the span data. If a span begins at the top of a function, prefer giving it the name of that function; otherwise, prefer a `snake_case` name.
+
+When instrumenting asynchronous functions the [`#[tracing::instrument]`][instrument] macro or the `Future::instrument` is **required**. Using `Span::entered` or a similar method that is not aware of yield points will result in incorrect span data and could lead to difficult to troubleshoot issues such as stack overflows.
+
+Always explicitly specify the `level`, `target`, and `skip_all` options and do not rely on the default values. `skip_all` avoids adding all function arguments as span fields which can lead to recording potentially unnecessary and expensive information. Carefully consider which information needs recording and the cost of recording the information when using the `fields` option.
+
+[instrument]: https://docs.rs/tracing-attributes/latest/tracing_attributes/attr.instrument.html
+
+```rust
+#[tracing::instrument(
+    level = "trace",
+    target = "network",
+    name = "handle_sync_routing_table",
+    skip_all
+)]
+async fn handle_sync_routing_table(
+    clock: &time::Clock,
+    network_state: &Arc<NetworkState>,
+    conn: Arc<Connection>,
+    rtu: RoutingTableUpdate,
+) {
+    ...
+}
+```
+
+In regular synchronous code it is fine to use the regular span API if you need to instrument portions of a function without affecting the code structure:
+
+```rust
+fn compile_and_serialize_wasmer(code: &[u8]) -> Result<Module, Error> {
+    // Some code...
+    {
+        let _span = tracing::debug_span!(target: "vm", "compile_wasmer").entered();
+        // ...
+        // _span will be dropped when this scope ends, terminating the span created above.
+        // You can also `drop` it manually, to end the span early with `drop(_span)`.
+    }
+    // Some more code...
+}
+```
+
+**Rationale:** Much as with events, this makes the information provided by spans structured and contextual.
This information can then be output to tooling in an industry standard format, and can be interpreted by an extensive ecosystem of `tracing` subscribers. + +### Event and span levels + +The `INFO` level is enabled by default, use it for information useful for node operators. The `DEBUG` level is enabled on the canary nodes, use it for information useful in debugging testnet failures. The `TRACE` level is not generally enabled, use it for arbitrary debug output. + +### Order of imports + +```rust +// First core, alloc and/or std +use core::fmt; +use std::{...}; + +// Second, external crates (both crates.io crates and other rust-analyzer crates). +use crate_foo::{ ... }; +use crate_bar::{ ... }; + +// Finally, the internal crate modules and submodules +use crate::{}; +use super::{}; +use self::y::Y; + +// If applicable, the current sub-modules +mod x; +mod y; +``` + +### Import Style + +When implementing traits from `core::fmt`/`std::fmt` import the module: + +```rust +// GOOD +use core::fmt; + +impl fmt::Display for RenameError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { .. } +} + +// BAD +impl core::fmt::Display for RenameError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { .. } +} +``` + +When importing sub-modules: + +```rust +// GOOD +use self::x::Y; + +mod x; + +// BAD +use x::Y; + +mod x; +``` + +### If-let + +Avoid the `if let ... { } else { }` construct if possible, use `match` instead: + +```rust +// GOOD +match ctx.expected_type.as_ref() { + Some(expected_type) => completion_ty == expected_type && !expected_type.is_unit(), + None => false, +} + +// BAD +if let Some(expected_type) = ctx.expected_type.as_ref() { + completion_ty == expected_type && !expected_type.is_unit() +} else { + false +} +``` + +Use `if let ... 
{ }` when a match arm is intentionally empty:
+
+```rust
+// GOOD
+if let Some(expected_type) = this.as_ref() {
+    // Handle it
+}
+
+// BAD
+match this.as_ref() {
+    Some(expected_type) => {
+        // Handle it
+    },
+    None => (),
+}
+```
+
+### Push Ifs Up And Fors Down
+
+[Please read this](https://matklad.github.io/2023/11/15/push-ifs-up-and-fors-down.html)
+
+If there’s an if condition inside a function, consider if it could be moved to the caller instead:
+
+```rust
+// GOOD
+fn frobnicate(walrus: Walrus) {
+    ...
+}
+
+// BAD
+fn frobnicate(walrus: Option<Walrus>) {
+    let walrus = match walrus {
+        Some(it) => it,
+        None => return,
+    };
+    ...
+}
+```
+
+If there's an if condition inside a for loop, consider splitting it into different loops:
+
+```rust
+// GOOD
+if condition {
+    for walrus in walruses {
+        walrus.frobnicate()
+    }
+} else {
+    for walrus in walruses {
+        walrus.transmogrify()
+    }
+}
+
+// BAD
+for walrus in walruses {
+    if condition {
+        walrus.frobnicate()
+    } else {
+        walrus.transmogrify()
+    }
+}
+```
+
+Avoid deep nesting with early exit:
+
+```rust
+// GOOD
+fn make_chevapi(ctx: Context) -> Result<Option<Chevapi>> {
+    let Some(grill) = ctx.grill else {
+        return Ok(None);
+    };
+
+    let Some(mut meat) = ctx.meat else {
+        return Ok(None);
+    };
+
+    meat.verify_edible()?;
+
+    loop {
+        if let Some(chevapi) = grill.cook(&mut meat) {
+            return Ok(Some(chevapi));
+        }
+
+        meat.flip();
+    }
+}
+
+// BAD
+fn make_chevapi(ctx: Context) -> Result<Option<Chevapi>> {
+    Ok(if let Some(grill) = ctx.grill {
+        if let Some(mut meat) = ctx.meat {
+            meat.verify_edible()?;
+            loop {
+                if let Some(chevapi) = grill.cook(&mut meat) {
+                    break Some(chevapi);
+                } else {
+                    meat.flip();
+                }
+            }
+        } else {
+            None
+        }
+    } else {
+        None
+    })
+}
+```
+
+### Prefer for loops over `for_each` and `try_for_each` methods
+
+Iterators offer `for_each` and `try_for_each` methods which allow executing a closure over all items of the iterator.
This is similar to using a for loop but comes with various complications and may lead to less readable code. Prefer using a loop rather than those methods, for example:
+
+```rust
+// GOOD
+for outcome_with_id in result? {
+    *total_gas_burnt =
+        safe_add_gas(*total_gas_burnt, outcome_with_id.outcome.gas_burnt)?;
+    outcomes.push(outcome_with_id);
+}
+
+// BAD
+result?.into_iter().try_for_each(
+    |outcome_with_id: ExecutionOutcomeWithId| -> Result<(), RuntimeError> {
+        *total_gas_burnt =
+            safe_add_gas(*total_gas_burnt, outcome_with_id.outcome.gas_burnt)?;
+        outcomes.push(outcome_with_id);
+        Ok(())
+    },
+)?;
+```
+
+**Rationale:** The `for_each` and `try_for_each` methods don't play nice with `break` and `continue` statements, nor do they mesh well with async IO (since `.await` inside of the closure isn't possible). And while `try_for_each` allows for the use of the question mark operator, one may end up having to use it twice: once inside the closure and a second time outside the call to `try_for_each`. Furthermore, usage of these functions often introduces some minor syntactic noise.
+
+There are situations when those methods may lead to more readable code. A common example is long call chains. Even then such code may evolve with the closure growing and leading to less readable code. If advantages of using the methods aren’t clear cut, it’s usually better to err on the side of more imperative style.
+
+Lastly, anecdotally, the methods (e.g., when used with `chain` or `flat_map`) may lead to faster code. This intuitively makes sense, but it's worth keeping in mind that compilers are pretty good at optimizing, and in practice, they may generate optimal code anyway. Furthermore, optimizing code for readability may be more important (especially outside of the hot path) than small performance gains.
+
+### `&str` -> `String` conversion
+
+Prefer using `.to_string()` or `.to_owned()`, rather than `.into()`, `String::from`, etc.
+
+**Rationale:** uniformity, intent clarity.
+
+### Prefer `to_string` to `format!("{}")`
+
+Prefer calling the `to_string` method on an object rather than passing it through
+`format!("{}")` if all you’re doing is converting it to a `String`.
+
+```rust
+// GOOD
+let hash = block_hash.to_string();
+let msg = format!("{}: failed to open", path.display());
+
+// BAD
+let hash = format!("{block_hash}");
+let msg = path.display() + ": failed to open";
+```
+
+**Rationale:** `to_string` is shorter to type and also faster.
+
+### Prefer using enums instead of a bunch of `bool`s
+
+Avoid using multiple bools when their meaning depends on each other:
+
+```rust
+// GOOD
+enum InsertMode {
+    Add,
+    Replace,
+    Set,
+}
+
+fn dict_insert(key: Key, value: Value, mode: InsertMode) {
+    ...
+}
+
+// BAD
+fn dict_insert(key: Key, value: Value, can_add: bool, can_replace: bool) {
+    assert!(can_add || can_replace, "invalid params");
+    ...
+}
+```
+
+### Always keep an eye on the size of the structures
+
+Do not pass large objects by value as arguments. Instead, use references or `Box`/`Arc`. Exceptions to this rule include various types of builders or cases where performance concerns are negligible.
+
+**Rationale:** `memcpy` goes brr.
+
+### Prefer cloneable structs instead of always wrapping into `Arc`
+
+```rust
+// GOOD
+#[derive(Clone)]
+pub struct MyObject {
+    inner: Arc<Inner>
+}
+
+impl MyObject {
+    pub fn new() -> Self {
+        ...
+    }
+}
+
+struct Inner {
+    ...
+}
+
+// BAD
+pub struct MyObject {
+    ...
+}
+
+impl MyObject {
+    pub fn new() -> Arc<Self> {
+        ...
+    }
+}
+```
+
+**Rationale:** hiding an `Arc` inside the struct simplifies usage outside of this module.
+

### Leave comments

- Always mark `unsafe` blocks with `// SAFETY: why...`
- Mark performance critical paths with `// PERF: why...`
- Accompany a complex algorithm with a simple text description of each step:

  ```rust
  // Read the next part of the key from the current data
  let prefix = &mut ok!(read_label(&mut remaining_data, key.remaining_bits()));

  // Match the prefix with the key
  let lcp = key.longest_common_data_prefix(prefix);
  match lcp.remaining_bits().cmp(&key.remaining_bits()) {
      // If all bits match, an existing value was found
      std::cmp::Ordering::Equal => break remaining_data.range(),
      // LCP is less than prefix, an edge to slice was found
      std::cmp::Ordering::Less if lcp.remaining_bits() < prefix.remaining_bits() => {
          return Ok(None);
      }
      // The key contains the entire prefix, but there are still some bits left
      std::cmp::Ordering::Less => {
          // Fail fast if there are not enough references in the fork
          if data.reference_count() != 2 {
              return Err(Error::CellUnderflow);
          }

          // Remove the LCP from the key
          prev_key_bit_len = key.remaining_bits();
          key.try_advance(lcp.remaining_bits(), 0);

          // Load the next branch
          let next_branch = Branch::from(ok!(key.load_bit()));

          let child = match data.reference(next_branch as u8) {
              // TODO: change mode to `LoadMode::UseGas` if copy-on-write for libraries is not ok
              Some(child) => ok!(context.load_dyn_cell(child, LoadMode::Full)),
              None => return Err(Error::CellUnderflow),
          };

          // Push an intermediate edge to the stack
          stack.push(Segment {
              data,
              next_branch,
              key_bit_len: prev_key_bit_len,
          });
          data = child;
      }
      std::cmp::Ordering::Greater => {
          unreachable!("LCP of prefix and key can't be greater than key");
      }
  }
  ```

### Import Granularity

Group imports by module, but not deeper:

```rust
// GOOD
use std::collections::{hash_map, BTreeSet};
use std::sync::Arc;

// BAD - nested groups.
+
use std::{
    collections::{hash_map, BTreeSet},
    sync::Arc,
};

// BAD - not grouped together.
use std::collections::BTreeSet;
use std::collections::hash_map;
use std::sync::Arc;
```

This corresponds to `"rust-analyzer.assist.importGranularity": "module"` setting
in rust-analyzer
([docs](https://rust-analyzer.github.io/manual.html#rust-analyzer.assist.importGranularity)).

**Rationale:** Consistency, matches existing practice.

### Use `Self` where possible

When referring to the type for which the block is implemented, prefer using `Self`, rather than the name of the type:

```rust
impl ErrorKind {
    // GOOD
    fn print(&self) {
        match self {
            Self::Io => println!("Io"),
            Self::Network => println!("Network"),
            Self::Json => println!("Json"),
        }
    }

    // BAD
    fn print(&self) {
        match self {
            ErrorKind::Io => println!("Io"),
            ErrorKind::Network => println!("Network"),
            ErrorKind::Json => println!("Json"),
        }
    }
}
```

```rust
impl<'a> AnswerCallbackQuery<'a> {
    // GOOD
    fn new<C>(bot: &'a Bot, callback_query_id: C) -> Self
    where
        C: Into<String>,
    { ... }

    // BAD
    fn new<C>(bot: &'a Bot, callback_query_id: C) -> AnswerCallbackQuery<'a>
    where
        C: Into<String>,
    { ... }
}
```

**Rationale:** `Self` is generally shorter and it's easier to copy-paste code or rename the type.

### Arithmetic integer operations

Use methods with an appropriate overflow handling over plain arithmetic operators (`+-*/%`) when dealing with integers.

```
// GOOD
a.wrapping_add(b);
c.saturating_sub(2);
d.widening_mul(3); // NB: unstable at the time of writing
e.overflowing_div(5);
f.checked_rem(7);

// BAD
a + b
c - 2
d * 3
e / 5
f % 7
```

If you’re confident the arithmetic operation cannot fail, `x.checked_[add|sub|mul|div](y).expect("explanation why the operation is safe")` is a great alternative, as it neatly documents not just the infallibility, but also _why_ that is the case.
+ +This convention may be enforced by the `clippy::arithmetic_side_effects` and `clippy::integer_arithmetic` lints. + +**Rationale:** By default the outcome of an overflowing computation in Rust depends on a few factors, most notably the compilation flags used. The quick explanation is that in debug mode the computations may panic (cause side effects) if the result has overflowed, and when built with optimizations enabled, these computations will wrap-around instead. + +### Metrics + +Consider adding metrics to new functionality. For example, how often each type of error was triggered, how often each message type was processed. + +**Rationale:** Metrics are cheap to increment, and they often provide a significant insight into operation of the code, almost as much as logging. But unlike logging metrics don't incur a significant runtime cost. + +### Naming + +Prefix all `tycho` metrics with `tycho_`. Follow the [Prometheus naming convention](https://prometheus.io/docs/practices/naming/) for new metrics. + +**Rationale:** The `tycho_` prefix makes it trivial to separate metrics exported by `tycho` from other metrics, such as metrics about the state of the machine that runs `tycho`. + +All time measurements must end with the `_sec` postfix: + +```rust +metrics::histogram!("tycho_metric_name_sec").record(elapsed_time) +``` + +**Rationale:** The `_sec` postfix is handled by the collector, which transforms it into a lightweight histogram. Otherwise, the collector will try to build a complex summary. [Read more](https://prometheus.io/docs/practices/histograms/#quantiles) + +### Metrics performance + +In most cases incrementing a metric is cheap enough never to give it a second thought. However accessing a metric with labels on a hot path needs to be done carefully. + +If a label is based on an integer, use a faster way of converting an integer to the label, such as the `itoa` crate. + +For hot code paths, re-use results of `with_label_values()` as much as possible. 
+

**Rationale:** We've encountered issues caused by the runtime costs of incrementing metrics before; keep these costs in mind when incrementing metrics on hot paths.

### Locking in `async` context

It is ok to use `std`/`parking_lot` `Mutex`/`RwLock` inside short-lived sync blocks. Just make sure that guards do not live across `.await` points.
Use `tokio::sync::Mutex` to synchronise tasks (control flow).

```rust
// GOOD
struct SharedData {
    state: std::sync::Mutex<State>,
}

async fn some_process(shared: &SharedData) {
    ...
    loop {
        ...
        {
            let mut state = shared.state.lock().unwrap();
            state.fast_operation();
        }

        some_io().await;
        ...
    }
}

// BAD
struct SharedData {
    state: tokio::sync::Mutex<State>,
}

async fn some_process(shared: &SharedData) {
    ...
    loop {
        ...
        {
            let mut state = shared.state.lock().await;
            state.fast_operation();
        }

        some_io().await;
        ...
    }
}
```

Make sure that sync blocks don't occupy the thread for too long. Use `spawn_blocking` for complex computations or file IO.
+ +```rust +// GOOD +async fn verify_block(node: &Node, block: &Block) -> Result<()> { + // NOTE: signatures.len() > 100 + let signatures = node.get_signatures(block).await?; + tokio::task::spawn_blocking(move || { + for signature in signatures { + signature.verify()?; + } + Ok(()) + }) + .await + .unwrap() +} + +// BAD +async fn verify_block(node: &Node, block: &Block) -> Result<()> { + let signatures = node.get_signatures(block).await?; + + for signature in signatures { + signature.verify()?; + } + Ok(()) +} +``` + +### Try to make your futures cancel safe (TODO) + +### Don't make big futures (TODO) + +### DashMap common pitfalls (TODO) + +### Atomics pitfalls (TODO) + +[Read this book!](https://marabos.nl/atomics/preface.html) diff --git a/justfile b/justfile index 40ba2c285..b6b4192c2 100644 --- a/justfile +++ b/justfile @@ -4,6 +4,9 @@ default: install_fmt: rustup component add rustfmt --toolchain nightly +install_lychee: + cargo install lychee + integration_test_dir := justfile_directory() / ".scratch/integration_tests" integration_test_base_url := "https://tycho-test.broxus.cc" @@ -45,7 +48,10 @@ fmt: install_fmt cargo +nightly fmt --all # ci checks -ci: fmt lint docs test +ci: check_dev_docs check_format lint docs test + +check_dev_docs: + lychee {{justfile_directory()}}/docs check_format: install_fmt cargo +nightly fmt --all -- --check diff --git a/simulator/README.md b/simulator/README.md index d231dd8f2..4c8903969 100644 --- a/simulator/README.md +++ b/simulator/README.md @@ -11,7 +11,7 @@ or - `podman` - `podman-compose` -### Usage +### Usage ```bash cargo install --path ./simulator