diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0dfd2da3a..17f8ee425 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,7 +4,7 @@ on: merge_group: env: - MDBOOK_VERSION: 0.4.40 + MDBOOK_VERSION: 0.4.43 jobs: code-tests: @@ -35,6 +35,11 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@master + - name: Checkout rust-lang/rust + uses: actions/checkout@master + with: + repository: rust-lang/rust + path: rust - name: Update rustup run: rustup self update - name: Install Rust @@ -52,16 +57,17 @@ jobs: rustup --version rustc -Vv mdbook --version - - name: Verify the book builds - env: - SPEC_DENY_WARNINGS: 1 - run: mdbook build - name: Style checks working-directory: style-check run: cargo run --locked -- ../src - name: Style fmt working-directory: style-check run: cargo fmt --check + - name: Verify the book builds + env: + SPEC_DENY_WARNINGS: 1 + SPEC_RUST_ROOT: ${{ github.workspace }}/rust + run: mdbook build - name: Check for broken links run: | curl -sSLo linkcheck.sh \ @@ -98,6 +104,40 @@ jobs: working-directory: ./mdbook-spec run: cargo fmt --check + preview: + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Checkout rust-lang/rust + uses: actions/checkout@master + with: + repository: rust-lang/rust + path: rust + - name: Update rustup + run: rustup self update + - name: Install Rust + run: | + rustup set profile minimal + rustup toolchain install nightly + rustup default nightly + - name: Install mdbook + run: | + mkdir bin + curl -sSL https://github.com/rust-lang/mdBook/releases/download/v${MDBOOK_VERSION}/mdbook-v${MDBOOK_VERSION}-x86_64-unknown-linux-gnu.tar.gz | tar -xz --directory=bin + echo "$(pwd)/bin" >> $GITHUB_PATH + - name: Build the book + env: + SPEC_RELATIVE: 0 + SPEC_RUST_ROOT: ${{ github.workspace }}/rust + run: mdbook build --dest-dir dist/preview-${{ github.event.pull_request.number }} + - name: Upload 
artifact + uses: actions/upload-artifact@v4 + with: + name: preview-${{ github.event.pull_request.number }} + overwrite: true + path: dist + # The success job is here to consolidate the total success/failure state of # all other jobs. This job is then included in the GitHub branch protection # rule which prevents merges unless all other jobs are passing. This makes @@ -110,6 +150,7 @@ jobs: - code-tests - style-tests - mdbook-spec + # preview is explicitly excluded here since it doesn't run on merge runs-on: ubuntu-latest steps: - run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' diff --git a/README.md b/README.md index a296a3b38..0c7f3c496 100644 --- a/README.md +++ b/README.md @@ -66,10 +66,22 @@ SPEC_RELATIVE=0 mdbook build --open This will open a browser with a websocket live-link to automatically reload whenever the source is updated. -The `SPEC_RELATIVE=0` environment variable makes links to the standard library go to <https://doc.rust-lang.org/> instead of being relative, which is useful when viewing locally since you normally don't have a copy of the standard library. - You can also use mdbook's live webserver option, which will automatically rebuild the book and reload your web browser whenever a source file is modified: ```sh SPEC_RELATIVE=0 mdbook serve --open ``` + +### `SPEC_RELATIVE` + +The `SPEC_RELATIVE=0` environment variable makes links to the standard library go to <https://doc.rust-lang.org/> instead of being relative, which is useful when viewing locally since you normally don't have a copy of the standard library. + +The published site at <https://doc.rust-lang.org/reference/> (or local docs using `rustup doc`) does not set this, which means it will use relative links which supports offline viewing and links to the correct version (for example, links in <https://doc.rust-lang.org/1.81.0/reference/> will stay within the 1.81.0 directory). + +### `SPEC_DENY_WARNINGS` + +The `SPEC_DENY_WARNINGS=1` environment variable will turn all warnings generated by `mdbook-spec` to errors. This is used in CI to ensure that there aren't any problems with the book content. 
+ +### `SPEC_RUST_ROOT` + +The `SPEC_RUST_ROOT` environment variable can be used to point to the directory of a checkout of <https://github.com/rust-lang/rust>. This is used by the test-linking feature so that it can find tests linked to reference rules. If this is not set, then the tests won't be linked. diff --git a/book.toml b/book.toml index 404b8cca8..ff0c66ae2 100644 --- a/book.toml +++ b/book.toml @@ -5,6 +5,7 @@ author = "The Rust Project Developers" [output.html] additional-css = ["theme/reference.css"] +additional-js = ["theme/reference.js"] git-repository-url = "https://github.com/rust-lang/reference/" edit-url-template = "https://github.com/rust-lang/reference/edit/master/{path}" smart-punctuation = true @@ -15,7 +16,7 @@ smart-punctuation = true "/unsafe-functions.html" = "unsafe-keyword.html" [rust] -edition = "2021" +edition = "2024" [preprocessor.spec] command = "cargo run --release --manifest-path mdbook-spec/Cargo.toml" diff --git a/docs/authoring.md b/docs/authoring.md index 74c9bc962..29a476c01 100644 --- a/docs/authoring.md +++ b/docs/authoring.md @@ -15,7 +15,8 @@ This document serves as a guide for editors and reviewers. Some conventions and * Code blocks should have an explicit language tag. * Do not wrap long lines. This helps with reviewing diffs of the source. * Use [smart punctuation] instead of Unicode characters. For example, use `---` for em-dash instead of the Unicode character. Characters like em-dash can be difficult to see in a fixed-width editor, and some editors may not have easy methods to enter such characters. -* Links should be relative with the `.md` extension. Links to other rust-lang books that are published with the reference or the standard library API should also be relative so that the linkchecker can validate them. +* Links should be relative with the `.md` extension. Links to other rust-lang books that are published with the reference should also be relative so that the linkchecker can validate them. 
+* Links to the standard library should use rustdoc-style links described in [Standard library links](#standard-library-links). * The use of reference links is preferred, with shortcuts if appropriate. Place the sorted link reference definitions at the bottom of the file, or at the bottom of a section if there are an unusually large number of links that are specific to the section. ```markdown @@ -75,6 +76,45 @@ Rules can be linked to by their ID using markdown such as `[foo.bar]`. There are In the HTML, the rules are clickable just like headers. +When assigning rules to new paragraphs, or when modifying rule names, use the following guidelines: + +1. A rule applies to one core idea, which should be easily determined when reading the paragraph it is applied to. +2. Other than the "intro" paragraph, purely explanatory, expository, or exemplary content does not need a rule. If the expository paragraph isn't directly related to the previous, separate it with a hard (rendered) line break. + * This content will be moved to `[!NOTE]` or more specific admonitions in the future. +3. Rust code examples and tests do not need their own rules. +4. Use the following guidelines for admonitions: + * Notes: Do not include a rule. + * Warning: Omit the rule if the warning follows from the previous paragraph or if the warning is explanatory and doesn't introduce any new rules. + * Target specific behavior: Always include the rule. + * Edition differences: Always include the rule. +5. The following keywords should be used to identify paragraphs when unambiguous: + * `intro`: The beginning paragraph of each section - should explain the construct being defined overall. + * `syntax`: Syntax definitions or explanations when BNF syntax definitions are not used. + * `namespace`: For items only, specifies the namespace(s) the item introduces a name in. May also be used elsewhere when defining a namespace (e.g. `r[attribute.diagnostic.namespace]`). +6. 
When a rule doesn't fall under the above keywords, or for section rule ids, name the subrule as follows: + * If the rule is naming a specific Rust language construct (e.g. an attribute, standard library type/function, or keyword-introduced concept), use the construct as named in the language, appropriately case-adjusted (but do not replace `_`s with `-`s). + * Other than Rust language concepts with `_`s in the name, use `-` characters to separate words within a "subrule". + * Whenever possible, do not repeat previous components of the rule. + * Edition differences admonitions should typically be named by the edition referenced directly by the rule. If multiple editions are named, use the one for which the behavior is defined by the admonition, and not by a previous paragraph. + * Target specific admonitions should typically be named by the least specific target property to which they apply (e.g. if a rule affects all x86 CPUs, the rule name should include `x86` rather than separately listing `i586`, `i686` and `x86_64`, and if a rule applies to all ELF platforms, it should be named `elf` rather than listing every ELF OS). + * Use an appropriately descriptive, but short, name if the language does not provide one. + +#### Test rule annotations + +Tests in <https://github.com/rust-lang/rust> can be linked to rules in the reference. The rule will include a link to the tests, and there is also an [appendix] which tracks how the rules are currently linked. + +Tests in the `tests` directory can be annotated with the `//@ reference: x.y.z` header to link it to a rule. The header can be specified multiple times if a single file covers multiple rules. + +Compiler developers are not expected to add `reference` annotations to tests. However, if they do want to help, their cooperation is very welcome. Reference authors and editors are responsible for making sure every rule has a test associated with it. + +The tests are beneficial for reviewers to see the behavior of a rule. 
It is also a benefit to readers who may want to see examples of particular behaviors. When adding new rules, you should wait until the reference side is approved before submitting a PR to `rust-lang/rust` (to avoid churn if we decide on different names). + +Prefixed rule names should not be used in tests. That is, do not use something like `asm.rules` when there are specific rules like `asm.rules.reg-not-input`. + +We are not expecting 100% coverage at any time. Although it would be nice, it is unrealistic due to the sequence in which things are developed and the resources available. + +[appendix]: https://doc.rust-lang.org/nightly/reference/test-summary.html + ### Standard library links You should link to the standard library without specifying a URL in a fashion similar to [rustdoc intra-doc links][intra]. Some examples: diff --git a/mdbook-spec/Cargo.lock b/mdbook-spec/Cargo.lock index ff835b409..c983d9842 100644 --- a/mdbook-spec/Cargo.lock +++ b/mdbook-spec/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -28,9 +28,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.14" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", @@ -43,49 +43,49 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.0" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.3" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "bitflags" @@ -104,9 +104,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.9.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +checksum = "1a68f1f47cdf0ec8ee4b941b2eee2a80cb796db73118c0dd09ac63fbe405be22" dependencies = [ "memchr", "regex-automata", @@ -121,9 +121,12 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "cc" -version = "1.0.104" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74b6a57f98764a267ff415d50a25e6e166f3831a5071af4995296ea97d210490" +checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" +dependencies = [ + "shlex", +] [[package]] name = "cfg-if" @@ -140,23 +143,23 @@ dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", - "windows-targets 0.52.6", + "windows-targets", ] [[package]] name = "clap" -version = "4.5.8" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b3edb18336f4df585bc9aa31dd99c036dfa5dc5e9a2939a722a188f3a8970d" +checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.8" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1c09dd5ada6c6c78075d6fd0da3f90d8080651e2d6cc8eb2f1aaa4034ced708" +checksum = 
"b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec" dependencies = [ "anstream", "anstyle", @@ -167,36 +170,36 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.5.7" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d598e88f6874d4b888ed40c71efbcbf4076f1dfbae128a08a8c9e45f710605d" +checksum = "d9647a559c112175f17cf724dc72d3645680a883c58481332779192b0d8e7a01" dependencies = [ "clap", ] [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" [[package]] name = "colorchoice" -version = "1.0.1" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.12" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" dependencies = [ "libc", ] @@ -234,9 +237,9 @@ dependencies = [ [[package]] name = "env_filter" -version = "0.1.0" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +checksum = 
"4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" dependencies = [ "log", "regex", @@ -244,9 +247,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.3" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" +checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" dependencies = [ "anstream", "anstyle", @@ -267,9 +270,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "generic-array" @@ -283,11 +286,12 @@ dependencies = [ [[package]] name = "handlebars" -version = "5.1.2" +version = "6.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d08485b96a0e6393e9e4d1b8d48cf74ad6c063cd905eb33f42c1ce3f0377539b" +checksum = "fd4ccde012831f9a071a637b0d4e31df31c0f6c525784b35ae76a9ac6bc1e315" dependencies = [ "log", + "num-order", "pest", "pest_derive", "serde", @@ -303,9 +307,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "iana-time-zone" -version = "0.1.60" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -326,30 +330,30 @@ dependencies = [ [[package]] name = "is_terminal_polyfill" -version = "1.70.0" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +checksum = 
"7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "540654e97a3f4470a492cd30ff187bc95d89557a903a2bbf112e2fae98104ef2" [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] [[package]] name = "libc" -version = "0.2.155" +version = "0.2.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" [[package]] name = "libdbus-sys" @@ -375,9 +379,9 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "mdbook" -version = "0.4.40" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45a38e19bd200220ef07c892b0157ad3d2365e5b5a267ca01ad12182491eea5" +checksum = "fe1f98b8d66e537d2f0ba06e7dec4f44001deec539a2d18bfc102d6a86189148" dependencies = [ "anyhow", "chrono", @@ -412,6 +416,7 @@ dependencies = [ "semver", "serde_json", "tempfile", + "walkdir", ] [[package]] @@ -422,11 +427,26 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "normpath" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8911957c4b1549ac0dc74e30db9c8b0e66ddcd6d7acc33098f4c63a64a6d7ed" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "num-modular" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "17bb261bf36fa7d83f4c294f834e91256769097b3cb505d44831e0a179ac647f" + +[[package]] +name = "num-order" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5831952a9476f2fed74b77d74182fa5ddc4d21c72ec45a333b250e3ed0272804" +checksum = "537b596b97c40fcf8056d153049eb22f481c17ebce72a513ec9286e4986d1bb6" dependencies = [ - "windows-sys 0.52.0", + "num-modular", ] [[package]] @@ -440,33 +460,33 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "opener" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8df34be653210fbe9ffaff41d3b92721c56ce82dfee58ee684f9afb5e3a90c0" +checksum = "d0812e5e4df08da354c851a3376fead46db31c2214f849d3de356d774d057681" dependencies = [ "bstr", "dbus", "normpath", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "pathdiff" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" +checksum = "d61c5ce1153ab5b689d0c074c4e7fc613e942dfb7dd9eea5ab202d2ad91fe361" [[package]] name = "pest" -version = "2.7.11" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd53dff83f26735fdc1ca837098ccf133605d794cdae66acfc2bfac3ec809d95" +checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" dependencies = [ "memchr", "thiserror", @@ -475,9 +495,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.7.11" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2a548d2beca6773b1c244554d36fcf8548a8a58e74156968211567250e48e49a" +checksum = "d214365f632b123a47fd913301e14c946c61d1c183ee245fa76eb752e59a02dd" dependencies = [ "pest", "pest_generator", @@ -485,9 +505,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.11" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c93a82e8d145725dcbaf44e5ea887c8a869efdcc28706df2d08c69e17077183" +checksum = "eb55586734301717aea2ac313f50b2eb8f60d2fc3dc01d190eefa2e625f60c4e" dependencies = [ "pest", "pest_meta", @@ -498,9 +518,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.7.11" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a941429fea7e08bedec25e4f6785b6ffaacc6b755da98df5ef3e7dcf4a124c4f" +checksum = "b75da2a70cf4d9cb76833c990ac9cd3923c9a8905a8929789ce347c84564d03d" dependencies = [ "once_cell", "pest", @@ -509,15 +529,15 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -542,18 +562,18 @@ checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3" [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" 
dependencies = [ "proc-macro2", ] [[package]] name = "regex" -version = "1.10.5" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -563,9 +583,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -574,15 +594,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" dependencies = [ "bitflags", "errno", @@ -597,6 +617,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "semver" version = "1.0.23" @@ -605,18 +634,18 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = 
"serde" -version = "1.0.203" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.203" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -625,11 +654,12 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.120" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -659,9 +689,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.68" +version = "2.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" dependencies = [ "proc-macro2", "quote", @@ -670,40 +700,41 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.10.1" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", + "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "terminal_size" -version = "0.3.0" +version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef" dependencies = [ "rustix", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", @@ -733,24 +764,21 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "ucd-trie" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" [[package]] name = "unicase" -version = "2.7.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] +checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = 
"utf8parse" @@ -760,25 +788,36 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", @@ -791,9 +830,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -801,9 +840,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", @@ -814,9 +853,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "winapi" @@ -834,6 +873,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -846,16 +894,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", + "windows-targets", ] [[package]] @@ -864,22 +903,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.6", + "windows-targets", ] [[package]] -name = "windows-targets" -version = "0.48.5" +name = "windows-sys" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-targets", ] [[package]] @@ -888,46 +921,28 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -940,48 +955,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" diff --git a/mdbook-spec/Cargo.toml b/mdbook-spec/Cargo.toml index 703322cb0..c9a6e31af 100644 --- 
a/mdbook-spec/Cargo.toml +++ b/mdbook-spec/Cargo.toml @@ -19,3 +19,4 @@ regex = "1.9.4" semver = "1.0.21" serde_json = "1.0.113" tempfile = "3.10.1" +walkdir = "2.5.0" diff --git a/mdbook-spec/src/lib.rs b/mdbook-spec/src/lib.rs index 523453131..27a6d807d 100644 --- a/mdbook-spec/src/lib.rs +++ b/mdbook-spec/src/lib.rs @@ -1,5 +1,7 @@ #![deny(rust_2018_idioms, unused_lifetimes)] +use crate::rules::Rules; +use anyhow::{bail, Context, Result}; use mdbook::book::{Book, Chapter}; use mdbook::errors::Error; use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext}; @@ -7,14 +9,12 @@ use mdbook::BookItem; use once_cell::sync::Lazy; use regex::{Captures, Regex}; use semver::{Version, VersionReq}; -use std::collections::BTreeMap; use std::io; use std::path::PathBuf; +mod rules; mod std_links; - -/// The Regex for rules like `r[foo]`. -static RULE_RE: Lazy = Lazy::new(|| Regex::new(r"(?m)^r\[([^]]+)]$").unwrap()); +mod test_links; /// The Regex for the syntax for blockquotes that have a specific CSS class, /// like `> [!WARNING]`. @@ -22,7 +22,8 @@ static ADMONITION_RE: Lazy = Lazy::new(|| { Regex::new(r"(?m)^ *> \[!(?[^]]+)\]\n(?
(?: *>.*\n)+)").unwrap() }); -pub fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> { +pub fn handle_preprocessing() -> Result<(), Error> { + let pre = Spec::new(None)?; let (ctx, book) = CmdPreprocessor::parse_input(io::stdin())?; let book_version = Version::parse(&ctx.mdbook_version)?; @@ -48,59 +49,52 @@ pub struct Spec { /// Whether or not warnings should be errors (set by SPEC_DENY_WARNINGS /// environment variable). deny_warnings: bool, + /// Path to the rust-lang/rust git repository (set by SPEC_RUST_ROOT + /// environment variable). + rust_root: Option, + /// The git ref that can be used in a URL to the rust-lang/rust repository. + git_ref: String, } impl Spec { - pub fn new() -> Spec { - Spec { - deny_warnings: std::env::var("SPEC_DENY_WARNINGS").as_deref() == Ok("1"), + /// Creates a new `Spec` preprocessor. + /// + /// The `rust_root` parameter specifies an optional path to the root of + /// the rust git checkout. If `None`, it will use the `SPEC_RUST_ROOT` + /// environment variable. If the root is not specified, then no tests will + /// be linked unless `SPEC_DENY_WARNINGS` is set in which case this will + /// return an error. + pub fn new(rust_root: Option) -> Result { + let deny_warnings = std::env::var("SPEC_DENY_WARNINGS").as_deref() == Ok("1"); + let rust_root = rust_root.or_else(|| std::env::var_os("SPEC_RUST_ROOT").map(PathBuf::from)); + if deny_warnings && rust_root.is_none() { + bail!("SPEC_RUST_ROOT environment variable must be set"); } - } - - /// Converts lines that start with `r[…]` into a "rule" which has special - /// styling and can be linked to.
- fn rule_definitions( - &self, - chapter: &Chapter, - found_rules: &mut BTreeMap, - ) -> String { - let source_path = chapter.source_path.clone().unwrap_or_default(); - let path = chapter.path.clone().unwrap_or_default(); - RULE_RE - .replace_all(&chapter.content, |caps: &Captures<'_>| { - let rule_id = &caps[1]; - if let Some((old, _)) = - found_rules.insert(rule_id.to_string(), (source_path.clone(), path.clone())) - { - let message = format!( - "rule `{rule_id}` defined multiple times\n\ - First location: {old:?}\n\ - Second location: {source_path:?}" - ); - if self.deny_warnings { - panic!("error: {message}"); - } else { - eprintln!("warning: {message}"); - } + let git_ref = match git_ref(&rust_root) { + Ok(s) => s, + Err(e) => { + if deny_warnings { + eprintln!("error: {e:?}"); + std::process::exit(1); + } else { + eprintln!("warning: {e:?}"); + "master".into() } - format!( - "
\ - [{rule_id}]\ -
\n" - ) - }) - .to_string() + } + }; + Ok(Spec { + deny_warnings, + rust_root, + git_ref, + }) } /// Generates link references to all rules on all pages, so you can easily /// refer to rules anywhere in the book. - fn auto_link_references( - &self, - chapter: &Chapter, - found_rules: &BTreeMap, - ) -> String { + fn auto_link_references(&self, chapter: &Chapter, rules: &Rules) -> String { let current_path = chapter.path.as_ref().unwrap().parent().unwrap(); - let definitions: String = found_rules + let definitions: String = rules + .def_paths .iter() .map(|(rule_id, (_, path))| { let relative = pathdiff::diff_paths(path, current_path).unwrap(); @@ -155,13 +149,38 @@ fn to_initial_case(s: &str) -> String { format!("{first}{rest}") } +/// Determines the git ref used for linking to a particular branch/tag in GitHub. +fn git_ref(rust_root: &Option) -> Result { + let Some(rust_root) = rust_root else { + return Ok("master".into()); + }; + let channel = std::fs::read_to_string(rust_root.join("src/ci/channel")) + .context("failed to read src/ci/channel")?; + let git_ref = match channel.trim() { + // nightly/beta are branches, not stable references. Should be ok + // because we're not expecting those channels to be long-lived. 
+ "nightly" => "master".into(), + "beta" => "beta".into(), + "stable" => { + let version = std::fs::read_to_string(rust_root.join("src/version")) + .context("failed to read src/version")?; + version.trim().into() + } + ch => bail!("unknown channel {ch}"), + }; + Ok(git_ref) +} + impl Preprocessor for Spec { fn name(&self) -> &str { "spec" } fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result { - let mut found_rules = BTreeMap::new(); + let rules = self.collect_rules(&book); + let tests = self.collect_tests(&rules); + let summary_table = test_links::make_summary_table(&book, &tests, &rules); + book.for_each_mut(|item| { let BookItem::Chapter(ch) = item else { return; @@ -169,20 +188,14 @@ impl Preprocessor for Spec { if ch.is_draft_chapter() { return; } - ch.content = self.rule_definitions(&ch, &mut found_rules); ch.content = self.admonitions(&ch); - }); - // This is a separate pass because it relies on the modifications of - // the previous passes. - book.for_each_mut(|item| { - let BookItem::Chapter(ch) = item else { - return; - }; - if ch.is_draft_chapter() { - return; + ch.content = self.auto_link_references(&ch, &rules); + ch.content = self.render_rule_definitions(&ch.content, &tests); + if ch.name == "Test summary" { + ch.content = ch.content.replace("{{summary-table}}", &summary_table); } - ch.content = self.auto_link_references(&ch, &found_rules); }); + // Final pass will resolve everything as a std link (or error if the // link is unknown).
std_links::std_links(&mut book); diff --git a/mdbook-spec/src/main.rs b/mdbook-spec/src/main.rs index 56e11d760..83ac83046 100644 --- a/mdbook-spec/src/main.rs +++ b/mdbook-spec/src/main.rs @@ -12,9 +12,7 @@ fn main() { None => {} } - let preprocessor = mdbook_spec::Spec::new(); - - if let Err(e) = mdbook_spec::handle_preprocessing(&preprocessor) { + if let Err(e) = mdbook_spec::handle_preprocessing() { eprintln!("{}", e); std::process::exit(1); } diff --git a/mdbook-spec/src/rules.rs b/mdbook-spec/src/rules.rs new file mode 100644 index 000000000..b477ab721 --- /dev/null +++ b/mdbook-spec/src/rules.rs @@ -0,0 +1,115 @@ +//! Handling for rule identifiers. + +use crate::test_links::RuleToTests; +use crate::Spec; +use mdbook::book::Book; +use mdbook::BookItem; +use once_cell::sync::Lazy; +use regex::{Captures, Regex}; +use std::collections::{BTreeMap, HashSet}; +use std::fmt::Write; +use std::path::PathBuf; + +/// The Regex for rules like `r[foo]`. +static RULE_RE: Lazy = Lazy::new(|| Regex::new(r"(?m)^r\[([^]]+)]$").unwrap()); + +/// The set of rules defined in the reference. +#[derive(Default)] +pub struct Rules { + /// A mapping from a rule identifier to a tuple of `(source_path, path)`. + /// + /// `source_path` is the path to the markdown source file relative to the + /// `SUMMARY.md`. + /// + /// `path` is the same as `source_path`, except filenames like `README.md` + /// are translated to `index.md`. Which to use depends on if you are + /// trying to access the source files (`source_path`), or creating links + /// in the output (`path`). + pub def_paths: BTreeMap, + /// Set of rule name prefixes that have more specific rules within. + /// + /// For example, `asm.ts-args` is an interior prefix of `asm.ts-args.syntax`. + pub interior_prefixes: HashSet, +} + +impl Spec { + /// Collects all rule definitions in the book. 
+ pub fn collect_rules(&self, book: &Book) -> Rules { + let mut rules = Rules::default(); + for item in book.iter() { + let BookItem::Chapter(ch) = item else { + continue; + }; + if ch.is_draft_chapter() { + continue; + } + RULE_RE + .captures_iter(&ch.content) + .for_each(|caps: Captures<'_>| { + let rule_id = &caps[1]; + let source_path = ch.source_path.clone().unwrap_or_default(); + let path = ch.path.clone().unwrap_or_default(); + if let Some((old, _)) = rules + .def_paths + .insert(rule_id.to_string(), (source_path.clone(), path.clone())) + { + let message = format!( + "rule `{rule_id}` defined multiple times\n\ + First location: {old:?}\n\ + Second location: {source_path:?}" + ); + if self.deny_warnings { + panic!("error: {message}"); + } else { + eprintln!("warning: {message}"); + } + } + let mut parts: Vec<_> = rule_id.split('.').collect(); + while !parts.is_empty() { + parts.pop(); + let prefix = parts.join("."); + rules.interior_prefixes.insert(prefix); + } + }); + } + + rules + } + + /// Converts lines that start with `r[…]` into a "rule" which has special + /// styling and can be linked to. + pub fn render_rule_definitions(&self, content: &str, tests: &RuleToTests) -> String { + RULE_RE + .replace_all(content, |caps: &Captures<'_>| { + let rule_id = &caps[1]; + let mut test_html = String::new(); + if let Some(tests) = tests.get(rule_id) { + test_html = format!( + "\n\ +     \ + Tests\n\ +
\n\ + Tests with this rule: +
    "); + for test in tests { + writeln!( + test_html, + "
  • {test_path}
  • ", + test_path = test.path, + git_ref = self.git_ref + ) + .unwrap(); + } + + test_html.push_str("
"); + } + format!( + "
\ + [{rule_id}]\ + {test_html}\ +
\n" + ) + }) + .to_string() + } +} diff --git a/mdbook-spec/src/test_links.rs b/mdbook-spec/src/test_links.rs new file mode 100644 index 000000000..8f847d58c --- /dev/null +++ b/mdbook-spec/src/test_links.rs @@ -0,0 +1,203 @@ +//! Handling for linking tests in rust's testsuite to rule identifiers. + +use crate::{Rules, Spec}; +use mdbook::book::{Book, BookItem}; +use std::collections::HashMap; +use std::fmt::Write; +use std::path::PathBuf; +use walkdir::WalkDir; + +/// Mapping of rule identifier to the tests that include that identifier. +pub type RuleToTests = HashMap>; +/// A test in rustc's test suite. +pub struct Test { + pub path: String, +} + +const TABLE_START: &str = " + + + + + + + + +"; + +/// Generates an HTML table summarizing the coverage of the testsuite. +pub fn make_summary_table(book: &Book, tests: &RuleToTests, rules: &Rules) -> String { + let ch_to_rules = invert_rule_map(rules); + + let mut table = String::from(TABLE_START); + let mut total_rules = 0; + let mut total_tests = 0; + let mut total_uncovered = 0; + + for (item_index, item) in book.iter().enumerate() { + let BookItem::Chapter(ch) = item else { + continue; + }; + let Some(ch_path) = &ch.path else { + continue; + }; + let level = ch + .number + .as_ref() + .map(|ch| ch.len() - 1) + .unwrap_or_default() as u32; + // Note: This path assumes that the summary chapter is in the root of + // the book. If instead it is in a subdirectory, then this needs to + // include relative `../` as needed. 
+ let html_path = ch_path + .with_extension("html") + .to_str() + .unwrap() + .replace('\\', "/"); + let number = ch + .number + .as_ref() + .map(|n| n.to_string()) + .unwrap_or_default(); + let mut num_rules = 0; + let mut num_tests_str = String::from(""); + let mut uncovered_str = String::from(""); + let mut coverage_str = String::from(""); + if let Some(rules) = ch_to_rules.get(ch_path) { + num_rules = rules.len(); + total_rules += num_rules; + let num_tests = rules + .iter() + .map(|rule| tests.get(rule).map(|ts| ts.len()).unwrap_or_default()) + .sum::(); + total_tests += num_tests; + num_tests_str = num_tests.to_string(); + let uncovered_rules: Vec<_> = rules + .iter() + .filter(|rule| !tests.contains_key(rule.as_str())) + .collect(); + let uncovered = uncovered_rules.len(); + total_uncovered += uncovered; + coverage_str = fmt_pct(uncovered, num_rules); + if uncovered == 0 { + uncovered_str = String::from("0"); + } else { + uncovered_str = format!( + "
\n\ + \ + {uncovered}\n\ +
\n\ + Uncovered rules +
    "); + for uncovered_rule in uncovered_rules { + writeln!( + uncovered_str, + "
  • {uncovered_rule}
  • " + ) + .unwrap(); + } + uncovered_str.push_str("
"); + } + } + let indent = " ".repeat(level as usize * 6); + + writeln!( + table, + "
\n\ + \n\ + \n\ + \n\ + \n\ + \n\ + ", + name = ch.name, + ) + .unwrap(); + } + + let total_coverage = fmt_pct(total_uncovered, total_rules); + writeln!( + table, + "\n\ + \n\ + \n\ + \n\ + \n\ + \n\ + " + ) + .unwrap(); + table.push_str("
RulesTestsUncovered RulesCoverage
{indent}{number} {name}{num_rules}{num_tests_str}{uncovered_str}{coverage_str}
Total:{total_rules}{total_tests}{total_uncovered}{total_coverage}
\n"); + table +} + +/// Formats a float as a percentage string. +fn fmt_pct(uncovered: usize, total: usize) -> String { + let pct = ((total - uncovered) as f32 / total as f32) * 100.0; + // Round up to tenths of a percent. + let x = (pct * 10.0).ceil() / 10.0; + format!("{x:.1}%") +} + +/// Inverts the rule map so that it is chapter path to set of rules in that +/// chapter. +fn invert_rule_map(rules: &Rules) -> HashMap> { + let mut map: HashMap> = HashMap::new(); + for (rule, (_, path)) in &rules.def_paths { + map.entry(path.clone()).or_default().push(rule.clone()); + } + for value in map.values_mut() { + value.sort(); + } + map +} + +impl Spec { + /// Scans all tests in rust-lang/rust, and creates a mapping of a rule + /// identifier to the set of tests that include that identifier. + pub fn collect_tests(&self, rules: &Rules) -> RuleToTests { + let mut map = HashMap::new(); + let Some(rust_root) = &self.rust_root else { + return map; + }; + for entry in WalkDir::new(rust_root.join("tests")) { + let entry = entry.unwrap(); + let path = entry.path(); + let relative = path.strip_prefix(rust_root).unwrap_or_else(|_| { + panic!("expected root {rust_root:?} to be a prefix of {path:?}") + }); + if path.extension().unwrap_or_default() == "rs" { + let contents = std::fs::read_to_string(path).unwrap(); + for line in contents.lines() { + if let Some(id) = line.strip_prefix("//@ reference: ") { + if rules.interior_prefixes.contains(id) { + let instead: Vec<_> = rules + .def_paths + .keys() + .filter(|key| key.starts_with(&format!("{id}."))) + .collect(); + eprintln!( + "info: Interior prefix rule {id} found in {path:?}\n \ + Tests should not be annotated with prefixed rule names.\n \ + Use the rules from {instead:?} instead." + ); + } else if !rules.def_paths.contains_key(id) { + eprintln!( + "info: Orphaned rule identifier {id} found in {path:?}\n \ + Please update the test to use an existing rule name." 
+ ); + } + let test = Test { + path: relative.to_str().unwrap().replace('\\', "/"), + }; + map.entry(id.to_string()).or_default().push(test); + } + } + } + } + for tests in map.values_mut() { + tests.sort_by(|a, b| a.path.cmp(&b.path)); + } + map + } +} diff --git a/src/SUMMARY.md b/src/SUMMARY.md index 2b17bf45d..91f343b8d 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -132,4 +132,5 @@ - [Appendices](appendices.md) - [Macro Follow-Set Ambiguity Formal Specification](macro-ambiguity.md) - [Influences](influences.md) + - [Test summary](test-summary.md) - [Glossary](glossary.md) diff --git a/src/abi.md b/src/abi.md index 09ff20b16..b28457ad9 100644 --- a/src/abi.md +++ b/src/abi.md @@ -74,6 +74,8 @@ with the same name (or with a well-known symbol), leading to undefined behavior. extern "C" fn foo() {} ``` +> **Edition differences**: Before the 2024 edition it is allowed to use the `no_mangle` attribute without the `unsafe` qualification. + ## The `link_section` attribute The *`link_section` attribute* specifies the section of the object file that a @@ -90,6 +92,8 @@ of memory not expecting them, such as mutable data into read-only areas. pub static VAR1: u32 = 1; ``` +> **Edition differences**: Before the 2024 edition it is allowed to use the `link_section` attribute without the `unsafe` qualification. + ## The `export_name` attribute The *`export_name` attribute* specifies the name of the symbol that will be @@ -105,6 +109,8 @@ behavior. pub fn name_in_rust() { } ``` +> **Edition differences**: Before the 2024 edition it is allowed to use the `export_name` attribute without the `unsafe` qualification. 
+ [_MetaNameValueStr_]: attributes.md#meta-item-attribute-syntax [`static` items]: items/static-items.md [attribute]: attributes.md diff --git a/src/attributes.md b/src/attributes.md index f3ca9173e..f537e4cd2 100644 --- a/src/attributes.md +++ b/src/attributes.md @@ -1,6 +1,7 @@ {{#include attributes-redirect.html}} # Attributes +r[attributes.syntax] > **Syntax**\ > _InnerAttribute_ :\ >    `#` `!` `[` _Attr_ `]` @@ -16,20 +17,24 @@ >       [_DelimTokenTree_]\ >    | `=` [_Expression_] +r[attributes.intro] An _attribute_ is a general, free-form metadatum that is interpreted according to name, convention, language, and compiler version. Attributes are modeled on Attributes in [ECMA-335], with the syntax coming from [ECMA-334] \(C#). +r[attributes.inner] _Inner attributes_, written with a bang (`!`) after the hash (`#`), apply to the item that the attribute is declared within. _Outer attributes_, written without the bang after the hash, apply to the thing that follows the attribute. +r[attributes.input] The attribute consists of a path to the attribute, followed by an optional delimited token tree whose interpretation is defined by the attribute. Attributes other than macro attributes also allow the input to be an equals sign (`=`) followed by an expression. See the [meta item syntax](#meta-item-attribute-syntax) below for more details. +r[attributes.safety] An attribute may be unsafe to apply. To avoid undefined behavior when using these attributes, certain obligations that cannot be checked by the compiler must be met. 
To assert these have been, the attribute is wrapped in @@ -41,13 +46,15 @@ The following attributes are unsafe: * [`link_section`] * [`no_mangle`] +r[attributes.kind] Attributes can be classified into the following kinds: * [Built-in attributes] -* [Macro attributes][attribute macros] +* [Proc macro attributes][attribute macros] * [Derive macro helper attributes] * [Tool attributes](#tool-attributes) +r[attributes.allowed-position] Attributes may be applied to many things in the language: * All [item declarations] accept outer attributes while [external blocks], @@ -100,9 +107,13 @@ fn some_unused_variables() { ## Meta Item Attribute Syntax +r[attributes.meta] + +r[attributes.meta.intro] A "meta item" is the syntax used for the _Attr_ rule by most [built-in attributes]. It has the following grammar: +r[attributes.meta.syntax] > **Syntax**\ > _MetaItem_ :\ >       [_SimplePath_]\ @@ -116,10 +127,12 @@ attributes]. It has the following grammar: >       _MetaItem_\ >    | [_Expression_] +r[attributes.meta.literal-expr] Expressions in meta items must macro-expand to literal expressions, which must not include integer or float type suffixes. Expressions which are not literal expressions will be syntactically accepted (and can be passed to proc-macros), but will be rejected after parsing. +r[attributes.meta.order] Note that if the attribute appears within another macro, it will be expanded after that outer macro. For example, the following code will expand the `Serialize` proc-macro first, which must preserve the `include_str!` call in @@ -133,6 +146,7 @@ struct Foo { } ``` +r[attributes.meta.order-macro] Additionally, macros in attributes will be expanded only after all other attributes applied to the item: ```rust ignore @@ -143,6 +157,7 @@ Additionally, macros in attributes will be expanded only after all other attribu fn foo() {} ``` +r[attributes.meta.builtin] Various built-in attributes use different subsets of the meta item syntax to specify their inputs. 
The following grammar rules show some commonly used forms: @@ -175,6 +190,9 @@ _MetaListNameValueStr_ | `link(name = "CoreFoundation", kind = "framework")` ## Active and inert attributes +r[attributes.activity] + +r[attributes.activity.intro] An attribute is either active or inert. During attribute processing, *active attributes* remove themselves from the thing they are on while *inert attributes* stay on. @@ -185,15 +203,20 @@ active. All other attributes are inert. ## Tool attributes +r[attributes.tool] + +r[attributes.tool.intro] The compiler may allow attributes for external tools where each tool resides in its own module in the [tool prelude]. The first segment of the attribute path is the name of the tool, with one or more additional segments whose interpretation is up to the tool. +r[attributes.tool.ignored] When a tool is not in use, the tool's attributes are accepted without a warning. When the tool is in use, the tool is responsible for processing and interpretation of its attributes. +r[attributes.tool.prelude] Tool attributes are not available if the [`no_implicit_prelude`] attribute is used. @@ -213,19 +236,24 @@ pub fn f() {} ## Built-in attributes index +r[attributes.builtin] + The following is an index of all built-in attributes. - Conditional compilation - [`cfg`] --- Controls conditional compilation. - [`cfg_attr`] --- Conditionally includes attributes. + - Testing - [`test`] --- Marks a function as a test. - [`ignore`] --- Disables a test function. - [`should_panic`] --- Indicates a test should generate a panic. + - Derive - [`derive`] --- Automatic trait implementations. - [`automatically_derived`] --- Marker for implementations created by `derive`. + - Macros - [`macro_export`] --- Exports a `macro_rules` macro for cross-crate usage. - [`macro_use`] --- Expands macro visibility, or imports macros from other @@ -233,12 +261,14 @@ The following is an index of all built-in attributes. - [`proc_macro`] --- Defines a function-like macro. 
- [`proc_macro_derive`] --- Defines a derive macro. - [`proc_macro_attribute`] --- Defines an attribute macro. + - Diagnostics - [`allow`], [`expect`], [`warn`], [`deny`], [`forbid`] --- Alters the default lint level. - [`deprecated`] --- Generates deprecation notices. - [`must_use`] --- Generates a lint for unused values. - [`diagnostic::on_unimplemented`] --- Hints the compiler to emit a certain error message if a trait is not implemented. + - ABI, linking, symbols, and FFI - [`link`] --- Specifies a native library to link with an `extern` block. - [`link_name`] --- Specifies the name of the symbol for functions or statics @@ -257,35 +287,44 @@ The following is an index of all built-in attributes. - [`used`] --- Forces the compiler to keep a static item in the output object file. - [`crate_name`] --- Specifies the crate name. + - Code generation - [`inline`] --- Hint to inline code. - [`cold`] --- Hint that a function is unlikely to be called. - [`no_builtins`] --- Disables use of certain built-in functions. - [`target_feature`] --- Configure platform-specific code generation. - - [`track_caller`] - Pass the parent call location to `std::panic::Location::caller()`. - - [`instruction_set`] - Specify the instruction set used to generate a functions code + - [`track_caller`] --- Pass the parent call location to `std::panic::Location::caller()`. + - [`instruction_set`] --- Specify the instruction set used to generate a function's code + - Documentation - `doc` --- Specifies documentation. See [The Rustdoc Book] for more information. [Doc comments] are transformed into `doc` attributes. + - Preludes - [`no_std`] --- Removes std from the prelude. - [`no_implicit_prelude`] --- Disables prelude lookups within a module. + - Modules - [`path`] --- Specifies the filename for a module. + - Limits - [`recursion_limit`] --- Sets the maximum recursion limit for certain compile-time operations. - [`type_length_limit`] --- Sets the maximum size of a polymorphic type.
+ - Runtime - [`panic_handler`] --- Sets the function to handle panics. - [`global_allocator`] --- Sets the global memory allocator. - [`windows_subsystem`] --- Specifies the windows subsystem to link with. + - Features - `feature` --- Used to enable unstable or experimental compiler features. See [The Unstable Book] for features implemented in `rustc`. + - Type System - [`non_exhaustive`] --- Indicate that a type will have more fields/variants added in future. + - Debugger - [`debugger_visualizer`] --- Embeds a file that specifies debugger output for a type. - [`collapse_debuginfo`] --- Controls how macro invocations are encoded in debuginfo. diff --git a/src/attributes/codegen.md b/src/attributes/codegen.md index f30c296bc..297d6436f 100644 --- a/src/attributes/codegen.md +++ b/src/attributes/codegen.md @@ -1,13 +1,19 @@ # Code generation attributes +r[attributes.codegen] + The following [attributes] are used for controlling code generation. ## Optimization hints +r[attributes.codegen.hint] + +r[attributes.codegen.hint.cold-inline] The `cold` and `inline` [attributes] give suggestions to generate code in a way that may be faster than what it would do without the hint. The attributes are only hints, and may be ignored. +r[attributes.codegen.hint.usage] Both attributes can be used on [functions]. When applied to a function in a [trait], they apply only to that function when used as a default function for a trait implementation and not to all trait implementations. The attributes @@ -15,6 +21,9 @@ have no effect on a trait function without a body. ### The `inline` attribute +r[attributes.codegen.inline] + +r[attributes.codegen.inline.intro] The *`inline` [attribute]* suggests that a copy of the attributed function should be placed in the caller, rather than generating code to call the function where it is defined. @@ -23,6 +32,7 @@ function where it is defined. > internal heuristics. 
Incorrectly inlining functions can make the program > slower, so this attribute should be used with care. +r[attributes.codegen.inline.modes] There are three ways to use the inline attribute: * `#[inline]` *suggests* performing an inline expansion. @@ -36,17 +46,24 @@ There are three ways to use the inline attribute: ### The `cold` attribute +r[attributes.codegen.cold] + The *`cold` [attribute]* suggests that the attributed function is unlikely to be called. ## The `no_builtins` attribute +r[attributes.codegen.no_builtins] + The *`no_builtins` [attribute]* may be applied at the crate level to disable optimizing certain code patterns to invocations of library functions that are assumed to exist. ## The `target_feature` attribute +r[attributes.codegen.target_feature] + +r[attributes.codegen.target_feature.intro] The *`target_feature` [attribute]* may be applied to a function to enable code generation of that function for specific platform architecture features. It uses the [_MetaListNameValueStr_] syntax with a single key of @@ -58,24 +75,31 @@ features. It uses the [_MetaListNameValueStr_] syntax with a single key of unsafe fn foo_avx2() {} ``` +r[attributes.codegen.target_feature.arch] Each [target architecture] has a set of features that may be enabled. It is an error to specify a feature for a target architecture that the crate is not being compiled for. +r[attributes.codegen.target_feature.target-ub] It is [undefined behavior] to call a function that is compiled with a feature that is not supported on the current platform the code is running on, *except* if the platform explicitly documents this to be safe. +r[attributes.codegen.target_feature.inline] Functions marked with `target_feature` are not inlined into a context that does not support the given features. The `#[inline(always)]` attribute may not be used with a `target_feature` attribute. 
### Available features +r[attributes.codegen.target_feature.availability] + The following is a list of the available feature names. #### `x86` or `x86_64` +r[attributes.codegen.target_feature.x86] + Executing code with unsupported features is undefined behavior on this platform. Hence this platform requires that `#[target_feature]` is only applied to [`unsafe` functions][unsafe function]. @@ -143,6 +167,8 @@ Feature | Implicitly Enables | Description #### `aarch64` +r[attributes.codegen.target_feature.aarch64] + This platform requires that `#[target_feature]` is only applied to [`unsafe` functions][unsafe function]. @@ -206,6 +232,8 @@ Feature | Implicitly Enables | Feature Name #### `riscv32` or `riscv64` +r[attributes.codegen.target_feature.riscv] + This platform requires that `#[target_feature]` is only applied to [`unsafe` functions][unsafe function]. @@ -266,6 +294,8 @@ Feature | Implicitly Enables | Description #### `wasm32` or `wasm64` +r[attributes.codegen.target_feature.wasm] + `#[target_feature]` may be used with both safe and [`unsafe` functions][unsafe function] on Wasm platforms. 
It is impossible to cause undefined behavior via the `#[target_feature]` attribute because @@ -282,6 +312,9 @@ Feature | Implicitly Enables | Description `relaxed-simd` | `simd128` | [WebAssembly relaxed simd proposal][relaxed-simd] `sign-ext` | | [WebAssembly sign extension operators Proposal][sign-ext] `simd128` | | [WebAssembly simd proposal][simd128] +`multivalue` | | [WebAssembly multivalue proposal][multivalue] +`reference-types` | | [WebAssembly reference-types proposal][reference-types] +`tail-call` | | [WebAssembly tail-call proposal][tail-call] [bulk-memory]: https://github.com/WebAssembly/bulk-memory-operations [extended-const]: https://github.com/WebAssembly/extended-const @@ -290,14 +323,21 @@ Feature | Implicitly Enables | Description [relaxed-simd]: https://github.com/WebAssembly/relaxed-simd [sign-ext]: https://github.com/WebAssembly/sign-extension-ops [simd128]: https://github.com/webassembly/simd +[reference-types]: https://github.com/webassembly/reference-types +[tail-call]: https://github.com/webassembly/tail-call +[multivalue]: https://github.com/webassembly/multi-value ### Additional information +r[attributes.codegen.target_feature.info] + +r[attributes.codegen.target_feature.remark-cfg] See the [`target_feature` conditional compilation option] for selectively enabling or disabling compilation of code based on compile-time settings. Note that this option is not affected by the `target_feature` attribute, and is only driven by the features enabled for the entire crate. +r[attributes.codegen.target_feature.remark-rt] See the [`is_x86_feature_detected`] or [`is_aarch64_feature_detected`] macros in the standard library for runtime feature detection on these platforms. @@ -308,11 +348,17 @@ in the standard library for runtime feature detection on these platforms. 
## The `track_caller` attribute +r[attributes.codegen.track_caller] + +r[attributes.codegen.track_caller.allowed-positions] The `track_caller` attribute may be applied to any function with [`"Rust"` ABI][rust-abi] -with the exception of the entry point `fn main`. When applied to functions and methods in -trait declarations, the attribute applies to all implementations. If the trait provides a +with the exception of the entry point `fn main`. + +r[attributes.codegen.track_caller.traits] +When applied to functions and methods in trait declarations, the attribute applies to all implementations. If the trait provides a default implementation with the attribute, then the attribute also applies to override implementations. +r[attributes.codegen.track_caller.extern] When applied to a function in an `extern` block the attribute must also be applied to any linked implementations, otherwise undefined behavior results. When applied to a function which is made available to an `extern` block, the declaration in the `extern` block must also have the attribute, @@ -320,6 +366,7 @@ otherwise undefined behavior results. ### Behavior +r[attributes.codegen.track_caller.behavior] Applying the attribute to a function `f` allows code within `f` to get a hint of the [`Location`] of the "topmost" tracked call that led to `f`'s invocation. At the point of observation, an implementation behaves as if it walks up the stack from `f`'s frame to find the nearest frame of an @@ -399,8 +446,12 @@ And so on. ### Limitations +r[attributes.codegen.track_caller.limits] + +r[attributes.codegen.track_caller.hint] This information is a hint and implementations are not required to preserve it. +r[attributes.codegen.track_caller.decay] In particular, coercing a function with `#[track_caller]` to a function pointer creates a shim which appears to observers to have been called at the attributed function's definition site, losing actual caller information across virtual calls. 
A common example of this coercion is the creation of a @@ -431,18 +482,27 @@ trait object whose methods are attributed. ## The `instruction_set` attribute +r[attributes.codegen.instruction_set] + +r[attributes.codegen.instruction_set.allowed-positions] The *`instruction_set` [attribute]* may be applied to a function to control which instruction set the function will be generated for. + +r[attributes.codegen.instruction_set.behavior] This allows mixing more than one instruction set in a single program on CPU architectures that support it. + +r[attributes.codegen.instruction_set.syntax] It uses the [_MetaListPath_] syntax, and a path comprised of the architecture family name and instruction set name. [_MetaListPath_]: ../attributes.md#meta-item-attribute-syntax +r[attributes.codegen.instruction_set.target-limits] It is a compilation error to use the `instruction_set` attribute on a target that does not support it. ### On ARM -For the `ARMv4T` and `ARMv5te` architectures, the following are supported: +r[attributes.codegen.instruction_set.arm] +For the `ARMv4T` and `ARMv5te` architectures, the following are supported: * `arm::a32` --- Generate the function as A32 "ARM" code. * `arm::t32` --- Generate the function as T32 "Thumb" code. diff --git a/src/attributes/debugger.md b/src/attributes/debugger.md index 21add01ca..2521c4e83 100644 --- a/src/attributes/debugger.md +++ b/src/attributes/debugger.md @@ -1,20 +1,32 @@ # Debugger attributes +r[attributes.debugger] + The following [attributes] are used for enhancing the debugging experience when using third-party debuggers like GDB or WinDbg. ## The `debugger_visualizer` attribute +r[attributes.debugger.debugger_visualizer] + +r[attributes.debugger.debugger_visualizer.intro] The *`debugger_visualizer` attribute* can be used to embed a debugger visualizer file into the debug information. This enables an improved debugger experience for displaying values in the debugger. 
+ +r[attributes.debugger.debugger_visualizer.syntax] It uses the [_MetaListNameValueStr_] syntax to specify its inputs, and must be specified as a crate attribute. ### Using `debugger_visualizer` with Natvis +r[attributes.debugger.debugger_visualizer.natvis] + +r[attributes.debugger.debugger_visualizer.natvis.intro] Natvis is an XML-based framework for Microsoft debuggers (such as Visual Studio and WinDbg) that uses declarative rules to customize the display of types. For detailed information on the Natvis format, refer to Microsoft's [Natvis documentation]. +r[attributes.debugger.debugger_visualizer.natvis.msvc] This attribute only supports embedding Natvis files on `-windows-msvc` targets. +r[attributes.debugger.debugger_visualizer.natvis.path] The path to the Natvis file is specified with the `natvis_file` key, which is a path relative to the crate source file: @@ -72,6 +84,9 @@ When viewed under WinDbg, the `fancy_rect` variable would be shown as follows: ### Using `debugger_visualizer` with GDB +r[attributes.debugger.debugger_visualizer.gdb] + +r[attributes.debugger.debugger_visualizer.gdb.pretty] GDB supports the use of a structured Python script, called a *pretty printer*, that describes how a type should be visualized in the debugger view. For detailed information on pretty printers, refer to GDB's [pretty printing documentation]. @@ -81,6 +96,7 @@ There are two ways to enable auto-loading embedded pretty printers: For more information, see GDB's [auto-loading documentation]. 1. Create a file named `gdbinit` under `$HOME/.config/gdb` (you may need to create the directory if it doesn't already exist). Add the following line to that file: `add-auto-load-safe-path path/to/binary`. +r[attributes.debugger.debugger_visualizer.gdb.path] These scripts are embedded using the `gdb_script_file` key, which is a path relative to the crate source file. 
@@ -142,16 +158,22 @@ When the crate's debug executable is passed into GDB[^rust-gdb], `print bob` wil ## The `collapse_debuginfo` attribute +r[attributes.debugger.collapse_debuginfo] + +r[attributes.debugger.collapse_debuginfo.intro] The *`collapse_debuginfo` [attribute]* controls whether code locations from a macro definition are collapsed into a single location associated with the macro's call site, when generating debuginfo for code calling this macro. +r[attributes.debugger.collapse_debuginfo.syntax] The attribute uses the [_MetaListIdents_] syntax to specify its inputs, and can only be applied to macro definitions. +r[attributes.debugger.collapse_debuginfo.options] Accepted options: - `#[collapse_debuginfo(yes)]` --- code locations in debuginfo are collapsed. - `#[collapse_debuginfo(no)]` --- code locations in debuginfo are not collapsed. - `#[collapse_debuginfo(external)]` --- code locations in debuginfo are collapsed only if the macro comes from a different crate. +r[attributes.debugger.collapse_debuginfo.default] The `external` behavior is the default for macros that don't have this attribute, unless they are built-in macros. For built-in macros the default is `yes`. diff --git a/src/attributes/derive.md b/src/attributes/derive.md index 44ce8c7b3..6a61dbf78 100644 --- a/src/attributes/derive.md +++ b/src/attributes/derive.md @@ -1,7 +1,13 @@ # Derive +r[attributes.derive] + +r[attributes.derive.intro] The *`derive` attribute* allows new [items] to be automatically generated for -data structures. It uses the [_MetaListPaths_] syntax to specify a list of +data structures. + +r[attributes.derive.syntax] +It uses the [_MetaListPaths_] syntax to specify a list of traits to implement or paths to [derive macros] to process. For example, the following will create an [`impl` item] for the @@ -27,10 +33,12 @@ impl PartialEq for Foo { } ``` +r[attributes.derive.proc-macro] You can implement `derive` for your own traits through [procedural macros]. 
## The `automatically_derived` attribute +r[attributes.derive.automatically_derived] The *`automatically_derived` attribute* is automatically added to [implementations] created by the `derive` attribute for built-in traits. It has no direct effect, but it may be used by tools and diagnostic lints to diff --git a/src/attributes/diagnostics.md b/src/attributes/diagnostics.md index 34dab5913..c75915d51 100644 --- a/src/attributes/diagnostics.md +++ b/src/attributes/diagnostics.md @@ -1,25 +1,41 @@ # Diagnostic attributes +r[attributes.diagnostics] + The following [attributes] are used for controlling or generating diagnostic messages during compilation. ## Lint check attributes +r[attributes.diagnostics.lint] + A lint check names a potentially undesirable coding pattern, such as -unreachable code or omitted documentation. The lint attributes `allow`, +unreachable code or omitted documentation. + +r[attributes.diagnostics.lint.level] +The lint attributes `allow`, `expect`, `warn`, `deny`, and `forbid` use the [_MetaListPaths_] syntax to specify a list of lint names to change the lint level for the entity to which the attribute applies. For any lint check `C`: +r[attributes.diagnostics.lint.allow] * `#[allow(C)]` overrides the check for `C` so that violations will go unreported. + +r[attributes.diagnostics.lint.expect] * `#[expect(C)]` indicates that lint `C` is expected to be emitted. The attribute will suppress the emission of `C` or issue a warning, if the expectation is unfulfilled. + +r[attributes.diagnostics.lint.warn] * `#[warn(C)]` warns about violations of `C` but continues compilation. 
+ +r[attributes.diagnostics.lint.deny] * `#[deny(C)]` signals an error after encountering a violation of `C`, + +r[attributes.diagnostics.lint.forbid] * `#[forbid(C)]` is the same as `deny(C)`, but also forbids changing the lint level afterwards, @@ -42,9 +58,11 @@ pub mod m1 { } ``` +r[attributes.diagnostics.lint.override] Lint attributes can override the level specified from a previous attribute, as -long as the level does not attempt to change a forbidden lint. Previous -attributes are those from a higher level in the syntax tree, or from a +long as the level does not attempt to change a forbidden lint +(except for `deny`, which is allowed inside a `forbid` context, but ignored). +Previous attributes are those from a higher level in the syntax tree, or from a previous attribute on the same entity as listed in left-to-right source order. This example shows how one can use `allow` and `warn` to toggle a particular @@ -88,6 +106,7 @@ pub mod m3 { ### Lint Reasons +r[attributes.diagnostics.lint.reason] All lint attributes support an additional `reason` parameter, to give context why a certain attribute was added. This reason will be displayed as part of the lint message if the lint is emitted at the defined level. @@ -124,6 +143,9 @@ pub fn get_path() -> PathBuf { ### The `#[expect]` attribute +r[attributes.diagnostics.expect] + +r[attributes.diagnostics.expect.intro] The `#[expect(C)]` attribute creates a lint expectation for lint `C`. The expectation will be fulfilled, if a `#[warn(C)]` attribute at the same location would result in a lint emission. If the expectation is unfulfilled, because @@ -149,6 +171,7 @@ fn main() { } ``` +r[attributes.diagnostics.expect.fulfillment] The lint expectation is only fulfilled by lint emissions which have been suppressed by the `expect` attribute. 
If the lint level is modified in the scope with other level attributes like `allow` or `warn`, the lint emission will be handled accordingly and the @@ -178,6 +201,7 @@ fn select_song() { } ``` +r[attributes.diagnostics.expect.independent] If the `expect` attribute contains several lints, each one is expected separately. For a lint group it's enough if one lint inside the group has been emitted: @@ -206,6 +230,7 @@ pub fn another_example() { ### Lint groups +r[attributes.diagnostics.lint.group] Lints may be organized into named groups so that the level of related lints can be adjusted together. Using a named group is equivalent to listing out the lints within that group. @@ -226,6 +251,7 @@ fn example() { } ``` +r[attributes.diagnostics.lint.group.warnings] There is a special group named "warnings" which includes all lints at the "warn" level. The "warnings" group ignores attribute order and applies to all lints that would otherwise warn within the entity. @@ -245,9 +271,13 @@ fn example_err() { ### Tool lint attributes +r[attributes.diagnostics.lint.tool] + +r[attributes.diagnostics.lint.tool.intro] Tool lints allows using scoped lints, to `allow`, `warn`, `deny` or `forbid` lints of certain tools. +r[attributes.diagnostics.lint.tool.activation] Tool lints only get checked when the associated tool is active. If a lint attribute, such as `allow`, references a nonexistent tool lint, the compiler will not warn about the nonexistent lint until you use the tool. @@ -275,10 +305,14 @@ fn foo() { ## The `deprecated` attribute +r[attributes.diagnostics.deprecated] + +r[attributes.diagnostics.deprecated.intro] The *`deprecated` attribute* marks an item as deprecated. `rustc` will issue warnings on usage of `#[deprecated]` items. `rustdoc` will show item deprecation, including the `since` version and `note`, if available. +r[attributes.diagnostics.deprecated.syntax] The `deprecated` attribute has several forms: - `deprecated` --- Issues a generic message. 
@@ -292,6 +326,7 @@ The `deprecated` attribute has several forms: message. This is typically used to provide an explanation about the deprecation and preferred alternatives. +r[attributes.diagnostics.deprecated.allowed-positions] The `deprecated` attribute may be applied to any [item], [trait item], [enum variant], [struct field], [external block item], or [macro definition]. It cannot be applied to [trait implementation items]. When applied to an item @@ -317,15 +352,23 @@ The [RFC][1270-deprecation.md] contains motivations and more details. ## The `must_use` attribute +r[attributes.diagnostics.must_use] + +r[attributes.diagnostics.must_use.intro] The *`must_use` attribute* is used to issue a diagnostic warning when a value -is not "used". It can be applied to user-defined composite types +is not "used". + +r[attributes.diagnostics.must_use.allowed-positions] +The `must_use` attribute can be applied to user-defined composite types ([`struct`s][struct], [`enum`s][enum], and [`union`s][union]), [functions], and [traits]. +r[attributes.diagnostics.must_use.message] The `must_use` attribute may include a message by using the [_MetaNameValueStr_] syntax such as `#[must_use = "example message"]`. The message will be given alongside the warning. +r[attributes.diagnostics.must_use.type] When used on user-defined composite types, if the [expression] of an [expression statement] has that type, then the `unused_must_use` lint is violated. @@ -344,6 +387,7 @@ struct MustUse { MustUse::new(); ``` +r[attributes.diagnostics.must_use.fn] When used on a function, if the [expression] of an [expression statement] is a [call expression] to that function, then the `unused_must_use` lint is violated.
@@ -356,6 +400,7 @@ fn five() -> i32 { 5i32 } five(); ``` +r[attributes.diagnostics.must_use.trait] When used on a [trait declaration], a [call expression] of an [expression statement] to a function that returns an [impl trait] or a [dyn trait] of that trait violates the `unused_must_use` lint. @@ -373,6 +418,7 @@ fn get_critical() -> impl Critical { get_critical(); ``` +r[attributes.diagnostics.must_use.trait-function] When used on a function in a trait declaration, then the behavior also applies when the call expression is a function from an implementation of the trait. @@ -390,6 +436,7 @@ impl Trait for i32 { 5i32.use_me(); ``` +r[attributes.diagnostics.must_use.trait-impl-function] When used on a function in a trait implementation, the attribute does nothing. > Note: Trivial no-op expressions containing the value will not violate the @@ -424,36 +471,57 @@ When used on a function in a trait implementation, the attribute does nothing. ## The `diagnostic` tool attribute namespace +r[attributes.diagnostic.namespace] + +r[attributes.diagnostic.namespace.intro] The `#[diagnostic]` attribute namespace is a home for attributes to influence compile-time error messages. The hints provided by these attributes are not guaranteed to be used. + +r[attributes.diagnostic.namespace.unknown-invalid-syntax] Unknown attributes in this namespace are accepted, though they may emit warnings for unused attributes. Additionally, invalid inputs to known attributes will typically be a warning (see the attribute definitions for details). This is meant to allow adding or discarding attributes and changing inputs in the future to allow changes without the need to keep the non-meaningful attributes or options working. 
### The `diagnostic::on_unimplemented` attribute +r[attributes.diagnostic.on_unimplemented] + +r[attributes.diagnostic.on_unimplemented.intro] The `#[diagnostic::on_unimplemented]` attribute is a hint to the compiler to supplement the error message that would normally be generated in scenarios where a trait is required but not implemented on a type. + +r[attributes.diagnostic.on_unimplemented.allowed-positions] The attribute should be placed on a [trait declaration], though it is not an error to be located in other positions. + +r[attributes.diagnostic.on_unimplemented.syntax] The attribute uses the [_MetaListNameValueStr_] syntax to specify its inputs, though any malformed input to the attribute is not considered as an error to provide both forwards and backwards compatibility. -The following keys have the given meaning: +r[attributes.diagnostic.on_unimplemented.keys] +The following keys have the given meaning: * `message` --- The text for the top level error message. * `label` --- The text for the label shown inline in the broken code in the error message. * `note` --- Provides additional notes. +r[attributes.diagnostic.on_unimplemented.note-repetition] The `note` option can appear several times, which results in several note messages being emitted. -If any of the other options appears several times the first occurrence of the relevant option specifies the actually used value. -Any other occurrence generates an lint warning. -For any other non-existing option a lint-warning is generated. +r[attributes.diagnostic.on_unimplemented.repetition] +If any of the other options appears several times the first occurrence of the relevant option specifies the actually used value. Subsequent occurrences generate a warning. + +r[attributes.diagnostic.on_unimplemented.unknown-keys] +A warning is generated for any unknown keys.
+ +r[attributes.diagnostic.on_unimplemented.format-string] All three options accept a string as an argument, interpreted using the same formatting as a [`std::fmt`] string. -Format parameters with the given named parameter will be replaced with the following text: +r[attributes.diagnostic.on_unimplemented.format-parameters] +Format parameters with the given named parameter will be replaced with the following text: * `{Self}` --- The name of the type implementing the trait. * `{` *GenericParameterName* `}` --- The name of the generic argument's type for the given generic parameter. +r[attributes.diagnostic.on_unimplemented.invalid-formats] Any other format parameter will generate a warning, but will otherwise be included in the string as-is. +r[attributes.diagnostic.on_unimplemented.invalid-string] Invalid format strings may generate a warning, but are otherwise allowed, but may not display as intended. Format specifiers may generate a warning, but are otherwise ignored. diff --git a/src/attributes/limits.md b/src/attributes/limits.md index 79f292a31..1c8c46bb5 100644 --- a/src/attributes/limits.md +++ b/src/attributes/limits.md @@ -1,12 +1,20 @@ # Limits +r[attributes.limits] + The following [attributes] affect compile-time limits. ## The `recursion_limit` attribute +r[attributes.limits.recursion_limit] + +r[attributes.limits.recursion_limit.intro] The *`recursion_limit` attribute* may be applied at the [crate] level to set the maximum depth for potentially infinitely-recursive compile-time operations -like macro expansion or auto-dereference. It uses the [_MetaNameValueStr_] +like macro expansion or auto-dereference. + +r[attributes.limits.recursion_limit.syntax] +It uses the [_MetaNameValueStr_] syntax to specify the recursion depth. > Note: The default in `rustc` is 128. 
@@ -35,12 +43,17 @@ a!{} ## The `type_length_limit` attribute +r[attributes.limits.type_length_limit] + > **Note**: This limit is only enforced when the nightly `-Zenforce-type-length-limit` flag is active. > > For more information, see <https://github.com/rust-lang/rust/pull/127670>. +r[attributes.limits.type_length_limit.intro] The *`type_length_limit` attribute* limits the maximum number of type substitutions made when constructing a concrete type during monomorphization. + +r[attributes.limits.type_length_limit.syntax] It is applied at the [crate] level, and uses the [_MetaNameValueStr_] syntax to set the limit based on the number of type substitutions. diff --git a/src/attributes/testing.md b/src/attributes/testing.md index a2db083a7..d245da66b 100644 --- a/src/attributes/testing.md +++ b/src/attributes/testing.md @@ -1,5 +1,7 @@ # Testing attributes +r[attributes.testing] + The following [attributes] are used for specifying functions for performing tests. Compiling a crate in "test" mode enables building the test functions along with a test harness for executing the tests. Enabling the test mode also @@ -7,9 +9,16 @@ enables the [`test` conditional compilation option]. ## The `test` attribute -The *`test` attribute* marks a function to be executed as a test. These -functions are only compiled when in test mode. Test functions must be free, -monomorphic functions that take no arguments, and the return type must implement the [`Termination`] trait, for example: +r[attributes.testing.test] + +r[attributes.testing.test.intro] +The *`test` attribute* marks a function to be executed as a test. + +r[attributes.testing.test.enabled] +These functions are only compiled when in test mode.
+ +r[attributes.testing.test.allowed-positions] +Test functions must be free, monomorphic functions that take no arguments, and the return type must implement the [`Termination`] trait, for example: * `()` * `Result where T: Termination, E: Debug` @@ -21,6 +30,7 @@ monomorphic functions that take no arguments, and the return type must implement > Note: The test mode is enabled by passing the `--test` argument to `rustc` > or using `cargo test`. +r[attributes.testing.test.success] The test harness calls the returned value's [`report`] method, and classifies the test as passed or failed depending on whether the resulting [`ExitCode`] represents successful termination. In particular: * Tests that return `()` pass as long as they terminate and do not panic. @@ -42,10 +52,14 @@ fn test_the_thing() -> io::Result<()> { ## The `ignore` attribute +r[attributes.testing.ignore] + +r[attributes.testing.ignore.intro] A function annotated with the `test` attribute can also be annotated with the `ignore` attribute. The *`ignore` attribute* tells the test harness to not execute that function as a test. It will still be compiled when in test mode. +r[attributes.testing.ignore.syntax] The `ignore` attribute may optionally be written with the [_MetaNameValueStr_] syntax to specify a reason why the test is ignored. @@ -62,10 +76,17 @@ fn mytest() { ## The `should_panic` attribute +r[attributes.testing.should_panic] + +r[attributes.testing.should_panic.intro] A function annotated with the `test` attribute that returns `()` can also be -annotated with the `should_panic` attribute. The *`should_panic` attribute* +annotated with the `should_panic` attribute. + +r[attributes.testing.should_panic.behavior] +The *`should_panic` attribute* makes the test only pass if it actually panics. +r[attributes.testing.should_panic.syntax] The `should_panic` attribute may optionally take an input string that must appear within the panic message. 
If the string is not found in the message, then the test will fail. The string may be passed using the diff --git a/src/attributes/type_system.md b/src/attributes/type_system.md index d3ea63233..e26de1210 100644 --- a/src/attributes/type_system.md +++ b/src/attributes/type_system.md @@ -1,16 +1,25 @@ # Type system attributes +r[attributes.type-system] + The following [attributes] are used for changing how a type can be used. ## The `non_exhaustive` attribute +r[attributes.type-system.non_exhaustive] + +r[attributes.type-system.non_exhaustive.intro] The *`non_exhaustive` attribute* indicates that a type or variant may have -more fields or variants added in the future. It can be applied to -[`struct`s][struct], [`enum`s][enum], and `enum` variants. +more fields or variants added in the future. + +r[attributes.type-system.non_exhaustive.allowed-positions] +It can be applied to [`struct`s][struct], [`enum`s][enum], and `enum` variants. +r[attributes.type-system.non_exhaustive.syntax] The `non_exhaustive` attribute uses the [_MetaWord_] syntax and thus does not take any inputs. +r[attributes.type-system.non_exhaustive.same-crate] Within the defining crate, `non_exhaustive` has no effect. ```rust @@ -65,9 +74,11 @@ match message { } ``` +r[attributes.type-system.non_exhaustive.external-crate] Outside of the defining crate, types annotated with `non_exhaustive` have limitations that preserve backwards compatibility when new fields or variants are added. 
+r[attributes.type-system.non_exhaustive.construction] Non-exhaustive types cannot be constructed outside of the defining crate: - Non-exhaustive variants ([`struct`][struct] or [`enum` variant][enum]) cannot be constructed @@ -119,6 +130,7 @@ let message = Message::Reaction(0); let message = Message::Quit; ``` +r[attributes.type-system.non_exhaustive.match] There are limitations when matching on non-exhaustive types outside of the defining crate: - When pattern matching on a non-exhaustive variant ([`struct`][struct] or [`enum` variant][enum]), diff --git a/src/behavior-considered-undefined.md b/src/behavior-considered-undefined.md index 63fa28f63..424ef228f 100644 --- a/src/behavior-considered-undefined.md +++ b/src/behavior-considered-undefined.md @@ -1,29 +1,49 @@ ## Behavior considered undefined +r[undefined] + +r[undefined.general] Rust code is incorrect if it exhibits any of the behaviors in the following list. This includes code within `unsafe` blocks and `unsafe` functions. `unsafe` only means that avoiding undefined behavior is on the programmer; it does not change anything about the fact that Rust programs must never cause undefined behavior. +r[undefined.soundness] It is the programmer's responsibility when writing `unsafe` code to ensure that any safe code interacting with the `unsafe` code cannot trigger these behaviors. `unsafe` code that satisfies this property for any safe client is called *sound*; if `unsafe` code can be misused by safe code to exhibit undefined behavior, it is *unsound*. -> [!WARNING] -> The following list is not exhaustive; it may grow or shrink. There is no formal model of Rust's semantics for what is and is not allowed in unsafe code, so there may be more behavior considered unsafe. We also reserve the right to make some of the behavior in that list defined in the future. 
In other words, this list does not say that anything will *definitely* always be undefined in all future Rust version (but we might make such commitments for some list items in the future). -> -> Please read the [Rustonomicon] before writing unsafe code. +
+ +***Warning:*** The following list is not exhaustive; it may grow or shrink. +There is no formal model of Rust's semantics for what is and is not allowed in +unsafe code, so there may be more behavior considered unsafe. We also reserve +the right to make some of the behavior in that list defined in the future. In +other words, this list does not say that anything will *definitely* always be +undefined in all future Rust versions (but we might make such commitments for +some list items in the future). + +Please read the [Rustonomicon] before writing unsafe code. +
+ +r[undefined.race] * Data races. + +r[undefined.pointer-access] * Accessing (loading from or storing to) a place that is [dangling] or [based on a misaligned pointer]. + +r[undefined.place-projection] * Performing a place projection that violates the requirements of [in-bounds pointer arithmetic](pointer#method.offset). A place projection is a [field expression][project-field], a [tuple index expression][project-tuple], or an [array/slice index expression][project-slice]. + +r[undefined.alias] * Breaking the [pointer aliasing rules]. `Box`, `&mut T` and `&T` follow LLVM’s scoped [noalias] model, except if the `&T` contains an [`UnsafeCell`]. References and boxes must not be [dangling] while they are @@ -40,22 +60,37 @@ undefined behavior, it is *unsound*. All this also applies when values of these types are passed in a (nested) field of a compound type, but not behind pointer indirections. -* Mutating immutable bytes. All bytes inside a [`const`] item or within an implicitly [const-promoted] expression are immutable. + +r[undefined.immutable] +* Mutating immutable bytes. + All bytes reachable through a [const-promoted] expression are immutable, as well as bytes reachable through borrows in `static` and `const` initializers that have been [lifetime-extended] to `'static`. The bytes owned by an immutable binding or immutable `static` are immutable, unless those bytes are part of an [`UnsafeCell`]. Moreover, the bytes [pointed to] by a shared reference, including transitively through other references (both shared and mutable) and `Box`es, are immutable; transitivity includes those references stored in fields of compound types. A mutation is any write of more than 0 bytes which overlaps with any of the relevant bytes (even if that write does not change the memory contents). + +r[undefined.intrinsic] * Invoking undefined behavior via compiler intrinsics. 
+ +r[undefined.target-feature] * Executing code compiled with platform features that the current platform does not support (see [`target_feature`]), *except* if the platform explicitly documents this to be safe. + +r[undefined.call] * Calling a function with the wrong call ABI or unwinding from a function with the wrong unwind ABI. + +r[undefined.invalid] * Producing an [invalid value][invalid-values]. "Producing" a value happens any time a value is assigned to or read from a place, passed to a function/primitive operation or returned from a function/primitive operation. + +r[undefined.asm] * Incorrect use of inline assembly. For more details, refer to the [rules] to follow when writing code that uses inline assembly. + +r[undefined.const-transmute-ptr2int] * **In [const context](const_eval.md#const-context)**: transmuting or otherwise reinterpreting a pointer (reference, raw pointer, or function pointer) into some allocated object as a non-pointer type (such as integers). @@ -70,11 +105,15 @@ undefined behavior, it is *unsound*. ### Pointed-to bytes +r[undefined.pointed-to] The span of bytes a pointer or reference "points to" is determined by the pointer value and the size of the pointee type (using `size_of_val`). ### Places based on misaligned pointers [based on a misaligned pointer]: #places-based-on-misaligned-pointers +r[undefined.misaligned] + +r[undefined.misaligned.general] A place is said to be "based on a misaligned pointer" if the last `*` projection during place computation was performed on a pointer that was not aligned for its type. (If there is no `*` projection in the place expression, then this is @@ -92,12 +131,20 @@ alignment 1). In other words, the alignment requirement derives from the type of the pointer that was dereferenced, *not* the type of the field that is being accessed. -Note that a place based on a misaligned pointer only leads to undefined behavior -when it is loaded from or stored to. 
`&raw const`/`&raw mut` on such a place -is allowed. `&`/`&mut` on a place requires the alignment of the field type (or +r[undefined.misaligned.load-store] +Note that a place based on a misaligned pointer only leads to Undefined Behavior +when it is loaded from or stored to. + +r[undefined.misaligned.raw] +`&raw const`/`&raw mut` on such a place is allowed. + +r[undefined.misaligned.reference] +`&`/`&mut` on a place requires the alignment of the field type (or else the program would be "producing an invalid value"), which generally is a -less restrictive requirement than being based on an aligned pointer. Taking a -reference will lead to a compiler error in cases where the field type might be +less restrictive requirement than being based on an aligned pointer. + +r[undefined.misaligned.packed] +Taking a reference will lead to a compiler error in cases where the field type might be more aligned than the type that contains it, i.e., `repr(packed)`. This means that being based on an aligned pointer is always sufficient to ensure that the new reference is aligned, but it is not always necessary. @@ -105,43 +152,75 @@ new reference is aligned, but it is not always necessary. ### Dangling pointers [dangling]: #dangling-pointers +r[undefined.dangling] + +r[undefined.dangling.general] A reference/pointer is "dangling" if not all of the bytes it [points to] are part of the same live allocation (so in particular they all have to be part of *some* allocation). +r[undefined.dangling.zero-size] If the size is 0, then the pointer is trivially never "dangling" (even if it is a null pointer). +r[undefined.dangling.dynamic-size] Note that dynamically sized types (such as slices and strings) point to their -entire range, so it is important that the length metadata is never too large. In -particular, the dynamic size of a Rust value (as determined by `size_of_val`) +entire range, so it is important that the length metadata is never too large. 
+ +r[undefined.dangling.alloc-limit] +In particular, the dynamic size of a Rust value (as determined by `size_of_val`) must never exceed `isize::MAX`, since it is impossible for a single allocation to be larger than `isize::MAX`. ### Invalid values [invalid-values]: #invalid-values +r[undefined.validity] + +r[undefined.validity.general] The Rust compiler assumes that all values produced during program execution are "valid", and producing an invalid value is hence immediate UB. Whether a value is valid depends on the type: + +r[undefined.validity.bool] * A [`bool`] value must be `false` (`0`) or `true` (`1`). + +r[undefined.validity.fn-pointer] * A `fn` pointer value must be non-null. + +r[undefined.validity.char] * A `char` value must not be a surrogate (i.e., must not be in the range `0xD800..=0xDFFF`) and must be equal to or less than `char::MAX`. + +r[undefined.validity.never] * A `!` value must never exist. + +r[undefined.validity.scalar] * An integer (`i*`/`u*`), floating point value (`f*`), or raw pointer must be initialized, i.e., must not be obtained from [uninitialized memory][undef]. + +r[undefined.validity.str] * A `str` value is treated like `[u8]`, i.e. it must be initialized. + +r[undefined.validity.enum] * An `enum` must have a valid discriminant, and all fields of the variant indicated by that discriminant must be valid at their respective type. + +r[undefined.validity.struct] * A `struct`, tuple, and array requires all fields/elements to be valid at their respective type. + +r[undefined.validity.union] * For a `union`, the exact validity requirements are not decided yet. Obviously, all values that can be created entirely in safe code are valid. If the union has a zero-sized field, then every possible value is valid. Further details are [still being debated](https://github.com/rust-lang/unsafe-code-guidelines/issues/438). 
+ +r[undefined.validity.reference-box] * A reference or [`Box`] must be aligned, it cannot be [dangling], and it must point to a valid value (in case of dynamically sized types, using the actual dynamic type of the pointee as determined by the metadata). Note that the last point (about pointing to a valid value) remains a subject of some debate. + +r[undefined.validity.wide] * The metadata of a wide reference, [`Box`], or raw pointer must match the type of the unsized tail: * `dyn Trait` metadata must be a pointer to a compiler-generated vtable for `Trait`. @@ -149,18 +228,20 @@ Whether a value is valid depends on the type: * Slice (`[T]`) metadata must be a valid `usize`. Furthermore, for wide references and [`Box`], slice metadata is invalid if it makes the total size of the pointed-to value bigger than `isize::MAX`. + +r[undefined.validity.valid-range] * If a type has a custom range of a valid values, then a valid value must be in that range. In the standard library, this affects [`NonNull`] and [`NonZero`]. > **Note**: `rustc` achieves this with the unstable > `rustc_layout_scalar_valid_range_*` attributes. +r[undefined.validity.undef] **Note:** Uninitialized memory is also implicitly invalid for any type that has a restricted set of valid values. In other words, the only cases in which reading uninitialized memory is permitted are inside `union`s and in "padding" (the gaps between the fields of a type). 
- [`bool`]: types/boolean.md [`const`]: items/constant-items.md [noalias]: http://llvm.org/docs/LangRef.html#noalias @@ -179,3 +260,4 @@ reading uninitialized memory is permitted are inside `union`s and in "padding" [project-tuple]: expressions/tuple-expr.md#tuple-indexing-expressions [project-slice]: expressions/array-expr.md#array-and-slice-indexing-expressions [const-promoted]: destructors.md#constant-promotion +[lifetime-extended]: destructors.md#temporary-lifetime-extension diff --git a/src/comments.md b/src/comments.md index d395767e2..502bf89a8 100644 --- a/src/comments.md +++ b/src/comments.md @@ -1,5 +1,7 @@ # Comments +r[comments.syntax] + > **Lexer**\ > LINE_COMMENT :\ >       `//` (~\[`/` `!` `\n`] | `//`) ~`\n`\*\ @@ -34,26 +36,40 @@ ## Non-doc comments +r[comments.normal] + Comments follow the general C++ style of line (`//`) and block (`/* ... */`) comment forms. Nested block comments are supported. +r[comments.normal.tokenization] Non-doc comments are interpreted as a form of whitespace. ## Doc comments +r[comments.doc] + +r[comments.doc.syntax] Line doc comments beginning with exactly _three_ slashes (`///`), and block doc comments (`/** ... */`), both outer doc comments, are interpreted as a -special syntax for [`doc` attributes]. That is, they are equivalent to writing +special syntax for [`doc` attributes]. + +r[comments.doc.attributes] +That is, they are equivalent to writing `#[doc="..."]` around the body of the comment, i.e., `/// Foo` turns into `#[doc="Foo"]` and `/** Bar */` turns into `#[doc="Bar"]`. They must therefore appear before something that accepts an outer attribute. +r[comments.doc.inner-syntax] Line comments beginning with `//!` and block comments `/*! ... */` are doc comments that apply to the parent of the comment, rather than the item -that follows. That is, they are equivalent to writing `#![doc="..."]` around +that follows. 
+ +r[comments.doc.inner-attributes] +That is, they are equivalent to writing `#![doc="..."]` around the body of the comment. `//!` comments are usually used to document modules that occupy a source file. +r[comments.doc.bare-crs] The character `U+000D` (CR) is not allowed in doc comments. > **Note**: It is conventional for doc comments to contain Markdown, as expected by diff --git a/src/conditional-compilation.md b/src/conditional-compilation.md index f73f8354a..5255bb8d8 100644 --- a/src/conditional-compilation.md +++ b/src/conditional-compilation.md @@ -1,5 +1,8 @@ # Conditional compilation +r[cfg] + +r[cfg.syntax] > **Syntax**\ > _ConfigurationPredicate_ :\ >       _ConfigurationOption_\ @@ -22,34 +25,65 @@ > _ConfigurationPredicateList_\ >    _ConfigurationPredicate_ (`,` _ConfigurationPredicate_)\* `,`? -*Conditionally compiled source code* is source code that is compiled only under certain conditions. Source code can be made conditionally compiled using the [`cfg`] and [`cfg_attr`] [attributes] and the built-in [`cfg` macro]. Whether to compile can depend on the target architecture of the compiled crate, arbitrary values passed to the compiler, and other things further described below. +r[cfg.general] +*Conditionally compiled source code* is source code that is compiled only under certain conditions. + +r[cfg.attributes-macro] +Source code can be made conditionally compiled using the [`cfg`] and [`cfg_attr`] [attributes] and the built-in [`cfg` macro]. + +r[cfg.conditional] +Whether to compile can depend on the target architecture of the compiled crate, arbitrary values passed to the compiler, and other things further described below. +r[cfg.predicate] Each form of conditional compilation takes a _configuration predicate_ that evaluates to true or false. The predicate is one of the following: +r[cfg.predicate.option] * A configuration option. The predicate is true if the option is set, and false if it is unset. 
+ +r[cfg.predicate.all] * `all()` with a comma-separated list of configuration predicates. It is true if all of the given predicates are true, or if the list is empty. + +r[cfg.predicate.any] * `any()` with a comma-separated list of configuration predicates. It is true if at least one of the given predicates is true. If there are no predicates, it is false. + +r[cfg.predicate.not] * `not()` with a configuration predicate. It is true if its predicate is false and false if its predicate is true. +r[cfg.option-spec] _Configuration options_ are either names or key-value pairs, and are either set or unset. + +r[cfg.option-name] Names are written as a single identifier, such as `unix`. + +r[cfg.option-key-value] Key-value pairs are written as an identifier, `=`, and then a string, such as `target_arch = "x86_64"`. > **Note**: Whitespace around the `=` is ignored, so `foo="bar"` and `foo = "bar"` are equivalent. +r[cfg.option-key-uniqueness] Keys do not need to be unique. For example, both `feature = "std"` and `feature = "serde"` can be set at the same time. ## Set Configuration Options +r[cfg.options.set] + +r[cfg.options.general] Which configuration options are set is determined statically during the -compilation of the crate. Some options are _compiler-set_ based on data -about the compilation. Other options are _arbitrarily-set_ based on input -passed to the compiler outside of the code. It is not possible to set a +compilation of the crate. + +r[cfg.options.target] +Some options are _compiler-set_ based on data about the compilation. + +r[cfg.options.other] +Other options are _arbitrarily-set_ based on input passed to the compiler outside of the code. + +r[cfg.options.crate] +It is not possible to set a configuration option from within the source code of the crate being compiled. > **Note**: For `rustc`, arbitrary-set configuration options are set using the -> [`--cfg`] flag. +> [`--cfg`] flag. 
Configuration values for a specified target can be displayed with `rustc --print cfg --target $TARGET`. > **Note**: Configuration options with the key `feature` are a convention used > by [Cargo][cargo-feature] for specifying compile-time options and optional @@ -60,10 +94,14 @@ configuration option from within the source code of the crate being compiled. ### `target_arch` +r[cfg.target_arch] + +r[cfg.target_arch.gen] Key-value option set once with the target's CPU architecture. The value is similar to the first element of the platform's target triple, but not identical. +r[cfg.target_arch.values] Example values: * `"x86"` @@ -76,9 +114,13 @@ Example values: ### `target_feature` +r[cfg.target_feature] + +r[cfg.target_feature.general] Key-value option set for each platform feature available for the current compilation target. +r[cfg.target_feature.values] Example values: * `"avx"` @@ -90,14 +132,21 @@ Example values: * `"sse4.1"` See the [`target_feature` attribute] for more details on the available -features. An additional feature of `crt-static` is available to the +features. + +r[cfg.target_feature.crt_static] +An additional feature of `crt-static` is available to the `target_feature` option to indicate that a [static C runtime] is available. ### `target_os` +r[cfg.target_os] + +r[cfg.target_os.general] Key-value option set once with the target's operating system. This value is similar to the second and third element of the platform's target triple. +r[cfg.target_os.values] Example values: * `"windows"` @@ -113,10 +162,14 @@ Example values: ### `target_family` +r[cfg.target_family] + +r[cfg.target_family.general] Key-value option providing a more generic description of a target, such as the family of the operating systems or architectures that the target generally falls into. Any number of `target_family` key-value pairs can be set. 
+r[cfg.target_family.values] Example values: * `"unix"` @@ -126,11 +179,17 @@ Example values: ### `unix` and `windows` -`unix` is set if `target_family = "unix"` is set and `windows` is set if -`target_family = "windows"` is set. +r[cfg.target_family.unix] +`unix` is set if `target_family = "unix"` is set. + +r[cfg.target_family.windows] +`windows` is set if `target_family = "windows"` is set. ### `target_env` +r[cfg.target_env] + +r[cfg.target_env.general] Key-value option set with further disambiguating information about the target platform with information about the ABI or `libc` used. For historical reasons, this value is only defined as not the empty-string when actually needed for @@ -139,6 +198,7 @@ empty. This value is similar to the fourth element of the platform's target triple. One difference is that embedded ABIs such as `gnueabihf` will simply define `target_env` as `"gnu"`. +r[cfg.target_env.values] Example values: * `""` @@ -149,12 +209,18 @@ Example values: ### `target_abi` +r[cfg.target_abi] + +r[cfg.target_abi.general] Key-value option set to further disambiguate the `target_env` with information -about the target ABI. For historical reasons, -this value is only defined as not the empty-string when actually needed for -disambiguation. Thus, for example, on many GNU platforms, this value will be +about the target ABI. + +r[cfg.target_abi.disambiguation] +For historical reasons, this value is only defined as not the empty-string when actually + needed for disambiguation. Thus, for example, on many GNU platforms, this value will be empty. +r[cfg.target_abi.values] Example values: * `""` @@ -166,13 +232,18 @@ Example values: ### `target_endian` +r[cfg.target_endian] Key-value option set once with either a value of "little" or "big" depending on the endianness of the target's CPU. ### `target_pointer_width` +r[cfg.target_pointer_width] + +r[cfg.target_pointer_width.general] Key-value option set once with the target's pointer width in bits. 
+r[cfg.target_pointer_width.values] Example values: * `"16"` @@ -181,8 +252,12 @@ Example values: ### `target_vendor` +r[cfg.target_vendor] + +r[cfg.target_vendor.general] Key-value option set once with the vendor of the target. +r[cfg.target_vendor.values] Example values: * `"apple"` @@ -192,12 +267,18 @@ Example values: ### `target_has_atomic` +r[cfg.target_has_atomic] + +r[cfg.target_has_atomic.general] Key-value option set for each bit width that the target supports atomic loads, stores, and compare-and-swap operations. +r[cfg.target_has_atomic.stdlib] When this cfg is present, all of the stable [`core::sync::atomic`] APIs are available for the relevant atomic width. + +r[cfg.target_has_atomic.values] Possible values: * `"8"` @@ -209,11 +290,15 @@ Possible values: ### `test` +r[cfg.test] + Enabled when compiling the test harness. Done with `rustc` by using the [`--test`] flag. See [Testing] for more on testing support. ### `debug_assertions` +r[cfg.debug_assertions] + Enabled by default when compiling without optimizations. This can be used to enable extra debugging code in development but not in production. For example, it controls the behavior of the standard library's @@ -221,13 +306,19 @@ production. For example, it controls the behavior of the standard library's ### `proc_macro` +r[cfg.proc_macro] + Set when the crate being compiled is being compiled with the `proc_macro` [crate type]. ### `panic` +r[cfg.panic] + +r[cfg.panic.general] Key-value option set depending on the panic strategy. Note that more values may be added in the future. +r[cfg.panic.values] Example values: * `"abort"` @@ -237,20 +328,27 @@ Example values: ### The `cfg` attribute +r[cfg.attr] + +r[cfg.attr.syntax] > **Syntax**\ > _CfgAttrAttribute_ :\ >    `cfg` `(` _ConfigurationPredicate_ `)` +r[cfg.attr.general] The `cfg` [attribute] conditionally includes the thing it is attached to based on a configuration predicate. 
+r[cfg.attr.syntax-explanation] It is written as `cfg`, `(`, a configuration predicate, and finally `)`. +r[cfg.attr.effect] If the predicate is true, the thing is rewritten to not have the `cfg` attribute on it. If the predicate is false, the thing is removed from the source code. +r[cfg.attr.crate-level-attrs] When a crate-level `cfg` has a false predicate, the behavior is slightly different: any crate attributes preceding the `cfg` are kept, and any crate attributes following the `cfg` are removed. This allows `#![no_std]` and @@ -293,10 +391,14 @@ fn when_unwinding() { ``` +r[cfg.attr.restriction] The `cfg` attribute is allowed anywhere attributes are allowed. ### The `cfg_attr` attribute +r[cfg.cfg_attr] + +r[cfg.cfg_attr.syntax] > **Syntax**\ > _CfgAttrAttribute_ :\ >    `cfg_attr` `(` _ConfigurationPredicate_ `,` _CfgAttrs_? `)` @@ -304,9 +406,11 @@ The `cfg` attribute is allowed anywhere attributes are allowed. > _CfgAttrs_ :\ >    [_Attr_] (`,` [_Attr_])\* `,`? +r[cfg.cfg_attr.general] The `cfg_attr` [attribute] conditionally includes [attributes] based on a configuration predicate. +r[cfg.cfg_attr.behaviour] When the configuration predicate is true, this attribute expands out to the attributes listed after the predicate. For example, the following module will either be found at `linux.rs` or `windows.rs` based on the target. @@ -318,6 +422,7 @@ either be found at `linux.rs` or `windows.rs` based on the target. mod os; ``` +r[cfg.cfg_attr.attribute-list] Zero, one, or more attributes may be listed. Multiple attributes will each be expanded into separate attributes. For example: @@ -337,10 +442,14 @@ fn bewitched() {} > is valid. This example would be equivalent to > `#[cfg_attr(all(target_os = "linux", feature ="multithreaded"), some_other_attribute)]`. +r[cfg.cfg_attr.restriction] The `cfg_attr` attribute is allowed anywhere attributes are allowed. +The [`crate_type`] and [`crate_name`] attributes cannot be used with `cfg_attr`. 
+ ### The `cfg` macro +r[cfg.macro] The built-in `cfg` macro takes in a single configuration predicate and evaluates to the `true` literal when the predicate is true and the `false` literal when it is false. @@ -369,6 +478,8 @@ println!("I'm running on a {} machine!", machine_kind); [`cfg`]: #the-cfg-attribute [`cfg` macro]: #the-cfg-macro [`cfg_attr`]: #the-cfg_attr-attribute +[`crate_name`]: crates-and-source-files.md#the-crate_name-attribute +[`crate_type`]: linkage.md [`target_feature` attribute]: attributes/codegen.md#the-target_feature-attribute [attribute]: attributes.md [attributes]: attributes.md diff --git a/src/const_eval.md b/src/const_eval.md index eca2530ea..66efed2a8 100644 --- a/src/const_eval.md +++ b/src/const_eval.md @@ -1,68 +1,137 @@ # Constant evaluation +r[const-eval] +r[const-eval.general] Constant evaluation is the process of computing the result of [expressions] during compilation. Only a subset of all expressions can be evaluated at compile-time. ## Constant expressions +r[const-eval.const-expr] + +r[const-eval.const-expr.general] Certain forms of expressions, called constant expressions, can be evaluated at -compile time. In [const contexts](#const-context), these are the only allowed -expressions, and are always evaluated at compile time. In other places, such as -[let statements], constant expressions *may* -be, but are not guaranteed to be, evaluated at compile time. Behaviors such as -out of bounds [array indexing] or [overflow] are compiler errors if the value +compile time. + +r[const-eval.const-expr.const-context] +In [const contexts](#const-context), these are the only allowed +expressions, and are always evaluated at compile time. + +r[const-eval.const-expr.runtime-context] +In other places, such as [let statements], constant expressions *may* be, but are not guaranteed to be, evaluated at compile time. 
+ +r[const-eval.const-expr.error] +Behaviors such as out of bounds [array indexing] or [overflow] are compiler errors if the value must be evaluated at compile time (i.e. in const contexts). Otherwise, these behaviors are warnings, but will likely panic at run-time. +r[const-eval.const-expr.list] The following expressions are constant expressions, so long as any operands are also constant expressions and do not cause any [`Drop::drop`][destructors] calls to be run. +r[const-eval.const-expr.literal] * [Literals]. + +r[const-eval.const-expr.parameter] * [Const parameters]. + +r[const-eval.const-expr.path-item] * [Paths] to [functions] and [constants]. Recursively defining constants is not allowed. -* Paths to [statics]. These are only allowed within the initializer of a static. + +r[const-eval.const-expr.path-static] +* Paths to [statics] with these restrictions: + * Writes to `static` items are not allowed in any constant evaluation context. + * Reads from `extern` statics are not allowed in any constant evaluation context. + * If the evaluation is *not* carried out in an initializer of a `static` item, then reads from any mutable `static` are not allowed. A mutable `static` is a `static mut` item, or a `static` item with an interior-mutable type. + +These requirements are checked only when the constant is evaluated. In other words, having such accesses syntactically occur in const contexts is allowed as long as they never get executed. + +r[const-eval.const-expr.tuple] * [Tuple expressions]. + +r[const-eval.const-expr.array] * [Array expressions]. + +r[const-eval.const-expr.constructor] * [Struct] expressions. + +r[const-eval.const-expr.block] * [Block expressions], including `unsafe` and `const` blocks. * [let statements] and thus irrefutable [patterns], including mutable bindings * [assignment expressions] * [compound assignment expressions] * [expression statements] + +r[const-eval.const-expr.field] * [Field] expressions. 
+ +r[const-eval.const-expr.index] * Index expressions, [array indexing] or [slice] with a `usize`. + +r[const-eval.const-expr.range] * [Range expressions]. + +r[const-eval.const-expr.closure] * [Closure expressions] which don't capture variables from the environment. + +r[const-eval.const-expr.builtin-arith-logic] * Built-in [negation], [arithmetic], [logical], [comparison] or [lazy boolean] operators used on integer and floating point types, `bool`, and `char`. + +r[const-eval.const-expr.borrows] * All forms of [borrow]s, including raw borrows, with one limitation: mutable borrows and shared borrows to values with interior mutability are only allowed to refer to *transient* places. A place is *transient* if its lifetime is strictly contained inside the current [const context]. -* The [dereference operator]. + +r[const-eval.const-expr.deref] +* The [dereference operator] except for raw pointers. + +r[const-eval.const-expr.group] + * [Grouped] expressions. + +r[const-eval.const-expr.cast] * [Cast] expressions, except * pointer to address casts and * function pointer to address casts. + +r[const-eval.const-expr.const-fn] * Calls of [const functions] and const methods. + +r[const-eval.const-expr.loop] * [loop], [while] and [`while let`] expressions. + +r[const-eval.const-expr.if-match] * [if], [`if let`] and [match] expressions. 
## Const context [const context]: #const-context +r[const-eval.const-context] + +r[const-eval.const-context.general] A _const context_ is one of the following: +r[const-eval.const-context.array-length] * [Array type length expressions] + +r[const-eval.const-context.repeat-length] * [Array repeat length expressions][array expressions] + +r[const-eval.const-context.init] * The initializer of * [constants] * [statics] * [enum discriminants] + +r[const-eval.const-context.generic] * A [const generic argument] + +r[const-eval.const-context.block] * A [const block] Const contexts that are used as parts of types (array type and repeat length @@ -73,10 +142,17 @@ generics. ## Const Functions -A _const fn_ is a function that one is permitted to call from a const context. Declaring a function +r[const-eval.const-fn] + +r[const-eval.const-fn.general] +A _const fn_ is a function that one is permitted to call from a const context. + +r[const-eval.const-fn.usage] +Declaring a function `const` has no effect on any existing uses, it only restricts the types that arguments and the return type may use, and restricts the function body to constant expressions. +r[const-eval.const-fn.const-context] When called from a const context, the function is interpreted by the compiler at compile time. The interpretation happens in the environment of the compilation target and not the host. So `usize` is @@ -106,6 +182,7 @@ of whether you are building on a `64` bit or a `32` bit system. 
[enum discriminants]: items/enumerations.md#discriminants [expression statements]: statements.md#expression-statements [expressions]: expressions.md +[`extern` statics]: items/external-blocks.md#statics [field]: expressions/field-expr.md [functions]: items/functions.md [grouped]: expressions/grouped-expr.md diff --git a/src/crates-and-source-files.md b/src/crates-and-source-files.md index 426ee26f1..734ee3e98 100644 --- a/src/crates-and-source-files.md +++ b/src/crates-and-source-files.md @@ -1,5 +1,8 @@ # Crates and source files +r[crate] + +r[crate.syntax] > **Syntax**\ > _Crate_ :\ >    [_InnerAttribute_]\*\ @@ -10,17 +13,20 @@ > compiler, and the language has always been designed to be compiled. For these > reasons, this section assumes a compiler. +r[crate.compile-time] Rust's semantics obey a *phase distinction* between compile-time and run-time.[^phase-distinction] Semantic rules that have a *static interpretation* govern the success or failure of compilation, while semantic rules that have a *dynamic interpretation* govern the behavior of the program at run-time. +r[crate.unit] The compilation model centers on artifacts called _crates_. Each compilation processes a single crate in source form, and if successful, produces a single crate in binary form: either an executable or some sort of library.[^cratesourcefile] +r[crate.module] A _crate_ is a unit of compilation and linking, as well as versioning, distribution, and runtime loading. A crate contains a _tree_ of nested [module] scopes. The top level of this tree is a module that is @@ -28,22 +34,31 @@ anonymous (from the point of view of paths within the module) and any item within a crate has a canonical [module path] denoting its location within the crate's module tree. +r[crate.input-source] The Rust compiler is always invoked with a single source file as input, and always produces a single output crate. The processing of that source file may result in other source files being loaded as modules. 
Source files have the extension `.rs`. +r[crate.module-def] A Rust source file describes a module, the name and location of which — in the module tree of the current crate — are defined from outside the source file: either by an explicit [_Module_][module] item in a referencing -source file, or by the name of the crate itself. Every source file is a +source file, or by the name of the crate itself. + +r[crate.inline-module] +Every source file is a module, but not every module needs its own source file: [module definitions][module] can be nested within one file. +r[crate.items] Each source file contains a sequence of zero or more [_Item_] definitions, and may optionally begin with any number of [attributes] that apply to the containing module, most of which influence the behavior of -the compiler. The anonymous crate module can have additional attributes that +the compiler. + +r[crate.attributes] +The anonymous crate module can have additional attributes that apply to the crate as a whole. > **Note**: The file's contents may be preceded by a [shebang]. @@ -62,8 +77,13 @@ apply to the crate as a whole. ## Main Functions -A crate that contains a `main` [function] can be compiled to an executable. If a -`main` function is present, it must take no arguments, must not declare any +r[crate.main] + +r[crate.main.general] +A crate that contains a `main` [function] can be compiled to an executable. + +r[crate.main.restriction] +If a `main` function is present, it must take no arguments, must not declare any [trait or lifetime bounds], must not have any [where clauses], and its return type must implement the [`Termination`] trait. @@ -81,6 +101,7 @@ fn main() -> impl std::process::Termination { } ``` +r[crate.main.import] The `main` function may be an import, e.g. from an external crate or from the current one. 
```rust @@ -105,12 +126,17 @@ use foo::bar as main; ### The `no_main` attribute +r[crate.no_main] + The *`no_main` [attribute]* may be applied at the crate level to disable emitting the `main` symbol for an executable binary. This is useful when some other object being linked to defines `main`. ## The `crate_name` attribute +r[crate.crate_name] + +r[crate.crate_name.general] The *`crate_name` [attribute]* may be applied at the crate level to specify the name of the crate with the [_MetaNameValueStr_] syntax. @@ -118,6 +144,7 @@ name of the crate with the [_MetaNameValueStr_] syntax. #![crate_name = "mycrate"] ``` +r[crate.crate_name.restriction] The crate name must not be empty, and must only contain [Unicode alphanumeric] or `_` (U+005F) characters. diff --git a/src/destructors.md b/src/destructors.md index a536221f4..247f44f71 100644 --- a/src/destructors.md +++ b/src/destructors.md @@ -210,19 +210,20 @@ smallest scope that contains the expression and is one of the following: * The condition expression of an `if` or `while` expression, or a `match` guard. * The body expression for a match arm. -* The second operand of a [lazy boolean expression]. +* Each operand of a [lazy boolean expression]. +* The pattern-matching condition and consequent body of [`if let`] ([destructors.scope.temporary.edition2024]). +* The entirety of the tail expression of a block ([destructors.scope.temporary.edition2024]). > **Notes**: > -> Temporaries that are created in the final expression of a function -> body are dropped *after* any named variables bound in the function body. -> Their drop scope is the entire function, as there is no smaller enclosing temporary scope. -> > The [scrutinee] of a `match` expression is not a temporary scope, so > temporaries in the scrutinee can be dropped after the `match` expression. For > example, the temporary for `1` in `match 1 { ref mut z => z };` lives until > the end of the statement. 
+r[destructors.scope.temporary.edition2024] +> **Edition differences**: The 2024 edition added two new temporary scope narrowing rules: `if let` temporaries are dropped before the `else` block, and temporaries of tail expressions of blocks are dropped immediately after the tail expression is evaluated. + Some examples: ```rust @@ -242,17 +243,25 @@ if PrintOnDrop("If condition").0 == "If condition" { unreachable!() }; -// Dropped at the end of the statement +if let "if let scrutinee" = PrintOnDrop("if let scrutinee").0 { + PrintOnDrop("if let consequent").0 + // `if let consequent` dropped here +} +// `if let scrutinee` is dropped here +else { + PrintOnDrop("if let else").0 + // `if let else` dropped here +}; + +// Dropped before the first || (PrintOnDrop("first operand").0 == "" -// Dropped at the ) +// Dropped before the ) || PrintOnDrop("second operand").0 == "") -// Dropped at the end of the expression +// Dropped before the ; || PrintOnDrop("third operand").0 == ""; -// Dropped at the end of the function, after local variables. -// Changing this to a statement containing a return expression would make the -// temporary be dropped before the local variables. Binding to a variable -// which is then returned would also make the temporary be dropped first. +// Scrutinee is dropped at the end of the function, before local variables +// (because this is the tail expression of the function body block). match PrintOnDrop("Matched value in final expression") { // Dropped once the condition has been evaluated _ if PrintOnDrop("guard condition").0 == "" => (), @@ -325,6 +334,18 @@ let x = &mut 0; println!("{}", x); ``` +r[destructors.scope.lifetime-extension.static] +Lifetime extension also applies to `static` and `const` items, where it +makes temporaries live until the end of the program. 
For example: + +```rust +const C: &Vec<i32> = &Vec::new(); +// Usually this would be a dangling reference as the `Vec` would only +// exist inside the initializer expression of `C`, but instead the +// borrow gets lifetime-extended so it effectively has `'static` lifetime. +println!("{:?}", C); +``` + r[destructors.scope.lifetime-extension.sub-expressions] If a [borrow][borrow expression], [dereference][dereference expression], [field][field expression], or [tuple indexing expression] has an extended diff --git a/src/expressions/operator-expr.md b/src/expressions/operator-expr.md index a9e91f0a6..530d41d10 100644 --- a/src/expressions/operator-expr.md +++ b/src/expressions/operator-expr.md @@ -398,25 +398,84 @@ reference types and `mut` or `const` in pointer types. * Casting between two integers of the same size (e.g. i32 -> u32) is a no-op (Rust uses 2's complement for negative values of fixed integers) + + ```rust + assert_eq!(42i8 as u8, 42u8); + assert_eq!(-1i8 as u8, 255u8); + assert_eq!(255u8 as i8, -1i8); + assert_eq!(-1i16 as u16, 65535u16); + ``` + * Casting from a larger integer to a smaller integer (e.g. u32 -> u8) will truncate + + ```rust + assert_eq!(42u16 as u8, 42u8); + assert_eq!(1234u16 as u8, 210u8); + assert_eq!(0xabcdu16 as u8, 0xcdu8); + + assert_eq!(-42i16 as i8, -42i8); + assert_eq!(1234u16 as i8, -46i8); + assert_eq!(0xabcdi32 as i8, -51i8); + ``` + * Casting from a smaller integer to a larger integer (e.g.
u8 -> u32) will * zero-extend if the source is unsigned * sign-extend if the source is signed + + ```rust + assert_eq!(42i8 as i16, 42i16); + assert_eq!(-17i8 as i16, -17i16); + assert_eq!(0b1000_1010u8 as u16, 0b0000_0000_1000_1010u16, "Zero-extend"); + assert_eq!(0b0000_1010i8 as i16, 0b0000_0000_0000_1010i16, "Sign-extend 0"); + assert_eq!(0b1000_1010u8 as i8 as i16, 0b1111_1111_1000_1010u16 as i16, "Sign-extend 1"); + ``` + * Casting from a float to an integer will round the float towards zero * `NaN` will return `0` * Values larger than the maximum integer value, including `INFINITY`, will saturate to the maximum value of the integer type. * Values smaller than the minimum integer value, including `NEG_INFINITY`, will saturate to the minimum value of the integer type. + + ```rust + assert_eq!(42.9f32 as i32, 42); + assert_eq!(-42.9f32 as i32, -42); + assert_eq!(42_000_000f32 as i32, 42_000_000); + assert_eq!(std::f32::NAN as i32, 0); + assert_eq!(1_000_000_000_000_000f32 as i32, 0x7fffffffi32); + assert_eq!(std::f32::NEG_INFINITY as i32, -0x80000000i32); + ``` + * Casting from an integer to float will produce the closest possible float \* * if necessary, rounding is according to `roundTiesToEven` mode \*\*\* * on overflow, infinity (of the same sign as the input) is produced * note: with the current set of numeric types, overflow can only happen on `u128 as f32` for values greater or equal to `f32::MAX + (0.5 ULP)` + + ```rust + assert_eq!(1337i32 as f32, 1337f32); + assert_eq!(123_456_789i32 as f32, 123_456_790f32, "Rounded"); + assert_eq!(0xffffffff_ffffffff_ffffffff_ffffffff_u128 as f32, std::f32::INFINITY); + ``` + * Casting from an f32 to an f64 is perfect and lossless + + ```rust + assert_eq!(1_234.5f32 as f64, 1_234.5f64); + assert_eq!(std::f32::INFINITY as f64, std::f64::INFINITY); + assert!((std::f32::NAN as f64).is_nan()); + ``` + * Casting from an f64 to an f32 will produce the closest possible f32 \*\* * if necessary, rounding is according to 
`roundTiesToEven` mode \*\*\* * on overflow, infinity (of the same sign as the input) is produced + ```rust + assert_eq!(1_234.5f64 as f32, 1_234.5f32); + assert_eq!(1_234_567_891.123f64 as f32, 1_234_567_890f32, "Rounded"); + assert_eq!(std::f64::INFINITY as f32, std::f32::INFINITY); + assert!((std::f64::NAN as f32).is_nan()); + ``` + \* if integer-to-float casts with this rounding mode and overflow behavior are not supported natively by the hardware, these casts will likely be slower than expected. @@ -437,15 +496,34 @@ Casting is limited to the following kinds of enumerations: * [Unit-only enums] * [Field-less enums] without [explicit discriminants], or where only unit-variants have explicit discriminants +```rust +enum Enum { A, B, C } +assert_eq!(Enum::A as i32, 0); +assert_eq!(Enum::B as i32, 1); +assert_eq!(Enum::C as i32, 2); +``` + #### Primitive to integer cast * `false` casts to `0`, `true` casts to `1` * `char` casts to the value of the code point, then uses a numeric cast if needed. +```rust +assert_eq!(false as i32, 0); +assert_eq!(true as i32, 1); +assert_eq!('A' as i32, 65); +assert_eq!('Ö' as i32, 214); +``` + #### `u8` to `char` cast Casts to the `char` with the corresponding code point. +```rust +assert_eq!(65u8 as char, 'A'); +assert_eq!(214u8 as char, 'Ö'); +``` + #### Pointer to address cast Casting from a raw pointer to an integer produces the machine address of the referenced memory. diff --git a/src/glossary.md b/src/glossary.md index b5074767c..dabd65d7c 100644 --- a/src/glossary.md +++ b/src/glossary.md @@ -173,12 +173,14 @@ the hierarchy has its own collection of named entities. ### Nominal types Types that can be referred to by a path directly. Specifically [enums], -[structs], [unions], and [trait objects]. +[structs], [unions], and [trait object types]. -### Object safe traits +### Dyn-compatible traits -[Traits] that can be used as [trait objects]. Only traits that follow specific -[rules][object safety] are object safe. 
+[Traits] that can be used in [trait object types] (`dyn Trait`). +Only traits that follow specific [rules][dyn compatibility] are *dyn compatible*. + +These were formerly known as *object safe* traits. ### Path @@ -293,6 +295,7 @@ example of an uninhabited type is the [never type] `!`, or an enum with no varia [attributes]: attributes.md [*entity*]: names.md [crate]: crates-and-source-files.md +[dyn compatibility]: items/traits.md#dyn-compatibility [enums]: items/enumerations.md [fields]: expressions/field-expr.md [free item]: #free-item @@ -315,12 +318,11 @@ example of an uninhabited type is the [never type] `!`, or an enum with no varia [*name*]: names.md [*namespace*]: names/namespaces.md [never type]: types/never.md -[object safety]: items/traits.md#object-safety [*path*]: paths.md [Paths]: paths.md [*scope*]: names/scopes.md [structs]: items/structs.md -[trait objects]: types/trait-object.md +[trait object types]: types/trait-object.md [traits]: items/traits.md [turbofish test]: https://github.com/rust-lang/rust/blob/1.58.0/src/test/ui/parser/bastion-of-the-turbofish.rs [types of crates]: linkage.md @@ -329,3 +331,17 @@ example of an uninhabited type is the [never type] `!`, or an enum with no varia [unions]: items/unions.md [variable bindings]: patterns.md [visibility rules]: visibility-and-privacy.md + + diff --git a/src/identifiers.md b/src/identifiers.md index c760f6826..c510321bb 100644 --- a/src/identifiers.md +++ b/src/identifiers.md @@ -1,5 +1,8 @@ # Identifiers +r[ident] + +r[ident.syntax] > **Lexer:**\ > IDENTIFIER_OR_KEYWORD :\ >       XID_Start XID_Continue\*\ @@ -11,9 +14,12 @@ > > IDENTIFIER :\ > NON_KEYWORD_IDENTIFIER | RAW_IDENTIFIER +> +> RESERVED_RAW_IDENTIFIER : `r#_` -Identifiers follow the specification in [Unicode Standard Annex #31][UAX31] for Unicode version 15.0, with the additions described below. 
Some examples of identifiers: +r[ident.unicode] +Identifiers follow the specification in [Unicode Standard Annex #31][UAX31] for Unicode version 16.0, with the additions described below. Some examples of identifiers: * `foo` * `_identifier` @@ -21,6 +27,7 @@ Identifiers follow the specification in [Unicode Standard Annex #31][UAX31] for * `Москва` * `東京` +r[ident.profile] The profile used from UAX #31 is: * Start := [`XID_Start`], plus the underscore character (U+005F) @@ -31,10 +38,13 @@ with the additional constraint that a single underscore character is not an iden > **Note**: Identifiers starting with an underscore are typically used to indicate an identifier that is intentionally unused, and will silence the unused warning in `rustc`. +r[ident.keyword] Identifiers may not be a [strict] or [reserved] keyword without the `r#` prefix described below in [raw identifiers](#raw-identifiers). +r[ident.zero-width-chars] Zero width non-joiner (ZWNJ U+200C) and zero width joiner (ZWJ U+200D) characters are not allowed in identifiers. +r[ident.ascii-limitations] Identifiers are restricted to the ASCII subset of [`XID_Start`] and [`XID_Continue`] in the following situations: * [`extern crate`] declarations @@ -45,17 +55,28 @@ Identifiers are restricted to the ASCII subset of [`XID_Start`] and [`XID_Contin ## Normalization +r[ident.normalization] + Identifiers are normalized using Normalization Form C (NFC) as defined in [Unicode Standard Annex #15][UAX15]. Two identifiers are equal if their NFC forms are equal. [Procedural][proc-macro] and [declarative][mbe] macros receive normalized identifiers in their input. ## Raw identifiers +r[ident.raw] + +r[ident.raw.intro] A raw identifier is like a normal identifier, but prefixed by `r#`. (Note that the `r#` prefix is not included as part of the actual identifier.) + +r[ident.raw.allowed] Unlike a normal identifier, a raw identifier may be any strict or reserved keyword except the ones listed above for `RAW_IDENTIFIER`. 
+r[ident.raw.reserved] +It is an error to use the RESERVED_RAW_IDENTIFIER token `r#_` in order to avoid confusion with the [_WildcardPattern_]. + +[_WildcardPattern_]: patterns.md#wildcard-pattern [`extern crate`]: items/extern-crates.md [`no_mangle`]: abi.md#the-no_mangle-attribute [`path` attribute]: items/modules.md#the-path-attribute @@ -68,5 +89,5 @@ keyword except the ones listed above for `RAW_IDENTIFIER`. [proc-macro]: procedural-macros.md [reserved]: keywords.md#reserved-keywords [strict]: keywords.md#strict-keywords -[UAX15]: https://www.unicode.org/reports/tr15/tr15-53.html -[UAX31]: https://www.unicode.org/reports/tr31/tr31-37.html +[UAX15]: https://www.unicode.org/reports/tr15/tr15-56.html +[UAX31]: https://www.unicode.org/reports/tr31/tr31-41.html diff --git a/src/inline-assembly.md b/src/inline-assembly.md index 3ad16a600..a6cdb1ed8 100644 --- a/src/inline-assembly.md +++ b/src/inline-assembly.md @@ -13,9 +13,10 @@ r[asm.stable-targets] Support for inline assembly is stable on the following architectures: - x86 and x86-64 - ARM -- AArch64 +- AArch64 and Arm64EC - RISC-V - LoongArch +- s390x The compiler will emit an error if `asm!` is used on an unsupported target. 
@@ -234,6 +235,9 @@ Here is the list of currently supported register classes: | AArch64 | `vreg` | `v[0-31]` | `w` | | AArch64 | `vreg_low16` | `v[0-15]` | `x` | | AArch64 | `preg` | `p[0-15]`, `ffr` | Only clobbers | +| Arm64EC | `reg` | `x[0-12]`, `x[15-22]`, `x[25-27]`, `x30` | `r` | +| Arm64EC | `vreg` | `v[0-15]` | `w` | +| Arm64EC | `vreg_low16` | `v[0-15]` | `x` | | ARM (ARM/Thumb2) | `reg` | `r[0-12]`, `r14` | `r` | | ARM (Thumb1) | `reg` | `r[0-7]` | `r` | | ARM | `sreg` | `s[0-31]` | `t` | @@ -249,6 +253,11 @@ Here is the list of currently supported register classes: | RISC-V | `vreg` | `v[0-31]` | Only clobbers | | LoongArch | `reg` | `$r1`, `$r[4-20]`, `$r[23,30]` | `r` | | LoongArch | `freg` | `$f[0-31]` | `f` | +| s390x | `reg` | `r[0-10]`, `r[12-14]` | `r` | +| s390x | `reg_addr` | `r[1-10]`, `r[12-14]` | `a` | +| s390x | `freg` | `f[0-15]` | `f` | +| s390x | `vreg` | `v[0-31]` | Only clobbers | +| s390x | `areg` | `a[2-15]` | Only clobbers | > **Notes**: > - On x86 we treat `reg_byte` differently from `reg` because the compiler can allocate `al` and `ah` separately whereas `reg` reserves the whole register. @@ -277,6 +286,8 @@ The availability of supported types for a particular register class may depend o | AArch64 | `reg` | None | `i8`, `i16`, `i32`, `f32`, `i64`, `f64` | | AArch64 | `vreg` | `neon` | `i8`, `i16`, `i32`, `f32`, `i64`, `f64`,
`i8x8`, `i16x4`, `i32x2`, `i64x1`, `f32x2`, `f64x1`,
`i8x16`, `i16x8`, `i32x4`, `i64x2`, `f32x4`, `f64x2` | | AArch64 | `preg` | N/A | Only clobbers | +| Arm64EC | `reg` | None | `i8`, `i16`, `i32`, `f32`, `i64`, `f64` | +| Arm64EC | `vreg` | `neon` | `i8`, `i16`, `i32`, `f32`, `i64`, `f64`,
`i8x8`, `i16x4`, `i32x2`, `i64x1`, `f32x2`, `f64x1`,
`i8x16`, `i16x8`, `i32x4`, `i64x2`, `f32x4`, `f64x2` | | ARM | `reg` | None | `i8`, `i16`, `i32`, `f32` | | ARM | `sreg` | `vfp2` | `i32`, `f32` | | ARM | `dreg` | `vfp2` | `i64`, `f64`, `i8x8`, `i16x4`, `i32x2`, `i64x1`, `f32x2` | @@ -288,6 +299,10 @@ The availability of supported types for a particular register class may depend o | RISC-V | `vreg` | N/A | Only clobbers | | LoongArch64 | `reg` | None | `i8`, `i16`, `i32`, `i64`, `f32`, `f64` | | LoongArch64 | `freg` | None | `f32`, `f64` | +| s390x | `reg`, `reg_addr` | None | `i8`, `i16`, `i32`, `i64` | +| s390x | `freg` | None | `f32`, `f64` | +| s390x | `vreg` | N/A | Only clobbers | +| s390x | `areg` | N/A | Only clobbers | > **Note**: For the purposes of the above table pointers, function pointers and `isize`/`usize` are treated as the equivalent integer type (`i16`/`i32`/`i64` depending on the target). @@ -329,6 +344,12 @@ Here is the list of all supported register aliases: | AArch64 | `sp` | `wsp` | | AArch64 | `xzr` | `wzr` | | AArch64 | `v[0-31]` | `b[0-31]`, `h[0-31]`, `s[0-31]`, `d[0-31]`, `q[0-31]` | +| Arm64EC | `x[0-30]` | `w[0-30]` | +| Arm64EC | `x29` | `fp` | +| Arm64EC | `x30` | `lr` | +| Arm64EC | `sp` | `wsp` | +| Arm64EC | `xzr` | `wzr` | +| Arm64EC | `v[0-15]` | `b[0-15]`, `h[0-15]`, `s[0-15]`, `d[0-15]`, `q[0-15]` | | ARM | `r[0-3]` | `a[1-4]` | | ARM | `r[4-9]` | `v[1-6]` | | ARM | `r9` | `rfp` | @@ -372,13 +393,16 @@ Some registers cannot be used for input or output operands: | Architecture | Unsupported register | Reason | | ------------ | -------------------- | ------ | -| All | `sp` | The stack pointer must be restored to its original value at the end of an asm code block. | -| All | `bp` (x86), `x29` (AArch64), `x8` (RISC-V), `$fp` (LoongArch) | The frame pointer cannot be used as an input or output. | +| All | `sp`, `r15` (s390x) | The stack pointer must be restored to its original value at the end of an asm code block. 
| +| All | `bp` (x86), `x29` (AArch64 and Arm64EC), `x8` (RISC-V), `$fp` (LoongArch), `r11` (s390x) | The frame pointer cannot be used as an input or output. | | ARM | `r7` or `r11` | On ARM the frame pointer can be either `r7` or `r11` depending on the target. The frame pointer cannot be used as an input or output. | -| All | `si` (x86-32), `bx` (x86-64), `r6` (ARM), `x19` (AArch64), `x9` (RISC-V), `$s8` (LoongArch) | This is used internally by LLVM as a "base pointer" for functions with complex stack frames. | +| All | `si` (x86-32), `bx` (x86-64), `r6` (ARM), `x19` (AArch64 and Arm64EC), `x9` (RISC-V), `$s8` (LoongArch) | This is used internally by LLVM as a "base pointer" for functions with complex stack frames. | | x86 | `ip` | This is the program counter, not a real register. | | AArch64 | `xzr` | This is a constant zero register which can't be modified. | | AArch64 | `x18` | This is an OS-reserved register on some AArch64 targets. | +| Arm64EC | `xzr` | This is a constant zero register which can't be modified. | +| Arm64EC | `x18` | This is an OS-reserved register. | +| Arm64EC | `x13`, `x14`, `x23`, `x24`, `x28`, `v[16-31]`, `p[0-15]`, `ffr` | These are AArch64 registers that are not supported for Arm64EC. | | ARM | `pc` | This is the program counter, not a real register. | | ARM | `r9` | This is an OS-reserved register on some ARM targets. | | RISC-V | `x0` | This is a constant zero register which can't be modified. | @@ -386,6 +410,8 @@ Some registers cannot be used for input or output operands: | LoongArch | `$r0` or `$zero` | This is a constant zero register which can't be modified. | | LoongArch | `$r2` or `$tp` | This is reserved for TLS. | | LoongArch | `$r21` | This is reserved by the ABI. | +| s390x | `c[0-15]` | Reserved by the kernel. | +| s390x | `a[0-1]` | Reserved for system use. | r[asm.register-names.fp-bp-reserved] The frame pointer and base pointer registers are reserved for internal use by LLVM. 
While `asm!` statements cannot explicitly specify the use of reserved registers, in some cases LLVM will allocate one of these reserved registers for `reg` operands. Assembly code making use of reserved registers should be careful since `reg` operands may use the same registers. @@ -422,16 +448,16 @@ The supported modifiers are a subset of LLVM's (and GCC's) [asm template argumen | x86 | `*mm_reg` | `y` | `ymm0` | `t` | | x86 | `*mm_reg` | `z` | `zmm0` | `g` | | x86 | `kreg` | None | `k1` | None | -| AArch64 | `reg` | None | `x0` | `x` | -| AArch64 | `reg` | `w` | `w0` | `w` | -| AArch64 | `reg` | `x` | `x0` | `x` | -| AArch64 | `vreg` | None | `v0` | None | -| AArch64 | `vreg` | `v` | `v0` | None | -| AArch64 | `vreg` | `b` | `b0` | `b` | -| AArch64 | `vreg` | `h` | `h0` | `h` | -| AArch64 | `vreg` | `s` | `s0` | `s` | -| AArch64 | `vreg` | `d` | `d0` | `d` | -| AArch64 | `vreg` | `q` | `q0` | `q` | +| AArch64/Arm64EC | `reg` | None | `x0` | `x` | +| AArch64/Arm64EC | `reg` | `w` | `w0` | `w` | +| AArch64/Arm64EC | `reg` | `x` | `x0` | `x` | +| AArch64/Arm64EC | `vreg` | None | `v0` | None | +| AArch64/Arm64EC | `vreg` | `v` | `v0` | None | +| AArch64/Arm64EC | `vreg` | `b` | `b0` | `b` | +| AArch64/Arm64EC | `vreg` | `h` | `h0` | `h` | +| AArch64/Arm64EC | `vreg` | `s` | `s0` | `s` | +| AArch64/Arm64EC | `vreg` | `d` | `d0` | `d` | +| AArch64/Arm64EC | `vreg` | `q` | `q0` | `q` | | ARM | `reg` | None | `r0` | None | | ARM | `sreg` | None | `s0` | None | | ARM | `dreg` | None | `d0` | `P` | @@ -441,6 +467,9 @@ The supported modifiers are a subset of LLVM's (and GCC's) [asm template argumen | RISC-V | `freg` | None | `f0` | None | | LoongArch | `reg` | None | `$r1` | None | | LoongArch | `freg` | None | `$f0` | None | +| s390x | `reg` | None | `%r0` | None | +| s390x | `reg_addr` | None | `%r1` | None | +| s390x | `freg` | None | `%f0` | None | > **Notes**: > - on ARM `e` / `f`: this prints the low or high doubleword register name of a NEON quad (128-bit) register. 
@@ -482,9 +511,11 @@ The following ABIs can be used with `clobber_abi`: | x86-64 | `"C"`, `"system"` (on Windows), `"efiapi"`, `"win64"` | `ax`, `cx`, `dx`, `r[8-11]`, `xmm[0-31]`, `mm[0-7]`, `k[0-7]`, `st([0-7])`, `tmm[0-7]` | | x86-64 | `"C"`, `"system"` (on non-Windows), `"sysv64"` | `ax`, `cx`, `dx`, `si`, `di`, `r[8-11]`, `xmm[0-31]`, `mm[0-7]`, `k[0-7]`, `st([0-7])`, `tmm[0-7]` | | AArch64 | `"C"`, `"system"`, `"efiapi"` | `x[0-17]`, `x18`\*, `x30`, `v[0-31]`, `p[0-15]`, `ffr` | +| Arm64EC | `"C"`, `"system"` | `x[0-12]`, `x[15-17]`, `x30`, `v[0-15]` | | ARM | `"C"`, `"system"`, `"efiapi"`, `"aapcs"` | `r[0-3]`, `r12`, `r14`, `s[0-15]`, `d[0-7]`, `d[16-31]` | | RISC-V | `"C"`, `"system"`, `"efiapi"` | `x1`, `x[5-7]`, `x[10-17]`, `x[28-31]`, `f[0-7]`, `f[10-17]`, `f[28-31]`, `v[0-31]` | -| LoongArch | `"C"`, `"system"`, `"efiapi"` | `$r1`, `$r[4-20]`, `$f[0-23]` | +| LoongArch | `"C"`, `"system"` | `$r1`, `$r[4-20]`, `$f[0-23]` | +| s390x | `"C"`, `"system"` | `r[0-5]`, `r14`, `f[0-7]`, `v[0-31]`, `a[2-15]` | > Notes: > - On AArch64 `x18` only included in the clobber list if it is not considered as a reserved register on the target. @@ -616,7 +647,7 @@ r[asm.rules.preserved-registers] - Condition flags in `FPSCR` (N, Z, C, V) - Saturation flag in `FPSCR` (QC) - Floating-point exception flags in `FPSCR` (IDC, IXC, UFC, OFC, DZC, IOC). - - AArch64 + - AArch64 and Arm64EC - Condition flags (`NZCV` register). - Floating-point status (`FPSR` register). - RISC-V @@ -624,6 +655,8 @@ r[asm.rules.preserved-registers] - Vector extension state (`vtype`, `vl`, `vcsr`). - LoongArch - Floating-point condition flags in `$fcc[0-7]`. + - s390x + - The condition code register `cc`. r[asm.rules.x86-df] - On x86, the direction flag (DF in `EFLAGS`) is clear on entry to an asm block and must be clear on exit. 
@@ -633,6 +666,9 @@ r[asm.rules.x86-x87] - On x86, the x87 floating-point register stack must remain unchanged unless all of the `st([0-7])` registers have been marked as clobbered with `out("st(0)") _, out("st(1)") _, ...`. - If all x87 registers are clobbered then the x87 register stack is guaranteed to be empty upon entering an `asm` block. Assembly code must ensure that the x87 register stack is also empty when exiting the asm block. +r[asm.rules.arm64ec] +- On arm64ec, [call checkers with appropriate thunks](https://learn.microsoft.com/en-us/windows/arm/arm64ec-abi#authoring-arm64ec-in-assembly) are mandatory when calling functions. + r[asm.rules.only-on-exit] - The requirement of restoring the stack pointer and non-output registers to their original value only applies when exiting an `asm!` block. - This means that `asm!` blocks that never return (even if not marked `noreturn`) don't need to preserve these registers. diff --git a/src/items.md b/src/items.md index 00639acf4..d5733491c 100644 --- a/src/items.md +++ b/src/items.md @@ -1,5 +1,8 @@ # Items +r[items] + +r[items.syntax] > **Syntax:**\ > _Item_:\ >    [_OuterAttribute_]\*\ @@ -28,15 +31,17 @@ >       [_MacroInvocationSemi_]\ >    | [_MacroRulesDefinition_] - +r[items.intro] An _item_ is a component of a crate. Items are organized within a crate by a nested set of [modules]. Every crate has a single "outermost" anonymous module; all further items within the crate have [paths] within the module tree of the crate. +r[items.static-def] Items are entirely determined at compile-time, generally remain fixed during execution, and may reside in read-only memory. +r[items.kinds] There are several kinds of items: * [modules] @@ -53,11 +58,19 @@ There are several kinds of items: * [implementations] * [`extern` blocks] +r[items.locations] Items may be declared in the [root of the crate], a [module][modules], or a [block expression]. 
+ +r[items.associated-locations] A subset of items, called [associated items], may be declared in [traits] and [implementations]. + +r[items.extern-locations] A subset of items, called external items, may be declared in [`extern` blocks]. +r[items.decl-order] Items may be defined in any order, with the exception of [`macro_rules`] which has its own scoping behavior. + +r[items.name-resolution] [Name resolution] of item names allows items to be defined before or after where the item is referred to in the module or block. See [item scopes] for information on the scoping rules of items. diff --git a/src/items/associated-items.md b/src/items/associated-items.md index c4e7a194f..3df2e0eee 100644 --- a/src/items/associated-items.md +++ b/src/items/associated-items.md @@ -1,5 +1,8 @@ # Associated Items +r[items.associated] + +r[items.associated.syntax] > **Syntax**\ > _AssociatedItem_ :\ >    [_OuterAttribute_]\* (\ @@ -7,39 +10,53 @@ >       | ( [_Visibility_]? ( [_TypeAlias_] | [_ConstantItem_] | [_Function_] ) )\ >    ) +r[items.associated.intro] *Associated Items* are the items declared in [traits] or defined in [implementations]. They are called this because they are defined on an associate -type — the type in the implementation. They are a subset of the kinds of -items you can declare in a module. Specifically, there are [associated -functions] (including methods), [associated types], and [associated constants]. +type — the type in the implementation. + +r[items.associated.kinds] +They are a subset of the kinds of items you can declare in a module. +Specifically, there are [associated functions] (including methods), [associated types], and [associated constants]. [associated functions]: #associated-functions-and-methods [associated types]: #associated-types [associated constants]: #associated-constants +r[items.associated.related] Associated items are useful when the associated item logically is related to the associating item. 
For example, the `is_some` method on `Option` is intrinsically related to Options, so should be associated. +r[items.associated.decl-def] Every associated item kind comes in two varieties: definitions that contain the actual implementation and declarations that declare signatures for definitions. +r[items.associated.trait-items] It is the declarations that make up the contract of traits and what is available on generic types. ## Associated functions and methods +r[items.associated.fn] + +r[items.associated.fn.intro] *Associated functions* are [functions] associated with a type. +r[items.associated.fn.decl] An *associated function declaration* declares a signature for an associated function definition. It is written as a function item, except the function body is replaced with a `;`. -The identifier is the name of the function. The generics, parameter list, -return type, and where clause of the associated function must be the same as the +r[items.associated.name] +The identifier is the name of the function. + +r[items.associated.same-signature] +The generics, parameter list, return type, and where clause of the associated function must be the same as the associated function declaration's. +r[items.associated.fn.def] An *associated function definition* defines a function associated with another type. It is written the same as a [function item]. @@ -64,6 +81,7 @@ fn main () { } ``` +r[items.associated.fn.qualified-self] When the associated function is declared on a trait, the function can also be called with a [path] that is a path to the trait appended by the name of the trait. When this happens, it is substituted for `<_ as Trait>::function_name`.
@@ -86,10 +104,14 @@ let _: f64 = f64::from_i32(42); ### Methods +r[items.associated.fn.method] + +r[items.associated.fn.method.intro] Associated functions whose first parameter is named `self` are called *methods* and may be invoked using the [method call operator], for example, `x.foo()`, as well as the usual function call notation. +r[items.associated.fn.method.self-ty] If the type of the `self` parameter is specified, it is limited to types resolving to one generated by the following grammar (where `'lt` denotes some arbitrary lifetime): @@ -127,6 +149,7 @@ impl Example { } ``` +r[items.associated.fn.method.self-pat-shorthands] Shorthand syntax can be used without specifying a type, which have the following equivalents: @@ -138,6 +161,7 @@ Shorthand | Equivalent > **Note**: Lifetimes can be, and usually are, elided with this shorthand. +r[items.associated.fn.method.self-pat-mut] If the `self` parameter is prefixed with `mut`, it becomes a mutable variable, similar to regular parameters using a `mut` [identifier pattern]. For example: @@ -189,21 +213,30 @@ let circle_shape = Circle::new(); let bounding_box = circle_shape.bounding_box(); ``` +r[items.associated.fn.params.edition2015] > **Edition differences**: In the 2015 edition, it is possible to declare trait > methods with anonymous parameters (e.g. `fn foo(u8)`). This is deprecated and > an error as of the 2018 edition. All parameters must have an argument name. #### Attributes on method parameters +r[items.associated.fn.param-attributes] + Attributes on method parameters follow the same rules and restrictions as [regular function parameters]. ## Associated Types -*Associated types* are [type aliases] associated with another type. Associated -types cannot be defined in [inherent implementations] nor can they be given a +r[items.associated.type] + +r[items.associated.type.intro] +*Associated types* are [type aliases] associated with another type.
+ +r[items.associated.type.restrictions] +Associated types cannot be defined in [inherent implementations] nor can they be given a default implementation in traits. +r[items.associated.type.decl] An *associated type declaration* declares a signature for associated type definitions. It is written in one of the following forms, where `Assoc` is the name of the associated type, `Params` is a comma-separated list of type, @@ -221,13 +254,21 @@ type Assoc<Params> where WhereBounds; type Assoc<Params>: Bounds where WhereBounds; ``` -The identifier is the name of the declared type alias. The optional trait bounds -must be fulfilled by the implementations of the type alias. +r[items.associated.type.name] +The identifier is the name of the declared type alias. + +r[items.associated.type.impl-fulfillment] +The optional trait bounds must be fulfilled by the implementations of the type alias. + +r[items.associated.type.sized] There is an implicit [`Sized`] bound on associated types that can be relaxed using the special `?Sized` bound. +r[items.associated.type.def] An *associated type definition* defines a type alias for the implementation -of a trait on a type. They are written similarly to an *associated type declaration*, -but cannot contain `Bounds`, but instead must contain a `Type`: +of a trait on a type. + +r[items.associated.type.def.restriction] +They are written similarly to an *associated type declaration*, but cannot contain `Bounds`, but instead must contain a `Type`: ```rust,ignore @@ -237,11 +278,15 @@ type Assoc<Params> = Type where WhereBounds; type Assoc<Params> where WhereBounds = Type; // deprecated, prefer the form above ``` +r[items.associated.type.alias] If a type `Item` has an associated type `Assoc` from a trait `Trait`, then `<Item as Trait>::Assoc` is a type that is an alias of the type specified in the -associated type definition. Furthermore, if `Item` is a type parameter, then -`Item::Assoc` can be used in type parameters.
+associated type definition. + +r[items.associated.type.param] +Furthermore, if `Item` is a type parameter, then `Item::Assoc` can be used in type parameters. +r[items.associated.type.generic] Associated types may include [generic parameters] and [where clauses]; these are often referred to as *generic associated types*, or *GATs*. If the type `Thing` has an associated type `Item` from a trait `Trait` with the generics `<'a>` , the @@ -300,7 +345,6 @@ fn borrow<'a, T: Lend>(array: &'a mut T) -> <T as Lend>::Lender<'a> { array.lend() } - fn main() { let mut array = [0usize; 16]; let lender = borrow(&mut array); @@ -352,11 +396,15 @@ Given a reference to the associated type like `::Output`, the a ### Required where clauses on generic associated types +r[items.associated.type.generic-where-clause] + +r[items.associated.type.generic-where-clause.intro] Generic associated type declarations on traits currently may require a list of where clauses, dependent on functions in the trait and how the GAT is used. These rules may be loosened in the future; updates can be found [on the generic associated types initiative repository](https://rust-lang.github.io/generic-associated-types-initiative/explainer/required_bounds.html). +r[items.associated.type.generic-where-clause.valid-fn] In a few words, these where clauses are required in order to maximize the allowed definitions of the associated type in impls. To do this, any clauses that *can be proven to hold* on functions (using the parameters of the function or trait) @@ -373,6 +421,7 @@ In the above, on the `next` function, we can prove that `Self: 'a`, because of the implied bounds from `&'a mut self`; therefore, we must write the equivalent bound on the GAT itself: `where Self: 'x`. +r[items.associated.type.generic-where-clause.intersection] When there are multiple functions in a trait that use the GAT, then the *intersection* of the bounds from the different functions are used, rather than the union.
@@ -390,6 +439,7 @@ know that `T: 'a` on `create_checker`, we do not know that on `do_check`. Howeve if `do_check` was commented out, then the `where T: 'x` bound would be required on `Checker`. +r[items.associated.type.generic-where-clause.forward] The bounds on associated types also propagate required where clauses. ```rust @@ -404,6 +454,7 @@ Here, `where Self: 'a` is required on `Item` because of `iter`. However, `Item` is used in the bounds of `Iterator`, the `where Self: 'a` clause is also required there. +r[items.associated.type.generic-where-clause.static] Finally, any explicit uses of `'static` on GATs in the trait do not count towards the required bounds. @@ -416,18 +467,25 @@ trait StaticReturn { ## Associated Constants +r[items.associated.const] + +r[items.associated.const.intro] *Associated constants* are [constants] associated with a type. +r[items.associated.const.decl] An *associated constant declaration* declares a signature for associated constant definitions. It is written as `const`, then an identifier, then `:`, then a type, finished by a `;`. +r[items.associated.const.name] The identifier is the name of the constant used in the path. The type is the type that the definition has to implement. +r[items.associated.const.def] An *associated constant definition* defines a constant associated with a type. It is written the same as a [constant item]. +r[items.associated.const.eval] Associated constant definitions undergo [constant evaluation] only when referenced. Further, definitions that include [generic parameters] are evaluated after monomorphization. diff --git a/src/items/constant-items.md b/src/items/constant-items.md index f6ba8da73..bb8a43827 100644 --- a/src/items/constant-items.md +++ b/src/items/constant-items.md @@ -1,23 +1,32 @@ # Constant items +r[items.const] + +r[items.const.syntax] > **Syntax**\ > _ConstantItem_ :\ >    `const` ( [IDENTIFIER] | `_` ) `:` [_Type_] ( `=` [_Expression_] )? 
`;` +r[items.const.intro] A *constant item* is an optionally named _[constant value]_ which is not associated -with a specific memory location in the program. Constants are essentially inlined -wherever they are used, meaning that they are copied directly into the relevant +with a specific memory location in the program. + +r[items.const.behavior] +Constants are essentially inlined wherever they are used, meaning that they are copied directly into the relevant context when used. This includes usage of constants from external crates, and non-[`Copy`] types. References to the same constant are not necessarily guaranteed to refer to the same memory address. +r[items.const.namespace] The constant declaration defines the constant value in the [value namespace] of the module or block where it is located. +r[items.const.static] Constants must be explicitly typed. The type must have a `'static` lifetime: any references in the initializer must have `'static` lifetimes. References in the type of a constant default to `'static` lifetime; see [static lifetime elision]. +r[items.const.static-temporary] A reference to a constant will have `'static` lifetime if the constant value is eligible for [promotion]; otherwise, a temporary will be created. @@ -39,10 +48,16 @@ const BITS_N_STRINGS: BitsNStrings<'static> = BitsNStrings { }; ``` +r[items.const.final-value-immutable] +The final value of a `const` item cannot contain references to anything mutable. + +r[items.const.expr-omission] The constant expression may only be omitted in a [trait definition]. ## Constants with Destructors +r[items.const.destructor] + Constants can contain destructors. Destructors are run when the value goes out of scope. @@ -66,6 +81,9 @@ fn create_and_drop_zero_with_destructor() { ## Unnamed constant +r[items.const.unnamed] + +r[items.const.unnamed.intro] Unlike an [associated constant], a [free] constant may be unnamed by using an underscore instead of the name. 
For example: @@ -76,6 +94,7 @@ const _: () = { struct _SameNameTwice; }; const _: () = { struct _SameNameTwice; }; ``` +r[items.const.unnamed.repetition] As with [underscore imports], macros may safely emit the same unnamed constant in the same scope more than once. For example, the following should not produce an error: @@ -92,6 +111,8 @@ m!(const _: () = ();); ## Evaluation +r[items.const.eval] + [Free][free] constants are always [evaluated][const_eval] at compile-time to surface panics. This happens even within an unused function: diff --git a/src/items/enumerations.md b/src/items/enumerations.md index 63a3e76ba..02b2b8373 100644 --- a/src/items/enumerations.md +++ b/src/items/enumerations.md @@ -1,5 +1,8 @@ # Enumerations +r[items.enum] + +r[items.enum.syntax] > **Syntax**\ > _Enumeration_ :\ >    `enum` @@ -25,11 +28,15 @@ > _EnumItemDiscriminant_ :\ >    `=` [_Expression_] +r[items.enum.intro] An *enumeration*, also referred to as an *enum*, is a simultaneous definition of a nominal [enumerated type] as well as a set of *constructors*, that can be used to create or pattern-match values of the corresponding enumerated type. +r[items.enum.decl] Enumerations are declared with the keyword `enum`. + +r[items.enum.namespace] The `enum` declaration defines the enumeration type in the [type namespace] of the module or block where it is located. An example of an `enum` item and its use: @@ -44,6 +51,7 @@ let mut a: Animal = Animal::Dog; a = Animal::Cat; ``` +r[items.enum.constructor] Enum constructors can have either named or unnamed fields: ```rust @@ -59,6 +67,7 @@ a = Animal::Cat { name: "Spotty".to_string(), weight: 2.7 }; In this example, `Cat` is a _struct-like enum variant_, whereas `Dog` is simply called an enum variant. +r[items.enum.fieldless] An enum where no constructors contain fields are called a *field-less enum*. 
For example, this is a fieldless enum: @@ -70,6 +79,7 @@ enum Fieldless { } ``` +r[items.enum.unit-only] If a field-less enum only contains unit variants, the enum is called an *unit-only enum*. For example: @@ -81,12 +91,20 @@ enum Enum { } ``` +r[items.enum.constructor-names] Variant constructors are similar to [struct] definitions, and can be referenced by a path from the enumeration name, including in [use declarations]. + +r[items.enum.constructor-namespace] Each variant defines its type in the [type namespace], though that type cannot be used as a type specifier. Tuple-like and unit-like variants also define a constructor in the [value namespace]. +r[items.enum.struct-expr] A struct-like variant can be instantiated with a [struct expression]. + +r[items.enum.tuple-expr] A tuple-like variant can be instantiated with a [call expression] or a [struct expression]. + +r[items.enum.path-expr] A unit-like variant can be instantiated with a [path expression] or a [struct expression]. For example: @@ -108,10 +126,14 @@ let z = StructLike { value: 123 }; // Struct expression. ## Discriminants +r[items.enum.discriminant] + +r[items.enum.discriminant.intro] Each enum instance has a _discriminant_: an integer logically associated to it that is used to determine which variant it holds. -Under the [default representation], the discriminant is interpreted as +r[items.enum.discriminant.repr-rust] +Under the [`Rust` representation], the discriminant is interpreted as an `isize` value. However, the compiler is allowed to use a smaller type (or another means of distinguishing variants) in its actual memory layout. @@ -119,13 +141,16 @@ another means of distinguishing variants) in its actual memory layout. 
#### Explicit discriminants +r[items.enum.discriminant.explicit] + +r[items.enum.discriminant.explicit.intro] In two circumstances, the discriminant of a variant may be explicitly set by following the variant name with `=` and a [constant expression]: - +r[items.enum.discriminant.explicit.unit-only] 1. if the enumeration is "[unit-only]". - +r[items.enum.discriminant.explicit.primitive-repr] 2. if a [primitive representation] is used. For example: ```rust @@ -142,6 +167,8 @@ following the variant name with `=` and a [constant expression]: #### Implicit discriminants +r[items.enum.discriminant.implicit] + If a discriminant for a variant is not specified, then it is set to one higher than the discriminant of the previous variant in the declaration. If the discriminant of the first variant in the declaration is unspecified, then @@ -160,6 +187,9 @@ assert_eq!(baz_discriminant, 123); #### Restrictions +r[items.enum.discriminant.restrictions] + +r[items.enum.discriminant.restrictions.same-discriminant] It is an error when two variants share the same discriminant. ```rust,compile_fail @@ -175,6 +205,7 @@ enum SharedDiscriminantError2 { } ``` +r[items.enum.discriminant.restrictions.above-max-discriminant] It is also an error to have an unspecified discriminant where the previous discriminant is the maximum value for the size of the discriminant. @@ -197,12 +228,17 @@ enum OverflowingDiscriminantError2 { #### Via `mem::discriminant` +r[items.enum.discriminant.access-opaque] + [`std::mem::discriminant`] returns an opaque reference to the discriminant of an enum value which can be compared. This cannot be used to get the value of the discriminant. 
#### Casting +r[items.enum.discriminant.coercion] + +r[items.enum.discriminant.coercion.intro] If an enumeration is [unit-only] (with no tuple and struct variants), then its discriminant can be directly accessed with a [numeric cast]; e.g.: @@ -218,6 +254,7 @@ assert_eq!(1, Enum::Bar as isize); assert_eq!(2, Enum::Baz as isize); ``` +r[items.enum.discriminant.coercion.fieldless] [Field-less enums] can be casted if they do not have explicit discriminants, or where only unit variants are explicit. ```rust @@ -249,6 +286,8 @@ assert_eq!(22, FieldlessWithDiscrimants::Unit as u8); #### Pointer casting +r[items.enum.discriminant.access-memory] + If the enumeration specifies a [primitive representation], then the discriminant may be reliably accessed via unsafe pointer casting: @@ -277,6 +316,9 @@ assert_eq!(2, struct_like.discriminant()); ## Zero-variant enums +r[items.enum.empty] + +r[items.enum.empty.intro] Enums with zero variants are known as *zero-variant enums*. As they have no valid values, they cannot be instantiated. @@ -284,6 +326,7 @@ no valid values, they cannot be instantiated. enum ZeroVariants {} ``` +r[items.enum.empty.uninhabited] Zero-variant enums are equivalent to the [never type], but they cannot be coerced into other types. @@ -295,6 +338,8 @@ let y: u32 = x; // mismatched type error ## Variant visibility +r[items.enum.variant-visibility] + Enum variants syntactically allow a [_Visibility_] annotation, but this is rejected when the enum is validated. This allows items to be parsed with a unified syntax across different contexts where they are used. 
@@ -333,7 +378,6 @@ enum E { [`C` representation]: ../type-layout.md#the-c-representation [call expression]: ../expressions/call-expr.md [constant expression]: ../const_eval.md#constant-expressions -[default representation]: ../type-layout.md#the-default-representation [enumerated type]: ../types/enum.md [Field-less enums]: #field-less-enum [IDENTIFIER]: ../identifiers.md @@ -341,6 +385,7 @@ enum E { [numeric cast]: ../expressions/operator-expr.md#semantics [path expression]: ../expressions/path-expr.md [primitive representation]: ../type-layout.md#primitive-representations +[`Rust` representation]: ../type-layout.md#the-rust-representation [struct expression]: ../expressions/struct-expr.md [struct]: structs.md [type namespace]: ../names/namespaces.md diff --git a/src/items/extern-crates.md b/src/items/extern-crates.md index 523e9720d..85f10630e 100644 --- a/src/items/extern-crates.md +++ b/src/items/extern-crates.md @@ -1,5 +1,8 @@ # Extern crate declarations +r[items.extern-crate] + +r[items.extern-crate.syntax] > **Syntax:**\ > _ExternCrate_ :\ >    `extern` `crate` _CrateRef_ _AsClause_? `;` @@ -10,11 +13,19 @@ > _AsClause_ :\ >    `as` ( [IDENTIFIER] | `_` ) +r[items.extern-crate.intro] An _`extern crate` declaration_ specifies a dependency on an external crate. + +r[items.extern-crate.namespace] The external crate is then bound into the declaring scope as the given [identifier] in the [type namespace]. + +r[items.extern-crate.extern-prelude] Additionally, if the `extern crate` appears in the crate root, then the crate name is also added to the [extern prelude], making it automatically in scope in all modules. + +r[items.extern-crate.as] The `as` clause can be used to bind the imported crate to a different name. +r[items.extern-crate.lookup] The external crate is resolved to a specific `soname` at compile time, and a runtime linkage requirement to that `soname` is passed to the linker for loading at runtime. 
The `soname` is resolved at compile time by scanning the @@ -23,6 +34,7 @@ the [`crate_name` attributes] that were declared on the external crate when it w compiled. If no `crate_name` is provided, a default `name` attribute is assumed, equal to the [identifier] given in the `extern crate` declaration. +r[items.extern-crate.self] The `self` crate may be imported which creates a binding to the current crate. In this case the `as` clause must be used to specify the name to bind it to. @@ -37,6 +49,7 @@ extern crate std; // equivalent to: extern crate std as std; extern crate std as ruststd; // linking to 'std' under another name ``` +r[items.extern-crate.name-restrictions] When naming Rust crates, hyphens are disallowed. However, Cargo packages may make use of them. In such case, when `Cargo.toml` doesn't specify a crate name, Cargo will transparently replace `-` with `_` (Refer to [RFC 940] for more @@ -52,16 +65,22 @@ extern crate hello_world; // hyphen replaced with an underscore ## Underscore Imports +r[items.extern-crate.underscore] + +r[items.extern-crate.underscore.intro] An external crate dependency can be declared without binding its name in scope by using an underscore with the form `extern crate foo as _`. This may be useful for crates that only need to be linked, but are never referenced, and will avoid being reported as unused. +r[items.extern-crate.underscore.macro_use] The [`macro_use` attribute] works as usual and imports the macro names into the [`macro_use` prelude]. ## The `no_link` attribute +r[items.extern-crate.no_link] + The *`no_link` attribute* may be specified on an `extern crate` item to prevent linking the crate into the output. This is commonly used to load a crate to access only its macros. 
diff --git a/src/items/external-blocks.md b/src/items/external-blocks.md index dbd55fb33..74aa5cedb 100644 --- a/src/items/external-blocks.md +++ b/src/items/external-blocks.md @@ -1,8 +1,11 @@ # External blocks +r[items.extern] + +r[items.extern.syntax] > **Syntax**\ > _ExternBlock_ :\ ->    `unsafe`? `extern` [_Abi_]? `{`\ +>    `unsafe`?[^unsafe-2024] `extern` [_Abi_]? `{`\ >       [_InnerAttribute_]\*\ >       _ExternalItem_\*\ >    `}` @@ -12,33 +15,57 @@ >          [_MacroInvocationSemi_]\ >       | ( [_Visibility_]? ( [_StaticItem_] | [_Function_] ) )\ >    ) +> +> [^unsafe-2024]: Starting with the 2024 Edition, the `unsafe` keyword is required semantically. +r[items.extern.intro] External blocks provide _declarations_ of items that are not _defined_ in the current crate and are the basis of Rust's foreign function interface. These are akin to unchecked imports. +r[items.extern.allowed-kinds] Two kinds of item _declarations_ are allowed in external blocks: [functions] and -[statics]. Calling functions or accessing statics that are declared in external -blocks is only allowed in an `unsafe` context. +[statics]. + +r[items.extern.fn-safety] +Calling functions or accessing statics that are declared in external blocks is only allowed in an `unsafe` context. +r[items.extern.namespace] The external block defines its functions and statics in the [value namespace] of the module or block where it is located. +r[items.extern.unsafe-required] +The `unsafe` keyword is semantically required to appear before the `extern` keyword on external blocks. + +r[items.extern.edition2024] +> **Edition differences**: Prior to the 2024 edition, the `unsafe` keyword is optional. The `safe` and `unsafe` item qualifiers are only allowed if the external block itself is marked as `unsafe`. 
+ ## Functions +r[items.extern.fn] + +r[items.extern.fn.body] Functions within external blocks are declared in the same way as other Rust functions, with the exception that they must not have a body and are instead -terminated by a semicolon. Patterns are not allowed in parameters, only -[IDENTIFIER] or `_` may be used. The `safe` and `unsafe` function qualifiers are +terminated by a semicolon. + +r[items.extern.fn.param-patterns] +Patterns are not allowed in parameters, only [IDENTIFIER] or `_` may be used. + +r[items.extern.fn.qualifiers] +The `safe` and `unsafe` function qualifiers are allowed, but other function qualifiers (e.g. `const`, `async`, `extern`) are not. +r[items.extern.fn.foreign-abi] Functions within external blocks may be called by Rust code, just like functions defined in Rust. The Rust compiler automatically translates between the Rust ABI and the foreign ABI. +r[items.extern.fn.safety] A function declared in an extern block is implicitly `unsafe` unless the `safe` function qualifier is present. +r[items.extern.fn.fn-ptr] When coerced to a function pointer, a function declared in an extern block has type `extern "abi" for<'l1, ..., 'lm> fn(A1, ..., An) -> R`, where `'l1`, ... `'lm` are its lifetime parameters, `A1`, ..., `An` are the declared types of @@ -46,14 +73,22 @@ its parameters, `R` is the declared return type. ## Statics +r[items.extern.static] + +r[items.extern.static.intro] Statics within external blocks are declared in the same way as [statics] outside of external blocks, except that they do not have an expression initializing their value. + +r[items.extern.static.safety] Unless a static item declared in an extern block is qualified as `safe`, it is `unsafe` to access that item, whether or not it's mutable, because there is nothing guaranteeing that the bit pattern at the static's memory is valid for the type it is declared with, since some arbitrary (e.g. C) code is in charge of initializing the static. 
+r[items.extern.static.mut] Extern statics can be either immutable or mutable just like [statics] outside of external blocks. + +r[items.extern.static.read-only] An immutable static *must* be initialized before any Rust code is executed. It is not enough for the static to be initialized before Rust code reads from it. Once Rust code runs, mutating an immutable static (from inside or outside Rust) is UB, @@ -61,43 +96,73 @@ except if the mutation happens to bytes inside of an `UnsafeCell`. ## ABI +r[items.extern.abi] + +r[items.extern.abi.intro] By default external blocks assume that the library they are calling uses the standard C ABI on the specific platform. Other ABIs may be specified using an `abi` string, as shown here: ```rust +# #[cfg(any(windows, target_arch = "x86"))] // Interface to the Windows API unsafe extern "stdcall" { } ``` +r[items.extern.abi.standard] There are three ABI strings which are cross-platform, and which all compilers are guaranteed to support: +r[items.extern.abi.rust] * `unsafe extern "Rust"` -- The default ABI when you write a normal `fn foo()` in any Rust code. + +r[items.extern.abi.c] * `unsafe extern "C"` -- This is the same as `extern fn foo()`; whatever the default your C compiler supports. + +r[items.extern.abi.system] * `unsafe extern "system"` -- Usually the same as `extern "C"`, except on Win32, in which case it's `"stdcall"`, or what you should use to link to the Windows API itself +r[items.extern.abi.platform] There are also some platform-specific ABI strings: +r[items.extern.abi.cdecl] * `unsafe extern "cdecl"` -- The default for x86\_32 C code. + +r[items.extern.abi.stdcall] * `unsafe extern "stdcall"` -- The default for the Win32 API on x86\_32. + +r[items.extern.abi.win64] * `unsafe extern "win64"` -- The default for C code on x86\_64 Windows. + +r[items.extern.abi.sysv64] * `unsafe extern "sysv64"` -- The default for C code on non-Windows x86\_64. 
+ +r[items.extern.abi.aapcs] * `unsafe extern "aapcs"` -- The default for ARM. + +r[items.extern.abi.fastcall] * `unsafe extern "fastcall"` -- The `fastcall` ABI -- corresponds to MSVC's `__fastcall` and GCC and clang's `__attribute__((fastcall))` + +r[items.extern.abi.vectorcall] * `unsafe extern "vectorcall"` -- The `vectorcall` ABI -- corresponds to MSVC's `__vectorcall` and clang's `__attribute__((vectorcall))` + +r[items.extern.abi.thiscall] * `unsafe extern "thiscall"` -- The default for C++ member functions on MSVC -- corresponds to MSVC's `__thiscall` and GCC and clang's `__attribute__((thiscall))` + +r[items.extern.abi.efiapi] * `unsafe extern "efiapi"` -- The ABI used for [UEFI] functions. ## Variadic functions +r[items.extern.variadic] + Functions within external blocks may be variadic by specifying `...` as the last argument. The variadic parameter may optionally be specified with an identifier. @@ -112,36 +177,58 @@ unsafe extern "C" { ## Attributes on extern blocks +r[items.extern.attributes] + +r[items.extern.attributes.intro] The following [attributes] control the behavior of external blocks. ### The `link` attribute +r[items.extern.attributes.link] + +r[items.extern.attributes.link.intro] The *`link` attribute* specifies the name of a native library that the -compiler should link with for the items within an `extern` block. It uses the -[_MetaListNameValueStr_] syntax to specify its inputs. The `name` key is the +compiler should link with for the items within an `extern` block. + +r[items.extern.attributes.link.syntax] +It uses the [_MetaListNameValueStr_] syntax to specify its inputs. The `name` key is the name of the native library to link. The `kind` key is an optional value which specifies the kind of library with the following possible values: +r[items.extern.attributes.link.dylib] - `dylib` --- Indicates a dynamic library. This is the default if `kind` is not specified. 
+ +r[items.extern.attributes.link.static] - `static` --- Indicates a static library. + +r[items.extern.attributes.link.framework] - `framework` --- Indicates a macOS framework. This is only valid for macOS targets. + +r[items.extern.attributes.link.raw-dylib] - `raw-dylib` --- Indicates a dynamic library where the compiler will generate an import library to link against (see [`dylib` versus `raw-dylib`] below for details). This is only valid for Windows targets. +r[items.extern.attributes.link.name-requirement] The `name` key must be included if `kind` is specified. +r[items.extern.attributes.link.modifiers] The optional `modifiers` argument is a way to specify linking modifiers for the library to link. + +r[items.extern.attributes.link.modifiers.syntax] Modifiers are specified as a comma-delimited string with each modifier prefixed with either a `+` or `-` to indicate that the modifier is enabled or disabled, respectively. + +r[items.extern.attributes.link.modifiers.multiple] Specifying multiple `modifiers` arguments in a single `link` attribute, or multiple identical modifiers in the same `modifiers` argument is not currently supported. \ Example: `#[link(name = "mylib", kind = "static", modifiers = "+whole-archive")]`. +r[items.extern.attributes.link.wasm_import_module] The `wasm_import_module` key may be used to specify the [WebAssembly module] name for the items within an `extern` block when importing symbols from the host environment. The default module name is `env` if `wasm_import_module` is @@ -165,6 +252,7 @@ unsafe extern { } ``` +r[items.extern.attributes.link.empty-block] It is valid to add the `link` attribute on an empty extern block. You can use this to satisfy the linking requirements of extern blocks elsewhere in your code (including upstream crates) instead of adding the attribute to each extern @@ -172,13 +260,18 @@ block. 
#### Linking modifiers: `bundle` +r[items.extern.attributes.link.modifiers.bundle] + +r[items.extern.attributes.link.modifiers.bundle.allowed-kinds] This modifier is only compatible with the `static` linking kind. Using any other kind will result in a compiler error. +r[items.extern.attributes.link.modifiers.bundle.behavior] When building a rlib or staticlib `+bundle` means that the native static library will be packed into the rlib or staticlib archive, and then retrieved from there during linking of the final binary. +r[items.extern.attributes.link.modifiers.bundle.behavior-negative] When building a rlib `-bundle` means that the native static library is registered as a dependency of that rlib "by name", and object files from it are included only during linking of the final binary, the file search by that name is also performed during final linking. \ @@ -186,8 +279,10 @@ When building a staticlib `-bundle` means that the native static library is simp into the archive and some higher level build system will need to add it later during linking of the final binary. +r[items.extern.attributes.link.modifiers.bundle.no-effect] This modifier has no effect when building other targets like executables or dynamic libraries. +r[items.extern.attributes.link.modifiers.bundle.default] The default for this modifier is `+bundle`. More implementation details about this modifier can be found in @@ -195,12 +290,17 @@ More implementation details about this modifier can be found in #### Linking modifiers: `whole-archive` +r[items.extern.attributes.link.modifiers.whole-archive] + +r[items.extern.attributes.link.modifiers.whole-archive.allowed-kinds] This modifier is only compatible with the `static` linking kind. Using any other kind will result in a compiler error. +r[items.extern.attributes.link.modifiers.whole-archive.behavior] `+whole-archive` means that the static library is linked as a whole archive without throwing any object files away. 
+r[items.extern.attributes.link.modifiers.whole-archive.default] The default for this modifier is `-whole-archive`. More implementation details about this modifier can be found in @@ -208,15 +308,21 @@ More implementation details about this modifier can be found in ### Linking modifiers: `verbatim` +r[items.extern.attributes.link.modifiers.verbatim] + +r[items.extern.attributes.link.modifiers.verbatim.allowed-kinds] This modifier is compatible with all linking kinds. +r[items.extern.attributes.link.modifiers.verbatim.behavior] `+verbatim` means that rustc itself won't add any target-specified library prefixes or suffixes (like `lib` or `.a`) to the library name, and will try its best to ask for the same thing from the linker. +r[items.extern.attributes.link.modifiers.verbatim.behavior-negative] `-verbatim` means that rustc will either add a target-specific prefix and suffix to the library name before passing it to linker, or won't prevent linker from implicitly adding it. +r[items.extern.attributes.link.modifiers.verbatim.default] The default for this modifier is `-verbatim`. More implementation details about this modifier can be found in @@ -224,22 +330,30 @@ More implementation details about this modifier can be found in #### `dylib` versus `raw-dylib` +r[items.extern.attributes.link.kind-raw-dylib] + +r[items.extern.attributes.link.kind-raw-dylib.intro] On Windows, linking against a dynamic library requires that an import library is provided to the linker: this is a special static library that declares all of the symbols exported by the dynamic library in such a way that the linker knows that they have to be dynamically loaded at runtime. +r[items.extern.attributes.link.kind-raw-dylib.import] Specifying `kind = "dylib"` instructs the Rust compiler to link an import library based on the `name` key. The linker will then use its normal library resolution logic to find that import library. 
Alternatively, specifying `kind = "raw-dylib"` instructs the compiler to generate an import library during compilation and provide that to the linker instead. +r[items.extern.attributes.link.kind-raw-dylib.platform-specific] `raw-dylib` is only supported on Windows. Using it when targeting other platforms will result in a compiler error. #### The `import_name_type` key +r[items.extern.attributes.link.import_name_type] + +r[items.extern.attributes.link.import_name_type.intro] On x86 Windows, names of functions are "decorated" (i.e., have a specific prefix and/or suffix added) to indicate their calling convention. For example, a `stdcall` calling convention function with the name `fn1` that has no arguments @@ -249,6 +363,7 @@ use different decorations for the same calling conventions which means, by default, some Win32 functions cannot be called using the `raw-dylib` link kind via the GNU toolchain. +r[items.extern.attributes.link.import_name_type.values] To allow for these differences, when using the `raw-dylib` link kind you may also specify the `import_name_type` key with one of the following values to change how functions are named in the generated import library: @@ -259,20 +374,28 @@ change how functions are named in the generated import library: format, but skipping the leading `?`, `@`, or optionally `_`. * `undecorated`: The function name will not be decorated. +r[items.extern.attributes.link.import_name_type.default] If the `import_name_type` key is not specified, then the function name will be fully-decorated using the target toolchain's format. +r[items.extern.attributes.link.import_name_type.variables] Variables are never decorated and so the `import_name_type` key has no effect on how they are named in the generated import library. +r[items.extern.attributes.link.import_name_type.platform-specific] The `import_name_type` key is only supported on x86 Windows. Using it when targeting other platforms will result in a compiler error. 
### The `link_name` attribute +r[items.extern.attributes.link_name] + +r[items.extern.attributes.link_name.intro] The *`link_name` attribute* may be specified on declarations inside an `extern` -block to indicate the symbol to import for the given function or static. It -uses the [_MetaNameValueStr_] syntax to specify the name of the symbol. +block to indicate the symbol to import for the given function or static. + +r[items.extern.attributes.link_name.syntax] +It uses the [_MetaNameValueStr_] syntax to specify the name of the symbol. ```rust unsafe extern { @@ -281,11 +404,15 @@ unsafe extern { } ``` +r[items.extern.attributes.link_name.exclusive] Using this attribute with the `link_ordinal` attribute will result in a compiler error. ### The `link_ordinal` attribute +r[items.extern.attributes.link_ordinal] + +r[items.extern.attributes.link_ordinal.intro] The *`link_ordinal` attribute* can be applied on declarations inside an `extern` block to indicate the numeric ordinal to use when generating the import library to link against. An ordinal is a unique number per symbol exported by a dynamic @@ -295,8 +422,8 @@ that symbol rather than having to look it up by name. > [!WARNING] > `link_ordinal` should only be used in cases where the ordinal of the symbol is known to be stable: if the ordinal of a symbol is not explicitly set when its containing binary is built then one will be automatically assigned to it, and that assigned ordinal may change between builds of the binary. - -```rust,ignore +```rust +# #[cfg(all(windows, target_arch = "x86"))] #[link(name = "exporter", kind = "raw-dylib")] unsafe extern "stdcall" { #[link_ordinal(15)] @@ -304,14 +431,18 @@ unsafe extern "stdcall" { } ``` +r[items.extern.attributes.link_ordinal.allowed-kinds] This attribute is only used with the `raw-dylib` linking kind. Using any other kind will result in a compiler error. 
+r[items.extern.attributes.link_ordinal.exclusive] Using this attribute with the `link_name` attribute will result in a compiler error. ### Attributes on function parameters +r[items.extern.attributes.fn-parameters] + Attributes on extern function parameters follow the same rules and restrictions as [regular function parameters]. diff --git a/src/items/functions.md b/src/items/functions.md index d3f2c4548..1fe173e70 100644 --- a/src/items/functions.md +++ b/src/items/functions.md @@ -1,5 +1,8 @@ # Functions +r[items.fn] + +r[items.fn.syntax] > **Syntax**\ > _Function_ :\ >    _FunctionQualifiers_ `fn` [IDENTIFIER] [_GenericParams_]?\ @@ -8,7 +11,7 @@ >       ( [_BlockExpression_] | `;` ) > > _FunctionQualifiers_ :\ ->    `const`? `async`[^async-edition]? _ItemSafety_? (`extern` _Abi_?)? +>    `const`? `async`[^async-edition]? _ItemSafety_?[^extern-qualifiers] (`extern` _Abi_?)? > > _ItemSafety_ :\ >    `safe`[^extern-safe] | `unsafe` @@ -45,16 +48,28 @@ > [^extern-safe]: The `safe` function qualifier is only allowed semantically within > `extern` blocks. > +> [^extern-qualifiers]: *Relevant to editions earlier than Rust 2024*: Within +> `extern` blocks, the `safe` or `unsafe` function qualifier is only allowed +> when the `extern` is qualified as `unsafe`. +> > [^fn-param-2015]: Function parameters with only a type are only allowed > in an associated function of a [trait item] in the 2015 edition. +r[items.fn.intro] A _function_ consists of a [block] (that's the _body_ of the function), along with a name, a set of parameters, and an output type. Other than a name, all these are optional. + +r[items.fn.namespace] Functions are declared with the keyword `fn` which defines the given name in the [value namespace] of the module or block where it is located. 
+ +r[items.fn.signature] Functions may declare a set of *input* [*variables*][variables] as parameters, through which the caller passes arguments into the function, and the *output* [*type*][type] of the value the function will return to its caller on completion. + +r[items.fn.implicit-return] If the output type is not explicitly stated, it is the [unit type]. +r[items.fn.fn-item-type] When referred to, a _function_ yields a first-class *value* of the corresponding zero-sized [*function item type*], which when called evaluates to a direct call to the function. For example, this is a simple function: @@ -64,10 +79,14 @@ fn answer_to_life_the_universe_and_everything() -> i32 { } ``` +r[items.fn.safety-qualifiers] The `safe` function is semantically only allowed when used in an [`extern` block]. ## Function parameters +r[items.fn.params] + +r[items.fn.params.intro] Function parameters are irrefutable [patterns], so any pattern that is valid in an else-less `let` binding is also valid as a parameter: @@ -75,16 +94,24 @@ an else-less `let` binding is also valid as a parameter: fn first((value, _): (i32, i32)) -> i32 { value } ``` +r[items.fn.params.self-pat] If the first parameter is a _SelfParam_, this indicates that the function is a -[method]. Functions with a self parameter may only appear as an [associated +[method]. + +r[items.fn.params.self-restriction] +Functions with a self parameter may only appear as an [associated function] in a [trait] or [implementation]. +r[items.fn.params.varargs] A parameter with the `...` token indicates a [variadic function], and may only be used as the last parameter of an [external block] function. The variadic parameter may have an optional identifier, such as `args: ...`. ## Function body +r[items.fn.body] + +r[items.fn.body.intro] The body block of a function is conceptually wrapped in another block that first binds the argument patterns and then `return`s the value of the function's body. 
This means that the tail expression of the block, if evaluated, ends up being @@ -102,11 +129,15 @@ return { }; ``` +r[items.fn.body.bodyless] Functions without a body block are terminated with a semicolon. This form may only appear in a [trait] or [external block]. ## Generic functions +r[items.fn.generics] + +r[items.fn.generics.intro] A _generic function_ allows one or more _parameterized types_ to appear in its signature. Each type parameter must be explicitly declared in an angle-bracket-enclosed and comma-separated list, following the function name. @@ -118,8 +149,12 @@ fn foo(x: A, y: B) { # } ``` +r[items.fn.generics.param-names] Inside the function signature and body, the name of the type parameter can be -used as a type name. [Trait] bounds can be specified for type +used as a type name. + +r[items.fn.generics.param-bounds] +[Trait] bounds can be specified for type parameters to allow methods with that trait to be called on values of that type. This is specified using the `where` syntax: @@ -129,6 +164,7 @@ fn foo(x: T) where T: Debug { # } ``` +r[items.fn.generics.mono] When a generic function is referenced, its type is instantiated based on the context of the reference. For example, calling the `foo` function here: @@ -144,6 +180,7 @@ foo(&[1, 2]); will instantiate type parameter `T` with `i32`. +r[items.fn.generics.explicit-arguments] The type parameters can also be explicitly supplied in a trailing [path] component after the function name. This might be necessary if there is not sufficient context to determine the type parameters. For example, @@ -151,6 +188,9 @@ sufficient context to determine the type parameters. For example, ## Extern function qualifier +r[items.fn.extern] + +r[items.fn.extern.intro] The `extern` function qualifier allows providing function _definitions_ that can be called with a particular ABI: @@ -159,6 +199,7 @@ be called with a particular ABI: extern "ABI" fn foo() { /* ... 
*/ } ``` +r[items.fn.extern.def] These are often used in combination with [external block] items which provide function _declarations_ that can be used to call functions without providing their _definition_: @@ -173,6 +214,7 @@ unsafe { foo() }; bar(); ``` +r[items.fn.extern.default-abi] When `"extern" Abi?*` is omitted from `FunctionQualifiers` in function items, the ABI `"Rust"` is assigned. For example: @@ -186,6 +228,7 @@ is equivalent to: extern "Rust" fn foo() {} ``` +r[items.fn.extern.foreign-call] Functions can be called by foreign code, and using an ABI that differs from Rust allows, for example, to provide functions that can be called from other programming languages like C: @@ -199,6 +242,7 @@ extern "C" fn new_i32() -> i32 { 0 } extern "stdcall" fn new_i32_stdcall() -> i32 { 0 } ``` +r[items.fn.extern.default-extern] Just as with [external block], when the `extern` keyword is used and the `"ABI"` is omitted, the ABI used defaults to `"C"`. That is, this: @@ -214,6 +258,7 @@ extern "C" fn new_i32() -> i32 { 0 } let fptr: extern "C" fn() -> i32 = new_i32; ``` +r[items.fn.extern.unwind] Functions with an ABI that differs from `"Rust"` do not support unwinding in the exact same way that Rust does. Therefore, unwinding past the end of functions with such ABIs causes the process to abort. @@ -223,16 +268,24 @@ aborts the process by executing an illegal instruction. ## Const functions +r[items.fn.const] + +r[items.fn.const.intro] Functions qualified with the `const` keyword are [const functions], as are [tuple struct] and [tuple variant] constructors. _Const functions_ can be called from within [const contexts]. +r[items.fn.const.extern] Const functions may use the [`extern`] function qualifier. +r[items.fn.const.exclusivity] Const functions are not allowed to be [async](#async-functions). 
## Async functions +r[items.fn.async] + +r[items.fn.async.intro] Functions may be qualified as async, and this can also be combined with the `unsafe` qualifier: @@ -241,10 +294,12 @@ async fn regular_example() { } async unsafe fn unsafe_example() { } ``` +r[items.fn.async.future] Async functions do no work when called: instead, they capture their arguments into a future. When polled, that future will execute the function's body. +r[items.fn.async.desugar-brief] An async function is roughly equivalent to a function that returns [`impl Future`] and with an [`async move` block][async-blocks] as its body: @@ -266,12 +321,16 @@ fn example<'a>(x: &'a str) -> impl Future + 'a { } ``` +r[items.fn.async.desugar] The actual desugaring is more complex: +r[items.fn.async.lifetime-capture] - The return type in the desugaring is assumed to capture all lifetime parameters from the `async fn` declaration. This can be seen in the desugared example above, which explicitly outlives, and hence captures, `'a`. + +r[items.fn.async.param-capture] - The [`async move` block][async-blocks] in the body captures all function parameters, including those that are unused or bound to a `_` pattern. This ensures that function parameters are dropped in the @@ -284,11 +343,15 @@ For more information on the effect of async, see [`async` blocks][async-blocks]. [async-blocks]: ../expressions/block-expr.md#async-blocks [`impl Future`]: ../types/impl-trait.md +r[items.fn.async.edition2018] > **Edition differences**: Async functions are only available beginning with > Rust 2018. ### Combining `async` and `unsafe` +r[items.fn.async.safety] + +r[items.fn.async.safety.intro] It is legal to declare a function that is both async and unsafe. The resulting function is unsafe to call and (like any async function) returns a future. This future is just an ordinary future and thus an @@ -331,6 +394,9 @@ responsibility to ensure that. 
## Attributes on functions +r[items.fn.attributes] + +r[items.fn.attributes.intro] [Outer attributes][attributes] are allowed on functions. [Inner attributes][attributes] are allowed directly after the `{` inside its body [block]. @@ -346,6 +412,7 @@ fn documented() { > Note: Except for lints, it is idiomatic to only use outer attributes on > function items. +r[items.fn.attributes.builtin-attributes] The attributes that have meaning on a function are [`cfg`], [`cfg_attr`], [`deprecated`], [`doc`], [`export_name`], [`link_section`], [`no_mangle`], [the lint check attributes], [`must_use`], [the procedural macro attributes], [the testing @@ -354,6 +421,9 @@ attributes macros. ## Attributes on function parameters +r[items.fn.param-attributes] + +r[items.fn.param-attributes.intro] [Outer attributes][attributes] are allowed on function parameters and the permitted [built-in attributes] are restricted to `cfg`, `cfg_attr`, `allow`, `warn`, `deny`, and `forbid`. @@ -367,6 +437,7 @@ fn len( } ``` +r[items.fn.param-attributes.parsed-attributes] Inert helper attributes used by procedural macro attributes applied to items are also allowed but be careful to not include these inert attributes in your final `TokenStream`. diff --git a/src/items/generics.md b/src/items/generics.md index 9b756594b..5fab8de8e 100644 --- a/src/items/generics.md +++ b/src/items/generics.md @@ -1,5 +1,8 @@ # Generic parameters +r[items.generics] + +r[items.generics.syntax] > **Syntax**\ > _GenericParams_ :\ >       `<` `>`\ @@ -9,7 +12,7 @@ >    [_OuterAttribute_]\* ( _LifetimeParam_ | _TypeParam_ | _ConstParam_ ) > > _LifetimeParam_ :\ ->    [LIFETIME_OR_LABEL] ( `:` [_LifetimeBounds_] )? +>    [_Lifetime_] ( `:` [_LifetimeBounds_] )? > > _TypeParam_ :\ >    [IDENTIFIER] ( `:` [_TypeParamBounds_]? )? ( `=` [_Type_] )? @@ -17,12 +20,17 @@ > _ConstParam_:\ >    `const` [IDENTIFIER] `:` [_Type_] ( `=` _[Block][block]_ | [IDENTIFIER] | -?[LITERAL] )? 
+r[items.generics.syntax.intro] [Functions], [type aliases], [structs], [enumerations], [unions], [traits], and [implementations] may be *parameterized* by types, constants, and lifetimes. These parameters are listed in angle brackets (`<...>`), usually immediately after the name of the item and before its definition. For implementations, which don't have a name, they come directly after `impl`. + +r[items.generics.syntax.decl-order] The order of generic parameters is restricted to lifetime parameters and then type and const parameters intermixed. + +r[items.generics.syntax.duplicate-params] The same parameter name may not be declared more than once in a _GenericParams_ list. Some examples of items with type, const, and lifetime parameters: @@ -35,25 +43,35 @@ struct InnerArray([T; N]); struct EitherOrderWorks(U); ``` +r[items.generics.syntax.scope] Generic parameters are in scope within the item definition where they are declared. They are not in scope for items declared within the body of a function as described in [item declarations]. See [generic parameter scopes] for more details. +r[items.generics.builtin-generic-types] [References], [raw pointers], [arrays], [slices], [tuples], and [function pointers] have lifetime or type parameters as well, but are not referred to with path syntax. -`'_` is not a valid lifetime parameter. +r[items.generics.invalid-lifetimes] +`'_` and `'static` are not valid lifetime parameters. ### Const generics +r[items.generics.const] + +r[items.generics.const.intro] *Const generic parameters* allow items to be generic over constant values. + +r[items.generics.const.namespace] The const identifier introduces a name in the [value namespace] for the constant parameter, and all instances of the item must be instantiated with a value of the given type.
+r[items.generics.const.allowed-types] The only allowed types of const parameters are `u8`, `u16`, `u32`, `u64`, `u128`, `usize`, `i8`, `i16`, `i32`, `i64`, `i128`, `isize`, `char` and `bool`. +r[items.generics.const.usage] Const parameters can be used anywhere a [const item] can be used, with the exception that when used in a [type] or [array repeat expression], it must be standalone (as described below). That is, they are allowed in the following @@ -111,6 +129,7 @@ fn foo() { } ``` +r[items.generics.const.standalone] As a further restriction, const parameters may only appear as a standalone argument inside of a [type] or [array repeat expression]. In those contexts, they may only be used as a single segment [path expression], possibly inside a @@ -128,7 +147,10 @@ fn bad_function() -> [u8; {N + 1}] { } ``` +r[items.generics.const.argument] A const argument in a [path] specifies the const value to use for that item. + +r[items.generics.const.argument.const-expr] The argument must be a [const expression] of the type ascribed to the const parameter. The const expression must be a [block expression][block] (surrounded with braces) unless it is a single path segment (an [IDENTIFIER]) @@ -154,6 +176,7 @@ fn example() { } ``` +r[items.generics.const.type-ambiguity] When there is ambiguity if a generic argument could be resolved as either a type or const argument, it is always resolved as a type. Placing the argument in a block expression can force it to be interpreted as a const argument. 
@@ -172,6 +195,7 @@ fn foo() -> Foo { todo!() } // ERROR fn bar() -> Foo<{ N }> { todo!() } // ok ``` +r[items.generics.const.variance] Unlike type and lifetime parameters, const parameters can be declared without being used inside of a parameterized item, with the exception of implementations as described in [generic implementations]: @@ -188,6 +212,7 @@ struct Unconstrained; impl Unconstrained {} ``` +r[items.generics.const.exhaustiveness] When resolving a trait bound obligation, the exhaustiveness of all implementations of const parameters is not considered when determining if the bound is satisfied. For example, in the following, even though all possible @@ -207,9 +232,11 @@ fn generic() { } ``` - ## Where clauses +r[items.generics.where] + +r[items.generics.where.syntax] > **Syntax**\ > _WhereClause_ :\ >    `where` ( _WhereClauseItem_ `,` )\* _WhereClauseItem_ ? @@ -224,10 +251,12 @@ fn generic() { > _TypeBoundWhereClauseItem_ :\ >    [_ForLifetimes_]? [_Type_] `:` [_TypeParamBounds_]? +r[items.generics.where.intro] *Where clauses* provide another way to specify bounds on type and lifetime parameters as well as a way to specify bounds on types that aren't type parameters. +r[items.generics.where.higher-ranked-lifetimes] The `for` keyword can be used to introduce [higher-ranked lifetimes]. It only allows [_LifetimeParam_] parameters. @@ -245,6 +274,8 @@ where ## Attributes +r[items.generics.attributes] + Generic lifetime and type parameters allow [attributes] on them. There are no built-in attributes that do anything in this position, although custom derive attributes may give meaning to it. 
@@ -263,7 +294,6 @@ struct Foo<#[my_flexible_clone(unbounded)] H> { ``` [IDENTIFIER]: ../identifiers.md -[LIFETIME_OR_LABEL]: ../tokens.md#lifetimes-and-loop-labels [_ForLifetimes_]: ../trait-bounds.md#higher-ranked-trait-bounds [_LifetimeParam_]: #generic-parameters diff --git a/src/items/implementations.md b/src/items/implementations.md index 37965569e..5bde1ec21 100644 --- a/src/items/implementations.md +++ b/src/items/implementations.md @@ -1,5 +1,8 @@ # Implementations +r[items.impl] + +r[items.impl.syntax] > **Syntax**\ > _Implementation_ :\ >    _InherentImpl_ | _TraitImpl_ @@ -19,11 +22,13 @@ >       [_AssociatedItem_]\*\ >    `}` +r[items.impl.intro] An _implementation_ is an item that associates items with an _implementing type_. Implementations are defined with the keyword `impl` and contain functions that belong to an instance of the type that is being implemented or to the type statically. +r[items.impl.kinds] There are two types of implementations: - inherent implementations @@ -31,22 +36,33 @@ There are two types of implementations: ## Inherent Implementations +r[items.impl.inherent] + +r[items.impl.inherent.intro] An inherent implementation is defined as the sequence of the `impl` keyword, generic type declarations, a path to a nominal type, a where clause, and a bracketed set of associable items. +r[items.impl.inherent.implementing-type] The nominal type is called the _implementing type_ and the associable items are the _associated items_ to the implementing type. +r[items.impl.inherent.associated-items] Inherent implementations associate the contained items to the -implementing type. Inherent implementations can contain [associated -functions] (including [methods]) and [associated constants]. They cannot -contain associated type aliases. +implementing type. + +r[items.impl.inherent.associated-items.allowed-items] +Inherent implementations can contain [associated functions] (including [methods]) and [associated constants]. 
+r[items.impl.inherent.type-alias] +They cannot contain associated type aliases. + +r[items.impl.inherent.associated-item-path] The [path] to an associated item is any path to the implementing type, followed by the associated item's identifier as the final path component. +r[items.impl.inherent.coherence] A type can also have multiple inherent implementations. An implementing type must be defined within the same crate as the original type definition. @@ -86,23 +102,30 @@ fn main() { ## Trait Implementations +r[items.impl.trait] + +r[items.impl.trait.intro] A _trait implementation_ is defined like an inherent implementation except that the optional generic type declarations are followed by a [trait], followed by the keyword `for`, followed by a path to a nominal type. +r[items.impl.trait.implemented-trait] The trait is known as the _implemented trait_. The implementing type implements the implemented trait. +r[items.impl.trait.def-requirement] A trait implementation must define all non-default associated items declared by the implemented trait, may redefine default associated items defined by the implemented trait, and cannot define any other items. +r[items.impl.trait.associated-item-path] The path to the associated items is `<` followed by a path to the implementing type followed by `as` followed by a path to the trait followed by `>` as a path component followed by the associated item's path component. +r[items.impl.trait.safety] [Unsafe traits] require the trait implementation to begin with the `unsafe` keyword. @@ -140,9 +163,13 @@ impl Shape for Circle { ### Trait Implementation Coherence +r[items.impl.trait.coherence] + +r[items.impl.trait.coherence.intro] A trait implementation is considered incoherent if either the orphan rules check fails or there are overlapping implementation instances. 
+r[items.impl.trait.coherence.overlapping] Two trait implementations overlap when there is a non-empty intersection of the traits the implementation is for, the implementations can be instantiated with the same type. #### Orphan rules +r[items.impl.trait.orphan-rule] + +r[items.impl.trait.orphan-rule.general] Given `impl<P1..=Pn> Trait<T1..=Tn> for T0`, an `impl` is valid only if at least one of the following is true: @@ -160,14 +190,19 @@ least one of the following is true: - No [uncovered type] parameters `P1..=Pn` may appear in `T0..Ti` (excluding `Ti`) +r[items.impl.trait.uncovered-param] Only the appearance of *uncovered* type parameters is restricted. + +r[items.impl.trait.fundamental] Note that for the purposes of coherence, [fundamental types] are special. The `T` in `Box<T>` is not considered covered, and `Box<LocalType>` is considered local. - ## Generic Implementations +r[items.impl.generics] + +r[items.impl.generics.intro] An implementation can take [generic parameters], which can be used in the rest of the implementation. Implementation parameters are written directly after the `impl` keyword. @@ -182,6 +217,7 @@ impl Seq for u32 { } ``` +r[items.impl.generics.usage] Generic parameters *constrain* an implementation if the parameter appears at least once in one of: @@ -190,6 +226,7 @@ least once in one of: * As an [associated type] in the [bounds] of a type that contains another parameter that constrains the implementation +r[items.impl.generics.constrain] Type and const parameters must always constrain the implementation. Lifetimes must constrain the implementation if the lifetime is used in an associated type. @@ -263,6 +300,8 @@ impl<'a> HasAssocType for Struct { ## Attributes on Implementations +r[items.impl.attributes] + Implementations may contain outer [attributes] before the `impl` keyword and inner [attributes] inside the brackets that contain the associated items. Inner attributes must come before any associated items.
The attributes that have diff --git a/src/items/modules.md b/src/items/modules.md index e709f52fd..67322f845 100644 --- a/src/items/modules.md +++ b/src/items/modules.md @@ -1,5 +1,8 @@ # Modules +r[items.mod] + +r[items.mod.syntax] > **Syntax:**\ > _Module_ :\ >       `unsafe`? `mod` [IDENTIFIER] `;`\ @@ -8,11 +11,16 @@ >         [_Item_]\*\ >       `}` +r[items.mod.intro] A module is a container for zero or more [items]. +r[items.mod.def] A _module item_ is a module, surrounded in braces, named, and prefixed with the keyword `mod`. A module item introduces a new, named module into the tree of -modules making up a crate. Modules can nest arbitrarily. +modules making up a crate. + +r[items.mod.nesting] +Modules can nest arbitrarily. An example of a module: @@ -34,10 +42,14 @@ mod math { } ``` +r[items.mod.namespace] Modules are defined in the [type namespace] of the module or block where they are located. + +r[items.mod.multiple-items] It is an error to define multiple items with the same name in the same namespace within a module. See the [scopes chapter] for more details on restrictions and shadowing behavior. +r[items.mod.unsafe] The `unsafe` keyword is syntactically allowed to appear before the `mod` keyword, but it is rejected at a semantic level. This allows macros to consume the syntax and make use of the `unsafe` keyword, before removing it from the @@ -45,9 +57,15 @@ token stream. ## Module Source Filenames +r[items.mod.outlined] + +r[items.mod.outlined.intro] A module without a body is loaded from an external file. When the module does not have a `path` attribute, the path to the file mirrors the logical [module -path]. Ancestor module path components are directories, and the module's +path]. + +r[items.mod.outlined.search] +Ancestor module path components are directories, and the module's contents are in a file with the name of the module plus the `.rs` extension. 
For example, the following module structure can have this corresponding filesystem structure: @@ -58,6 +76,7 @@ Module Path | Filesystem Path | File Contents `crate::util` | `util.rs` | `mod config;` `crate::util::config` | `util/config.rs` | +r[items.mod.outlined.search-mod] Module filenames may also be the name of the module as a directory with the contents in a file named `mod.rs` within that directory. The above example can alternately be expressed with `crate::util`'s contents in a file named @@ -70,9 +89,13 @@ alternately be expressed with `crate::util`'s contents in a file named ### The `path` attribute +r[items.mod.outlined.path] + +r[items.mod.outlined.path.intro] The directories and files used for loading external file modules can be influenced with the `path` attribute. +r[items.mod.outlined.path.search] For `path` attributes on modules not inside inline module blocks, the file path is relative to the directory the source file is located. For example, the following code snippet would use the paths shown based on where it is located: @@ -88,6 +111,7 @@ Source File | `c`'s File Location | `c`'s Module Path `src/a/b.rs` | `src/a/foo.rs` | `crate::a::b::c` `src/a/mod.rs` | `src/a/foo.rs` | `crate::a::c` +r[items.mod.outlined.path.search-nested] For `path` attributes inside inline module blocks, the relative location of the file path depends on the kind of source file the `path` attribute is located in. "mod-rs" source files are root modules (such as `lib.rs` or @@ -128,10 +152,14 @@ mod thread { ## Attributes on Modules +r[items.mod.attributes] + +r[items.mod.attributes.intro] Modules, like all items, accept outer attributes. They also accept inner attributes: either after `{` for a module with a body, or at the beginning of the source file, after the optional BOM and shebang. 
+r[items.mod.attributes.supported] The built-in attributes that have meaning on a module are [`cfg`], [`deprecated`], [`doc`], [the lint check attributes], [`path`], and [`no_implicit_prelude`]. Modules also accept macro attributes. diff --git a/src/items/static-items.md b/src/items/static-items.md index f688a9024..d9e61e648 100644 --- a/src/items/static-items.md +++ b/src/items/static-items.md @@ -1,5 +1,8 @@ # Static items +r[items.static] + +r[items.static.syntax] > **Syntax**\ > _StaticItem_ :\ >    [_ItemSafety_]?[^extern-safety] `static` `mut`? [IDENTIFIER] `:` [_Type_] @@ -8,33 +11,45 @@ > [^extern-safety]: The `safe` and `unsafe` function qualifiers are only > allowed semantically within `extern` blocks. +r[items.static.intro] A *static item* is similar to a [constant], except that it represents a precise memory location in the program. All references to the static refer to the same -memory location. Static items have the `static` lifetime, which outlives all +memory location. + +r[items.static.lifetime] +Static items have the `static` lifetime, which outlives all other lifetimes in a Rust program. Static items do not call [`drop`] at the end of the program. +r[items.static.namespace] The static declaration defines a static value in the [value namespace] of the module or block where it is located. +r[items.static.init] The static initializer is a [constant expression] evaluated at compile time. Static initializers may refer to other statics. +r[items.static.read-only] Non-`mut` static items that contain a type that is not [interior mutable] may be placed in read-only memory. +r[items.static.safety] All access to a static is safe, but there are a number of restrictions on statics: -* The type must have the `Sync` trait bound to allow thread-safe access. -* Constants cannot refer to statics. +r[items.static.sync] +* The type must have the [`Sync`](std::marker::Sync) trait bound to allow thread-safe access. 
+r[items.static.init.omission] The initializer expression must be omitted in an [external block], and must be provided for free static items. +r[items.static.safety-qualifiers] The `safe` and `unsafe` qualifiers are semantically only allowed when used in an [external block]. ## Statics & generics +r[items.static.generics] + A static item defined in a generic scope (for example in a blanket or default implementation) will result in exactly one static item being defined, as if the static definition was pulled out of the current scope into the module. @@ -83,14 +98,21 @@ blanket_impl: counter was 1 ## Mutable statics +r[items.static.mut] + +r[items.static.mut.intro] If a static item is declared with the `mut` keyword, then it is allowed to be modified by the program. One of Rust's goals is to make concurrency bugs hard to run into, and this is obviously a very large source of race conditions or -other bugs. For this reason, an `unsafe` block is required when either reading +other bugs. + +r[items.static.mut.safety] +For this reason, an `unsafe` block is required when either reading or writing a mutable static variable. Care should be taken to ensure that modifications to a mutable static are safe with respect to other threads running in the same process. +r[items.static.mut.extern] Mutable statics are still very useful, however. They can be used with C libraries and can also be bound from C libraries in an `extern` block. @@ -122,16 +144,19 @@ fn bump_levels_safe() -> u32 { } ``` +r[items.static.mut.sync] Mutable statics have the same restrictions as normal statics, except that the type does not have to implement the `Sync` trait. ## Using Statics or Consts +r[items.static.alternate] + It can be confusing whether or not you should use a constant item or a static item. Constants should, in general, be preferred over statics unless one of the following are true: -* Large amounts of data are being stored +* Large amounts of data are being stored.
* The single-address property of statics is required. * Interior mutability is required. diff --git a/src/items/structs.md b/src/items/structs.md index e59d7e9c0..8a6c9eb22 100644 --- a/src/items/structs.md +++ b/src/items/structs.md @@ -1,5 +1,8 @@ # Structs +r[items.struct] + +r[items.struct.syntax] > **Syntax**\ > _Struct_ :\ >       _StructStruct_\ @@ -36,7 +39,10 @@ >    [_Visibility_]?\ >    [_Type_] +r[items.struct.intro] A _struct_ is a nominal [struct type] defined with the keyword `struct`. + +r[items.struct.namespace] A struct declaration defines the given name in the [type namespace] of the module or block where it is located. An example of a `struct` item and its use: @@ -47,6 +53,7 @@ let p = Point {x: 10, y: 11}; let px: i32 = p.x; ``` +r[items.struct.tuple] A _tuple struct_ is a nominal [tuple type], and is also defined with the keyword `struct`. In addition to defining a type, it also defines a constructor of the same name in the [value namespace]. The constructor is a function which can be called to create a new instance of the struct. @@ -58,6 +65,7 @@ let p = Point(10, 11); let px: i32 = match p { Point(x, _) => x }; ``` +r[items.struct.unit] A _unit-like struct_ is a struct without any fields, defined by leaving off the list of fields entirely. Such a struct implicitly defines a [constant] of its type with the same name. For example: @@ -75,6 +83,7 @@ const Cookie: Cookie = Cookie {}; let c = [Cookie, Cookie {}, Cookie, Cookie {}]; ``` +r[items.struct.layout] The precise memory layout of a struct is not specified. One can specify a particular layout using the [`repr` attribute]. diff --git a/src/items/traits.md b/src/items/traits.md index cb6c6f902..dd315e440 100644 --- a/src/items/traits.md +++ b/src/items/traits.md @@ -1,5 +1,8 @@ # Traits +r[items.traits] + +r[items.traits.syntax] > **Syntax**\ > _Trait_ :\ >    `unsafe`? 
`trait` [IDENTIFIER]  @@ -10,6 +13,7 @@ >      [_AssociatedItem_]\*\ >    `}` +r[items.traits.intro] A _trait_ describes an abstract interface that types can implement. This interface consists of [associated items], which come in three varieties: @@ -17,16 +21,22 @@ interface consists of [associated items], which come in three varieties: - [types](associated-items.md#associated-types) - [constants](associated-items.md#associated-constants) +r[items.traits.namespace] The trait declaration defines a trait in the [type namespace] of the module or block where it is located. + +r[items.traits.associated-item-namespaces] Associated items are defined as members of the trait within their respective namespaces. Associated types are defined in the type namespace. Associated constants and associated functions are defined in the value namespace. +r[items.traits.self-param] All traits define an implicit type parameter `Self` that refers to "the type that is implementing this interface". Traits may also contain additional type parameters. These type parameters, including `Self`, may be constrained by other traits and so forth [as usual][generics]. +r[items.traits.impls] Traits are implemented for specific types through separate [implementations]. +r[items.traits.associated-item-decls] Trait functions may omit the function body by replacing it with a semicolon. This indicates that the implementation must define the function. If the trait function defines a body, this definition acts as a default for any @@ -46,13 +56,18 @@ trait Example { } ``` +r[items.traits.const-fn] Trait functions are not allowed to be [`const`]. ## Trait bounds +r[items.traits.bounds] + Generic items may use traits as [bounds] on their type parameters. -## Generic Traits +## Generic traits + +r[items.traits.generic] Type parameters can be specified for a trait to make it generic. These appear after the trait name, using the same syntax used in [generic functions]. 
@@ -65,15 +80,28 @@ trait Seq { } ``` -## Object Safety + +## Dyn compatibility + +r[items.traits.dyn-compatible] -Object safe traits can be the base trait of a [trait object]. A trait is -*object safe* if it has the following qualities (defined in [RFC 255]): +r[items.traits.dyn-compatible.intro] +A dyn-compatible trait can be the base trait of a [trait object]. A trait is +*dyn compatible* if it has the following qualities: -* All [supertraits] must also be object safe. +r[items.traits.dyn-compatible.supertraits] +* All [supertraits] must also be dyn compatible. + +r[items.traits.dyn-compatible.sized] * `Sized` must not be a [supertrait][supertraits]. In other words, it must not require `Self: Sized`. + +r[items.traits.dyn-compatible.associated-consts] * It must not have any associated constants. + +r[items.traits.dyn-compatible.associated-types] * It must not have any associated types with generics. + +r[items.traits.dyn-compatible.associated-functions] * All associated functions must either be dispatchable from a trait object or be explicitly non-dispatchable: * Dispatchable functions must: * Not have any type parameters (although lifetime parameters are allowed). @@ -92,11 +120,13 @@ Object safe traits can be the base trait of a [trait object]. A trait is * Explicitly non-dispatchable functions require: * Have a `where Self: Sized` bound (receiver type of `Self` (i.e. `self`) implies this). +> **Note**: This concept was formerly known as *object safety*. + ```rust # use std::rc::Rc; # use std::sync::Arc; # use std::pin::Pin; -// Examples of object safe methods. +// Examples of dyn compatible methods. trait TraitMethods { fn by_ref(self: &Self) {} fn by_ref_mut(self: &mut Self) {} @@ -113,7 +143,7 @@ trait TraitMethods { ``` ```rust,compile_fail -// This trait is object-safe, but these methods cannot be dispatched on a trait object. +// This trait is dyn compatible, but these methods cannot be dispatched on a trait object. 
trait NonDispatchable { // Non-methods cannot be dispatched. fn foo() where Self: Sized {} @@ -137,8 +167,8 @@ obj.typed(1); // ERROR: cannot call with generic type ```rust,compile_fail # use std::rc::Rc; -// Examples of non-object safe traits. -trait NotObjectSafe { +// Examples of dyn-incompatible traits. +trait DynIncompatible { const CONST: i32 = 1; // ERROR: cannot have associated const fn foo() {} // ERROR: associated function without Sized @@ -148,14 +178,14 @@ trait NotObjectSafe { } struct S; -impl NotObjectSafe for S { +impl DynIncompatible for S { fn returns(&self) -> Self { S } } -let obj: Box<dyn NotObjectSafe> = Box::new(S); // ERROR +let obj: Box<dyn DynIncompatible> = Box::new(S); // ERROR ``` ```rust,compile_fail -// Self: Sized traits are not object-safe. +// `Self: Sized` traits are dyn-incompatible. trait TraitWithSize where Self: Sized {} struct S; @@ -164,7 +194,7 @@ let obj: Box<dyn TraitWithSize> = Box::new(S); // ERROR ``` ```rust,compile_fail -// Not object safe if `Self` is a type argument. +// Dyn-incompatible if `Self` is a type argument. trait Super<A> {} trait WithSelf: Super<Self> where Self: Sized {} @@ -176,14 +206,19 @@ let obj: Box<dyn WithSelf> = Box::new(S); // ERROR: cannot use `Self` type param ## Supertraits +r[items.traits.supertraits] + +r[items.traits.supertraits.intro] **Supertraits** are traits that are required to be implemented for a type to implement a specific trait. Furthermore, anywhere a [generic][generics] or [trait object] is bounded by a trait, it has access to the associated items of its supertraits. +r[items.traits.supertraits.decl] Supertraits are declared by trait bounds on the `Self` type of a trait and transitively the supertraits of the traits declared in those trait bounds. It is an error for a trait to be its own supertrait. +r[items.traits.supertraits.subtrait] The trait with a supertrait is called a **subtrait** of its supertrait. The following is an example of declaring `Shape` to be a supertrait of `Circle`.
@@ -242,6 +277,9 @@ let nonsense = circle.radius() * circle.area(); ## Unsafe traits +r[items.traits.safety] + +r[items.traits.safety.intro] Traits items that begin with the `unsafe` keyword indicate that *implementing* the trait may be [unsafe]. It is safe to use a correctly implemented unsafe trait. The [trait implementation] must also begin with the `unsafe` keyword. @@ -250,11 +288,15 @@ The [trait implementation] must also begin with the `unsafe` keyword. ## Parameter patterns +r[items.traits.params] + +r[items.traits.params.allowed-patterns] Function or method declarations without a body only allow [IDENTIFIER] or `_` [wild card][WildcardPattern] patterns. `mut` [IDENTIFIER] is currently allowed, but it is deprecated and will become a hard error in the future. +r[items.traits.params.edition2015] In the 2015 edition, the pattern for a trait function or method parameter is optional: @@ -265,6 +307,7 @@ trait T { } ``` +r[items.traits.params.restriction] The kinds of patterns for parameters is limited to one of the following: * [IDENTIFIER] @@ -273,6 +316,7 @@ The kinds of patterns for parameters is limited to one of the following: * `&` [IDENTIFIER] * `&&` [IDENTIFIER] +r[items.traits.params.restriction.edition2018] Beginning in the 2018 edition, function or method parameter patterns are no longer optional. Also, all irrefutable patterns are allowed as long as there is a body. Without a body, the limitations listed above are still in effect. @@ -286,6 +330,9 @@ trait T { ## Item visibility +r[items.traits.associated-visibility] + +r[items.traits.associated-visibility.intro] Trait items syntactically allow a [_Visibility_] annotation, but this is rejected when the trait is validated. This allows items to be parsed with a unified syntax across different contexts where they are used. 
As an example, @@ -330,7 +377,6 @@ fn main() { [_WhereClause_]: generics.md#where-clauses [bounds]: ../trait-bounds.md [trait object]: ../types/trait-object.md -[RFC 255]: https://github.com/rust-lang/rfcs/blob/master/text/0255-object-safety.md [associated items]: associated-items.md [method]: associated-items.md#methods [supertraits]: #supertraits @@ -349,3 +395,17 @@ fn main() { [`async`]: functions.md#async-functions [`const`]: functions.md#const-functions [type namespace]: ../names/namespaces.md + + diff --git a/src/items/type-aliases.md b/src/items/type-aliases.md index ef3e6fcb7..d496b8c60 100644 --- a/src/items/type-aliases.md +++ b/src/items/type-aliases.md @@ -1,11 +1,15 @@ # Type aliases +r[items.type] + +r[items.type.syntax] > **Syntax**\ > _TypeAlias_ :\ >    `type` [IDENTIFIER] [_GenericParams_]? > ( `:` [_TypeParamBounds_] )? > [_WhereClause_]? ( `=` [_Type_] [_WhereClause_]?)? `;` +r[items.type.intro] A _type alias_ defines a new name for an existing [type] in the [type namespace] of the module or block where it is located. Type aliases are declared with the keyword `type`. Every value has a single, specific type, but may implement several different traits, and may be compatible with several different type constraints. @@ -18,6 +22,7 @@ type Point = (u8, u8); let p: Point = (41, 68); ``` +r[items.type.constructor-alias] A type alias to a tuple-struct or unit-struct cannot be used to qualify that type's constructor: ```rust,compile_fail @@ -30,15 +35,19 @@ let _ = UseAlias(5); // OK let _ = TypeAlias(5); // Doesn't work ``` +r[items.type.associated-type] A type alias, when not used as an [associated type], must include a [_Type_] and may not include [_TypeParamBounds_]. +r[items.type.associated-trait] A type alias, when used as an [associated type] in a [trait], must not include a [_Type_] specification but may include [_TypeParamBounds_]. 
+r[items.type.associated-impl] A type alias, when used as an [associated type] in a [trait impl], must include a [_Type_] specification and may not include [_TypeParamBounds_]. +r[items.type.deprecated] Where clauses before the equals sign on a type alias in a [trait impl] (like `type TypeAlias where T: Foo = Bar`) are deprecated. Where clauses after the equals sign (like `type TypeAlias = Bar where T: Foo`) are preferred. diff --git a/src/items/unions.md b/src/items/unions.md index d6a03ed39..835f924ea 100644 --- a/src/items/unions.md +++ b/src/items/unions.md @@ -1,12 +1,18 @@ # Unions +r[items.union] + +r[items.union.syntax] > **Syntax**\ > _Union_ :\ >    `union` [IDENTIFIER] [_GenericParams_]? [_WhereClause_]? > `{`[_StructFields_]? `}` +r[items.union.intro] A union declaration uses the same syntax as a struct declaration, except with `union` in place of `struct`. + +r[items.union.namespace] A union declaration defines the given name in the [type namespace] of the module or block where it is located. ```rust @@ -17,24 +23,39 @@ union MyUnion { } ``` +r[items.union.common-storage] The key property of unions is that all fields of a union share common storage. As a result, writes to one field of a union can overwrite its other fields, and size of a union is determined by the size of its largest field. +r[items.union.field-restrictions] Union field types are restricted to the following subset of types: + +r[items.union.field-copy] - `Copy` types + +r[items.union.field-references] - References (`&T` and `&mut T` for arbitrary `T`) + +r[items.union.field-manually-drop] - `ManuallyDrop` (for arbitrary `T`) + +r[items.union.field-tuple] - Tuples and arrays containing only allowed union field types +r[items.union.drop] This restriction ensures, in particular, that union fields never need to be dropped. Like for structs and enums, it is possible to `impl Drop` for a union to manually define what happens when it gets dropped. 
+r[items.union.fieldless] Unions without any fields are not accepted by the compiler, but can be accepted by macros. ## Initialization of a union +r[items.union.init] + +r[items.union.init.intro] A value of a union type can be created using the same syntax that is used for struct types, except that it must specify exactly one field: @@ -44,6 +65,7 @@ struct types, except that it must specify exactly one field: let u = MyUnion { f1: 1 }; ``` +r[items.union.init.result] The expression above creates a value of type `MyUnion` and initializes the storage using field `f1`. The union can be accessed using the same syntax as struct fields: @@ -57,18 +79,28 @@ let f = unsafe { u.f1 }; ## Reading and writing union fields +r[items.union.fields] + +r[items.union.fields.intro] Unions have no notion of an "active field". Instead, every union access just -interprets the storage as the type of the field used for the access. Reading a -union field reads the bits of the union at the field's type. Fields might have a -non-zero offset (except when [the C representation] is used); in that case the -bits starting at the offset of the fields are read. It is the programmer's -responsibility to make sure that the data is valid at the field's type. Failing +interprets the storage as the type of the field used for the access. + +r[items.union.fields.read] +Reading a union field reads the bits of the union at the field's type. + +r[items.union.fields.offset] +Fields might have a non-zero offset (except when [the C representation] is used); in that case the +bits starting at the offset of the fields are read. + +r[items.union.fields.validity] +It is the programmer's responsibility to make sure that the data is valid at the field's type. Failing to do so results in [undefined behavior]. For example, reading the value `3` from a field of the [boolean type] is undefined behavior.
Effectively, writing to and then reading from a union with [the C representation] is analogous to a [`transmute`] from the type used for writing to the type used for reading. +r[items.union.fields.read-safety] Consequently, all reads of union fields have to be placed in `unsafe` blocks: ```rust @@ -83,6 +115,7 @@ unsafe { Commonly, code using unions will provide safe wrappers around unsafe union field accesses. +r[items.union.fields.write-safety] In contrast, writes to union fields are safe, since they just overwrite arbitrary data, but cannot cause undefined behavior. (Note that union field types can never have drop glue, so a union field write will never implicitly @@ -90,10 +123,17 @@ drop anything.) ## Pattern matching on unions -Another way to access union fields is to use pattern matching. Pattern matching -on union fields uses the same syntax as struct patterns, except that the pattern -must specify exactly one field. Since pattern matching is like reading the union -with a particular field, it has to be placed in `unsafe` blocks as well. +r[items.union.pattern] + +r[items.union.pattern.intro] +Another way to access union fields is to use pattern matching. + +r[items.union.pattern.one-field] +Pattern matching on union fields uses the same syntax as struct patterns, except that the pattern +must specify exactly one field. + +r[items.union.pattern.safety] +Since pattern matching is like reading the union with a particular field, it has to be placed in `unsafe` blocks as well. ```rust # union MyUnion { f1: u32, f2: f32 } @@ -108,6 +148,7 @@ fn f(u: MyUnion) { } ``` +r[items.union.pattern.subpattern] Pattern matching may match a union as a field of a larger structure. 
In particular, when using a Rust union to implement a C tagged union via FFI, this allows matching on the tag and the corresponding field simultaneously: @@ -141,9 +182,14 @@ fn is_zero(v: Value) -> bool { ## References to union fields +r[items.union.ref] + +r[items.union.ref.intro] Since union fields share common storage, gaining write access to one field of a -union can give write access to all its remaining fields. Borrow checking rules -have to be adjusted to account for this fact. As a result, if one field of a +union can give write access to all its remaining fields. + +r[items.union.ref.borrow] +Borrow checking rules have to be adjusted to account for this fact. As a result, if one field of a union is borrowed, all its remaining fields are borrowed as well for the same lifetime. @@ -164,6 +210,7 @@ fn test() { } ``` +r[items.union.ref.usage] As you could see, in many aspects (except for layouts, safety, and ownership) unions behave exactly like structs, largely as a consequence of inheriting their syntactic shape from structs. This is also true for many unmentioned diff --git a/src/items/use-declarations.md b/src/items/use-declarations.md index 4e1096082..ca70b8a8b 100644 --- a/src/items/use-declarations.md +++ b/src/items/use-declarations.md @@ -1,5 +1,8 @@ # Use declarations +r[items.use] + +r[items.use.syntax] > **Syntax:**\ > _UseDeclaration_ :\ >    `use` _UseTree_ `;` @@ -9,6 +12,7 @@ >    | ([_SimplePath_]? `::`)? `{` (_UseTree_ ( `,` _UseTree_ )\* `,`?)? `}`\ >    | [_SimplePath_] ( `as` ( [IDENTIFIER] | `_` ) )? +r[items.use.intro] A _use declaration_ creates one or more local name bindings synonymous with some other [path]. Usually a `use` declaration is used to shorten the path required to refer to a module item. 
These declarations may appear in [modules] @@ -19,17 +23,27 @@ A `use` declaration is also sometimes called an _import_, or, if it is public, a [modules]: modules.md [blocks]: ../expressions/block-expr.md +r[items.use.forms] Use declarations support a number of convenient shortcuts: +r[items.use.forms.multiple] * Simultaneously binding a list of paths with a common prefix, using the brace syntax `use a::b::{c, d, e::f, g::h::i};` + +r[items.use.forms.self] * Simultaneously binding a list of paths with a common prefix and their common parent module, using the `self` keyword, such as `use a::b::{self, c, d::e};` + +r[items.use.forms.as] * Rebinding the target name as a new local name, using the syntax `use p::q::r as x;`. This can also be used with the last two features: `use a::b::{self as ab, c as abc}`. + +r[items.use.forms.glob] * Binding all paths matching a given prefix, using the asterisk wildcard syntax `use a::b::*;`. + +r[items.use.forms.nesting] * Nesting groups of the previous features multiple times, such as `use a::b::{self as ab, c, d::{*, e::f}};` @@ -58,12 +72,18 @@ fn main() { ## `use` Visibility +r[items.use.visibility] + +r[items.use.visibility.intro] Like items, `use` declarations are private to the containing module, by default. Also like items, a `use` declaration can be public, if qualified by the `pub` keyword. Such a `use` declaration serves to _re-export_ a name. A public `use` declaration can therefore _redirect_ some public name to a different target definition: even a definition with a private canonical path, -inside a different module. If a sequence of such redirections form a cycle or +inside a different module. + +r[items.use.visibility.unambiguous] +If a sequence of such redirections form a cycle or cannot be resolved unambiguously, they represent a compile-time error. 
An example of re-exporting: @@ -88,6 +108,9 @@ In this example, the module `quux` re-exports two public names defined in ## `use` Paths +r[items.use.path] + +r[items.use.path.intro] The [paths] that are allowed in a `use` item follow the [_SimplePath_] grammar and are similar to the paths that may be used in an expression. They may create bindings for: @@ -97,8 +120,10 @@ They may create bindings for: * [Attributes] * [Derive macros] +r[items.use.path.disallowed] They cannot import [associated items], [generic parameters], [local variables], paths with [`Self`], or [tool attributes]. More restrictions are described below. +r[items.use.path.namespace] `use` will create bindings for all [namespaces] from the imported entities, with the exception that a `self` import will only import from the type namespace (as described below). For example, the following illustrates creating bindings for the same name in two namespaces: @@ -116,6 +141,7 @@ fn example() { } ``` +r[items.use.path.edition2015] > **Edition differences**: In the 2015 edition, `use` paths are relative to the crate root. > For example: > @@ -141,6 +167,8 @@ fn example() { ## `as` renames +r[items.use.as] + The `as` keyword can be used to change the name of an imported entity. For example: @@ -155,6 +183,9 @@ mod inner { ## Brace syntax +r[items.use.multiple-syntax] + +r[items.use.multiple-syntax.intro] Braces can be used in the last segment of the path to import multiple entities from the previous segment, or, if there are no previous segments, from the current scope. Braces can be nested, creating a tree of paths, where each grouping of segments is logically combined with its parent to create a full path. @@ -166,13 +197,18 @@ Braces can be nested, creating a tree of paths, where each grouping of segments use std::collections::{BTreeSet, hash_map::{self, HashMap}}; ``` +r[items.use.multiple-syntax.empty] An empty brace does not import anything, though the leading path is validated that it is accessible. 
+r[items.use.multiple-syntax.edition2015] > **Edition differences**: In the 2015 edition, paths are relative to the crate root, so an import such as `use {foo, bar};` will import the names `foo` and `bar` from the crate root, whereas starting in 2018, those names are relative to the current scope. ## `self` imports +r[items.use.self] + +r[items.use.self.intro] The keyword `self` may be used within [brace syntax](#brace-syntax) to create a binding of the parent entity under its own name. ```rust @@ -191,6 +227,7 @@ mod example { # fn main() {} ``` +r[items.use.self.namespace] `self` only creates a binding from the [type namespace] of the parent entity. For example, in the following, only the `foo` mod is imported: @@ -215,6 +252,9 @@ fn main() { ## Glob imports +r[items.use.glob] + +r[items.use.glob.intro] The `*` character may be used as the last segment of a `use` path to import all importable entities from the entity of the preceding segment. For example: @@ -237,6 +277,7 @@ mod foo { } ``` +r[items.use.glob.shadowing] Items and named imports are allowed to shadow names from glob imports in the same [namespace]. That is, if there is a name already defined by another item in the same namespace, the glob import will be shadowed. For example: @@ -268,20 +309,28 @@ mod clashing { } ``` +r[items.use.glob.last-segment-only] `*` cannot be used as the first or intermediate segments. + +r[items.use.glob.self-import] `*` cannot be used to import a module's contents into itself (such as `use self::*;`). +r[items.use.glob.edition2015] > **Edition differences**: In the 2015 edition, paths are relative to the crate root, so an import such as `use *;` is valid, and it means to import everything from the crate root. > This cannot be used in the crate root itself. ## Underscore Imports +r[items.use.as-underscore] + +r[items.use.as-underscore.intro] Items can be imported without binding to a name by using an underscore with the form `use path as _`. 
This is particularly useful to import a trait so that its methods may be used without importing the trait's symbol, for example if the trait's symbol may conflict with another symbol. Another example is to link an external crate without importing its name. +r[items.use.as-underscore.glob] Asterisk glob imports will import items imported with `_` in their unnameable form. @@ -303,6 +352,7 @@ fn main() { } ``` +r[items.use.as-underscore.macro] The unique, unnameable symbols are created after macro expansion so that macros may safely emit multiple references to `_` imports. For example, the following should not produce an error: @@ -320,12 +370,23 @@ m!(use std as _;); ## Restrictions +r[items.use.restrictions] + The following are restrictions for valid `use` declarations: +r[items.use.restrictions.crate] * `use crate;` must use `as` to define the name to which to bind the crate root. + +r[items.use.restrictions.self] * `use {self};` is an error; there must be a leading segment when using `self`. + +r[items.use.restrictions.duplicate-name] * As with any item definition, `use` imports cannot create duplicate bindings of the same name in the same namespace in a module or block. + +r[items.use.restrictions.macro-crate] * `use` paths with `$crate` are not allowed in a [`macro_rules`] expansion. + +r[items.use.restrictions.variant] * `use` paths cannot refer to enum variants through a [type alias]. For example: ```rust,compile_fail enum MyEnum { @@ -339,10 +400,14 @@ The following are restrictions for valid `use` declarations: ## Ambiguities +r[items.use.ambiguities] + > **Note**: This section is incomplete. +r[items.use.ambiguities.intro] Some situations are an error when there is an ambiguity as to which name a `use` declaration refers. This happens when there are two name candidates that do not resolve to the same entity. +r[items.use.ambiguities.glob] Glob imports are allowed to import conflicting names in the same namespace as long as the name is not used. 
For example: diff --git a/src/keywords.md b/src/keywords.md index e5bb2e76a..a8ec58a1c 100644 --- a/src/keywords.md +++ b/src/keywords.md @@ -102,6 +102,11 @@ The following keywords are reserved beginning in the 2018 edition. > **Lexer 2018+**\ > KW_TRY : `try` +The following keywords are reserved beginning in the 2024 edition. + +> **Lexer 2024+**\ +> KW_GEN : `gen` + ## Weak keywords r[lex.keywords.weak] @@ -110,6 +115,17 @@ r[lex.keywords.weak.intro] These keywords have special meaning only in certain contexts. For example, it is possible to declare a variable or method with the name `union`. +r[lex.keywords.weak.list] +> **Lexer**\ +> KW_MACRO_RULES : `macro_rules`\ +> KW_UNION : `union`\ +> KW_STATICLIFETIME : `'static`\ +> KW_SAFE : `safe`\ +> KW_RAW : `raw` +> +> **Lexer 2015**\ +> KW_DYN : `dyn` + r[lex.keywords.weak.macro_rules] * `macro_rules` is used to create custom [macros]. @@ -133,18 +149,12 @@ r[lex.keywords.weak.dyn] Beginning in the 2018 edition, `dyn` has been promoted to a strict keyword. -r[lex.keywords.weak.list] -> **Lexer**\ -> KW_MACRO_RULES : `macro_rules`\ -> KW_UNION : `union`\ -> KW_STATICLIFETIME : `'static` -> -> **Lexer 2015**\ -> KW_DYN : `dyn` - r[lex.keywords.weak.safe] * `safe` is used for functions and statics, which has meaning in [external blocks]. +r[lex.keywords.weak.raw] +* `raw` is used for [raw borrow operators], and is only a keyword when matching a raw borrow operator form (such as `&raw const expr` or `&raw mut expr`). 
+ [items]: items.md [Variables]: variables.md [Type parameters]: types/parameters.md @@ -159,3 +169,4 @@ r[lex.keywords.weak.safe] [loop label]: expressions/loop-expr.md#loop-labels [generic lifetime parameter]: items/generics.md [external blocks]: items/external-blocks.md +[raw borrow operators]: expressions/operator-expr.md#raw-borrow-operators diff --git a/src/lexical-structure.md b/src/lexical-structure.md index 5e1388e0d..d70e97ac3 100644 --- a/src/lexical-structure.md +++ b/src/lexical-structure.md @@ -1 +1,3 @@ # Lexical structure + + diff --git a/src/lifetime-elision.md b/src/lifetime-elision.md index 77a01061c..6a229bf10 100644 --- a/src/lifetime-elision.md +++ b/src/lifetime-elision.md @@ -63,6 +63,9 @@ fn get_mut2<'a>(&'a mut self) -> &'a mut dyn T; // expanded fn args1(&mut self, args: &[T]) -> &mut Command; // elided fn args2<'a, 'b, T: ToCStr>(&'a mut self, args: &'b [T]) -> &'a mut Command; // expanded +fn other_args1<'a>(arg: &str) -> &'a str; // elided +fn other_args2<'a, 'b>(arg: &'b str) -> &'a str; // expanded + fn new1(buf: &mut [u8]) -> Thing<'_>; // elided - preferred fn new2(buf: &mut [u8]) -> Thing; // elided fn new3<'a>(buf: &'a mut [u8]) -> Thing<'a>; // expanded diff --git a/src/linkage.md b/src/linkage.md index 48a1c8e58..ff41a140f 100644 --- a/src/linkage.md +++ b/src/linkage.md @@ -1,8 +1,11 @@ # Linkage +r[link] + > Note: This section is described more in terms of the compiler than of > the language. +r[link.intro] The compiler supports various methods to link crates together both statically and dynamically. This section will explore the various methods to link crates together, and more information about native libraries can be @@ -10,17 +13,20 @@ found in the [FFI section of the book][ffi]. 
[ffi]: ../book/ch19-01-unsafe-rust.html#using-extern-functions-to-call-external-code +r[link.type] In one session of compilation, the compiler can generate multiple artifacts through the usage of either command line flags or the `crate_type` attribute. If one or more command line flags are specified, all `crate_type` attributes will be ignored in favor of only building the artifacts specified by command line. +r[link.bin] * `--crate-type=bin`, `#![crate_type = "bin"]` - A runnable executable will be produced. This requires that there is a `main` function in the crate which will be run when the program begins executing. This will link in all Rust and native dependencies, producing a single distributable binary. This is the default crate type. +r[link.lib] * `--crate-type=lib`, `#![crate_type = "lib"]` - A Rust library will be produced. This is an ambiguous concept as to what exactly is produced because a library can manifest itself in several forms. The purpose of this generic `lib` option @@ -30,6 +36,7 @@ be ignored in favor of only building the artifacts specified by command line. libraries, and the `lib` type can be seen as an alias for one of them (but the actual one is compiler-defined). +r[link.dylib] * `--crate-type=dylib`, `#![crate_type = "dylib"]` - A dynamic Rust library will be produced. This is different from the `lib` output type in that this forces dynamic library generation. The resulting dynamic library can be used as a @@ -37,6 +44,7 @@ be ignored in favor of only building the artifacts specified by command line. create `*.so` files on Linux, `*.dylib` files on macOS, and `*.dll` files on Windows. +r[link.staticlib] * `--crate-type=staticlib`, `#![crate_type = "staticlib"]` - A static system library will be produced. This is different from other library outputs in that the compiler will never attempt to link to `staticlib` outputs. The @@ -62,12 +70,14 @@ be ignored in favor of only building the artifacts specified by command line. 
dependencies that is not actually used (e.g. `--gc-sections` or `-dead_strip` for macOS). +r[link.cdylib] * `--crate-type=cdylib`, `#![crate_type = "cdylib"]` - A dynamic system library will be produced. This is used when compiling a dynamic library to be loaded from another language. This output type will create `*.so` files on Linux, `*.dylib` files on macOS, and `*.dll` files on Windows. +r[link.rlib] * `--crate-type=rlib`, `#![crate_type = "rlib"]` - A "Rust library" file will be produced. This is used as an intermediate artifact and can be thought of as a "static Rust library". These `rlib` files, unlike `staticlib` files, are @@ -76,6 +86,7 @@ be ignored in favor of only building the artifacts specified by command line. in dynamic libraries. This form of output is used to produce statically linked executables as well as `staticlib` outputs. +r[link.proc-macro] * `--crate-type=proc-macro`, `#![crate_type = "proc-macro"]` - The output produced is not specified, but if a `-L` path is provided to it then the compiler will recognize the output artifacts as a macro and it can be loaded @@ -87,6 +98,7 @@ be ignored in favor of only building the artifacts specified by command line. `x86_64-unknown-linux-gnu` even if the crate is a dependency of another crate being built for a different target. +r[link.repetition] Note that these outputs are stackable in the sense that if multiple are specified, then the compiler will produce each form of output without having to recompile. However, this only applies for outputs specified by the @@ -94,6 +106,7 @@ same method. If only `crate_type` attributes are specified, then they will all be built, but if one or more `--crate-type` command line flags are specified, then only those outputs will be built. +r[link.dependency] With all these different kinds of outputs, if crate A depends on crate B, then the compiler could find B in various different forms throughout the system. 
The only forms looked for by the compiler, however, are the `rlib` format and the @@ -102,6 +115,7 @@ compiler must at some point make a choice between these two formats. With this in mind, the compiler follows these rules when determining what format of dependencies will be used: +r[link.dependency-staticlib] 1. If a static library is being produced, all upstream dependencies are required to be available in `rlib` formats. This requirement stems from the reason that a dynamic library cannot be converted into a static format. @@ -110,6 +124,8 @@ dependencies will be used: library, and in this case warnings will be printed about all unlinked native dynamic dependencies. +r[link.dependency-rlib] + 2. If an `rlib` file is being produced, then there are no restrictions on what format the upstream dependencies are available in. It is simply required that all upstream dependencies be available for reading metadata from. @@ -118,11 +134,15 @@ dependencies will be used: dependencies. It wouldn't be very efficient for all `rlib` files to contain a copy of `libstd.rlib`! +r[link.dependency-prefer-dynamic] + 3. If an executable is being produced and the `-C prefer-dynamic` flag is not specified, then dependencies are first attempted to be found in the `rlib` format. If some dependencies are not available in an rlib format, then dynamic linking is attempted (see below). +r[link.dependency-dynamic] + 4. If a dynamic library or an executable that is being dynamically linked is being produced, then the compiler will attempt to reconcile the available dependencies in either the rlib or dylib format to create a final product. @@ -148,6 +168,9 @@ fine-grained control is desired over the output format of a crate. ## Static and dynamic C runtimes +r[link.crt] + +r[link.crt.intro] The standard library in general strives to support both statically linked and dynamically linked C runtimes for targets as appropriate. 
For example the `x86_64-pc-windows-msvc` and `x86_64-unknown-linux-musl` targets typically come @@ -162,6 +185,7 @@ default such as: * `i686-unknown-linux-musl` * `x86_64-unknown-linux-musl` +r[link.crt.crt-static] The linkage of the C runtime is configured to respect the `crt-static` target feature. These target features are typically configured from the command line via flags to the compiler itself. For example to enable a static runtime you @@ -177,10 +201,12 @@ whereas to link dynamically to the C runtime you would execute: rustc -C target-feature=-crt-static foo.rs ``` +r[link.crt.ineffective] Targets which do not support switching between linkage of the C runtime will ignore this flag. It's recommended to inspect the resulting binary to ensure that it's linked as you would expect after the compiler succeeds. +r[link.crt.target_feature] Crates may also learn about how the C runtime is being linked. Code on MSVC, for example, needs to be compiled differently (e.g. with `/MT` or `/MD`) depending on the runtime being linked. This is exported currently through the @@ -226,6 +252,23 @@ a statically linked binary on MSVC you would execute: RUSTFLAGS='-C target-feature=+crt-static' cargo build --target x86_64-pc-windows-msvc ``` +## Mixed Rust and foreign codebases + +If you are mixing Rust with foreign code (e.g. C, C++) and wish to make a single +binary containing both types of code, you have two approaches for the final +binary link: + +* Use `rustc`. Pass any non-Rust libraries using `-L <directory>` and `-l<library>` + rustc arguments, and/or `#[link]` directives in your Rust code. If you need to + link against `.o` files you can use `-Clink-arg=file.o`. +* Use your foreign linker. In this case, you first need to generate a Rust `staticlib` + target and pass that into your foreign linker invocation. If you need to link + multiple Rust subsystems, you will need to generate a _single_ `staticlib` + perhaps using lots of `extern crate` statements to include multiple Rust `rlib`s.
+ Multiple Rust `staticlib` files are likely to conflict. + +Passing `rlib`s directly into your foreign linker is currently unsupported. + [`cfg` attribute `target_feature` option]: conditional-compilation.md#target_feature [configuration option]: conditional-compilation.md [procedural macros]: procedural-macros.md diff --git a/src/macro-ambiguity.md b/src/macro-ambiguity.md index 01ae288a3..159b86799 100644 --- a/src/macro-ambiguity.md +++ b/src/macro-ambiguity.md @@ -1,12 +1,17 @@ # Appendix: Macro Follow-Set Ambiguity Formal Specification +r[macro.ambiguity] + This page documents the formal specification of the follow rules for [Macros By Example]. They were originally specified in [RFC 550], from which the bulk of this text is copied, and expanded upon in subsequent RFCs. ## Definitions & Conventions - - `macro`: anything invokable as `foo!(...)` in source code. +r[macro.ambiguity.convention] + +r[macro.ambiguity.convention.defs] + - `macro`: anything invocable as `foo!(...)` in source code. - `MBE`: macro-by-example, a macro defined by `macro_rules`. - `matcher`: the left-hand-side of a rule in a `macro_rules` invocation, or a subportion thereof. @@ -46,11 +51,13 @@ macro_rules! i_am_an_mbe { } ``` +r[macro.ambiguity.convention.matcher] `(start $foo:expr $($i:ident),* end)` is a matcher. The whole matcher is a delimited sequence (with open- and close-delimiters `(` and `)`), and `$foo` and `$i` are simple NT's with `expr` and `ident` as their respective fragment specifiers. +r[macro.ambiguity.convention.complex-nt] `$(i:ident),*` is *also* an NT; it is a complex NT that matches a comma-separated repetition of identifiers. The `,` is the separator token for the complex NT; it occurs in between each pair of elements (if any) of the @@ -65,16 +72,19 @@ token. proper nesting of token tree structure and correct matching of open- and close-delimiters.) 
+r[macro.ambiguity.convention.vars] We will tend to use the variable "M" to stand for a matcher, variables "t" and "u" for arbitrary individual tokens, and the variables "tt" and "uu" for arbitrary token trees. (The use of "tt" does present potential ambiguity with its additional role as a fragment specifier; but it will be clear from context which interpretation is meant.) +r[macro.ambiguity.convention.set] "SEP" will range over separator tokens, "OP" over the repetition operators `*`, `+`, and `?`, "OPEN"/"CLOSE" over matching token pairs surrounding a delimited sequence (e.g. `[` and `]`). +r[macro.ambiguity.convention.sequence-vars] Greek letters "α" "β" "γ" "δ" stand for potentially empty token-tree sequences. (However, the Greek letter "ε" (epsilon) has a special role in the presentation and does not stand for a token-tree sequence.) @@ -101,6 +111,9 @@ purposes of the formalism, we will treat `$v:vis` as actually being ### The Matcher Invariants +r[macro.ambiguity.invariant] + +r[macro.ambiguity.invariant.list] To be valid, a matcher must meet the following three invariants. The definitions of FIRST and FOLLOW are described later. @@ -112,18 +125,21 @@ of FIRST and FOLLOW are described later. 1. For an unseparated complex NT in a matcher, `M = ... $(tt ...) OP ...`, if OP = `*` or `+`, we must have FOLLOW(`tt ...`) ⊇ FIRST(`tt ...`). +r[macro.ambiguity.invariant.follow-matcher] The first invariant says that whatever actual token that comes after a matcher, if any, must be somewhere in the predetermined follow set. This ensures that a legal macro definition will continue to assign the same determination as to where `... tt` ends and `uu ...` begins, even as new syntactic forms are added to the language. +r[macro.ambiguity.invariant.separated-complex-nt] The second invariant says that a separated complex NT must use a separator token that is part of the predetermined follow set for the internal contents of the NT. 
This ensures that a legal macro definition will continue to parse an input fragment into the same delimited sequence of `tt ...`'s, even as new syntactic forms are added to the language. +r[macro.ambiguity.invariant.unseparated-complex-nt] The third invariant says that when we have a complex NT that can match two or more copies of the same thing with no separation in between, it must be permissible for them to be placed next to each other as per the first invariant. @@ -137,6 +153,9 @@ invalid in a future edition of Rust. See the [tracking issue].** ### FIRST and FOLLOW, informally +r[macro.ambiguity.sets] + +r[macro.ambiguity.sets.intro] A given matcher M maps to three sets: FIRST(M), LAST(M) and FOLLOW(M). Each of the three sets is made up of tokens. FIRST(M) and LAST(M) may also @@ -145,12 +164,15 @@ can match the empty fragment. (But FOLLOW(M) is always just a set of tokens.) Informally: +r[macro.ambiguity.sets.first] * FIRST(M): collects the tokens potentially used first when matching a fragment to M. +r[macro.ambiguity.sets.last] * LAST(M): collects the tokens potentially used last when matching a fragment to M. +r[macro.ambiguity.sets.follow] * FOLLOW(M): the set of tokens allowed to follow immediately after some fragment matched by M. @@ -163,6 +185,7 @@ Informally: * The concatenation α β γ δ is a parseable Rust program. +r[macro.ambiguity.sets.universe] We use the shorthand ANYTOKEN to denote the set of all tokens (including simple NTs). For example, if any token is legal after a matcher M, then FOLLOW(M) = ANYTOKEN. @@ -174,18 +197,27 @@ definitions.) ### FIRST, LAST +r[macro.ambiguity.sets.def] + +r[macro.ambiguity.sets.def.intro] Below are formal inductive definitions for FIRST and LAST. +r[macro.ambiguity.sets.def.notation] "A ∪ B" denotes set union, "A ∩ B" denotes set intersection, and "A \ B" denotes set difference (i.e. all elements of A that are not present in B). 
#### FIRST +r[macro.ambiguity.sets.def.first] + +r[macro.ambiguity.sets.def.first.intro] FIRST(M) is defined by case analysis on the sequence M and the structure of its first token-tree (if any): +r[macro.ambiguity.sets.def.first.epsilon] * if M is the empty sequence, then FIRST(M) = { ε }, +r[macro.ambiguity.sets.def.first.token] * if M starts with a token t, then FIRST(M) = { t }, (Note: this covers the case where M starts with a delimited token-tree @@ -195,6 +227,7 @@ first token-tree (if any): (Note: this critically relies on the property that no simple NT matches the empty fragment.) +r[macro.ambiguity.sets.def.first.complex] * Otherwise, M is a token-tree sequence starting with a complex NT: `M = $( tt ... ) OP α`, or `M = $( tt ... ) SEP OP α`, (where `α` is the (potentially empty) sequence of token trees for the rest of the matcher). @@ -229,12 +262,18 @@ with respect to \varepsilon as well. #### LAST +r[macro.ambiguity.sets.def.last] + +r[macro.ambiguity.sets.def.last.intro] LAST(M), defined by case analysis on M itself (a sequence of token-trees): +r[macro.ambiguity.sets.def.last.empty] * if M is the empty sequence, then LAST(M) = { ε } +r[macro.ambiguity.sets.def.last.token] * if M is a singleton token t, then LAST(M) = { t } +r[macro.ambiguity.sets.def.last.rep-star] * if M is the singleton complex NT repeating zero or more times, `M = $( tt ... ) *`, or `M = $( tt ... ) SEP *` @@ -245,6 +284,7 @@ LAST(M), defined by case analysis on M itself (a sequence of token-trees): * otherwise, the sequence `tt ...` must be non-empty; LAST(M) = LAST(`tt ...`) ∪ {ε}. +r[macro.ambiguity.sets.def.last.rep-plus] * if M is the singleton complex NT repeating one or more times, `M = $( tt ... ) +`, or `M = $( tt ... 
) SEP +` @@ -255,12 +295,15 @@ LAST(M), defined by case analysis on M itself (a sequence of token-trees): * otherwise, the sequence `tt ...` must be non-empty; LAST(M) = LAST(`tt ...`) +r[macro.ambiguity.sets.def.last.rep-question] * if M is the singleton complex NT repeating zero or one time, `M = $( tt ...) ?`, then LAST(M) = LAST(`tt ...`) ∪ {ε}. +r[macro.ambiguity.sets.def.last.delim] * if M is a delimited token-tree sequence `OPEN tt ... CLOSE`, then LAST(M) = { `CLOSE` }. +r[macro.ambiguity.sets.def.last.sequence] * if M is a non-empty sequence of token-trees `tt uu ...`, * If ε ∈ LAST(`uu ...`), then LAST(M) = LAST(`tt`) ∪ (LAST(`uu ...`) \ { ε }). @@ -320,25 +363,35 @@ Here are similar examples but now for LAST. ### FOLLOW(M) +r[macro.ambiguity.sets.def.follow] + +r[macro.ambiguity.sets.def.follow.intro] Finally, the definition for FOLLOW(M) is built up as follows. pat, expr, etc. represent simple nonterminals with the given fragment specifier. +r[macro.ambiguity.sets.def.follow.pat] * FOLLOW(pat) = {`=>`, `,`, `=`, `|`, `if`, `in`}`. - * FOLLOW(expr) = FOLLOW(stmt) = {`=>`, `,`, `;`}`. +r[macro.ambiguity.sets.def.follow.expr-stmt] + * FOLLOW(expr) = FOLLOW(expr_2021) = FOLLOW(stmt) = {`=>`, `,`, `;`}. +r[macro.ambiguity.sets.def.follow.ty-path] * FOLLOW(ty) = FOLLOW(path) = {`{`, `[`, `,`, `=>`, `:`, `=`, `>`, `>>`, `;`, `|`, `as`, `where`, block nonterminals}. +r[macro.ambiguity.sets.def.follow.vis] * FOLLOW(vis) = {`,`l any keyword or identifier except a non-raw `priv`; any token that can begin a type; ident, ty, and path nonterminals}. +r[macro.ambiguity.sets.def.follow.simple] * FOLLOW(t) = ANYTOKEN for any other simple token, including block, ident, tt, item, lifetime, literal and meta simple nonterminals, and all terminals. +r[macro.ambiguity.sets.def.follow.other-matcher] * FOLLOW(M), for any other M, is defined as the intersection, as t ranges over (LAST(M) \ {ε}), of FOLLOW(t).
+r[macro.ambiguity.sets.def.follow.type-first] The tokens that can begin a type are, as of this writing, {`(`, `[`, `!`, `*`, `&`, `&&`, `?`, lifetimes, `>`, `>>`, `::`, any non-keyword identifier, `super`, `self`, `Self`, `extern`, `crate`, `$crate`, `_`, `for`, `impl`, `fn`, `unsafe`, diff --git a/src/macros-by-example.md b/src/macros-by-example.md index e95cd2e64..90c21baab 100644 --- a/src/macros-by-example.md +++ b/src/macros-by-example.md @@ -1,5 +1,8 @@ # Macros By Example +r[macro.decl] + +r[macro.decl.syntax] > **Syntax**\ > _MacroRulesDefinition_ :\ >    `macro_rules` `!` [IDENTIFIER] _MacroRulesDef_ @@ -27,7 +30,7 @@ >    | `$` `(` _MacroMatch_+ `)` _MacroRepSep_? _MacroRepOp_ > > _MacroFragSpec_ :\ ->       `block` | `expr` | `ident` | `item` | `lifetime` | `literal`\ +>       `block` | `expr` | `expr_2021` | `ident` | `item` | `lifetime` | `literal`\ >    | `meta` | `pat` | `pat_param` | `path` | `stmt` | `tt` | `ty` | `vis` > > _MacroRepSep_ :\ @@ -39,6 +42,7 @@ > _MacroTranscriber_ :\ >    [_DelimTokenTree_] +r[macro.decl.intro] `macro_rules` allows users to define syntax extension in a declarative way. We call such extensions "macros by example" or simply "macros". @@ -51,10 +55,15 @@ items), types, or patterns. ## Transcribing +r[macro.decl.transcription] + +r[macro.decl.transcription.intro] When a macro is invoked, the macro expander looks up macro invocations by name, and tries each macro rule in turn. It transcribes the first successful match; if -this results in an error, then future matches are not tried. When matching, no -lookahead is performed; if the compiler cannot unambiguously determine how to +this results in an error, then future matches are not tried. + +r[macro.decl.transcription.lookahead] +When matching, no lookahead is performed; if the compiler cannot unambiguously determine how to parse the macro invocation one token at a time, then it is an error. 
In the following example, the compiler does not look ahead past the identifier to see if the following token is a `)`, even though that would allow it to parse the @@ -68,6 +77,7 @@ macro_rules! ambiguity { ambiguity!(error); // Error: local ambiguity ``` +r[macro.decl.transcription.syntax] In both the matcher and the transcriber, the `$` token is used to invoke special behaviours from the macro engine (described below in [Metavariables] and [Repetitions]). Tokens that aren't part of such an invocation are matched and @@ -78,6 +88,8 @@ instance, the matcher `(())` will match `{()}` but not `{{}}`. The character ### Forwarding a matched fragment +r[macro.decl.transcription.fragment] + When forwarding a matched fragment to another macro-by-example, matchers in the second macro will see an opaque AST of the fragment type. The second macro can't use literal tokens to match the fragments in the matcher, only a @@ -116,62 +128,79 @@ foo!(3); ## Metavariables +r[macro.decl.meta] + +r[macro.decl.meta.intro] In the matcher, `$` _name_ `:` _fragment-specifier_ matches a Rust syntax -fragment of the kind specified and binds it to the metavariable `$`_name_. Valid -fragment specifiers are: +fragment of the kind specified and binds it to the metavariable `$`_name_. 
+ +r[macro.decl.meta.specifier] +Valid fragment specifiers are: - * `item`: an [_Item_] * `block`: a [_BlockExpression_] - * `stmt`: a [_Statement_] without the trailing semicolon (except for item - statements that require semicolons) - * `pat_param`: a [_PatternNoTopAlt_] - * `pat`: at least any [_PatternNoTopAlt_], and possibly more depending on edition * `expr`: an [_Expression_] - * `ty`: a [_Type_] + * `expr_2021`: an [_Expression_] except [_UnderscoreExpression_] and [_ConstBlockExpression_] (see [macro.decl.meta.edition2024]) * `ident`: an [IDENTIFIER_OR_KEYWORD] or [RAW_IDENTIFIER] + * `item`: an [_Item_] + * `lifetime`: a [LIFETIME_TOKEN] + * `literal`: matches `-`?[_LiteralExpression_] + * `meta`: an [_Attr_], the contents of an attribute + * `pat`: a [_Pattern_] (see [macro.decl.meta.edition2021]) + * `pat_param`: a [_PatternNoTopAlt_] * `path`: a [_TypePath_] style path + * `stmt`: a [_Statement_] without the trailing semicolon (except for item statements that require semicolons) * `tt`: a [_TokenTree_] (a single [token] or tokens in matching delimiters `()`, `[]`, or `{}`) - * `meta`: an [_Attr_], the contents of an attribute - * `lifetime`: a [LIFETIME_TOKEN] + * `ty`: a [_Type_] * `vis`: a possibly empty [_Visibility_] qualifier - * `literal`: matches `-`?[_LiteralExpression_] +r[macro.decl.meta.transcription] In the transcriber, metavariables are referred to simply by `$`_name_, since the fragment kind is specified in the matcher. Metavariables are replaced with -the syntax element that matched them. The keyword metavariable `$crate` can be -used to refer to the current crate; see [Hygiene] below. Metavariables can be -transcribed more than once or not at all. +the syntax element that matched them. -For reasons of backwards compatibility, though `_` [is also an -expression][_UnderscoreExpression_], a standalone underscore is not matched by -the `expr` fragment specifier. 
However, `_` is matched by the `expr` fragment -specifier when it appears as a subexpression. -For the same reason, a standalone [const block] is not matched but it is matched when appearing as a subexpression. +r[macro.decl.meta.dollar-crate] +The keyword metavariable `$crate` can be used to refer to the current crate; see [Hygiene] below. Metavariables can be +transcribed more than once or not at all. +r[macro.decl.meta.edition2021] > **Edition differences**: Starting with the 2021 edition, `pat` fragment-specifiers match top-level or-patterns (that is, they accept [_Pattern_]). > > Before the 2021 edition, they match exactly the same fragments as `pat_param` (that is, they accept [_PatternNoTopAlt_]). > > The relevant edition is the one in effect for the `macro_rules!` definition. +r[macro.decl.meta.edition2024] +> **Edition differences**: Before the 2024 edition, `expr` fragment specifiers do not match [_UnderscoreExpression_] or [_ConstBlockExpression_] at the top level. They are allowed within subexpressions. +> +> The `expr_2021` fragment specifier exists to maintain backwards compatibility with editions before 2024. + ## Repetitions +r[macro.decl.repetition] + +r[macro.decl.repetition.intro] In both the matcher and transcriber, repetitions are indicated by placing the tokens to be repeated inside `$(`…`)`, followed by a repetition operator, -optionally with a separator token between. The separator token can be any token +optionally with a separator token between. + +r[macro.decl.repetition.separator] +The separator token can be any token other than a delimiter or one of the repetition operators, but `;` and `,` are the most common. For instance, `$( $i:ident ),*` represents any number of identifiers separated by commas. Nested repetitions are permitted. +r[macro.decl.repetition.operators] The repetition operators are: - `*` --- indicates any number of repetitions. - `+` --- indicates any number but at least one. 
- `?` --- indicates an optional fragment with zero or one occurrence. +r[macro.decl.repetition.optional-restriction] Since `?` represents at most one occurrence, it cannot be used with a separator. +r[macro.decl.repetition.fragment] The repeated fragment both matches and transcribes to the specified number of the fragment, separated by the separator token. Metavariables are matched to every repetition of their corresponding fragment. For instance, the `$( $i:ident @@ -198,6 +227,9 @@ compiler knows how to expand them properly: ## Scoping, Exporting, and Importing +r[macro.decl.scope] + +r[macro.decl.scope.intro] For historical reasons, the scoping of macros by example does not work entirely like items. Macros have two forms of scope: textual scope, and path-based scope. Textual scope is based on the order that things appear in source files, or even @@ -205,6 +237,7 @@ across multiple files, and is the default scoping. It is explained further below Path-based scope works exactly the same way that item scoping does. The scoping, exporting, and importing of macros is controlled largely by attributes. +r[macro.decl.scope.unqualified] When a macro is invoked by an unqualified identifier (not part of a multi-part path), it is first looked up in textual scoping. If this does not yield any results, then it is looked up in path-based scoping. If the macro's name is @@ -224,6 +257,9 @@ self::lazy_static!{} // Path-based lookup ignores our macro, finds imported one. ### Textual Scope +r[macro.decl.scope.textual] + +r[macro.decl.scope.textual.intro] Textual scope is based largely on the order that things appear in source files, and works similarly to the scope of local variables declared with `let` except it also applies at the module level. 
When `macro_rules!` is used to define a @@ -253,6 +289,7 @@ mod has_macro { m!{} // OK: appears after declaration of m in src/lib.rs ``` +r[macro.decl.scope.textual.shadow] It is not an error to define a macro multiple times; the most recent declaration will shadow the previous one unless it has gone out of scope. @@ -293,12 +330,14 @@ fn foo() { m!(); } - // m!(); // Error: m is not in scope. ``` ### The `macro_use` attribute +r[macro.decl.scope.macro_use] + +r[macro.decl.scope.macro_use.mod-decl] The *`macro_use` attribute* has two purposes. First, it can be used to make a module's macro scope not end when the module is closed, by applying it to a module: @@ -314,6 +353,7 @@ mod inner { m!(); ``` +r[macro.decl.scope.macro_use.prelude] Second, it can be used to import macros from another crate, by attaching it to an `extern crate` declaration appearing in the crate's root module. Macros imported this way are imported into the [`macro_use` prelude], not textually, @@ -332,11 +372,15 @@ lazy_static!{} // self::lazy_static!{} // Error: lazy_static is not defined in `self` ``` +r[macro.decl.scope.macro_use.export] Macros to be imported with `#[macro_use]` must be exported with `#[macro_export]`, which is described below. ### Path-Based Scope +r[macro.decl.scope.path] + +r[macro.decl.scope.path.intro] By default, a macro has no path-based scope. However, if it has the `#[macro_export]` attribute, then it is declared in the crate root scope and can be referred to normally as such: @@ -358,16 +402,55 @@ mod mac { } ``` +r[macro.decl.scope.path.export] Macros labeled with `#[macro_export]` are always `pub` and can be referred to by other crates, either by path or by `#[macro_use]` as described above. ## Hygiene -By default, all identifiers referred to in a macro are expanded as-is, and are -looked up at the macro's invocation site. This can lead to issues if a macro -refers to an item or macro which isn't in scope at the invocation site. 
To -alleviate this, the `$crate` metavariable can be used at the start of a path to -force lookup to occur inside the crate defining the macro. +r[macro.decl.hygiene] + +r[macro.decl.hygiene.intro] +Macros by example have _mixed-site hygiene_. This means that [loop labels], [block labels], and local variables are looked up at the macro definition site while other symbols are looked up at the macro invocation site. For example: + +```rust +let x = 1; +fn func() { + unreachable!("this is never called") +} + +macro_rules! check { + () => { + assert_eq!(x, 1); // Uses `x` from the definition site. + func(); // Uses `func` from the invocation site. + }; +} + +{ + let x = 2; + fn func() { /* does not panic */ } + check!(); +} +``` + +Labels and local variables defined in macro expansion are not shared between invocations, so this code doesn’t compile: + +```rust,compile_fail,E0425 +macro_rules! m { + (define) => { + let x = 1; + }; + (refer) => { + dbg!(x); + }; +} + +m!(define); +m!(refer); +``` + +r[macro.decl.hygiene.crate] +A special case is the `$crate` metavariable. It refers to the crate defining the macro, and can be used at the start of the path to look up items or macros which are not in scope at the invocation site. ```rust,ignore @@ -406,6 +489,7 @@ pub mod inner { } ``` +r[macro.decl.hygiene.vis] Additionally, even though `$crate` allows a macro to refer to items within its own crate when expanding, its use has no effect on visibility. An item or macro referred to must still be visible from the invocation site. In the following @@ -429,6 +513,7 @@ fn foo() {} > modified to use `$crate` or `local_inner_macros` to work well with path-based > imports. +r[macro.decl.hygiene.local_inner_macros] When a macro is exported, the `#[macro_export]` attribute can have the `local_inner_macros` keyword added to automatically prefix all contained macro invocations with `$crate::`. This is intended primarily as a tool to migrate @@ -449,9 +534,14 @@ macro_rules! 
helper { ## Follow-set Ambiguity Restrictions +r[macro.decl.follow-set] + +r[macro.decl.follow-set.intro] The parser used by the macro system is reasonably powerful, but it is limited in -order to prevent ambiguity in current or future versions of the language. In -particular, in addition to the rule about ambiguous expansions, a nonterminal +order to prevent ambiguity in current or future versions of the language. + +r[macro.decl.follow-set.token-restriction] +In particular, in addition to the rule about ambiguous expansions, a nonterminal matched by a metavariable must be followed by a token which has been decided can be safely used after that kind of match. @@ -464,19 +554,32 @@ matcher would become ambiguous or would misparse, breaking working code. Matchers like `$i:expr,` or `$i:expr;` would be legal, however, because `,` and `;` are legal expression separators. The specific rules are: +r[macro.decl.follow-set.token-expr-stmt] * `expr` and `stmt` may only be followed by one of: `=>`, `,`, or `;`. + +r[macro.decl.follow-set.token-pat_param] * `pat_param` may only be followed by one of: `=>`, `,`, `=`, `|`, `if`, or `in`. + +r[macro.decl.follow-set.token-pat] * `pat` may only be followed by one of: `=>`, `,`, `=`, `if`, or `in`. + +r[macro.decl.follow-set.token-path-ty] * `path` and `ty` may only be followed by one of: `=>`, `,`, `=`, `|`, `;`, `:`, `>`, `>>`, `[`, `{`, `as`, `where`, or a macro variable of `block` fragment specifier. + +r[macro.decl.follow-set.token-vis] * `vis` may only be followed by one of: `,`, an identifier other than a non-raw `priv`, any token that can begin a type, or a metavariable with a `ident`, `ty`, or `path` fragment specifier. + +r[macro.decl.follow-set.token-other] * All other fragment specifiers have no restrictions. +r[macro.decl.follow-set.edition2021] > **Edition differences**: Before the 2021 edition, `pat` may also be followed by `|`. 
+r[macro.decl.follow-set.repetition] When repetitions are involved, then the rules apply to every possible number of expansions, taking separators into account. This means: @@ -490,10 +593,9 @@ expansions, taking separators into account. This means: * If the repetition can match zero times (`*` or `?`), then whatever comes after must be able to follow whatever comes before. - For more detail, see the [formal specification]. -[const block]: expressions/block-expr.md#const-blocks +[block labels]: expressions/loop-expr.md#labelled-block-expressions [Hygiene]: #hygiene [IDENTIFIER]: identifiers.md [IDENTIFIER_OR_KEYWORD]: identifiers.md @@ -503,10 +605,12 @@ For more detail, see the [formal specification]. [Repetitions]: #repetitions [_Attr_]: attributes.md [_BlockExpression_]: expressions/block-expr.md +[_ConstBlockExpression_]: expressions/block-expr.md#const-blocks [_DelimTokenTree_]: macros.md [_Expression_]: expressions.md [_Item_]: items.md [_LiteralExpression_]: expressions/literal-expr.md +[loop labels]: expressions/loop-expr.md#loop-labels [_MetaListIdents_]: attributes.md#meta-item-attribute-syntax [_Pattern_]: patterns.md [_PatternNoTopAlt_]: patterns.md diff --git a/src/names/preludes.md b/src/names/preludes.md index 6aa761c92..8ec9c493a 100644 --- a/src/names/preludes.md +++ b/src/names/preludes.md @@ -26,6 +26,7 @@ Edition | `no_std` not applied | `no_std` applied 2015 | [`std::prelude::rust_2015`] | [`core::prelude::rust_2015`] 2018 | [`std::prelude::rust_2018`] | [`core::prelude::rust_2018`] 2021 | [`std::prelude::rust_2021`] | [`core::prelude::rust_2021`] +2024 | [`std::prelude::rust_2024`] | [`core::prelude::rust_2024`] > **Note**: diff --git a/src/patterns.md b/src/patterns.md index 377b56d3d..21b4eee0d 100644 --- a/src/patterns.md +++ b/src/patterns.md @@ -274,7 +274,7 @@ To make it valid, write the following: # age: u8, # } # let value = Person { name: String::from("John"), age: 23 }; -if let Person {name: ref person_name, age: 18..=150 } = value { 
} +if let Person { name: ref person_name, age: 18..=150 } = value { } ``` r[pattern.ident.ref-ignored] diff --git a/src/procedural-macros.md b/src/procedural-macros.md index a97755f7f..0ae6e26d5 100644 --- a/src/procedural-macros.md +++ b/src/procedural-macros.md @@ -1,5 +1,8 @@ ## Procedural Macros +r[macro.proc] + +r[macro.proc.intro] *Procedural macros* allow creating syntax extensions as execution of a function. Procedural macros come in one of three flavors: @@ -11,6 +14,7 @@ Procedural macros allow you to run code at compile time that operates over Rust syntax, both consuming and producing Rust syntax. You can sort of think of procedural macros as functions from an AST to another AST. +r[macro.proc.def] Procedural macros must be defined in the root of a crate with the [crate type] of `proc-macro`. The macros may not be used from the crate where they are defined, and can only be used when imported in another crate. @@ -23,6 +27,7 @@ The macros may not be used from the crate where they are defined, and can only b > proc-macro = true > ``` +r[macro.proc.result] As functions, they must either return syntax, panic, or loop endlessly. Returned syntax either replaces or adds the syntax depending on the kind of procedural macro. Panics are caught by the compiler and are turned into a compiler error. @@ -34,15 +39,20 @@ that the compiler has access to. Similarly, file access is the same. Because of this, procedural macros have the same security concerns that [Cargo's build scripts] have. +r[macro.proc.error] Procedural macros have two ways of reporting errors. The first is to panic. The second is to emit a [`compile_error`] macro invocation. ### The `proc_macro` crate +r[macro.proc.proc_macro] + +r[macro.proc.proc_macro.intro] Procedural macro crates almost always will link to the compiler-provided [`proc_macro` crate]. The `proc_macro` crate provides types required for writing procedural macros and facilities to make it easier. 
+r[macro.proc.proc_macro.token-stream] This crate primarily contains a [`TokenStream`] type. Procedural macros operate over *token streams* instead of AST nodes, which is a far more stable interface over time for both the compiler and for procedural macros to target. A @@ -51,6 +61,7 @@ can roughly be thought of as lexical token. For example `foo` is an `Ident` token, `.` is a `Punct` token, and `1.2` is a `Literal` token. The `TokenStream` type, unlike `Vec`, is cheap to clone. +r[macro.proc.proc_macro.span] All tokens have an associated `Span`. A `Span` is an opaque value that cannot be modified but can be manufactured. `Span`s represent an extent of source code within a program and are primarily used for error reporting. While you @@ -59,6 +70,8 @@ with any token, such as through getting a `Span` from another token. ### Procedural macro hygiene +r[macro.proc.hygiene] + Procedural macros are *unhygienic*. This means they behave as if the output token stream was simply written inline to the code it's next to. This means that it's affected by external items and also affects external imports. @@ -71,13 +84,19 @@ other functions (like `__internal_foo` instead of `foo`). ### Function-like procedural macros +r[macro.proc.function] + +r[macro.proc.function.intro] *Function-like procedural macros* are procedural macros that are invoked using the macro invocation operator (`!`). +r[macro.proc.function.def] These macros are defined by a [public] [function] with the `proc_macro` [attribute] and a signature of `(TokenStream) -> TokenStream`. The input [`TokenStream`] is what is inside the delimiters of the macro invocation and the output [`TokenStream`] replaces the entire macro invocation. + +r[macro.proc.function.namespace] The `proc_macro` attribute defines the macro in the [macro namespace] in the root of the crate. 
For example, the following macro definition ignores its input and outputs a @@ -109,6 +128,7 @@ fn main() { } ``` +r[macro.proc.function.invocation] Function-like procedural macros may be invoked in any macro invocation position, which includes [statements], [expressions], [patterns], [type expressions], [item] positions, including items in [`extern` blocks], inherent @@ -116,14 +136,21 @@ and trait [implementations], and [trait definitions]. ### Derive macros +r[macro.proc.derive] + +r[macro.proc.derive.intro] *Derive macros* define new inputs for the [`derive` attribute]. These macros can create new [items] given the token stream of a [struct], [enum], or [union]. They can also define [derive macro helper attributes]. +r[macro.proc.derive.def] Custom derive macros are defined by a [public] [function] with the `proc_macro_derive` attribute and a signature of `(TokenStream) -> TokenStream`. + +r[macro.proc.derive.namespace] The `proc_macro_derive` attribute defines the custom derive in the [macro namespace] in the root of the crate. +r[macro.proc.derive.output] The input [`TokenStream`] is the token stream of the item that has the `derive` attribute on it. The output [`TokenStream`] must be a set of items that are then appended to the [module] or [block] that the item from the input @@ -161,11 +188,15 @@ fn main() { #### Derive macro helper attributes +r[macro.proc.derive.attributes] + +r[macro.proc.derive.attributes.intro] Derive macros can add additional [attributes] into the scope of the [item] they are on. Said attributes are called *derive macro helper attributes*. These attributes are [inert], and their only purpose is to be fed into the derive macro that defined them. That said, they can be seen by all macros. +r[macro.proc.derive.attributes.def] The way to define helper attributes is to put an `attributes` key in the `proc_macro_derive` macro with a comma separated list of identifiers that are the names of the helper attributes. 
@@ -197,10 +228,14 @@ struct Struct { ### Attribute macros +r[macro.proc.attribute] + +r[macro.proc.attribute.intro] *Attribute macros* define new [outer attributes][attributes] which can be attached to [items], including items in [`extern` blocks], inherent and trait [implementations], and [trait definitions]. +r[macro.proc.attribute.def] Attribute macros are defined by a [public] [function] with the `proc_macro_attribute` [attribute] that has a signature of `(TokenStream, TokenStream) -> TokenStream`. The first [`TokenStream`] is the delimited token @@ -209,6 +244,8 @@ the attribute is written as a bare attribute name, the attribute [`TokenStream`] is empty. The second [`TokenStream`] is the rest of the [item] including other [attributes] on the [item]. The returned [`TokenStream`] replaces the [item] with an arbitrary number of [items]. + +r[macro.proc.attribute.namespace] The `proc_macro_attribute` attribute defines the attribute in the [macro namespace] in the root of the crate. For example, this attribute macro takes the input stream and returns it as is, @@ -278,9 +315,13 @@ fn invoke4() {} ### Declarative macro tokens and procedural macro tokens +r[macro.proc.token] + +r[macro.proc.token.intro] Declarative `macro_rules` macros and procedural macros use similar, but different definitions for tokens (or rather [`TokenTree`s].) 
+r[macro.proc.token.macro_rules] Token trees in `macro_rules` (corresponding to `tt` matchers) are defined as - Delimited groups (`(...)`, `{...}`, etc) - All operators supported by the language, both single-character and @@ -296,6 +337,7 @@ Token trees in `macro_rules` (corresponding to `tt` matchers) are defined as expansion, which will be considered a single token tree regardless of the passed expression) +r[macro.proc.token.tree] Token trees in procedural macros are defined as - Delimited groups (`(...)`, `{...}`, etc) - All punctuation characters used in operators supported by the language (`+`, @@ -306,11 +348,13 @@ Token trees in procedural macros are defined as and floating point literals. - Identifiers, including keywords (`ident`, `r#ident`, `fn`) +r[macro.proc.token.conversion.intro] Mismatches between these two definitions are accounted for when token streams are passed to and from procedural macros. \ Note that the conversions below may happen lazily, so they might not happen if the tokens are not actually inspected. +r[macro.proc.token.conversion.to-proc_macro] When passed to a proc-macro - All multi-character operators are broken into single characters. - Lifetimes are broken into a `'` character and an identifier. @@ -322,6 +366,7 @@ When passed to a proc-macro - `tt` and `ident` substitutions are never wrapped into such groups and always represented as their underlying token trees. +r[macro.proc.token.conversion.from-proc_macro] When emitted from a proc macro - Punctuation characters are glued into multi-character operators when applicable. @@ -330,6 +375,7 @@ When emitted from a proc macro possibly wrapped into a delimited group ([`Group`]) with implicit delimiters ([`Delimiter::None`]) when it's necessary for preserving parsing priorities. +r[macro.proc.token.doc-comment] Note that neither declarative nor procedural macros support doc comment tokens (e.g. 
`/// Doc`), so they are always converted to token streams representing their equivalent `#[doc = r"str"]` attributes when passed to macros. diff --git a/src/statements-and-expressions.md b/src/statements-and-expressions.md index fede41196..b7496964c 100644 --- a/src/statements-and-expressions.md +++ b/src/statements-and-expressions.md @@ -1,5 +1,7 @@ # Statements and expressions +r[stmt-expr] + Rust is _primarily_ an expression language. This means that most forms of value-producing or effect-causing evaluation are directed by the uniform syntax category of _expressions_. Each kind of expression can typically _nest_ within each other kind of expression, and rules for evaluation of expressions involve specifying both the value produced by the expression and the order in which its sub-expressions are themselves evaluated. diff --git a/src/statements.md b/src/statements.md index 40f95beca..5ee35d9ab 100644 --- a/src/statements.md +++ b/src/statements.md @@ -1,5 +1,8 @@ # Statements +r[statement] + +r[statement.syntax] > **Syntax**\ > _Statement_ :\ >       `;`\ @@ -8,13 +11,16 @@ >    | [_ExpressionStatement_]\ >    | [_MacroInvocationSemi_] - +r[statement.intro] A *statement* is a component of a [block], which is in turn a component of an outer [expression] or [function]. +r[statement.kind] Rust has two kinds of statement: [declaration statements](#declaration-statements) and [expression statements](#expression-statements). ## Declaration statements +r[statement.decl] + A *declaration statement* is one that introduces one or more *names* into the enclosing statement block. The declared names may denote new variables or new [items][item]. @@ -22,12 +28,20 @@ The two kinds of declaration statements are item declarations and `let` statemen ### Item declarations +r[statement.item] + +r[statement.item.intro] An *item declaration statement* has a syntactic form identical to an [item declaration][item] within a [module]. 
+ +r[statement.item.scope] Declaring an item within a statement block restricts its [scope] to the block containing the statement. The item is not given a [canonical path] nor are any sub-items it may declare. + +r[statement.item.associated-scope] The exception to this is that associated items defined by [implementations] are still accessible in outer scopes as long as the item and, if applicable, trait are accessible. It is otherwise identical in meaning to declaring the item inside a module. +r[statement.item.outer-generics] There is no implicit capture of the containing function's generic parameters, parameters, and local variables. For example, `inner` may not access `outer_var`. @@ -43,6 +57,9 @@ fn outer() { ### `let` statements +r[statement.let] + +r[statement.let.syntax] > **Syntax**\ > _LetStatement_ :\ >    [_OuterAttribute_]\* `let` [_PatternNoTopAlt_] @@ -52,13 +69,21 @@ fn outer() { > † When an `else` block is specified, the > _Expression_ must not be a [_LazyBooleanExpression_], or end with a `}`. +r[statement.let.intro] A *`let` statement* introduces a new set of [variables], given by a [pattern]. The pattern is followed optionally by a type annotation and then either ends, or is followed by an initializer expression plus an optional `else` block. + +r[statement.let.inference] When no type annotation is given, the compiler will infer the type, or signal an error if insufficient type information is available for definite inference. + +r[statement.let.scope] Any variables introduced by a variable declaration are visible from the point of declaration until the end of the enclosing block scope, except when they are shadowed by another variable declaration. +r[statement.let.constraint] If an `else` block is not present, the pattern must be irrefutable. If an `else` block is present, the pattern may be refutable. + +r[statement.let.behavior] If the pattern does not match (this requires it to be refutable), the `else` block is executed. 
The `else` block must always diverge (evaluate to the [never type]). @@ -75,17 +100,24 @@ let [u, v] = [v[0], v[1]] else { // This pattern is irrefutable, so the compiler ## Expression statements +r[statement.expr] + +r[statement.expr.syntax] > **Syntax**\ > _ExpressionStatement_ :\ >       [_ExpressionWithoutBlock_][expression] `;`\ >    | [_ExpressionWithBlock_][expression] `;`? +r[statement.expr.intro] An *expression statement* is one that evaluates an [expression] and ignores its result. As a rule, an expression statement's purpose is to trigger the effects of evaluating its expression. +r[statement.expr.restriction-semicolon] An expression that consists of only a [block expression][block] or control flow expression, if used in a context where a statement is permitted, can omit the trailing semicolon. This can cause an ambiguity between it being parsed as a standalone statement and as a part of another expression; in this case, it is parsed as a statement. + +r[statement.expr.constraint-block] The type of [_ExpressionWithBlock_][expression] expressions when used as statements must be the unit type. ```rust @@ -118,6 +150,8 @@ if true { ## Attributes on Statements +r[statement.attribute] + Statements accept [outer attributes]. The attributes that have meaning on a statement are [`cfg`], and [the lint check attributes]. diff --git a/src/subtyping.md b/src/subtyping.md index b31e25d8a..38fb1270a 100644 --- a/src/subtyping.md +++ b/src/subtyping.md @@ -1,7 +1,13 @@ # Subtyping and Variance +r[subtype] + +r[subtype.intro] Subtyping is implicit and can occur at any stage in type checking or -inference. Subtyping is restricted to two cases: +inference. + +r[subtype.kinds] +Subtyping is restricted to two cases: variance with respect to lifetimes and between types with higher ranked lifetimes. If we were to erase lifetimes from types, then the only subtyping would be due to type equality. 
@@ -19,6 +25,7 @@ fn bar<'a>() { Since `'static` outlives the lifetime parameter `'a`, `&'static str` is a subtype of `&'a str`. +r[subtype.higher-ranked] [Higher-ranked] [function pointers] and [trait objects] have another subtype relation. They are subtypes of types that are given by substitutions of the higher-ranked lifetimes. Some examples: @@ -39,17 +46,26 @@ let supertype: &for<'c> fn(&'c i32, &'c i32) = subtype; ## Variance +r[subtyping.variance] + +r[subtyping.variance.intro] Variance is a property that generic types have with respect to their arguments. A generic type's *variance* in a parameter is how the subtyping of the parameter affects the subtyping of the type. +r[subtyping.variance.covariant] * `F` is *covariant* over `T` if `T` being a subtype of `U` implies that `F` is a subtype of `F` (subtyping "passes through") + +r[subtyping.variance.contravariant] * `F` is *contravariant* over `T` if `T` being a subtype of `U` implies that `F` is a subtype of `F` + +r[subtyping.variance.invariant] * `F` is *invariant* over `T` otherwise (no subtyping relation can be derived) +r[subtyping.variance.builtin-types] Variance of types is automatically determined as follows | Type | Variance in `'a` | Variance in `T` | @@ -65,6 +81,7 @@ Variance of types is automatically determined as follows | `std::marker::PhantomData` | | covariant | | `dyn Trait + 'a` | covariant | invariant | +r[subtyping.variance.user-composite-types] The variance of other `struct`, `enum`, and `union` types is decided by looking at the variance of the types of their fields. If the parameter is used in positions with different variances then the parameter is invariant. For @@ -85,6 +102,7 @@ struct Variance<'a, 'b, 'c, T, U: 'a> { } ``` +r[subtyping.variance.builtin-composite-types] When used outside of an `struct`, `enum`, or `union`, the variance for parameters is checked at each location separately. 
```rust diff --git a/src/test-summary.md b/src/test-summary.md new file mode 100644 index 000000000..e4e3e7491 --- /dev/null +++ b/src/test-summary.md @@ -0,0 +1,5 @@ +# Test summary + +The following is a summary of the total tests that are linked to individual rule identifiers within the reference. + +{{summary-table}} diff --git a/src/tokens.md b/src/tokens.md index d94464f9f..d3278f681 100644 --- a/src/tokens.md +++ b/src/tokens.md @@ -1,5 +1,8 @@ # Tokens +r[lex.token] + +r[lex.token.intro] Tokens are primitive productions in the grammar defined by regular (non-recursive) languages. Rust source input can be broken down into the following kinds of tokens: @@ -18,6 +21,7 @@ table production] form, and appear in `monospace` font. ## Literals +r[lex.token.literal] Literals are tokens used in [literal expressions]. ### Examples @@ -88,13 +92,17 @@ Literals are tokens used in [literal expressions]. #### Suffixes -A suffix is a sequence of characters following the primary part of a literal (without intervening whitespace), of the same form as a non-raw identifier or keyword. +r[lex.token.literal.suffix] +r[lex.token.literal.suffix.intro] +A suffix is a sequence of characters following the primary part of a literal (without intervening whitespace), of the same form as a non-raw identifier or keyword. +r[lex.token.literal.suffix.syntax] > **Lexer**\ > SUFFIX : IDENTIFIER_OR_KEYWORD\ > SUFFIX_NO_E : SUFFIX _not beginning with `e` or `E`_ +r[lex.token.literal.suffix.validity] Any kind of literal (string, integer, etc) with any suffix is valid as a token. A literal token with any suffix can be passed to a macro without producing an error. @@ -109,6 +117,7 @@ blackhole!("string"suffix); // OK blackhole_lit!(1suffix); // OK ``` +r[lex.token.literal.suffix.parse] However, suffixes on literal tokens which are interpreted as literal expressions or patterns are restricted.
Any suffixes are rejected on non-numeric literal tokens, and numeric literal tokens are accepted only with suffixes from the list below. @@ -121,6 +130,9 @@ and numeric literal tokens are accepted only with suffixes from the list below. #### Character literals +r[lex.token.literal.char] + +r[lex.token.literal.char.syntax] > **Lexer**\ > CHAR_LITERAL :\ >    `'` ( ~\[`'` `\` \\n \\r \\t] | QUOTE_ESCAPE | ASCII_ESCAPE | UNICODE_ESCAPE ) `'` SUFFIX? @@ -135,12 +147,16 @@ and numeric literal tokens are accepted only with suffixes from the list below. > UNICODE_ESCAPE :\ >    `\u{` ( HEX_DIGIT `_`\* )1..6 `}` +r[lex.token.literal.char.intro] A _character literal_ is a single Unicode character enclosed within two `U+0027` (single-quote) characters, with the exception of `U+0027` itself, which must be _escaped_ by a preceding `U+005C` character (`\`). #### String literals +r[lex.token.literal.str] + +r[lex.token.literal.str.syntax] > **Lexer**\ > STRING_LITERAL :\ >    `"` (\ @@ -154,10 +170,12 @@ which must be _escaped_ by a preceding `U+005C` character (`\`). > STRING_CONTINUE :\ >    `\` _followed by_ \\n +r[lex.token.literal.str.intro] A _string literal_ is a sequence of any Unicode characters enclosed within two `U+0022` (double-quote) characters, with the exception of `U+0022` itself, which must be _escaped_ by a preceding `U+005C` character (`\`). +r[lex.token.literal.str.linefeed] Line-breaks, represented by the character `U+000A` (LF), are allowed in string literals. When an unescaped `U+005C` character (`\`) occurs immediately before a line break, the line break does not appear in the string represented by the token. See [String continuation escapes] for details. @@ -165,28 +183,43 @@ The character `U+000D` (CR) may not appear in a string literal other than as par #### Character escapes +r[lex.token.literal.char-escape] + +r[lex.token.literal.char-escape.intro] Some additional _escapes_ are available in either character or non-raw string literals. 
An escape starts with a `U+005C` (`\`) and continues with one of the following forms: +r[lex.token.literal.char-escape.ascii] * A _7-bit code point escape_ starts with `U+0078` (`x`) and is followed by exactly two _hex digits_ with value up to `0x7F`. It denotes the ASCII character with value equal to the provided hex value. Higher values are not permitted because it is ambiguous whether they mean Unicode code points or byte values. + +r[lex.token.literal.char-escape.unicode] * A _24-bit code point escape_ starts with `U+0075` (`u`) and is followed by up to six _hex digits_ surrounded by braces `U+007B` (`{`) and `U+007D` (`}`). It denotes the Unicode code point equal to the provided hex value. + +r[lex.token.literal.char-escape.whitespace] * A _whitespace escape_ is one of the characters `U+006E` (`n`), `U+0072` (`r`), or `U+0074` (`t`), denoting the Unicode values `U+000A` (LF), `U+000D` (CR) or `U+0009` (HT) respectively. + +r[lex.token.literal.char-escape.null] * The _null escape_ is the character `U+0030` (`0`) and denotes the Unicode value `U+0000` (NUL). + +r[lex.token.literal.char-escape.slash] * The _backslash escape_ is the character `U+005C` (`\`) which must be escaped in order to denote itself. #### Raw string literals +r[lex.token.literal.str-raw] + +r[lex.token.literal.str-raw.syntax] > **Lexer**\ > RAW_STRING_LITERAL :\ >    `r` RAW_STRING_CONTENT SUFFIX? @@ -195,13 +228,16 @@ following forms: >       `"` ( ~ _IsolatedCR_ )* (non-greedy) `"`\ >    | `#` RAW_STRING_CONTENT `#` +r[lex.token.literal.str-raw.intro] Raw string literals do not process any escapes. They start with the character `U+0072` (`r`), followed by fewer than 256 of the character `U+0023` (`#`) and a `U+0022` (double-quote) character. +r[lex.token.literal.str-raw.body] The _raw string body_ can contain any sequence of Unicode characters other than `U+000D` (CR). 
It is terminated only by another `U+0022` (double-quote) character, followed by the same number of `U+0023` (`#`) characters that preceded the opening `U+0022` (double-quote) character. +r[lex.token.literal.str-raw.content] All Unicode characters contained in the raw string body represent themselves, the characters `U+0022` (double-quote) (except when followed by at least as many `U+0023` (`#`) characters as were used to start the raw string literal) or @@ -224,6 +260,9 @@ r##"foo #"# bar"##; // foo #"# bar #### Byte literals +r[lex.token.byte] + +r[lex.token.byte.syntax] > **Lexer**\ > BYTE_LITERAL :\ >    `b'` ( ASCII_FOR_CHAR | BYTE_ESCAPE ) `'` SUFFIX? @@ -235,6 +274,7 @@ r##"foo #"# bar"##; // foo #"# bar >       `\x` HEX_DIGIT HEX_DIGIT\ >    | `\n` | `\r` | `\t` | `\\` | `\0` | `\'` | `\"` +r[lex.token.byte.intro] A _byte literal_ is a single ASCII character (in the `U+0000` to `U+007F` range) or a single _escape_ preceded by the characters `U+0062` (`b`) and `U+0027` (single-quote), and followed by the character `U+0027`. If the character @@ -244,6 +284,9 @@ _number literal_. #### Byte string literals +r[lex.token.str-byte] + +r[lex.token.str-byte.syntax] > **Lexer**\ > BYTE_STRING_LITERAL :\ >    `b"` ( ASCII_FOR_STRING | BYTE_ESCAPE | STRING_CONTINUE )\* `"` SUFFIX? @@ -251,6 +294,7 @@ _number literal_. > ASCII_FOR_STRING :\ >    _any ASCII (i.e 0x00 to 0x7F), except_ `"`, `\` _and IsolatedCR_ +r[lex.token.str-byte.intro] A non-raw _byte string literal_ is a sequence of ASCII characters and _escapes_, preceded by the characters `U+0062` (`b`) and `U+0022` (double-quote), and followed by the character `U+0022`. If the character `U+0022` is present within @@ -258,28 +302,40 @@ the literal, it must be _escaped_ by a preceding `U+005C` (`\`) character. Alternatively, a byte string literal can be a _raw byte string literal_, defined below. 
+r[lex.token.str-byte.linefeed] Line-breaks, represented by the character `U+000A` (LF), are allowed in byte string literals. When an unescaped `U+005C` character (`\`) occurs immediately before a line break, the line break does not appear in the string represented by the token. See [String continuation escapes] for details. The character `U+000D` (CR) may not appear in a byte string literal other than as part of such a string continuation escape. +r[lex.token.str-byte.escape] Some additional _escapes_ are available in either byte or non-raw byte string literals. An escape starts with a `U+005C` (`\`) and continues with one of the following forms: +r[lex.token.str-byte.escape-byte] * A _byte escape_ escape starts with `U+0078` (`x`) and is followed by exactly two _hex digits_. It denotes the byte equal to the provided hex value. + +r[lex.token.str-byte.escape-whitespace] * A _whitespace escape_ is one of the characters `U+006E` (`n`), `U+0072` (`r`), or `U+0074` (`t`), denoting the bytes values `0x0A` (ASCII LF), `0x0D` (ASCII CR) or `0x09` (ASCII HT) respectively. + +r[lex.token.str-byte.escape-null] * The _null escape_ is the character `U+0030` (`0`) and denotes the byte value `0x00` (ASCII NUL). + +r[lex.token.str-byte.escape-slash] * The _backslash escape_ is the character `U+005C` (`\`) which must be escaped in order to denote its ASCII encoding `0x5C`. #### Raw byte string literals +r[lex.token.str-byte-raw] + +r[lex.token.str-byte-raw.syntax] > **Lexer**\ > RAW_BYTE_STRING_LITERAL :\ >    `br` RAW_BYTE_STRING_CONTENT SUFFIX? @@ -291,14 +347,17 @@ following forms: > ASCII_FOR_RAW :\ >    _any ASCII (i.e. 0x00 to 0x7F) except IsolatedCR_ +r[lex.token.str-byte-raw.intro] Raw byte string literals do not process any escapes. They start with the character `U+0062` (`b`), followed by `U+0072` (`r`), followed by fewer than 256 of the character `U+0023` (`#`), and a `U+0022` (double-quote) character. 
+r[lex.token.str-byte-raw.body] The _raw string body_ can contain any sequence of ASCII characters other than `U+000D` (CR). It is terminated only by another `U+0022` (double-quote) character, followed by the same number of `U+0023` (`#`) characters that preceded the opening `U+0022` (double-quote) character. A raw byte string literal can not contain any non-ASCII byte. +r[lex.token.str-byte-raw.content] All characters contained in the raw string body represent their ASCII encoding, the characters `U+0022` (double-quote) (except when followed by at least as many `U+0023` (`#`) characters as were used to start the raw string literal) or @@ -321,6 +380,9 @@ b"\\x52"; br"\x52"; // \x52 #### C string literals +r[lex.token.str-c] + +r[lex.token.str-c.syntax] > **Lexer**\ > C_STRING_LITERAL :\ >    `c"` (\ @@ -330,6 +392,7 @@ b"\\x52"; br"\x52"; // \x52 >       | STRING_CONTINUE\ >    )\* `"` SUFFIX? +r[lex.token.str-c.intro] A _C string literal_ is a sequence of Unicode characters and _escapes_, preceded by the characters `U+0063` (`c`) and `U+0022` (double-quote), and followed by the character `U+0022`. If the character `U+0022` is present within @@ -338,31 +401,42 @@ Alternatively, a C string literal can be a _raw C string literal_, defined below [CStr]: core::ffi::CStr +r[lex.token.str-c.null] C strings are implicitly terminated by byte `0x00`, so the C string literal `c""` is equivalent to manually constructing a `&CStr` from the byte string literal `b"\x00"`. Other than the implicit terminator, byte `0x00` is not permitted within a C string. +r[lex.token.str-c.linefeed] Line-breaks, represented by the character `U+000A` (LF), are allowed in C string literals. When an unescaped `U+005C` character (`\`) occurs immediately before a line break, the line break does not appear in the string represented by the token. See [String continuation escapes] for details.
The character `U+000D` (CR) may not appear in a C string literal other than as part of such a string continuation escape. +r[lex.token.str-c.escape] Some additional _escapes_ are available in non-raw C string literals. An escape starts with a `U+005C` (`\`) and continues with one of the following forms: +r[lex.token.str-c.escape-byte] * A _byte escape_ escape starts with `U+0078` (`x`) and is followed by exactly two _hex digits_. It denotes the byte equal to the provided hex value. + +r[lex.token.str-c.escape-unicode] * A _24-bit code point escape_ starts with `U+0075` (`u`) and is followed by up to six _hex digits_ surrounded by braces `U+007B` (`{`) and `U+007D` (`}`). It denotes the Unicode code point equal to the provided hex value, encoded as UTF-8. + +r[lex.token.str-c.escape-whitespace] * A _whitespace escape_ is one of the characters `U+006E` (`n`), `U+0072` (`r`), or `U+0074` (`t`), denoting the bytes values `0x0A` (ASCII LF), `0x0D` (ASCII CR) or `0x09` (ASCII HT) respectively. + +r[lex.token.str-c.escape-slash] * The _backslash escape_ is the character `U+005C` (`\`) which must be escaped in order to denote its ASCII encoding `0x5C`. +r[lex.token.str-c.char-unicode] A C string represents bytes with no defined encoding, but a C string literal may contain Unicode characters above `U+007F`. Such characters will be replaced with the bytes of that character's UTF-8 representation. @@ -375,11 +449,15 @@ c"\u{00E6}"; c"\xC3\xA6"; ``` +r[lex.token.str-c.edition2021] > **Edition differences**: C string literals are accepted in the 2021 edition or > later. In earlier additions the token `c""` is lexed as `c ""`. #### Raw C string literals +r[lex.token.str-c-raw] + +r[lex.token.str-c-raw.syntax] > **Lexer**\ > RAW_C_STRING_LITERAL :\ >    `cr` RAW_C_STRING_CONTENT SUFFIX? 
@@ -388,18 +466,22 @@ c"\xC3\xA6"; >       `"` ( ~ _IsolatedCR_ _NUL_ )* (non-greedy) `"`\ >    | `#` RAW_C_STRING_CONTENT `#` +r[lex.token.str-c-raw.intro] Raw C string literals do not process any escapes. They start with the character `U+0063` (`c`), followed by `U+0072` (`r`), followed by fewer than 256 of the character `U+0023` (`#`), and a `U+0022` (double-quote) character. +r[lex.token.str-c-raw.body] The _raw C string body_ can contain any sequence of Unicode characters other than `U+0000` (NUL) and `U+000D` (CR). It is terminated only by another `U+0022` (double-quote) character, followed by the same number of `U+0023` (`#`) characters that preceded the opening `U+0022` (double-quote) character. +r[lex.token.str-c-raw.content] All characters contained in the raw C string body represent themselves in UTF-8 encoding. The characters `U+0022` (double-quote) (except when followed by at least as many `U+0023` (`#`) characters as were used to start the raw C string literal) or `U+005C` (`\`) do not have any special meaning. +r[lex.token.str-c-raw.edition2021] > **Edition differences**: Raw C string literals are accepted in the 2021 > edition or later. In earlier additions the token `cr""` is lexed as `cr ""`, > and `cr#""#` is lexed as `cr #""#` (which is non-grammatical). @@ -419,11 +501,16 @@ c"\\x52"; cr"\x52"; // \x52 ### Number literals +r[lex.token.literal.num] + A _number literal_ is either an _integer literal_ or a _floating-point literal_. The grammar for recognizing the two kinds of literals is mixed. #### Integer literals +r[lex.token.literal.int] + +r[lex.token.literal.int.syntax] > **Lexer**\ > INTEGER_LITERAL :\ >    ( DEC_LITERAL | BIN_LITERAL | OCT_LITERAL | HEX_LITERAL ) @@ -449,20 +536,29 @@ literal_. The grammar for recognizing the two kinds of literals is mixed. 
> > HEX_DIGIT : \[`0`-`9` `a`-`f` `A`-`F`] +r[lex.token.literal.int.kind] An _integer literal_ has one of four forms: +r[lex.token.literal.int.kind-dec] * A _decimal literal_ starts with a *decimal digit* and continues with any mixture of *decimal digits* and _underscores_. + +r[lex.token.literal.int.kind-hex] * A _hex literal_ starts with the character sequence `U+0030` `U+0078` (`0x`) and continues as any mixture (with at least one digit) of hex digits and underscores. + +r[lex.token.literal.int.kind-oct] * An _octal literal_ starts with the character sequence `U+0030` `U+006F` (`0o`) and continues as any mixture (with at least one digit) of octal digits and underscores. + +r[lex.token.literal.int.kind-bin] * A _binary literal_ starts with the character sequence `U+0030` `U+0062` (`0b`) and continues as any mixture (with at least one digit) of binary digits and underscores. +r[lex.token.literal.int.restriction] Like any literal, an integer literal may be followed (immediately, without any spaces) by a suffix as described above. The suffix may not begin with `e` or `E`, as that would be interpreted as the exponent of a floating-point literal. See [Integer literal expressions] for the effect of these suffixes. @@ -515,13 +611,18 @@ Examples of integer literals which are not accepted as literal expressions: #### Tuple index +r[lex.token.literal.int.tuple-field] + +r[lex.token.literal.int.tuple-field.syntax] > **Lexer**\ > TUPLE_INDEX: \ >    INTEGER_LITERAL +r[lex.token.literal.int.tuple-field.intro] A tuple index is used to refer to the fields of [tuples], [tuple structs], and [tuple variants]. +r[lex.token.literal.int.tuple-field.eq] Tuple indices are compared with the literal token directly. Tuple indices start with `0` and each successive index increments the value by `1` as a decimal value. 
Thus, only decimal values will match, and the value must not @@ -541,6 +642,9 @@ let horse = example.0b10; // ERROR no field named `0b10` #### Floating-point literals +r[lex.token.literal.float] + +r[lex.token.literal.float.syntax] > **Lexer**\ > FLOAT_LITERAL :\ >       DEC_LITERAL `.` @@ -553,12 +657,14 @@ let horse = example.0b10; // ERROR no field named `0b10` > (DEC_DIGIT|`_`)\* DEC_DIGIT (DEC_DIGIT|`_`)\* > +r[lex.token.literal.float.form] A _floating-point literal_ has one of two forms: * A _decimal literal_ followed by a period character `U+002E` (`.`). This is optionally followed by another decimal literal, with an optional _exponent_. * A single _decimal literal_ followed by an _exponent_. +r[lex.token.literal.float.suffix] Like integer literals, a floating-point literal may be followed by a suffix, so long as the pre-suffix part does not end with `U+002E` (`.`). The suffix may not begin with `e` or `E` if the literal does not include an exponent. @@ -575,7 +681,7 @@ let x: f64 = 2.; ``` This last example is different because it is not possible to use the suffix -syntax with a floating point literal ending in a period. `2.f64` would attempt +syntax with a floating point literal ending in a period. `2.f64` would attempt to call a method named `f64` on `2`. Note that `-1.0`, for example, is analyzed as two tokens: `-` followed by `1.0`. @@ -594,6 +700,8 @@ Examples of floating-point literals which are not accepted as literal expression #### Reserved forms similar to number literals +r[lex.token.literal.reserved] + > **Lexer**\ > RESERVED_NUMBER :\ >       BIN_LITERAL \[`2`-`9`​]\ @@ -606,17 +714,23 @@ Examples of floating-point literals which are not accepted as literal expression >    | `0x` `_`\* _end of input or not HEX_DIGIT_\ >    | DEC_LITERAL ( . DEC_LITERAL)? (`e`|`E`) (`+`|`-`)? _end of input or not DEC_DIGIT_ +r[lex.token.literal.reserved.intro] The following lexical forms similar to number literals are _reserved forms_.
Due to the possible ambiguity these raise, they are rejected by the tokenizer instead of being interpreted as separate tokens. +r[lex.token.literal.reserved.out-of-range] * An unsuffixed binary or octal literal followed, without intervening whitespace, by a decimal digit out of the range for its radix. +r[lex.token.literal.reserved.period] * An unsuffixed binary, octal, or hexadecimal literal followed, without intervening whitespace, by a period character (with the same restrictions on what follows the period as for floating-point literals). +r[lex.token.literal.reserved.exp] * An unsuffixed binary or octal literal followed, without intervening whitespace, by the character `e` or `E`. +r[lex.token.literal.reserved.empty-with-radix] * Input which begins with one of the radix prefixes but is not a valid binary, octal, or hexadecimal literal (because it contains no digits). +r[lex.token.literal.reserved.empty-exp] * Input which has the form of a floating-point literal with no digits in the exponent. Examples of reserved forms: @@ -636,25 +750,52 @@ Examples of reserved forms: ## Lifetimes and loop labels +r[lex.token.life] + +r[lex.token.life.syntax] > **Lexer**\ > LIFETIME_TOKEN :\ >       `'` [IDENTIFIER_OR_KEYWORD][identifier] > _(not immediately followed by `'`)_\ >    | `'_` -> _(not immediately followed by `'`)_ +> _(not immediately followed by `'`)_\ +>    | RAW_LIFETIME > > LIFETIME_OR_LABEL :\ >       `'` [NON_KEYWORD_IDENTIFIER][identifier] > _(not immediately followed by `'`)_\ ->    | `'_` +>    | RAW_LIFETIME +> +> RAW_LIFETIME :\ +>    `'r#` [IDENTIFIER_OR_KEYWORD][identifier] *Except `crate`, `self`, `super`, `Self`* +> _(not immediately followed by `'`)_ +> +> RESERVED_RAW_LIFETIME : `'r#_` > _(not immediately followed by `'`)_ +r[lex.token.life.intro] Lifetime parameters and [loop labels] use LIFETIME_OR_LABEL tokens. Any LIFETIME_TOKEN will be accepted by the lexer, and for example, can be used in macros. 
+r[lex.token.life.raw.intro] +A raw lifetime is like a normal lifetime, but its identifier is prefixed by `r#`. (Note that the `r#` prefix is not included as part of the actual lifetime.) + +r[lex.token.life.raw.allowed] +Unlike a normal lifetime, a raw lifetime may be any strict or reserved keyword except the ones listed above for `RAW_LIFETIME`. + +r[lex.token.life.raw.reserved] +To avoid confusion with the [placeholder lifetime], it is an error to use the RESERVED_RAW_LIFETIME token `'r#_`. + +r[lex.token.life.raw.edition2021] +> **Edition differences**: Raw lifetimes are accepted in the 2021 +> edition or later. In earlier editions the token `'r#lt` is lexed as `'r # lt`. + ## Punctuation +r[lex.token.punct] + +r[lex.token.punct.intro] Punctuation symbol tokens are listed here for completeness. Their individual usages and meanings are defined in the linked pages. @@ -710,6 +851,8 @@ usages and meanings are defined in the linked pages. ## Delimiters +r[lex.token.delim] + Bracket punctuation is used in various parts of the grammar. An open bracket must always be paired with a close bracket. Brackets and the tokens within them are referred to as "token trees" in [macros]. The three types of brackets are: @@ -722,19 +865,31 @@ them are referred to as "token trees" in [macros].
The three types of brackets ## Reserved prefixes +r[lex.token.reserved-prefix] + +r[lex.token.reserved-prefix.syntax] > **Lexer 2021+**\ > RESERVED_TOKEN_DOUBLE_QUOTE : ( IDENTIFIER_OR_KEYWORD _Except `b` or `c` or `r` or `br` or `cr`_ | `_` ) `"`\ > RESERVED_TOKEN_SINGLE_QUOTE : ( IDENTIFIER_OR_KEYWORD _Except `b`_ | `_` ) `'`\ -> RESERVED_TOKEN_POUND : ( IDENTIFIER_OR_KEYWORD _Except `r` or `br` or `cr`_ | `_` ) `#` +> RESERVED_TOKEN_POUND : ( IDENTIFIER_OR_KEYWORD _Except `r` or `br` or `cr`_ | `_` ) `#`\ +> RESERVED_TOKEN_LIFETIME : `'` ( IDENTIFIER_OR_KEYWORD _Except `r`_ | `_` ) `#` +r[lex.token.reserved-prefix.intro] Some lexical forms known as _reserved prefixes_ are reserved for future use. +r[lex.token.reserved-prefix.id] Source input which would otherwise be lexically interpreted as a non-raw identifier (or a keyword or `_`) which is immediately followed by a `#`, `'`, or `"` character (without intervening whitespace) is identified as a reserved prefix. +r[lex.token.reserved-prefix.raw-token] Note that raw identifiers, raw string literals, and raw byte string literals may contain a `#` character but are not interpreted as containing a reserved prefix. +r[lex.token.reserved-prefix.strings] Similarly the `r`, `b`, `br`, `c`, and `cr` prefixes used in raw string literals, byte literals, byte string literals, raw byte string literals, C string literals, and raw C string literals are not interpreted as reserved prefixes. +r[lex.token.reserved-prefix.life] +Source input which would otherwise be lexically interpreted as a non-raw lifetime (or a keyword or `_`) which is immediately followed by a `#` character (without intervening whitespace) is identified as a reserved lifetime prefix. + +r[lex.token.reserved-prefix.edition2021] > **Edition differences**: Starting with the 2021 edition, reserved prefixes are reported as an error by the lexer (in particular, they cannot be passed to macros).
> > Before the 2021 edition, reserved prefixes are accepted by the lexer and interpreted as multiple tokens (for example, one token for the identifier or keyword, followed by a `#` token). @@ -746,6 +901,7 @@ Similarly the `r`, `b`, `br`, `c`, and `cr` prefixes used in raw string literals > lexes!{continue 'foo} > lexes!{match "..." {}} > lexes!{r#let#foo} // three tokens: r#let # foo +> lexes!{'prefix #lt} > ``` > > Examples accepted before the 2021 edition but rejected later: @@ -754,11 +910,34 @@ Similarly the `r`, `b`, `br`, `c`, and `cr` prefixes used in raw string literals > lexes!{a#foo} > lexes!{continue'foo} > lexes!{match"..." {}} +> lexes!{'prefix#lt} > ``` +## Reserved guards + +r[lex.token.reserved-guards] + +r[lex.token.reserved-guards.syntax] +> **Lexer 2024+**\ +> RESERVED_GUARDED_STRING_LITERAL : `#`<sup>+</sup> [STRING_LITERAL]\ +> RESERVED_POUNDS : `#`<sup>2..</sup> + +r[lex.token.reserved-guards.intro] +The reserved guards are syntax reserved for future use, and will generate a compile error if used. + +r[lex.token.reserved-guards.string-literal] +The *reserved guarded string literal* is a token of one or more `U+0023` (`#`) immediately followed by a [STRING_LITERAL]. + +r[lex.token.reserved-guards.pounds] +The *reserved pounds* is a token of two or more `U+0023` (`#`). + +r[lex.token.reserved-guards.edition2024] +> **Edition differences**: Before the 2024 edition, reserved guards are accepted by the lexer and interpreted as multiple tokens. For example, the `#"foo"#` form is interpreted as three tokens. `##` is interpreted as two tokens.
+ [Inferred types]: types/inferred.md [Range patterns]: patterns.md#range-patterns [Reference patterns]: patterns.md#reference-patterns +[STRING_LITERAL]: tokens.md#string-literals [Subpattern binding]: patterns.md#identifier-patterns [Wildcard patterns]: patterns.md#wildcard-pattern [arith]: expressions/operator-expr.md#arithmetic-and-logical-binary-operators @@ -795,6 +974,7 @@ Similarly the `r`, `b`, `br`, `c`, and `cr` prefixes used in raw string literals [numeric types]: types/numeric.md [paths]: paths.md [patterns]: patterns.md +[placeholder lifetime]: lifetime-elision.md [question]: expressions/operator-expr.md#the-question-mark-operator [range]: expressions/range-expr.md [rangepat]: patterns.md#range-patterns diff --git a/src/trait-bounds.md b/src/trait-bounds.md index 019a2f7f0..2ff83412c 100644 --- a/src/trait-bounds.md +++ b/src/trait-bounds.md @@ -1,5 +1,8 @@ # Trait and lifetime bounds +r[bound] + +r[bound.syntax] > **Syntax**\ > _TypeParamBounds_ :\ >    _TypeParamBound_ ( `+` _TypeParamBound_ )\* `+`? @@ -8,17 +11,18 @@ >       _Lifetime_ | _TraitBound_ | _UseBound_ > > _TraitBound_ :\ ->       `?`? -> [_ForLifetimes_](#higher-ranked-trait-bounds)? [_TypePath_]\ ->    | `(` `?`? -> [_ForLifetimes_](#higher-ranked-trait-bounds)? [_TypePath_] `)` +>       ( `?` | +> [_ForLifetimes_](#higher-ranked-trait-bounds) )? [_TypePath_]\ +>    | `(` ( `?` | +> [_ForLifetimes_](#higher-ranked-trait-bounds) )? [_TypePath_] `)` > > _LifetimeBounds_ :\ >    ( _Lifetime_ `+` )\* _Lifetime_? > > _Lifetime_ :\ >       [LIFETIME_OR_LABEL]\ ->    | `'static` +>    | `'static`\ +>    | `'_` > > _UseBound_ :\ >    `use` _UseBoundGenericArgs_ @@ -35,6 +39,7 @@ >    | [IDENTIFIER][] \ >    | `Self` +r[bound.intro] [Trait] and lifetime bounds provide a way for [generic items][generic] to restrict which types and lifetimes are used as their parameters. Bounds can be provided on any type in a [where clause]. 
There are also shorter forms for @@ -48,6 +53,7 @@ certain common cases: `trait A { type B: Copy; }` is equivalent to `trait A where Self::B: Copy { type B; }`. +r[bound.satisfaction] Bounds on an item must be satisfied when using the item. When type checking and borrow checking a generic item, the bounds can be used to determine that a trait is implemented for a type. For example, given `Ty: Trait` @@ -87,9 +93,11 @@ fn name_figure( } ``` +r[bound.trivial] Bounds that don't use the item's parameters or [higher-ranked lifetimes] are checked when the item is defined. It is an error for such a bound to be false. +r[bound.special] [`Copy`], [`Clone`], and [`Sized`] bounds are also checked for certain generic types when using the item, even if the use does not provide a concrete type. It is an error to have `Copy` or `Clone` as a bound on a mutable reference, [trait object], or [slice]. It is an error to have `Sized` as a bound on a trait object or slice. @@ -107,16 +115,24 @@ where struct UsesA<'a, T>(A<'a, T>); ``` +r[bound.trait-object] Trait and lifetime bounds are also used to name [trait objects]. ## `?Sized` +r[bound.sized] + `?` is only used to relax the implicit [`Sized`] trait bound for [type parameters] or [associated types]. `?Sized` may not be used as a bound for other types. ## Lifetime bounds +r[bound.lifetime] + +r[bound.lifetime.intro] Lifetime bounds can be applied to types or to other lifetimes. + +r[bound.lifetime.outlive-lifetime] The bound `'a: 'b` is usually read as `'a` *outlives* `'b`. `'a: 'b` means that `'a` lasts at least as long as `'b`, so a reference `&'a ()` is valid whenever `&'b ()` is valid. @@ -127,14 +143,19 @@ fn f<'a, 'b>(x: &'a i32, mut y: &'b i32) where 'a: 'b { } ``` +r[bound.lifetime.outlive-type] `T: 'a` means that all lifetime parameters of `T` outlive `'a`. For example, if `'a` is an unconstrained lifetime parameter, then `i32: 'static` and `&'static str: 'a` are satisfied, but `Vec<&'a ()>: 'static` is not. 
## Higher-ranked trait bounds +r[bound.higher-ranked] + +r[bound.higher-ranked.syntax] > _ForLifetimes_ :\ >    `for` [_GenericParams_] +r[bound.higher-ranked.intro] Trait bounds may be *higher ranked* over lifetimes. These bounds specify a bound that is true *for all* lifetimes. For example, a bound such as `for<'a> &'a T: PartialEq` would require an implementation like @@ -158,6 +179,7 @@ fn call_on_ref_zero(f: F) where for<'a> F: Fn(&'a i32) { } ``` +r[bound.higher-ranked.trait] Higher-ranked lifetimes may also be specified just before the trait: the only difference is the [scope][hrtb-scopes] of the lifetime parameter, which extends only to the end of the following trait instead of the whole bound. This function is @@ -172,15 +194,20 @@ fn call_on_ref_zero(f: F) where F: for<'a> Fn(&'a i32) { ## Implied bounds +r[bound.implied] + +r[bound.implied.intro] Lifetime bounds required for types to be well-formed are sometimes inferred. ```rust fn requires_t_outlives_a<'a, T>(x: &'a T) {} ``` + The type parameter `T` is required to outlive `'a` for the type `&'a T` to be well-formed. This is inferred because the function signature contains the type `&'a T` which is only valid if `T: 'a` holds. +r[bound.implied.context] Implied bounds are added for all parameters and outputs of functions. Inside of `requires_t_outlives_a` you can assume `T: 'a` to hold even if you don't explicitly specify this: @@ -203,6 +230,7 @@ fn not_implied<'a, T>() { } ``` +r[bound.implied.trait] Only lifetime bounds are implied, trait bounds still have to be explicitly added. 
The following example therefore causes an error: @@ -213,6 +241,7 @@ struct IsDebug(T); fn doesnt_specify_t_debug(x: IsDebug) {} ``` +r[bound.implied.def] Lifetime bounds are also inferred for type definitions and impl blocks for any type: ```rust @@ -244,6 +273,8 @@ impl<'a, T> Trait<'a, T> for &'a T {} ## Use bounds +r[bound.use] + Certain bounds lists may include a `use<..>` bound to control which generic parameters are captured by the `impl Trait` [abstract return type]. See [precise capturing] for more details. [IDENTIFIER]: identifiers.html diff --git a/src/type-coercions.md b/src/type-coercions.md index 7d254f4e7..26e27eb1d 100644 --- a/src/type-coercions.md +++ b/src/type-coercions.md @@ -1,9 +1,13 @@ # Type coercions +r[coerce] + +r[coerce.intro] **Type coercions** are implicit operations that change the type of a value. They happen automatically at specific locations and are highly restricted in what types actually coerce. +r[coerce.as] Any conversions allowed by coercion can also be explicitly performed by the [type cast operator], `as`. @@ -11,11 +15,15 @@ Coercions are originally defined in [RFC 401] and expanded upon in [RFC 1558]. ## Coercion sites +r[coerce.site] + +r[coerce.site.intro] A coercion can only occur at certain coercion sites in a program; these are typically places where the desired type is explicit or can be derived by propagation from explicit types (without type inference). Possible coercion sites are: +r[coerce.site.let] * `let` statements where an explicit type is given. For example, `&mut 42` is coerced to have type `&i8` in the following: @@ -24,8 +32,10 @@ sites are: let _: &i8 = &mut 42; ``` +r[coerce.site.value] * `static` and `const` item declarations (similar to `let` statements).
+r[coerce.site.argument] * Arguments for function calls The value being coerced is the actual parameter, and it is coerced to @@ -44,6 +54,7 @@ sites are: For method calls, the receiver (`self` parameter) type is coerced differently, see the documentation on [method-call expressions] for details. +r[coerce.site.constructor] * Instantiations of struct, union, or enum variant fields For example, `&mut 42` is coerced to have type `&i8` in the following: @@ -56,6 +67,7 @@ sites are: } ``` +r[coerce.site.return] * Function results—either the final line of a block if it is not semicolon-terminated or any expression in a `return` statement @@ -68,24 +80,30 @@ sites are: } ``` +r[coerce.site.subexpr] If the expression in one of these coercion sites is a coercion-propagating expression, then the relevant sub-expressions in that expression are also coercion sites. Propagation recurses from these new coercion sites. Propagating expressions and their relevant sub-expressions are: +r[coerce.site.array] * Array literals, where the array has type `[U; n]`. Each sub-expression in the array literal is a coercion site for coercion to type `U`. +r[coerce.site.repeat] * Array literals with repeating syntax, where the array has type `[U; n]`. The repeated sub-expression is a coercion site for coercion to type `U`. +r[coerce.site.tuple] * Tuples, where a tuple is a coercion site to type `(U_0, U_1, ..., U_n)`. Each sub-expression is a coercion site to the respective type, e.g. the zeroth sub-expression is a coercion site to type `U_0`. +r[coerce.site.parenthesis] * Parenthesized sub-expressions (`(e)`): if the expression has type `U`, then the sub-expression is a coercion site to `U`. +r[coerce.site.block] * Blocks: if a block has type `U`, then the last expression in the block (if it is not semicolon-terminated) is a coercion site to `U`. This includes blocks which are part of control flow statements, such as `if`/`else`, if @@ -93,23 +111,33 @@ the block has a known type. 
## Coercion types +r[coerce.types] + +r[coerce.types.intro] Coercion is allowed between the following types: +r[coerce.types.reflexive] * `T` to `U` if `T` is a [subtype] of `U` (*reflexive case*) +r[coerce.types.transitive] * `T_1` to `T_3` where `T_1` coerces to `T_2` and `T_2` coerces to `T_3` (*transitive case*) Note that this is not fully supported yet. +r[coerce.types.mut-reborrow] * `&mut T` to `&T` +r[coerce.types.mut-pointer] * `*mut T` to `*const T` +r[coerce.types.ref-to-pointer] * `&T` to `*const T` +r[coerce.types.mut-to-pointer] * `&mut T` to `*mut T` +r[coerce.types.deref] * `&T` or `&mut T` to `&U` if `T` implements `Deref`. For example: ```rust @@ -135,8 +163,10 @@ Coercion is allowed between the following types: } ``` +r[coerce.types.deref-mut] * `&mut T` to `&mut U` if `T` implements `DerefMut`. +r[coerce.types.unsize] * TyCtor(`T`) to TyCtor(`U`), where TyCtor(`T`) is one of - `&T` - `&mut T` @@ -150,35 +180,46 @@ Coercion is allowed between the following types: structs. In addition, coercions from subtraits to supertraits will be added. See [RFC 401] for more details.--> +r[coerce.types.fn] * Function item types to `fn` pointers +r[coerce.types.closure] * Non capturing closures to `fn` pointers +r[coerce.types.never] * `!` to any `T` ### Unsized Coercions +r[coerce.unsize] + +r[coerce.unsize.intro] The following coercions are called `unsized coercions`, since they relate to converting sized types to unsized types, and are permitted in a few cases where other coercions are not, as described above. They can still happen anywhere else a coercion can occur. +r[coerce.unsize.trait] Two traits, [`Unsize`] and [`CoerceUnsized`], are used to assist in this process and expose it for library use. The following coercions are built-ins and, if `T` can be coerced to `U` with one of them, then an implementation of `Unsize` for `T` will be provided: +r[coerce.unsize.slice] * `[T; n]` to `[T]`. 
-* `T` to `dyn U`, when `T` implements `U + Sized`, and `U` is [object safe]. +r[coerce.unsize.trait-object] +* `T` to `dyn U`, when `T` implements `U + Sized`, and `U` is [dyn compatible]. +r[coerce.unsize.composite] * `Foo<..., T, ...>` to `Foo<..., U, ...>`, when: * `Foo` is a struct. * `T` implements `Unsize<U>`. * The last field of `Foo` has a type involving `T`. - * If that field has type `Bar<T>`, then `Bar<T>` implements `Unsized<Bar<U>>`. + * If that field has type `Bar<T>`, then `Bar<T>` implements `Unsize<Bar<U>>`. * T is not part of the type of any other fields. +r[coerce.unsize.pointer] Additionally, a type `Foo<T>` can implement `CoerceUnsized<Foo<U>>` when `T` implements `Unsize<U>` or `CoerceUnsized<Foo<U>>`. This allows it to provide an unsized coercion to `Foo<U>`. @@ -189,6 +230,9 @@ unsized coercion to `Foo<U>`. ## Least upper bound coercions +r[coerce.least-upper-bound] + +r[coerce.least-upper-bound.intro] In some contexts, the compiler must coerce together multiple types to try and find the most general type. This is called a "Least Upper Bound" coercion. LUB coercion is used and only used in the following situations: @@ -199,15 +243,24 @@ LUB coercion is used and only used in the following situations: + To find the type for the return type of a closure with multiple return statements. + To check the type for the return type of a function with multiple return statements. +r[coerce.least-upper-bound.target] In each such case, there are a set of types `T0..Tn` to be mutually coerced -to some target type `T_t`, which is unknown to start. Computing the LUB +to some target type `T_t`, which is unknown to start. + +r[coerce.least-upper-bound.computation] +Computing the LUB coercion is done iteratively. The target type `T_t` begins as the type `T0`. For each new type `Ti`, we consider whether +r[coerce.least-upper-bound.computation-identity] + If `Ti` can be coerced to the current target type `T_t`, then no change is made.
+ +r[coerce.least-upper-bound.computation-replace] + Otherwise, check whether `T_t` can be coerced to `Ti`; if so, the `T_t` is changed to `Ti`. (This check is also conditioned on whether all of the source expressions considered thus far have implicit coercions.) + +r[coerce.least-upper-bound.computation-unify] + If not, try to compute a mutual supertype of `T_t` and `Ti`, which will become the new target type. ### Examples: @@ -269,7 +322,7 @@ precisely. [RFC 401]: https://github.com/rust-lang/rfcs/blob/master/text/0401-coercions.md [RFC 1558]: https://github.com/rust-lang/rfcs/blob/master/text/1558-closure-to-fn-coercion.md [subtype]: subtyping.md -[object safe]: items/traits.md#object-safety +[dyn compatible]: items/traits.md#dyn-compatibility [type cast operator]: expressions/operator-expr.md#type-cast-expressions [`Unsize`]: std::marker::Unsize [`CoerceUnsized`]: std::ops::CoerceUnsized diff --git a/src/type-layout.md b/src/type-layout.md index 2edab7989..5d04d2a80 100644 --- a/src/type-layout.md +++ b/src/type-layout.md @@ -1,9 +1,13 @@ # Type Layout +r[layout] + +r[layout.intro] The layout of a type is its size, alignment, and the relative offsets of its fields. For enums, how the discriminant is laid out and interpreted is also part of type layout. +r[layout.guarantees] Type layout can be changed with each compilation. Instead of trying to document exactly what is done, we only document what is guaranteed today. @@ -13,8 +17,10 @@ see [here][fn-abi-compatibility]. ## Size and Alignment +r[layout.properties] All values have an alignment and size. +r[layout.properties.align] The *alignment* of a value specifies what addresses are valid to store the value at. A value of alignment `n` must only be stored at an address that is a multiple of n. For example, a value with an alignment of 2 must be stored at an @@ -22,6 +28,7 @@ even address, while a value with an alignment of 1 can be stored at any address. 
Alignment is measured in bytes, and must be at least 1, and always a power of 2. The alignment of a value can be checked with the [`align_of_val`] function. +r[layout.properties.size] The *size* of a value is the offset in bytes between successive elements in an array with that item type including alignment padding. The size of a value is always a multiple of its alignment. Note that some types are zero-sized; 0 is @@ -29,6 +36,7 @@ considered a multiple of any alignment (for example, on some platforms, the type `[u16; 0]` has size 0 and alignment 2). The size of a value can be checked with the [`size_of_val`] function. +r[layout.properties.sized] Types where all values have the same size and alignment, and both are known at compile time, implement the [`Sized`] trait and can be checked with the [`size_of`] and [`align_of`] functions. Types that are not [`Sized`] are known @@ -38,6 +46,9 @@ the alignment of the type respectively. ## Primitive Data Layout +r[layout.primitive] + +r[layout.primitive.size] The size of most primitives is given in this table. | Type | `size_of::()`| @@ -53,10 +64,12 @@ The size of most primitives is given in this table. | `f64` | 8 | | `char` | 4 | +r[layout.primitive.size-int] `usize` and `isize` have a size big enough to contain every address on the target platform. For example, on a 32 bit target, this is 4 bytes, and on a 64 bit target, this is 8 bytes. +r[layout.primitive.align] The alignment of primitives is platform-specific. In most cases, their alignment is equal to their size, but it may be less. In particular, `i128` and `u128` are often aligned to 4 or 8 bytes even though @@ -65,11 +78,16 @@ aligned to 4 bytes, not 8. ## Pointers and References Layout +r[layout.pointer] + +r[layout.pointer.intro] Pointers and references have the same layout. Mutability of the pointer or reference does not change the layout. +r[layout.pointer.thin] Pointers to sized types have the same size and alignment as `usize`. 
+r[layout.pointer.unsized] Pointers to unsized types are sized. The size and alignment is guaranteed to be at least equal to the size and alignment of a pointer. @@ -79,29 +97,42 @@ at least equal to the size and alignment of a pointer. ## Array Layout +r[layout.array] + An array of `[T; N]` has a size of `size_of::() * N` and the same alignment of `T`. Arrays are laid out so that the zero-based `nth` element of the array is offset from the start of the array by `n * size_of::()` bytes. ## Slice Layout +r[layout.slice] + Slices have the same layout as the section of the array they slice. > Note: This is about the raw `[T]` type, not pointers (`&[T]`, `Box<[T]>`, > etc.) to slices. ## `str` Layout + +r[layout.str] + String slices are a UTF-8 representation of characters that have the same layout as slices of type `[u8]`. ## Tuple Layout +r[layout.tuple] + +r[layout.tuple.general] Tuples are laid out according to the [`Rust` representation][`Rust`]. +r[layout.tuple.unit] The exception to this is the unit tuple (`()`), which is guaranteed as a zero-sized type to have a size of 0 and an alignment of 1. ## Trait Object Layout +r[layout.trait-object] + Trait objects have the same layout as the value the trait object is of. > Note: This is about the raw trait object types, not pointers (`&dyn Trait`, @@ -109,19 +140,27 @@ Trait objects have the same layout as the value the trait object is of. ## Closure Layout +r[layout.closure] + Closures have no layout guarantees. ## Representations +r[layout.repr] + +r[layout.repr.intro] All user-defined composite types (`struct`s, `enum`s, and `union`s) have a -*representation* that specifies what the layout is for the type. The possible -representations for a type are: +*representation* that specifies what the layout is for the type. 
+ +r[layout.repr.kinds] +The possible representations for a type are: - [`Rust`] (default) - [`C`] - The [primitive representations] - [`transparent`] +r[layout.repr.attribute] The representation of a type can be changed by applying the `repr` attribute to it. The following example shows a struct with a `C` representation. @@ -134,6 +173,7 @@ struct ThreeInts { } ``` +r[layout.repr.align-packed] The alignment may be raised or lowered with the `align` and `packed` modifiers respectively. They alter the representation specified in the attribute. If no representation is specified, the default one is altered. @@ -161,18 +201,24 @@ struct AlignedStruct { > the same name have the same representation. For example, `Foo` and > `Foo` both have the same representation. +r[layout.repr.inter-field] The representation of a type can change the padding between fields, but does not change the layout of the fields themselves. For example, a struct with a -`C` representation that contains a struct `Inner` with the default +`C` representation that contains a struct `Inner` with the `Rust` representation will not change the layout of `Inner`. -### The `Rust` Representation + +### The `Rust` Representation +r[layout.repr.rust] + +r[layout.repr.rust.intro] The `Rust` representation is the default representation for nominal types without a `repr` attribute. Using this representation explicitly through a `repr` attribute is guaranteed to be the same as omitting the attribute entirely. +r[layout.repr.rust.layout] The only data layout guarantees made by this representation are those required for soundness. They are: @@ -180,8 +226,12 @@ for soundness. They are: 2. The fields do not overlap. 3. The alignment of the type is at least the maximum alignment of its fields. +r[layout.repr.rust.alignment] Formally, the first guarantee means that the offset of any field is divisible by -that field's alignment. The second guarantee means that the fields can be +that field's alignment. 
+ +r[layout.repr.rust.field-storage] +The second guarantee means that the fields can be ordered such that the offset plus the size of any field is less than or equal to the offset of the next field in the ordering. The ordering does not have to be the same as the order in which the fields are specified in the declaration of @@ -191,10 +241,14 @@ Be aware that the second guarantee does not imply that the fields have distinct addresses: zero-sized types may have the same address as other fields in the same struct. +r[layout.repr.rust.unspecified] There are no other guarantees of data layout made by this representation. ### The `C` Representation +r[layout.repr.c] + +r[layout.repr.c.intro] The `C` representation is designed for dual purposes. One purpose is for creating types that are interoperable with the C Language. The second purpose is to create types that you can soundly perform operations on that rely on data @@ -203,13 +257,18 @@ layout such as reinterpreting values as a different type. Because of this dual purpose, it is possible to create types that are not useful for interfacing with the C programming language. +r[layout.repr.c.constraint] This representation can be applied to structs, unions, and enums. The exception is [zero-variant enums] for which the `C` representation is an error. #### `#[repr(C)]` Structs +r[layout.repr.c.struct] + +r[layout.repr.c.struct.align] The alignment of the struct is the alignment of the most-aligned field in it. +r[layout.repr.c.struct.size-field-offset] The size and offset of fields is determined by the following algorithm. Start with a current offset of 0 bytes. @@ -270,8 +329,13 @@ struct.size = current_offset + padding_needed_for(current_offset, struct.alignme #### `#[repr(C)]` Unions +r[layout.repr.c.union] + +r[layout.repr.c.union.intro] A union declared with `#[repr(C)]` will have the same size and alignment as an equivalent C union declaration in the C language for the target platform. 
+ +r[layout.repr.c.union.size-align] The union will have a size of the maximum size of all of its fields rounded to its alignment, and an alignment of the maximum alignment of all of its fields. These maximums may come from different fields. @@ -300,6 +364,8 @@ assert_eq!(std::mem::align_of::(), 4); // From a #### `#[repr(C)]` Field-less Enums +r[layout.repr.c.enum] + For [field-less enums], the `C` representation has the size and alignment of the default `enum` size and alignment for the target platform's C ABI. @@ -312,10 +378,16 @@ the default `enum` size and alignment for the target platform's C ABI. #### `#[repr(C)]` Enums With Fields +r[layout.repr.c.adt] + +r[layout.repr.c.adt.intro] The representation of a `repr(C)` enum with fields is a `repr(C)` struct with two fields, also called a "tagged union" in C: +r[layout.repr.c.adt.tag] - a `repr(C)` version of the enum with all fields removed ("the tag") + +r[layout.repr.c.adt.fields] - a `repr(C)` union of `repr(C)` structs for the fields of each variant that had them ("the payload") @@ -378,10 +450,14 @@ struct MyDFields; ### Primitive representations +r[layout.repr.primitive] + +r[layout.repr.primitive.intro] The *primitive representations* are the representations with the same names as the primitive integer types. That is: `u8`, `u16`, `u32`, `u64`, `u128`, `usize`, `i8`, `i16`, `i32`, `i64`, `i128`, and `isize`. +r[layout.repr.primitive.constraint] Primitive representations can only be applied to enumerations and have different behavior whether the enum has fields or no fields. It is an error for [zero-variant enums] to have a primitive representation. Combining @@ -389,6 +465,8 @@ two primitive representations together is an error. #### Primitive Representation of Field-less Enums +r[layout.repr.primitive.enum] + For [field-less enums], primitive representations set the size and alignment to be the same as the primitive type of the same name. 
For example, a field-less enum with a `u8` representation can only have discriminants between 0 and 255 @@ -396,6 +474,8 @@ inclusive. #### Primitive Representation of Enums With Fields +r[layout.repr.primitive.adt] + The representation of a primitive representation enum is a `repr(C)` union of `repr(C)` structs for each variant with a field. The first field of each struct in the union is the primitive representation version of the enum with all fields @@ -450,6 +530,8 @@ struct MyVariantD(MyEnumDiscriminant); #### Combining primitive representations of enums with fields and `#[repr(C)]` +r[layout.repr.primitive-c] + For enums with fields, it is also possible to combine `repr(C)` and a primitive representation (e.g., `repr(C, u8)`). This modifies the [`repr(C)`] by changing the representation of the discriminant enum to the chosen primitive @@ -514,6 +596,9 @@ assert_eq!(std::mem::size_of::(), 4); ### The alignment modifiers +r[layout.repr.alignment] + +r[layout.repr.alignment.intro] The `align` and `packed` modifiers can be used to respectively raise or lower the alignment of `struct`s and `union`s. `packed` may also alter the padding between fields (although it will not alter the padding inside of any field). @@ -522,28 +607,37 @@ of fields in the layout of a struct or the layout of an enum variant, although they may be combined with representations (such as `C`) which do provide such guarantees. +r[layout.repr.alignment.constraint-alignment] The alignment is specified as an integer parameter in the form of `#[repr(align(x))]` or `#[repr(packed(x))]`. The alignment value must be a power of two from 1 up to 2<sup>29</sup>. For `packed`, if no value is given, as in `#[repr(packed)]`, then the value is 1. +r[layout.repr.alignment.align] For `align`, if the specified alignment is less than the alignment of the type without the `align` modifier, then the alignment is unaffected.
+r[layout.repr.alignment.packed] For `packed`, if the specified alignment is greater than the type's alignment without the `packed` modifier, then the alignment and layout is unaffected. + +r[layout.repr.alignment.packed-fields] The alignments of each field, for the purpose of positioning fields, is the smaller of the specified alignment and the alignment of the field's type. + +r[layout.repr.alignment.packed-padding] Inter-field padding is guaranteed to be the minimum required in order to satisfy each field's (possibly altered) alignment (although note that, on its own, `packed` does not provide any guarantee about field ordering). An important consequence of these rules is that a type with `#[repr(packed(1))]` (or `#[repr(packed)]`) will have no inter-field padding. +r[layout.repr.alignment.constraint-exclusive] The `align` and `packed` modifiers cannot be applied on the same type and a `packed` type cannot transitively contain another `align`ed type. `align` and `packed` may only be applied to the [`Rust`] and [`C`] representations. +r[layout.repr.alignment.enum] The `align` modifier can also be applied on an `enum`. When it is, the effect on the `enum`'s alignment is the same as if the `enum` was wrapped in a newtype `struct` with the same `align` modifier. @@ -573,11 +667,15 @@ was wrapped in a newtype `struct` with the same `align` modifier. ### The `transparent` Representation +r[layout.repr.transparent] + +r[layout.repr.transparent.constraint-field] The `transparent` representation can only be used on a [`struct`][structs] or an [`enum`][enumerations] with a single variant that has: - any number of fields with size 0 and alignment 1 (e.g. [`PhantomData`]), and - at most one other field. +r[layout.repr.transparent.layout-abi] Structs and enums with this representation have the same layout and ABI as the only non-size 0 non-alignment 1 field, if present, or unit otherwise. 
@@ -586,6 +684,7 @@ a struct with the `C` representation will always have the ABI of a `C` `struct` while, for example, a struct with the `transparent` representation with a primitive field will have the ABI of the primitive field. +r[layout.repr.transparent.constraint-exclusive] Because this representation delegates type layout to another type, it cannot be used with any other representation. diff --git a/src/types.md b/src/types.md index 0f8263835..a93f93d1f 100644 --- a/src/types.md +++ b/src/types.md @@ -1,14 +1,21 @@ {{#include types-redirect.html}} # Types +r[type] + +r[type.intro] Every variable, item, and value in a Rust program has a type. The _type_ of a *value* defines the interpretation of the memory holding it and the operations that may be performed on the value. +r[type.builtin] Built-in types are tightly integrated into the language, in nontrivial ways -that are not possible to emulate in user-defined types. User-defined types have -limited capabilities. +that are not possible to emulate in user-defined types. + +r[type.user-defined] +User-defined types have limited capabilities. +r[type.kinds] The list of types is: * Primitive types: @@ -37,6 +44,9 @@ The list of types is: ## Type expressions +r[type.name] + +r[type.name.syntax] > **Syntax**\ > _Type_ :\ >       _TypeNoBounds_\ @@ -59,27 +69,47 @@ The list of types is: >    | [_BareFunctionType_]\ >    | [_MacroInvocation_] +r[type.name.intro] A _type expression_ as defined in the _Type_ grammar rule above is the syntax for referring to a type. It may refer to: +r[type.name.sequence] * Sequence types ([tuple], [array], [slice]). + +r[type.name.path] * [Type paths] which can reference: * Primitive types ([boolean], [numeric], [textual]). * Paths to an [item] ([struct], [enum], [union], [type alias], [trait]). * [`Self` path] where `Self` is the implementing type. * Generic [type parameters]. + +r[type.name.pointer] * Pointer types ([reference], [raw pointer], [function pointer]). 
+ +r[type.name.inference] * The [inferred type] which asks the compiler to determine the type. + +r[type.name.grouped] * [Parentheses] which are used for disambiguation. + +r[type.name.trait] * Trait types: [Trait objects] and [impl trait]. + +r[type.name.never] * The [never] type. + +r[type.name.macro-expansion] * [Macros] which expand to a type expression. ### Parenthesized types +r[type.name.parenthesized] + +r[type.name.parenthesized.syntax] > _ParenthesizedType_ :\ >    `(` [_Type_] `)` +r[type.name.parenthesized.intro] In some situations the combination of types may be ambiguous. Use parentheses around a type to avoid ambiguity. For example, the `+` operator for [type boundaries] within a [reference type] is unclear where the @@ -94,10 +124,16 @@ type T<'a> = &'a (dyn Any + Send); ## Recursive types +r[type.recursive] + +r[type.recursive.intro] Nominal types — [structs], [enumerations], and [unions] — may be recursive. That is, each `enum` variant or `struct` or `union` field may refer, directly or indirectly, to the enclosing `enum` or `struct` type -itself. Such recursion has restrictions: +itself. + +r[type.recursive.constraint] +Such recursion has restrictions: * Recursive types must include a nominal type in the recursion (not mere [type aliases], or other structural types such as [arrays] or [tuples]). So `type diff --git a/src/types/array.md b/src/types/array.md index 167954bfe..ef54af1f3 100644 --- a/src/types/array.md +++ b/src/types/array.md @@ -1,12 +1,18 @@ # Array types +r[type.array] + +r[type.array.syntax] > **Syntax**\ > _ArrayType_ :\ >    `[` [_Type_] `;` [_Expression_] `]` +r[type.array.intro] An array is a fixed-size sequence of `N` elements of type `T`. The array type -is written as `[T; N]`. The size is a [constant expression] that evaluates to a -[`usize`]. +is written as `[T; N]`. + +r[type.array.constraint] +The size is a [constant expression] that evaluates to a [`usize`]. 
Examples: @@ -18,6 +24,7 @@ let array: [i32; 3] = [1, 2, 3]; let boxed_array: Box<[i32]> = Box::new([1, 2, 3]); ``` +r[type.array.index] All elements of arrays are always initialized, and access to an array is always bounds-checked in safe methods and operators. diff --git a/src/types/boolean.md b/src/types/boolean.md index a0c07101f..10c6e5de1 100644 --- a/src/types/boolean.md +++ b/src/types/boolean.md @@ -1,31 +1,44 @@ # Boolean type +r[type.bool] + ```rust let b: bool = true; ``` +r[type.bool.intro] The *boolean type* or *bool* is a primitive data type that can take on one of two values, called *true* and *false*. +r[type.bool.literal] Values of this type may be created using a [literal expression] using the keywords `true` and `false` corresponding to the value of the same name. +r[type.bool.namespace] This type is a part of the [language prelude] with the [name] `bool`. -An object with the boolean type has a [size and alignment] of 1 each. The -value false has the bit pattern `0x00` and the value true has the bit pattern +r[type.bool.layout] +An object with the boolean type has a [size and alignment] of 1 each. + +r[type.bool.repr] +The value false has the bit pattern `0x00` and the value true has the bit pattern `0x01`. It is [undefined behavior] for an object with the boolean type to have any other bit pattern. +r[type.bool.usage] The boolean type is the type of many operands in various [expressions]: +r[type.bool.usage-condition] * The condition operand in [if expressions] and [while expressions] + +r[type.bool.usage-lazy-operator] * The operands in [lazy boolean operator expressions][lazy] > **Note**: The boolean type acts similarly to but is not an [enumerated type]. In practice, this mostly means that constructors are not associated to the type (e.g. `bool::true`). 
+r[type.bool.traits] Like all primitives, the boolean type [implements][p-impl] the [traits][p-traits] [`Clone`][p-clone], [`Copy`][p-copy], [`Sized`][p-sized], [`Send`][p-send], and [`Sync`][p-sync]. @@ -34,11 +47,15 @@ Like all primitives, the boolean type [implements][p-impl] the ## Operations on boolean values +r[type.bool.expr] + When using certain operator expressions with a boolean type for its operands, they evaluate using the rules of [boolean logic]. ### Logical not +r[type.bool.expr.not] + | `b` | [`!b`][op-not] | |- | - | | `true` | `false` | @@ -46,6 +63,8 @@ boolean type for its operands, they evaluate using the rules of [boolean logic]. ### Logical or +r[type.bool.expr.or] + | `a` | `b` | [a | b][op-or] | |- | - | - | | `true` | `true` | `true` | @@ -55,6 +74,8 @@ boolean type for its operands, they evaluate using the rules of [boolean logic]. ### Logical and +r[type.bool.expr.and] + | `a` | `b` | [`a & b`][op-and] | |- | - | - | | `true` | `true` | `true` | @@ -64,6 +85,8 @@ boolean type for its operands, they evaluate using the rules of [boolean logic]. ### Logical xor +r[type.bool.expr.xor] + | `a` | `b` | [`a ^ b`][op-xor] | |- | - | - | | `true` | `true` | `false` | @@ -73,6 +96,9 @@ boolean type for its operands, they evaluate using the rules of [boolean logic]. ### Comparisons +r[type.bool.expr.cmp] + +r[type.bool.expr.cmp.eq] | `a` | `b` | [`a == b`][op-compare] | |- | - | - | | `true` | `true` | `true` | @@ -80,6 +106,7 @@ boolean type for its operands, they evaluate using the rules of [boolean logic]. | `false` | `true` | `false` | | `false` | `false` | `true` | +r[type.bool.expr.cmp.greater] | `a` | `b` | [`a > b`][op-compare] | |- | - | - | | `true` | `true` | `false` | @@ -87,13 +114,22 @@ boolean type for its operands, they evaluate using the rules of [boolean logic]. 
| `false` | `true` | `false` | | `false` | `false` | `false` | +r[type.bool.expr.cmp.not-eq] * `a != b` is the same as `!(a == b)` + +r[type.bool.expr.cmp.greater-eq] * `a >= b` is the same as `a == b | a > b` + +r[type.bool.expr.cmp.less] * `a < b` is the same as `!(a >= b)` + +r[type.bool.expr.cmp.less-eq] * `a <= b` is the same as `a == b | a < b` ## Bit validity +r[type.bool.validity] + The single byte of a `bool` is guaranteed to be initialized (in other words, `transmute::<bool, u8>(...)` is always sound -- but since some bit patterns are invalid `bool`s, the inverse is not always sound). diff --git a/src/types/closure.md b/src/types/closure.md index f93537819..d46d7f7ae 100644 --- a/src/types/closure.md +++ b/src/types/closure.md @@ -1,39 +1,52 @@ # Closure types -A [closure expression] produces a closure value with a unique, anonymous type -that cannot be written out. A closure type is approximately equivalent to a -struct which contains the captured variables. For instance, the following -closure: +r[type.closure] + +r[type.closure.intro] +A [closure expression] produces a closure value with a unique, anonymous type that cannot be written out. +A closure type is approximately equivalent to a struct which contains the captured values. +For instance, the following closure: ```rust +#[derive(Debug)] +struct Point { x: i32, y: i32 } +struct Rectangle { left_top: Point, right_bottom: Point } + fn f<F : FnOnce() -> String> (g: F) { println!("{}", g()); } -let mut s = String::from("foo"); -let t = String::from("bar"); - -f(|| { - s += &t; - s -}); -// Prints "foobar". +let mut rect = Rectangle { + left_top: Point { x: 1, y: 1 }, + right_bottom: Point { x: 0, y: 0 } +}; + +let c = || { + rect.left_top.x += 1; + rect.right_bottom.x += 1; + format!("{:?}", rect.left_top) +}; +f(c); // Prints "Point { x: 2, y: 1 }". ``` generates a closure type roughly like the following: + ```rust,ignore +// Note: This is not exactly how it is translated, this is only for +// illustration.
+ struct Closure<'a> { - s : String, - t : &'a String, + left_top : &'a mut Point, + right_bottom_x : &'a mut i32, } impl<'a> FnOnce<()> for Closure<'a> { type Output = String; - fn call_once(self) -> String { - self.s += &*self.t; - self.s + extern "rust-call" fn call_once(self, args: ()) -> String { + self.left_top.x += 1; + *self.right_bottom_x += 1; + format!("{:?}", self.left_top) } } ``` @@ -42,86 +55,432 @@ so that the call to `f` works as if it were: ```rust,ignore -f(Closure{s: s, t: &t}); +// Note: This is not valid Rust due to the duplicate mutable borrows. +// This is only provided as an illustration. +f(Closure{ left_top: &mut rect.left_top, right_bottom_x: &mut rect.left_top.x }); ``` ## Capture modes -The compiler prefers to capture a closed-over variable by immutable borrow, -followed by unique immutable borrow (see below), by mutable borrow, and finally -by move. It will pick the first choice of these that is compatible with how the -captured variable is used inside the closure body. The compiler does not take -surrounding code into account, such as the lifetimes of involved variables, or -of the closure itself. +r[type.closure.capture] + +r[type.closure.capture.intro] +A *capture mode* determines how a [place expression] from the environment is borrowed or moved into the closure. +The capture modes are: + +1. Immutable borrow (`ImmBorrow`) --- The place expression is captured as a [shared reference]. +2. Unique immutable borrow (`UniqueImmBorrow`) --- This is similar to an immutable borrow, but must be unique as described [below](#unique-immutable-borrows-in-captures). +3. Mutable borrow (`MutBorrow`) --- The place expression is captured as a [mutable reference]. +4. Move (`ByValue`) --- The place expression is captured by [moving the value] into the closure. -If the `move` keyword is used, then all captures are by move or, for `Copy` -types, by copy, regardless of whether a borrow would work. 
The `move` keyword is -usually used to allow the closure to outlive the captured values, such as if the -closure is being returned or used to spawn a new thread. +r[type.closure.capture.precedence] +Place expressions from the environment are captured from the first mode that is compatible with how the captured value is used inside the closure body. +The mode is not affected by the code surrounding the closure, such as the lifetimes of involved variables or fields, or of the closure itself. -Composite types such as structs, tuples, and enums are always captured entirely, -not by individual fields. It may be necessary to borrow into a local variable in -order to capture a single field: +[moving the value]: ../expressions.md#moved-and-copied-types +[mutable reference]: pointer.md#mutable-references-mut +[place expression]: ../expressions.md#place-expressions-and-value-expressions +[shared reference]: pointer.md#references--and-mut + +### `Copy` values + +r[type.closure.capture.copy] +Values that implement [`Copy`] that are moved into the closure are captured with the `ImmBorrow` mode. ```rust -# use std::collections::HashSet; -# -struct SetVec { - set: HashSet, - vec: Vec +let x = [0; 1024]; +let c = || { + let y = x; // x captured by ImmBorrow +}; +``` + +## Capture Precision + +r[type.closure.capture.precision.capture-path] +A *capture path* is a sequence starting with a variable from the environment followed by zero or more place projections that were applied to that variable. + +r[type.closure.capture.precision.place-projection] +A *place projection* is a [field access], [tuple index], [dereference] (and automatic dereferences), or [array or slice index] expression applied to a variable. + +r[type.closure.capture.precision.intro] +The closure borrows or moves the capture path, which may be truncated based on the rules described below. 
+ +For example: + +```rust +struct SomeStruct { + f1: (i32, i32), } +let s = SomeStruct { f1: (1, 2) }; -impl SetVec { - fn populate(&mut self) { - let vec = &mut self.vec; - self.set.iter().for_each(|&n| { - vec.push(n); - }) - } +let c = || { + let x = s.f1.1; // s.f1.1 captured by ImmBorrow +}; +c(); +``` + +Here the capture path is the local variable `s`, followed by a field access `.f1`, and then a tuple index `.1`. +This closure captures an immutable borrow of `s.f1.1`. + +[field access]: ../expressions/field-expr.md +[tuple index]: ../expressions/tuple-expr.md#tuple-indexing-expressions +[dereference]: ../expressions/operator-expr.md#the-dereference-operator +[array or slice index]: ../expressions/array-expr.md#array-and-slice-indexing-expressions + +### Shared prefix + +r[type.closure.capture.precision.shared-prefix] +In the case where a capture path and one of the ancestors of that path are both captured by a closure, the ancestor path is captured with the highest capture mode among the two captures, `CaptureMode = max(AncestorCaptureMode, DescendantCaptureMode)`, using the strict weak ordering: + +`ImmBorrow < UniqueImmBorrow < MutBorrow < ByValue` + +Note that this might need to be applied recursively. + +```rust +// In this example, there are three different capture paths with a shared ancestor: +# fn move_value<T>(_: T){} +let s = String::from("S"); +let t = (s, String::from("T")); +let mut u = (t, String::from("U")); + +let c = || { + println!("{:?}", u); // u captured by ImmBorrow + u.1.truncate(0); // u.1 captured by MutBorrow + move_value(u.0.0); // u.0.0 captured by ByValue +}; +c(); +``` + +Overall this closure will capture `u` by `ByValue`. + +### Rightmost shared reference truncation + +r[type.closure.capture.precision.dereference-shared] +The capture path is truncated at the rightmost dereference in the capture path if the dereference is applied to a shared reference.
+ +This truncation is allowed because fields that are read through a shared reference will always be read via a shared reference or a copy. +This helps reduce the size of the capture when the extra precision does not yield any benefit from a borrow checking perspective. + +The reason it is the *rightmost* dereference is to help avoid a shorter lifetime than is necessary. +Consider the following example: + +```rust +struct Int(i32); +struct B<'a>(&'a i32); + +struct MyStruct<'a> { + a: &'static Int, + b: B<'a>, } + +fn foo<'a, 'b>(m: &'a MyStruct<'b>) -> impl FnMut() + 'static { + let c = || drop(&m.a.0); + c +} +``` + +If this were to capture `m`, then the closure would no longer outlive `'static`, since `m` is constrained to `'a`. Instead, it captures `(*(*m).a)` by `ImmBorrow`. + +### Wildcard pattern bindings + +r[type.closure.capture.precision.wildcard] +Closures only capture data that needs to be read. +Binding a value with a [wildcard pattern] does not count as a read, and thus won't be captured. +For example, the following closures will not capture `x`: + +```rust +let x = String::from("hello"); +let c = || { + let _ = x; // x is not captured +}; +c(); + +let c = || match x { // x is not captured + _ => println!("Hello World!") +}; +c(); +``` + +This also includes destructuring of tuples, structs, and enums. +Fields matched with the [_RestPattern_] or [_StructPatternEtCetera_] are also not considered as read, and thus those fields will not be captured. +The following illustrates some of these: + +```rust +let x = (String::from("a"), String::from("b")); +let c = || { + let (first, ..) = x; // captures `x.0` ByValue +}; +// The first tuple field has been moved into the closure. +// The second tuple field is still accessible. +println!("{:?}", x.1); +c(); +``` + +```rust +struct Example { + f1: String, + f2: String, +} + +let e = Example { + f1: String::from("first"), + f2: String::from("second"), +}; +let c = || { + let Example { f2, .. 
} = e; // captures `e.f2` ByValue +}; +// Field f2 cannot be accessed since it is moved into the closure. +// Field f1 is still accessible. +println!("{:?}", e.f1); +c(); +``` + +r[type.closure.capture.precision.wildcard.array-slice] +Partial captures of arrays and slices are not supported; the entire slice or array is always captured even if used with wildcard pattern matching, indexing, or sub-slicing. +For example: + +```rust,compile_fail,E0382 +#[derive(Debug)] +struct Example; +let x = [Example, Example]; + +let c = || { + let [first, _] = x; // captures all of `x` ByValue +}; +c(); +println!("{:?}", x[1]); // ERROR: borrow of moved value: `x` +``` + +r[type.closure.capture.precision.wildcard.initialized] +Values that are matched with wildcards must still be initialized. + +```rust,compile_fail,E0381 +let x: i32; +let c = || { + let _ = x; // ERROR: used binding `x` isn't initialized +}; +``` + +[_RestPattern_]: ../patterns.md#rest-patterns +[_StructPatternEtCetera_]: ../patterns.md#struct-patterns +[wildcard pattern]: ../patterns.md#wildcard-pattern + +### Capturing references in move contexts + +r[type.closure.capture.precision.move-dereference] +Because it is not allowed to move fields out of a reference, `move` closures will only capture the prefix of a capture path that runs up to, but not including, the first dereference of a reference. +The reference itself will be moved into the closure. + +```rust +struct T(String, String); + +let mut t = T(String::from("foo"), String::from("bar")); +let t_mut_ref = &mut t; +let mut c = move || { + t_mut_ref.0.push_str("123"); // captures `t_mut_ref` ByValue +}; +c(); +``` + +### Raw pointer dereference + +r[type.closure.capture.precision.raw-pointer-dereference] +Because it is `unsafe` to dereference a raw pointer, closures will only capture the prefix of a capture path that runs up to, but not including, the first dereference of a raw pointer. 
+ +```rust +struct T(String, String); + +let t = T(String::from("foo"), String::from("bar")); +let t_ptr = &t as *const T; + +let c = || unsafe { + println!("{}", (*t_ptr).0); // captures `t_ptr` by ImmBorrow +}; +c(); +``` + +### Union fields + +r[type.closure.capture.precision.union] +Because it is `unsafe` to access a union field, closures will only capture the prefix of a capture path that runs up to the union itself. + +```rust +union U { + a: (i32, i32), + b: bool, +} +let u = U { a: (123, 456) }; + +let c = || { + let x = unsafe { u.a.0 }; // captures `u` ByValue +}; +c(); + +// This also includes writing to fields. +let mut u = U { a: (123, 456) }; + +let mut c = || { + u.b = true; // captures `u` with MutBorrow +}; +c(); +``` + +### Reference into unaligned `struct`s + +r[type.closure.capture.precision.unaligned] +Because it is [undefined behavior] to create references to unaligned fields in a structure, +closures will only capture the prefix of the capture path that runs up to, but not including, the first field access into a structure that uses [the `packed` representation]. +This includes all fields, even those that are aligned, to protect against compatibility concerns should any of the fields in the structure change in the future. + +```rust +#[repr(packed)] +struct T(i32, i32); + +let t = T(2, 5); +let c = || { + let a = t.0; // captures `t` with ImmBorrow +}; +// Copies out of `t` are ok. 
+let (a, b) = (t.0, t.1); +c(); +``` + +Similarly, taking the address of an unaligned field also captures the entire struct: + +```rust,compile_fail,E0505 +#[repr(packed)] +struct T(String, String); + +let mut t = T(String::new(), String::new()); +let c = || { + let a = std::ptr::addr_of!(t.1); // captures `t` with ImmBorrow +}; +let a = t.0; // ERROR: cannot move out of `t.0` because it is borrowed +c(); +``` + +but the above works if it is not packed since it captures the field precisely: + +```rust +struct T(String, String); + +let mut t = T(String::new(), String::new()); +let c = || { + let a = std::ptr::addr_of!(t.1); // captures `t.1` with ImmBorrow +}; +// The move here is allowed. +let a = t.0; +c(); ``` -If, instead, the closure were to use `self.vec` directly, then it would attempt -to capture `self` by mutable reference. But since `self.set` is already -borrowed to iterate over, the code would not compile. +[undefined behavior]: ../behavior-considered-undefined.md +[the `packed` representation]: ../type-layout.md#the-alignment-modifiers + +### `Box` vs other `Deref` implementations + +r[type.closure.capture.precision.box-deref] +The implementation of the [`Deref`] trait for [`Box`] is treated differently from other `Deref` implementations, as it is considered a special entity. + +For example, let us look at examples involving `Rc` and `Box`. The `*rc` is desugared to a call to the trait method `deref` defined on `Rc`, but since `*box` is treated differently, it is possible to do a precise capture of the contents of the `Box`. + +[`Box`]: ../special-types-and-traits.md#boxt +[`Deref`]: ../special-types-and-traits.md#deref-and-derefmut + +#### `Box` with non-`move` closure + +r[type.closure.capture.precision.box-non-move.not-moved] +In a non-`move` closure, if the contents of the `Box` are not moved into the closure body, the contents of the `Box` are precisely captured. 
+ +```rust +struct S(String); + +let b = Box::new(S(String::new())); +let c_box = || { + let x = &(*b).0; // captures `(*b).0` by ImmBorrow +}; +c_box(); + +// Contrast `Box` with another type that implements Deref: +let r = std::rc::Rc::new(S(String::new())); +let c_rc = || { + let x = &(*r).0; // captures `r` by ImmBorrow +}; +c_rc(); +``` + +r[type.closure.capture.precision.box-non-move.moved] +However, if the contents of the `Box` are moved into the closure, then the box is entirely captured. This is done so the amount of data that needs to be moved into the closure is minimized. + +```rust +// This is the same as the example above except the closure +// moves the value instead of taking a reference to it. + +struct S(String); + +let b = Box::new(S(String::new())); +let c_box = || { + let x = (*b).0; // captures `b` with ByValue +}; +c_box(); +``` + +#### `Box` with move closure + +r[type.closure.capture.precision.box-move.read] +Similarly to moving contents of a `Box` in a non-`move` closure, reading the contents of a `Box` in a `move` closure will capture the `Box` entirely. + +```rust +struct S(i32); + +let b = Box::new(S(10)); +let c_box = move || { + let x = (*b).0; // captures `b` with ByValue +}; +``` ## Unique immutable borrows in captures -Captures can occur by a special kind of borrow called a _unique immutable -borrow_, which cannot be used anywhere else in the language and cannot be -written out explicitly. It occurs when modifying the referent of a mutable -reference, as in the following example: +r[type.closure.unique-immutable] +Captures can occur by a special kind of borrow called a _unique immutable borrow_, +which cannot be used anywhere else in the language and cannot be written out explicitly. 
+It occurs when modifying the referent of a mutable reference, as in the following example: ```rust let mut b = false; let x = &mut b; -{ - let mut c = || { *x = true; }; - // The following line is an error: - // let y = &x; - c(); -} +let mut c = || { + // An ImmBorrow and a MutBorrow of `x`. + let a = &x; + *x = true; // `x` captured by UniqueImmBorrow +}; +// The following line is an error: +// let y = &x; +c(); +// However, the following is OK. let z = &x; ``` In this case, borrowing `x` mutably is not possible, because `x` is not `mut`. But at the same time, borrowing `x` immutably would make the assignment illegal, -because a `& &mut` reference might not be unique, so it cannot safely be used to -modify a value. So a unique immutable borrow is used: it borrows `x` immutably, -but like a mutable borrow, it must be unique. In the above example, uncommenting -the declaration of `y` will produce an error because it would violate the -uniqueness of the closure's borrow of `x`; the declaration of z is valid because -the closure's lifetime has expired at the end of the block, releasing the borrow. +because a `& &mut` reference might not be unique, so it cannot safely be used to modify a value. +So a unique immutable borrow is used: it borrows `x` immutably, but like a mutable borrow, it must be unique. + +In the above example, uncommenting the declaration of `y` will produce an error because it would violate the uniqueness of the closure's borrow of `x`; the declaration of `z` is valid because the closure is not used again after the call to `c()`, so its borrow of `x` has been released. ## Call traits and coercions +r[type.closure.call] + +r[type.closure.call.intro] Closure types all implement [`FnOnce`], indicating that they can be called once by consuming ownership of the closure.
Additionally, some closures implement more specific call traits: +r[type.closure.call.fn-mut] * A closure which does not move out of any captured variables implements [`FnMut`], indicating that it can be called by mutable reference. +r[type.closure.call.fn] * A closure which does not mutate or move out of any captured variables implements [`Fn`], indicating that it can be called by shared reference. @@ -130,6 +489,7 @@ more specific call traits: > closure type are determined by what the closure does with captured values, > not how it captures them. +r[type.closure.non-capturing] *Non-capturing closures* are closures that don't capture anything from their environment. They can be coerced to function pointers (e.g., `fn()`) with the matching signature. @@ -146,6 +506,9 @@ x = bo(5,7); ## Other traits +r[type.closure.traits] + +r[type.closure.traits.intro] All closure types implement [`Sized`]. Additionally, closure types implement the following traits if allowed to do so by the types of the captures it stores: @@ -154,14 +517,16 @@ following traits if allowed to do so by the types of the captures it stores: * [`Sync`] * [`Send`] +r[type.closure.traits.behavior] The rules for [`Send`] and [`Sync`] match those for normal struct types, while [`Clone`] and [`Copy`] behave as if [derived]. For [`Clone`], the order of -cloning of the captured variables is left unspecified. +cloning of the captured values is left unspecified. + Because captures are often by reference, the following general rules arise: -* A closure is [`Sync`] if all captured variables are [`Sync`]. -* A closure is [`Send`] if all variables captured by non-unique immutable +* A closure is [`Sync`] if all captured values are [`Sync`]. +* A closure is [`Send`] if all values captured by non-unique immutable reference are [`Sync`], and all values captured by unique immutable or mutable reference, copy, or move are [`Send`]. 
* A closure is [`Clone`] or [`Copy`] if it does not capture any values by @@ -175,3 +540,101 @@ Because captures are often by reference, the following general rules arise: [`Sync`]: ../special-types-and-traits.md#sync [closure expression]: ../expressions/closure-expr.md [derived]: ../attributes/derive.md + +## Drop Order + +r[type.closure.drop-order] +If a closure captures a field of a composite type such as a struct, tuple, or enum by value, the field's lifetime would now be tied to the closure. As a result, it is possible for disjoint fields of a composite type to be dropped at different times. + +```rust +{ + let tuple = + (String::from("foo"), String::from("bar")); // --+ + { // | + let c = || { // ----------------------------+ | + // tuple.0 is captured into the closure | | + drop(tuple.0); // | | + }; // | | + } // 'c' and 'tuple.0' dropped here ------------+ | +} // tuple.1 dropped here -----------------------------+ +``` + +## Edition 2018 and before + +### Closure types difference + +r[type.closure.capture.precision.edition2018.entirety] +In Edition 2018 and before, closures always capture a variable in its entirety, without its precise capture path. This means that for the example used in the [Closure types](#closure-types) section, the generated closure type would instead look something like this: + + +```rust,ignore +struct Closure<'a> { + rect : &'a mut Rectangle, +} + +impl<'a> FnOnce<()> for Closure<'a> { + type Output = String; + extern "rust-call" fn call_once(self, args: ()) -> String { + self.rect.left_top.x += 1; + self.rect.right_bottom.x += 1; + format!("{:?}", self.rect.left_top) + } +} +``` + +and the call to `f` would work as follows: + + +```rust,ignore +f(Closure { rect: rect }); +``` + +### Capture precision difference + +r[type.closure.capture.precision.edition2018.composite] +Composite types such as structs, tuples, and enums are always captured in their entirety, +not by individual fields.
As a result, it may be necessary to borrow into a local variable in order to capture a single field: + +```rust +# use std::collections::HashSet; +# +struct SetVec { + set: HashSet, + vec: Vec +} + +impl SetVec { + fn populate(&mut self) { + let vec = &mut self.vec; + self.set.iter().for_each(|&n| { + vec.push(n); + }) + } +} +``` + +If, instead, the closure were to use `self.vec` directly, then it would attempt to capture `self` by mutable reference. But since `self.set` is already borrowed to iterate over, the code would not compile. + +r[type.closure.capture.precision.edition2018.move] +If the `move` keyword is used, then all captures are by move or, for `Copy` types, by copy, regardless of whether a borrow would work. The `move` keyword is usually used to allow the closure to outlive the captured values, such as if the closure is being returned or used to spawn a new thread. + +r[type.closure.capture.precision.edition2018.wildcard] +Regardless of whether the data will be read by the closure (for example, when it is only matched by a wildcard pattern), if a variable defined outside the closure is mentioned within the closure, the variable will be captured in its entirety. + +### Drop order difference + +r[type.closure.capture.precision.edition2018.drop-order] +As composite types are captured in their entirety, a closure which captures one of those composite types by value would drop the entire captured variable at the same time as the closure gets dropped.
+ +```rust +{ + let tuple = + (String::from("foo"), String::from("bar")); + { + let c = || { // --------------------------+ + // tuple is captured into the closure | + drop(tuple.0); // | + }; // | + } // 'c' and 'tuple' dropped here ------------+ +} +``` diff --git a/src/types/enum.md b/src/types/enum.md index 1ee6fc608..a3ae2878f 100644 --- a/src/types/enum.md +++ b/src/types/enum.md @@ -1,17 +1,24 @@ # Enumerated types +r[type.enum] + +r[type.enum.intro] An *enumerated type* is a nominal, heterogeneous disjoint union type, denoted by the name of an [`enum` item]. [^enumtype] +r[type.enum.declaration] An [`enum` item] declares both the type and a number of *variants*, each of which is independently named and has the syntax of a struct, tuple struct or unit-like struct. +r[type.enum.constructor] New instances of an `enum` can be constructed with a [struct expression]. +r[type.enum.value] Any `enum` value consumes as much memory as the largest variant for its corresponding `enum` type, as well as the size needed to store a discriminant. +r[type.enum.name] Enum types cannot be denoted *structurally* as types, but must be denoted by named reference to an [`enum` item]. diff --git a/src/types/function-item.md b/src/types/function-item.md index 3221f3e2b..81a867e3c 100644 --- a/src/types/function-item.md +++ b/src/types/function-item.md @@ -1,13 +1,19 @@ # Function item types +r[type.fn-item] + +r[type.fn-item.intro] When referred to, a function item, or the constructor of a tuple-like struct or -enum variant, yields a zero-sized value of its _function item type_. That type -explicitly identifies the function - its name, its type arguments, and its +enum variant, yields a zero-sized value of its _function item type_. 
+ +r[type.fn-item.unique] +That type explicitly identifies the function - its name, its type arguments, and its early-bound lifetime arguments (but not its late-bound lifetime arguments, which are only assigned when the function is called) - so the value does not need to contain an actual function pointer, and no indirection is needed when the function is called. +r[type.fn-item.name] There is no syntax that directly refers to a function item type, but the compiler will display the type as something like `fn(u32) -> i32 {fn_name}` in error messages. @@ -22,6 +28,7 @@ let x = &mut foo::; *x = foo::; //~ ERROR mismatched types ``` +r[type.fn-item.coercion] However, there is a [coercion] from function items to [function pointers] with the same signature, which is triggered not only when a function item is used when a function pointer is directly expected, but also when different function @@ -43,6 +50,7 @@ let foo_ptr_2 = if want_i32 { }; ``` +r[type.fn-item.traits] All function items implement [`Fn`], [`FnMut`], [`FnOnce`], [`Copy`], [`Clone`], [`Send`], and [`Sync`]. diff --git a/src/types/function-pointer.md b/src/types/function-pointer.md index 82103beaa..d7950b159 100644 --- a/src/types/function-pointer.md +++ b/src/types/function-pointer.md @@ -1,5 +1,8 @@ # Function pointer types +r[type.fn-pointer] + +r[type.fn-pointer.syntax] > **Syntax**\ > _BareFunctionType_ :\ >    [_ForLifetimes_]? _FunctionTypeQualifiers_ `fn`\ @@ -23,15 +26,27 @@ > _MaybeNamedFunctionParametersVariadic_ :\ >    ( _MaybeNamedParam_ `,` )\* _MaybeNamedParam_ `,` [_OuterAttribute_]\* `...` +r[type.fn-pointer.intro] Function pointer types, written using the `fn` keyword, refer to a function -whose identity is not necessarily known at compile-time. They can be created -via a coercion from both [function items] and non-capturing [closures]. +whose identity is not necessarily known at compile-time. 
+ +r[type.fn-pointer.coercion] +They can be created via a coercion from both [function items] and non-capturing [closures]. +r[type.fn-pointer.qualifiers] The `unsafe` qualifier indicates that the type's value is an [unsafe function], and the `extern` qualifier indicates it is an [extern function]. +r[type.fn-pointer.constraint-variadic] Variadic parameters can only be specified with [`extern`] function types with -the `"C"` or `"cdecl"` calling convention. +these calling conventions: +* `C` +* `cdecl` +* `system` +* `aapcs` +* `sysv64` +* `win64` +* `efiapi` An example where `Binop` is defined as a function pointer type: @@ -49,6 +64,8 @@ x = bo(5,7); ## Attributes on function pointer parameters +r[type.fn-pointer.attributes] + Attributes on function pointer parameters follow the same rules and restrictions as [regular function parameters]. diff --git a/src/types/impl-trait.md b/src/types/impl-trait.md index 7e99949cf..bf5f8622f 100644 --- a/src/types/impl-trait.md +++ b/src/types/impl-trait.md @@ -1,10 +1,14 @@ # Impl trait +r[type.impl-trait] + +r[type.impl-trait.syntax] > **Syntax**\ > _ImplTraitType_ : `impl` [_TypeParamBounds_] > > _ImplTraitTypeOneBound_ : `impl` [_TraitBound_] +r[type.impl-trait.intro] `impl Trait` provides ways to specify unnamed but concrete types that implement a specific trait. It can appear in two sorts of places: argument position (where it can act as an anonymous type parameter to functions), and return position (where it can act as an abstract return type). @@ -23,9 +27,12 @@ fn bar() -> impl Trait { ``` ## Anonymous type parameters +r[type.impl-trait.param] + > Note: This is often called "impl Trait in argument position". (The term "parameter" is more correct here, but "impl Trait in argument position" is the phrasing used during the development of this feature, and it remains in parts of the implementation.) 
+r[type.impl-trait.param.intro] Functions can use `impl` followed by a set of trait bounds to declare a parameter as having an anonymous type. The caller must provide a type that satisfies the bounds declared by the anonymous type parameter, and the function can only use the methods available through the trait bounds of the anonymous type parameter. @@ -43,6 +50,7 @@ fn with_impl_trait(arg: impl Trait) { } ``` +r[type.impl-trait.param.generic] That is, `impl Trait` in argument position is syntactic sugar for a generic type parameter like `<T: Trait>`, except that the type is anonymous and doesn't appear in the [_GenericParams_] list. > **Note:** @@ -52,10 +60,15 @@ That is, `impl Trait` in argument position is syntactic sugar for a generic type ## Abstract return types +r[type.impl-trait.return] + > Note: This is often called "impl Trait in return position". +r[type.impl-trait.return.intro] Functions can use `impl Trait` to return an abstract return type. These types stand in for another concrete type where the caller may only use the methods declared by the specified `Trait`. + +r[type.impl-trait.return.constraint-body] Each possible return value from the function must resolve to the same concrete type. `impl Trait` in return position allows a function to return an unboxed abstract type. @@ -87,22 +100,35 @@ Returning `impl Iterator` means that a function only exposes the `Iterator` trai ## Return-position `impl Trait` in traits and trait implementations +r[type.impl-trait.return-in-trait] + +r[type.impl-trait.return-in-trait.intro] Functions in traits may also use `impl Trait` as a syntax for an anonymous associated type. +r[type.impl-trait.return-in-trait.desugaring] Every `impl Trait` in the return type of an associated function in a trait is desugared to an anonymous associated type. The return type that appears in the implementation's function signature is used to determine the value of the associated type. 
## Capturing +r[type.impl-trait.generic-captures] + Behind each return-position `impl Trait` abstract type is some hidden concrete type. For this concrete type to use a generic parameter, that generic parameter must be *captured* by the abstract type. ## Automatic capturing -Return-position `impl Trait` abstract types automatically capture certain of the in-scope generic parameters. Everywhere, these automatically capture all in-scope type and const generic parameters. +r[type.impl-trait.generic-capture.auto] + +r[type.impl-trait.generic-capture.auto.intro] +Return-position `impl Trait` abstract types automatically capture all in-scope generic parameters, including generic type, const, and lifetime parameters (including higher-ranked ones). -On items of trait impls and trait definitions, these types additionally automatically capture all in-scope generic lifetime parameters, including higher-ranked ones. On free functions and on associated functions and methods of inherent impls, only the generic lifetime parameters that appear in the bounds of abstract return type are captured. +r[type.impl-trait.generic-capture.edition2024] +> **Edition differences**: Before the 2024 edition, on free functions and on associated functions and methods of inherent impls, generic lifetime parameters that do not appear in the bounds of the abstract return type are not automatically captured. ## Precise capturing +r[type.impl-trait.generic-capture.precise] + +r[type.impl-trait.generic-capture.precise.use] The set of generic parameters captured by a return-position `impl Trait` abstract type may be explicitly controlled with a [`use<..>` bound]. If present, only the generic parameters listed in the `use<..>` bound will be captured. 
E.g.: ```rust @@ -113,8 +139,13 @@ fn capture<'a, 'b, T>(x: &'a (), y: T) -> impl Sized + use<'a, T> { } ``` -Currently, only one `use<..>` bound may be present in a bounds list, such bounds are not allowed in the signature of items of a trait definition, all in-scope type and const generic parameters must be included, and all lifetime parameters that appear in other bounds of the abstract type must be included. Within the `use<..>` bound, any lifetime parameters present must appear before all type and const generic parameters, and the elided lifetime (`'_`) may be present if it is otherwise allowed to appear within the `impl Trait` return type. +r[type.impl-trait.generic-capture.precise.constraint-single] +Currently, only one `use<..>` bound may be present in a bounds list, such bounds are not allowed in the signature of items of a trait definition, all in-scope type and const generic parameters must be included, and all lifetime parameters that appear in other bounds of the abstract type must be included. +r[type.impl-trait.generic-capture.precise.constraint-lifetime] +Within the `use<..>` bound, any lifetime parameters present must appear before all type and const generic parameters, and the elided lifetime (`'_`) may be present if it is otherwise allowed to appear within the `impl Trait` return type. + +r[type.impl-trait.generic-capture.precise.constraint-param-impl-trait] Because all in-scope type parameters must be included by name, a `use<..>` bound may not be used in the signature of items that use argument-position `impl Trait`, as those items have anonymous type parameters in scope. ## Differences between generics and `impl Trait` in return position @@ -150,6 +181,8 @@ Instead, the function chooses the return type, but only promises that it will im ## Limitations +r[type.impl-trait.constraint] + `impl Trait` can only appear as a parameter or return type of a non-`extern` function. 
It cannot be the type of a `let` binding, field type, or appear inside a type alias. diff --git a/src/types/inferred.md b/src/types/inferred.md index c33ebd91c..7179826c4 100644 --- a/src/types/inferred.md +++ b/src/types/inferred.md @@ -1,11 +1,19 @@ # Inferred type +r[type.inferred] + +r[type.inferred.syntax] > **Syntax**\ > _InferredType_ : `_` +r[type.inferred.intro] The inferred type asks the compiler to infer the type if possible based on the -surrounding information available. It cannot be used in item signatures. It is -often used in generic arguments: +surrounding information available. + +r[type.inferred.constraint] +It cannot be used in item signatures. + +It is often used in generic arguments: ```rust let x: Vec<_> = (0..10).collect(); diff --git a/src/types/never.md b/src/types/never.md index 7f58a3ace..702281db2 100644 --- a/src/types/never.md +++ b/src/types/never.md @@ -1,12 +1,19 @@ # Never type +r[type.never] + +r[type.never.syntax] > **Syntax**\ > _NeverType_ : `!` +r[type.never.intro] The never type `!` is a type with no values, representing the result of -computations that never complete. Expressions of type `!` can be coerced into -any other type. +computations that never complete. + +r[type.never.coercion] +Expressions of type `!` can be coerced into any other type. +r[type.never.constraint] The `!` type can **only** appear in function return types presently, indicating it is a diverging function that never returns. 
diff --git a/src/types/numeric.md b/src/types/numeric.md index bd59daa6b..88178d123 100644 --- a/src/types/numeric.md +++ b/src/types/numeric.md @@ -1,7 +1,12 @@ # Numeric types +r[type.numeric] + ## Integer types +r[type.numeric.int] + +r[type.numeric.int.unsigned] The unsigned integer types consist of: Type | Minimum | Maximum @@ -12,6 +17,7 @@ Type | Minimum | Maximum `u64` | 0 | 2<sup>64</sup>-1 `u128` | 0 | 2<sup>128</sup>-1 +r[type.numeric.int.signed] The signed two's complement integer types consist of: Type | Minimum | Maximum @@ -25,20 +31,27 @@ Type | Minimum | Maximum ## Floating-point types +r[type.numeric.float] + The IEEE 754-2008 "binary32" and "binary64" floating-point types are `f32` and `f64`, respectively. ## Machine-dependent integer types +r[type.numeric.int.size] + +r[type.numeric.int.size.usize] The `usize` type is an unsigned integer type with the same number of bits as the platform's pointer type. It can represent every memory address in the process. +r[type.numeric.int.size.isize] The `isize` type is a signed integer type with the same number of bits as the platform's pointer type. The theoretical upper bound on object and array size is the maximum `isize` value. This ensures that `isize` can be used to calculate differences between pointers into an object or array and can address every byte within an object along with one byte past the end. +r[type.numeric.int.size.minimum] `usize` and `isize` are at least 16-bits wide. > **Note**: Many pieces of Rust code may assume that pointers, `usize`, and @@ -48,5 +61,7 @@ within an object along with one byte past the end. ## Bit validity +r[type.numeric.validity] + For every numeric type, `T`, the bit validity of `T` is equivalent to the bit validity of `[u8; size_of::<T>()]`. An uninitialized byte is not a valid `u8`. 
diff --git a/src/types/parameters.md b/src/types/parameters.md index 7b9e7e64e..89b7df9ec 100644 --- a/src/types/parameters.md +++ b/src/types/parameters.md @@ -1,5 +1,7 @@ # Type parameters +r[type.generic] + Within the body of an item that has type parameter declarations, the names of its type parameters are types: diff --git a/src/types/pointer.md b/src/types/pointer.md index 7299ce75e..0f24d6bce 100644 --- a/src/types/pointer.md +++ b/src/types/pointer.md @@ -1,61 +1,96 @@ # Pointer types +r[type.pointer] + +r[type.pointer.intro] All pointers are explicit first-class values. They can be moved or copied, stored into data structs, and returned from functions. ## References (`&` and `&mut`) +r[type.pointer.reference] + +r[type.pointer.reference.syntax] > **Syntax**\ > _ReferenceType_ :\ >    `&` [_Lifetime_]? `mut`? [_TypeNoBounds_] ### Shared references (`&`) +r[type.pointer.reference.shared] + +r[type.pointer.reference.shared.intro] Shared references point to memory which is owned by some other value. + +r[type.pointer.reference.shared.constraint-mutation] When a shared reference to a value is created, it prevents direct mutation of the value. [Interior mutability] provides an exception for this in certain circumstances. As the name suggests, any number of shared references to a value may exist. A shared reference type is written `&type`, or `&'a type` when you need to specify an explicit lifetime. + +r[type.pointer.reference.shared.copy] Copying a reference is a "shallow" operation: it involves only copying the pointer itself, that is, pointers are `Copy`. Releasing a reference has no effect on the value it points to, but referencing of a [temporary value] will keep it alive during the scope of the reference itself. ### Mutable references (`&mut`) +r[type.pointer.reference.mut] + +r[type.pointer.reference.mut.intro] Mutable references point to memory which is owned by some other value. A mutable reference type is written `&mut type` or `&'a mut type`. 
+ +r[type.pointer.reference.mut.copy] A mutable reference (that hasn't been borrowed) is the only way to access the value it points to, so is not `Copy`. ## Raw pointers (`*const` and `*mut`) +r[type.pointer.raw] + +r[type.pointer.raw.syntax] > **Syntax**\ > _RawPointerType_ :\ >    `*` ( `mut` | `const` ) [_TypeNoBounds_] +r[type.pointer.raw.intro] Raw pointers are pointers without safety or liveness guarantees. Raw pointers are written as `*const T` or `*mut T`. For example `*const i32` means a raw pointer to a 32-bit integer. + +r[type.pointer.raw.copy] Copying or dropping a raw pointer has no effect on the lifecycle of any other value. + +r[type.pointer.raw.safety] Dereferencing a raw pointer is an [`unsafe` operation]. + This can also be used to convert a raw pointer to a reference by reborrowing it (`&*` or `&mut *`). Raw pointers are generally discouraged; they exist to support interoperability with foreign code, and writing performance-critical or low-level functions. +r[type.pointer.raw.cmp] When comparing raw pointers they are compared by their address, rather than by what they point to. When comparing raw pointers to [dynamically sized types] they also have their additional data compared. +r[type.pointer.raw.constructor] Raw pointers can be created directly using `&raw const` for `*const` pointers and `&raw mut` for `*mut` pointers. ## Smart Pointers +r[type.pointer.smart] + The standard library contains additional 'smart pointer' types beyond references and raw pointers. ## Bit validity +r[type.pointer.validity] + +r[type.pointer.validity.pointer-fragment] Despite pointers and references being similar to `usize`s in the machine code emitted on most platforms, the semantics of transmuting a reference or pointer type to a non-pointer type is currently undecided. Thus, it may not be valid to transmute a pointer or reference type, `P`, to a `[u8; size_of::

()]`. +r[type.pointer.validity.raw] For thin raw pointers (i.e., for `P = *const T` or `P = *mut T` for `T: Sized`), the inverse direction (transmuting from an integer or array of integers to `P`) is always valid. However, the pointer produced via such a transmutation may not be dereferenced (not even if `T` has size zero). diff --git a/src/types/slice.md b/src/types/slice.md index 6ba5e7d21..79c340a87 100644 --- a/src/types/slice.md +++ b/src/types/slice.md @@ -1,12 +1,17 @@ # Slice types +r[type.slice] + +r[type.slice.syntax] > **Syntax**\ > _SliceType_ :\ >    `[` [_Type_] `]` +r[type.slice.intro] A slice is a [dynamically sized type] representing a 'view' into a sequence of elements of type `T`. The slice type is written as `[T]`. +r[type.slice.unsized] Slice types are generally used through pointer types. For example: * `&[T]`: a 'shared slice', often just called a 'slice'. It doesn't own the @@ -24,6 +29,7 @@ let boxed_array: Box<[i32]> = Box::new([1, 2, 3]); let slice: &[i32] = &boxed_array[..]; ``` +r[type.slice.safe] All elements of slices are always initialized, and access to a slice is always bounds-checked in safe methods and operators. diff --git a/src/types/struct.md b/src/types/struct.md index 1f20dbb3c..6a672f7af 100644 --- a/src/types/struct.md +++ b/src/types/struct.md @@ -1,22 +1,30 @@ # Struct types +r[type.struct] + +r[type.struct.intro] A `struct` *type* is a heterogeneous product of other types, called the *fields* of the type.[^structtype] +r[type.struct.constructor] New instances of a `struct` can be constructed with a [struct expression]. +r[type.struct.layout] The memory layout of a `struct` is undefined by default to allow for compiler optimizations like field reordering, but it can be fixed with the [`repr` attribute]. In either case, fields may be given in any order in a corresponding struct *expression*; the resulting `struct` value will always have the same memory layout. 
+r[type.struct.field-visibility] The fields of a `struct` may be qualified by [visibility modifiers], to allow access to data in a struct outside a module. +r[type.struct.tuple] A _tuple struct_ type is just like a struct type, except that the fields are anonymous. +r[type.struct.unit] A _unit-like struct_ type is like a struct type, except that it has no fields. The one value constructed by the associated [struct expression] is the only value that inhabits such a type. diff --git a/src/types/textual.md b/src/types/textual.md index a4765b523..294c791fd 100644 --- a/src/types/textual.md +++ b/src/types/textual.md @@ -1,26 +1,39 @@ # Textual types +r[type.text] + +r[type.text.intro] The types `char` and `str` hold textual data. +r[type.text.char-value] A value of type `char` is a [Unicode scalar value] (i.e. a code point that is not a surrogate), represented as a 32-bit unsigned word in the 0x0000 to 0xD7FF -or 0xE000 to 0x10FFFF range. It is immediate [undefined behavior] to create a +or 0xE000 to 0x10FFFF range. + +r[type.text.char-precondition] +It is immediate [undefined behavior] to create a `char` that falls outside this range. A `[char]` is effectively a UCS-4 / UTF-32 string of length 1. +r[type.text.str-value] A value of type `str` is represented the same way as `[u8]`, a slice of 8-bit unsigned bytes. However, the Rust standard library makes extra assumptions about `str`: methods working on `str` assume and ensure that the data in there is valid UTF-8. Calling a `str` method with a non-UTF-8 buffer can cause [undefined behavior] now or in the future. +r[type.text.str-unsized] Since `str` is a [dynamically sized type], it can only be instantiated through a pointer type, such as `&str`. ## Layout and bit validity +r[type.text.layout] + +r[type.layout.char-layout] `char` is guaranteed to have the same size and alignment as `u32` on all platforms. 
+r[type.layout.char-validity] Every byte of a `char` is guaranteed to be initialized (in other words, `transmute::()]>(...)` is always sound -- but since some bit patterns are invalid `char`s, the inverse is not always sound). diff --git a/src/types/trait-object.md b/src/types/trait-object.md index 5b8541fa8..0c4c7596b 100644 --- a/src/types/trait-object.md +++ b/src/types/trait-object.md @@ -1,5 +1,8 @@ # Trait objects +r[type.trait-object] + +r[type.trait-object.syntax] > **Syntax**\ > _TraitObjectType_ :\ >    `dyn`? [_TypeParamBounds_] @@ -7,16 +10,21 @@ > _TraitObjectTypeOneBound_ :\ >    `dyn`? [_TraitBound_] +r[type.trait-object.intro] A *trait object* is an opaque value of another type that implements a set of -traits. The set of traits is made up of an [object safe] *base trait* plus any +traits. The set of traits is made up of a [dyn compatible] *base trait* plus any number of [auto traits]. +r[type.trait-object.impls] Trait objects implement the base trait, its auto traits, and any [supertraits] of the base trait. +r[type.trait-object.name] Trait objects are written as the keyword `dyn` followed by a set of trait -bounds, but with the following restrictions on the trait bounds. All traits -except the first trait must be auto traits, there may not be more than one +bounds, but with the following restrictions on the trait bounds. + +r[type.trait-object.constraint] +There may not be more than one non-auto trait, no more than one lifetime, and opt-out bounds (e.g. `?Sized`) are not allowed. Furthermore, paths to traits may be parenthesized. @@ -31,12 +39,14 @@ For example, given a trait `Trait`, the following are all trait objects: * `dyn 'static + Trait`. * `dyn (Trait)` +r[type.trait-object.syntax-edition2021] > **Edition differences**: Before the 2021 edition, the `dyn` keyword may be > omitted. > > Note: For clarity, it is recommended to always use the `dyn` keyword on your > trait objects unless your codebase supports compiling with Rust 1.26 or lower. 
+r[type.trait-object.syntax-edition2015] > **Edition differences**: In the 2015 edition, if the first bound of the > trait object is a path that starts with `::`, then the `dyn` will be treated > as a part of the path. The first path can be put in parenthesis to get @@ -46,11 +56,13 @@ For example, given a trait `Trait`, the following are all trait objects: > Beginning in the 2018 edition, `dyn` is a true keyword and is not allowed in > paths, so the parentheses are not necessary. +r[type.trait-object.alias] Two trait object types alias each other if the base traits alias each other and if the sets of auto traits are the same and the lifetime bounds are the same. For example, `dyn Trait + Send + UnwindSafe` is the same as `dyn Trait + UnwindSafe + Send`. +r[type.trait-object.unsized] Due to the opaqueness of which concrete type the value is of, trait objects are [dynamically sized types]. Like all DSTs, trait objects are used @@ -93,6 +105,8 @@ type signature of `print`, and the cast expression in `main`. ## Trait Object Lifetime Bounds +r[type.trait-object.lifetime-bounds] + Since a trait object can contain references, the lifetimes of those references need to be expressed as part of the trait object. This lifetime is written as `Trait + 'a`. There are [defaults] that allow this lifetime to usually be @@ -102,6 +116,6 @@ inferred with a sensible choice. 
[_TypeParamBounds_]: ../trait-bounds.md [auto traits]: ../special-types-and-traits.md#auto-traits [defaults]: ../lifetime-elision.md#default-trait-object-lifetimes +[dyn compatible]: ../items/traits.md#dyn-compatibility [dynamically sized types]: ../dynamically-sized-types.md -[object safe]: ../items/traits.md#object-safety [supertraits]: ../items/traits.md#supertraits diff --git a/src/types/tuple.md b/src/types/tuple.md index 804d8a0ae..073fbd193 100644 --- a/src/types/tuple.md +++ b/src/types/tuple.md @@ -1,26 +1,35 @@ # Tuple types +r[type.tuple] + +r[type.tuple.syntax] > **Syntax**\ > _TupleType_ :\ >       `(` `)`\ >    | `(` ( [_Type_] `,` )+ [_Type_]? `)` +r[type.tuple.intro] *Tuple types* are a family of structural types[^1] for heterogeneous lists of other types. The syntax for a tuple type is a parenthesized, comma-separated list of types. + +r[type.tuple.restriction] 1-ary tuples require a comma after their element type to be disambiguated with a [parenthesized type]. +r[type.tuple.field-number] A tuple type has a number of fields equal to the length of the list of types. This number of fields determines the *arity* of the tuple. A tuple with `n` fields is called an *n-ary tuple*. For example, a tuple with 2 fields is a 2-ary tuple. +r[type.tuple.field-name] Fields of tuples are named using increasing numeric names matching their position in the list of types. The first field is `0`. The second field is `1`. And so on. The type of each field is the type of the same position in the tuple's list of types. +r[type.tuple.unit] For convenience and historical reasons, the tuple type with no fields (`()`) is often called *unit* or *the unit type*. Its one value is also called *unit* or *the unit value*. @@ -33,8 +42,11 @@ Some examples of tuple types: * `(i32, String)` (different type from the previous example) * `(i32, f64, Vec, Option)` +r[type.tuple.constructor] Values of this type are constructed using a [tuple expression]. 
Furthermore, various expressions will produce the unit value if there is no other meaningful value for it to evaluate to. + +r[type.tuple.access] Tuple fields can be accessed by either a [tuple index expression] or [pattern matching]. [^1]: Structural types are always equivalent if their internal types are equivalent. diff --git a/src/types/union.md b/src/types/union.md index 7a2f037e8..c8801ee2f 100644 --- a/src/types/union.md +++ b/src/types/union.md @@ -1,15 +1,25 @@ # Union types +r[type.union] + +r[type.union.intro] A *union type* is a nominal, heterogeneous C-like union, denoted by the name of a [`union` item][item]. +r[type.union.access] Unions have no notion of an "active field". Instead, every union access transmutes parts of the content of the union to the type of the accessed field. + +r[type.union.safety] Since transmutes can cause unexpected or undefined behaviour, `unsafe` is -required to read from a union field. Union field types are also restricted to a +required to read from a union field. + +r[type.union.constraint] +Union field types are also restricted to a subset of types which ensures that they never need dropping. See the [item] documentation for further details. +r[type.union.layout] The memory layout of a `union` is undefined by default (in particular, fields do *not* have to be at offset 0), but the `#[repr(...)]` attribute can be used to fix a layout. diff --git a/src/unsafe-keyword.md b/src/unsafe-keyword.md index 0206d8792..333140dd1 100644 --- a/src/unsafe-keyword.md +++ b/src/unsafe-keyword.md @@ -1,5 +1,8 @@ # The `unsafe` keyword +r[unsafe] + +r[unsafe.intro] The `unsafe` keyword can occur in several different contexts: unsafe functions (`unsafe fn`), unsafe blocks (`unsafe {}`), unsafe traits (`unsafe trait`), unsafe trait implementations (`unsafe impl`), unsafe external blocks (`unsafe extern`), and unsafe attributes (`#[unsafe(attr)]`). 
It plays several different roles, depending on where it is used and whether the `unsafe_op_in_unsafe_fn` lint is enabled: @@ -11,16 +14,25 @@ See the [keyword documentation][keyword] for some illustrative examples. ## Unsafe functions (`unsafe fn`) +r[unsafe.fn] + +r[unsafe.fn.intro] Unsafe functions are functions that are not safe in all contexts and/or for all possible inputs. We say they have *extra safety conditions*, which are requirements that must be upheld by all callers and that the compiler does not check. For example, [`get_unchecked`] has the extra safety condition that the index must be in-bounds. The unsafe function should come with documentation explaining what those extra safety conditions are. +r[unsafe.fn.safety] Such a function must be prefixed with the keyword `unsafe` and can only be called from inside an `unsafe` block, or inside `unsafe fn` without the [`unsafe_op_in_unsafe_fn`] lint. ## Unsafe blocks (`unsafe {}`) +r[unsafe.block] + +r[unsafe.block.intro] A block of code can be prefixed with the `unsafe` keyword, to permit calling `unsafe` functions or dereferencing raw pointers. + +r[unsafe.block.fn-body] By default, the body of an unsafe function is also considered to be an unsafe block; this can be changed by enabling the [`unsafe_op_in_unsafe_fn`] lint. @@ -41,13 +53,19 @@ By using `unsafe` blocks to represent the reverse links as raw pointers, it can ## Unsafe traits (`unsafe trait`) +r[unsafe.trait] + +r[unsafe.trait.intro] An unsafe trait is a trait that comes with extra safety conditions that must be upheld by *implementations* of the trait. The unsafe trait should come with documentation explaining what those extra safety conditions are. +r[unsafe.trait.safety] Such a trait must be prefixed with the keyword `unsafe` and can only be implemented by `unsafe impl` blocks. ## Unsafe trait implementations (`unsafe impl`) +r[unsafe.impl] + When implementing an unsafe trait, the implementation needs to be prefixed with the `unsafe` keyword. 
By writing `unsafe impl`, the programmer states that they have taken care of satisfying the extra safety conditions required by the trait. @@ -59,12 +77,19 @@ Unsafe trait implementations are the logical dual to unsafe traits: where unsafe ## Unsafe external blocks (`unsafe extern`) +r[unsafe.extern] + The programmer who declares an [external block] must assure that the signatures of the items contained within are correct. Failing to do so may lead to undefined behavior. That this obligation has been met is indicated by writing `unsafe extern`. +r[unsafe.extern.edition2024] +> **Edition differences**: Prior to edition 2024, `extern` blocks were allowed without being qualified as `unsafe`. + [external block]: items/external-blocks.md ## Unsafe attributes (`#[unsafe(attr)]`) +r[unsafe.attribute] + An [unsafe attribute] is one that has extra safety conditions that must be upheld when using the attribute. The compiler cannot check whether these conditions have been upheld. To assert that they have been, these attributes must be wrapped in `unsafe(..)`, e.g. `#[unsafe(no_mangle)]`. [unsafe attribute]: attributes.md diff --git a/src/unsafety.md b/src/unsafety.md index 9383eba9d..88aaf373b 100644 --- a/src/unsafety.md +++ b/src/unsafety.md @@ -1,19 +1,38 @@ # Unsafety +r[safety] + +r[safety.intro] Unsafe operations are those that can potentially violate the memory-safety guarantees of Rust's static semantics. +r[safety.unsafe-ops] The following language level features cannot be used in the safe subset of Rust: +r[safety.unsafe-deref] - Dereferencing a [raw pointer]. + +r[safety.unsafe-static] - Reading or writing a [mutable] or [external] static variable. + +r[safety.unsafe-union-access] - Accessing a field of a [`union`], other than to assign to it. + +r[safety.unsafe-call] - Calling an unsafe function (including an intrinsic or foreign function). + +r[safety.unsafe-impl] - Implementing an [unsafe trait]. -- Declaring an [`extern`] block. 
+ +r[safety.unsafe-extern] +- Declaring an [`extern`] block[^extern-2024]. + +r[safety.unsafe-attribute] - Applying an [unsafe attribute] to an item. +[^extern-2024]: Prior to the 2024 edition, extern blocks were allowed to be declared without `unsafe`. + [`extern`]: items/external-blocks.md [`union`]: items/unions.md [mutable]: items/static-items.md#mutable-statics diff --git a/src/variables.md b/src/variables.md index 5e5ec1bc1..0e22af6a5 100644 --- a/src/variables.md +++ b/src/variables.md @@ -1,20 +1,27 @@ # Variables +r[variable] + +r[variable.intro] A _variable_ is a component of a stack frame, either a named function parameter, an anonymous [temporary](expressions.md#temporaries), or a named local variable. +r[variable.local] A _local variable_ (or *stack-local* allocation) holds a value directly, allocated within the stack's memory. The value is a part of the stack frame. +r[variable.local-mut] Local variables are immutable unless declared otherwise. For example: `let mut x = ...`. +r[variable.param-mut] Function parameters are immutable unless declared with `mut`. The `mut` keyword applies only to the following parameter. For example: `|mut x, y|` and `fn f(mut x: Box<i32>, y: Box<i32>)` declare one mutable variable `x` and one immutable variable `y`. +r[variable.init] Local variables are not initialized when allocated. Instead, the entire frame worth of local variables are allocated, on frame-entry, in an uninitialized state. 
Subsequent statements within a function may or may not initialize the diff --git a/src/visibility-and-privacy.md b/src/visibility-and-privacy.md index 5ccf8b4b8..58e4b9614 100644 --- a/src/visibility-and-privacy.md +++ b/src/visibility-and-privacy.md @@ -1,5 +1,8 @@ # Visibility and Privacy +r[vis] + +r[vis.syntax] > **Syntax**\ > _Visibility_ :\ >       `pub`\ @@ -8,20 +11,24 @@ >    | `pub` `(` `super` `)`\ >    | `pub` `(` `in` [_SimplePath_] `)` +r[vis.intro] These two terms are often used interchangeably, and what they are attempting to convey is the answer to the question "Can this item be used at this location?" +r[vis.name-hierarchy] Rust's name resolution operates on a global hierarchy of namespaces. Each level in the hierarchy can be thought of as some item. The items are one of those mentioned above, but also include external crates. Declaring or defining a new module can be thought of as inserting a new tree into the hierarchy at the location of the definition. +r[vis.privacy] To control whether interfaces can be used across modules, Rust checks each use of an item to see whether it should be allowed or not. This is where privacy warnings are generated, or otherwise "you used a private item of another module and weren't allowed to." +r[vis.default] By default, everything is *private*, with two exceptions: Associated items in a `pub` Trait are public by default; Enum variants in a `pub` enum are also public by default. When an item is declared as `pub`, @@ -44,6 +51,7 @@ pub enum State { } ``` +r[vis.access] With the notion of an item being either public or private, Rust allows item accesses in two cases: @@ -78,8 +86,10 @@ explain, here's a few use cases and what they would entail: In the second case, it mentions that a private item "can be accessed" by the current module and its descendants, but the exact meaning of accessing an item -depends on what the item is. Accessing a module, for example, would mean -looking inside of it (to import more items). 
On the other hand, accessing a +depends on what the item is. + +r[vis.usage] +Accessing a module, for example, would mean looking inside of it (to import more items). On the other hand, accessing a function would mean that it is invoked. Additionally, path expressions and import statements are considered to access an item in the sense that the import/expression is only valid if the destination is in the current visibility @@ -144,18 +154,30 @@ expressions, types, etc. ## `pub(in path)`, `pub(crate)`, `pub(super)`, and `pub(self)` +r[vis.scoped] + +r[vis.scoped.intro] In addition to public and private, Rust allows users to declare an item as visible only within a given scope. The rules for `pub` restrictions are as follows: + +r[vis.scoped.in] - `pub(in path)` makes an item visible within the provided `path`. `path` must be a simple path which resolves to an ancestor module of the item whose visibility is being declared. Each identifier in `path` must refer directly to a module (not to a name introduced by a `use` statement). + +r[vis.scoped.crate] - `pub(crate)` makes an item visible within the current crate. + +r[vis.scoped.super] - `pub(super)` makes an item visible to the parent module. This is equivalent to `pub(in super)`. + +r[vis.scoped.self] - `pub(self)` makes an item visible to the current module. This is equivalent to `pub(in self)` or not using `pub` at all. +r[vis.scoped.edition2018] > **Edition differences**: Starting with the 2018 edition, paths for > `pub(in path)` must start with `crate`, `self`, or `super`. The 2015 edition > may also use paths starting with `::` or modules from the crate root. @@ -219,6 +241,9 @@ fn main() { bar() } ## Re-exporting and Visibility +r[vis.reexports] + +r[vis.reexports.intro] Rust allows publicly re-exporting items through a `pub use` directive. Because this is a public directive, this allows the item to be used in the current module through the rules above. 
It essentially allows public access into the @@ -239,6 +264,7 @@ mod implementation { This means that any external crate referencing `implementation::api::f` would receive a privacy violation, while the path `api::f` would be allowed. +r[vis.reexports.private-item] When re-exporting a private item, it can be thought of as allowing the "privacy chain" being short-circuited through the reexport instead of passing through the namespace hierarchy as it normally would. diff --git a/src/whitespace.md b/src/whitespace.md index a93bdcbdb..cd099946b 100644 --- a/src/whitespace.md +++ b/src/whitespace.md @@ -1,5 +1,8 @@ # Whitespace +r[lex.whitespace] + +r[lex.whitespace.intro] Whitespace is any non-empty string containing only characters that have the [`Pattern_White_Space`] Unicode property, namely: @@ -15,9 +18,11 @@ Whitespace is any non-empty string containing only characters that have the - `U+2028` (line separator) - `U+2029` (paragraph separator) +r[lex.whitespace.token-sep] Rust is a "free-form" language, meaning that all forms of whitespace serve only to separate _tokens_ in the grammar, and have no semantic significance. +r[lex.whitespace.replacement] A Rust program has identical meaning if each whitespace element is replaced with any other legal whitespace element, such as a single space character. diff --git a/theme/reference.css b/theme/reference.css index cbc7aca8a..58be91816 100644 --- a/theme/reference.css +++ b/theme/reference.css @@ -200,3 +200,33 @@ dfn { .history > blockquote { background: #f7c0eb; } + +/* Provides an anchor container for positioning popups. */ +.popup-container { + position: relative; +} +/* In the test summary page, a convenience class for toggling visibility. */ +.popup-hidden { + display: none; +} +/* In the test summary page, the styling for the uncovered rule popup.
*/ +.uncovered-rules-popup { + position: absolute; + left: -250px; + width: 400px; + background: var(--bg); + border-radius: 4px; + border: 1px solid; + z-index: 1000; + padding: 1rem; +} + +/* The popup that shows when viewing tests for a specific rule. */ +.tests-popup { + color: var(--fg); + background: var(--bg); + border-radius: 4px; + border: 1px solid; + z-index: 1000; + padding: 1rem; +} diff --git a/theme/reference.js b/theme/reference.js new file mode 100644 index 000000000..44a237034 --- /dev/null +++ b/theme/reference.js @@ -0,0 +1,24 @@ +/* On the test summary page, toggles the popup for the uncovered rules. */ +function spec_toggle_uncovered(item_index) { + let el = document.getElementById(`uncovered-${item_index}`); + const currently_hidden = el.classList.contains('popup-hidden'); + const all = document.querySelectorAll('.uncovered-rules-popup'); + all.forEach(element => { + element.classList.add('popup-hidden'); + }); + if (currently_hidden) { + el.classList.remove('popup-hidden'); + } +} +/* Toggles the popup that shows the tests for a specific rule. */ +function spec_toggle_tests(rule_id) { + let el = document.getElementById(`tests-${rule_id}`); + const currently_hidden = el.classList.contains('popup-hidden'); + const all = document.querySelectorAll('.tests-popup'); + all.forEach(element => { + element.classList.add('popup-hidden'); + }); + if (currently_hidden) { + el.classList.remove('popup-hidden'); + } +} diff --git a/triagebot.toml b/triagebot.toml index 4a29166cb..327f4ec53 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -6,3 +6,19 @@ allow-unauthenticated = [ [assign] [shortcut] + +[merge-conflicts] +remove = [] +add = ["S-waiting-on-author"] +unless = ["S-blocked", "S-waiting-on-team", "S-waiting-on-review"] + +[autolabel."S-waiting-on-review"] +new_pr = true + +[review-submitted] +reviewed_label = "S-waiting-on-author" +review_labels = ["S-waiting-on-review"] + +[review-requested] +remove_labels = ["S-waiting-on-author"] +add_labels = ["S-waiting-on-review"]