rust-lang · bors · Jul 25, 2024 · Jul 21, 2024 · Jul 21, 2024 · Jul 22, 2024
diff --git a/.github/workflows/lintcheck.yml b/.github/workflows/lintcheck.yml
@@ -115,4 +115,20 @@ jobs:
       uses: actions/download-artifact@v4
 
     - name: Diff results
-      run: ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> $GITHUB_STEP_SUMMARY
+      # GH's summery has a maximum size of 1024k:
+      # https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-markdown-summary
+      # That's why we first log to file and then to the summary and logs
+      run: |
+        ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json --truncate >> truncated_diff.md
+        head -c 1024000 truncated_diff.md >> $GITHUB_STEP_SUMMARY
+        cat truncated_diff.md
+        ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> full_diff.md
+
+    - name: Upload full diff
+      uses: actions/upload-artifact@v4
+      with:
+        name: diff
+        if-no-files-found: ignore
+        path: |
+          full_diff.md
+          truncated_diff.md
diff --git a/lintcheck/Cargo.toml b/lintcheck/Cargo.toml
@@ -16,7 +16,7 @@ clap = { version = "4.4", features = ["derive", "env"] }
 crossbeam-channel = "0.5.6"
 diff = "0.1.13"
 flate2 = "1.0"
-itertools = "0.12"
+itertools = "0.13"
 rayon = "1.5.1"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0.85"

diff --git a/lintcheck/src/config.rs b/lintcheck/src/config.rs
@@ -53,7 +53,13 @@ pub(crate) struct LintcheckConfig {
 #[derive(Subcommand, Clone, Debug)]
 pub(crate) enum Commands {
     /// Display a markdown diff between two lintcheck log files in JSON format
-    Diff { old: PathBuf, new: PathBuf },
+    Diff {
+        old: PathBuf,
+        new: PathBuf,
+        /// This will limit the number of warnings that will be printed for each lint
+        #[clap(long)]
+        truncate: bool,
+    },
     /// Create a lintcheck crates TOML file containing the top N popular crates
     Popular {
         /// Output TOML file name

diff --git a/lintcheck/src/json.rs b/lintcheck/src/json.rs
@@ -1,12 +1,16 @@
 use std::fs;
 use std::path::Path;
 
-use itertools::EitherOrBoth;
+use itertools::{EitherOrBoth, Itertools};
 use serde::{Deserialize, Serialize};
 
 use crate::ClippyWarning;
 
-#[derive(Deserialize, Serialize)]
+/// This is the total number. 300 warnings results in 100 messages per section.
+const DEFAULT_LIMIT_PER_LINT: usize = 300;
+const TRUNCATION_TOTAL_TARGET: usize = 1000;
+
+#[derive(Debug, Deserialize, Serialize)]
 struct LintJson {
     lint: String,
     krate: String,
@@ -18,7 +22,7 @@ struct LintJson {
 
 impl LintJson {
     fn key(&self) -> impl Ord + '_ {
-        (self.file_name.as_str(), self.byte_pos, self.lint.as_str())
+        (self.lint.as_str(), self.file_name.as_str(), self.byte_pos)
     }
 
     fn info_text(&self, action: &str) -> String {
@@ -52,14 +56,117 @@ fn load_warnings(path: &Path) -> Vec<LintJson> {
     serde_json::from_slice(&file).unwrap_or_else(|e| panic!("failed to deserialize {}: {e}", path.display()))
 }
 
-fn print_warnings(title: &str, warnings: &[LintJson]) {
+pub(crate) fn diff(old_path: &Path, new_path: &Path, truncate: bool) {
+    let old_warnings = load_warnings(old_path);
+    let new_warnings = load_warnings(new_path);
+
+    let mut lint_warnings = vec![];
+
+    for (name, changes) in &itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key()))
+        .chunk_by(|change| change.as_ref().into_left().lint.to_string())
+    {
+        let mut added = Vec::new();
+        let mut removed = Vec::new();
+        let mut changed = Vec::new();
+        for change in changes {
+            match change {
+                EitherOrBoth::Both(old, new) => {
+                    if old.rendered != new.rendered {
+                        changed.push((old, new));
+                    }
+                },
+                EitherOrBoth::Left(old) => removed.push(old),
+                EitherOrBoth::Right(new) => added.push(new),
+            }
+        }
+
+        if !added.is_empty() || !removed.is_empty() || !changed.is_empty() {
+            lint_warnings.push(LintWarnings {
+                name,
+                added,
+                removed,
+                changed,
+            });
+        }
+    }
+
+    print_summary_table(&lint_warnings);
+    println!();
+
+    if lint_warnings.is_empty() {
+        return;
+    }
+
+    let truncate_after = if truncate {
+        // Max 15 ensures that we at least have five messages per lint
+        DEFAULT_LIMIT_PER_LINT
+            .min(TRUNCATION_TOTAL_TARGET / lint_warnings.len())
+            .max(15)
+    } else {
+        // No lint should ever each this number of lint emissions, so this is equivialent to
+        // No truncation
+        usize::MAX
+    };
+
+    for lint in lint_warnings {
+        print_lint_warnings(&lint, truncate_after);
+    }
+}
+
+#[derive(Debug)]
+struct LintWarnings {
+    name: String,
+    added: Vec<LintJson>,
+    removed: Vec<LintJson>,
+    changed: Vec<(LintJson, LintJson)>,
+}
+
+fn print_lint_warnings(lint: &LintWarnings, truncate_after: usize) {
+    let name = &lint.name;
+    let html_id = to_html_id(name);
+
+    // The additional anchor is added for non GH viewers that don't prefix ID's
+    println!(r#"## `{name}` <a id="user-content-{html_id}"></a>"#);
+    println!();
+
+    print!(
+        r##"{}, {}, {}"##,
+        count_string(name, "added", lint.added.len()),
+        count_string(name, "removed", lint.removed.len()),
+        count_string(name, "changed", lint.changed.len()),
+    );
+    println!();
+
+    print_warnings("Added", &lint.added, truncate_after / 3);
+    print_warnings("Removed", &lint.removed, truncate_after / 3);
+    print_changed_diff(&lint.changed, truncate_after / 3);
+}
+
+fn print_summary_table(lints: &[LintWarnings]) {
+    println!("| Lint                                       | Added   | Removed | Changed |");
+    println!("| ------------------------------------------ | ------: | ------: | ------: |");
+
+    for lint in lints {
+        println!(
+            "| {:<62} | {:>7} | {:>7} | {:>7} |",
+            format!("[`{}`](#user-content-{})", lint.name, to_html_id(&lint.name)),
+            lint.added.len(),
+            lint.removed.len(),
+            lint.changed.len()
+        );
+    }
+}
+
+fn print_warnings(title: &str, warnings: &[LintJson], truncate_after: usize) {
     if warnings.is_empty() {
         return;
     }
 
-    //We have to use HTML here to be able to manually add an id.
-    println!(r#"<h3 id="{title}">{title}</h3>"#);
+    print_h3(&warnings[0].lint, title);
     println!();
+
+    let warnings = truncate(warnings, truncate_after);
+
     for warning in warnings {
         println!("{}", warning.info_text(title));
         println!();
@@ -70,14 +177,16 @@ fn print_warnings(title: &str, warnings: &[LintJson]) {
     }
 }
 
-fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
+fn print_changed_diff(changed: &[(LintJson, LintJson)], truncate_after: usize) {
     if changed.is_empty() {
         return;
     }
 
-    //We have to use HTML here to be able to manually add an id.
-    println!(r#"<h3 id="changed">Changed</h3>"#);
+    print_h3(&changed[0].0.lint, "Changed");
     println!();
+
+    let changed = truncate(changed, truncate_after);
+
     for (old, new) in changed {
         println!("{}", new.info_text("Changed"));
         println!();
@@ -101,51 +210,44 @@ fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
     }
 }
 
-pub(crate) fn diff(old_path: &Path, new_path: &Path) {
-    let old_warnings = load_warnings(old_path);
-    let new_warnings = load_warnings(new_path);
+fn truncate<T>(list: &[T], truncate_after: usize) -> &[T] {
+    if list.len() > truncate_after {
+        println!(
+            "{} warnings have been truncated for this summary.",
+            list.len() - truncate_after
+        );
+        println!();
 
-    let mut added = Vec::new();
-    let mut removed = Vec::new();
-    let mut changed = Vec::new();
-
-    for change in itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key())) {
-        match change {
-            EitherOrBoth::Both(old, new) => {
-                if old.rendered != new.rendered {
-                    changed.push((old, new));
-                }
-            },
-            EitherOrBoth::Left(old) => removed.push(old),
-            EitherOrBoth::Right(new) => added.push(new),
-        }
+        list.split_at(truncate_after).0
+    } else {
+        list
     }
+}
 
-    print!(
-        r##"{}, {}, {}"##,
-        count_string("added", added.len()),
-        count_string("removed", removed.len()),
-        count_string("changed", changed.len()),
-    );
-    println!();
-    println!();
+fn print_h3(lint: &str, title: &str) {
+    let html_id = to_html_id(lint);
+    // We have to use HTML here to be able to manually add an id.
+    println!(r#"### {title} <a id="user-content-{html_id}-{title}"></a>"#);
+}
 
-    print_warnings("Added", &added);
-    print_warnings("Removed", &removed);
-    print_changed_diff(&changed);
+/// GitHub's markdown parsers doesn't like IDs with `::` and `_`. This simplifies
+/// the lint name for the HTML ID.
+fn to_html_id(lint_name: &str) -> String {
+    lint_name.replace("clippy::", "").replace('_', "-")
 }
 
 /// This generates the `x added` string for the start of the job summery.
 /// It linkifies them if possible to jump to the respective heading.
-fn count_string(label: &str, count: usize) -> String {
+fn count_string(lint: &str, label: &str, count: usize) -> String {
     // Headlines are only added, if anything will be displayed under the headline.
     // We therefore only want to add links to them if they exist
     if count == 0 {
         format!("0 {label}")
     } else {
+        let html_id = to_html_id(lint);
         // GitHub's job summaries don't add HTML ids to headings. That's why we
         // manually have to add them. GitHub prefixes these manual ids with
         // `user-content-` and that's how we end up with these awesome links :D
-        format!("[{count} {label}](#user-content-{label})")
+        format!("[{count} {label}](#user-content-{html_id}-{label})")
     }
 }
diff --git a/lintcheck/src/main.rs b/lintcheck/src/main.rs
@@ -260,7 +260,7 @@ fn main() {
     let config = LintcheckConfig::new();
 
     match config.subcommand {
-        Some(Commands::Diff { old, new }) => json::diff(&old, &new),
+        Some(Commands::Diff { old, new, truncate }) => json::diff(&old, &new, truncate),
         Some(Commands::Popular { output, number }) => popular_crates::fetch(output, number).unwrap(),
         None => lintcheck(config),
     }