Skip to content

Lintcheck: Rework and limit diff output for GH's CI #13139

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion .github/workflows/lintcheck.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,20 @@ jobs:
uses: actions/download-artifact@v4

- name: Diff results
run: ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> $GITHUB_STEP_SUMMARY
# GH's summery has a maximum size of 1024k:
# https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-markdown-summary
# That's why we first log to file and then to the summary and logs
run: |
./target/debug/lintcheck diff {base,head}/ci_crates_logs.json --truncate >> truncated_diff.md
head -c 1024000 truncated_diff.md >> $GITHUB_STEP_SUMMARY
cat truncated_diff.md
./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> full_diff.md

- name: Upload full diff
uses: actions/upload-artifact@v4
with:
name: diff
if-no-files-found: ignore
path: |
full_diff.md
truncated_diff.md
2 changes: 1 addition & 1 deletion lintcheck/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ clap = { version = "4.4", features = ["derive", "env"] }
crossbeam-channel = "0.5.6"
diff = "0.1.13"
flate2 = "1.0"
itertools = "0.12"
itertools = "0.13"
rayon = "1.5.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.85"
Expand Down
8 changes: 7 additions & 1 deletion lintcheck/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,13 @@ pub(crate) struct LintcheckConfig {
#[derive(Subcommand, Clone, Debug)]
pub(crate) enum Commands {
/// Display a markdown diff between two lintcheck log files in JSON format
Diff { old: PathBuf, new: PathBuf },
Diff {
old: PathBuf,
new: PathBuf,
/// This will limit the number of warnings that will be printed for each lint
#[clap(long)]
truncate: bool,
},
/// Create a lintcheck crates TOML file containing the top N popular crates
Popular {
/// Output TOML file name
Expand Down
180 changes: 141 additions & 39 deletions lintcheck/src/json.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
use std::fs;
use std::path::Path;

use itertools::EitherOrBoth;
use itertools::{EitherOrBoth, Itertools};
use serde::{Deserialize, Serialize};

use crate::ClippyWarning;

#[derive(Deserialize, Serialize)]
/// This is the total number. 300 warnings results in 100 messages per section.
const DEFAULT_LIMIT_PER_LINT: usize = 300;
const TRUNCATION_TOTAL_TARGET: usize = 1000;

#[derive(Debug, Deserialize, Serialize)]
struct LintJson {
lint: String,
krate: String,
Expand All @@ -18,7 +22,7 @@ struct LintJson {

impl LintJson {
fn key(&self) -> impl Ord + '_ {
(self.file_name.as_str(), self.byte_pos, self.lint.as_str())
(self.lint.as_str(), self.file_name.as_str(), self.byte_pos)
}

fn info_text(&self, action: &str) -> String {
Expand Down Expand Up @@ -52,14 +56,117 @@ fn load_warnings(path: &Path) -> Vec<LintJson> {
serde_json::from_slice(&file).unwrap_or_else(|e| panic!("failed to deserialize {}: {e}", path.display()))
}

fn print_warnings(title: &str, warnings: &[LintJson]) {
pub(crate) fn diff(old_path: &Path, new_path: &Path, truncate: bool) {
let old_warnings = load_warnings(old_path);
let new_warnings = load_warnings(new_path);

let mut lint_warnings = vec![];

for (name, changes) in &itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key()))
.chunk_by(|change| change.as_ref().into_left().lint.to_string())
{
let mut added = Vec::new();
let mut removed = Vec::new();
let mut changed = Vec::new();
for change in changes {
match change {
EitherOrBoth::Both(old, new) => {
if old.rendered != new.rendered {
changed.push((old, new));
}
},
EitherOrBoth::Left(old) => removed.push(old),
EitherOrBoth::Right(new) => added.push(new),
}
}

if !added.is_empty() || !removed.is_empty() || !changed.is_empty() {
lint_warnings.push(LintWarnings {
name,
added,
removed,
changed,
});
}
}

print_summary_table(&lint_warnings);
println!();

if lint_warnings.is_empty() {
return;
}

let truncate_after = if truncate {
// Max 15 ensures that we at least have five messages per lint
DEFAULT_LIMIT_PER_LINT
.min(TRUNCATION_TOTAL_TARGET / lint_warnings.len())
.max(15)
} else {
// No lint should ever each this number of lint emissions, so this is equivialent to
// No truncation
usize::MAX
};

for lint in lint_warnings {
print_lint_warnings(&lint, truncate_after);
}
}

#[derive(Debug)]
struct LintWarnings {
name: String,
added: Vec<LintJson>,
removed: Vec<LintJson>,
changed: Vec<(LintJson, LintJson)>,
}

fn print_lint_warnings(lint: &LintWarnings, truncate_after: usize) {
let name = &lint.name;
let html_id = to_html_id(name);

// The additional anchor is added for non GH viewers that don't prefix ID's
println!(r#"## `{name}` <a id="user-content-{html_id}"></a>"#);
println!();

print!(
r##"{}, {}, {}"##,
count_string(name, "added", lint.added.len()),
count_string(name, "removed", lint.removed.len()),
count_string(name, "changed", lint.changed.len()),
);
println!();

print_warnings("Added", &lint.added, truncate_after / 3);
print_warnings("Removed", &lint.removed, truncate_after / 3);
print_changed_diff(&lint.changed, truncate_after / 3);
}

fn print_summary_table(lints: &[LintWarnings]) {
println!("| Lint | Added | Removed | Changed |");
println!("| ------------------------------------------ | ------: | ------: | ------: |");

for lint in lints {
println!(
"| {:<62} | {:>7} | {:>7} | {:>7} |",
format!("[`{}`](#user-content-{})", lint.name, to_html_id(&lint.name)),
lint.added.len(),
lint.removed.len(),
lint.changed.len()
);
}
}

fn print_warnings(title: &str, warnings: &[LintJson], truncate_after: usize) {
if warnings.is_empty() {
return;
}

//We have to use HTML here to be able to manually add an id.
println!(r#"<h3 id="{title}">{title}</h3>"#);
print_h3(&warnings[0].lint, title);
println!();

let warnings = truncate(warnings, truncate_after);

for warning in warnings {
println!("{}", warning.info_text(title));
println!();
Expand All @@ -70,14 +177,16 @@ fn print_warnings(title: &str, warnings: &[LintJson]) {
}
}

fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
fn print_changed_diff(changed: &[(LintJson, LintJson)], truncate_after: usize) {
if changed.is_empty() {
return;
}

//We have to use HTML here to be able to manually add an id.
println!(r#"<h3 id="changed">Changed</h3>"#);
print_h3(&changed[0].0.lint, "Changed");
println!();

let changed = truncate(changed, truncate_after);

for (old, new) in changed {
println!("{}", new.info_text("Changed"));
println!();
Expand All @@ -101,51 +210,44 @@ fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
}
}

pub(crate) fn diff(old_path: &Path, new_path: &Path) {
let old_warnings = load_warnings(old_path);
let new_warnings = load_warnings(new_path);
fn truncate<T>(list: &[T], truncate_after: usize) -> &[T] {
if list.len() > truncate_after {
println!(
"{} warnings have been truncated for this summary.",
list.len() - truncate_after
);
println!();

let mut added = Vec::new();
let mut removed = Vec::new();
let mut changed = Vec::new();

for change in itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key())) {
match change {
EitherOrBoth::Both(old, new) => {
if old.rendered != new.rendered {
changed.push((old, new));
}
},
EitherOrBoth::Left(old) => removed.push(old),
EitherOrBoth::Right(new) => added.push(new),
}
list.split_at(truncate_after).0
} else {
list
}
}

print!(
r##"{}, {}, {}"##,
count_string("added", added.len()),
count_string("removed", removed.len()),
count_string("changed", changed.len()),
);
println!();
println!();
fn print_h3(lint: &str, title: &str) {
let html_id = to_html_id(lint);
// We have to use HTML here to be able to manually add an id.
println!(r#"### {title} <a id="user-content-{html_id}-{title}"></a>"#);
}

print_warnings("Added", &added);
print_warnings("Removed", &removed);
print_changed_diff(&changed);
/// GitHub's markdown parsers doesn't like IDs with `::` and `_`. This simplifies
/// the lint name for the HTML ID.
fn to_html_id(lint_name: &str) -> String {
lint_name.replace("clippy::", "").replace('_', "-")
}

/// This generates the `x added` string for the start of the job summery.
/// It linkifies them if possible to jump to the respective heading.
fn count_string(label: &str, count: usize) -> String {
fn count_string(lint: &str, label: &str, count: usize) -> String {
// Headlines are only added, if anything will be displayed under the headline.
// We therefore only want to add links to them if they exist
if count == 0 {
format!("0 {label}")
} else {
let html_id = to_html_id(lint);
// GitHub's job summaries don't add HTML ids to headings. That's why we
// manually have to add them. GitHub prefixes these manual ids with
// `user-content-` and that's how we end up with these awesome links :D
format!("[{count} {label}](#user-content-{label})")
format!("[{count} {label}](#user-content-{html_id}-{label})")
}
}
2 changes: 1 addition & 1 deletion lintcheck/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ fn main() {
let config = LintcheckConfig::new();

match config.subcommand {
Some(Commands::Diff { old, new }) => json::diff(&old, &new),
Some(Commands::Diff { old, new, truncate }) => json::diff(&old, &new, truncate),
Some(Commands::Popular { output, number }) => popular_crates::fetch(output, number).unwrap(),
None => lintcheck(config),
}
Expand Down