Skip to content

Commit be0125b

Browse files
committed
Auto merge of #6469 - matthiaskrgr:clippy_dev_crater, r=flip1995
add "cargo dev crater" to run clippy on a fixed set of crates and diff the lint warnings. `cargo dev crater` now does the following: build clippy in debug mode; then, for a fixed set of crates: download and extract the crate, run the compiled clippy on the crate, and dump the warnings into a file that is inside the repo. We can then do a "git diff" and see what effects our clippy changes had on a tiny fraction of the rust ecosystem, and can see when a change unexpectedly added or silenced a lot of warnings. Checking all the crates took less than 5 minutes on my system. Should help with rust-lang/rust-clippy#6429 --- *Please write a short comment explaining your change (or "none" for internal only changes)* changelog: extend cargo dev to run clippy against a fixed set of crates and compare warnings
2 parents 8d57cee + 5b6a183 commit be0125b

File tree

7 files changed

+3713
-3
lines changed

7 files changed

+3713
-3
lines changed

.cargo/config

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[alias]
22
uitest = "test --test compile-test"
33
dev = "run --target-dir clippy_dev/target --package clippy_dev --bin clippy_dev --manifest-path clippy_dev/Cargo.toml --"
4+
dev-lintcheck = "run --target-dir clippy_dev/target --package clippy_dev --bin clippy_dev --manifest-path clippy_dev/Cargo.toml --features lintcheck -- lintcheck"
45

56
[build]
67
rustflags = ["-Zunstable-options"]

clippy_dev/Cargo.toml

+8
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,22 @@ version = "0.0.1"
44
authors = ["Philipp Hansch <[email protected]>"]
55
edition = "2018"
66

7+
78
[dependencies]
89
bytecount = "0.6"
910
clap = "2.33"
11+
flate2 = { version = "1.0.19", optional = true }
1012
itertools = "0.9"
1113
opener = "0.4"
1214
regex = "1"
15+
serde = { version = "1.0", features = ["derive"], optional = true }
16+
serde_json = { version = "1.0", optional = true }
1317
shell-escape = "0.1"
18+
tar = { version = "0.4.30", optional = true }
19+
toml = { version = "0.5", optional = true }
20+
ureq = { version = "2.0.0-rc3", optional = true }
1421
walkdir = "2"
1522

1623
[features]
24+
lintcheck = ["flate2", "serde_json", "tar", "toml", "ureq", "serde"]
1725
deny-warnings = []

clippy_dev/lintcheck_crates.toml

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[crates]
2+
# some of these are from cargotest
3+
cargo = ['0.49.0']
4+
iron = ['0.6.1']
5+
ripgrep = ['12.1.1']
6+
xsv = ['0.13.0']
7+
#tokei = ['12.0.4']
8+
rayon = ['1.5.0']
9+
serde = ['1.0.118']
10+
# top 10 crates.io dls
11+
bitflags = ['1.2.1']
12+
libc = ['0.2.81']
13+
log = ['0.4.11']
14+
proc-macro2 = ['1.0.24']
15+
quote = ['1.0.7']
16+
rand = ['0.7.3']
17+
rand_core = ['0.6.0']
18+
regex = ['1.3.2']
19+
syn = ['1.0.54']
20+
unicode-xid = ['0.2.1']

clippy_dev/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use walkdir::WalkDir;
1212

1313
pub mod bless;
1414
pub mod fmt;
15+
pub mod lintcheck;
1516
pub mod new_lint;
1617
pub mod ra_setup;
1718
pub mod serve;

clippy_dev/src/lintcheck.rs

+286
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
// Run clippy on a fixed set of crates and collect the warnings.
2+
// This helps to observe the impact clippy changes have on a set of real-world code.
3+
//
4+
// When a new lint is introduced, we can search the results for new warnings and check for false
5+
// positives.
6+
7+
#![cfg(feature = "lintcheck")]
8+
#![allow(clippy::filter_map)]
9+
10+
use crate::clippy_project_root;
11+
12+
use std::collections::HashMap;
13+
use std::process::Command;
14+
use std::{fmt, fs::write, path::PathBuf};
15+
16+
use clap::ArgMatches;
17+
use serde::{Deserialize, Serialize};
18+
use serde_json::Value;
19+
20+
// use this to store the crates when interacting with the crates.toml file
21+
#[derive(Debug, Serialize, Deserialize)]
22+
struct CrateList {
23+
crates: HashMap<String, Vec<String>>,
24+
}
25+
26+
/// Crate data as stored in the toml file; a crate can be listed with multiple versions.
///
/// A single `TomlCrate` is later mapped to several `CrateSource`s in that case.
struct TomlCrate {
    name: String,
    versions: Vec<String>,
}
32+
33+
// represents an archive we download from crates.io
34+
#[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq)]
35+
struct CrateSource {
36+
name: String,
37+
version: String,
38+
}
39+
40+
/// The extracted source code of one crate version.
#[derive(Debug)]
struct Crate {
    version: String,
    name: String,
    /// Location of the extracted sources, i.e. the directory clippy is run in.
    path: PathBuf,
}
48+
49+
/// A single warning reported by clippy, together with the crate it was reported for.
///
/// All fields are kept as text because they are only ever used to build the log output.
#[derive(Debug)]
struct ClippyWarning {
    crate_name: String,
    crate_version: String,
    file: String,
    line: String,
    column: String,
    linttype: String,
    message: String,
}

impl fmt::Display for ClippyWarning {
    /// Formats the warning as `{name}-{version}/{file}:{line}:{column} {lint} "{message}"`.
    ///
    /// The output ends with a newline: the log file is built by concatenating the formatted
    /// warnings without any separator.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            r#"{}-{}/{}:{}:{} {} "{}""#,
            self.crate_name, self.crate_version, self.file, self.line, self.column, self.linttype, self.message
        )
    }
}
69+
70+
impl CrateSource {
71+
fn download_and_extract(&self) -> Crate {
72+
let extract_dir = PathBuf::from("target/lintcheck/crates");
73+
let krate_download_dir = PathBuf::from("target/lintcheck/downloads");
74+
75+
// url to download the crate from crates.io
76+
let url = format!(
77+
"https://crates.io/api/v1/crates/{}/{}/download",
78+
self.name, self.version
79+
);
80+
println!("Downloading and extracting {} {} from {}", self.name, self.version, url);
81+
let _ = std::fs::create_dir("target/lintcheck/");
82+
let _ = std::fs::create_dir(&krate_download_dir);
83+
let _ = std::fs::create_dir(&extract_dir);
84+
85+
let krate_file_path = krate_download_dir.join(format!("{}-{}.crate.tar.gz", &self.name, &self.version));
86+
// don't download/extract if we already have done so
87+
if !krate_file_path.is_file() {
88+
// create a file path to download and write the crate data into
89+
let mut krate_dest = std::fs::File::create(&krate_file_path).unwrap();
90+
let mut krate_req = ureq::get(&url).call().unwrap().into_reader();
91+
// copy the crate into the file
92+
std::io::copy(&mut krate_req, &mut krate_dest).unwrap();
93+
94+
// unzip the tarball
95+
let ungz_tar = flate2::read::GzDecoder::new(std::fs::File::open(&krate_file_path).unwrap());
96+
// extract the tar archive
97+
let mut archive = tar::Archive::new(ungz_tar);
98+
archive.unpack(&extract_dir).expect("Failed to extract!");
99+
}
100+
// crate is extracted, return a new Krate object which contains the path to the extracted
101+
// sources that clippy can check
102+
Crate {
103+
version: self.version.clone(),
104+
name: self.name.clone(),
105+
path: extract_dir.join(format!("{}-{}/", self.name, self.version)),
106+
}
107+
}
108+
}
109+
110+
impl Crate {
111+
fn run_clippy_lints(&self, cargo_clippy_path: &PathBuf) -> Vec<ClippyWarning> {
112+
println!("Linting {} {}...", &self.name, &self.version);
113+
let cargo_clippy_path = std::fs::canonicalize(cargo_clippy_path).unwrap();
114+
115+
let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir/");
116+
117+
let all_output = std::process::Command::new(cargo_clippy_path)
118+
.env("CARGO_TARGET_DIR", shared_target_dir)
119+
// lint warnings will look like this:
120+
// src/cargo/ops/cargo_compile.rs:127:35: warning: usage of `FromIterator::from_iter`
121+
.args(&[
122+
"--",
123+
"--message-format=json",
124+
"--",
125+
"--cap-lints=warn",
126+
"-Wclippy::pedantic",
127+
"-Wclippy::cargo",
128+
])
129+
.current_dir(&self.path)
130+
.output()
131+
.unwrap();
132+
let stdout = String::from_utf8_lossy(&all_output.stdout);
133+
let output_lines = stdout.lines();
134+
//dbg!(&output_lines);
135+
let warnings: Vec<ClippyWarning> = output_lines
136+
.into_iter()
137+
// get all clippy warnings
138+
.filter(|line| line.contains("clippy::"))
139+
.map(|json_msg| parse_json_message(json_msg, &self))
140+
.collect();
141+
warnings
142+
}
143+
}
144+
145+
/// Builds clippy by running `cargo build` in the current working directory.
///
/// # Panics
///
/// Panics if `cargo` cannot be run or if the build fails. In the latter case the captured stderr
/// of `cargo build` is printed first.
fn build_clippy() {
    let output = Command::new("cargo")
        .arg("build")
        .output()
        .expect("Failed to build clippy!");
    // `output()` only returns an error if the process could not be run at all; a failed build has
    // to be detected via the exit status. Without this check a stale binary from an earlier build
    // would silently be used for linting.
    if !output.status.success() {
        eprintln!("{}", String::from_utf8_lossy(&output.stderr));
        panic!("Failed to build clippy!");
    }
}
151+
152+
// get a list of CrateSources we want to check from a "lintcheck_crates.toml" file.
153+
fn read_crates() -> Vec<CrateSource> {
154+
let toml_path = PathBuf::from("clippy_dev/lintcheck_crates.toml");
155+
let toml_content: String =
156+
std::fs::read_to_string(&toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display()));
157+
let crate_list: CrateList =
158+
toml::from_str(&toml_content).unwrap_or_else(|e| panic!("Failed to parse {}: \n{}", toml_path.display(), e));
159+
// parse the hashmap of the toml file into a list of crates
160+
let tomlcrates: Vec<TomlCrate> = crate_list
161+
.crates
162+
.into_iter()
163+
.map(|(name, versions)| TomlCrate { name, versions })
164+
.collect();
165+
166+
// flatten TomlCrates into CrateSources (one TomlCrates may represent several versions of a crate =>
167+
// multiple Cratesources)
168+
let mut crate_sources = Vec::new();
169+
tomlcrates.into_iter().for_each(|tk| {
170+
tk.versions.iter().for_each(|ver| {
171+
crate_sources.push(CrateSource {
172+
name: tk.name.clone(),
173+
version: ver.to_string(),
174+
});
175+
})
176+
});
177+
crate_sources
178+
}
179+
180+
// extract interesting data from a json lint message
181+
fn parse_json_message(json_message: &str, krate: &Crate) -> ClippyWarning {
182+
let jmsg: Value = serde_json::from_str(&json_message).unwrap_or_else(|e| panic!("Failed to parse json:\n{:?}", e));
183+
184+
ClippyWarning {
185+
crate_name: krate.name.to_string(),
186+
crate_version: krate.version.to_string(),
187+
file: jmsg["message"]["spans"][0]["file_name"]
188+
.to_string()
189+
.trim_matches('"')
190+
.into(),
191+
line: jmsg["message"]["spans"][0]["line_start"]
192+
.to_string()
193+
.trim_matches('"')
194+
.into(),
195+
column: jmsg["message"]["spans"][0]["text"][0]["highlight_start"]
196+
.to_string()
197+
.trim_matches('"')
198+
.into(),
199+
linttype: jmsg["message"]["code"]["code"].to_string().trim_matches('"').into(),
200+
message: jmsg["message"]["message"].to_string().trim_matches('"').into(),
201+
}
202+
}
203+
204+
// the main fn
205+
pub fn run(clap_config: &ArgMatches) {
206+
let cargo_clippy_path: PathBuf = PathBuf::from("target/debug/cargo-clippy");
207+
208+
println!("Compiling clippy...");
209+
build_clippy();
210+
println!("Done compiling");
211+
212+
// assert that clippy is found
213+
assert!(
214+
cargo_clippy_path.is_file(),
215+
"target/debug/cargo-clippy binary not found! {}",
216+
cargo_clippy_path.display()
217+
);
218+
219+
let clippy_ver = std::process::Command::new("target/debug/cargo-clippy")
220+
.arg("--version")
221+
.output()
222+
.map(|o| String::from_utf8_lossy(&o.stdout).into_owned())
223+
.expect("could not get clippy version!");
224+
225+
// download and extract the crates, then run clippy on them and collect clippys warnings
226+
// flatten into one big list of warnings
227+
228+
let crates = read_crates();
229+
230+
let clippy_warnings: Vec<ClippyWarning> = if let Some(only_one_crate) = clap_config.value_of("only") {
231+
// if we don't have the specified crated in the .toml, throw an error
232+
if !crates.iter().any(|krate| krate.name == only_one_crate) {
233+
eprintln!(
234+
"ERROR: could not find crate '{}' in clippy_dev/lintcheck_crates.toml",
235+
only_one_crate
236+
);
237+
std::process::exit(1);
238+
}
239+
240+
// only check a single crate that was passed via cmdline
241+
crates
242+
.into_iter()
243+
.map(|krate| krate.download_and_extract())
244+
.filter(|krate| krate.name == only_one_crate)
245+
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path))
246+
.flatten()
247+
.collect()
248+
} else {
249+
// check all crates (default)
250+
crates
251+
.into_iter()
252+
.map(|krate| krate.download_and_extract())
253+
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path))
254+
.flatten()
255+
.collect()
256+
};
257+
258+
// generate some stats:
259+
260+
// count lint type occurrences
261+
let mut counter: HashMap<&String, usize> = HashMap::new();
262+
clippy_warnings
263+
.iter()
264+
.for_each(|wrn| *counter.entry(&wrn.linttype).or_insert(0) += 1);
265+
266+
// collect into a tupled list for sorting
267+
let mut stats: Vec<(&&String, &usize)> = counter.iter().map(|(lint, count)| (lint, count)).collect();
268+
// sort by "000{count} {clippy::lintname}"
269+
// to not have a lint with 200 and 2 warnings take the same spot
270+
stats.sort_by_key(|(lint, count)| format!("{:0>4}, {}", count, lint));
271+
272+
let stats_formatted: String = stats
273+
.iter()
274+
.map(|(lint, count)| format!("{} {}\n", lint, count))
275+
.collect::<String>();
276+
277+
let mut all_msgs: Vec<String> = clippy_warnings.iter().map(|warning| warning.to_string()).collect();
278+
all_msgs.sort();
279+
all_msgs.push("\n\n\n\nStats\n\n".into());
280+
all_msgs.push(stats_formatted);
281+
282+
// save the text into lintcheck-logs/logs.txt
283+
let mut text = clippy_ver; // clippy version number on top
284+
text.push_str(&format!("\n{}", all_msgs.join("")));
285+
write("lintcheck-logs/logs.txt", text).unwrap();
286+
}

0 commit comments

Comments
 (0)