Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to latest run-clang-tidy-cached #2254

Merged
merged 1 commit into from
Sep 20, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 78 additions & 37 deletions .github/bin/run-clang-tidy-cached.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ B=${0%%.cc}; [ "$B" -nt "$0" ] || c++ -std=c++17 -o"$B" "$0" && exec "$B" "$@";
// See the License for the specific language governing permissions and
// limitations under the License.

// Location: https://github.com/hzeller/dev-tools
// Last update: 2024-09-16 cf86e3b6c51dbf620a08fb2a59f6ea71b6bebe3d
// Location: https://github.com/hzeller/dev-tools (2024-09-19)

// Script to run clang-tidy on files in a bazel project while caching the
// results as clang-tidy can be pretty slow. The clang-tidy output messages
Expand Down Expand Up @@ -62,18 +61,19 @@ B=${0%%.cc}; [ "$B" -nt "$0" ] || c++ -std=c++17 -o"$B" "$0" && exec "$B" "$@";
// Dont' change anything here, override for your project below in kConfig.
struct ConfigValues {
// Prefix for the cache to write to. If empty, use name of toplevel directory.
// Typical would be name of project with underscore suffix.
std::string_view cache_prefix;

// Directory to start recurse for sources createing a file list.
// Some projects need e.g. "src/".
// Some projects e.g. start at "src/".
std::string_view start_dir;

// Regular expression matching files that should be included from file list.
// Regular expression matching files that should be included in file list.
std::string_view file_include_re = ".*";

// Regular expxression matching files that should be excluded from file list.
// If overriding, make sure to include at least ".git".
std::string_view file_exclude_re = ".git/|.github/";
// If searching from toplevel, make sure to include at least ".git/".
std::string_view file_exclude_re = "^(\\.git|\\.github|build)/";

// A file in the toplevel of the project that should exist, typically
// something used to set up the build environment, such as MODULE.bazel,
Expand All @@ -86,6 +86,11 @@ struct ConfigValues {
// (Default configuration: just .clang-tidy as this should always be there)
std::string_view toplevel_build_file = ".clang-tidy";

// Bazel projects have some complicated paths where generated and external
// sources reside. Setting this to true will tell this script canonicalize
// these in the output. Set to true if this is a bazel project.
bool is_bazel_project = false;

// If the compilation DB or toplevel_build_file changed in timestamp, it
// might be worthwhile revisiting sources that previously had issues.
// This flag enables that.
Expand All @@ -94,22 +99,42 @@ struct ConfigValues {
// few problematic sources to begin with, otherwise every update of the
// compilation DB will re-trigger revisiting all of them.
bool revisit_brokenfiles_if_build_config_newer = true;

// Revisit a source file if any of its include files changed content. Say
// foo.cc includes bar.h. Reprocess foo.cc with clang-tidy when bar.h changed,
// even if foo.cc is unchanged. This will find issues in which foo.cc relies
// on something bar.h provides.
// Usually good to keep on, but it can result in situations in which a header
// that is included by a lot of other files results in lots of reprocessing.
bool revisit_if_any_include_changes = true;
};

// --------------[ Project-specific configuration ]--------------
static constexpr ConfigValues kConfig = {
.cache_prefix = "verible_",
.file_exclude_re = ".git/|.github/" // stuff we're not interested in
"|vscode/" // some generated code in there
"|tree_operations_test" // very slow
"|symbol_table_test",
"|tree_operations_test" // very slow to process.
"|symbol_table_test", // ...
.toplevel_build_file = "MODULE.bazel",
.is_bazel_project = true,
};
// --------------------------------------------------------------

// Files to be considered.
// Files that look like relevant include files.
inline bool IsIncludeExtension(const std::string &extension) {
for (const std::string_view e : {".h", ".hpp", ".hxx", ".inl"}) {
if (extension == e) return true;
}
return false;
}

// Filter for source files to be considered.
inline bool ConsiderExtension(const std::string &extension) {
return extension == ".cc" || extension == ".cpp" || extension == ".h";
for (const std::string_view e : {".cc", ".cpp", ".cxx"}) {
if (extension == e) return true;
}
return IsIncludeExtension(extension);
}

// Configuration of clang-tidy itself.
Expand Down Expand Up @@ -289,6 +314,10 @@ class ClangTidyRunner {

// Use major version as part of name of our configuration specific dir.
auto version = GetCommandOutput(clang_tidy_ + " --version");
if (version.empty()) {
std::cerr << "Could not invoke " << clang_tidy_ << "; is it in PATH ?\n";
exit(EXIT_FAILURE);
}
std::smatch version_match;
const std::string major_version =
std::regex_search(version, version_match, std::regex{"version ([0-9]+)"})
Expand All @@ -303,16 +332,18 @@ class ClangTidyRunner {
}

// Fix filename paths found in logfiles that are not emitted relative to
// project root in the log (bazel has its own, so this fixes bazel-specific
// projects)
// project root in the log - remove that prefix.
// (bazel has its own, so if this is bazel, also bazel-specific fix up that).
static void RepairFilenameOccurences(const fs::path &infile,
const fs::path &outfile) {
static const std::regex sFixPathsRe = []() {
std::string canonicalize_expr = "(^|\\n)("; // fix names at start of line
auto root = GetCommandOutput("bazel info execution_root 2>/dev/null");
if (!root.empty()) {
root.pop_back(); // remove newline.
canonicalize_expr += root + "/|";
if (kConfig.is_bazel_project) {
auto bzroot = GetCommandOutput("bazel info execution_root 2>/dev/null");
if (!bzroot.empty()) {
bzroot.pop_back(); // remove newline.
canonicalize_expr += bzroot + "/|";
}
}
canonicalize_expr += fs::current_path().string() + "/"; // $(pwd)/
canonicalize_expr +=
Expand Down Expand Up @@ -362,46 +393,56 @@ class FileGatherer {
}
// Remember content hash of header, so that we can make changed headers
// influence the hash of a file including this.
if (extension == ".h") {
if (kConfig.revisit_if_any_include_changes &&
IsIncludeExtension(extension)) {
// Since the files might be included sloppily without prefix path,
// just keep track of the basename (but since there might be collisions,
// accomodate all of them by xor-ing the hashes).
const std::string just_basename = fs::path(file).filename();
const std::string just_basename = p.filename();
header_hashes[just_basename] ^= hashContent(GetContent(p));
}
}
std::cerr << files_of_interest_.size() << " files of interest.\n";

// Create content hash address. If any header a file depends on changes, we
// want to reprocess. So we make the hash dependent on header content as
// well.
// Create content hash address for the cache and build list of work items.
// If we want to revisit if headers changed, make hash dependent on them.
std::list<filepath_contenthash_t> work_queue;
const std::regex inc_re("\"([0-9a-zA-Z_/-]+\\.h)\""); // match include
for (filepath_contenthash_t &f : files_of_interest_) {
const auto content = GetContent(f.first);
f.second = hashContent(content);
for (ReIt it(content.begin(), content.end(), inc_re); it != ReIt();
++it) {
const std::string &header_path = (*it)[1].str();
const std::string header_basename = fs::path(header_path).filename();
f.second ^= header_hashes[header_basename];
const std::regex inc_re(
R"""(#\s*include\s+"([0-9a-zA-Z_/-]+\.[a-zA-Z]+)")""");
for (filepath_contenthash_t &work_file : files_of_interest_) {
const auto content = GetContent(work_file.first);
work_file.second = hashContent(content);
if (kConfig.revisit_if_any_include_changes) {
// Update the hash with all the hashes from all include files.
for (ReIt it(content.begin(), content.end(), inc_re); it != ReIt();
++it) {
const std::string &header_path = (*it)[1].str();
const std::string header_basename = fs::path(header_path).filename();
const auto found = header_hashes.find(header_basename);
if (found != header_hashes.end()) {
work_file.second ^= found->second;
}
}
}
// Recreate if we don't have it yet or if it contains messages but is
// older than WORKSPACE or compilation db. Maybe something got fixed.
if (store_.NeedsRefresh(f, min_freshness)) {
work_queue.emplace_back(f);
// Recreate if we don't have it yet or if it contains findings but is
// older than build environment. Maybe something got fixed: revisit file.
if (store_.NeedsRefresh(work_file, min_freshness)) {
work_queue.emplace_back(work_file);
}
}
return work_queue;
}

// Tally up findings for files of interest and assemble in one file.
// (BuildWorkList() needs to be called first).
size_t CreateReport(const fs::path &project_dir,
size_t CreateReport(const fs::path &cache_dir,
std::string_view symlink_detail,
std::string_view symlink_summary) const {
const fs::path tidy_outfile = project_dir / "tidy.out";
const fs::path tidy_summary = project_dir / "tidy-summary.out";
// Make it possible to keep independent reports for different invocation
// locations (e.g. two checkouts of the same project) using the same cache.
const std::string suffix = ToHex(hashContent(fs::current_path().string()));
const fs::path tidy_outfile = cache_dir / ("tidy.out-" + suffix);
const fs::path tidy_summary = cache_dir / ("tidy-summary.out-" + suffix);

// Assemble the separate outputs into a single file. Tally up per-check
const std::regex check_re("(\\[[a-zA-Z.-]+\\])\n");
Expand Down
Loading