Skip to content

Commit

Permalink
GHA: detect "bad" words in documentation
Browse files Browse the repository at this point in the history
As we do in the source repo

Closes #353
  • Loading branch information
bagder committed Jun 7, 2024
1 parent e76e0a9 commit 6081a09
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .github/scripts/badwords.ok
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# whitelisted uses of bad words
# file:line:rule
docs/CVE-2020-19909.md:32:\bwill\b
docs/CVE-2023-38546.md:36:file name\b
94 changes: 94 additions & 0 deletions .github/scripts/badwords.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/perl
# Copyright (C) Daniel Stenberg, <[email protected]>, et al.
#
# SPDX-License-Identifier: curl
#
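# Usage: badwords.pl [-w whitelist] file... < wordlist
#
# The word list is read from stdin, one rule per line:
#
#   bad[:=]correct
#
# "bad" is a regular expression and "correct" is the suggested replacement.
# If the separator is '=', the match is case sensitive.
# If the separator is ':', the match is case insensitive.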
#

# Whitelist of accepted findings, keyed by "file:line:rule". It is filled
# from the file named after an optional leading "-w" command line option.
# Both the option and the filename are removed from @ARGV.
my %wl;
if(@ARGV && ($ARGV[0] eq "-w")) {
    shift @ARGV;
    my $file = shift @ARGV;
    if(!defined($file)) {
        die "badwords: -w requires a filename argument\n";
    }
    # Three-argument open with a lexical handle: the name is never parsed as
    # an open mode, and a whitelist that cannot be read is a hard error
    # rather than a silently empty whitelist.
    open(my $wlfh, '<', $file) ||
        die "badwords: cannot open whitelist $file: $!\n";
    while(my $line = <$wlfh>) {
        if($line =~ /^#/) {
            # allow #-comments
            next;
        }
        # file:line:rule - the rule is everything after the second colon
        # (the trailing newline is not part of it since '.' stops there)
        if($line =~ /^([^:]*):(\d+):(.*)/) {
            $wl{"$1:$2:$3"}=1;
            #print STDERR "whitelisted $1:$2:$3\n";
        }
    }
    close($wlfh);
}

# Rules read from stdin, one per line, in the form "bad[:=]better":
#   @w          the "bad" regex patterns, in input order
#   %alt        pattern => suggested replacement text
#   %exactcase  pattern => 1 when the separator was '=' (case sensitive)
# Lines starting with '#' and lines without a separator are ignored.
my @w;
my %alt;
my %exactcase;
while(my $line = <STDIN>) {
    chomp $line;
    if($line =~ /^#/) {
        next;
    }
    if($line =~ /^([^:=]*)([:=])(.*)/) {
        my ($bad, $sep, $better)=($1, $2, $3);
        # A pattern listed more than once is only checked once, otherwise
        # every hit would be reported and counted twice.
        if(!exists $alt{$bad}) {
            push @w, $bad;
        }
        $alt{$bad} = $better;
        if($sep eq "=") {
            $exactcase{$bad} = 1;
        }
    }
}

# Number of findings; bumped by file() and used for the exit status.
my $errors = 0;

# Scan one file for bad words.
#
# $f is the path of the file to check. Lines starting with a space are
# skipped. The target part of markdown links and backticked text are removed
# before every pattern in @w is tried on what remains of the line; at most
# one hit per pattern and line is reported (the last one on the line). A hit
# whose "file:line:pattern" key is set in %wl is only mentioned. Any other
# hit is reported on stderr and counted in $errors. A file that cannot be
# opened is reported and counted as well.
sub file {
    my ($f) = @_;
    my $l = 0;
    # Three-argument open with a lexical handle; an unreadable file must not
    # pass the check silently.
    my $fh;
    if(!open($fh, '<', $f)) {
        print STDERR "$f: error: cannot open file: $!\n";
        $errors++;
        return;
    }
    while(my $in = <$fh>) {
        $l++;
        chomp $in;
        if($in =~ /^ /) {
            next;
        }
        # remove the link part; non-greedy so that each link on a line is
        # handled on its own
        $in =~ s/(\[.*?\])\(.*?\)/$1/g;
        # remove backticked texts one span at a time, so that plain text
        # between two spans is still checked
        $in =~ s/\`[^\`]*\`//g;
        foreach my $w (@w) {
            # group 1 is the text before the hit, group 2 the hit itself
            my $re = $exactcase{$w} ? qr/^(.*)($w)/ : qr/^(.*)($w)/i;
            if($in =~ $re) {
                my ($p, $hit) = ($1, $2);
                my $c = length($p)+1;

                my $ch = "$f:$l:$w";
                if($wl{$ch}) {
                    # whitelisted
                    print STDERR "$ch found but whitelisted\n";
                    next;
                }

                print STDERR "$f:$l:$c: error: found bad word \"$w\"\n";
                # The line is passed as an argument and never as part of the
                # format, so a '%' in the text is printed as is.
                printf STDERR " %4d | %s\n", $l, $in;
                # Same prefix width as the row above; the underline covers
                # the matched text, not the length of the pattern.
                printf STDERR "      | %s^%s\n", " " x length($p),
                    "~" x (length($hit)-1);
                printf STDERR " maybe use \"%s\" instead?\n", $alt{$w};
                $errors++;
            }
        }
    }
    close($fh);
}

# Check every file named on the command line (what is left in @ARGV once the
# optional "-w <file>" pair has been shifted off).
foreach my $each (@ARGV) {
    file($each);
}

# The exit status is the number of findings. Only the low 8 bits of an exit
# status reach the parent process, so a raw count of 256 would read as
# success; clamp it to 255 instead.
my $status = $errors || 0;
if($status > 255) {
    $status = 255;
}
exit $status;
50 changes: 50 additions & 0 deletions .github/scripts/badwords.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright (C) Daniel Stenberg, <[email protected]>, et al.
#
# SPDX-License-Identifier: curl
#
back-end:backend
e-mail:email
run-time:runtime
set-up:setup
tool chain:toolchain
tool-chain:toolchain
wild-card:wildcard
wild card:wildcard
i'm:I am
you've:You have
they've:They have
they're:They are
should've:should have
don't:do not
could've:could have
doesn't:does not
isn't:is not
a html: an html
a http: an http
a ftp: an ftp
url =URL
internet\b=Internet
isation:ization
it's:it is
there's:there is
[^.]\. And: Rewrite it somehow?
^(And|So|But) = Rewrite it somehow?
\. But: Rewrite it somehow?
\. So : Rewrite without "so" ?
dir :directory
you'd:you would
you'll:you will
can't:cannot
that's:that is
web page:webpage
host name\b:hostname
host names\b:hostnames
file name\b:filename
file names\b:filenames
\buser name\b:username
\buser names\b:usernames
didn't:did not
doesn't:does not
won't:will not
couldn't:could not
\bwill\b:rewrite to present tense
29 changes: 29 additions & 0 deletions .github/workflows/badwords.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (C) Daniel Stenberg, <[email protected]>, et al.
#
# SPDX-License-Identifier: curl

# Runs .github/scripts/badwords.pl over the markdown files in docs/
name: badwords

on:
# Trigger the workflow on pushes to master and to branches matching '*/ci',
# and on pull requests that target master
push:
branches:
- master
- '*/ci'
pull_request:
branches:
- master

# empty map: no token permissions are requested for this workflow
permissions: {}

jobs:
check:

runs-on: ubuntu-latest

steps:
# pinned to a full commit hash; the trailing comment notes the release tag
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4

# the rules in badwords.txt are fed on stdin, badwords.ok is passed as the
# whitelist (-w) and the files matching docs/*.md are the ones checked
- name: check
run: ./.github/scripts/badwords.pl -w ./.github/scripts/badwords.ok < .github/scripts/badwords.txt docs/*.md

0 comments on commit 6081a09

Please sign in to comment.