diff --git a/.github/scripts/badwords.ok b/.github/scripts/badwords.ok
new file mode 100644
index 0000000000..a7dfa3af15
--- /dev/null
+++ b/.github/scripts/badwords.ok
@@ -0,0 +1,4 @@
+# whitelisted uses of bad words
+# file:line:rule
+docs/CVE-2020-19909.md:32:\bwill\b
+docs/CVE-2023-38546.md:36:file name\b
diff --git a/.github/scripts/badwords.pl b/.github/scripts/badwords.pl
new file mode 100755
index 0000000000..512f805524
--- /dev/null
+++ b/.github/scripts/badwords.pl
@@ -0,0 +1,126 @@
+#!/usr/bin/perl
+# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
+#
+# SPDX-License-Identifier: curl
+#
+# Scan files for discouraged words and phrases.
+#
+# Usage: badwords.pl [-w whitelist] file... < wordlist
+#
+# Each wordlist line read from stdin has the format:
+#
+#   bad[:=]correct
+#
+# "bad" is used as a regular expression and "correct" is the suggestion
+# printed with each match.
+# If separator is '=', the string is compared case sensitively.
+# If separator is ':', the check is done case insensitively.
+#
+# The optional whitelist holds "file:line:rule" entries for matches that
+# are reported as whitelisted instead of as errors.
+#
+# The exit status is the number of errors, capped at 255.
+#
+
+use strict;
+use warnings;
+
+# "file:line:rule" keys of accepted matches
+my %wl;
+if(@ARGV && ($ARGV[0] eq "-w")) {
+    shift @ARGV;
+    my $file = shift @ARGV;
+    defined($file) || die "-w needs a whitelist argument\n";
+    open(my $wfh, "<", $file) || die "cannot open $file: $!\n";
+    while(<$wfh>) {
+        if(/^#/) {
+            # allow #-comments
+            next;
+        }
+        if(/^([^:]*):(\d+):(.*)/) {
+            $wl{"$1:$2:$3"} = 1;
+        }
+    }
+    close($wfh);
+}
+
+my @w;   # rules in wordlist order
+my %alt; # rule => suggested replacement
+my %re;  # rule => compiled pattern, text before the match captured in $1
+while(<STDIN>) {
+    chomp;
+    if($_ =~ /^#/) {
+        next;
+    }
+    if($_ =~ /^([^:=]*)([:=])(.*)/) {
+        my ($bad, $sep, $better) = ($1, $2, $3);
+        if(exists $alt{$bad}) {
+            # a rule listed twice would report every match twice
+            next;
+        }
+        push @w, $bad;
+        $alt{$bad} = $better;
+        # '=' means case sensitive, ':' case insensitive
+        $re{$bad} = ($sep eq "=") ? qr/^(.*)$bad/ : qr/^(.*)$bad/i;
+    }
+}
+
+my $errors = 0;
+
+# Check one file against all rules and report matches on stderr.
+sub file {
+    my ($f) = @_;
+    my $lineno = 0;
+    my $fh;
+    if(!open($fh, "<", $f)) {
+        # an unreadable file must not pass as clean
+        print STDERR "$f: error: cannot open: $!\n";
+        $errors++;
+        return;
+    }
+    while(my $in = <$fh>) {
+        $lineno++;
+        chomp $in;
+        # NOTE(review): whitespace runs in this patch were collapsed; confirm
+        # whether this should require four spaces (markdown code block)
+        if($in =~ /^ /) {
+            next;
+        }
+        # remove the link part
+        $in =~ s/(\[.*\])\(.*\)/$1/g;
+        # remove backticked texts
+        $in =~ s/\`.*\`//g;
+        foreach my $w (@w) {
+            # the greedy prefix makes this find the last match on the line
+            if($in =~ $re{$w}) {
+                my $p = $1;
+                my $c = length($p) + 1;
+                # length of the matched text, not of the pattern source
+                my $mlen = $+[0] - $+[1];
+
+                my $ch = "$f:$lineno:$w";
+                if($wl{$ch}) {
+                    # whitelisted
+                    print STDERR "$ch found but whitelisted\n";
+                    next;
+                }
+
+                print STDERR "$f:$lineno:$c: error: found bad word \"$w\"\n";
+                # pass the line as an argument: it may contain '%'
+                printf STDERR " %4d | %s\n", $lineno, $in;
+                # same gutter width as above so the caret lines up
+                printf STDERR " %4s | %*s^%s\n", "", length($p), "",
+                    "~" x ($mlen > 0 ? $mlen - 1 : 0);
+                printf STDERR " maybe use \"%s\" instead?\n", $alt{$w};
+                $errors++;
+            }
+        }
+    }
+    close($fh);
+}
+
+foreach my $each (@ARGV) {
+    file($each);
+}
+# exit codes wrap modulo 256, so cap the count to keep it non-zero
+exit(($errors > 255) ? 255 : $errors);
diff --git a/.github/scripts/badwords.txt b/.github/scripts/badwords.txt
new file mode 100644
index 0000000000..2942ce8d8a
--- /dev/null
+++ b/.github/scripts/badwords.txt
@@ -0,0 +1,50 @@
+# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
+#
+# SPDX-License-Identifier: curl
+#
+back-end:backend
+e-mail:email
+run-time:runtime
+set-up:setup
+tool chain:toolchain
+tool-chain:toolchain
+wild-card:wildcard
+wild card:wildcard
+i'm:I am
+you've:You have
+they've:They have
+they're:They are
+should've:should have
+don't:do not
+could've:could have
+doesn't:does not
+isn't:is not
+ a html: an html
+ a http: an http
+ a ftp: an ftp
+ url =URL
+internet\b=Internet
+isation:ization
+it's:it is
+there's:there is
+[^.]\. And: Rewrite it somehow?
+^(And|So|But) = Rewrite it somehow?
+\. But: Rewrite it somehow?
+\. So : Rewrite without "so" ?
+ dir :directory
+you'd:you would
+you'll:you will
+can't:cannot
+that's:that is
+web page:webpage
+host name\b:hostname
+host names\b:hostnames
+file name\b:filename
+file names\b:filenames
+\buser name\b:username
+\buser names\b:usernames
+didn't:did not
+doesn't:does not
+won't:will not
+couldn't:could not
+\bwill\b:rewrite to present tense
diff --git a/.github/workflows/badwords.yml b/.github/workflows/badwords.yml
new file mode 100644
index 0000000000..87c5924feb
--- /dev/null
+++ b/.github/workflows/badwords.yml
@@ -0,0 +1,29 @@
+# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
+#
+# SPDX-License-Identifier: curl
+
+name: badwords
+
+on:
+  # Trigger the workflow on pushes to the master and '*/ci' branches and
+  # on pull requests that target master
+  push:
+    branches:
+      - master
+      - '*/ci'
+  pull_request:
+    branches:
+      - master
+
+permissions: {}
+
+jobs:
+  check:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4
+
+      - name: check
+        run: ./.github/scripts/badwords.pl -w ./.github/scripts/badwords.ok < .github/scripts/badwords.txt docs/*.md