Skip to content

Commit

Permalink
feat: print hex of unicode
Browse files Browse the repository at this point in the history
Useful for detecting unwanted characters in third-party contribution
patches via a CI hook.
  • Loading branch information
ben-grande committed Mar 14, 2024
1 parent a6a5c8b commit 8382f9d
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,36 @@
repos:
- repo: local
hooks:

- id: unicode-prohibit
name: unicode-prohibit
entry: scripts/unicode-prohibit.sh
language: script
pass_filenames: true
description: Prohibit Unicode

- id: reuse
name: reuse
entry: reuse
args: [lint]
language: python
pass_filenames: false
description: Lint files to comply with the REUSE Specification

- id: salt-lint
name: salt-lint
entry: scripts/salt-lint.sh
language: script
pass_filenames: true
description: Lint Salt files

- id: shellcheck
name: shellcheck
entry: scripts/shell-lint.sh
language: script
pass_filenames: true
description: Lint Shellscripts

- id: gitlint
name: gitlint
language: python
Expand Down
45 changes: 45 additions & 0 deletions scripts/unicode-prohibit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/sh

## SPDX-FileCopyrightText: 2024 Benjamin Grande M. S. <[email protected]>
##
## SPDX-License-Identifier: AGPL-3.0-or-later
##
## Finds Unicode recursively and prints in hexadecimal format.
##
## Usage: unicode-prohibit.sh [FILE...]
##
## Searches the given files (directories are searched recursively), or the
## whole repository when no file is given, for characters outside of the
## ASCII range. Every match is reported on stdout as:
##
##   FILE:LINE: HEX-BYTES
##
## Exit status: 0 when nothing was found; 1 when at least one match was
## reported or when the search itself could not be completed.

set -eu

command -v git >/dev/null ||
  { printf "Missing program: git\n" >&2; exit 1; }

## Resolve the top-level directory separately: a failure inside a command
## substitution used as an argument would not stop the script, and 'cd ""'
## succeeds on some shells.
toplevel="$(git rev-parse --show-toplevel)" || exit 1
cd "${toplevel}" || exit 1

## An empty first argument is treated as "no files given", which makes grep
## search the whole working directory recursively.
test -n "${1-}" || set --

## Pass the file names as "$@" so names containing whitespace or glob
## characters reach grep intact. grep exits 1 when nothing matched, which is
## not an error here; any higher status is handled after the report.
grep_status=0
unicode_match="$(grep -oPrHn --exclude-dir=.git --exclude-dir=LICENSES \
  -e "[^\x00-\x7F]" -- "$@")" || grep_status=$?

match_found=""
if test -n "${unicode_match}"; then
  ## Read line by line from a here-document: no word splitting or globbing
  ## of the matches, and the loop runs in the current shell so that
  ## 'match_found' survives it.
  while IFS= read -r line; do
    case "${line}" in
      *:*:*) ;;
      *)
        ## Not in FILE:LINE:MATCH form (e.g. a notice printed by grep).
        ## Report it verbatim and fail rather than hide it.
        printf '%s\n' "${line}"
        match_found="1"
        continue
        ;;
    esac

    ## Split from the right: the match is non-ASCII and the line number is
    ## numeric, so neither holds a ":" and file names with ":" stay whole.
    line_unicode="${line##*:}"
    line_rest="${line%:*}"
    line_number="${line_rest##*:}"
    line_file="${line_rest%:*}"

    case "${line_file}" in
      git/*|LICENSES/*|.reuse/dep5|*.asc) continue;;
    esac

    ## 'printf %s' avoids the trailing newline that echo would add to the
    ## dump; 'od -t x1' prints each byte in hexadecimal.
    line_hex="$(printf '%s' "${line_unicode}" | od -A n -v -t x1)"
    printf '%s\n' "${line_file}:${line_number}:${line_hex}"
    match_found="1"
  done <<EOF
${unicode_match}
EOF
fi

## A grep status above 1 means the search was incomplete (unsupported
## option, unreadable file, ...). Fail instead of silently passing the check.
if test "${grep_status}" -gt 1; then
  printf '%s\n' "grep failed with exit status ${grep_status}" >&2
  exit 1
fi

if test "${match_found}" = 1; then
  exit 1
fi

exit 0

0 comments on commit 8382f9d

Please sign in to comment.