From 213aeceb9f733dfd5a37b8a159762ee1c7a6dae7 Mon Sep 17 00:00:00 2001 From: Nils Werner <64034005+nils-werner-sonarsource@users.noreply.github.com> Date: Fri, 5 Nov 2021 11:18:24 +0100 Subject: [PATCH] SONARPY-928 Nested character classes are not supported by Python (#984) --- .../test/resources/expected/python-S5856.json | 17 ++--------------- pom.xml | 2 +- .../regex/duplicatesInCharacterClassCheck.py | 11 ++--------- .../resources/checks/regex/invalidRegexCheck.py | 12 +++++++++--- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/its/ruling/src/test/resources/expected/python-S5856.json b/its/ruling/src/test/resources/expected/python-S5856.json index c9b046f62d..9e48c65f87 100644 --- a/its/ruling/src/test/resources/expected/python-S5856.json +++ b/its/ruling/src/test/resources/expected/python-S5856.json @@ -1,21 +1,8 @@ { -'project:biopython/Bio/motifs/pfm.py':[ -338, -], -'project:mypy-0.782/test-data/stdlib-samples/3.2/glob.py':[ -76, -77, -], -'project:numpy-1.16.4/numpy/distutils/mingw32ccompiler.py':[ -53, -], -'project:tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py':[ -75, -], 'project:tornado-2.3/demos/appengine/markdown.py':[ -826, +822, ], 'project:tornado-2.3/demos/blog/markdown.py':[ -826, +822, ], } diff --git a/pom.xml b/pom.xml index b28dae84e2..3763cb3104 100644 --- a/pom.xml +++ b/pom.xml @@ -91,7 +91,7 @@ 3.9.0 8.9.0.43852 3.35.1.2719 - 1.21.0.821 + 1.21.0.829 6.0.0.32513 1.23 3.17.3 diff --git a/python-checks/src/test/resources/checks/regex/duplicatesInCharacterClassCheck.py b/python-checks/src/test/resources/checks/regex/duplicatesInCharacterClassCheck.py index b473da3991..cd39d80325 100644 --- a/python-checks/src/test/resources/checks/regex/duplicatesInCharacterClassCheck.py +++ b/python-checks/src/test/resources/checks/regex/duplicatesInCharacterClassCheck.py @@ -31,12 +31,6 @@ def non_compliant(input): re.match(r"[\"\".]", input) # Noncompliant re.match(r"[\x{F600}-\x{F637}\x{F608}]", input) # Noncompliant re.match(r"[\Qxx\E]", input) # Noncompliant - re.match(r"[[a][a]]", input) # Noncompliant - re.match(r"[[abc][b]]", input) # Noncompliant - re.match(r"[[^a]b]", input) # Noncompliant - re.match(r"[[^a]z]", input) # Noncompliant - re.match(r"[a[^z]]", input) # Noncompliant - re.match(r"[z[^a]]", input) # Noncompliant re.match(r"[\s\Sx]", input) # Noncompliant re.match(r"(?U)[\s\Sx]", input) # Noncompliant re.match(r"[\w\d]", input) # Noncompliant @@ -54,6 +48,8 @@ def non_compliant(input): re.match(r"(?i)[äÄ]", input) # Noncompliant re.match(r"(?i)[Ä-Üä]", input) # Noncompliant re.match(r"(?i)[a-Öö]", input) # Noncompliant + re.match(r"[[^\s\S]x]", input) # Noncompliant + re.match(r"(?U)[[^\W]a]", input) # Noncompliant def compliant(input): @@ -61,7 +57,6 @@ def compliant(input): re.match(r"[0-9][0-9]?", input) re.match(r"[xX]", input) re.match(r"[\s\S]", input) - re.match(r"[[^\s\S]x]", input) re.match(r"(?U)[\s\S]", input) re.match(r"(?U)[\S\u0085\u2028\u2029]", input) re.match(r"[\d\D]", input) @@ -85,8 +80,6 @@ def compliant(input): re.match(r"[z-a9-0]", input) # Illegal character class should not make the check explode re.match(r"[aa", input) # Check should not run on syntactically invalid regexen re.match(r"(?U)[\wä]", input) # False negative because we don't support Unicode characters in \w and \W - re.match(r"(?U)[[^\W]a]", input) # False negative because once we negate a character class whose contents we don't - # fully understand, we ignore it to avoid false positives re.match(r"[[a-z&&b-e]c]", input) # FN because we don't support intersections re.match(r"(?i)[A-_d-{]", input) # FN because we ignore case insensitivity unless both ends of the ranges are letters re.match(r"(?i)[A-z_]", input) # FN because A-z gets misinterpreted as A-Za-z due to the way we handle case insensitivity diff --git a/python-checks/src/test/resources/checks/regex/invalidRegexCheck.py b/python-checks/src/test/resources/checks/regex/invalidRegexCheck.py index f413436094..6ac6a8b6fa 100644 --- a/python-checks/src/test/resources/checks/regex/invalidRegexCheck.py +++ b/python-checks/src/test/resources/checks/regex/invalidRegexCheck.py @@ -25,6 +25,12 @@ def unsupported_feature(input): def false_positives(): - re.compile(r"\s*([ACGT])\s*[[]*[|]*\s*([0-9.\s]+)\s*[]]*\s*") # Noncompliant - re.compile(r'^\s+\[([\s*[0-9]*)\] ([a-zA-Z0-9_]*)') # Noncompliant - re.compile(r'([^,[\]]*)(\[([^\]]+)\])?$') # Noncompliant + re.compile(r''' + # Match tail of: [text][id] + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + \[ + (?P.*?) + \] + ''', re.X | re.S) + # Noncompliant@-5