From 213aeceb9f733dfd5a37b8a159762ee1c7a6dae7 Mon Sep 17 00:00:00 2001
From: Nils Werner <64034005+nils-werner-sonarsource@users.noreply.github.com>
Date: Fri, 5 Nov 2021 11:18:24 +0100
Subject: [PATCH] SONARPY-928 Nested character classes are not supported by
Python (#984)
---
.../test/resources/expected/python-S5856.json | 17 ++---------------
pom.xml | 2 +-
.../regex/duplicatesInCharacterClassCheck.py | 11 ++---------
.../resources/checks/regex/invalidRegexCheck.py | 12 +++++++++---
4 files changed, 14 insertions(+), 28 deletions(-)
diff --git a/its/ruling/src/test/resources/expected/python-S5856.json b/its/ruling/src/test/resources/expected/python-S5856.json
index c9b046f62d..9e48c65f87 100644
--- a/its/ruling/src/test/resources/expected/python-S5856.json
+++ b/its/ruling/src/test/resources/expected/python-S5856.json
@@ -1,21 +1,8 @@
{
-'project:biopython/Bio/motifs/pfm.py':[
-338,
-],
-'project:mypy-0.782/test-data/stdlib-samples/3.2/glob.py':[
-76,
-77,
-],
-'project:numpy-1.16.4/numpy/distutils/mingw32ccompiler.py':[
-53,
-],
-'project:tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py':[
-75,
-],
'project:tornado-2.3/demos/appengine/markdown.py':[
-826,
+822,
],
'project:tornado-2.3/demos/blog/markdown.py':[
-826,
+822,
],
}
diff --git a/pom.xml b/pom.xml
index b28dae84e2..3763cb3104 100644
--- a/pom.xml
+++ b/pom.xml
@@ -91,7 +91,7 @@
3.9.0
8.9.0.43852
3.35.1.2719
- 1.21.0.821
+ 1.21.0.829
6.0.0.32513
1.23
3.17.3
diff --git a/python-checks/src/test/resources/checks/regex/duplicatesInCharacterClassCheck.py b/python-checks/src/test/resources/checks/regex/duplicatesInCharacterClassCheck.py
index b473da3991..cd39d80325 100644
--- a/python-checks/src/test/resources/checks/regex/duplicatesInCharacterClassCheck.py
+++ b/python-checks/src/test/resources/checks/regex/duplicatesInCharacterClassCheck.py
@@ -31,12 +31,6 @@ def non_compliant(input):
re.match(r"[\"\".]", input) # Noncompliant
re.match(r"[\x{F600}-\x{F637}\x{F608}]", input) # Noncompliant
re.match(r"[\Qxx\E]", input) # Noncompliant
- re.match(r"[[a][a]]", input) # Noncompliant
- re.match(r"[[abc][b]]", input) # Noncompliant
- re.match(r"[[^a]b]", input) # Noncompliant
- re.match(r"[[^a]z]", input) # Noncompliant
- re.match(r"[a[^z]]", input) # Noncompliant
- re.match(r"[z[^a]]", input) # Noncompliant
re.match(r"[\s\Sx]", input) # Noncompliant
re.match(r"(?U)[\s\Sx]", input) # Noncompliant
re.match(r"[\w\d]", input) # Noncompliant
@@ -54,6 +48,8 @@ def non_compliant(input):
re.match(r"(?i)[äÄ]", input) # Noncompliant
re.match(r"(?i)[Ä-Üä]", input) # Noncompliant
re.match(r"(?i)[a-Öö]", input) # Noncompliant
+ re.match(r"[[^\s\S]x]", input) # Noncompliant
+ re.match(r"(?U)[[^\W]a]", input) # Noncompliant
def compliant(input):
@@ -61,7 +57,6 @@ def compliant(input):
re.match(r"[0-9][0-9]?", input)
re.match(r"[xX]", input)
re.match(r"[\s\S]", input)
- re.match(r"[[^\s\S]x]", input)
re.match(r"(?U)[\s\S]", input)
re.match(r"(?U)[\S\u0085\u2028\u2029]", input)
re.match(r"[\d\D]", input)
@@ -85,8 +80,6 @@ def compliant(input):
re.match(r"[z-a9-0]", input) # Illegal character class should not make the check explode
re.match(r"[aa", input) # Check should not run on syntactically invalid regexen
re.match(r"(?U)[\wä]", input) # False negative because we don't support Unicode characters in \w and \W
- re.match(r"(?U)[[^\W]a]", input) # False negative because once we negate a character class whose contents we don't
- # fully understand, we ignore it to avoid false positives
re.match(r"[[a-z&&b-e]c]", input) # FN because we don't support intersections
re.match(r"(?i)[A-_d-{]", input) # FN because we ignore case insensitivity unless both ends of the ranges are letters
re.match(r"(?i)[A-z_]", input) # FN because A-z gets misinterpreted as A-Za-z due to the way we handle case insensitivity
diff --git a/python-checks/src/test/resources/checks/regex/invalidRegexCheck.py b/python-checks/src/test/resources/checks/regex/invalidRegexCheck.py
index f413436094..6ac6a8b6fa 100644
--- a/python-checks/src/test/resources/checks/regex/invalidRegexCheck.py
+++ b/python-checks/src/test/resources/checks/regex/invalidRegexCheck.py
@@ -25,6 +25,12 @@ def unsupported_feature(input):
def false_positives():
- re.compile(r"\s*([ACGT])\s*[[]*[|]*\s*([0-9.\s]+)\s*[]]*\s*") # Noncompliant
- re.compile(r'^\s+\[([\s*[0-9]*)\] ([a-zA-Z0-9_]*)') # Noncompliant
- re.compile(r'([^,[\]]*)(\[([^\]]+)\])?$') # Noncompliant
+ re.compile(r'''
+ # Match tail of: [text][id]
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+ \[
+ (?P<id>.*?)
+ \]
+ ''', re.X | re.S)
+ # Noncompliant@-5