Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use raw strings in some places #2604

Merged
merged 9 commits into from
Jun 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,9 @@

rd_auto_link <- function(x) {
x <- unlist(x)
x <- gsub("([a-zA-Z0-9.]+)::([a-zA-Z0-9._]+)\\(\\)", "\\\\code{\\\\link[\\1:\\2]{\\1::\\2()}}", x)
x <- gsub("([^:a-zA-Z0-9._])([a-zA-Z0-9._]+)\\(\\)", "\\1\\\\code{\\\\link[=\\2]{\\2()}}", x)
x <- gsub("`([^`]+)`", "\\\\code{\\1}", x)
x <- gsub(R"{([a-zA-Z0-9.]+)::([a-zA-Z0-9._]+)\(\)}", R"(\\code{\\link[\1:\2]{\1::\2()}})", x)
x <- gsub(R"{([^:a-zA-Z0-9._])([a-zA-Z0-9._]+)\(\)}", R"(\1\\code{\\link[=\2]{\2()}})", x)
x <- gsub("`([^`]+)`", R"(\\code{\1})", x)

Check warning on line 173 in R/zzz.R

View check run for this annotation

Codecov / codecov/patch

R/zzz.R#L171-L173

Added lines #L171 - L173 were not covered by tests
x
}

Expand All @@ -181,7 +181,7 @@
"The following functions are sometimes regarded as undesirable:",
"\\itemize{",
sprintf(
"\\item \\code{\\link[=%1$s]{%1$s()}} As an alternative, %2$s.",
R"(\item \code{\link[=%1$s]{%1$s()}} As an alternative, %2$s.)",

Check warning on line 184 in R/zzz.R

View check run for this annotation

Codecov / codecov/patch

R/zzz.R#L184

Added line #L184 was not covered by tests
names(default_undesirable_functions), alternatives
),
"}"
Expand Down
168 changes: 81 additions & 87 deletions tests/testthat/test-fixed_regex_linter.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
# NB: escaping is confusing. We have to double-escape everything -- the first
# escape creates a string that will be parse()d, the second escape is normal
# escaping that would be done in R code. E.g. in "\\\\.", the R code would
# read like "\\.", but in order to create those two slashes, we need to write
# "\\\\." in the string here.

test_that("fixed_regex_linter skips allowed usages", {
linter <- fixed_regex_linter()

expect_lint("gsub('^x', '', y)", NULL, linter)
expect_lint("grep('x$', '', y)", NULL, linter)
expect_lint("sub('[a-zA-Z]', '', y)", NULL, linter)
expect_lint("grepl(fmt, y)", NULL, linter)
expect_lint("regexec('\\\\s', '', y)", NULL, linter)
expect_lint(R"{regexec('\\s', '', y)}", NULL, linter)
expect_lint("grep('a(?=b)', x, perl = TRUE)", NULL, linter)
expect_lint("grep('0+1', x, perl = TRUE)", NULL, linter)
expect_lint("grep('1*2', x)", NULL, linter)
expect_lint("grep('a|b', x)", NULL, linter)
expect_lint("grep('\\\\[|\\\\]', x)", NULL, linter)
expect_lint(R"{grep('\\[|\\]', x)}", NULL, linter)

# if fixed=TRUE is already set, regex patterns don't matter
expect_lint("gsub('\\\\.', '', y, fixed = TRUE)", NULL, linter)
expect_lint(R"{gsub('\\.', '', y, fixed = TRUE)}", NULL, linter)

# ignore.case=TRUE implies regex interpretation
expect_lint("gsub('abcdefg', '', y, ignore.case = TRUE)", NULL, linter)
Expand All @@ -37,10 +31,10 @@ test_that("fixed_regex_linter blocks simple disallowed usages", {
linter <- fixed_regex_linter()
lint_msg <- rex::rex("This regular expression is static")

expect_lint("gsub('\\\\.', '', x)", lint_msg, linter)
expect_lint(R"{gsub('\\.', '', x)}", lint_msg, linter)
expect_lint("grepl('abcdefg', x)", lint_msg, linter)
expect_lint("gregexpr('a-z', y)", lint_msg, linter)
expect_lint("regexec('\\\\$', x)", lint_msg, linter)
expect_lint(R"{regexec('\\$', x)}", lint_msg, linter)
expect_lint("grep('\n', x)", lint_msg, linter)

# naming the argument doesn't matter (if it's still used positionally)
Expand All @@ -51,21 +45,21 @@ patrick::with_parameters_test_that(
"fixed_regex_linter is robust to unrecognized escapes error",
{
expect_lint(
sprintf("grep('\\\\%s', x)", char),
sprintf(R"{grep('\\%s', x)}", char),
rex::rex("This regular expression is static"),
fixed_regex_linter()
)

expect_lint(
sprintf("strsplit('a%sb', '\\\\%s')", char, char),
sprintf(R"{strsplit('a%sb', '\\%s')}", char, char),
rex::rex("This regular expression is static"),
fixed_regex_linter()
)
},
.cases = local({
char <- c(
"^", "$", "{", "}", "(", ")", ".", "*", "+", "?",
"|", "[", "]", "\\\\", "<", ">", "=", ":", ";", "/",
"|", "[", "]", R"(\\)", "<", ">", "=", ":", ";", "/",
"_", "-", "!", "@", "#", "%", "&", "~"
)
data.frame(char = char, .test_name = char)
Expand All @@ -87,7 +81,7 @@ test_that("fixed_regex_linter catches null calls to strsplit as well", {
linter <- fixed_regex_linter()

expect_lint("strsplit(x, '^x')", NULL, linter)
expect_lint("strsplit(x, '\\\\s')", NULL, linter)
expect_lint(R"{strsplit(x, '\\s')}", NULL, linter)
expect_lint("strsplit(x, 'a(?=b)', perl = TRUE)", NULL, linter)
expect_lint("strsplit(x, '0+1', perl = TRUE)", NULL, linter)
expect_lint("strsplit(x, 'a|b')", NULL, linter)
Expand All @@ -97,15 +91,15 @@ test_that("fixed_regex_linter catches null calls to strsplit as well", {
expect_lint("tstrsplit(x, fmt)", NULL, linter)

# if fixed=TRUE is already set, regex patterns don't matter
expect_lint("strsplit(x, '\\\\.', fixed = TRUE)", NULL, linter)
expect_lint("strsplit(x, '\\\\.', fixed = T)", NULL, linter)
expect_lint(R"{strsplit(x, '\\.', fixed = TRUE)}", NULL, linter)
expect_lint(R"{strsplit(x, '\\.', fixed = T)}", NULL, linter)
})

test_that("fixed_regex_linter catches calls to strsplit as well", {
linter <- fixed_regex_linter()
lint_msg <- rex::rex("This regular expression is static")

expect_lint("strsplit(x, '\\\\.')", lint_msg, linter)
expect_lint(R"{strsplit(x, '\\.')}", lint_msg, linter)
expect_lint("strsplit(x, '[.]')", lint_msg, linter)

expect_lint("tstrsplit(x, 'abcdefg')", lint_msg, linter)
Expand All @@ -115,8 +109,8 @@ test_that("fixed_regex_linter is more exact about distinguishing \\s from \\:",
linter <- fixed_regex_linter()
lint_msg <- rex::rex("This regular expression is static")

expect_lint("grep('\\\\s', '', x)", NULL, linter)
expect_lint("grep('\\\\:', '', x)", lint_msg, linter)
expect_lint(R"{grep('\\s', '', x)}", NULL, linter)
expect_lint(R"{grep('\\:', '', x)}", lint_msg, linter)
})

## tests for stringr functions
Expand All @@ -126,12 +120,12 @@ test_that("fixed_regex_linter skips allowed stringr usages", {
expect_lint("str_replace(y, '[a-zA-Z]', '')", NULL, linter)
expect_lint("str_replace_all(y, '^x', '')", NULL, linter)
expect_lint("str_detect(y, fmt)", NULL, linter)
expect_lint("str_extract(y, '\\\\s')", NULL, linter)
expect_lint("str_extract_all(y, '\\\\s')", NULL, linter)
expect_lint(R"{str_extract(y, '\\s')}", NULL, linter)
expect_lint(R"{str_extract_all(y, '\\s')}", NULL, linter)
expect_lint("str_which(x, '1*2')", NULL, linter)

# if fixed() is already set, regex patterns don't matter
expect_lint("str_replace(y, fixed('\\\\.'), '')", NULL, linter)
expect_lint(R"{str_replace(y, fixed('\\.'), '')}", NULL, linter)

# namespace qualification doesn't matter
expect_lint("stringr::str_replace(y, stringr::fixed('abcdefg'), '')", NULL, linter)
Expand All @@ -141,16 +135,16 @@ test_that("fixed_regex_linter blocks simple disallowed usages of stringr functio
linter <- fixed_regex_linter()
lint_msg <- rex::rex("This regular expression is static")

expect_lint("str_replace_all(x, '\\\\.', '')", lint_msg, linter)
expect_lint(R"{str_replace_all(x, '\\.', '')}", lint_msg, linter)
expect_lint("str_detect(x, 'abcdefg')", lint_msg, linter)
expect_lint("str_locate(y, 'a-z')", lint_msg, linter)
expect_lint("str_subset(x, '\\\\$')", lint_msg, linter)
expect_lint(R"{str_subset(x, '\\$')}", lint_msg, linter)
expect_lint("str_which(x, '\n')", lint_msg, linter)

# named, positional arguments are still caught
expect_lint("str_locate(y, pattern = 'a-z')", lint_msg, linter)
# nor do other named arguments throw things off
expect_lint("str_starts(x, '\\\\.', negate = TRUE)", lint_msg, linter)
expect_lint(R"{str_starts(x, '\\.', negate = TRUE)}", lint_msg, linter)
})

test_that("fixed_regex_linter catches calls to str_split as well", {
Expand All @@ -161,8 +155,8 @@ test_that("fixed_regex_linter catches calls to str_split as well", {
expect_lint("str_split(x, fmt)", NULL, linter)

# if fixed() is already set, regex patterns don't matter
expect_lint("str_split(x, fixed('\\\\.'))", NULL, linter)
expect_lint("str_split(x, '\\\\.')", lint_msg, linter)
expect_lint(R"{str_split(x, fixed('\\.'))}", NULL, linter)
expect_lint(R"{str_split(x, '\\.')}", lint_msg, linter)
expect_lint("str_split(x, '[.]')", lint_msg, linter)
})

Expand All @@ -187,24 +181,24 @@ test_that("one-character character classes with escaped characters are caught",
linter <- fixed_regex_linter()
lint_msg <- rex::rex("This regular expression is static")

expect_lint("gsub('[\\n]', '', x)", lint_msg, linter)
expect_lint("gsub('[\\\"]', '', x)", lint_msg, linter)
expect_lint('gsub("\\\\<", "x", x, perl = TRUE)', lint_msg, linter)

expect_lint("str_split(x, '[\\1]')", lint_msg, linter)
expect_lint("str_split(x, '[\\12]')", lint_msg, linter)
expect_lint("str_split(x, '[\\123]')", lint_msg, linter)
expect_lint("str_split(x, '[\\xa]')", lint_msg, linter)
expect_lint("str_split(x, '[\\x32]')", lint_msg, linter)
expect_lint("str_split(x, '[\\uF]')", lint_msg, linter)
expect_lint("str_split(x, '[\\u01]')", lint_msg, linter)
expect_lint("str_split(x, '[\\u012]')", lint_msg, linter)
expect_lint("str_split(x, '[\\u0123]')", lint_msg, linter)
expect_lint("str_split(x, '[\\U8]')", lint_msg, linter)
expect_lint("str_split(x, '[\\U1d4d7]')", lint_msg, linter)
expect_lint("str_split(x, '[\\u{1}]')", lint_msg, linter)
expect_lint("str_split(x, '[\\U{F7D5}]')", lint_msg, linter)
expect_lint("str_split(x, '[\\U{1D4D7}]')", lint_msg, linter)
expect_lint(R"{gsub('[\n]', '', x)}", lint_msg, linter)
expect_lint(R"{gsub('[\"]', '', x)}", lint_msg, linter)
expect_lint(R'{gsub("\\<", "x", x, perl = TRUE)}', lint_msg, linter)

expect_lint(R"{str_split(x, '[\1]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\12]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\123]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\xa]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\x32]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\uF]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\u01]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\u012]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\u0123]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\U8]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\U1d4d7]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\u{1}]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\U{F7D5}]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\U{1D4D7}]')}", lint_msg, linter)
})

test_that("bracketed unicode escapes are caught", {
Expand All @@ -218,15 +212,15 @@ test_that("bracketed unicode escapes are caught", {

test_that("escaped characters are handled correctly", {
linter <- fixed_regex_linter()
expect_lint("gsub('\\n+', '', sql)", NULL, linter)
expect_lint(R"{gsub('\n+', '', sql)}", NULL, linter)
expect_lint('gsub("\\n{2,}", "\n", D)', NULL, linter)
expect_lint('gsub("[\\r\\n]", "", x)', NULL, linter)
expect_lint('gsub("\\n $", "", y)', NULL, linter)
expect_lint('gsub("```\\n*```r*\\n*", "", x)', NULL, linter)
expect_lint(R'{gsub("[\r\n]", "", x)}', NULL, linter)
expect_lint(R'{gsub("\n $", "", y)}', NULL, linter)
expect_lint(R'{gsub("```\n*```r*\n*", "", x)}', NULL, linter)
expect_lint('strsplit(x, "(;|\n)")', NULL, linter)
expect_lint('strsplit(x, "(;|\\n)")', NULL, linter)
expect_lint('grepl("[\\\\W]", x, perl = TRUE)', NULL, linter)
expect_lint('grepl("[\\\\W]", x)', NULL, linter)
expect_lint(R'{strsplit(x, "(;|\n)")}', NULL, linter)
expect_lint(R'{grepl("[\\W]", x, perl = TRUE)}', NULL, linter)
expect_lint(R'{grepl("[\\W]", x)}', NULL, linter)
})

# make sure the logic is properly vectorized
Expand All @@ -238,10 +232,10 @@ test_that("fixed replacements vectorize and recognize str_detect", {
linter <- fixed_regex_linter()
# properly vectorized
expect_lint(
trim_some("{
trim_some(R"({
grepl('abcdefg', x)
grepl('a[.]\\\\.b\\n', x)
}"),
grepl('a[.]\\.b\n', x)
})"),
list(
list(rex::rex('Use "abcdefg" with fixed = TRUE'), line_number = 2L),
list(rex::rex('Use "a..b\\n" with fixed = TRUE'), line_number = 3L)
Expand Down Expand Up @@ -289,37 +283,37 @@ robust_non_printable_unicode <- function() {
# styler: off
local({
.cases <- tibble::tribble(
~.test_name, ~regex_expr, ~fixed_expr,
"[.]", "[.]", ".",
'[\\\"]', '[\\\"]', '\\"',
"[]]", "[]]", "]",
"\\\\.", "\\\\.", ".",
"\\\\:", "\\\\:", ":",
"\\\\<", "\\\\<", "<",
"\\\\$", "\\\\$", "$",
"[\\1]", "[\\1]", "\\001",
"\\1", "\\1", "\\001",
"[\\12]", "[\\12]", "\\n",
"[\\123]", "[\\123]", "S",
"a[*]b", "a[*]b", "a*b",
"abcdefg", "abcdefg", "abcdefg",
"abc\\U{A0DEF}ghi", "abc\\U{A0DEF}ghi", robust_non_printable_unicode(),
"a-z", "a-z", "a-z",
"[\\n]", "[\\n]", "\\n",
"\\n", "\n", "\\n",
"[\\u01]", "[\\u01]", "\\001",
"[\\u012]", "[\\u012]", "\\022",
"[\\u0123]", "[\\u0123]", "\u0123",
"[\\u{1}]", "[\\u{1}]", "\\001",
"[\\U1d4d7]", "[\\U1d4d7]", "\U1D4D7",
"[\\U{1D4D7}]", "[\\U{1D4D7}]", "\U1D4D7",
"[\\U8]", "[\\U8]", "\\b",
"\\u{A0}", "\\u{A0}", "\uA0",
"\\u{A0}\\U{0001d4d7}", "\\u{A0}\\U{0001d4d7}", "\uA0\U1D4D7",
"[\\uF]", "[\\uF]", "\\017",
"[\\U{F7D5}]", "[\\U{F7D5}]", "\UF7D5",
"[\\x32]", "[\\x32]", "2",
"[\\xa]", "[\\xa]", "\\n"
~.test_name, ~regex_expr, ~fixed_expr,
"[.]", "[.]", ".",
'[\\\"]', R'([\"])', '\\"',
"[]]", "[]]", "]",
R"(\\.)", R"(\\.)", ".",
R"(\\:)", R"(\\:)", ":",
R"(\\<)", R"(\\<)", "<",
R"(\\$)", R"(\\$)", "$",
R"([\1])", R"([\1])", R"(\001)",
R"(\1)", R"(\1)", R"(\001)",
R"([\12])", R"([\12])", R"(\n)",
R"([\123])", R"([\123])", "S",
"a[*]b", "a[*]b", "a*b",
"abcdefg", "abcdefg", "abcdefg",
"abc\\U{A0DEF}ghi", "abc\\U{A0DEF}ghi", robust_non_printable_unicode(),
"a-z", "a-z", "a-z",
R"([\n])", R"([\n])", R"(\n)",
R"(\n)", "\n", R"(\n)",
R"([\u01])", R"([\u01])", R"(\001)",
R"([\u012])", R"([\u012])", R"(\022)",
R"([\u0123])", R"([\u0123])", "\u0123",
R"([\u{1}])", R"([\u{1}])", R"(\001)",
R"([\U1d4d7])", R"([\U1d4d7])", "\U1D4D7",
R"([\U{1D4D7}])", R"([\U{1D4D7}])", "\U1D4D7",
R"([\U8])", R"([\U8])", R"(\b)",
R"(\u{A0})", R"(\u{A0})", "\uA0",
R"(\u{A0}\U{0001d4d7})", R"(\u{A0}\U{0001d4d7})", "\uA0\U1D4D7",
R"([\uF])", R"([\uF])", R"(\017)",
R"([\U{F7D5}])", R"([\U{F7D5}])", "\UF7D5",
R"([\x32])", R"([\x32])", "2",
R"([\xa])", R"([\xa])", R"(\n)"
)
if (.Platform$OS.type == "windows" && !hasName(R.Version(), "crt")) {
skip_cases <- c(
Expand Down
Loading