Skip to content

Commit ea9d94e

Browse files
committed
Add code diagnostics (missing/extra commas)
1 parent 5e98b93 commit ea9d94e

File tree

4 files changed

+208
-2
lines changed

4 files changed

+208
-2
lines changed

DESCRIPTION

+3-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ Imports:
7171
xtable,
7272
digest,
7373
htmltools (>= 0.3),
74-
R6 (>= 2.0)
74+
R6 (>= 2.0),
75+
sourcetools
7576
Suggests:
7677
datasets,
7778
Cairo (>= 1.5-5),
@@ -92,6 +93,7 @@ Collate:
9293
'utils.R'
9394
'bootstrap.R'
9495
'cache.R'
96+
'diagnose.R'
9597
'fileupload.R'
9698
'stack.R'
9799
'graph.R'

R/diagnose.R

+164
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# Analyze an R file for possible extra or missing commas. Returns FALSE if any
2+
# problems detected, TRUE otherwise.
3+
diagnoseCode <- function(path = NULL, text = NULL) {
4+
if (!xor(is.null(path), is.null(text))) {
5+
stop("Must specify `path` or `text`, but not both.")
6+
}
7+
8+
if (!is.null(path)) {
9+
tokens <- sourcetools::tokenize_file(path)
10+
} else {
11+
tokens <- sourcetools::tokenize_string(text)
12+
}
13+
14+
find_scopes <- function(tokens) {
15+
# Strip whitespace and comments
16+
tokens <- tokens[!(tokens$type %in% c("whitespace", "comment")),]
17+
18+
# Replace various types of things with "value"
19+
tokens$type[tokens$type %in% c("string", "number", "symbol", "keyword")] <- "value"
20+
21+
# Record types for close and open brace/bracket/parens, and commas
22+
brace_idx <- tokens$value %in% c("(", ")", "{", "}", "[", "]", ",")
23+
tokens$type[brace_idx] <- tokens$value[brace_idx]
24+
25+
# Stack-related function for recording scope. Starting scope is "{"
26+
stack <- "{"
27+
push <- function(x) {
28+
stack <<- c(stack, x)
29+
}
30+
pop <- function() {
31+
if (length(stack) == 1) {
32+
# Stack underflow, but we need to keep going
33+
return(NA_character_)
34+
}
35+
res <- stack[length(stack)]
36+
stack <<- stack[-length(stack)]
37+
res
38+
}
39+
peek <- function() {
40+
stack[length(stack)]
41+
}
42+
43+
# First, establish a scope for each token. For opening and closing
44+
# braces/brackets/parens, the scope at that location is the *surrounding*
45+
# scope, not the new scope created by the brace/bracket/paren.
46+
for (i in seq_len(nrow(tokens))) {
47+
value <- tokens$value[i]
48+
49+
tokens$scope[i] <- peek()
50+
if (value %in% c("{", "(", "[")) {
51+
push(value)
52+
53+
} else if (value == "}") {
54+
if (!identical(pop(), "{"))
55+
tokens$err[i] <- "unmatched_brace"
56+
# For closing brace/paren/bracket, get the scope after popping
57+
tokens$scope[i] <- peek()
58+
59+
} else if (value == ")") {
60+
if (!identical(pop(), "("))
61+
tokens$err[i] <- "unmatched_paren"
62+
tokens$scope[i] <- peek()
63+
64+
} else if (value == "]") {
65+
if (!identical(pop(), "["))
66+
tokens$err[i] <- "unmatched_bracket"
67+
tokens$scope[i] <- peek()
68+
}
69+
}
70+
71+
tokens
72+
}
73+
74+
check_commas <- function(tokens) {
75+
# Find extra and missing commas
76+
tokens$err <- mapply(
77+
tokens$type,
78+
c("", tokens$type[-length(tokens$type)]),
79+
c(tokens$type[-1], ""),
80+
tokens$scope,
81+
tokens$err,
82+
SIMPLIFY = FALSE,
83+
FUN = function(type, prevType, nextType, scope, err) {
84+
# If an error was already found, just return it. This could have
85+
# happened in the brace/paren/bracket matching phase.
86+
if (!is.na(err)) {
87+
return(err)
88+
}
89+
if (is.na(scope)) {
90+
if (type == "}") return("unmatched_brace")
91+
else if (type == ")") return("unmatched_paren")
92+
else if (type == "]") return("unmatched_bracket")
93+
94+
} else if (scope == "(") {
95+
96+
if ((prevType == "(" && type == ",") ||
97+
(type == "," && nextType == ")") ||
98+
(prevType == "," && type == ",")) {
99+
return("extra_comma")
100+
}
101+
102+
if ((prevType == ")" && type == "value") ||
103+
(prevType == "value" && type == "value")) {
104+
return("missing_comma")
105+
}
106+
}
107+
108+
NA_character_
109+
}
110+
)
111+
112+
tokens
113+
}
114+
115+
116+
tokens$err <- NA_character_
117+
tokens <- find_scopes(tokens)
118+
tokens <- check_commas(tokens)
119+
120+
if (all(is.na(tokens$err))) {
121+
# No errors found
122+
return(TRUE)
123+
124+
} else {
125+
# If any errors were found, print messages
126+
if (!is.null(path)) {
127+
lines <- readLines(path)
128+
} else {
129+
lines <- strsplit(text, "\n")[[1]]
130+
}
131+
132+
# Print out the line of code with the error, and point to the column with
133+
# the error.
134+
show_code_error <- function(msg, lines, row, col) {
135+
message(paste0(
136+
msg, "\n",
137+
row, ":", lines[row], "\n",
138+
paste0(rep.int(" ", nchar(as.character(row)) + 1), collapse = ""),
139+
gsub(perl = TRUE, "[^\\s]", " ", substr(lines[row], 1, col-1)), "^"
140+
))
141+
}
142+
143+
err_idx <- which(!is.na(tokens$err))
144+
msg <- ""
145+
for (i in err_idx) {
146+
row <- tokens$row[i]
147+
col <- tokens$column[i]
148+
err <- tokens$err[i]
149+
150+
if (err == "missing_comma") {
151+
show_code_error("Possible missing comma at:", lines, row, col)
152+
} else if (err == "extra_comma") {
153+
show_code_error("Possible extra comma at:", lines, row, col)
154+
} else if (err == "unmatched_brace") {
155+
show_code_error("Possible unmatched '}' at:", lines, row, col)
156+
} else if (err == "unmatched_paren") {
157+
show_code_error("Possible unmatched ')' at:", lines, row, col)
158+
} else if (err == "unmatched_bracket") {
159+
show_code_error("Possible unmatched ']' at:", lines, row, col)
160+
}
161+
}
162+
return(FALSE)
163+
}
164+
}

R/utils.R

+5-1
Original file line numberDiff line numberDiff line change
@@ -1217,7 +1217,11 @@ sourceUTF8 <- function(file, envir = globalenv()) {
12171217
file <- tempfile(); on.exit(unlink(file), add = TRUE)
12181218
writeLines(lines, file)
12191219
}
1220-
exprs <- parse(file, keep.source = FALSE, srcfile = src, encoding = enc)
1220+
exprs <- try(parse(file, keep.source = FALSE, srcfile = src, encoding = enc))
1221+
if (inherits(exprs, "try-error")) {
1222+
diagnoseCode(file)
1223+
stop("Error sourcing ", file)
1224+
}
12211225

12221226
# Wrap the exprs in first `{`, then ..stacktraceon..(). It's only really the
12231227
# ..stacktraceon..() that we care about, but the `{` is needed to make that

tests/testthat/test-diagnostics.R

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
context("code diagnostics")
2+
3+
test_that("Code diagnostics", {
4+
suppressMessages({
5+
expect_false(diagnoseCode(text = "div(,)"))
6+
expect_false(diagnoseCode(text = "div(a,)"))
7+
expect_false(diagnoseCode(text = "div(,a)"))
8+
expect_false(diagnoseCode(text = "div(a,,b)"))
9+
expect_false(diagnoseCode(text = "div(a,\n,b)"))
10+
expect_false(diagnoseCode(text = "div(a,,b,)"))
11+
expect_false(diagnoseCode(text = "div(a,b))"))
12+
expect_false(diagnoseCode(text = "div()}"))
13+
expect_false(diagnoseCode(text = "div())"))
14+
expect_false(diagnoseCode(text = "div()]"))
15+
})
16+
17+
18+
# Ambiguous - these aren't valid R code, but they're outside the scope of
19+
# diagnoseCode, at least for now.
20+
# expect_false(diagnoseCode(text = "a,,b"))
21+
# expect_false(diagnoseCode(text = "div(a, ==2 )"))
22+
# expect_false(diagnoseCode(text = "div(a,!,b)"))
23+
# expect_false(diagnoseCode(text = "1 2"))
24+
25+
# Should not error
26+
expect_true(diagnoseCode(text = "div()"))
27+
expect_true(diagnoseCode(text = "div(a)"))
28+
expect_true(diagnoseCode(text = "div(a,b)"))
29+
expect_true(diagnoseCode(text = "div(1,'b')"))
30+
expect_true(diagnoseCode(text = "div(a,~b)"))
31+
expect_true(diagnoseCode(text = "div([mtcars,,FALSE])"))
32+
expect_true(diagnoseCode(text = "div(a, 1==2)"))
33+
34+
# Outside of () scope
35+
expect_true(diagnoseCode(text = "1\n2"))
36+
})

0 commit comments

Comments
 (0)