|
| 1 | +# Analyze an R file for possible extra or missing commas. Returns FALSE if any |
| 2 | +# problems detected, TRUE otherwise. |
| 3 | +diagnoseCode <- function(path = NULL, text = NULL) { |
| 4 | + if (!xor(is.null(path), is.null(text))) { |
| 5 | + stop("Must specify `path` or `text`, but not both.") |
| 6 | + } |
| 7 | + |
| 8 | + if (!is.null(path)) { |
| 9 | + tokens <- sourcetools::tokenize_file(path) |
| 10 | + } else { |
| 11 | + tokens <- sourcetools::tokenize_string(text) |
| 12 | + } |
| 13 | + |
| 14 | + find_scopes <- function(tokens) { |
| 15 | + # Strip whitespace and comments |
| 16 | + tokens <- tokens[!(tokens$type %in% c("whitespace", "comment")),] |
| 17 | + |
| 18 | + # Replace various types of things with "value" |
| 19 | + tokens$type[tokens$type %in% c("string", "number", "symbol", "keyword")] <- "value" |
| 20 | + |
| 21 | + # Record types for close and open brace/bracket/parens, and commas |
| 22 | + brace_idx <- tokens$value %in% c("(", ")", "{", "}", "[", "]", ",") |
| 23 | + tokens$type[brace_idx] <- tokens$value[brace_idx] |
| 24 | + |
| 25 | + # Stack-related function for recording scope. Starting scope is "{" |
| 26 | + stack <- "{" |
| 27 | + push <- function(x) { |
| 28 | + stack <<- c(stack, x) |
| 29 | + } |
| 30 | + pop <- function() { |
| 31 | + if (length(stack) == 1) { |
| 32 | + # Stack underflow, but we need to keep going |
| 33 | + return(NA_character_) |
| 34 | + } |
| 35 | + res <- stack[length(stack)] |
| 36 | + stack <<- stack[-length(stack)] |
| 37 | + res |
| 38 | + } |
| 39 | + peek <- function() { |
| 40 | + stack[length(stack)] |
| 41 | + } |
| 42 | + |
| 43 | + # First, establish a scope for each token. For opening and closing |
| 44 | + # braces/brackets/parens, the scope at that location is the *surrounding* |
| 45 | + # scope, not the new scope created by the brace/bracket/paren. |
| 46 | + for (i in seq_len(nrow(tokens))) { |
| 47 | + value <- tokens$value[i] |
| 48 | + |
| 49 | + tokens$scope[i] <- peek() |
| 50 | + if (value %in% c("{", "(", "[")) { |
| 51 | + push(value) |
| 52 | + |
| 53 | + } else if (value == "}") { |
| 54 | + if (!identical(pop(), "{")) |
| 55 | + tokens$err[i] <- "unmatched_brace" |
| 56 | + # For closing brace/paren/bracket, get the scope after popping |
| 57 | + tokens$scope[i] <- peek() |
| 58 | + |
| 59 | + } else if (value == ")") { |
| 60 | + if (!identical(pop(), "(")) |
| 61 | + tokens$err[i] <- "unmatched_paren" |
| 62 | + tokens$scope[i] <- peek() |
| 63 | + |
| 64 | + } else if (value == "]") { |
| 65 | + if (!identical(pop(), "[")) |
| 66 | + tokens$err[i] <- "unmatched_bracket" |
| 67 | + tokens$scope[i] <- peek() |
| 68 | + } |
| 69 | + } |
| 70 | + |
| 71 | + tokens |
| 72 | + } |
| 73 | + |
| 74 | + check_commas <- function(tokens) { |
| 75 | + # Find extra and missing commas |
| 76 | + tokens$err <- mapply( |
| 77 | + tokens$type, |
| 78 | + c("", tokens$type[-length(tokens$type)]), |
| 79 | + c(tokens$type[-1], ""), |
| 80 | + tokens$scope, |
| 81 | + tokens$err, |
| 82 | + SIMPLIFY = FALSE, |
| 83 | + FUN = function(type, prevType, nextType, scope, err) { |
| 84 | + # If an error was already found, just return it. This could have |
| 85 | + # happened in the brace/paren/bracket matching phase. |
| 86 | + if (!is.na(err)) { |
| 87 | + return(err) |
| 88 | + } |
| 89 | + if (is.na(scope)) { |
| 90 | + if (type == "}") return("unmatched_brace") |
| 91 | + else if (type == ")") return("unmatched_paren") |
| 92 | + else if (type == "]") return("unmatched_bracket") |
| 93 | + |
| 94 | + } else if (scope == "(") { |
| 95 | + |
| 96 | + if ((prevType == "(" && type == ",") || |
| 97 | + (type == "," && nextType == ")") || |
| 98 | + (prevType == "," && type == ",")) { |
| 99 | + return("extra_comma") |
| 100 | + } |
| 101 | + |
| 102 | + if ((prevType == ")" && type == "value") || |
| 103 | + (prevType == "value" && type == "value")) { |
| 104 | + return("missing_comma") |
| 105 | + } |
| 106 | + } |
| 107 | + |
| 108 | + NA_character_ |
| 109 | + } |
| 110 | + ) |
| 111 | + |
| 112 | + tokens |
| 113 | + } |
| 114 | + |
| 115 | + |
| 116 | + tokens$err <- NA_character_ |
| 117 | + tokens <- find_scopes(tokens) |
| 118 | + tokens <- check_commas(tokens) |
| 119 | + |
| 120 | + if (all(is.na(tokens$err))) { |
| 121 | + # No errors found |
| 122 | + return(TRUE) |
| 123 | + |
| 124 | + } else { |
| 125 | + # If any errors were found, print messages |
| 126 | + if (!is.null(path)) { |
| 127 | + lines <- readLines(path) |
| 128 | + } else { |
| 129 | + lines <- strsplit(text, "\n")[[1]] |
| 130 | + } |
| 131 | + |
| 132 | + # Print out the line of code with the error, and point to the column with |
| 133 | + # the error. |
| 134 | + show_code_error <- function(msg, lines, row, col) { |
| 135 | + message(paste0( |
| 136 | + msg, "\n", |
| 137 | + row, ":", lines[row], "\n", |
| 138 | + paste0(rep.int(" ", nchar(as.character(row)) + 1), collapse = ""), |
| 139 | + gsub(perl = TRUE, "[^\\s]", " ", substr(lines[row], 1, col-1)), "^" |
| 140 | + )) |
| 141 | + } |
| 142 | + |
| 143 | + err_idx <- which(!is.na(tokens$err)) |
| 144 | + msg <- "" |
| 145 | + for (i in err_idx) { |
| 146 | + row <- tokens$row[i] |
| 147 | + col <- tokens$column[i] |
| 148 | + err <- tokens$err[i] |
| 149 | + |
| 150 | + if (err == "missing_comma") { |
| 151 | + show_code_error("Possible missing comma at:", lines, row, col) |
| 152 | + } else if (err == "extra_comma") { |
| 153 | + show_code_error("Possible extra comma at:", lines, row, col) |
| 154 | + } else if (err == "unmatched_brace") { |
| 155 | + show_code_error("Possible unmatched '}' at:", lines, row, col) |
| 156 | + } else if (err == "unmatched_paren") { |
| 157 | + show_code_error("Possible unmatched ')' at:", lines, row, col) |
| 158 | + } else if (err == "unmatched_bracket") { |
| 159 | + show_code_error("Possible unmatched ']' at:", lines, row, col) |
| 160 | + } |
| 161 | + } |
| 162 | + return(FALSE) |
| 163 | + } |
| 164 | +} |
0 commit comments