Skip to content

Commit

Permalink
Support regex literals
Browse files Browse the repository at this point in the history
taku0 committed Jul 9, 2022
1 parent 975e256 commit 9b8a926
Showing 4 changed files with 435 additions and 37 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -172,6 +172,15 @@ var x = foo

Both are syntactically correct code. We cannot handle this case properly. This is also future work.

Another example is the combination of regex literals and custom operators. The following is valid Swift code that can be read either as regex literals or as custom operators.

```swift
let x = /^/ /^/ /^/
```

We parse them as regex literals rather than custom operators for now.


## Hacking

To build the package locally, run `make package`.
12 changes: 8 additions & 4 deletions swift-mode-indent.el
Original file line number Diff line number Diff line change
@@ -225,11 +225,15 @@ declaration and its offset is `swift-mode:basic-offset'."
(swift-mode:indentation (point) 0)))))

(defun swift-mode:calculate-indent-of-multiline-string ()
"Return the indentation of the current line inside a multiline string."
"Return the indentation of the current line inside a multiline string.
Also used for regexes."
(back-to-indentation)
(let ((string-beginning-position
(save-excursion (swift-mode:beginning-of-string))))
(if (looking-at "\"\"\"")
(if (and (looking-at "\\(\"\"\"\\|/\\)#*")
(equal (get-text-property (1- (match-end 0)) 'syntax-table)
(string-to-syntax "|")))
;; The last line.
(progn
(goto-char string-beginning-position)
@@ -240,13 +244,13 @@ declaration and its offset is `swift-mode:basic-offset'."
(swift-mode:goto-non-interpolated-expression-bol)
(back-to-indentation)
(if (<= (point) string-beginning-position)
;; The cursor was on the 2nd line of the comment, so aligns with
;; The cursor was on the 2nd line of the string, so aligns with
;; that line with offset.
(progn
(goto-char string-beginning-position)
(swift-mode:calculate-indent-of-expression
swift-mode:multiline-statement-offset))
;; The cursor was on the 3rd or following lines of the comment, so
;; The cursor was on the 3rd or following lines of the string, so
;; aligns with a non-empty preceding line.
(if (and (bolp) (eolp))
;; The cursor is on an empty line, so seeks a non-empty-line.
281 changes: 248 additions & 33 deletions swift-mode-lexer.el
Original file line number Diff line number Diff line change
@@ -75,7 +75,7 @@
(defun swift-mode:token (type text start end)
"Construct and return a token.
TYPE is the type of the token such as `infix-operator' or {.
TYPE is the type of the token such as `binary-operator' or {.
TEXT is the text of the token.
START is the start position of the token.
END is the point after the token."
@@ -122,7 +122,7 @@ END is the point after the token."
;; - : (part of conditional operator, key-value separator, label-statement
;; separator)
;; - anonymous-function-parameter-in ("in" after anonymous function parameter)
;; - string-chunk-after-interpolated-expression (part of a string ending with
;; - string-chunk-after-interpolated-expression (part of a string starting with
;; ")")
;; - string-chunk-before-interpolated-expression (part of a string ending with
;; "\\(")
@@ -220,17 +220,20 @@ Intended for `syntax-propertize-extend-region-functions'."

(defun swift-mode:syntax-propertize (start end)
"Update text properties for strings.
Mark the beginning of and the end of single-line/multiline strings between
the position START and END as general string delimiters.
Mark the beginning of and the end of single-line/multiline strings and regexes
between the position START and END as general string delimiters.
Intended for `syntax-propertize-function'."
(remove-text-properties start end
'(syntax-table
nil
syntax-multiline
nil
swift-mode:matching-parenthesis
nil
swift-mode:comment
nil))
(let* ((chunk (swift-mode:chunk-after (syntax-ppss start))))
(let* ((chunk (swift-mode:chunk-after (syntax-ppss start)))
comment-start)
(cond
((swift-mode:chunk:multiline-string-p chunk)
(swift-mode:syntax-propertize:end-of-string
@@ -240,30 +243,36 @@ Intended for `syntax-propertize-function'."
(swift-mode:syntax-propertize:end-of-string
end "\"" (swift-mode:chunk:pound-count chunk)))

((swift-mode:chunk:regex-p chunk)
(swift-mode:syntax-propertize:end-of-regex
(swift-mode:chunk:start chunk)))

((swift-mode:chunk:comment-p chunk)
(goto-char (swift-mode:chunk:start chunk))
(forward-comment (point-max)))))
(setq comment-start (point))
(forward-comment 1)
(put-text-property comment-start (point) 'swift-mode:comment t))))
(swift-mode:syntax-propertize:scan end 0))

(defun swift-mode:syntax-propertize:scan (end nesting-level)
"Update text properties for strings.
Mark the beginning of and the end of single-line/multiline strings between
the current position and END as general string delimiters.
Assuming the cursor is not on strings nor comments.
Mark the beginning of and the end of single-line/multiline strings and regexes
between the current position and END as general string delimiters.
Assuming the cursor is not on strings, regexes, nor comments.
If NESTING-LEVEL is non-zero, nesting of parentheses are tracked and the scan
stops where the level becomes zero."
(let ((found-matching-parenthesis nil)
(pattern (mapconcat #'regexp-quote
'("\"\"\"" "\"" "//" "/*" "(" ")")
'("\"\"\"" "\"" "/" "(" ")")
"\\|")))
(while (and (not found-matching-parenthesis)
(< (point) end)
(search-forward-regexp pattern end t))
(cond
((member (match-string-no-properties 0) '("\"\"\"" "\""))
(let ((start (match-beginning 0))
(pound-count 0)
(quotation (match-string-no-properties 0)))
(quotation (match-string-no-properties 0))
pound-count)
(save-excursion
(goto-char start)
(skip-chars-backward "#")
@@ -274,15 +283,31 @@ stops where the level becomes zero."
(string-to-syntax "|"))
(swift-mode:syntax-propertize:end-of-string
end quotation pound-count)
(put-text-property start (point) 'syntax-multiline t)))
(swift-mode:put-syntax-multiline-property start (point))))

((equal "/" (match-string-no-properties 0))
(let ((start (match-beginning 0))
regex-start)
(save-excursion
(goto-char start)
(skip-chars-backward "#")
(setq regex-start (point)))
(cond
;; Regexes
((swift-mode:syntax-propertize:end-of-regex regex-start)
(put-text-property regex-start (1+ regex-start)
'syntax-table
(string-to-syntax "|"))
(swift-mode:put-syntax-multiline-property regex-start (point)))

((equal "//" (match-string-no-properties 0))
(goto-char (match-beginning 0))
(forward-comment (point-max)))
;; Comments
((memq (char-after) '(?/ ?*))
(goto-char start)
(forward-comment 1)
(put-text-property start (point) 'swift-mode:comment t))

((equal "/*" (match-string-no-properties 0))
(goto-char (match-beginning 0))
(forward-comment (point-max)))
;; Operators
(t nil))))

((and (equal "(" (match-string-no-properties 0))
(/= nesting-level 0))
@@ -297,6 +322,17 @@ stops where the level becomes zero."
(goto-char end))
found-matching-parenthesis))

(defun swift-mode:put-syntax-multiline-property (start end)
  "Mark the text from START to END with the `syntax-multiline' text property.
Afterwards, request refontification of the same region: call
`font-lock-flush' when it is defined; otherwise call `jit-lock-refontify'
when that is the value of `font-lock-fontify-buffer-function'; otherwise
fall back to `font-lock-after-change-function'."
  (put-text-property start end 'syntax-multiline t)
  (cond
   ;; Preferred entry point when this Emacs provides it.
   ((fboundp 'font-lock-flush)
    (font-lock-flush start end))
   ;; No `font-lock-flush': talk to jit-lock directly when it is in charge.
   ((eq font-lock-fontify-buffer-function #'jit-lock-refontify)
    (jit-lock-refontify start end))
   ;; Last resort: pretend the region has just been changed.
   (t
    (font-lock-after-change-function start end (- end start)))))

(defun swift-mode:syntax-propertize:end-of-string (end quotation pound-count)
"Move point to the end of single-line/multiline string.
@@ -378,6 +414,143 @@ Return nil otherwise."
(setq p (1- p)))
(= (mod backslash-count 2) 1)))))

(defun swift-mode:syntax-propertize:end-of-regex (start)
  "Move point to the end of the regex starting at START, if there is one.
START is the position of the open delimiter, including its leading pounds
if any.

When START begins a regex, put the generic string delimiter syntax (\"|\")
on the last character of the closing delimiter and return the position
after it.  When START does not begin a regex, keep point and return nil.

There is no END parameter: if the closing delimiter is missing, this
function must return nil."
  (let* (;; `skip-chars-forward' returns the distance moved, which is the
         ;; number of pounds in front of the slash.
         (pound-count (save-excursion
                        (goto-char start)
                        (skip-chars-forward "#")))
         (end-of-regex
          (if (= pound-count 0)
              (swift-mode:syntax-propertize:end-of-basic-regex start)
            (swift-mode:syntax-propertize:end-of-extended-regex
             start
             pound-count))))
    (when end-of-regex
      (put-text-property (1- end-of-regex) end-of-regex
                         'syntax-table
                         (string-to-syntax "|")))
    end-of-regex))

(defun swift-mode:syntax-propertize:end-of-basic-regex (start)
  "Move point to the end of a regex without extended delimiters, if any.
START is the position of the open delimiter (the slash).
If START is not the start of a regex, keep point and return nil.  Otherwise,
leave point just after the closing slash and return that position.

The closing slash is searched for only up to the end of the line point is
on when this function is called.  The candidate is rejected when:
- the character after the open delimiter is a space, a tab, a slash, or an
  asterisk;
- the open delimiter has the `swift-mode:comment' text property;
- the open delimiter is preceded by operator characters that do not form a
  prefix operator and contain no comment;
- an unmatched close parenthesis is found outside square brackets; or
- the candidate closing slash is followed by a slash or an asterisk."
  (let* ((pos (point))
         (start-of-contents (1+ start))
         after-last-dot
         (square-brackets-count 0)
         (parentheses-count 0)
         (limit (line-end-position))
         (end-of-regex nil))
    (if (or
         ;; Cannot start with spaces, tabs, slashes, or asterisks.
         (memq (char-after start-of-contents) '(?\s ?\t ?/ ?*))
         ;; Cannot be a comment closer: /**/+++/.
         (get-text-property start 'swift-mode:comment)
         ;; Cannot be preceded with infix operators while it can be preceded
         ;; with prefix operators.
         (save-excursion
           (goto-char start)
           ;; TODO Unicode operators
           (skip-chars-backward "-/=+!*%<>&|^~?")
           ;; Only operators starting with a dot can contain dots, so a dot
           ;; found here needs a second look at where the operator begins.
           (when (eq (char-before) ?.)
             (setq after-last-dot (point))
             (skip-chars-backward "-/=+!*%<>&|^~?.")
             (unless (eq (char-after) ?.)
               (goto-char (1- after-last-dot))))
           (and
            ;; preceded with an operator
            (/= start (point))
            ;; it is not a prefix operator
            (not (memq (char-before)
                       '(nil ?\s ?\t ?\[ ?\( ?{ ?, ?\; ?:)))
            ;; it doesn't contain comments: a/**/+/**//b /
            (not (text-property-any (point) start 'swift-mode:comment t)))))
        nil
      (goto-char start-of-contents)
      (while (and (null end-of-regex)
                  (search-forward-regexp "[][()\\/]" limit t))
        (cond
         ((eq (char-before) ?\\)
          ;; Skip the escaped character, but never step beyond LIMIT.  A
          ;; backslash at the end of the line would otherwise move point
          ;; past the search bound (making the next search signal an
          ;; error) or past the end of the buffer.
          (when (< (point) limit)
            (forward-char)))
         ((eq (char-before) ?\[)
          (setq square-brackets-count (1+ square-brackets-count)))
         ((eq (char-before) ?\])
          ;; Unmatched close square brackets are allowed and ignored.
          (when (< 0 square-brackets-count)
            (setq square-brackets-count (1- square-brackets-count))))
         ((eq (char-before) ?\()
          ;; Parentheses inside square brackets are not counted.
          (when (zerop square-brackets-count)
            (setq parentheses-count (1+ parentheses-count))))
         ((eq (char-before) ?\))
          (cond
           ((< 0 square-brackets-count)
            nil)
           ((zerop parentheses-count)
            ;; Found an unmatching close parenthesis.  This is not a regex
            ;; literal.  Jumping to LIMIT makes the next search fail.
            (goto-char limit))
           (t
            (setq parentheses-count (1- parentheses-count)))))
         ((eq (char-before) ?/)
          ;; A slash followed by a slash or an asterisk is not accepted as
          ;; the closing delimiter; give up on this candidate.
          (if (memq (char-after) '(?/ ?*))
              (goto-char limit)
            (setq end-of-regex (point)))))))
    (unless end-of-regex
      (goto-char pos))
    end-of-regex))

(defun swift-mode:syntax-propertize:end-of-extended-regex (start pound-count)
"Move point to the end of extended regex if any.
START is the position of the open delimiter, including pounds of POUND-COUNT.
If START is not a start of a regex, keep the point and return nil.  Otherwise,
leave point after the closing delimiter and return that position.
If only spaces and tabs follow the open delimiter on its line, the closing
delimiter is looked for at the beginning (after spaces and tabs) of the
following lines.  Otherwise it is looked for on the line of the open
delimiter only.
When no closing delimiter is found, the scanned text is marked with
`swift-mode:put-syntax-multiline-property' before point is restored."
(let* ((pos (point))
;; Position just after the pounds and the slash of the open delimiter.
(start-of-contents (1+ (+ start pound-count)))
;; Non-nil when nothing but spaces and tabs follows the open delimiter
;; on its line.
(starts-with-line-break (save-excursion
(goto-char start-of-contents)
(skip-chars-forward "\s\t")
(eolp)))
(end-of-regex nil))
(goto-char start-of-contents)
(if starts-with-line-break
;; Multiline form: visit the following lines one by one until a line
;; starts with the closing delimiter or `forward-line' cannot move.
(while (and (null end-of-regex)
(zerop (forward-line)))
(skip-chars-forward "\s\t")
;; The closing delimiter is a slash followed by POUND-COUNT pounds.
;; The skip is bounded by POUND-COUNT, so further pounds are not
;; examined.
(when (and (eq (char-after) ?/)
(progn
(forward-char)
(eq (skip-chars-forward "#" (+ (point) pound-count))
pound-count)))
(setq end-of-regex (point))))
;; Single-line form: look for "/#" up to the end of the current line.
(while (and (null end-of-regex)
(search-forward-regexp "/#" (line-end-position) t))
;; Step back over the matched pound so that all pounds are counted by
;; the bounded skip below.
(backward-char)
(when (and (eq (skip-chars-forward "#" (+ (point) pound-count))
pound-count)
;; Inside regex literal, backslashes without pounds are
;; still special.
(not (swift-mode:escaped-p (match-beginning 0) 0)))
(setq end-of-regex (point)))))
(unless end-of-regex
;; Not a regex: mark what was scanned, then go back to where we started.
(swift-mode:put-syntax-multiline-property start (point))
(goto-char pos))
end-of-regex))

;;; Lexers

(defun swift-mode:implicit-semi-p ()
@@ -785,11 +958,13 @@ Other properties are the same as the TOKEN."
(has-preceding-space (or
(= start (point-min))
(memq (char-syntax (char-before start)) '(? ?>))
(memq (char-before start) '(?\( ?\[ ?{ ?, ?\; ?:))
(nth 4 (save-excursion
(syntax-ppss (1- start))))))
(has-following-space (or
(= end (point-max))
(memq (char-syntax (char-after end)) '(? ?<))
(memq (char-after end) '(?\) ?\] ?} ?, ?\; ?:))
(save-excursion (goto-char end)
(looking-at "/\\*\\|//"))
(= (char-after end) ?\C-j)))
@@ -953,23 +1128,40 @@ This function does not return `implicit-;' or `type-:'."
(forward-char)
(swift-mode:token '> ">" (1- (point)) (point)))

;; Regex
((and (looking-at "#*/")
(equal (get-text-property (match-beginning 0) 'syntax-table)
(string-to-syntax "|")))
(let ((pos-after-comment (point)))
(swift-mode:forward-string-chunk)
(swift-mode:token
'identifier
(buffer-substring-no-properties pos-after-comment (point))
pos-after-comment
(point))))

;; Operator (other than as, try, is, or await)
;;
;; Operators starting with a dot can contain dots. Other operators cannot
;; contain dots.
;;
;; https://developer.apple.com/library/ios/documentation/Swift/Conceptual/Swift_Programming_Language/LexicalStructure.html#//apple_ref/swift/grammar/dot-operator-head
;; TODO Unicode operators
((looking-at "[-/=+!*%<>&|^~?]+\\|[.][-./=+!*%<>&|^~?]*")
(let*
((text (match-string-no-properties 0))
(start (match-beginning 0))
(end (match-end 0)))
(when (string-match ".*/\\*\\|.*//" text)
(let* ((text (match-string-no-properties 0))
(start (match-beginning 0))
(end (match-end 0)))
(when (string-match "^.*?/\\*\\|^.*?//" text)
;; e.g. +++/* */ or +++//
(setq end
(- end
(- (length text) (- (match-end 0) 2))))
(setq end (- end (- (length text) (- (match-end 0) 2))))
(setq text (substring text 0 (- (match-end 0) 2))))
(when (and (string-match "^.*?/" text)
(equal (get-text-property (+ start (1- (match-end 0)))
'syntax-table)
(string-to-syntax "|")))
;; Regex after prefix operator, e.g. +++/<>/
(setq end (- end (- (length text) (- (match-end 0) 1))))
(setq text (substring text 0 (- (match-end 0) 1))))
(goto-char end)
(swift-mode:fix-operator-type
(swift-mode:token nil text start end))))
@@ -1206,12 +1398,27 @@ This function does not return `implicit-;' or `type-:'."
(backward-char)
(swift-mode:token '> ">" (point) (1+ (point))))

;; Regex
((and (save-excursion
(skip-chars-backward "#")
(eq (char-before) ?/))
(equal (get-text-property (1- (point)) 'syntax-table)
(string-to-syntax "|")))
(let ((pos-before-comment (point)))
(swift-mode:backward-string-chunk)
(swift-mode:token
'identifier
(buffer-substring-no-properties (point) pos-before-comment)
(point)
pos-before-comment)))

;; Operator (other than as, try, is, or await)
;;
;; Operators which starts with a dot can contain other dots. Other
;; operators cannot contain dots.
;;
;; https://developer.apple.com/library/ios/documentation/Swift/Conceptual/Swift_Programming_Language/LexicalStructure.html#//apple_ref/swift/grammar/dot-operator-head
;; TODO Unicode operators
((memq (char-before) '(?. ?- ?/ ?= ?+ ?! ?* ?% ?< ?> ?& ?| ?^ ?~ ??))
(let ((point-before-comments (point)))
(skip-chars-backward "-./=+!*%<>&|^~?")
@@ -1385,7 +1592,7 @@ If this line ends with a single-line comment, goto just before the comment."

;;; Comment or string chunks

;; A chunk is either a string-chunk or a comment.
;; A chunk is either a string-chunk, regex, or a comment.
;; It has the type and the start position.

(defun swift-mode:chunk (type start)
@@ -1433,12 +1640,16 @@ If this line ends with a single-line comment, goto just before the comment."
"Return non-nil if the CHUNK is a multiline string."
(eq (swift-mode:chunk:type chunk) 'multiline-string))

(defun swift-mode:chunk:regex-p (chunk)
  "Return non-nil if the type of CHUNK is `regex'."
  (eq 'regex (swift-mode:chunk:type chunk)))

(defun swift-mode:chunk:pound-count (chunk)
"Return the number of pound signs before the start position of the CHUNK."
(save-excursion
(goto-char (swift-mode:chunk:start chunk))
(swift-mode:beginning-of-string)
(skip-chars-backward "#")
(skip-chars-forward "#")
(- (swift-mode:chunk:start chunk) (point))))

(defun swift-mode:chunk-after (&optional parser-state)
@@ -1457,10 +1668,14 @@ If PARSER-STATE is given, it is used instead of (syntax-ppss)."
;; Syntax category "|" is attached to both single-line and multiline
;; string delimiters. So (nth 3 parser-state) may be t even for
;; single-line string delimiters.
(if (save-excursion (goto-char (nth 8 parser-state))
(looking-at "#*\"\"\""))
(swift-mode:chunk 'multiline-string (nth 8 parser-state))
(swift-mode:chunk 'single-line-string (nth 8 parser-state))))
(cond
((save-excursion (goto-char (nth 8 parser-state))
(looking-at "#*\"\"\""))
(swift-mode:chunk 'multiline-string (nth 8 parser-state)))
((save-excursion (goto-char (nth 8 parser-state))
(looking-at "#*/"))
(swift-mode:chunk 'regex (nth 8 parser-state)))
(t (swift-mode:chunk 'single-line-string (nth 8 parser-state)))))

((eq (nth 4 parser-state) t)
(swift-mode:chunk 'single-line-comment (nth 8 parser-state)))
170 changes: 170 additions & 0 deletions test/swift-files/indent/strings.swift
Original file line number Diff line number Diff line change
@@ -161,4 +161,174 @@ func f() {
let x = #"abc\( 1 + (2 + 3) ) \#( 1 + (2 + 3) ) \" \#"# " a \"#
let x = ##"abc\( 1 + (2 + 3) ) \#( 1 + (2 + 3) ) \" \#"# " a \"##
let x = 1


// Regexes

// Simple case.
let x = /a/

// Slashes can be escaped.
let x = /\/ /

// Slashes must be escaped in character classes.
let x = /[/ + "]/ + // "
let x = /[\/ + "]/ + // "
a()

// Regexes can contain quotes.
let x = /"/
let x = /"""/

// Regex with extended delimiters can contain slashes.
let x = #// /* /#

// Backslashes are still special in regexes with extended delimiters.
let x = #/\/# /* /#
a()

// Multiline regex.
let x = #/
let x = #/
/#

// Closing extended delimiter can be escaped.
let x = #/
\/#
let x = #/
/#

// Extended delimiters with more than one pound.
let x = ##/
/#
let x = #/
/##

// Comments are ignored in extended regexes.
let x = #/
let x = "a" # /#
/#

// Multiline comment cannot contain regexes with */.
/*
let regex = /[0-9]*/
let x = "*/ // "


// Regexes without extended delimiters cannot be preceded by infix
// operators without whitespaces.
// `a`, infix operator `+/`, `b`, and infix operator `/`
let x = a+/b /
c()

// Regexes without extended delimiters can be preceded by infix operators
// with whitespaces.
// `a`, infix operator `+`, and regex /b /
let x = a + /b /
c()

// Comments are whitespaces.
let x = a/**/+/**//b /
c()

// Regexes with extended delimiters can be preceded by infix operators
// without whitespaces.
// `a`, infix operator `+`, and regex #/b /#
let x = a+#/b /#
c()

// Regexes without extended delimiters cannot start with spaces.
let regex = Regex {
digit
/ [+-] /
digit
}
let regex = Regex {
digit
/[+-]/
digit
}

// Initial space must be escaped.
let regex = Regex {
digit
/\ [+-] /
digit
}

// Regexes with extended delimiters can start with spaces.
let regex = Regex {
digit
#/ [+-] /#
digit
}

foo {
// This must be infix operator /^/.
let a = b() /^/
c() // swift-mode:test:known-bug
}

foo {
// Regex /^/, infix operator /^/, and b().
let a = /^/ /^/
b() // swift-mode:test:known-bug
}

foo {
// Regex /^/, infix operator /^/, regex /^/, and b()
let a = /^/ /^/ /^/
b()
}

// Regex without extended delimiters cannot be multiline.
// Also, it cannot end with // or /*
let a = /0 + // /
b()
let a = /0 + /* /
b()
*/
c()

// Regexes can be preceded with prefix operators without spaces.
// prefix operator `+` and regex /a /.
let x = +/a /
b()

// Regexes without extended delimiters cannot contain unmatching close
// parentheses.
array.reduce(1, /) { otherArray.reduce(1, /)
array.reduce(1, /) }; otherArray.reduce(1, /)

// Regexes without extended delimiters can contain matching close
// parentheses.
array.reduce(1, /(a) { otherArray.reduce(1, /)
array.reduce(1, /(a) }; otherArray.reduce(1, /)

// Regexes without extended delimiters can contain escaped close
// parentheses.
array.reduce(1, /\) { otherArray.reduce(1, /)
array.reduce(1, /\) }; otherArray.reduce(1, /)

// Character classes can contain closing parentheses.
array.reduce(1, /[)] { otherArray.reduce(1, /)
array.reduce(1, /[)] }; otherArray.reduce(1, /)

// Regexes with extended delimiters can contain unmatching close
// parentheses.
array.reduce(1, #/) { otherArray.reduce(1, /#)
array.reduce(1, #/) }; otherArray.reduce(1, /#)


// Regexes can contain unmatching close square brackets.
let d = a[/] /
]
let d = a[(/)] /
b()

// Comments have higher precedence.
let x = a() /**/+++/
b()
let x = a() //+++/
b()
}

0 comments on commit 9b8a926

Please sign in to comment.