Skip to content

Commit 93a0f43

Browse files
authored
Merge pull request #21 from clojure-emacs/add-more-syntax-features
Add enough features to be able to parse clojure.core
2 parents af6102c + b40670a commit 93a0f43

File tree

5 files changed

+273
-23
lines changed

5 files changed

+273
-23
lines changed

parseclj-ast.el

+7
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,13 @@ on available options."
133133
:tag (intern (substring (a-get opening-token :form) 1))
134134
:children children)
135135
stack))
136+
(:metadata (cons (parseclj-ast-node :with-meta
137+
pos
138+
:children children)
139+
stack))
140+
(:map-prefix (cons (a-assoc (car children)
141+
:map-prefix opening-token)
142+
stack))
136143
(t (cons
137144
(parseclj-ast-node type pos :children children)
138145
stack)))))

parseclj-lex.el

+121-14
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
:symbol
3737
:keyword
3838
:string
39+
:regex
3940
:character)
4041
"Types of tokens that represent leaf nodes in the AST.")
4142

@@ -44,6 +45,22 @@
4445
:rbrace)
4546
"Types of tokens that mark the end of a non-atomic form.")
4647

48+
(defvar parseclj-lex--prefix-tokens '(:quote
49+
:backquote
50+
:unquote
51+
:unquote-splice
52+
:discard
53+
:tag
54+
:reader-conditional
55+
:reader-conditional-splice
56+
:var
57+
:deref
58+
:map-prefix)
59+
"Tokens that modify the form that follows.")
60+
61+
(defvar parseclj-lex--prefix-2-tokens '(:metadata)
62+
"Tokens that modify the two forms that follow.")
63+
4764
;; Token interface
4865

4966
(defun parseclj-lex-token (type form pos &rest attributes)
@@ -81,6 +98,11 @@ A token is an association list with :token-type as its first key."
8198
(and (consp token)
8299
(cdr (assq :token-type token))))
83100

101+
(defun parseclj-lex-token-form (token)
102+
"Get the form of TOKEN."
103+
(and (consp token)
104+
(cdr (assq :form token))))
105+
84106
(defun parseclj-lex-leaf-token-p (token)
85107
"Return t if the given AST TOKEN is a leaf node."
86108
(member (parseclj-lex-token-type token) parseclj-lex--leaf-tokens))
@@ -89,6 +111,9 @@ A token is an association list with :token-type as its first key."
89111
"Return t if the given ast TOKEN is a closing token."
90112
(member (parseclj-lex-token-type token) parseclj-lex--closing-tokens))
91113

114+
(defun parseclj-lex-error-p (token)
115+
"Return t if the TOKEN represents a lexing error token."
116+
(eq (parseclj-lex-token-type token) :lex-error))
92117

93118
;; Elisp values from tokens
94119

@@ -177,18 +202,32 @@ S goes through three transformations:
177202
(<= (char-after (point)) ?9))
178203
(right-char)))
179204

205+
(defun parseclj-lex-skip-hex ()
206+
"Skip all consecutive hex digits after point."
207+
(while (and (char-after (point))
208+
(or (<= ?0 (char-after (point)) ?9)
209+
(<= ?a (char-after (point)) ?f)
210+
(<= ?A (char-after (point)) ?F)))
211+
(right-char)))
212+
180213
(defun parseclj-lex-skip-number ()
181214
"Skip a number at point."
182215
;; [\+\-]?\d+\.\d+
183-
(when (member (char-after (point)) '(?+ ?-))
184-
(right-char))
216+
(if (and (eq ?0 (char-after (point)))
217+
(eq ?x (char-after (1+ (point)))))
218+
(progn
219+
(right-char 2)
220+
(parseclj-lex-skip-hex))
221+
(progn
222+
(when (member (char-after (point)) '(?+ ?-))
223+
(right-char))
185224

186-
(parseclj-lex-skip-digits)
225+
(parseclj-lex-skip-digits)
187226

188-
(when (eq (char-after (point)) ?.)
189-
(right-char))
227+
(when (eq (char-after (point)) ?.)
228+
(right-char))
190229

191-
(parseclj-lex-skip-digits))
230+
(parseclj-lex-skip-digits))))
192231

193232
(defun parseclj-lex-number ()
194233
"Consume a number and return a `:number' token representing it."
@@ -270,22 +309,39 @@ are returned as their own lex tokens."
270309
((equal sym "false") (parseclj-lex-token :false "false" pos))
271310
(t (parseclj-lex-token :symbol sym pos))))))
272311

273-
(defun parseclj-lex-string ()
274-
"Return a lex token representing a string.
275-
If EOF is reached without finding a closing double quote, a :lex-error
276-
token is returned."
312+
(defun parseclj-lex-string* ()
313+
"Helper for string/regex lexing.
314+
Returns either the string, or nil if EOF is reached before the closing quote."
277315
(let ((pos (point)))
278316
(right-char)
279317
(while (not (or (equal (char-after (point)) ?\") (parseclj-lex-at-eof-p)))
280318
(if (equal (char-after (point)) ?\\)
281319
(right-char 2)
282320
(right-char)))
283-
(if (equal (char-after (point)) ?\")
284-
(progn
285-
(right-char)
286-
(parseclj-lex-token :string (buffer-substring-no-properties pos (point)) pos))
321+
(when (equal (char-after (point)) ?\")
322+
(right-char)
323+
(buffer-substring-no-properties pos (point)))))
324+
325+
(defun parseclj-lex-string ()
326+
"Return a lex token representing a string.
327+
If EOF is reached without finding a closing double quote, a :lex-error
328+
token is returned."
329+
(let ((pos (point))
330+
(str (parseclj-lex-string*)))
331+
(if str
332+
(parseclj-lex-token :string str pos)
287333
(parseclj-lex-error-token pos :invalid-string))))
288334

335+
(defun parseclj-lex-regex ()
336+
"Return a lex token representing a regular expression.
337+
If EOF is reached without finding a closing double quote, a :lex-error
338+
token is returned."
339+
(let ((pos (1- (point)))
340+
(str (parseclj-lex-string*)))
341+
(if str
342+
(parseclj-lex-token :regex (concat "#" str) pos)
343+
(parseclj-lex-error-token pos :invalid-regex))))
344+
289345
(defun parseclj-lex-lookahead (n)
290346
"Return a lookahead string of N characters after point."
291347
(buffer-substring-no-properties (point) (min (+ (point) n) (point-max))))
@@ -351,6 +407,16 @@ See `parseclj-lex-symbol', `parseclj-lex-symbol-start-p'."
351407
(right-char))
352408
(parseclj-lex-token :comment (buffer-substring-no-properties pos (point)) pos)))
353409

410+
(defun parseclj-lex-map-prefix ()
411+
"Return a lex token representing a map prefix."
412+
(let ((pos (1- (point))))
413+
(right-char)
414+
(when (equal (char-after (point)) ?:)
415+
(right-char))
416+
(while (parseclj-lex-symbol-rest-p (char-after (point)))
417+
(right-char))
418+
(parseclj-lex-token :map-prefix (buffer-substring-no-properties pos (point)) pos)))
419+
354420
(defun parseclj-lex-next ()
355421
"Consume characters at point and return the next lexical token.
356422
@@ -387,6 +453,22 @@ See `parseclj-lex-token'."
387453
(right-char)
388454
(parseclj-lex-token :rbrace "}" pos))
389455

456+
((equal char ?')
457+
(right-char)
458+
(parseclj-lex-token :quote "'" pos))
459+
460+
((equal char ?`)
461+
(right-char)
462+
(parseclj-lex-token :backquote "`" pos))
463+
464+
((equal char ?~)
465+
(right-char)
466+
(if (eq ?@ (char-after (point)))
467+
(progn
468+
(right-char)
469+
(parseclj-lex-token :unquote-splice "~@" pos))
470+
(parseclj-lex-token :unquote "~" pos)))
471+
390472
((parseclj-lex-at-number-p)
391473
(parseclj-lex-number))
392474

@@ -405,6 +487,14 @@ See `parseclj-lex-token'."
405487
((equal char ?\;)
406488
(parseclj-lex-comment))
407489

490+
((equal char ?^)
491+
(right-char)
492+
(parseclj-lex-token :metadata "^" pos))
493+
494+
((equal char ?@)
495+
(right-char)
496+
(parseclj-lex-token :deref "@" pos))
497+
408498
((equal char ?#)
409499
(right-char)
410500
(let ((char (char-after (point))))
@@ -415,6 +505,23 @@ See `parseclj-lex-token'."
415505
((equal char ?_)
416506
(right-char)
417507
(parseclj-lex-token :discard "#_" pos))
508+
((equal char ?\()
509+
(right-char)
510+
(parseclj-lex-token :lambda "#(" pos))
511+
((equal char ?')
512+
(right-char)
513+
(parseclj-lex-token :var "#'" pos))
514+
((equal char ?\")
515+
(parseclj-lex-regex))
516+
((equal char ?:)
517+
(parseclj-lex-map-prefix))
518+
((equal char ?\?)
519+
(right-char)
520+
(if (eq ?@ (char-after (point)))
521+
(progn
522+
(right-char)
523+
(parseclj-lex-token :reader-conditional-splice "#?@" pos))
524+
(parseclj-lex-token :reader-conditional "#?" pos)))
418525
((parseclj-lex-symbol-start-p char t)
419526
(right-char)
420527
(parseclj-lex-token :tag (concat "#" (parseclj-lex-get-symbol-at-point (1+ pos))) pos))

parseclj-parser.el

+29-3
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,11 @@ can be handled with `condition-case'."
4747
(defun parseclj--find-opening-token (stack closing-token)
4848
"Scan STACK for an opening-token matching CLOSING-TOKEN."
4949
(cl-case (parseclj-lex-token-type closing-token)
50-
(:rparen :lparen)
50+
(:rparen (parseclj-lex-token-type
51+
(seq-find (lambda (token)
52+
(member (parseclj-lex-token-type token)
53+
'(:lparen :lambda)))
54+
stack)))
5155
(:rbracket :lbracket)
5256
(:rbrace (parseclj-lex-token-type
5357
(seq-find (lambda (token)
@@ -192,6 +196,11 @@ functions. Additionally the following options are recognized
192196
;; (message "STACK: %S" stack)
193197
;; (message "TOKEN: %S\n" token)
194198

199+
(when (and fail-fast (parseclj-lex-error-p token))
200+
(parseclj--error "Invalid token at %s: %S"
201+
(a-get token :pos)
202+
(parseclj-lex-token-form token)))
203+
195204
;; Reduce based on the top item on the stack (collections)
196205
(cond
197206
((parseclj-lex-leaf-token-p token)
@@ -204,7 +213,7 @@ functions. Additionally the following options are recognized
204213

205214
;; Reduce based on top two items on the stack (special prefixed elements)
206215
(let* ((top-value (parseclj--take-value stack value-p))
207-
(opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p '(:discard :tag)))
216+
(opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))
208217
new-stack)
209218
(while (and top-value opening-token)
210219
;; (message "Reducing...")
@@ -214,8 +223,25 @@ functions. Additionally the following options are recognized
214223
(setq new-stack (nthcdr (+ (length top-value) (length opening-token)) stack))
215224
(setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value) options))
216225

226+
;; recur
217227
(setq top-value (parseclj--take-value stack value-p))
218-
(setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p '(:discard :tag)))))
228+
(setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))))
229+
230+
;; Reduce based on top three items on the stack (a :metadata prefix plus the two forms that follow it)
231+
(let* ((top-value-1 (parseclj--take-value stack value-p))
232+
(top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
233+
(opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
234+
(length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))
235+
new-stack)
236+
(while (and top-value-1 top-value-2 opening-token)
237+
(setq new-stack (nthcdr (apply #'+ (mapcar #'length (list top-value-1 top-value-2 opening-token))) stack))
238+
(setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value-2 top-value-1) options))
239+
240+
;; recur
241+
(setq top-value-1 (parseclj--take-value stack value-p))
242+
(setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
243+
(setq opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
244+
(length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))))
219245

220246
(setq token (parseclj-lex-next)))
221247

test/parseclj-lex-test.el

+37-6
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,25 @@
5252
(:form . "123e34M")
5353
(:pos . 1)))))
5454

55+
(with-temp-buffer
56+
(insert "0xff00AA")
57+
(goto-char 1)
58+
(should (equal (parseclj-lex-next) '((:token-type . :number)
59+
(:form . "0xff00AA")
60+
(:pos . 1)))))
61+
62+
(with-temp-buffer
63+
(insert "#?(:clj 1 :cljs 2)")
64+
(goto-char 1)
65+
(should (equal (parseclj-lex-next)
66+
'((:token-type . :reader-conditional) (:form . "#?") (:pos . 1)))))
67+
68+
(with-temp-buffer
69+
(insert "#?@(:clj [1] :cljs [2])")
70+
(goto-char 1)
71+
(should (equal (parseclj-lex-next)
72+
'((:token-type . :reader-conditional-splice) (:form . "#?@") (:pos . 1)))))
73+
5574
(with-temp-buffer
5675
(insert "123x")
5776
(goto-char 1)
@@ -203,12 +222,7 @@
203222
(should (equal (parseclj-lex-next) (parseclj-lex-token :number "13" 18)))
204223
(should (equal (parseclj-lex-next) (parseclj-lex-token :whitespace " " 20)))
205224
(should (equal (parseclj-lex-next) (parseclj-lex-token :number "14" 21)))
206-
(should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23))))
207-
208-
(with-temp-buffer
209-
(insert "~")
210-
(goto-char 1)
211-
(should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error "~" 1)))))
225+
(should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23)))))
212226

213227
(ert-deftest parseclj-lex-test-at-number-p ()
214228
(dolist (str '("123" ".9" "+1" "0" "-456"))
@@ -295,6 +309,12 @@
295309
(goto-char 1)
296310
(should (equal (parseclj-lex-string) (parseclj-lex-token :string "\"abc\\\"\"" 1)))))
297311

312+
(ert-deftest parseclj-lex-test-regex ()
313+
(with-temp-buffer
314+
(insert "#\"abc\"")
315+
(goto-char 1)
316+
(should (equal (parseclj-lex-next) (parseclj-lex-token :regex "#\"abc\"" 1)))))
317+
298318
(ert-deftest parseclj-lex-test-tag ()
299319
(with-temp-buffer
300320
(insert "#inst")
@@ -306,6 +326,17 @@
306326
(goto-char 1)
307327
(should (equal (parseclj-lex-next) (parseclj-lex-token :tag "#foo/bar" 1)))))
308328

329+
(ert-deftest parseclj-lex-test-quote ()
330+
(with-temp-buffer
331+
(insert "'foo")
332+
(goto-char 1)
333+
(should (equal (parseclj-lex-next) (parseclj-lex-token :quote "'" 1))))
334+
335+
(with-temp-buffer
336+
(insert "`foo")
337+
(goto-char 1)
338+
(should (equal (parseclj-lex-next) (parseclj-lex-token :backquote "`" 1)))))
339+
309340
(provide 'parseclj-lex-test)
310341

311342
;;; parseclj-lex-test.el ends here

0 commit comments

Comments
 (0)