Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add enough feature to be able to parse clojure.core #21

Merged
merged 3 commits into from
May 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions parseclj-ast.el
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ on available options."
:tag (intern (substring (a-get opening-token :form) 1))
:children children)
stack))
(:metadata (cons (parseclj-ast-node :with-meta
pos
:children children)
stack))
(:map-prefix (cons (a-assoc (car children)
:map-prefix opening-token)
stack))
(t (cons
(parseclj-ast-node type pos :children children)
stack)))))
Expand Down
135 changes: 121 additions & 14 deletions parseclj-lex.el
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
:symbol
:keyword
:string
:regex
:character)
"Types of tokens that represent leaf nodes in the AST.")

Expand All @@ -44,6 +45,22 @@
:rbrace)
"Types of tokens that mark the end of a non-atomic form.")

(defvar parseclj-lex--prefix-tokens
  '(:quote :backquote :unquote :unquote-splice
    :discard :tag
    :reader-conditional :reader-conditional-splice
    :var :deref :map-prefix)
  "Token types that act as a prefix on the single form following them.")

(defvar parseclj-lex--prefix-2-tokens (list :metadata)
  "Token types that act as a prefix on the two forms following them.")

;; Token interface

(defun parseclj-lex-token (type form pos &rest attributes)
Expand Down Expand Up @@ -81,6 +98,11 @@ A token is an association list with :token-type as its first key."
(and (consp token)
(cdr (assq :token-type token))))

(defun parseclj-lex-token-form (token)
  "Return the value stored under `:form' in TOKEN.
TOKEN is treated as an association list.  Anything that is not a
cons, and any alist without a `:form' entry, yields nil."
  (when (consp token)
    (alist-get :form token)))

(defun parseclj-lex-leaf-token-p (token)
  "Return non-nil if TOKEN's type is listed in `parseclj-lex--leaf-tokens'.
The non-nil value is the tail of that list starting at the matching
type, as returned by `member'."
  (let ((type (parseclj-lex-token-type token)))
    (member type parseclj-lex--leaf-tokens)))
Expand All @@ -89,6 +111,9 @@ A token is an association list with :token-type as its first key."
"Return t if the given ast TOKEN is a closing token."
(member (parseclj-lex-token-type token) parseclj-lex--closing-tokens))

(defun parseclj-lex-error-p (token)
  "Return t when the type of TOKEN is `:lex-error', nil otherwise."
  (let ((type (parseclj-lex-token-type token)))
    (eq :lex-error type)))

;; Elisp values from tokens

Expand Down Expand Up @@ -177,18 +202,32 @@ S goes through three transformations:
(<= (char-after (point)) ?9))
(right-char)))

(defun parseclj-lex-skip-hex ()
  "Move point past all consecutive hexadecimal digits after point.
Hex digits are 0-9, a-f and A-F.  Point is left where it is when the
character at point is not a hex digit or when point is at the end of
the buffer.  Always returns nil."
  ;; Use `skip-chars-forward' rather than a `right-char' loop:
  ;; `right-char' follows the visual direction and moves *backward*
  ;; through the buffer inside a right-to-left bidi paragraph, which
  ;; is never what a lexer wants.
  (skip-chars-forward "0-9a-fA-F")
  nil)

(defun parseclj-lex-skip-number ()
  "Skip a number at point.
An optional leading sign is skipped first.  What follows is either a
hexadecimal literal introduced by 0x or 0X, or a run of decimal
digits with an optional fractional part."
  ;; [\+\-]?(0[xX][0-9a-fA-F]*|\d*\.?\d*)
  ;; The sign is consumed before looking for the hex marker so that
  ;; signed hex literals such as -0xff are handled as hex as well.
  (when (memq (char-after (point)) '(?+ ?-))
    (forward-char))
  (if (and (eq ?0 (char-after (point)))
           (memq (char-after (1+ (point))) '(?x ?X)))
      (progn
        ;; Step over the "0x" marker, then the hex digits themselves.
        (forward-char 2)
        (parseclj-lex-skip-hex))
    (parseclj-lex-skip-digits)
    (when (eq (char-after (point)) ?.)
      (forward-char))
    (parseclj-lex-skip-digits)))

(defun parseclj-lex-number ()
"Consume a number and return a `:number' token representing it."
Expand Down Expand Up @@ -270,22 +309,39 @@ are returned as their own lex tokens."
((equal sym "false") (parseclj-lex-token :false "false" pos))
(t (parseclj-lex-token :symbol sym pos))))))

(defun parseclj-lex-string* ()
  "Helper for string/regex lexing.
The character at point is skipped unconditionally; callers invoke
this with point on the opening double quote.  Scanning then proceeds
to the next unescaped double quote.  When one is found, point is left
just after it and the text from the starting position up to point is
returned.  When the scan stops without finding a closing quote, nil
is returned so that the caller can build its own error token."
  (let ((pos (point)))
    (forward-char)
    (while (not (or (equal (char-after (point)) ?\") (parseclj-lex-at-eof-p)))
      (if (equal (char-after (point)) ?\\)
          ;; Skip the backslash and the character it escapes.  Clamp to
          ;; `point-max' so that a backslash which is the very last
          ;; character of the buffer does not signal `end-of-buffer'.
          (goto-char (min (+ (point) 2) (point-max)))
        (forward-char)))
    (when (equal (char-after (point)) ?\")
      (forward-char)
      (buffer-substring-no-properties pos (point)))))

(defun parseclj-lex-string ()
  "Lex the string literal at point and return its token.
The result is a `:string' token when `parseclj-lex-string*' finds a
closing double quote.  Otherwise it is the error token built by
`parseclj-lex-error-token' with `:invalid-string'.  Either token
carries the position where lexing started."
  (let* ((start (point))
         (text (parseclj-lex-string*)))
    (cond
     (text (parseclj-lex-token :string text start))
     (t (parseclj-lex-error-token start :invalid-string)))))

(defun parseclj-lex-regex ()
  "Lex the regular expression literal at point and return its token.
The token position is the character before point, so the form of a
successful `:regex' token is \"#\" followed by the quoted text read by
`parseclj-lex-string*'.  When no closing double quote is found, the
error token built by `parseclj-lex-error-token' with `:invalid-regex'
is returned instead."
  (let* ((start (1- (point)))
         (body (parseclj-lex-string*)))
    (if (null body)
        (parseclj-lex-error-token start :invalid-regex)
      (parseclj-lex-token :regex (concat "#" body) start))))

(defun parseclj-lex-lookahead (n)
  "Return the text of up to N characters starting at point.
Fewer than N characters are returned when the end of the accessible
portion of the buffer is closer than that.  Point is not moved."
  (let* ((start (point))
         (end (min (+ start n) (point-max))))
    (buffer-substring-no-properties start end)))
Expand Down Expand Up @@ -351,6 +407,16 @@ See `parseclj-lex-symbol', `parseclj-lex-symbol-start-p'."
(right-char))
(parseclj-lex-token :comment (buffer-substring-no-properties pos (point)) pos)))

(defun parseclj-lex-map-prefix ()
  "Return a lex token representing a map prefix.
The token starts at the character before point.  The character at
point is skipped, then an optional second colon, then every following
character accepted by `parseclj-lex-symbol-rest-p'.  The token form
is the buffer text from the token start up to where scanning stopped."
  (let ((pos (1- (point))))
    (forward-char)
    (when (equal (char-after (point)) ?:)
      (forward-char))
    ;; Stop explicitly at end of buffer (`char-after' returns nil there)
    ;; instead of handing nil to the predicate or moving past the end.
    (while (and (char-after (point))
                (parseclj-lex-symbol-rest-p (char-after (point))))
      (forward-char))
    (parseclj-lex-token :map-prefix (buffer-substring-no-properties pos (point)) pos)))

(defun parseclj-lex-next ()
"Consume characters at point and return the next lexical token.

Expand Down Expand Up @@ -387,6 +453,22 @@ See `parseclj-lex-token'."
(right-char)
(parseclj-lex-token :rbrace "}" pos))

((equal char ?')
(right-char)
(parseclj-lex-token :quote "'" pos))

((equal char ?`)
(right-char)
(parseclj-lex-token :backquote "`" pos))

((equal char ?~)
(right-char)
(if (eq ?@ (char-after (point)))
(progn
(right-char)
(parseclj-lex-token :unquote-splice "~@" pos))
(parseclj-lex-token :unquote "~" pos)))

((parseclj-lex-at-number-p)
(parseclj-lex-number))

Expand All @@ -405,6 +487,14 @@ See `parseclj-lex-token'."
((equal char ?\;)
(parseclj-lex-comment))

((equal char ?^)
(right-char)
(parseclj-lex-token :metadata "^" pos))

((equal char ?@)
(right-char)
(parseclj-lex-token :deref "@" pos))

((equal char ?#)
(right-char)
(let ((char (char-after (point))))
Expand All @@ -415,6 +505,23 @@ See `parseclj-lex-token'."
((equal char ?_)
(right-char)
(parseclj-lex-token :discard "#_" pos))
((equal char ?\()
(right-char)
(parseclj-lex-token :lambda "#(" pos))
((equal char ?')
(right-char)
(parseclj-lex-token :var "#'" pos))
((equal char ?\")
(parseclj-lex-regex))
((equal char ?:)
(parseclj-lex-map-prefix))
((equal char ?\?)
(right-char)
(if (eq ?@ (char-after (point)))
(progn
(right-char)
(parseclj-lex-token :reader-conditional-splice "#?@" pos))
(parseclj-lex-token :reader-conditional "#?" pos)))
((parseclj-lex-symbol-start-p char t)
(right-char)
(parseclj-lex-token :tag (concat "#" (parseclj-lex-get-symbol-at-point (1+ pos))) pos))
Expand Down
32 changes: 29 additions & 3 deletions parseclj-parser.el
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ can be handled with `condition-case'."
(defun parseclj--find-opening-token (stack closing-token)
"Scan STACK for an opening-token matching CLOSING-TOKEN."
(cl-case (parseclj-lex-token-type closing-token)
(:rparen :lparen)
(:rparen (parseclj-lex-token-type
(seq-find (lambda (token)
(member (parseclj-lex-token-type token)
'(:lparen :lambda)))
stack)))
(:rbracket :lbracket)
(:rbrace (parseclj-lex-token-type
(seq-find (lambda (token)
Expand Down Expand Up @@ -192,6 +196,11 @@ functions. Additionally the following options are recognized
;; (message "STACK: %S" stack)
;; (message "TOKEN: %S\n" token)

(when (and fail-fast (parseclj-lex-error-p token))
(parseclj--error "Invalid token at %s: %S"
(a-get token :pos)
(parseclj-lex-token-form token)))

;; Reduce based on the top item on the stack (collections)
(cond
((parseclj-lex-leaf-token-p token)
Expand All @@ -204,7 +213,7 @@ functions. Additionally the following options are recognized

;; Reduce based on top two items on the stack (special prefixed elements)
(let* ((top-value (parseclj--take-value stack value-p))
(opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p '(:discard :tag)))
(opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))
new-stack)
(while (and top-value opening-token)
;; (message "Reducing...")
Expand All @@ -214,8 +223,25 @@ functions. Additionally the following options are recognized
(setq new-stack (nthcdr (+ (length top-value) (length opening-token)) stack))
(setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value) options))

;; recur
(setq top-value (parseclj--take-value stack value-p))
(setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p '(:discard :tag)))))
(setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))))

;; Reduce based on top three items on the stack (metadata, namespaced maps)
(let* ((top-value-1 (parseclj--take-value stack value-p))
(top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
(opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
(length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))
new-stack)
(while (and top-value-1 top-value-2 opening-token)
(setq new-stack (nthcdr (apply #'+ (mapcar #'length (list top-value-1 top-value-2 opening-token))) stack))
(setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value-2 top-value-1) options))

;; recur
(setq top-value-1 (parseclj--take-value stack value-p))
(setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
(setq opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
(length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))))

(setq token (parseclj-lex-next)))

Expand Down
43 changes: 37 additions & 6 deletions test/parseclj-lex-test.el
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,25 @@
(:form . "123e34M")
(:pos . 1)))))

(with-temp-buffer
(insert "0xff00AA")
(goto-char 1)
(should (equal (parseclj-lex-next) '((:token-type . :number)
(:form . "0xff00AA")
(:pos . 1)))))

(with-temp-buffer
(insert "#?(:clj 1 :cljs 2)")
(goto-char 1)
(should (equal (parseclj-lex-next)
'((:token-type . :reader-conditional) (:form . "#?") (:pos . 1)))))

(with-temp-buffer
(insert "#?@(:clj [1] :cljs [2])")
(goto-char 1)
(should (equal (parseclj-lex-next)
'((:token-type . :reader-conditional-splice) (:form . "#?@") (:pos . 1)))))

(with-temp-buffer
(insert "123x")
(goto-char 1)
Expand Down Expand Up @@ -203,12 +222,7 @@
(should (equal (parseclj-lex-next) (parseclj-lex-token :number "13" 18)))
(should (equal (parseclj-lex-next) (parseclj-lex-token :whitespace " " 20)))
(should (equal (parseclj-lex-next) (parseclj-lex-token :number "14" 21)))
(should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23))))

(with-temp-buffer
(insert "~")
(goto-char 1)
(should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error "~" 1)))))
(should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23)))))

(ert-deftest parseclj-lex-test-at-number-p ()
(dolist (str '("123" ".9" "+1" "0" "-456"))
Expand Down Expand Up @@ -295,6 +309,12 @@
(goto-char 1)
(should (equal (parseclj-lex-string) (parseclj-lex-token :string "\"abc\\\"\"" 1)))))

(ert-deftest parseclj-lex-test-regex ()
  "Lexing a regex literal yields a single `:regex' token at position 1."
  (let ((source "#\"abc\""))
    (with-temp-buffer
      (insert source)
      (goto-char (point-min))
      (should (equal (parseclj-lex-next)
                     (parseclj-lex-token :regex source 1))))))

(ert-deftest parseclj-lex-test-tag ()
(with-temp-buffer
(insert "#inst")
Expand All @@ -306,6 +326,17 @@
(goto-char 1)
(should (equal (parseclj-lex-next) (parseclj-lex-token :tag "#foo/bar" 1)))))

(ert-deftest parseclj-lex-test-quote ()
  "Quote and backquote characters are lexed as their own prefix tokens."
  ;; Each sample is (SOURCE TOKEN-TYPE EXPECTED-FORM).
  (dolist (sample '(("'foo" :quote "'")
                    ("`foo" :backquote "`")))
    (with-temp-buffer
      (insert (nth 0 sample))
      (goto-char 1)
      (should (equal (parseclj-lex-next)
                     (parseclj-lex-token (nth 1 sample) (nth 2 sample) 1))))))

(provide 'parseclj-lex-test)

;;; parseclj-lex-test.el ends here
Loading