diff --git a/parseclj-ast.el b/parseclj-ast.el index 5719659..e7b3ceb 100644 --- a/parseclj-ast.el +++ b/parseclj-ast.el @@ -133,6 +133,13 @@ on available options." :tag (intern (substring (a-get opening-token :form) 1)) :children children) stack)) + (:metadata (cons (parseclj-ast-node :with-meta + pos + :children children) + stack)) + (:map-prefix (cons (a-assoc (car children) + :map-prefix opening-token) + stack)) (t (cons (parseclj-ast-node type pos :children children) stack))))) diff --git a/parseclj-lex.el b/parseclj-lex.el index 2f5f8f5..5f73d90 100644 --- a/parseclj-lex.el +++ b/parseclj-lex.el @@ -36,6 +36,7 @@ :symbol :keyword :string + :regex :character) "Types of tokens that represent leaf nodes in the AST.") @@ -44,6 +45,22 @@ :rbrace) "Types of tokens that mark the end of a non-atomic form.") +(defvar parseclj-lex--prefix-tokens '(:quote + :backquote + :unquote + :unquote-splice + :discard + :tag + :reader-conditional + :reader-conditional-splice + :var + :deref + :map-prefix) + "Tokens that modify the form that follows.") + +(defvar parseclj-lex--prefix-2-tokens '(:metadata) + "Tokens that modify the two forms that follow.") + ;; Token interface (defun parseclj-lex-token (type form pos &rest attributes) @@ -81,6 +98,11 @@ A token is an association list with :token-type as its first key." (and (consp token) (cdr (assq :token-type token)))) +(defun parseclj-lex-token-form (token) + "Get the form of TOKEN." + (and (consp token) + (cdr (assq :form token)))) + (defun parseclj-lex-leaf-token-p (token) "Return t if the given AST TOKEN is a leaf node." (member (parseclj-lex-token-type token) parseclj-lex--leaf-tokens)) @@ -89,6 +111,9 @@ A token is an association list with :token-type as its first key." "Return t if the given ast TOKEN is a closing token." (member (parseclj-lex-token-type token) parseclj-lex--closing-tokens)) +(defun parseclj-lex-error-p (token) + "Return t if the TOKEN represents a lexing error token." + (eq (parseclj-lex-token-type token) :lex-error)) ;; Elisp values from tokens @@ -177,18 +202,32 @@ S goes through three transformations: (<= (char-after (point)) ?9)) (right-char))) +(defun parseclj-lex-skip-hex () + "Skip all consecutive hex digits after point." + (while (and (char-after (point)) + (or (<= ?0 (char-after (point)) ?9) + (<= ?a (char-after (point)) ?f) + (<= ?A (char-after (point)) ?F))) + (right-char))) + (defun parseclj-lex-skip-number () "Skip a number at point." ;; [\+\-]?\d+\.\d+ - (when (member (char-after (point)) '(?+ ?-)) - (right-char)) + (if (and (eq ?0 (char-after (point))) + (eq ?x (char-after (1+ (point))))) + (progn + (right-char 2) + (parseclj-lex-skip-hex)) + (progn + (when (member (char-after (point)) '(?+ ?-)) + (right-char)) - (parseclj-lex-skip-digits) + (parseclj-lex-skip-digits) - (when (eq (char-after (point)) ?.) - (right-char)) + (when (eq (char-after (point)) ?.) + (right-char)) - (parseclj-lex-skip-digits)) + (parseclj-lex-skip-digits)))) (defun parseclj-lex-number () "Consume a number and return a `:number' token representing it." @@ -270,22 +309,39 @@ are returned as their own lex tokens." ((equal sym "false") (parseclj-lex-token :false "false" pos)) (t (parseclj-lex-token :symbol sym pos)))))) -(defun parseclj-lex-string () - "Return a lex token representing a string. -If EOF is reached without finding a closing double quote, a :lex-error -token is returned." +(defun parseclj-lex-string* () + "Helper for string/regex lexing. +Returns either the string, or an error token" (let ((pos (point))) (right-char) (while (not (or (equal (char-after (point)) ?\") (parseclj-lex-at-eof-p))) (if (equal (char-after (point)) ?\\) (right-char 2) (right-char))) - (if (equal (char-after (point)) ?\") - (progn - (right-char) - (parseclj-lex-token :string (buffer-substring-no-properties pos (point)) pos)) + (when (equal (char-after (point)) ?\") + (right-char) + (buffer-substring-no-properties pos (point))))) + +(defun parseclj-lex-string () + "Return a lex token representing a string. +If EOF is reached without finding a closing double quote, a :lex-error +token is returned." + (let ((pos (point)) + (str (parseclj-lex-string*))) + (if str + (parseclj-lex-token :string str pos) (parseclj-lex-error-token pos :invalid-string)))) +(defun parseclj-lex-regex () + "Return a lex token representing a regular expression. +If EOF is reached without finding a closing double quote, a :lex-error +token is returned." + (let ((pos (1- (point))) + (str (parseclj-lex-string*))) + (if str + (parseclj-lex-token :regex (concat "#" str) pos) + (parseclj-lex-error-token pos :invalid-regex)))) + (defun parseclj-lex-lookahead (n) "Return a lookahead string of N characters after point." (buffer-substring-no-properties (point) (min (+ (point) n) (point-max)))) @@ -351,6 +407,16 @@ See `parseclj-lex-symbol', `parseclj-lex-symbol-start-p'." (right-char)) (parseclj-lex-token :comment (buffer-substring-no-properties pos (point)) pos))) +(defun parseclj-lex-map-prefix () + "Return a lex token representing a map prefix." + (let ((pos (1- (point)))) + (right-char) + (when (equal (char-after (point)) ?:) + (right-char)) + (while (parseclj-lex-symbol-rest-p (char-after (point))) + (right-char)) + (parseclj-lex-token :map-prefix (buffer-substring-no-properties pos (point)) pos))) + (defun parseclj-lex-next () "Consume characters at point and return the next lexical token. @@ -387,6 +453,22 @@ See `parseclj-lex-token'." (right-char) (parseclj-lex-token :rbrace "}" pos)) + ((equal char ?') + (right-char) + (parseclj-lex-token :quote "'" pos)) + + ((equal char ?`) + (right-char) + (parseclj-lex-token :backquote "`" pos)) + + ((equal char ?~) + (right-char) + (if (eq ?@ (char-after (point))) + (progn + (right-char) + (parseclj-lex-token :unquote-splice "~@" pos)) + (parseclj-lex-token :unquote "~" pos))) + ((parseclj-lex-at-number-p) (parseclj-lex-number)) @@ -405,6 +487,14 @@ See `parseclj-lex-token'." ((equal char ?\;) (parseclj-lex-comment)) + ((equal char ?^) + (right-char) + (parseclj-lex-token :metadata "^" pos)) + + ((equal char ?@) + (right-char) + (parseclj-lex-token :deref "@" pos)) + ((equal char ?#) (right-char) (let ((char (char-after (point)))) @@ -415,6 +505,23 @@ See `parseclj-lex-token'." ((equal char ?_) (right-char) (parseclj-lex-token :discard "#_" pos)) + ((equal char ?\() + (right-char) + (parseclj-lex-token :lambda "#(" pos)) + ((equal char ?') + (right-char) + (parseclj-lex-token :var "#'" pos)) + ((equal char ?\") + (parseclj-lex-regex)) + ((equal char ?:) + (parseclj-lex-map-prefix)) + ((equal char ?\?) + (right-char) + (if (eq ?@ (char-after (point))) + (progn + (right-char) + (parseclj-lex-token :reader-conditional-splice "#?@" pos)) + (parseclj-lex-token :reader-conditional "#?" pos))) ((parseclj-lex-symbol-start-p char t) (right-char) (parseclj-lex-token :tag (concat "#" (parseclj-lex-get-symbol-at-point (1+ pos))) pos)) diff --git a/parseclj-parser.el b/parseclj-parser.el index 34ed9d4..ef04e16 100644 --- a/parseclj-parser.el +++ b/parseclj-parser.el @@ -47,7 +47,11 @@ can be handled with `condition-case'." (defun parseclj--find-opening-token (stack closing-token) "Scan STACK for an opening-token matching CLOSING-TOKEN." (cl-case (parseclj-lex-token-type closing-token) - (:rparen :lparen) + (:rparen (parseclj-lex-token-type + (seq-find (lambda (token) + (member (parseclj-lex-token-type token) + '(:lparen :lambda))) + stack))) (:rbracket :lbracket) (:rbrace (parseclj-lex-token-type (seq-find (lambda (token) @@ -192,6 +196,11 @@ functions. Additionally the following options are recognized ;; (message "STACK: %S" stack) ;; (message "TOKEN: %S\n" token) + (when (and fail-fast (parseclj-lex-error-p token)) + (parseclj--error "Invalid token at %s: %S" + (a-get token :pos) + (parseclj-lex-token-form token))) + ;; Reduce based on the top item on the stack (collections) (cond ((parseclj-lex-leaf-token-p token) @@ -204,7 +213,7 @@ functions. Additionally the following options are recognized ;; Reduce based on top two items on the stack (special prefixed elements) (let* ((top-value (parseclj--take-value stack value-p)) - (opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p '(:discard :tag))) + (opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens)) new-stack) (while (and top-value opening-token) ;; (message "Reducing...") @@ -214,8 +223,25 @@ functions. Additionally the following options are recognized (setq new-stack (nthcdr (+ (length top-value) (length opening-token)) stack)) (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value) options)) + ;; recur (setq top-value (parseclj--take-value stack value-p)) - (setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p '(:discard :tag))))) + (setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens)))) + + ;; Reduce based on top three items on the stack (metadata, namespaced maps) + (let* ((top-value-1 (parseclj--take-value stack value-p)) + (top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p)) + (opening-token (parseclj--take-token (nthcdr (+ (length top-value-1) + (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens)) + new-stack) + (while (and top-value-1 top-value-2 opening-token) + (setq new-stack (nthcdr (apply #'+ (mapcar #'length (list top-value-1 top-value-2 opening-token))) stack)) + (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value-2 top-value-1) options)) + + ;; recur + (setq top-value-1 (parseclj--take-value stack value-p)) + (setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p)) + (setq opening-token (parseclj--take-token (nthcdr (+ (length top-value-1) + (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens)))) (setq token (parseclj-lex-next))) diff --git a/test/parseclj-lex-test.el b/test/parseclj-lex-test.el index 183c6ca..289fa72 100644 --- a/test/parseclj-lex-test.el +++ b/test/parseclj-lex-test.el @@ -52,6 +52,25 @@ (:form . "123e34M") (:pos . 1))))) + (with-temp-buffer + (insert "0xff00AA") + (goto-char 1) + (should (equal (parseclj-lex-next) '((:token-type . :number) + (:form . "0xff00AA") + (:pos . 1))))) + + (with-temp-buffer + (insert "#?(:clj 1 :cljs 2)") + (goto-char 1) + (should (equal (parseclj-lex-next) + '((:token-type . :reader-conditional) (:form . "#?") (:pos . 1))))) + + (with-temp-buffer + (insert "#?@(:clj [1] :cljs [2])") + (goto-char 1) + (should (equal (parseclj-lex-next) + '((:token-type . :reader-conditional-splice) (:form . "#?@") (:pos . 1))))) + (with-temp-buffer (insert "123x") (goto-char 1) @@ -203,12 +222,7 @@ (should (equal (parseclj-lex-next) (parseclj-lex-token :number "13" 18))) (should (equal (parseclj-lex-next) (parseclj-lex-token :whitespace " " 20))) (should (equal (parseclj-lex-next) (parseclj-lex-token :number "14" 21))) - (should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23)))) - - (with-temp-buffer - (insert "~") - (goto-char 1) - (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error "~" 1))))) + (should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23))))) (ert-deftest parseclj-lex-test-at-number-p () (dolist (str '("123" ".9" "+1" "0" "-456")) @@ -295,6 +309,12 @@ (goto-char 1) (should (equal (parseclj-lex-string) (parseclj-lex-token :string "\"abc\\\"\"" 1))))) +(ert-deftest parseclj-lex-test-regex () + (with-temp-buffer + (insert "#\"abc\"") + (goto-char 1) + (should (equal (parseclj-lex-next) (parseclj-lex-token :regex "#\"abc\"" 1))))) + (ert-deftest parseclj-lex-test-tag () (with-temp-buffer (insert "#inst") @@ -306,6 +326,17 @@ (goto-char 1) (should (equal (parseclj-lex-next) (parseclj-lex-token :tag "#foo/bar" 1))))) +(ert-deftest parseclj-lex-test-quote () + (with-temp-buffer + (insert "'foo") + (goto-char 1) + (should (equal (parseclj-lex-next) (parseclj-lex-token :quote "'" 1)))) + + (with-temp-buffer + (insert "`foo") + (goto-char 1) + (should (equal (parseclj-lex-next) (parseclj-lex-token :backquote "`" 1))))) + (provide 'parseclj-lex-test) ;;; parseclj-lex-test.el ends here diff --git a/test/parseclj-test.el b/test/parseclj-test.el index 190922a..a25d2e4 100644 --- a/test/parseclj-test.el +++ b/test/parseclj-test.el @@ -185,6 +185,85 @@ ((:node-type . :whitespace) (:position . 6) (:form . " ")) ((:node-type . :number) (:position . 7) (:form . "5") (:value . 5))))))))) +(ert-deftest parseclj--parse-reader-conditionals-test () + (should (equal + (parseclj-parse-clojure "#?(:clj 1 :cljs 2)") + '((:node-type . :root) + (:position . 1) + (:children ((:node-type . :reader-conditional) + (:position . 1) + (:children ((:node-type . :list) + (:position . 3) + (:children ((:node-type . :keyword) + (:position . 4) + (:form . ":clj") + (:value . :clj)) + ((:node-type . :number) + (:position . 9) + (:form . "1") + (:value . 1)) + ((:node-type . :keyword) + (:position . 11) + (:form . ":cljs") + (:value . :cljs)) + ((:node-type . :number) + (:position . 17) + (:form . "2") + (:value . 2))))))))))) + +(ert-deftest parseclj--parse-metadata-test () + (should (equal + (parseclj-parse-clojure "^{} [123]") + '((:node-type . :root) + (:position . 1) + (:children ((:node-type . :with-meta) + (:position . 1) + (:children ((:node-type . :map) + (:position . 2) + (:children)) + ((:node-type . :vector) + (:position . 5) + (:children ((:node-type . :number) + (:position . 6) + (:form . "123") + (:value . 123))))))))))) + +(ert-deftest parseclj--parse-var-test () + (should (equal + (parseclj-parse-clojure "#'foo") + '((:node-type . :root) + (:position . 1) + (:children ((:node-type . :var) + (:position . 1) + (:children ((:node-type . :symbol) + (:position . 3) + (:form . "foo") + (:value . foo))))))))) + +(ert-deftest parseclj--parse-lambda-test () + (should (equal + (parseclj-parse-clojure "#(foo)") + '((:node-type . :root) + (:position . 1) + (:children ((:node-type . :lambda) + (:position . 1) + (:children ((:node-type . :symbol) + (:position . 3) + (:form . "foo") + (:value . foo))))))))) + +(ert-deftest parseclj--parse-namespaced-map-test () + (should (equal + (parseclj-parse-clojure "#:foo.bar{}") + '((:node-type . :root) + (:position . 1) + (:children ((:map-prefix . ((:token-type . :map-prefix) + (:form . "#:foo.bar") + (:pos . 1))) + (:node-type . :map) + (:position . 10) + (:children))))))) + (ert-deftest parseclj--take-token-test () (should (equal (parseclj--take-token