diff --git a/.github/workflows/langs.yml b/.github/workflows/langs.yml index 63778254..73a4b137 100644 --- a/.github/workflows/langs.yml +++ b/.github/workflows/langs.yml @@ -6,17 +6,21 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04, ubuntu-20.04] + os: [ubuntu-20.04, ubuntu-22.04] racket-variant: ['BC', 'CS'] - racket-version: ['7.8', '7.9', '8.0', '8.1', '8.2'] + racket-version: ['8.6', '8.8'] name: Test on Racket ${{ matrix.racket-variant }} ${{ matrix.racket-version }} on ${{ matrix.os }} steps: - name: Checkout uses: actions/checkout@main - name: Install nasm run: sudo apt-get install nasm + - name: Install libunistring + run: | + sudo apt-get install libunistring2 + sudo apt-get install libunistring-dev - name: Install Racket - uses: Bogdanp/setup-racket@v1.5 + uses: Bogdanp/setup-racket@v1.10 with: architecture: 'x64' distribution: 'full' @@ -25,8 +29,13 @@ jobs: - name: Version info run: | nasm --version - gcc --version + gcc --version - name: Install langs package - run: raco pkg install langs/ + run: | + raco pkg install --auto ziggy/ + raco pkg install langs/ - name: Run tests - run: raco test -p langs + run: | + raco test -p ziggy + xvfb-run raco test -p langs + raco test -c outlaw diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index fd20c688..b9b0d980 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -1,5 +1,5 @@ on: - - push + - workflow_dispatch jobs: build-and-test: diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 352d8fc6..d26695cd 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -1,5 +1,5 @@ on: - - push + - workflow_dispatch jobs: build-and-test: @@ -13,15 +13,17 @@ jobs: sudo dpkg -i pandoc.deb sudo apt-get install nasm sudo apt-get install fonts-stix + sudo apt-get install libunistring-dev - name: Install Racket - uses: Bogdanp/setup-racket@v1.5 + uses: Bogdanp/setup-racket@v1.10 with: architecture: 'x64' 
distribution: 'full' variant: 'CS' - version: '8.1' + version: '8.6' - name: Build and test run: | + export LINK_DIR=/usr/lib/x86_64-linux-gnu raco pkg install langs/ raco make www/main.scrbl raco test langs @@ -45,7 +47,7 @@ jobs: uses: plum-umd/github-actions-rsync@master with: RSYNC_OPTIONS: -rvzp - RSYNC_TARGET: /fs/www/class/fall2021/ + RSYNC_TARGET: /fs/www/class/fall2022/ RSYNC_SOURCE: cmsc430 env: SSH_PRIVATE_KEY: ${{secrets.SSH_PRIVATE_KEY}} diff --git a/README.md b/README.md index 5f91e951..057063f5 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,8 @@ University of Maryland, College Park. The current instance of this course is: -* http://www.cs.umd.edu/class/fall2021/cmsc430/ +* http://www.cs.umd.edu/class/summer2023/cmsc430/ -Copyright © 2019 David Van Horn - -Copyright © 2020 David Van Horn and José Manuel Calderón Trilla +Copyright © David Van Horn and José Manuel Calderón Trilla and Leonidas Lampropoulos Licensed under the Academic Free License version 3.0 diff --git a/langs/a86/ast.rkt b/langs/a86/ast.rkt index 1205e729..cb4ddea1 100644 --- a/langs/a86/ast.rkt +++ b/langs/a86/ast.rkt @@ -7,77 +7,115 @@ ;; with decent error messages. (define check:label-symbol - (λ (x n) + (λ (a x n) (when (register? x) (error n "cannot use register as label name; given ~v" x)) (unless (symbol? x) (error n "expects symbol; given ~v" x)) - x)) + (unless (label? x) + (error n "label names must conform to nasm restrictions")) + (values a x))) + +(define check:label-symbol+integer + (λ (a x c n) + (check:label-symbol x n) + (unless (integer? c) + (error n "expects integer constant; given ~v" c)) + (values a x c))) (define check:target - (λ (x n) + (λ (a x n) (unless (or (symbol? x) (offset? x)); either register or label (error n "expects symbol; given ~v" x)) - x)) + (values a x))) + +(define check:cmov + (λ (a a1 a2 n) + (unless (register? a1) + (error n "expects register; given ~v" a1)) + (unless (or (register? a2) (offset? 
a2)) + (error n "expects register or offset; given ~v" a2)) + (values a a1 a2))) (define check:arith - (λ (a1 a2 n) + (λ (a a1 a2 n) (unless (register? a1) (error n "expects register; given ~v" a1)) (unless (or (exact-integer? a2) (register? a2) (offset? a2)) (error n "expects exact integer, register, or offset; given ~v" a2)) - (values a1 a2))) + (when (and (exact-integer? a2) (> (integer-length a2) 32)) + (error n "literal must not exceed 32-bits; given ~v (~v bits); go through a register instead" a2 (integer-length a2))) + (values a a1 a2))) (define check:register - (λ (a1 n) + (λ (a a1 n) (unless (register? a1) (error n "expects register; given ~v" a1)) - a1)) + (values a a1))) (define check:src-dest - (λ (a1 a2 n) + (λ (a a1 a2 n) (unless (or (register? a1) (offset? a1)) (error n "expects register or offset; given ~v" a1)) - (unless (or (register? a2) (offset? a2) (exact-integer? a2)) - (error n "expects register, offset, or exact integer; given ~v" a2)) + (unless (or (register? a2) (offset? a2) (exact-integer? a2) (Const? a2)) + (error n "expects register, offset, exact integer, or defined constant; given ~v" a2)) (when (and (offset? a1) (offset? a2)) (error n "cannot use two memory locations; given ~v, ~v" a1 a2)) + (when (and (exact-integer? a2) (> (integer-length a2) 32)) + (error n "literal must not exceed 32-bits; given ~v (~v bits); go through a register instead" a2 (integer-length a2))) (when (and (offset? a1) (exact-integer? a2)) (error n "cannot use a memory locations and literal; given ~v, ~v; go through a register instead" a1 a2)) - (values a1 a2))) + (values a a1 a2))) + +(define check:mov + (λ (a a1 a2 n) + (unless (or (register? a1) (offset? a1)) + (error n "expects register or offset; given ~v" a1)) + (unless (or (register? a2) (offset? a2) (exact-integer? a2) (Const? a2)) + (error n "expects register, offset, exact integer, or defined constant; given ~v" a2)) + (when (and (offset? a1) (offset? 
a2)) + (error n "cannot use two memory locations; given ~v, ~v" a1 a2)) + (when (and (exact-integer? a2) (> (integer-length a2) 64)) + (error n "literal must not exceed 64-bits; given ~v (~v bits)" a2 (integer-length a2))) + (when (and (offset? a1) (exact-integer? a2)) + (error n "cannot use a memory locations and literal; given ~v, ~v; go through a register instead" a1 a2)) + (values a a1 a2))) (define check:shift - (λ (a1 a2 n) + (λ (a a1 a2 n) (unless (register? a1) (error n "expects register; given ~v" a1)) - (unless (and (exact-integer? a2) (<= 0 a2 63)) + (unless (or (and (exact-integer? a2) (<= 0 a2 63)) + (eq? 'cl a2)) (error n "expects exact integer in [0,63]; given ~v" a2)) - (values a1 a2))) + (values a a1 a2))) (define check:offset - (λ (r i n) + (λ (a r i n) (unless (or (register? r) (label? r)) (error n "expects register or label as first argument; given ~v" r)) (unless (exact-integer? i) (error n "expects exact integer as second argument; given ~v" i)) - (values r i))) + (values a r i))) (define check:push - (λ (a1 n) + (λ (a a1 n) (unless (or (exact-integer? a1) (register? a1)) (error n "expects exact integer or register; given ~v" a1)) - a1)) + (when (and (exact-integer? a1) (> (integer-length a1) 32)) + (error n "literal must not exceed 32-bits; given ~v (~v bits); go through a register instead" a1 (integer-length a1))) + (values a a1))) (define check:lea - (λ (dst x n) + (λ (a dst x n) (unless (or (register? dst) (offset? dst)) (error n "expects register or offset; given ~v" dst)) - (unless (or (label? x) (offset? x)) - (error n "expects label or offset; given ~v" x)) - (values dst x))) + (unless (or (label? x) (offset? x) (exp? 
x)) + (error n "expects label, offset, or expression; given ~v" x)) + (values a dst x))) (define check:none - (λ (n) (values))) + (λ (a n) (values a))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Comments @@ -102,18 +140,74 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Instructions -(define-syntax-rule - (instruct Name (x ...) guard) - (begin (provide (struct-out Name)) - (struct Name (x ...) - #:transparent - #:guard guard))) +(require racket/struct) +(define current-annotation (make-parameter #f)) +(provide instruction-annotation current-annotation) + +(struct instruction (annotation)) + +(define-syntax (instruct stx) + (syntax-case stx () + [(instruct Name (x ...) guard) + (with-syntax ([Name? (datum->syntax stx (string->symbol (string-append (symbol->string (syntax->datum #'Name)) "?")))]) + #'(begin (provide Name Name?) + (define-match-expander Name + (lambda (stx) + (syntax-case stx () + [(_ elts (... ...)) + #'(%Name _ elts (... ...))])) + (lambda (stx) + (syntax-case stx () + [m (identifier? #'m) #'(λ (x ...) (%Name (current-annotation) x ...))] + [(m x ...) #'(%Name (current-annotation) x ...)]))) + (struct %Name instruction (x ...) + #:reflection-name 'Name + #:transparent + #:guard guard + #:methods gen:equal+hash + [(define equal-proc (λ (i1 i2 equal?) + (equal? (struct->vector i1) + (struct->vector i2)))) + (define hash-proc (λ (i hash) (hash (struct->vector i)))) + (define hash2-proc (λ (i hash) (hash (struct->vector i))))] + + #:property prop:custom-print-quotable 'never + #:methods gen:custom-write + [(define write-proc + (instr-print 'Name) + #;(make-constructor-style-printer + (lambda (obj) 'Name) + (lambda (obj) + (rest (rest (vector->list (struct->vector obj)))))))]) + (define Name? %Name?)))])) + +(define (instr-print type) + (lambda (instr port mode) + (if (number? 
mode) + (write-string "(" port) + (write-string "#(struct:" port)) + (write-string (symbol->string type) port) + (let ([recur (case mode + [(#t) write] + [(#f) display] + [else (lambda (p port) (print p port mode))])]) + (for-each (lambda (e) + (write-string " " port) + (recur e port)) + (rest (rest (vector->list (struct->vector instr)))))) + (if (number? mode) + (write-string ")" port) + (write-string ")" port)))) + + +(instruct Text () check:none) +(instruct Data () check:none) (instruct Global (x) check:label-symbol) (instruct Label (x) check:label-symbol) (instruct Call (x) check:target) (instruct Ret () check:none) -(instruct Mov (dst src) check:src-dest) +(instruct Mov (dst src) check:mov) (instruct Add (dst src) check:arith) (instruct Sub (dst src) check:arith) (instruct Cmp (a1 a2) check:src-dest) @@ -121,64 +215,94 @@ (instruct Je (x) check:target) (instruct Jne (x) check:target) (instruct Jl (x) check:target) +(instruct Jle (x) check:target) (instruct Jg (x) check:target) +(instruct Jge (x) check:target) +(instruct Jo (x) check:target) +(instruct Jno (x) check:target) +(instruct Jc (x) check:target) +(instruct Jnc (x) check:target) +(instruct Cmove (dst src) check:cmov) +(instruct Cmovne (dst src) check:cmov) +(instruct Cmovl (dst src) check:cmov) +(instruct Cmovle (dst src) check:cmov) +(instruct Cmovg (dst src) check:cmov) +(instruct Cmovge (dst src) check:cmov) +(instruct Cmovo (dst src) check:cmov) +(instruct Cmovno (dst src) check:cmov) +(instruct Cmovc (dst src) check:cmov) +(instruct Cmovnc (dst src) check:cmov) (instruct And (dst src) check:src-dest) (instruct Or (dst src) check:src-dest) (instruct Xor (dst src) check:src-dest) (instruct Sal (dst i) check:shift) (instruct Sar (dst i) check:shift) (instruct Push (a1) check:push) +(instruct Pushf () check:none) +(instruct Popf () check:none) (instruct Pop (a1) check:register) (instruct Lea (dst x) check:lea) +(instruct Not (x) check:register) +(instruct Div (den) check:register) -(instruct Offset (r 
i) check:offset) +(instruct Offset (r i) check:offset) ;; May need to make this not an instruction (instruct Extern (x) check:label-symbol) -(provide offset? register? instruction? label?) +(instruct Equ (x v) check:label-symbol+integer) +(instruct Const (x) check:label-symbol) + +;; IMPROVE: do more checking +(instruct Db (x) (lambda (a x n) (values a x))) +(instruct Dw (x) (lambda (a x n) (values a x))) +(instruct Dd (x) (lambda (a x n) (values a x))) +(instruct Dq (x) (lambda (a x n) (values a x))) + +(provide (struct-out Plus)) +(struct Plus (e1 e2) #:transparent) + +(provide exp?) +(define (exp? x) + (or (Offset? x) + (and (Plus? x) + (exp? (Plus-e1 x)) + (exp? (Plus-e2 x))) + (symbol? x) + (integer? x))) + +(provide offset? register? label? 64-bit-integer? 32-bit-integer?) (define offset? Offset?) (define (register? x) - (and (memq x '(eax rax rbx rcx rdx rbp rsp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15)) + (and (memq x '(cl eax rax rbx rcx rdx rbp rsp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15)) #t)) +(define (64-bit-integer? x) + (and (exact-integer? x) + (<= (integer-length x) 64))) + +(define (32-bit-integer? x) + (and (exact-integer? x) + (<= (integer-length x) 32))) + (define (label? x) (and (symbol? x) + (nasm-label? x) (not (register? x)))) -(define (instruction? x) - (or (Global? x) - (Label? x) - (Extern? x) - (Call? x) - (Ret? x) - (Mov? x) - (Add? x) - (Sub? x) - (Cmp? x) - (Jmp? x) - (Je? x) - (Jne? x) - (Jl? x) - (Jg? x) - (And? x) - (Or? x) - (Xor? x) - (Sal? x) - (Sar? x) - (Push? x) - (Pop? x) - (Lea? x) +(provide (rename-out [a86:instruction? instruction?])) +(define (a86:instruction? x) + (or (instruction? x) (Comment? x))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Instruction sequencing and program error checking (provide/contract - [seq (-> (or/c instruction? (listof instruction?)) ... - (listof instruction?))] - [prog (-> (or/c instruction? (listof instruction?)) ... 
- (listof instruction?))]) + [seq (-> (or/c a86:instruction? (listof a86:instruction?)) ... + (listof a86:instruction?))] + [prog (-> (or/c a86:instruction? (listof a86:instruction?)) ... + (listof a86:instruction?))]) ;; (U Instruction Asm) ... -> Asm ;; Convenient for sequencing instructions or groups of instructions @@ -198,6 +322,7 @@ (check-unique-label-decls p) (check-label-targets-declared p) (check-has-initial-label p) + (check-initial-label-global p) ;; anything else? p)) @@ -219,28 +344,32 @@ [(cons _ asm) (label-decls asm)])) -(define (label-symbol? x) - (and (symbol? x) - (not (register? x)))) +;; Symbol -> Boolean +(define (nasm-label? s) + (regexp-match #rx"^[a-zA-Z._?][a-zA-Z0-9_$#@~.?]*$" (symbol->string s))) ;; Asm -> (Listof Symbol) ;; Compute all uses of label names (define (label-uses asm) (match asm ['() '()] - [(cons (Jmp (? label-symbol? s)) asm) + [(cons (Jmp (? label? s)) asm) (cons s (label-uses asm))] - [(cons (Je (? label-symbol? s)) asm) + [(cons (Je (? label? s)) asm) (cons s (label-uses asm))] - [(cons (Jne (? label-symbol? s)) asm) + [(cons (Jne (? label? s)) asm) (cons s (label-uses asm))] - [(cons (Jg (? label-symbol? s)) asm) + [(cons (Jg (? label? s)) asm) (cons s (label-uses asm))] - [(cons (Jl (? label-symbol? s)) asm) + [(cons (Jge (? label? s)) asm) (cons s (label-uses asm))] - [(cons (Call (? label-symbol? s)) asm) + [(cons (Jl (? label? s)) asm) (cons s (label-uses asm))] - [(cons (Lea _ (? label-symbol? s)) asm) + [(cons (Jle (? label? s)) asm) + (cons s (label-uses asm))] + [(cons (Call (? label? s)) asm) + (cons s (label-uses asm))] + [(cons (Lea _ (? label? s)) asm) (cons s (label-uses asm))] [(cons _ asm) (label-uses asm)])) @@ -259,3 +388,34 @@ (define (check-has-initial-label asm) (unless (findf Label? asm) (error 'prog "no initial label found"))) + +;; Asm -> Void +(define (check-initial-label-global asm) + (match (findf Label? 
asm) + [(Label init) + (unless (member init (map (lambda (i) (match i [(Global l) l])) + (filter Global? asm))) + (error 'prog "initial label undeclared as global: ~v" init))])) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Symbol to Label + +;; Symbol -> Label +;; Produce a symbol that is a valid Nasm label +;; Guarantees that (eq? s1 s2) <=> (eq? (symbol->label s1) (symbol->label s1)) +(provide symbol->label) +(define (symbol->label s) + (string->symbol + (string-append + "label_" + (list->string + (map (λ (c) + (if (or (char<=? #\a c #\z) + (char<=? #\A c #\Z) + (char<=? #\0 c #\9) + (memq c '(#\_ #\$ #\# #\@ #\~ #\. #\?))) + c + #\_)) + (string->list (symbol->string s)))) + "_" + (number->string (eq-hash-code s) 16)))) diff --git a/langs/a86/check-nasm.rkt b/langs/a86/check-nasm.rkt new file mode 100644 index 00000000..aec72cc7 --- /dev/null +++ b/langs/a86/check-nasm.rkt @@ -0,0 +1,39 @@ +#lang racket +(provide check-nasm-available) +(require racket/gui/dynamic) + +(define nasm-msg + #<path "/") (find-system-path 'orig-dir))) + +(define (drracket?) + (gui-available?)) + +(define (check-nasm-available) + (unless (parameterize ([current-output-port (open-output-string)] + [current-error-port (open-output-string)]) + (system "nasm -v")) + (error (format nasm-msg + (getenv "PATH") + (if (and (drracket?) (macos?) (launched-with-finder?)) macosx-msg ""))))) \ No newline at end of file diff --git a/langs/a86/interp.rkt b/langs/a86/interp.rkt index e9390edd..295876d0 100644 --- a/langs/a86/interp.rkt +++ b/langs/a86/interp.rkt @@ -4,10 +4,20 @@ [asm-interp (-> (listof instruction?) any/c)] [asm-interp/io (-> (listof instruction?) string? 
any/c)]) -(require "printer.rkt" "ast.rkt" "callback.rkt" - (rename-in ffi/unsafe [-> _->])) +(define-logger a86) + +(require "printer.rkt" "ast.rkt" "callback.rkt" "check-nasm.rkt" + (rename-in ffi/unsafe [-> _->])) (require (submod "printer.rkt" private)) +;; Check NASM availability when required to fail fast. +(check-nasm-available) + +(define *debug*? + (let ((r (getenv "PLTSTDERR"))) + (and r + (string=? r "info@a86")))) + ;; Assembly code is linked with object files in this parameter (define current-objs (make-parameter '())) @@ -29,10 +39,43 @@ (define fmt (if (eq? (system-type 'os) 'macosx) 'macho64 'elf64)) +;; WARNING: The heap is re-used, so make sure you're done with it +;; before calling asm-interp again +(define *heap* + ; IMPROVE ME: hard-coded heap size + (malloc _int64 20000 'raw)) + + +;; Integer64 -> String +(define (int64->binary-string n) + (format "#b~a" + (~r n #:base 2 #:min-width 64 #:pad-string "0"))) + +;; Integer64 -> String +(define (int64->octal-string n) + (format "#o~a" + (~r n #:base 8 #:min-width 22 #:pad-string "0"))) + +;; Integer64 +(define (int64->hex-string n) + (format "#x~a" + (~r n #:base 16 #:min-width 16 #:pad-string "0"))) + +(define (show-state . regs) + (format "\n~a" + (map (lambda (r v) + (format "(~a ~a)" r (int64->hex-string v))) + '(rax rbx rcx rdx rbp rsp rsi rdi + r8 r9 r10 r11 r12 r13 r14 r15 instr flags) + regs))) + ;; Asm String -> (cons Value String) ;; Like asm-interp, but uses given string for input and returns ;; result with string output (define (asm-interp/io a input) + + (log-a86-info (~v a)) + (define t.s (make-temporary-file "nasm~a.s")) (define t.o (path-replace-extension t.s #".o")) (define t.so (path-replace-extension t.s #".so")) @@ -43,11 +86,13 @@ #:exists 'truncate (λ () (parameterize ((current-shared? #t)) - (displayln (asm-string a))))) + (asm-display (if *debug*? 
+ (debug-transform a) + a))))) (nasm t.s t.o) (ld t.o t.so) - + (define libt.so (ffi-lib t.so)) (define init-label @@ -66,15 +111,28 @@ (set-ffi-obj! "error_handler" libt.so _pointer (function-ptr (λ () (raise 'err)) (_fun _-> _void)))) + (when *debug*? + (define log (ffi-obj-ref log-label libt.so (thunk #f))) + (when log + (set-ffi-obj! log-label libt.so _pointer + (function-ptr + (λ () (log-a86-info + (apply show-state + (build-list 18 (lambda (i) (ptr-ref log _int64 (add1 i))))))) + (_fun _-> _void))))) + + (define has-heap? #f) - (define current-heap #f) - ;; allocate a heap (when (ffi-obj-ref "heap" libt.so (thunk #f)) - (set! current-heap (make-c-parameter "heap" libt.so _pointer)) - (current-heap - ; IMPROVE ME: hard-coded heap size - (malloc _int64 10000 'raw))) - + (set! has-heap? #t) + + ;; This is a GC-enabled run-time so set from, to, and types space + (when (ffi-obj-ref "from" libt.so (thunk #f)) + ;; FIXME: leaks types memory + (set-ffi-obj! "from" libt.so _pointer *heap*) + (set-ffi-obj! "to" libt.so _pointer (ptr-add *heap* 10000 _int64)) + (set-ffi-obj! "types" libt.so _pointer (malloc _int32 10000)))) + (delete-file t.s) (delete-file t.o) (delete-file t.so) @@ -96,15 +154,9 @@ (current-out (fopen t.out "w")) (define result - (begin0 - (with-handlers ((symbol? identity)) - (guard-foreign-escape - (if current-heap - (cons (current-heap) (entry (current-heap))) - (entry #f)))) - #; - (when current-heap - (free (current-heap))))) + (with-handlers ((symbol? identity)) + (guard-foreign-escape + (entry *heap*)))) (fflush (current-out)) (fclose (current-in)) @@ -114,16 +166,10 @@ (delete-file t.in) (delete-file t.out) (cons result output)) - - (begin0 - (with-handlers ((symbol? identity)) - (guard-foreign-escape - (if current-heap - (cons (current-heap) (entry (current-heap))) - (entry #f)))) - #; - (when current-heap - (free (current-heap)))))) + + (with-handlers ((symbol? 
identity)) + (guard-foreign-escape + (entry *heap*))))) (define (string-splice xs) @@ -157,7 +203,7 @@ (define (ld:undef-symbol s) (ld:error - (string-append + (string-append (format "symbol ~a not defined in linked objects: ~a\n" s (current-objs)) "use `current-objs` to link in object containing symbol definition."))) @@ -169,7 +215,7 @@ (if (eq? (system-type 'os) 'macosx) "" "-z defs ")) - (unless (parameterize ((current-error-port err-port)) + (unless (parameterize ((current-error-port err-port)) (system (format "gcc ~a-v -shared ~a ~a -o ~a" -z-defs-maybe t.o objs t.so))) @@ -179,3 +225,69 @@ (regexp-match #rx"undefined reference to `(.*)'" err-msg)) ; linux [(list _ symbol) (ld:undef-symbol symbol)] [_ (ld:error (format "unknown link error.\n\n~a" err-msg))]))) + + + +;; Debugging facilities + +(define log-label (symbol->label (gensym 'log))) + +(define (Log i) + (seq (save-registers) + (Pushf) + (Mov 'rax i) + (Mov (Offset log-label (* 8 17)) 'rax) + (Mov 'rax (Offset 'rsp 0)) + (Mov (Offset log-label (* 8 18)) 'rax) + (Call (Offset log-label 0)) + (Popf) + (restore-registers))) + +(define (instrument is) + (for/fold ([ls '()] + #:result (reverse ls)) + ([idx (in-naturals)] + [ins (in-list is)]) + (if (serious-instruction? ins) + (seq ins (reverse (Log idx)) ls) + (seq ins ls)))) + +(define (serious-instruction? ins) + (match ins + [(Label _) #f] + [(Global _) #f] + [(? Comment?) 
#f] + [_ #t])) + +(define (debug-transform is) + (seq (instrument is) + ;; End of user program + (Data) + (Global log-label) + (Label log-label) + (Dq 0) ; callback placeholder + (static-alloc-registers) + (Dq 0) ; index of instruction + (Dq 0) ; flags + )) + +(define registers + '(rax rbx rcx rdx rbp rsp rsi rdi + r8 r9 r10 r11 r12 r13 r14 r15)) + +(define (static-alloc-registers) + (apply seq + (map (λ (r) (seq (Dq 0) (% (~a r)))) + registers))) + +(define (save-registers) + (apply seq + (map (λ (r i) (seq (Mov (Offset log-label (* 8 i)) r))) + registers + (build-list (length registers) add1)))) + +(define (restore-registers) + (apply seq + (map (λ (r i) (seq (Mov r (Offset log-label (* 8 i))))) + registers + (build-list (length registers) add1)))) diff --git a/langs/a86/printer.rkt b/langs/a86/printer.rkt index 664e6958..f6e9a4bc 100644 --- a/langs/a86/printer.rkt +++ b/langs/a86/printer.rkt @@ -1,6 +1,7 @@ #lang racket (provide/contract - [asm-string (-> (listof instruction?) string?)]) + [asm-string (-> (listof instruction?) string?)] ; deprecated + [asm-display (-> (listof instruction?) any)]) (define current-shared? (make-parameter #f)) @@ -10,16 +11,6 @@ (require "ast.rkt") -;; Arg -> String -(define (arg->string a) - (match a - [(? reg?) (reg->string a)] - [(? integer?) (number->string a)] - [(Offset (? reg? r) i) - (string-append "[" (reg->string r) " + " (number->string i) "]")] - [(Offset (? label? l) i) - (string-append "[" (symbol->string l) " + " (number->string i) "]")])) - ;; Any -> Boolean (define (reg? x) (register? x)) @@ -30,6 +21,10 @@ ;; Asm -> String (define (asm-string a) + (with-output-to-string (lambda () (asm-display a)))) + +;; Asm -> Void +(define (asm-display a) (define external-labels '()) ;; Label -> String @@ -54,13 +49,48 @@ [(? reg?) (reg->string t)] [(Offset (? reg? r) i) (string-append "[" (reg->string r) " + " (number->string i) "]")] + [(Offset (? label? 
l) i) + (string-append "[" (label-symbol->string l) " + " (number->string i) "]")] [_ (label-symbol->string t)])) + ;; Arg -> String + (define (arg->string a) + (match a + [(? reg?) (reg->string a)] + [(? integer?) (number->string a)] + [(Offset (? reg? r) i) + (string-append "[" (reg->string r) " + " (number->string i) "]")] + [(Offset (? label? l) i) + (string-append "[" (label-symbol->string l) " + " (number->string i) "]")] + [(Const l) + (symbol->string l)] + [(? exp?) (exp->string a)])) + + ;; Exp -> String + (define (exp->string e) + (match e + [(? integer?) (number->string e)] + [(Plus e1 e2) + (string-append "(" (exp->string e1) " + " (exp->string e2) ")")] + [_ (label-symbol->string e)])) + (define tab (make-string 8 #\space)) + + ;; Instruction -> String + (define (fancy-instr->string i) + (let ((s (simple-instr->string i))) + (if (instruction-annotation i) + (if (< (string-length s) 40) + (format "~a~a; ~.s" s (make-string (- 40 (string-length s)) #\space) (instruction-annotation i)) + (format "~a ; ~.s" s (instruction-annotation i))) + s))) + ;; Instruction -> String - (define (instr->string i) + (define (simple-instr->string i) (match i + [(Text) (string-append tab "section .text")] + [(Data) (string-append tab "section .data align=8")] ; 8-byte aligned data [(Ret) (string-append tab "ret")] [(Label l) (string-append (label-symbol->string l) ":")] [(Global x) (string-append tab "global " (label-symbol->string x))] @@ -114,15 +144,77 @@ [(Jl l) (string-append tab "jl " (jump-target->string l))] + [(Jle l) + (string-append tab "jle " + (jump-target->string l))] [(Jg l) (string-append tab "jg " (jump-target->string l))] + [(Jge l) + (string-append tab "jge " + (jump-target->string l))] + [(Jo l) + (string-append tab "jo " + (jump-target->string l))] + [(Jno l) + (string-append tab "jno " + (jump-target->string l))] + [(Jc l) + (string-append tab "jc " + (jump-target->string l))] + [(Jnc l) + (string-append tab "jnc " + (jump-target->string l))] + [(Cmove 
dst src) + (string-append tab "cmove " + (reg->string dst) ", " + (arg->string src))] + [(Cmovne dst src) + (string-append tab "cmovne " + (reg->string dst) ", " + (arg->string src))] + [(Cmovl dst src) + (string-append tab "cmovl " + (reg->string dst) ", " + (arg->string src))] + [(Cmovle dst src) + (string-append tab "cmovle " + (reg->string dst) ", " + (arg->string src))] + [(Cmovg dst src) + (string-append tab "cmovg " + (reg->string dst) ", " + (arg->string src))] + [(Cmovge dst src) + (string-append tab "cmovge " + (reg->string dst) ", " + (arg->string src))] + [(Cmovo dst src) + (string-append tab "cmovo " + (reg->string dst) ", " + (arg->string src))] + [(Cmovno dst src) + (string-append tab "cmovno " + (reg->string dst) ", " + (arg->string src))] + [(Cmovc dst src) + (string-append tab "cmovc " + (reg->string dst) ", " + (arg->string src))] + [(Cmovnc dst src) + (string-append tab "cmovnc " + (reg->string dst) ", " + (arg->string src))] [(Call l) (string-append tab "call " (jump-target->string l))] [(Push a) (string-append tab "push " (arg->string a))] + [(Pushf) + (string-append tab "pushf")] + [(Popf) + (string-append tab "popf")] [(Pop r) (string-append tab "pop " (reg->string r))] @@ -133,7 +225,30 @@ [(Lea d x) (string-append tab "lea " (arg->string d) ", [rel " - (label-symbol->string x) "]")])) + (exp->string x) "]")] + [(Not r) + (string-append tab "not " + (reg->string r))] + [(Div r) + (string-append tab "div " + (arg->string r))] + [(Equ x c) + (string-append tab + (symbol->string x) + " equ " + (number->string c))] + + [(Db (? bytes? 
bs)) + (apply string-append tab "db " (add-between (map number->string (bytes->list bs)) ", "))] + [(Db x) + (string-append tab "db " (arg->string x))] + [(Dw x) + (string-append tab "dw " (arg->string x))] + [(Dd x) + (string-append tab "dd " (arg->string x))] + [(Dq x) + (string-append tab "dq " (arg->string x))] + )) (define (comment->string c) (match c @@ -142,21 +257,37 @@ [(%%% s) (string-append ";;; " s)])) (define (line-comment i s) - (let ((i-str (instr->string i))) + (let ((i-str (simple-instr->string i))) (let ((pad (make-string (max 1 (- 32 (string-length i-str))) #\space))) (string-append i-str pad "; " s)))) - - (define (instrs->string a) + + ;; [Listof Instr] -> Void + (define (instrs-display a) (match a - ['() ""] + ['() (void)] [(cons (? Comment? c) a) - (string-append (comment->string c) "\n" (instrs->string a))] + (begin (write-string (comment->string c)) + (write-char #\newline) + (instrs-display a))] [(cons i (cons (% s) a)) - (string-append (line-comment i s) "\n" (instrs->string a))] + (begin (write-string (line-comment i s)) ; a line comment trumps an annotation + (write-char #\newline) + (instrs-display a))] [(cons i a) - (string-append (instr->string i) "\n" (instrs->string a))])) - - (string-append - tab "default rel\n" - tab "section .text\n" - (instrs->string a))) + (begin (write-string (fancy-instr->string i)) + (write-char #\newline) + (instrs-display a))])) + + ;; entry point will be first label + (match (findf Label? 
a) + [(Label g) + (begin + (write-string (string-append + ; tab "global " (label-symbol->string g) "\n" + tab "default rel\n" + tab "section .text\n")) + (instrs-display a))] + [_ + (instrs-display a) + #; + (error "program does not have an initial label")])) diff --git a/langs/a86/stepper.rkt b/langs/a86/stepper.rkt new file mode 100644 index 00000000..5d29724b --- /dev/null +++ b/langs/a86/stepper.rkt @@ -0,0 +1,42 @@ +#lang racket +(provide main) + +(require redex) + +(define-language L) + +;; A reduction relation that just relates elements +;; of the list to their successors +(define (r ls) + (define i 0) + (reduction-relation L + (--> any_i + any_j + (where any_j + ,(begin + (set! i (add1 i)) + (list-ref ls (min i (sub1 (length ls))))))))) + + +;; reads log file from stdin +(define (main) + (define ls + (let loop () + (if (eof-object? (read)) + '() + (cons (read) (loop))))) + + ;; replace instr indices with their instructions + (define ls1 + (map (λ (s) + (map (λ (p) + (match p + [(list 'instr i) + (list 'instr (list-ref (list-ref ls 0) + (add1 i)))] + [_ p])) + s)) + ls)) + + ;; run the stepper + (stepper (r (rest ls1)) (first (rest ls1)))) diff --git a/langs/a86/test/errors.rkt b/langs/a86/test/errors.rkt index 244c6008..c88bc32f 100644 --- a/langs/a86/test/errors.rkt +++ b/langs/a86/test/errors.rkt @@ -2,3 +2,30 @@ (require rackunit "../ast.rkt") (check-exn exn:fail? (thunk (Mov (Offset 'rax 0) 100))) + +;; Checking literal widths +(check-exn exn:fail? (thunk (Mov 'rax (expt 2 64)))) +(check-not-exn (thunk (Mov 'rax (sub1 (expt 2 64))))) +(check-exn exn:fail? (thunk (Cmp 'rax (expt 2 32)))) +(check-not-exn (thunk (Cmp 'rax (sub1 (expt 2 32))))) +(check-exn exn:fail? (thunk (And 'rax (expt 2 32)))) +(check-not-exn (thunk (And 'rax (sub1 (expt 2 32))))) +(check-exn exn:fail? (thunk (Or 'rax (expt 2 32)))) +(check-not-exn (thunk (Or 'rax (sub1 (expt 2 32))))) +(check-exn exn:fail? 
(thunk (Xor 'rax (expt 2 32)))) +(check-not-exn (thunk (Xor 'rax (sub1 (expt 2 32))))) +(check-exn exn:fail? (thunk (Push (expt 2 32)))) +(check-not-exn (thunk (Push (sub1 (expt 2 32))))) +(check-exn exn:fail? (thunk (Add 'rax (expt 2 32)))) +(check-not-exn (thunk (Add 'rax (sub1 (expt 2 32))))) +(check-exn exn:fail? (thunk (Sub 'rax (expt 2 32)))) +(check-not-exn (thunk (Sub 'rax (sub1 (expt 2 32))))) + +;; Check prog +(check-exn exn:fail? (thunk (prog (Ret)))) +(check-exn exn:fail? (thunk (prog (Label 'start) (Ret)))) +(check-exn exn:fail? (thunk (prog (Global 'foo) (Label 'start) (Label 'foo) (Ret)))) +(check-not-exn (thunk (prog (Global 'start) (Label 'start) (Ret)))) +(check-not-exn (thunk (prog (Label 'start) (Ret) (Global 'start)))) + + diff --git a/langs/abscond/Makefile b/langs/abscond/Makefile index d911308e..3200bc6d 100644 --- a/langs/abscond/Makefile +++ b/langs/abscond/Makefile @@ -1,35 +1,42 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ print.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
-test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/abscond/ast.rkt b/langs/abscond/ast.rkt index fa3d7869..7e3cda7a 100644 --- a/langs/abscond/ast.rkt +++ b/langs/abscond/ast.rkt @@ -1,5 +1,6 @@ #lang racket -(provide Int) +(provide Lit) -;; type Expr = (Int Integer) -(struct Int (i) #:prefab) +;; type Expr = (Lit Integer) + +(struct Lit (i) #:prefab) diff --git a/langs/abscond/compile-file.rkt b/langs/abscond/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/abscond/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/abscond/compile-stdin.rkt b/langs/abscond/compile-stdin.rkt new file mode 100644 index 00000000..532ee0eb --- /dev/null +++ b/langs/abscond/compile-stdin.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile (parse (read))))) + diff --git a/langs/abscond/compile.rkt b/langs/abscond/compile.rkt index b0410af3..b3212737 100644 --- a/langs/abscond/compile.rkt +++ b/langs/abscond/compile.rkt @@ -1,15 +1,19 @@ #lang racket -(provide compile) -(require "ast.rkt" a86/ast) +(provide (all-defined-out)) +(require "ast.rkt") +(require a86/ast) + +(define rax 'rax) ;; Expr -> Asm -(define (compile e) +(define (compile e) (prog (Global 'entry) (Label 'entry) (compile-e e) - (Ret))) - + (Ret))) + ;; Expr -> Asm (define 
(compile-e e) (match e - [(Int i) (seq (Mov 'rax i))])) + [(Lit i) (seq (Mov rax i))])) + diff --git a/langs/abscond/info.rkt b/langs/abscond/info.rkt new file mode 100644 index 00000000..73bc196a --- /dev/null +++ b/langs/abscond/info.rkt @@ -0,0 +1,2 @@ +#lang info +#;(define pre-install-collection "../installer.rkt") diff --git a/langs/abscond/interp-file.rkt b/langs/abscond/interp-file.rkt deleted file mode 100644 index aa84aa38..00000000 --- a/langs/abscond/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ([p (open-input-file fn)]) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/abscond/interp-stdin.rkt b/langs/abscond/interp-stdin.rkt new file mode 100644 index 00000000..ce4885f7 --- /dev/null +++ b/langs/abscond/interp-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "interp.rkt") + +;; -> Void +;; Parse and interpret contents of stdin, +;; print result on stdout +(define (main) + (read-line) ; ignore #lang racket line + (println (interp (parse (read))))) + diff --git a/langs/abscond/interp.rkt b/langs/abscond/interp.rkt index 55a7ca2a..c4b71b31 100644 --- a/langs/abscond/interp.rkt +++ b/langs/abscond/interp.rkt @@ -3,7 +3,8 @@ (require "ast.rkt") ;; Expr -> Integer -;; Interpret given expression (define (interp e) (match e - [(Int i) i])) + [(Lit i) i])) + + diff --git a/langs/abscond/main.rkt b/langs/abscond/main.rkt new file mode 100644 index 00000000..e0e38924 --- /dev/null +++ b/langs/abscond/main.rkt @@ -0,0 +1,13 @@ +#lang racket +(require "ast.rkt") +(require "parse.rkt") +(require "interp.rkt") +(require "compile.rkt") +(require "run.rkt") +(provide (all-from-out "ast.rkt")) +(provide (all-from-out "parse.rkt")) +(provide (all-from-out 
"interp.rkt")) +(provide (all-from-out "compile.rkt")) +(provide (all-from-out "run.rkt")) + + diff --git a/langs/abscond/parse.rkt b/langs/abscond/parse.rkt index 8714fa9d..fb1b6198 100644 --- a/langs/abscond/parse.rkt +++ b/langs/abscond/parse.rkt @@ -5,5 +5,5 @@ ;; S-Expr -> Expr (define (parse s) (match s - [(? integer?) (Int s)] - [_ (error "Parse error")])) + [(? exact-integer?) (Lit s)] + [_ (error "Parse error")])) diff --git a/langs/abscond/run-stdin.rkt b/langs/abscond/run-stdin.rkt new file mode 100644 index 00000000..16cf99e0 --- /dev/null +++ b/langs/abscond/run-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require "run.rkt") + +;; -> Void +;; Compile contents of stdin and use asm-interp to run +(define (main) + (read-line) ; ignore #lang racket line + (run (compile (parse (read))))) + diff --git a/langs/abscond/run.rkt b/langs/abscond/run.rkt new file mode 100644 index 00000000..1191f553 --- /dev/null +++ b/langs/abscond/run.rkt @@ -0,0 +1,8 @@ +#lang racket +(require a86/interp) +(provide run) + +;; Asm -> Integer +(define (run is) + (asm-interp is)) + diff --git a/langs/abscond/test/compile.rkt b/langs/abscond/test/compile.rkt index cbf232ac..560e6f59 100644 --- a/langs/abscond/test/compile.rkt +++ b/langs/abscond/test/compile.rkt @@ -1,9 +1,8 @@ #lang racket -(require "../compile.rkt" a86/interp "../parse.rkt" rackunit) +(require "../compile.rkt") +(require "../parse.rkt") +(require "../run.rkt") +(require "test-runner.rkt") -(define (run e) - (asm-interp (compile (parse e)))) +(test (λ (e) (run (compile (parse e))))) -;; Abscond examples -(check-equal? (run 7) 7) -(check-equal? 
(run -8) -8) diff --git a/langs/abscond/test/interp.rkt b/langs/abscond/test/interp.rkt new file mode 100644 index 00000000..41aa8c04 --- /dev/null +++ b/langs/abscond/test/interp.rkt @@ -0,0 +1,7 @@ +#lang racket +(require "../interp.rkt") +(require "../parse.rkt") +(require "test-runner.rkt") + +(test (λ (e) (interp (parse e)))) + diff --git a/langs/abscond/test/test-runner.rkt b/langs/abscond/test/test-runner.rkt new file mode 100644 index 00000000..829c996f --- /dev/null +++ b/langs/abscond/test/test-runner.rkt @@ -0,0 +1,10 @@ +#lang racket +(provide test) +(require rackunit) + +(define (test run) + (begin ;; Abscond + (check-equal? (run 7) 7) + (check-equal? (run -8) -8))) + + diff --git a/langs/blackmail/Makefile b/langs/blackmail/Makefile index d911308e..3200bc6d 100644 --- a/langs/blackmail/Makefile +++ b/langs/blackmail/Makefile @@ -1,35 +1,42 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ print.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
-test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/blackmail/ast.rkt b/langs/blackmail/ast.rkt index 2531b081..c882e675 100644 --- a/langs/blackmail/ast.rkt +++ b/langs/blackmail/ast.rkt @@ -1,9 +1,11 @@ #lang racket -(provide Int Prim1) +(provide Lit Prim1) -;; type Expr = -;; | (Int Integer) -;; | (Prim1 Op Expr) -;; type Op = 'add1 | 'sub1 -(struct Int (i) #:prefab) +;; type Expr = (Lit Integer) +;; | (Prim1 Op1 Expr) + +;; type Op1 = 'add1 | 'sub1 + +(struct Lit (i) #:prefab) (struct Prim1 (p e) #:prefab) + diff --git a/langs/blackmail/compile-ops.rkt b/langs/blackmail/compile-ops.rkt new file mode 100644 index 00000000..8f6bd44c --- /dev/null +++ b/langs/blackmail/compile-ops.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide compile-op1) +(require "ast.rkt") +(require a86/ast) + +(define rax 'rax) + +;; Op1 -> Asm +(define (compile-op1 p) + (match p + ['add1 (Add rax 1)] + ['sub1 (Sub rax 1)])) + diff --git a/langs/blackmail/compile-stdin.rkt b/langs/blackmail/compile-stdin.rkt new file mode 100644 index 00000000..532ee0eb --- /dev/null +++ b/langs/blackmail/compile-stdin.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile (parse (read))))) + diff --git a/langs/blackmail/compile.rkt b/langs/blackmail/compile.rkt index dfc10aa5..24e9ee1a 100644 --- a/langs/blackmail/compile.rkt +++ b/langs/blackmail/compile.rkt @@ -1,9 +1,13 @@ #lang racket (provide (all-defined-out)) -(require "ast.rkt" a86/ast) +(require "ast.rkt") +(require "compile-ops.rkt") +(require a86/ast) + +(define rax 'rax) ;; Expr -> Asm -(define (compile e) +(define (compile e) (prog (Global 'entry) (Label 'entry) (compile-e e) @@ -12,16 +16,11 @@ ;; 
Expr -> Asm (define (compile-e e) (match e - [(Prim1 p e) (compile-prim1 p e)] - [(Int i) (compile-integer i)])) + [(Lit i) (seq (Mov rax i))] + [(Prim1 p e) (compile-prim1 p e)])) -;; Op Expr -> Asm +;; Op1 Expr -> Asm (define (compile-prim1 p e) (seq (compile-e e) - (match p - ['add1 (Add 'rax 1)] - ['sub1 (Sub 'rax 1)]))) + (compile-op1 p))) -;; Integer -> Asm -(define (compile-integer i) - (seq (Mov 'rax i))) diff --git a/langs/blackmail/info.rkt b/langs/blackmail/info.rkt new file mode 100644 index 00000000..73bc196a --- /dev/null +++ b/langs/blackmail/info.rkt @@ -0,0 +1,2 @@ +#lang info +#;(define pre-install-collection "../installer.rkt") diff --git a/langs/blackmail/interp-file.rkt b/langs/blackmail/interp-file.rkt deleted file mode 100644 index e6c9b1d3..00000000 --- a/langs/blackmail/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/blackmail/interp-prim.rkt b/langs/blackmail/interp-prim.rkt new file mode 100644 index 00000000..2d3b7ffb --- /dev/null +++ b/langs/blackmail/interp-prim.rkt @@ -0,0 +1,9 @@ +#lang racket +(provide interp-prim1) + +;; Op1 Integer -> Integer +(define (interp-prim1 op i) + (match op + ['add1 (add1 i)] + ['sub1 (sub1 i)])) + diff --git a/langs/blackmail/interp-stdin.rkt b/langs/blackmail/interp-stdin.rkt new file mode 100644 index 00000000..ce4885f7 --- /dev/null +++ b/langs/blackmail/interp-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "interp.rkt") + +;; -> Void +;; Parse and interpret contents of stdin, +;; print result on stdout +(define (main) + (read-line) ; ignore #lang racket line + (println (interp (parse (read))))) + diff --git 
a/langs/blackmail/interp.rkt b/langs/blackmail/interp.rkt index f23a9595..58d2fe6e 100644 --- a/langs/blackmail/interp.rkt +++ b/langs/blackmail/interp.rkt @@ -1,15 +1,13 @@ #lang racket (provide interp) (require "ast.rkt") +(require "interp-prim.rkt") ;; Expr -> Integer (define (interp e) (match e - [(Int i) i] - [(Prim1 p e) (interp-prim1 p (interp e))])) + [(Lit i) i] + [(Prim1 p e) + (interp-prim1 p (interp e))])) + -;; Op Integer -> Integer -(define (interp-prim1 op i) - (match op - ['add1 (add1 i)] - ['sub1 (sub1 i)])) diff --git a/langs/blackmail/main.rkt b/langs/blackmail/main.rkt new file mode 100644 index 00000000..e0e38924 --- /dev/null +++ b/langs/blackmail/main.rkt @@ -0,0 +1,13 @@ +#lang racket +(require "ast.rkt") +(require "parse.rkt") +(require "interp.rkt") +(require "compile.rkt") +(require "run.rkt") +(provide (all-from-out "ast.rkt")) +(provide (all-from-out "parse.rkt")) +(provide (all-from-out "interp.rkt")) +(provide (all-from-out "compile.rkt")) +(provide (all-from-out "run.rkt")) + + diff --git a/langs/blackmail/parse.rkt b/langs/blackmail/parse.rkt index 78d78884..0147dc46 100644 --- a/langs/blackmail/parse.rkt +++ b/langs/blackmail/parse.rkt @@ -5,10 +5,10 @@ ;; S-Expr -> Expr (define (parse s) (match s - [(? integer?) (Int s)] + [(? exact-integer?) (Lit s)] [(list (? op1? o) e) (Prim1 o (parse e))] [_ (error "Parse error")])) -;; Any -> Boolean (define (op1? 
x) (memq x '(add1 sub1))) + diff --git a/langs/blackmail/run-stdin.rkt b/langs/blackmail/run-stdin.rkt new file mode 100644 index 00000000..16cf99e0 --- /dev/null +++ b/langs/blackmail/run-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require "run.rkt") + +;; -> Void +;; Compile contents of stdin and use asm-interp to run +(define (main) + (read-line) ; ignore #lang racket line + (run (compile (parse (read))))) + diff --git a/langs/blackmail/run.rkt b/langs/blackmail/run.rkt new file mode 100644 index 00000000..1191f553 --- /dev/null +++ b/langs/blackmail/run.rkt @@ -0,0 +1,8 @@ +#lang racket +(require a86/interp) +(provide run) + +;; Asm -> Integer +(define (run is) + (asm-interp is)) + diff --git a/langs/blackmail/test/compile.rkt b/langs/blackmail/test/compile.rkt index 4ba7d48f..560e6f59 100644 --- a/langs/blackmail/test/compile.rkt +++ b/langs/blackmail/test/compile.rkt @@ -1,13 +1,8 @@ #lang racket -(require "../compile.rkt" a86/interp "../parse.rkt" rackunit) +(require "../compile.rkt") +(require "../parse.rkt") +(require "../run.rkt") +(require "test-runner.rkt") -(define (run e) - (asm-interp (compile (parse e)))) +(test (λ (e) (run (compile (parse e))))) -;; Abscond examples -(check-equal? (run 7) 7) -(check-equal? (run -8) -8) - -;; Blackmail examples -(check-equal? (run '(add1 (add1 7))) 9) -(check-equal? 
(run '(add1 (sub1 7))) 7) diff --git a/langs/blackmail/test/interp.rkt b/langs/blackmail/test/interp.rkt new file mode 100644 index 00000000..41aa8c04 --- /dev/null +++ b/langs/blackmail/test/interp.rkt @@ -0,0 +1,7 @@ +#lang racket +(require "../interp.rkt") +(require "../parse.rkt") +(require "test-runner.rkt") + +(test (λ (e) (interp (parse e)))) + diff --git a/langs/blackmail/test/test-runner.rkt b/langs/blackmail/test/test-runner.rkt new file mode 100644 index 00000000..1e736f48 --- /dev/null +++ b/langs/blackmail/test/test-runner.rkt @@ -0,0 +1,14 @@ +#lang racket +(provide test) +(require rackunit) + +(define (test run) + (begin ;; Abscond + (check-equal? (run 7) 7) + (check-equal? (run -8) -8)) + + (begin ;; Blackmail + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7))) + + diff --git a/langs/con/Makefile b/langs/con/Makefile index d911308e..3200bc6d 100644 --- a/langs/con/Makefile +++ b/langs/con/Makefile @@ -1,35 +1,42 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ print.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
-test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/con/ast.rkt b/langs/con/ast.rkt index 5f47c42d..d68c3e6f 100644 --- a/langs/con/ast.rkt +++ b/langs/con/ast.rkt @@ -1,11 +1,13 @@ #lang racket -(provide Int Prim1 IfZero) - -;; type Expr = -;; | (Int Integer) -;; | (Prim1 Op Expr) -;; | (IfZero Expr Expr Expr) -;; type Op = 'add1 | 'sub1 -(struct Int (i) #:prefab) -(struct Prim1 (p e) #:prefab) +(provide Lit Prim1 IfZero) + +;; type Expr = (Lit Integer) +;; | (Prim1 Op1 Expr) +;; | (IfZero Expr Expr Expr) + +;; type Op1 = 'add1 | 'sub1 + +(struct Lit (i) #:prefab) +(struct Prim1 (p e) #:prefab) (struct IfZero (e1 e2 e3) #:prefab) + diff --git a/langs/con/compile-file.rkt b/langs/con/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/con/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/con/compile-ops.rkt b/langs/con/compile-ops.rkt new file mode 100644 index 00000000..8f6bd44c --- /dev/null +++ b/langs/con/compile-ops.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide compile-op1) +(require "ast.rkt") +(require a86/ast) + +(define rax 'rax) + +;; Op1 -> Asm +(define (compile-op1 p) + (match p + ['add1 (Add rax 1)] + ['sub1 (Sub rax 1)])) + diff --git a/langs/con/compile-prim.rkt b/langs/con/compile-prim.rkt deleted file mode 100644 index 77cb6c4f..00000000 --- a/langs/con/compile-prim.rkt +++ /dev/null @@ -1,10 +0,0 @@ -#lang racket -(provide compile-prim1) -(require "types.rkt" a86/ast) - -;; Op Asm -> Asm -(define (compile-prim1 p c) - (seq c - 
(match p - ['add1 (Add 'rax (value->bits 1))] - ['sub1 (Sub 'rax (value->bits 1))]))) diff --git a/langs/con/compile-stdin.rkt b/langs/con/compile-stdin.rkt new file mode 100644 index 00000000..532ee0eb --- /dev/null +++ b/langs/con/compile-stdin.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile (parse (read))))) + diff --git a/langs/con/compile.rkt b/langs/con/compile.rkt index 9479064a..59f077a6 100644 --- a/langs/con/compile.rkt +++ b/langs/con/compile.rkt @@ -1,34 +1,39 @@ #lang racket (provide (all-defined-out)) -(require "ast.rkt" a86/ast "compile-prim.rkt") +(require "ast.rkt") +(require "compile-ops.rkt") +(require a86/ast) +(define rax 'rax) ;; Expr -> Asm -(define (compile e) +(define (compile e) (prog (Global 'entry) (Label 'entry) (compile-e e) - (Ret))) + (Ret))) ;; Expr -> Asm (define (compile-e e) (match e - [(Int i) (compile-integer i)] - [(Prim1 p e) (compile-prim1 p (compile-e e))] - [(IfZero e1 e2 e3) (compile-ifzero e1 e2 e3)])) + [(Lit i) (seq (Mov rax i))] + [(Prim1 p e) (compile-prim1 p e)] + [(IfZero e1 e2 e3) + (compile-ifzero e1 e2 e3)])) -;; Integer -> Asm -(define (compile-integer i) - (seq (Mov 'rax i))) +;; Op1 Expr -> Asm +(define (compile-prim1 p e) + (seq (compile-e e) + (compile-op1 p))) ;; Expr Expr Expr -> Asm (define (compile-ifzero e1 e2 e3) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) + (let ((l1 (gensym 'ifz)) + (l2 (gensym 'ifz))) (seq (compile-e e1) - (Cmp 'rax 0) - (Je l1) - (compile-e e3) + (Cmp rax 0) + (Jne l1) + (compile-e e2) (Jmp l2) (Label l1) - (compile-e e2) + (compile-e e3) (Label l2)))) diff --git a/langs/con/info.rkt b/langs/con/info.rkt new file mode 100644 index 00000000..73bc196a --- /dev/null +++ b/langs/con/info.rkt @@ -0,0 +1,2 @@ +#lang info +#;(define pre-install-collection 
"../installer.rkt") diff --git a/langs/con/interp-file.rkt b/langs/con/interp-file.rkt deleted file mode 100644 index e6c9b1d3..00000000 --- a/langs/con/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/con/interp-prim.rkt b/langs/con/interp-prim.rkt index 306e1dbf..2d3b7ffb 100644 --- a/langs/con/interp-prim.rkt +++ b/langs/con/interp-prim.rkt @@ -1,8 +1,9 @@ #lang racket (provide interp-prim1) -;; Op Integer -> Integer +;; Op1 Integer -> Integer (define (interp-prim1 op i) (match op ['add1 (add1 i)] ['sub1 (sub1 i)])) + diff --git a/langs/con/interp-stdin.rkt b/langs/con/interp-stdin.rkt new file mode 100644 index 00000000..ce4885f7 --- /dev/null +++ b/langs/con/interp-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "interp.rkt") + +;; -> Void +;; Parse and interpret contents of stdin, +;; print result on stdout +(define (main) + (read-line) ; ignore #lang racket line + (println (interp (parse (read))))) + diff --git a/langs/con/interp.rkt b/langs/con/interp.rkt index ee7bacb2..30fc6e04 100644 --- a/langs/con/interp.rkt +++ b/langs/con/interp.rkt @@ -1,15 +1,16 @@ #lang racket (provide interp) -(require "ast.rkt" "interp-prim.rkt") +(require "ast.rkt") +(require "interp-prim.rkt") ;; Expr -> Integer (define (interp e) (match e - [(Int i) i] - [(Prim1 p e) - (interp-prim1 p (interp e))] + [(Lit i) i] + [(Prim1 p e) (interp-prim1 p (interp e))] [(IfZero e1 e2 e3) (if (zero? 
(interp e1)) (interp e2) (interp e3))])) + diff --git a/langs/con/main.rkt b/langs/con/main.rkt new file mode 100644 index 00000000..e0e38924 --- /dev/null +++ b/langs/con/main.rkt @@ -0,0 +1,13 @@ +#lang racket +(require "ast.rkt") +(require "parse.rkt") +(require "interp.rkt") +(require "compile.rkt") +(require "run.rkt") +(provide (all-from-out "ast.rkt")) +(provide (all-from-out "parse.rkt")) +(provide (all-from-out "interp.rkt")) +(provide (all-from-out "compile.rkt")) +(provide (all-from-out "run.rkt")) + + diff --git a/langs/con/parse.rkt b/langs/con/parse.rkt index 9d98ec2f..792f883d 100644 --- a/langs/con/parse.rkt +++ b/langs/con/parse.rkt @@ -5,12 +5,13 @@ ;; S-Expr -> Expr (define (parse s) (match s - [(? integer?) (Int s)] - [(list (? op1? o) e) (Prim1 o (parse e))] + [(? exact-integer?) (Lit s)] + [(list (? op1? o) e) (Prim1 o (parse e))] + ;; NEW: [(list 'if (list 'zero? e1) e2 e3) (IfZero (parse e1) (parse e2) (parse e3))] [_ (error "Parse error")])) -;; Any -> Boolean (define (op1? 
x) (memq x '(add1 sub1))) + diff --git a/langs/con/run-stdin.rkt b/langs/con/run-stdin.rkt new file mode 100644 index 00000000..16cf99e0 --- /dev/null +++ b/langs/con/run-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require "run.rkt") + +;; -> Void +;; Compile contents of stdin and use asm-interp to run +(define (main) + (read-line) ; ignore #lang racket line + (run (compile (parse (read))))) + diff --git a/langs/con/run.rkt b/langs/con/run.rkt new file mode 100644 index 00000000..1191f553 --- /dev/null +++ b/langs/con/run.rkt @@ -0,0 +1,8 @@ +#lang racket +(require a86/interp) +(provide run) + +;; Asm -> Integer +(define (run is) + (asm-interp is)) + diff --git a/langs/con/test/compile.rkt b/langs/con/test/compile.rkt index 5d0b431c..560e6f59 100644 --- a/langs/con/test/compile.rkt +++ b/langs/con/test/compile.rkt @@ -1,26 +1,8 @@ #lang racket -(require "../compile.rkt" "../parse.rkt" a86/interp rackunit) +(require "../compile.rkt") +(require "../parse.rkt") +(require "../run.rkt") +(require "test-runner.rkt") -(define (run e) - (asm-interp (compile (parse e)))) +(test (λ (e) (run (compile (parse e))))) -;; Abscond examples -(check-equal? (run 7) 7) -(check-equal? (run -8) -8) - -;; Blackmail examples -(check-equal? (run '(add1 (add1 7))) 9) -(check-equal? (run '(add1 (sub1 7))) 7) - -;; Con examples -(check-equal? (run '(if (zero? 0) 1 2)) 1) -(check-equal? (run '(if (zero? 1) 1 2)) 2) -(check-equal? (run '(if (zero? -7) 1 2)) 2) -(check-equal? (run '(if (zero? 0) - (if (zero? 1) 1 2) - 7)) - 2) -(check-equal? (run '(if (zero? (if (zero? 0) 1 0)) - (if (zero? 
1) 1 2) - 7)) - 7) diff --git a/langs/con/test/interp.rkt b/langs/con/test/interp.rkt index 08ac65dc..41aa8c04 100644 --- a/langs/con/test/interp.rkt +++ b/langs/con/test/interp.rkt @@ -1,26 +1,7 @@ #lang racket -(require "../interp.rkt" "../parse.rkt" rackunit) +(require "../interp.rkt") +(require "../parse.rkt") +(require "test-runner.rkt") + +(test (λ (e) (interp (parse e)))) -(define (run e) - (interp (parse e))) - -;; Abscond examples -(check-equal? (run 7) 7) -(check-equal? (run -8) -8) - -;; Blackmail examples -(check-equal? (run '(add1 (add1 7))) 9) -(check-equal? (run '(add1 (sub1 7))) 7) - -;; Con examples -(check-equal? (run '(if (zero? 0) 1 2)) 1) -(check-equal? (run '(if (zero? 1) 1 2)) 2) -(check-equal? (run '(if (zero? -7) 1 2)) 2) -(check-equal? (run '(if (zero? 0) - (if (zero? 1) 1 2) - 7)) - 2) -(check-equal? (run '(if (zero? (if (zero? 0) 1 0)) - (if (zero? 1) 1 2) - 7)) - 7) diff --git a/langs/con/test/test-runner.rkt b/langs/con/test/test-runner.rkt new file mode 100644 index 00000000..4428c634 --- /dev/null +++ b/langs/con/test/test-runner.rkt @@ -0,0 +1,27 @@ +#lang racket +(provide test) +(require rackunit) + +(define (test run) + (begin ;; Abscond + (check-equal? (run 7) 7) + (check-equal? (run -8) -8)) + + (begin ;; Blackmail + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7)) + + (begin ;; Con + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 
1) 1 2) + 7)) + 7))) + + diff --git a/langs/dodger/Makefile b/langs/dodger/Makefile index 4f0c5033..4555cee7 100644 --- a/langs/dodger/Makefile +++ b/langs/dodger/Makefile @@ -1,36 +1,43 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ - values.o \ - char.o + print.o \ + values.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" -test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/dodger/ast.rkt b/langs/dodger/ast.rkt deleted file mode 100644 index 0a546fba..00000000 --- a/langs/dodger/ast.rkt +++ /dev/null @@ -1,16 +0,0 @@ -#lang racket -(provide Int Bool Char Prim1 If) - -;; type Expr = -;; | (Int Integer) -;; | (Bool Boolean) -;; | (Char Character) -;; | (Prim1 Op Expr) -;; | (If Expr Expr Expr) -;; type Op = 'add1 | 'sub1 | 'zero? -;; | 'char? 
| 'integer->char | 'char->integer -(struct Int (i) #:prefab) -(struct Bool (b) #:prefab) -(struct Char (c) #:prefab) -(struct Prim1 (p e) #:prefab) -(struct If (e1 e2 e3) #:prefab) diff --git a/langs/dodger/char.c b/langs/dodger/char.c deleted file mode 100644 index 347f7d09..00000000 --- a/langs/dodger/char.c +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include -#include "types.h" -#include "values.h" - -void print_codepoint(val_char_t); -int utf8_encode_char(val_char_t, char *); - -void print_char(val_char_t c) -{ - printf("#\\"); - switch (c) { - case 0: - printf("nul"); break; - case 8: - printf("backspace"); break; - case 9: - printf("tab"); break; - case 10: - printf("newline"); break; - case 11: - printf("vtab"); break; - case 12: - printf("page"); break; - case 13: - printf("return"); break; - case 32: - printf("space"); break; - case 127: - printf("rubout"); break; - default: - print_codepoint(c); - } -} - -void print_codepoint(val_char_t c) -{ - static char buffer[5] = {0}; - utf8_encode_char(c, buffer); - printf("%s", buffer); -} - -int utf8_encode_char(val_char_t c, char *buffer) -{ - // Output to buffer using UTF-8 encoding of codepoint - // https://en.wikipedia.org/wiki/UTF-8 - if (c < 128) { - buffer[0] = (char) c; - return 1; - } else if (c < 2048) { - buffer[0] = (char)(c >> 6) | 192; - buffer[1] = ((char) c & 63) | 128; - return 2; - } else if (c < 65536) { - buffer[0] = (char)(c >> 12) | 224; - buffer[1] = ((char)(c >> 6) & 63) | 128; - buffer[2] = ((char) c & 63) | 128; - return 3; - } else { - buffer[0] = (char)(c >> 18) | 240; - buffer[1] = ((char)(c >> 12) & 63) | 128; - buffer[2] = ((char)(c >> 6) & 63) | 128; - buffer[3] = ((char) c & 63) | 128; - return 4; - } -} diff --git a/langs/dodger/compile-ops.rkt b/langs/dodger/compile-ops.rkt deleted file mode 100644 index 92412ccb..00000000 --- a/langs/dodger/compile-ops.rkt +++ /dev/null @@ -1,34 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) - -(define 
rax 'rax) - -;; Op1 -> Asm -(define (compile-op1 p) - (match p - ['add1 (Add rax (value->bits 1))] - ['sub1 (Sub rax (value->bits 1))] - ['zero? - (let ((l1 (gensym))) - (seq (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char? - (let ((l1 (gensym))) - (seq (And rax mask-char) - (Xor rax type-char) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char->integer - (seq (Sar rax char-shift) - (Sal rax int-shift))] - ['integer->char - (seq (Sar rax int-shift) - (Sal rax char-shift) - (Xor rax type-char))])) diff --git a/langs/dodger/compile.rkt b/langs/dodger/compile.rkt deleted file mode 100644 index 3310c5f3..00000000 --- a/langs/dodger/compile.rkt +++ /dev/null @@ -1,43 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" "compile-ops.rkt" a86/ast) - -(define rax 'rax) - -;; Expr -> Asm -(define (compile e) - (prog (Global 'entry) - (Label 'entry) - (compile-e e) - (Ret))) - -;; Expr -> Asm -(define (compile-e e) - (match e - [(Int i) (compile-value i)] - [(Bool b) (compile-value b)] - [(Char c) (compile-value c)] - [(Prim1 p e) (compile-prim1 p e)] - [(If e1 e2 e3) (compile-if e1 e2 e3)])) - -;; Value -> Asm -(define (compile-value v) - (seq (Mov rax (value->bits v)))) - -;; Op1 Expr -> Asm -(define (compile-prim1 p e) - (seq (compile-e e) - (compile-op1 p))) - -;; Expr Expr Expr -> Asm -(define (compile-if e1 e2 e3) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1) - (Cmp rax val-false) - (Je l1) - (compile-e e2) - (Jmp l2) - (Label l1) - (compile-e e3) - (Label l2)))) diff --git a/langs/dodger/info.rkt b/langs/dodger/info.rkt new file mode 100644 index 00000000..41ec40bb --- /dev/null +++ b/langs/dodger/info.rkt @@ -0,0 +1,2 @@ +#lang info +(define pre-install-collection "../installer.rkt") diff --git a/langs/dodger/interp-bits.rkt b/langs/dodger/interp-bits.rkt index 660841e9..6f65d551 100644 --- a/langs/dodger/interp-bits.rkt +++ 
b/langs/dodger/interp-bits.rkt @@ -16,21 +16,15 @@ ;; Expr -> Bits (define (interp-bits e) (match e - [(Int i) (value->bits i)] - [(Char c) (value->bits c)] - [(Bool b) (value->bits b)] + [(Lit d) (value->bits d)] [(Prim1 'add1 e0) (+ (interp-bits e0) (value->bits 1))] [(Prim1 'sub1 e0) (- (interp-bits e0) (value->bits 1))] [(Prim1 'zero? e) - (if (zero? (interp-bits e)) - val-true - val-false)] + (value->bits (zero? (interp-bits e)))] [(Prim1 'char? e0) - (if (= type-char (bitwise-and (interp-bits e0) #b11)) - val-true - val-false)] + (value->bits (char-bits? (interp-bits e0)))] [(Prim1 'char->integer e0) (arithmetic-shift (arithmetic-shift (interp-bits e0) (- char-shift)) @@ -42,6 +36,6 @@ char-shift) type-char)] [(If e1 e2 e3) - (if (= (interp-bits e1) val-false) + (if (= (interp-bits e1) (value->bits #f)) (interp-bits e3) (interp-bits e2))])) diff --git a/langs/dodger/interp-file.rkt b/langs/dodger/interp-file.rkt deleted file mode 100644 index e6c9b1d3..00000000 --- a/langs/dodger/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/dodger/interp-prim.rkt b/langs/dodger/interp-prim.rkt deleted file mode 100644 index 38633e47..00000000 --- a/langs/dodger/interp-prim.rkt +++ /dev/null @@ -1,12 +0,0 @@ -#lang racket -(provide interp-prim1) - -;; Op1 Value -> Value -(define (interp-prim1 op v) - (match op - ['add1 (add1 v)] - ['sub1 (sub1 v)] - ['zero? (zero? v)] - ['char? (char? 
v)] - ['integer->char (integer->char v)] - ['char->integer (char->integer v)])) diff --git a/langs/dodger/interp.rkt b/langs/dodger/interp.rkt deleted file mode 100644 index 7feaef77..00000000 --- a/langs/dodger/interp.rkt +++ /dev/null @@ -1,21 +0,0 @@ -#lang racket -(provide interp) -(require "ast.rkt" "interp-prim.rkt") - -;; type Value = -;; | Integer -;; | Boolean -;; | Character - -;; Expr -> Value -(define (interp e) - (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Prim1 p e) - (interp-prim1 p (interp e))] - [(If e1 e2 e3) - (if (interp e1) - (interp e2) - (interp e3))])) diff --git a/langs/dodger/parse.rkt b/langs/dodger/parse.rkt deleted file mode 100644 index fcf28c0c..00000000 --- a/langs/dodger/parse.rkt +++ /dev/null @@ -1,18 +0,0 @@ -#lang racket -(provide parse) -(require "ast.rkt") - -;; S-Expr -> Expr -(define (parse s) - (match s - [(? integer?) (Int s)] - [(? boolean?) (Bool s)] - [(? char?) (Char s)] - [(list (? op1? o) e) (Prim1 o (parse e))] - [(list 'if e1 e2 e3) - (If (parse e1) (parse e2) (parse e3))] - [_ (error "Parse error")])) - -;; Any -> Boolean -(define (op1? x) - (memq x '(add1 sub1 zero? char? integer->char char->integer))) diff --git a/langs/dodger/print.c b/langs/dodger/print.c index e4821653..162aaa54 100644 --- a/langs/dodger/print.c +++ b/langs/dodger/print.c @@ -1,6 +1,7 @@ #include #include #include "values.h" +#include "types.h" void print_char(val_char_t); void print_codepoint(val_char_t); @@ -18,11 +19,6 @@ void print_result(val_t x) case T_CHAR: print_char(val_unwrap_char(x)); break; - case T_EOF: - printf("#"); - break; - case T_VOID: - break; case T_INVALID: printf("internal error"); } diff --git a/langs/dodger/test/all.rkt b/langs/dodger/test/all.rkt index 6c4f2baa..b7c04c11 100644 --- a/langs/dodger/test/all.rkt +++ b/langs/dodger/test/all.rkt @@ -32,9 +32,9 @@ ;; Dupe examples (check-equal? (run #t) #t) (check-equal? (run #f) #f) - (check-equal? (run (if #t 1 2)) 1) - (check-equal? 
(run (if #f 1 2)) 2) - (check-equal? (run (if 0 1 2)) 1) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) (check-equal? (run '(if #t 3 4)) 3) (check-equal? (run '(if #f 3 4)) 4) (check-equal? (run '(if 0 3 4)) 3) diff --git a/langs/dodger/types.rkt b/langs/dodger/types.rkt deleted file mode 100644 index 933fdd31..00000000 --- a/langs/dodger/types.rkt +++ /dev/null @@ -1,28 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -(define int-shift 1) -(define char-shift 2) -(define type-int #b0) -(define type-char #b01) -(define mask-char #b11) -(define val-true #b011) -(define val-false #b111) - -(define (bits->value b) - (cond [(= type-int (bitwise-and b #b1)) - (arithmetic-shift b (- int-shift))] - [(= type-char (bitwise-and b #b11)) - (integer->char (arithmetic-shift b (- char-shift)))] - [(= b val-true) #t] - [(= b val-false) #f] - [else (error "invalid bits")])) - -(define (value->bits v) - (cond [(integer? v) (arithmetic-shift v int-shift)] - [(char? v) - (bitwise-ior type-char - (arithmetic-shift (char->integer v) char-shift))] - [(eq? v #t) val-true] - [(eq? 
v #f) val-false])) - diff --git a/langs/dupe/Makefile b/langs/dupe/Makefile index 54973ab2..4555cee7 100644 --- a/langs/dupe/Makefile +++ b/langs/dupe/Makefile @@ -1,10 +1,11 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ @@ -12,25 +13,31 @@ objs = \ print.o \ values.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" -test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/dupe/ast.rkt b/langs/dupe/ast.rkt index 00a2eb83..50183b13 100644 --- a/langs/dupe/ast.rkt +++ b/langs/dupe/ast.rkt @@ -1,13 +1,16 @@ #lang racket -(provide Int Bool Prim1 If) - -;; type Expr = -;; | (Int Integer) -;; | (Bool Boolean) -;; | (Prim1 Op Expr) -;; | (If Expr Expr Expr) -;; type Op = 'add1 | 'sub1 | 'zero? -(struct Int (i) #:prefab) -(struct Bool (b) #:prefab) -(struct Prim1 (p e) #:prefab) +(provide Lit Prim1 If) + +;; type Expr = (Lit Datum) +;; | (Prim1 Op1 Expr) +;; | (If Expr Expr Expr) + +;; type Datum = Integer +;; | Boolean + +;; type Op1 = 'add1 | 'sub1 +;; | 'zero? 
+ +(struct Lit (d) #:prefab) +(struct Prim1 (p e) #:prefab) (struct If (e1 e2 e3) #:prefab) diff --git a/langs/dupe/compile-file.rkt b/langs/dupe/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/dupe/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/dupe/compile-ops.rkt b/langs/dupe/compile-ops.rkt index bbdecc25..0ef4ebc8 100644 --- a/langs/dupe/compile-ops.rkt +++ b/langs/dupe/compile-ops.rkt @@ -1,8 +1,11 @@ #lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) +(provide compile-op1) +(require "ast.rkt") +(require "types.rkt") +(require a86/ast) (define rax 'rax) +(define r9 'r9) ;; Op1 -> Asm (define (compile-op1 p) @@ -10,9 +13,8 @@ ['add1 (Add rax (value->bits 1))] ['sub1 (Sub rax (value->bits 1))] ['zero? 
- (let ((l1 (gensym))) - (seq (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))])) + (seq (Cmp rax 0) + (Mov rax (value->bits #f)) + (Mov r9 (value->bits #t)) + (Cmove rax r9))])) + diff --git a/langs/dupe/compile-stdin.rkt b/langs/dupe/compile-stdin.rkt new file mode 100644 index 00000000..532ee0eb --- /dev/null +++ b/langs/dupe/compile-stdin.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile (parse (read))))) + diff --git a/langs/dupe/compile.rkt b/langs/dupe/compile.rkt index 7672821f..fa817ac1 100644 --- a/langs/dupe/compile.rkt +++ b/langs/dupe/compile.rkt @@ -1,11 +1,14 @@ #lang racket (provide (all-defined-out)) -(require "ast.rkt" "types.rkt" "compile-ops.rkt" a86/ast) +(require "ast.rkt") +(require "compile-ops.rkt") +(require "types.rkt") +(require a86/ast) (define rax 'rax) ;; Expr -> Asm -(define (compile e) +(define (compile e) (prog (Global 'entry) (Label 'entry) (compile-e e) @@ -14,10 +17,10 @@ ;; Expr -> Asm (define (compile-e e) (match e - [(Int i) (compile-value i)] - [(Bool b) (compile-value b)] - [(Prim1 p e) (compile-prim1 p e)] - [(If e1 e2 e3) (compile-if e1 e2 e3)])) + [(Lit d) (compile-value d)] + [(Prim1 p e) (compile-prim1 p e)] + [(If e1 e2 e3) + (compile-if e1 e2 e3)])) ;; Value -> Asm (define (compile-value v) @@ -33,10 +36,11 @@ (let ((l1 (gensym 'if)) (l2 (gensym 'if))) (seq (compile-e e1) - (Cmp rax val-false) + (Cmp rax (value->bits #f)) (Je l1) (compile-e e2) (Jmp l2) (Label l1) (compile-e e3) (Label l2)))) + diff --git a/langs/dupe/compile/help.rkt b/langs/dupe/compile/help.rkt deleted file mode 100644 index 321135ab..00000000 --- a/langs/dupe/compile/help.rkt +++ /dev/null @@ -1,15 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -;; -> [List Label Label] -;; Guaranteed to 
be unique on each call -(define gen-if-labels - (let ((i 0)) - (λ () - (set! i (add1 i)) - (list (lab "f" i) - (lab "x" i))))) - -;; String Integer -> Symbol -(define (lab s i) - (string->symbol (string-append "if_" s "_" (number->string i)))) diff --git a/langs/dupe/info.rkt b/langs/dupe/info.rkt new file mode 100644 index 00000000..73bc196a --- /dev/null +++ b/langs/dupe/info.rkt @@ -0,0 +1,2 @@ +#lang info +#;(define pre-install-collection "../installer.rkt") diff --git a/langs/dupe/interp-bits-wrap.rkt b/langs/dupe/interp-bits-wrap.rkt index ce788f27..6c71a47a 100644 --- a/langs/dupe/interp-bits-wrap.rkt +++ b/langs/dupe/interp-bits-wrap.rkt @@ -1,6 +1,6 @@ #lang racket (provide interp-wrap interp-bits-wrap) -(require "ast.rkt") +(require "ast.rkt" "types.rkt") ;; type Value = ;; | Integer @@ -9,41 +9,27 @@ (define word-size 64) (define shift 1) -(define type-int #b0) -(define val-true #b01) -(define val-false #b11) ;; type Bits = Integer ;; Expr -> Bits (define (interp-bits-wrap e) (match e - [(Int i) (arithmetic-shift i shift)] - [(Bool b) (if b val-true val-false)] + [(Lit i) (value->bits i)] [(Prim1 'add1 e0) (wrap (add1 (interp-bits-wrap e0)))] [(Prim1 'sub1 e0) (wrap (sub1 (interp-bits-wrap e0)))] [(Prim1 'zero? e) - (if (zero? (interp-bits-wrap e)) - val-true - val-false)] + (value->bits (zero? (interp-bits-wrap e)))] [(If e1 e2 e3) - (if (= (interp-bits-wrap e1) val-false) + (if (= (interp-bits-wrap e1) (value->bits #f)) (interp-bits-wrap e3) (interp-bits-wrap e2))])) (define (interp-wrap e) (bits->value (interp-bits-wrap e))) -(define (bits->value b) - (if (even? 
b) - (arithmetic-shift b (- shift)) - (cond [(= b val-true) #t] - [(= b val-false) #f] - [else (error "invalid bits")]))) - - (define (wrap n) (if (>= (integer-length n) (- word-size shift)) (- (truncate n)) diff --git a/langs/dupe/interp-bits.rkt b/langs/dupe/interp-bits.rkt index 05396809..9d475632 100644 --- a/langs/dupe/interp-bits.rkt +++ b/langs/dupe/interp-bits.rkt @@ -1,6 +1,6 @@ #lang racket (provide interp interp-bits) -(require "ast.rkt" "types.rkt") +(require "ast.rkt" "types.rkt" "interp-prim-bits.rkt") ;; Expr -> Value (define (interp e) @@ -9,18 +9,10 @@ ;; Expr -> Bits (define (interp-bits e) (match e - [(Int i) (value->bits i)] - [(Bool b) (value->bits b)] - [(Prim1 'add1 e0) - (+ (interp-bits e0) (value->bits 1))] - [(Prim1 'sub1 e0) - (- (interp-bits e0) (value->bits 1))] - [(Prim1 'zero? e) - (if (zero? (interp-bits e)) - val-true - val-false)] + [(Lit d) (value->bits d)] + [(Prim1 p e) + (interp-prim1-bits p (interp-bits e))] [(If e1 e2 e3) - (if (= (interp-bits e1) val-false) + (if (= (interp-bits e1) (value->bits #f)) (interp-bits e3) (interp-bits e2))])) - diff --git a/langs/dupe/interp-file.rkt b/langs/dupe/interp-file.rkt deleted file mode 100644 index e6c9b1d3..00000000 --- a/langs/dupe/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/dupe/interp-prim-bits.rkt b/langs/dupe/interp-prim-bits.rkt new file mode 100644 index 00000000..330dcecf --- /dev/null +++ b/langs/dupe/interp-prim-bits.rkt @@ -0,0 +1,10 @@ +#lang racket +(require "types.rkt") +(provide interp-prim1-bits) + +;; Op Bits -> Bits +(define (interp-prim1-bits op b) + (match op + ['add1 (+ b (value->bits 1))] + ['sub1 (- b 
(value->bits 1))] + ['zero? (if (zero? b) (value->bits #t) (value->bits #f))])) diff --git a/langs/dupe/interp-prim.rkt b/langs/dupe/interp-prim.rkt index 5cc032d3..e0ea0720 100644 --- a/langs/dupe/interp-prim.rkt +++ b/langs/dupe/interp-prim.rkt @@ -1,9 +1,9 @@ #lang racket (provide interp-prim1) -;; Op Value -> Value +;; Op1 Value -> Value (define (interp-prim1 op v) (match op - ['add1 (add1 v)] - ['sub1 (sub1 v)] + ['add1 (add1 v)] + ['sub1 (sub1 v)] ['zero? (zero? v)])) diff --git a/langs/dupe/interp-stdin.rkt b/langs/dupe/interp-stdin.rkt new file mode 100644 index 00000000..ce4885f7 --- /dev/null +++ b/langs/dupe/interp-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "interp.rkt") + +;; -> Void +;; Parse and interpret contents of stdin, +;; print result on stdout +(define (main) + (read-line) ; ignore #lang racket line + (println (interp (parse (read))))) + diff --git a/langs/dupe/interp.rkt b/langs/dupe/interp.rkt index c9aadb75..920f0920 100644 --- a/langs/dupe/interp.rkt +++ b/langs/dupe/interp.rkt @@ -1,6 +1,7 @@ #lang racket (provide interp) -(require "ast.rkt" "interp-prim.rkt") +(require "ast.rkt") +(require "interp-prim.rkt") ;; type Value = ;; | Integer @@ -9,8 +10,7 @@ ;; Expr -> Value (define (interp e) (match e - [(Int i) i] - [(Bool b) b] + [(Lit d) d] [(Prim1 p e) (interp-prim1 p (interp e))] [(If e1 e2 e3) diff --git a/langs/dupe/main.rkt b/langs/dupe/main.rkt new file mode 100644 index 00000000..e0e38924 --- /dev/null +++ b/langs/dupe/main.rkt @@ -0,0 +1,13 @@ +#lang racket +(require "ast.rkt") +(require "parse.rkt") +(require "interp.rkt") +(require "compile.rkt") +(require "run.rkt") +(provide (all-from-out "ast.rkt")) +(provide (all-from-out "parse.rkt")) +(provide (all-from-out "interp.rkt")) +(provide (all-from-out "compile.rkt")) +(provide (all-from-out "run.rkt")) + + diff --git a/langs/dupe/parse.rkt b/langs/dupe/parse.rkt index a9d5fcc4..673a0266 100644 --- a/langs/dupe/parse.rkt +++ 
b/langs/dupe/parse.rkt @@ -5,13 +5,17 @@ ;; S-Expr -> Expr (define (parse s) (match s - [(? integer?) (Int s)] - [(? boolean?) (Bool s)] - [(list (? op1? o) e) (Prim1 o (parse e))] + [(? datum?) (Lit s)] + [(list (? op1? o) e) (Prim1 o (parse e))] [(list 'if e1 e2 e3) (If (parse e1) (parse e2) (parse e3))] [_ (error "Parse error")])) ;; Any -> Boolean +(define (datum? x) + (or (exact-integer? x) + (boolean? x))) + (define (op1? x) (memq x '(add1 sub1 zero?))) + diff --git a/langs/dupe/run-stdin.rkt b/langs/dupe/run-stdin.rkt new file mode 100644 index 00000000..16cf99e0 --- /dev/null +++ b/langs/dupe/run-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require "run.rkt") + +;; -> Void +;; Compile contents of stdin and use asm-interp to run +(define (main) + (read-line) ; ignore #lang racket line + (run (compile (parse (read))))) + diff --git a/langs/dupe/run.rkt b/langs/dupe/run.rkt new file mode 100644 index 00000000..227703c1 --- /dev/null +++ b/langs/dupe/run.rkt @@ -0,0 +1,7 @@ +#lang racket +(require a86/interp) +(require "types.rkt") +(provide run);; Asm -> Value +(define (run is) + (bits->value (asm-interp is))) + diff --git a/langs/dupe/test/all.rkt b/langs/dupe/test/all.rkt index 3a766fe5..e836ef66 100644 --- a/langs/dupe/test/all.rkt +++ b/langs/dupe/test/all.rkt @@ -33,9 +33,9 @@ ;; Dupe examples (check-equal? (run #t) #t) (check-equal? (run #f) #f) - (check-equal? (run (if #t 1 2)) 1) - (check-equal? (run (if #f 1 2)) 2) - (check-equal? (run (if 0 1 2)) 1) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) (check-equal? (run '(if #t 3 4)) 3) (check-equal? (run '(if #f 3 4)) 4) (check-equal? 
(run '(if 0 3 4)) 3) diff --git a/langs/dupe/test/compile.rkt b/langs/dupe/test/compile.rkt new file mode 100644 index 00000000..560e6f59 --- /dev/null +++ b/langs/dupe/test/compile.rkt @@ -0,0 +1,8 @@ +#lang racket +(require "../compile.rkt") +(require "../parse.rkt") +(require "../run.rkt") +(require "test-runner.rkt") + +(test (λ (e) (run (compile (parse e))))) + diff --git a/langs/dupe/test/interp.rkt b/langs/dupe/test/interp.rkt new file mode 100644 index 00000000..41aa8c04 --- /dev/null +++ b/langs/dupe/test/interp.rkt @@ -0,0 +1,7 @@ +#lang racket +(require "../interp.rkt") +(require "../parse.rkt") +(require "test-runner.rkt") + +(test (λ (e) (interp (parse e)))) + diff --git a/langs/dupe/test/test-runner.rkt b/langs/dupe/test/test-runner.rkt new file mode 100644 index 00000000..7dcac9f1 --- /dev/null +++ b/langs/dupe/test/test-runner.rkt @@ -0,0 +1,39 @@ +#lang racket +(provide test) +(require rackunit) + +(define (test run) + (begin ;; Abscond + (check-equal? (run 7) 7) + (check-equal? (run -8) -8)) + + (begin ;; Blackmail + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7)) + + (begin ;; Con + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 1) 1 2) + 7)) + 7)) + + (begin ;; Dupe + (check-equal? (run #t) #t) + (check-equal? (run #f) #f) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) + (check-equal? (run '(if #t 3 4)) 3) + (check-equal? (run '(if #f 3 4)) 4) + (check-equal? (run '(if 0 3 4)) 3) + (check-equal? (run '(zero? 4)) #f) + (check-equal? (run '(zero? 
0)) #t))) + + diff --git a/langs/dupe/types.rkt b/langs/dupe/types.rkt index 2bf8a5f0..0ba82665 100644 --- a/langs/dupe/types.rkt +++ b/langs/dupe/types.rkt @@ -1,29 +1,21 @@ #lang racket (provide (all-defined-out)) - -;; type Value = -;; | Integer -;; | Boolean - -;; type Bits = Integer - (define int-shift 1) +(define mask-int #b1) (define type-int #b0) -(define type-bool #b1) -(define val-true #b01) -(define val-false #b11) -;; Bits -> Value (define (bits->value b) - (cond [(= type-int (bitwise-and b #b1)) + (cond [(= b (value->bits #t)) #t] + [(= b (value->bits #f)) #f] + [(int-bits? b) (arithmetic-shift b (- int-shift))] - [(= b val-true) #t] - [(= b val-false) #f] [else (error "invalid bits")])) -;; Value -> Bits (define (value->bits v) - (match v - [(? integer?) (arithmetic-shift v int-shift)] - [#t val-true] - [#f val-false])) + (cond [(eq? v #t) #b01] + [(eq? v #f) #b11] + [(integer? v) (arithmetic-shift v int-shift)])) + +(define (int-bits? v) + (= type-int (bitwise-and v mask-int))) + diff --git a/langs/evildoer/Makefile b/langs/evildoer/Makefile index 9b74bfc0..db6ac44a 100644 --- a/langs/evildoer/Makefile +++ b/langs/evildoer/Makefile @@ -1,37 +1,44 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ - values.o \ print.o \ + values.o \ io.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
-test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/evildoer/ast.rkt b/langs/evildoer/ast.rkt deleted file mode 100644 index adfb4ba9..00000000 --- a/langs/evildoer/ast.rkt +++ /dev/null @@ -1,24 +0,0 @@ -#lang racket -(provide Eof Int Bool Char Prim0 Prim1 If Begin) - -;; type Expr = -;; | (Eof) -;; | (Int Integer) -;; | (Bool Boolean) -;; | (Char Character) -;; | (Prim0 Op0) -;; | (Prim1 Op1 Expr) -;; | (If Expr Expr Expr) -;; | (Begin Expr Expr) -;; type Op0 = 'read-byte | 'peek-byte | 'void -;; type Op1 = 'add1 | 'sub1 | 'zero? -;; | 'char? | 'integer->char | 'char->integer -;; | 'write-byte | 'eof-object? -(struct Eof () #:prefab) -(struct Int (i) #:prefab) -(struct Bool (b) #:prefab) -(struct Char (c) #:prefab) -(struct Prim0 (p) #:prefab) -(struct Prim1 (p e) #:prefab) -(struct If (e1 e2 e3) #:prefab) -(struct Begin (e1 e2) #:prefab) diff --git a/langs/evildoer/compile-file.rkt b/langs/evildoer/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/evildoer/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/evildoer/compile-ops.rkt b/langs/evildoer/compile-ops.rkt deleted file mode 100644 index 2816d011..00000000 --- a/langs/evildoer/compile-ops.rkt +++ /dev/null @@ -1,54 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) - -(define rax 'rax) ; return -(define rdi 'rdi) ; arg -(define r8 'r8) ; scratch in +, - - -;; Op0 -> Asm -(define (compile-op0 p) - (match p - ['void (seq (Mov rax val-void))] - 
['read-byte (seq (Call 'read_byte))] - ['peek-byte (seq (Call 'peek_byte))])) - -;; Op1 -> Asm -(define (compile-op1 p) - (match p - ['add1 (Add rax (value->bits 1))] - ['sub1 (Sub rax (value->bits 1))] - ['zero? - (let ((l1 (gensym))) - (seq (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char? - (let ((l1 (gensym))) - (seq (And rax mask-char) - (Xor rax type-char) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char->integer - (seq (Sar rax char-shift) - (Sal rax int-shift))] - ['integer->char - (seq (Sar rax int-shift) - (Sal rax char-shift) - (Xor rax type-char))] - ['eof-object? - (let ((l1 (gensym))) - (seq (Cmp rax val-eof) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['write-byte - (seq (Mov rdi rax) - (Call 'write_byte) - (Mov rax val-void))])) diff --git a/langs/evildoer/compile.rkt b/langs/evildoer/compile.rkt deleted file mode 100644 index c5ced0db..00000000 --- a/langs/evildoer/compile.rkt +++ /dev/null @@ -1,62 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" "compile-ops.rkt" a86/ast) - -;; Registers used -(define rax 'rax) -(define rsp 'rsp) - -;; Expr -> Asm -(define (compile e) - (prog (Extern 'peek_byte) - (Extern 'read_byte) - (Extern 'write_byte) - (Global 'entry) - (Label 'entry) - (Sub rsp 8) - (compile-e e) - (Add rsp 8) - (Ret))) - -;; Expr -> Asm -(define (compile-e e) - (match e - [(Int i) (compile-value i)] - [(Bool b) (compile-value b)] - [(Char c) (compile-value c)] - [(Eof) (compile-value eof)] - [(Prim0 p) (compile-prim0 p)] - [(Prim1 p e) (compile-prim1 p e)] - [(If e1 e2 e3) (compile-if e1 e2 e3)] - [(Begin e1 e2) (compile-begin e1 e2)])) - -;; Value -> Asm -(define (compile-value v) - (seq (Mov rax (value->bits v)))) - -;; Op0 -> Asm -(define (compile-prim0 p) - (compile-op0 p)) - -;; Op1 Expr -> Asm -(define (compile-prim1 p e) - (seq (compile-e e) - (compile-op1 p))) - -;; Expr Expr Expr -> Asm 
-(define (compile-if e1 e2 e3) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1) - (Cmp rax val-false) - (Je l1) - (compile-e e2) - (Jmp l2) - (Label l1) - (compile-e e3) - (Label l2)))) - -;; Expr Expr -> Asm -(define (compile-begin e1 e2) - (seq (compile-e e1) - (compile-e e2))) diff --git a/langs/evildoer/info.rkt b/langs/evildoer/info.rkt new file mode 100644 index 00000000..41ec40bb --- /dev/null +++ b/langs/evildoer/info.rkt @@ -0,0 +1,2 @@ +#lang info +(define pre-install-collection "../installer.rkt") diff --git a/langs/evildoer/interp-file.rkt b/langs/evildoer/interp-file.rkt deleted file mode 100644 index e6c9b1d3..00000000 --- a/langs/evildoer/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/evildoer/interp-io.rkt b/langs/evildoer/interp-io.rkt deleted file mode 100644 index ff74f010..00000000 --- a/langs/evildoer/interp-io.rkt +++ /dev/null @@ -1,12 +0,0 @@ -#lang racket -(provide interp/io) -(require "interp.rkt") - -;; Expr String -> (Cons Value String) -;; Interpret e with given string as input, -;; return value and collected output as string -(define (interp/io e input) - (parameterize ((current-output-port (open-output-string)) - (current-input-port (open-input-string input))) - (cons (interp e) - (get-output-string (current-output-port))))) diff --git a/langs/evildoer/interp-prim.rkt b/langs/evildoer/interp-prim.rkt deleted file mode 100644 index 088e3235..00000000 --- a/langs/evildoer/interp-prim.rkt +++ /dev/null @@ -1,21 +0,0 @@ -#lang racket -(provide interp-prim0 interp-prim1) - -;; Op0 -> Value -(define (interp-prim0 op) - (match op - ['read-byte (read-byte)] - ['peek-byte 
(peek-byte)] - ['void (void)])) - -;; Op1 Value -> Value -(define (interp-prim1 op v) - (match op - ['add1 (add1 v)] - ['sub1 (sub1 v)] - ['zero? (zero? v)] - ['char? (char? v)] - ['integer->char (integer->char v)] - ['char->integer (char->integer v)] - ['write-byte (write-byte v)] - ['eof-object? (eof-object? v)])) diff --git a/langs/evildoer/interp.rkt b/langs/evildoer/interp.rkt deleted file mode 100644 index 39778c6b..00000000 --- a/langs/evildoer/interp.rkt +++ /dev/null @@ -1,29 +0,0 @@ -#lang racket -(provide interp) -(require "ast.rkt" "interp-prim.rkt") - -;; type Value = -;; | Integer -;; | Boolean -;; | Character -;; | Eof -;; | Void - -;; Expr -> Value -(define (interp e) - (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Eof) eof] - [(Prim0 p) - (interp-prim0 p)] - [(Prim1 p e0) - (interp-prim1 p (interp e0))] - [(If e1 e2 e3) - (if (interp e1) - (interp e2) - (interp e3))] - [(Begin e1 e2) - (begin (interp e1) - (interp e2))])) diff --git a/langs/evildoer/io.c b/langs/evildoer/io.c index 7ef82281..8a417c91 100644 --- a/langs/evildoer/io.c +++ b/langs/evildoer/io.c @@ -7,14 +7,14 @@ val_t read_byte(void) { char c = getc(in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } val_t peek_byte(void) { char c = getc(in); ungetc(c, in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } diff --git a/langs/evildoer/parse.rkt b/langs/evildoer/parse.rkt deleted file mode 100644 index e8605a2a..00000000 --- a/langs/evildoer/parse.rkt +++ /dev/null @@ -1,24 +0,0 @@ -#lang racket -(provide parse) -(require "ast.rkt") - -;; S-Expr -> Expr -(define (parse s) - (match s - ['eof (Eof)] - [(? integer?) (Int s)] - [(? boolean?) (Bool s)] - [(? char?) (Char s)] - [(list (? op0? o)) (Prim0 o)] - [(list (? op1? 
o) e) (Prim1 o (parse e))] - [(list 'begin e1 e2) (Begin (parse e1) (parse e2))] - [(list 'if e1 e2 e3) - (If (parse e1) (parse e2) (parse e3))] - [_ (error "Parse error")])) - -;; Any -> Boolean -(define (op0? x) - (memq x '(read-byte peek-byte void))) -(define (op1? x) - (memq x '(add1 sub1 zero? char? integer->char char->integer - write-byte eof-object?))) diff --git a/langs/evildoer/test/all.rkt b/langs/evildoer/test/all.rkt index 4be80d06..4b456a1e 100644 --- a/langs/evildoer/test/all.rkt +++ b/langs/evildoer/test/all.rkt @@ -4,14 +4,13 @@ "../interp-io.rkt" "../parse.rkt" "../types.rkt" + "../build-runtime.rkt" a86/interp rackunit) ;; link with runtime for IO operations -(unless (file-exists? "../runtime.o") - (system "make -C .. runtime.o")) (current-objs - (list (path->string (normalize-path "../runtime.o")))) + (list (path->string runtime-path))) (define (test-runner run) ;; Abscond examples @@ -38,9 +37,9 @@ ;; Dupe examples (check-equal? (run #t) #t) (check-equal? (run #f) #f) - (check-equal? (run (if #t 1 2)) 1) - (check-equal? (run (if #f 1 2)) 2) - (check-equal? (run (if 0 1 2)) 1) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) (check-equal? (run '(if #t 3 4)) 3) (check-equal? (run '(if #f 3 4)) 4) (check-equal? (run '(if 0 3 4)) 3) @@ -54,7 +53,12 @@ (check-equal? (run '(char? #t)) #f) (check-equal? (run '(char? 8)) #f) (check-equal? (run '(char->integer #\a)) (char->integer #\a)) - (check-equal? (run '(integer->char 955)) #\λ)) + (check-equal? (run '(integer->char 955)) #\λ) + + ;; Evildoer examples + (check-equal? (run '(void)) (void)) + (check-equal? (run '(begin 1 2)) 2) + (check-equal? (run '(eof-object? 
(void))) #f)) (test-runner (λ (e) (interp (parse e)))) (test-runner (λ (e) (bits->value (asm-interp (compile (parse e)))))) diff --git a/langs/evildoer/types.rkt b/langs/evildoer/types.rkt deleted file mode 100644 index 28a5c69e..00000000 --- a/langs/evildoer/types.rkt +++ /dev/null @@ -1,34 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -(define int-shift 1) -(define char-shift 2) -(define type-int #b0) -(define type-char #b01) -(define mask-char #b11) -(define val-true #b0011) -(define val-false #b0111) -(define val-eof #b1011) -(define val-void #b1111) - -(define (bits->value b) - (cond [(= type-int (bitwise-and b #b1)) - (arithmetic-shift b (- int-shift))] - [(= type-char (bitwise-and b #b11)) - (integer->char (arithmetic-shift b (- char-shift)))] - [(= b val-true) #t] - [(= b val-false) #f] - [(= b val-eof) eof] - [(= b val-void) (void)] - [else (error "invalid bits")])) - -(define (value->bits v) - (cond [(eof-object? v) val-eof] - [(integer? v) (arithmetic-shift v int-shift)] - [(char? v) - (bitwise-ior type-char - (arithmetic-shift (char->integer v) char-shift))] - [(eq? v #t) val-true] - [(eq? v #f) val-false] - [(void? 
v) val-void])) - diff --git a/langs/evildoer/values.c b/langs/evildoer/values.c index 9bd2a704..bfdcf630 100644 --- a/langs/evildoer/values.c +++ b/langs/evildoer/values.c @@ -29,6 +29,10 @@ val_t val_wrap_int(int64_t i) { return (i << int_shift) | int_type_tag; } +val_t val_wrap_byte(unsigned char b) +{ + return (b << int_shift) | int_type_tag; +} int val_unwrap_bool(val_t x) { diff --git a/langs/evildoer/values.h b/langs/evildoer/values.h index 39cc43df..44f1c536 100644 --- a/langs/evildoer/values.h +++ b/langs/evildoer/values.h @@ -28,6 +28,7 @@ type_t val_typeof(val_t x); */ int64_t val_unwrap_int(val_t x); val_t val_wrap_int(int64_t i); +val_t val_wrap_byte(unsigned char b); int val_unwrap_bool(val_t x); val_t val_wrap_bool(int b); diff --git a/langs/extort/Makefile b/langs/extort/Makefile index 9b74bfc0..4c6bde9a 100644 --- a/langs/extort/Makefile +++ b/langs/extort/Makefile @@ -1,37 +1,44 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ - values.o \ print.o \ + values.o \ io.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x **compiled** runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
-test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/extort/ast.rkt b/langs/extort/ast.rkt deleted file mode 100644 index adfb4ba9..00000000 --- a/langs/extort/ast.rkt +++ /dev/null @@ -1,24 +0,0 @@ -#lang racket -(provide Eof Int Bool Char Prim0 Prim1 If Begin) - -;; type Expr = -;; | (Eof) -;; | (Int Integer) -;; | (Bool Boolean) -;; | (Char Character) -;; | (Prim0 Op0) -;; | (Prim1 Op1 Expr) -;; | (If Expr Expr Expr) -;; | (Begin Expr Expr) -;; type Op0 = 'read-byte | 'peek-byte | 'void -;; type Op1 = 'add1 | 'sub1 | 'zero? -;; | 'char? | 'integer->char | 'char->integer -;; | 'write-byte | 'eof-object? -(struct Eof () #:prefab) -(struct Int (i) #:prefab) -(struct Bool (b) #:prefab) -(struct Char (c) #:prefab) -(struct Prim0 (p) #:prefab) -(struct Prim1 (p e) #:prefab) -(struct If (e1 e2 e3) #:prefab) -(struct Begin (e1 e2) #:prefab) diff --git a/langs/extort/compile-file.rkt b/langs/extort/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/extort/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/extort/compile-ops.rkt b/langs/extort/compile-ops.rkt deleted file mode 100644 index d2cdc934..00000000 --- a/langs/extort/compile-ops.rkt +++ /dev/null @@ -1,125 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) - -(define rax 'rax) ; return -(define rdi 'rdi) ; arg -(define r8 'r8) ; scratch in +, - -(define r9 'r9) ; scratch in assert-type - -;; Op0 -> Asm -(define (compile-op0 p) - (match p - ['void (seq 
(Mov rax val-void))] - ['read-byte (seq (Call 'read_byte))] - ['peek-byte (seq (Call 'peek_byte))])) - -;; Op1 -> Asm -(define (compile-op1 p) - (match p - ['add1 - (seq (assert-integer rax) - (Add rax (value->bits 1)))] - ['sub1 - (seq (assert-integer rax) - (Sub rax (value->bits 1)))] - ['zero? - (let ((l1 (gensym))) - (seq (assert-integer rax) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char? - (let ((l1 (gensym))) - (seq (And rax mask-char) - (Xor rax type-char) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char->integer - (seq (assert-char rax) - (Sar rax char-shift) - (Sal rax int-shift))] - ['integer->char - (seq (assert-codepoint) - (Sar rax int-shift) - (Sal rax char-shift) - (Xor rax type-char))] - ['eof-object? (eq-imm val-eof)] - ['write-byte - (seq (assert-byte) - (Mov rdi rax) - (Call 'write_byte) - (Mov rax val-void))])) - -;; Op2 -> Asm -(define (compile-op2 p) - (match p - ['+ - (seq (Pop r8) - (assert-integer r8) - (assert-integer rax) - (Add rax r8))] - ['- - (seq (Pop r8) - (assert-integer r8) - (assert-integer rax) - (Sub r8 rax) - (Mov rax r8))])) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (assert-type mask type) - (λ (arg) - (seq (Mov r9 arg) - (And r9 mask) - (Cmp r9 type) - (Jne 'err)))) - -(define (type-pred mask type) - (let ((l (gensym))) - (seq (And rax mask) - (Cmp rax type) - (Mov rax (value->bits #t)) - (Je l) - (Mov rax (value->bits #f)) - (Label l)))) - -(define assert-integer - (assert-type mask-int type-int)) -(define assert-char - (assert-type mask-char type-char)) - -(define (assert-codepoint) - (let ((ok (gensym))) - (seq (assert-integer rax) - (Cmp rax (value->bits 0)) - (Jl 'err) - (Cmp rax (value->bits 1114111)) - (Jg 'err) - (Cmp rax (value->bits 55295)) - (Jl ok) - (Cmp rax (value->bits 57344)) - (Jg ok) - (Jmp 'err) - (Label ok)))) - -(define (assert-byte) - (seq (assert-integer rax) - (Cmp rax (value->bits 0)) - (Jl 'err) - 
(Cmp rax (value->bits 255)) - (Jg 'err))) - -;; Imm -> Asm -(define (eq-imm imm) - (let ((l1 (gensym))) - (seq (Cmp rax imm) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))) diff --git a/langs/extort/compile.rkt b/langs/extort/compile.rkt deleted file mode 100644 index 0ec6ba52..00000000 --- a/langs/extort/compile.rkt +++ /dev/null @@ -1,73 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" "compile-ops.rkt" a86/ast) - -;; Registers used -(define rax 'rax) ; return -(define rsp 'rsp) ; stack - -;; Expr -> Asm -(define (compile e) - (prog (Extern 'peek_byte) - (Extern 'read_byte) - (Extern 'write_byte) - (Extern 'raise_error) - (Global 'entry) - (Label 'entry) - (Sub 'rsp 8) - (compile-e e) - (Add 'rsp 8) - (Ret) - ;; Error handler - (Label 'err) - (Call 'raise_error))) - -;; Expr -> Asm -(define (compile-e e) - (match e - [(Int i) (compile-value i)] - [(Bool b) (compile-value b)] - [(Char c) (compile-value c)] - [(Eof) (compile-value eof)] - [(Prim0 p) (compile-prim0 p)] - [(Prim1 p e) (compile-prim1 p e)] - [(If e1 e2 e3) (compile-if e1 e2 e3)] - [(Begin e1 e2) (compile-begin e1 e2)])) - -;; Value -> Asm -(define (compile-value v) - (seq (Mov rax (value->bits v)))) - -;; Op0 -> Asm -(define (compile-prim0 p) - (compile-op0 p)) - -;; Op1 Expr -> Asm -(define (compile-prim1 p e) - (seq (compile-e e) - (compile-op1 p))) - -;; Op2 Expr Expr -> Asm -(define (compile-prim2 p e1 e2) - (seq (compile-e e1) - (Push rax) - (compile-e e2) - (compile-op2 p))) - -;; Expr Expr Expr -> Asm -(define (compile-if e1 e2 e3) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1) - (Cmp rax val-false) - (Je l1) - (compile-e e2) - (Jmp l2) - (Label l1) - (compile-e e3) - (Label l2)))) - -;; Expr Expr -> Asm -(define (compile-begin e1 e2) - (seq (compile-e e1) - (compile-e e2))) diff --git a/langs/extort/compile/help.rkt b/langs/extort/compile/help.rkt deleted file mode 100644 index 321135ab..00000000 --- 
a/langs/extort/compile/help.rkt +++ /dev/null @@ -1,15 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -;; -> [List Label Label] -;; Guaranteed to be unique on each call -(define gen-if-labels - (let ((i 0)) - (λ () - (set! i (add1 i)) - (list (lab "f" i) - (lab "x" i))))) - -;; String Integer -> Symbol -(define (lab s i) - (string->symbol (string-append "if_" s "_" (number->string i)))) diff --git a/langs/extort/info.rkt b/langs/extort/info.rkt new file mode 100644 index 00000000..41ec40bb --- /dev/null +++ b/langs/extort/info.rkt @@ -0,0 +1,2 @@ +#lang info +(define pre-install-collection "../installer.rkt") diff --git a/langs/extort/interp-file.rkt b/langs/extort/interp-file.rkt deleted file mode 100644 index e6c9b1d3..00000000 --- a/langs/extort/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/extort/interp-prim.rkt b/langs/extort/interp-prim.rkt deleted file mode 100644 index e0ac5dac..00000000 --- a/langs/extort/interp-prim.rkt +++ /dev/null @@ -1,27 +0,0 @@ -#lang racket -(provide interp-prim0 interp-prim1) - -;; Op0 -> Answer -(define (interp-prim0 op) - (match op - ['read-byte (read-byte)] - ['peek-byte (peek-byte)] - ['void (void)])) - -;; Op1 Value -> Answer -(define (interp-prim1 op v) - (match op - ['add1 (if (integer? v) (add1 v) 'err)] - ['sub1 (if (integer? v) (sub1 v) 'err)] - ['zero? (if (integer? v) (zero? v) 'err)] - ['char? (char? v)] - ['char->integer (if (char? v) (char->integer v) 'err)] - ['integer->char (if (codepoint? v) (integer->char v) 'err)] - ['eof-object? (eof-object? v)] - ['write-byte (if (byte? v) (write-byte v) 'err)])) - -;; Any -> Boolean -(define (codepoint? 
v) - (and (integer? v) - (or (<= 0 v 55295) - (<= 57344 v 1114111)))) diff --git a/langs/extort/interp.rkt b/langs/extort/interp.rkt deleted file mode 100644 index 86836693..00000000 --- a/langs/extort/interp.rkt +++ /dev/null @@ -1,37 +0,0 @@ -#lang racket -(provide interp) -(require "ast.rkt" "interp-prim.rkt") - -;; type Answer = Value | 'err - -;; type Value = -;; | Integer -;; | Boolean -;; | Character -;; | Eof -;; | Void - -;; Expr -> Answer -(define (interp e) - (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Eof) eof] - [(Prim0 p) - (interp-prim0 p)] - [(Prim1 p e0) - (match (interp e0) - ['err 'err] - [v (interp-prim1 p v)])] - [(If e1 e2 e3) - (match (interp e1) - ['err 'err] - [v - (if v - (interp e2) - (interp e3))])] - [(Begin e1 e2) - (match (interp e1) - ['err 'err] - [_ (interp e2)])])) diff --git a/langs/extort/io.c b/langs/extort/io.c index 7ef82281..8a417c91 100644 --- a/langs/extort/io.c +++ b/langs/extort/io.c @@ -7,14 +7,14 @@ val_t read_byte(void) { char c = getc(in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } val_t peek_byte(void) { char c = getc(in); ungetc(c, in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } diff --git a/langs/extort/parse.rkt b/langs/extort/parse.rkt deleted file mode 100644 index 6c5f5ea7..00000000 --- a/langs/extort/parse.rkt +++ /dev/null @@ -1,26 +0,0 @@ -#lang racket -(provide parse) -(require "ast.rkt") - -;; S-Expr -> Expr -(define (parse s) - (match s - [(? integer? s) (Int s)] - [(? boolean? s) (Bool s)] - [(? char? s) (Char s)] - ['eof (Eof)] - [(list (? op0? o)) (Prim0 o)] - [(list (? op1? o) e) (Prim1 o (parse e))] - [(list 'begin e1 e2) (Begin (parse e1) (parse e2))] - [(list 'if e1 e2 e3) - (If (parse e1) (parse e2) (parse e3))] - [_ (error "Parse error")])) - -;; Any -> Boolean -(define (op0? 
x) - (memq x '(read-byte peek-byte))) - -;; Any -> Boolean -(define (op1? x) - (memq x '(add1 sub1 zero? char? integer->char char->integer - write-byte eof-object?))) diff --git a/langs/extort/test/all.rkt b/langs/extort/test/all.rkt index e197b6ce..9bc3e3aa 100644 --- a/langs/extort/test/all.rkt +++ b/langs/extort/test/all.rkt @@ -4,14 +4,15 @@ "../interp-io.rkt" "../parse.rkt" "../types.rkt" + "../build-runtime.rkt" a86/interp rackunit) ;; link with runtime for IO operations -(unless (file-exists? "../runtime.o") - (system "make -C .. runtime.o")) +;(unless (file-exists? "../runtime.o") +; (system "make -C .. runtime.o")) (current-objs - (list (path->string (normalize-path "../runtime.o")))) + (list (path->string runtime-path))) (define (test-runner run) @@ -39,9 +40,9 @@ ;; Dupe examples (check-equal? (run #t) #t) (check-equal? (run #f) #f) - (check-equal? (run (if #t 1 2)) 1) - (check-equal? (run (if #f 1 2)) 2) - (check-equal? (run (if 0 1 2)) 1) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) (check-equal? (run '(if #t 3 4)) 3) (check-equal? (run '(if #f 3 4)) 4) (check-equal? (run '(if 0 3 4)) 3) @@ -55,6 +56,12 @@ (check-equal? (run '(char? 8)) #f) (check-equal? (run '(char->integer #\a)) (char->integer #\a)) (check-equal? (run '(integer->char 955)) #\λ) + + ;; Evildoer examples + (check-equal? (run '(void)) (void)) + (check-equal? (run '(begin 1 2)) 2) + (check-equal? (run '(eof-object? (void))) #f) + ;; Extort examples (check-equal? (run '(add1 #f)) 'err) (check-equal? 
(run '(sub1 #f)) 'err) diff --git a/langs/extort/types.rkt b/langs/extort/types.rkt deleted file mode 100644 index 18a1415a..00000000 --- a/langs/extort/types.rkt +++ /dev/null @@ -1,35 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -(define int-shift 1) -(define char-shift 2) -(define type-int #b0) -(define mask-int #b1) -(define type-char #b01) -(define mask-char #b11) -(define val-true #b0011) -(define val-false #b0111) -(define val-eof #b1011) -(define val-void #b1111) - -(define (bits->value b) - (cond [(= type-int (bitwise-and b #b1)) - (arithmetic-shift b (- int-shift))] - [(= type-char (bitwise-and b #b11)) - (integer->char (arithmetic-shift b (- char-shift)))] - [(= b val-true) #t] - [(= b val-false) #f] - [(= b val-eof) eof] - [(= b val-void) (void)] - [else (error "invalid bits")])) - -(define (value->bits v) - (cond [(eof-object? v) val-eof] - [(integer? v) (arithmetic-shift v int-shift)] - [(char? v) - (bitwise-ior type-char - (arithmetic-shift (char->integer v) char-shift))] - [(eq? v #t) val-true] - [(eq? v #f) val-false] - [(void? 
v) val-void])) - diff --git a/langs/extort/values.c b/langs/extort/values.c index 9bd2a704..bfdcf630 100644 --- a/langs/extort/values.c +++ b/langs/extort/values.c @@ -29,6 +29,10 @@ val_t val_wrap_int(int64_t i) { return (i << int_shift) | int_type_tag; } +val_t val_wrap_byte(unsigned char b) +{ + return (b << int_shift) | int_type_tag; +} int val_unwrap_bool(val_t x) { diff --git a/langs/extort/values.h b/langs/extort/values.h index 39cc43df..44f1c536 100644 --- a/langs/extort/values.h +++ b/langs/extort/values.h @@ -28,6 +28,7 @@ type_t val_typeof(val_t x); */ int64_t val_unwrap_int(val_t x); val_t val_wrap_int(int64_t i); +val_t val_wrap_byte(unsigned char b); int val_unwrap_bool(val_t x); val_t val_wrap_bool(int b); diff --git a/langs/fraud/Makefile b/langs/fraud/Makefile index 9b74bfc0..db6ac44a 100644 --- a/langs/fraud/Makefile +++ b/langs/fraud/Makefile @@ -1,37 +1,44 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ - values.o \ print.o \ + values.o \ io.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
-test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/fraud/ast.rkt b/langs/fraud/ast.rkt index 975405a9..d01004f8 100644 --- a/langs/fraud/ast.rkt +++ b/langs/fraud/ast.rkt @@ -1,32 +1,34 @@ #lang racket -(provide Eof Int Bool Char Prim0 Prim1 Prim2 If Begin Let Var) +(provide Lit Prim0 Prim1 Prim2 If Eof Begin Let + Var) +;; +;; type Expr = (Lit Datum) +;; | (Eof) +;; | (Prim0 Op0) +;; | (Prim1 Op1 Expr) +;; | (Prim2 Op2 Expr Expr) +;; | (If Expr Expr Expr) +;; | (Let Id Expr Expr) +;; | (Var Id) -;; type Expr = -;; | (Eof) -;; | (Int Integer) -;; | (Bool Boolean) -;; | (Char Character) -;; | (Prim0 Op0) -;; | (Prim1 Op1 Expr) -;; | (Prim2 Op2 Expr Expr) -;; | (If Expr Expr Expr) -;; | (Begin Expr Expr) -;; | (Let Id Expr Expr) -;; | (Var Id) ;; type Id = Symbol -;; type Op0 = 'read-byte -;; type Op1 = 'add1 | 'sub1 | 'zero? +;; type Datum = Integer +;; | Boolean +;; | Character +;; type Op0 = 'read-byte | 'peek-byte | 'void +;; type Op1 = 'add1 | 'sub1 +;; | 'zero? ;; | 'char? | 'integer->char | 'char->integer ;; | 'write-byte | 'eof-object? 
;; type Op2 = '+ | '- | '< | '= -(struct Eof () #:prefab) -(struct Int (i) #:prefab) -(struct Bool (b) #:prefab) -(struct Char (c) #:prefab) -(struct Prim0 (p) #:prefab) -(struct Prim1 (p e) #:prefab) + +(struct Eof () #:prefab) +(struct Lit (d) #:prefab) +(struct Prim0 (p) #:prefab) +(struct Prim1 (p e) #:prefab) (struct Prim2 (p e1 e2) #:prefab) -(struct If (e1 e2 e3) #:prefab) -(struct Begin (e1 e2) #:prefab) -(struct Let (x e1 e2) #:prefab) -(struct Var (x) #:prefab) +(struct If (e1 e2 e3) #:prefab) +(struct Begin (e1 e2) #:prefab) +(struct Let (x e1 e2) #:prefab) +(struct Var (x) #:prefab) + diff --git a/langs/fraud/build-runtime.rkt b/langs/fraud/build-runtime.rkt new file mode 100644 index 00000000..18431504 --- /dev/null +++ b/langs/fraud/build-runtime.rkt @@ -0,0 +1,14 @@ +#lang racket +(provide runtime-path) + +(require racket/runtime-path) +(define-runtime-path here ".") + +(unless (system (string-append "make -C '" + (path->string (normalize-path here)) + "' runtime.o")) + (error 'build-runtime "could not build runtime")) + +(define runtime-path + (normalize-path (build-path here "runtime.o"))) + diff --git a/langs/fraud/compile-file.rkt b/langs/fraud/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/fraud/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/fraud/compile-ops.rkt b/langs/fraud/compile-ops.rkt index 1aba71eb..5db56cd6 100644 --- a/langs/fraud/compile-ops.rkt +++ b/langs/fraud/compile-ops.rkt @@ -1,174 +1,149 @@ #lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) +(provide compile-op0 compile-op1 compile-op2 pad-stack) 
+(require "ast.rkt") +(require "types.rkt") +(require a86/ast) -(define rax 'rax) ; return -(define rdi 'rdi) ; arg -(define r8 'r8) ; scratch in +, - -(define r9 'r9) ; scratch in assert-type +(define rax 'rax)(define rdi 'rdi) ; arg +(define r8 'r8) ; scratch in op2 +(define r9 'r9) ; scratch + +(define r15 'r15) ; stack pad (non-volatile) (define rsp 'rsp) ; stack -;; Op0 CEnv -> Asm -(define (compile-op0 p c) +;; Op0 -> Asm +(define (compile-op0 p) (match p - ['void (seq (Mov rax val-void))] - ['read-byte (seq (pad-stack c) - (Call 'read_byte) - (unpad-stack c))] - ['peek-byte (seq (pad-stack c) - (Call 'peek_byte) - (unpad-stack c))])) - -;; Op1 CEnv -> Asm -(define (compile-op1 p c) + ['void (seq (Mov rax (value->bits (void))))] + ['read-byte (seq pad-stack (Call 'read_byte) unpad-stack)] + ['peek-byte (seq pad-stack (Call 'peek_byte) unpad-stack)])) + +;; Op1 -> Asm +(define (compile-op1 p) (match p ['add1 - (seq (assert-integer rax c) - (Add rax (value->bits 1)))] + (seq (assert-integer rax) + (Add rax (value->bits 1)))] ['sub1 - (seq (assert-integer rax c) - (Sub rax (value->bits 1)))] + (seq (assert-integer rax) + (Sub rax (value->bits 1)))] ['zero? - (let ((l1 (gensym))) - (seq (assert-integer rax c) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] + (seq (assert-integer rax) + (Cmp rax 0) + if-equal)] ['char? - (let ((l1 (gensym))) - (seq (And rax mask-char) - (Xor rax type-char) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] + (seq (And rax mask-char) + (Cmp rax type-char) + if-equal)] ['char->integer - (seq (assert-char rax c) - (Sar rax char-shift) - (Sal rax int-shift))] + (seq (assert-char rax) + (Sar rax char-shift) + (Sal rax int-shift))] ['integer->char - (seq (assert-codepoint c) - (Sar rax int-shift) - (Sal rax char-shift) - (Xor rax type-char))] - ['eof-object? 
(eq-imm val-eof)] + (seq (assert-codepoint) + (Sar rax int-shift) + (Sal rax char-shift) + (Xor rax type-char))] + ['eof-object? + (seq (Cmp rax (value->bits eof)) + if-equal)] ['write-byte - (seq (assert-byte c) - (pad-stack c) - (Mov rdi rax) - (Call 'write_byte) - (unpad-stack c) - (Mov rax val-void))])) - -;; Op2 CEnv -> Asm -(define (compile-op2 p c) + (seq assert-byte + pad-stack + (Mov rdi rax) + (Call 'write_byte) + unpad-stack)])) + + +;; Op2 -> Asm +(define (compile-op2 p) (match p ['+ (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Add rax r8))] ['- (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Sub r8 rax) (Mov rax r8))] ['< (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) - (Cmp r8 rax) - (Mov rax val-true) - (let ((true (gensym))) - (seq (Jl true) - (Mov rax val-false) - (Label true))))] + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + if-lt)] ['= (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) - (Cmp r8 rax) - (Mov rax val-true) - (let ((true (gensym))) - (seq (Je true) - (Mov rax val-false) - (Label true))))])) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + if-equal)])) + + +;; -> Asm +;; set rax to #t or #f if comparison flag is equal +(define if-equal + (seq (Mov rax (value->bits #f)) + (Mov r9 (value->bits #t)) + (Cmove rax r9))) + +;; -> Asm +;; set rax to #t or #f if comparison flag is less than +(define if-lt + (seq (Mov rax (value->bits #f)) + (Mov r9 (value->bits #t)) + (Cmovl rax r9))) (define (assert-type mask type) - (λ (arg c) + (λ (arg) (seq (Mov r9 arg) (And r9 mask) (Cmp r9 type) - (Jne (error-label c))))) + (Jne 'err)))) (define (type-pred mask type) - (let ((l (gensym))) - (seq (And rax mask) - (Cmp rax type) - (Mov rax (value->bits #t)) - (Je l) - (Mov rax (value->bits #f)) - (Label l)))) + (seq (And rax mask) + 
(Cmp rax type) + if-equal)) (define assert-integer (assert-type mask-int type-int)) (define assert-char (assert-type mask-char type-char)) -(define (assert-codepoint c) +(define (assert-codepoint) (let ((ok (gensym))) - (seq (assert-integer rax c) + (seq (assert-integer rax) (Cmp rax (value->bits 0)) - (Jl (error-label c)) + (Jl 'err) (Cmp rax (value->bits 1114111)) - (Jg (error-label c)) + (Jg 'err) (Cmp rax (value->bits 55295)) (Jl ok) (Cmp rax (value->bits 57344)) (Jg ok) - (Jmp (error-label c)) + (Jmp 'err) (Label ok)))) -(define (assert-byte c) - (seq (assert-integer rax c) +(define assert-byte + (seq (assert-integer rax) (Cmp rax (value->bits 0)) - (Jl (error-label c)) + (Jl 'err) (Cmp rax (value->bits 255)) - (Jg (error-label c)))) - -;; Imm -> Asm -(define (eq-imm imm) - (let ((l1 (gensym))) - (seq (Cmp rax imm) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))) - -;; CEnv -> Asm -;; Pad the stack to be aligned for a call -(define (pad-stack c) - (match (even? (length c)) - [#t (seq (Sub rsp 8))] - [#f (seq)])) - -;; CEnv -> Asm + (Jg 'err))) + +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; Asm ;; Undo the stack alignment after a call -(define (unpad-stack c) - (match (even? (length c)) - [#t (seq (Add rsp 8))] - [#f (seq)])) - -;; CEnv -> Label -;; Determine correct error handler label to jump to. -(define (error-label c) - (match (even? 
(length c)) - [#t 'raise_error] - [#f 'raise_error_align])) +(define unpad-stack + (seq (Add rsp r15))) + diff --git a/langs/fraud/compile-stdin.rkt b/langs/fraud/compile-stdin.rkt new file mode 100644 index 00000000..532ee0eb --- /dev/null +++ b/langs/fraud/compile-stdin.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile (parse (read))))) + diff --git a/langs/fraud/compile.rkt b/langs/fraud/compile.rkt index 067a6eae..bb9cee39 100644 --- a/langs/fraud/compile.rkt +++ b/langs/fraud/compile.rkt @@ -1,43 +1,46 @@ #lang racket (provide (all-defined-out)) -(require "ast.rkt" "types.rkt" "compile-ops.rkt" a86/ast) +(require "ast.rkt") +(require "compile-ops.rkt") +(require "types.rkt") +(require a86/ast) -;; Registers used -(define rax 'rax) ; return -(define rbx 'rbx) ; heap -(define rsp 'rsp) ; stack -(define rdi 'rdi) ; arg - -;; type CEnv = [Listof Variable] +(define rax 'rax)(define rsp 'rsp) ; stack +(define r15 'r15) ; stack pad (non-volatile) ;; Expr -> Asm (define (compile e) - (prog (Extern 'peek_byte) + (prog (Global 'entry) + (Extern 'peek_byte) (Extern 'read_byte) (Extern 'write_byte) (Extern 'raise_error) - (Global 'entry) (Label 'entry) + (Push r15) ; save callee-saved register (compile-e e '()) + (Pop r15) ; restore callee-save register (Ret) - (Label 'raise_error_align) - (Sub rsp 8) - (Jmp 'raise_error))) + ;; Error handler + (Label 'err) + pad-stack + (Call 'raise_error))) +;; type CEnv = (Listof [Maybe Id]) ;; Expr CEnv -> Asm (define (compile-e e c) (match e - [(Int i) (compile-value i)] - [(Bool b) (compile-value b)] - [(Char c) (compile-value c)] - [(Eof) (compile-value eof)] - [(Var x) (compile-variable x c)] - [(Prim0 p) (compile-prim0 p c)] - [(Prim1 p e) (compile-prim1 p e c)] - [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] 
- [(If e1 e2 e3) (compile-if e1 e2 e3 c)] - [(Begin e1 e2) (compile-begin e1 e2 c)] - [(Let x e1 e2) (compile-let x e1 e2 c)])) + [(Lit d) (compile-value d)] + [(Eof) (compile-value eof)] + [(Var x) (compile-variable x c)] + [(Prim0 p) (compile-prim0 p)] + [(Prim1 p e) (compile-prim1 p e c)] + [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] + [(If e1 e2 e3) + (compile-if e1 e2 e3 c)] + [(Begin e1 e2) + (compile-begin e1 e2 c)] + [(Let x e1 e2) + (compile-let x e1 e2 c)])) ;; Value -> Asm (define (compile-value v) @@ -48,36 +51,30 @@ (let ((i (lookup x c))) (seq (Mov rax (Offset rsp i))))) -;; Op0 CEnv -> Asm -(define (compile-prim0 p c) - (compile-op0 p c)) - -;; Op1 Expr CEnv -> Asm +;; Op0 -> Asm +(define (compile-prim0 p) + (compile-op0 p));; Op1 Expr CEnv -> Asm (define (compile-prim1 p e c) (seq (compile-e e c) - (compile-op1 p c))) + (compile-op1 p))) ;; Op2 Expr Expr CEnv -> Asm (define (compile-prim2 p e1 e2 c) (seq (compile-e e1 c) (Push rax) (compile-e e2 (cons #f c)) - (compile-op2 p c))) - -;; Expr Expr Expr CEnv -> Asm + (compile-op2 p)));; Expr Expr Expr CEnv -> Asm (define (compile-if e1 e2 e3 c) (let ((l1 (gensym 'if)) (l2 (gensym 'if))) (seq (compile-e e1 c) - (Cmp rax val-false) + (Cmp rax (value->bits #f)) (Je l1) (compile-e e2 c) (Jmp l2) (Label l1) (compile-e e3 c) - (Label l2)))) - -;; Expr Expr CEnv -> Asm + (Label l2))));; Expr Expr CEnv -> Asm (define (compile-begin e1 e2 c) (seq (compile-e e1 c) (compile-e e2 c))) @@ -97,3 +94,4 @@ (match (eq? 
x y) [#t 0] [#f (+ 8 (lookup x rest))])])) + diff --git a/langs/fraud/info.rkt b/langs/fraud/info.rkt new file mode 100644 index 00000000..41ec40bb --- /dev/null +++ b/langs/fraud/info.rkt @@ -0,0 +1,2 @@ +#lang info +(define pre-install-collection "../installer.rkt") diff --git a/langs/fraud/interp-file.rkt b/langs/fraud/interp-file.rkt deleted file mode 100644 index e6c9b1d3..00000000 --- a/langs/fraud/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/fraud/interp-io.rkt b/langs/fraud/interp-io.rkt index 12da1b4b..29a82d0b 100644 --- a/langs/fraud/interp-io.rkt +++ b/langs/fraud/interp-io.rkt @@ -2,11 +2,12 @@ (provide interp/io) (require "interp.rkt") -;; Expr String -> (Cons Value String) +;; String Expr -> (Cons Value String) ;; Interpret e with given string as input, -;; collect output as string (including printed result) +;; return value and collected output as string (define (interp/io e input) (parameterize ((current-output-port (open-output-string)) (current-input-port (open-input-string input))) (cons (interp e) (get-output-string (current-output-port))))) + diff --git a/langs/fraud/interp-lexical.rkt b/langs/fraud/interp-lexical.rkt index 5c6ad19a..0d121985 100644 --- a/langs/fraud/interp-lexical.rkt +++ b/langs/fraud/interp-lexical.rkt @@ -8,13 +8,11 @@ (define (interp e) (interp-env (translate e) '())) -;; Expr VEnv -> Answer +;; IExpr VEnv -> Answer (define (interp-env e r) (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Eof) eof] + [(Lit d) d] + [(Eof) eof] [(Var a) (list-ref r a)] [(Prim0 p) (interp-prim0 p)] [(Prim1 p e) diff --git a/langs/fraud/interp-prim.rkt b/langs/fraud/interp-prim.rkt 
index 4393361e..2ef28e0c 100644 --- a/langs/fraud/interp-prim.rkt +++ b/langs/fraud/interp-prim.rkt @@ -1,7 +1,7 @@ #lang racket (provide interp-prim0 interp-prim1 interp-prim2) -;; Op0 -> Answer +;; Op0 -> Value (define (interp-prim0 op) (match op ['read-byte (read-byte)] @@ -10,26 +10,29 @@ ;; Op1 Value -> Answer (define (interp-prim1 op v) - (match op - ['add1 (if (integer? v) (add1 v) 'err)] - ['sub1 (if (integer? v) (sub1 v) 'err)] - ['zero? (if (integer? v) (zero? v) 'err)] - ['char? (char? v)] - ['char->integer (if (char? v) (char->integer v) 'err)] - ['integer->char (if (codepoint? v) (integer->char v) 'err)] - ['eof-object? (eof-object? v)] - ['write-byte (if (byte? v) (write-byte v) 'err)])) + (match (list op v) + [(list 'add1 (? integer?)) (add1 v)] + [(list 'sub1 (? integer?)) (sub1 v)] + [(list 'zero? (? integer?)) (zero? v)] + [(list 'char? v) (char? v)] + [(list 'integer->char (? codepoint?)) (integer->char v)] + [(list 'char->integer (? char?)) (char->integer v)] + [(list 'write-byte (? byte?)) (write-byte v)] + [(list 'eof-object? v) (eof-object? v)] + [_ 'err])) ;; Op2 Value Value -> Answer (define (interp-prim2 op v1 v2) - (match op - ['+ (if (and (integer? v1) (integer? v2)) (+ v1 v2) 'err)] - ['- (if (and (integer? v1) (integer? v2)) (- v1 v2) 'err)] - ['< (if (and (integer? v1) (integer? v2)) (< v1 v2) 'err)] - ['= (if (and (integer? v1) (integer? v2)) (= v1 v2) 'err)])) + (match (list op v1 v2) + [(list '+ (? integer?) (? integer?)) (+ v1 v2)] + [(list '- (? integer?) (? integer?)) (- v1 v2)] + [(list '< (? integer?) (? integer?)) (< v1 v2)] + [(list '= (? integer?) (? integer?)) (= v1 v2)] + [_ 'err])) ;; Any -> Boolean (define (codepoint? v) (and (integer? 
v) (or (<= 0 v 55295) (<= 57344 v 1114111)))) + diff --git a/langs/fraud/interp-stdin.rkt b/langs/fraud/interp-stdin.rkt new file mode 100644 index 00000000..ce4885f7 --- /dev/null +++ b/langs/fraud/interp-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "interp.rkt") + +;; -> Void +;; Parse and interpret contents of stdin, +;; print result on stdout +(define (main) + (read-line) ; ignore #lang racket line + (println (interp (parse (read))))) + diff --git a/langs/fraud/interp.rkt b/langs/fraud/interp.rkt index ba5aa610..9510a8ab 100644 --- a/langs/fraud/interp.rkt +++ b/langs/fraud/interp.rkt @@ -1,8 +1,8 @@ #lang racket -(provide interp interp-env) -(require "ast.rkt" "interp-prim.rkt") - -;; type Answer = Value | 'err +(provide interp) +(provide interp-env) +(require "ast.rkt") +(require "interp-prim.rkt") ;; type Value = ;; | Integer @@ -11,8 +11,7 @@ ;; | Eof ;; | Void -;; type REnv = (Listof (List Id Value)) - +;; type Env = (Listof (List Id Value)) ;; Expr -> Answer (define (interp e) (interp-env e '())) @@ -20,12 +19,10 @@ ;; Expr Env -> Answer (define (interp-env e r) (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Eof) eof] + [(Lit d) d] + [(Eof) eof] [(Var x) (lookup r x)] - [(Prim0 p) (interp-prim0 p)] + [(Prim0 p) (interp-prim0 p)] [(Prim1 p e) (match (interp-env e r) ['err 'err] @@ -36,8 +33,8 @@ [v1 (match (interp-env e2 r) ['err 'err] [v2 (interp-prim2 p v1 v2)])])] - [(If p e1 e2) - (match (interp-env p r) + [(If e0 e1 e2) + (match (interp-env e0 r) ['err 'err] [v (if v @@ -61,6 +58,6 @@ (lookup r x))])) ;; Env Id Value -> Env -(define (ext r v val) - (cons (list v val) r)) +(define (ext r x v) + (cons (list x v) r)) diff --git a/langs/fraud/io.c b/langs/fraud/io.c index 7ef82281..8a417c91 100644 --- a/langs/fraud/io.c +++ b/langs/fraud/io.c @@ -7,14 +7,14 @@ val_t read_byte(void) { char c = getc(in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? 
val_wrap_eof() : val_wrap_byte(c); } val_t peek_byte(void) { char c = getc(in); ungetc(c, in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } diff --git a/langs/fraud/main.rkt b/langs/fraud/main.rkt new file mode 100644 index 00000000..e0e38924 --- /dev/null +++ b/langs/fraud/main.rkt @@ -0,0 +1,13 @@ +#lang racket +(require "ast.rkt") +(require "parse.rkt") +(require "interp.rkt") +(require "compile.rkt") +(require "run.rkt") +(provide (all-from-out "ast.rkt")) +(provide (all-from-out "parse.rkt")) +(provide (all-from-out "interp.rkt")) +(provide (all-from-out "compile.rkt")) +(provide (all-from-out "run.rkt")) + + diff --git a/langs/fraud/parse.rkt b/langs/fraud/parse.rkt index 653c16df..8ebed102 100644 --- a/langs/fraud/parse.rkt +++ b/langs/fraud/parse.rkt @@ -5,27 +5,34 @@ ;; S-Expr -> Expr (define (parse s) (match s - [(? integer?) (Int s)] - [(? boolean? s) (Bool s)] - [(? char? s) (Char s)] - ['eof (Eof)] - [(? symbol? s) (Var s)] - [(list (? op0? o)) (Prim0 o)] - [(list (? op1? o) e) (Prim1 o (parse e))] + ['eof (Eof)] + [(? datum?) (Lit s)] + [(? symbol?) (Var s)] + [(list (? op0? o)) (Prim0 o)] + [(list (? op1? o) e) (Prim1 o (parse e))] [(list (? op2? o) e1 e2) (Prim2 o (parse e1) (parse e2))] - [(list 'begin e1 e2) (Begin (parse e1) (parse e2))] + [(list 'begin e1 e2) (Begin (parse e1) (parse e2))] [(list 'if e1 e2 e3) (If (parse e1) (parse e2) (parse e3))] [(list 'let (list (list (? symbol? x) e1)) e2) (Let x (parse e1) (parse e2))] - [_ (error "Parse error" s)])) + [_ (error "Parse error")])) +;; Any -> Boolean +(define (datum? x) + (or (exact-integer? x) + (boolean? x) + (char? x))) + ;; Any -> Boolean (define (op0? x) - (memq x '(read-byte peek-byte))) + (memq x '(read-byte peek-byte void))) + (define (op1? x) (memq x '(add1 sub1 zero? char? integer->char char->integer write-byte eof-object?))) + (define (op2? 
x) (memq x '(+ - < =))) + diff --git a/langs/fraud/run-stdin.rkt b/langs/fraud/run-stdin.rkt new file mode 100644 index 00000000..16cf99e0 --- /dev/null +++ b/langs/fraud/run-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require "run.rkt") + +;; -> Void +;; Compile contents of stdin and use asm-interp to run +(define (main) + (read-line) ; ignore #lang racket line + (run (compile (parse (read))))) + diff --git a/langs/fraud/run.rkt b/langs/fraud/run.rkt new file mode 100644 index 00000000..7745c566 --- /dev/null +++ b/langs/fraud/run.rkt @@ -0,0 +1,19 @@ +#lang racket +(require a86/interp) +(require "types.rkt") +(require "build-runtime.rkt") +(provide run run/io);; Asm -> Answer +(define (run is) + (parameterize ((current-objs (list (path->string runtime-path)))) + (match (asm-interp is) + ['err 'err] + [b (bits->value b)]))) + +;; Asm String -> (cons Answer String) +(define (run/io is in) + (parameterize ((current-objs (list (path->string runtime-path)))) + (match (asm-interp/io is in) + [(cons 'err out) (cons 'err out)] + [(cons b out) + (cons (bits->value b) out)]))) + diff --git a/langs/fraud/semantics.rkt b/langs/fraud/semantics.rkt index 6061f1a4..47abe97f 100644 --- a/langs/fraud/semantics.rkt +++ b/langs/fraud/semantics.rkt @@ -104,12 +104,34 @@ (Prim1 'add1 (Var x))))) 8))) +;; replace any free variables with 0 +(define-metafunction F + F-close-with-zero : e (x ...) -> e + [(F-close-with-zero (Var x) (x_0 ... 
x x_1 ...)) (Var x)] + [(F-close-with-zero (Var x) any) (Int 0)] + [(F-close-with-zero (Int i) any) (Int i)] + [(F-close-with-zero (Bool b) any) (Bool b)] + [(F-close-with-zero (If e_1 e_2 e_3) any_r) + (If (F-close-with-zero e_1 any_r) + (F-close-with-zero e_2 any_r) + (F-close-with-zero e_3 any_r))] + [(F-close-with-zero (Prim1 p1 e_1) any_r) + (Prim1 p1 (close-with-zero e_1 any_r))] + #;[(F-close-with-zero (Prim2 p2 e_1 e_2) any_r) + (Prim2 p2 + (close-with-zero e_1 any_r) + (close-with-zero e_2 any_r))] + [(F-close-with-zero (Let x e_1 e_2) (x_0 ...)) + (Let x (close-with-zero e_1 (x_0 ...)) + (close-with-zero e_2 (x x_0 ...)))]) + (module+ test (require rackunit) - ;; Check that the semantics is total function + ;; Check that the semantics is total function on closed expressions (redex-check F e - (check-true (redex-match? F (a_0) (judgment-holds (𝑭 e a) a)) (term e)) + (redex-let F ([e_0 (term (F-close-with-zero e ()))]) + (check-true (redex-match? F (a_0) (judgment-holds (𝑭 e_0 a) a)) (format "~a" (term e)))) #:print? #f)) @@ -203,7 +225,6 @@ (define-metafunction G lookup : r x -> a - [(lookup () x) err] [(lookup ((x v) (x_1 v_1) ...) x) v] [(lookup ((x_0 v_0) (x_1 v_1) ...) x) (lookup ((x_1 v_1) ...) x)]) @@ -246,9 +267,31 @@ (test-judgment-holds (𝑮 (Prim2 '- (Int 1) (Bool #f)) err)) (test-judgment-holds (𝑮 (Prim2 '- (Prim1 'add1 (Bool #f)) (Bool #f)) err))) +;; replace any free variables with 0 +(define-metafunction G + close-with-zero : e (x ...) -> e + [(close-with-zero (Var x) (x_0 ... 
x x_1 ...)) (Var x)] + [(close-with-zero (Var x) any) (Int 0)] + [(close-with-zero (Int i) any) (Int i)] + [(close-with-zero (Bool b) any) (Bool b)] + [(close-with-zero (If e_1 e_2 e_3) any_r) + (If (close-with-zero e_1 any_r) + (close-with-zero e_2 any_r) + (close-with-zero e_3 any_r))] + [(close-with-zero (Prim1 p1 e_1) any_r) + (Prim1 p1 (close-with-zero e_1 any_r))] + [(close-with-zero (Prim2 p2 e_1 e_2) any_r) + (Prim2 p2 + (close-with-zero e_1 any_r) + (close-with-zero e_2 any_r))] + [(close-with-zero (Let x e_1 e_2) (x_0 ...)) + (Let x (close-with-zero e_1 (x_0 ...)) + (close-with-zero e_2 (x x_0 ...)))]) + (module+ test (require rackunit) - ;; Check that the semantics is total function + ;; Check that the semantics is total function -- for closed expressions (redex-check G e - (check-true (redex-match? G (a_0) (judgment-holds (𝑮 e a) a))) + (redex-let G ([e_0 (term (close-with-zero e ()))]) + (check-true (redex-match? G (a_0) (judgment-holds (𝑮 e_0 a) a)))) #:print? #f)) diff --git a/langs/fraud/test/compile.rkt b/langs/fraud/test/compile.rkt index 64184117..d52b46dd 100644 --- a/langs/fraud/test/compile.rkt +++ b/langs/fraud/test/compile.rkt @@ -1,24 +1,10 @@ #lang racket -(require "../compile.rkt" - "../parse.rkt" - "../types.rkt" - "test-runner.rkt" - a86/interp) +(require "../compile.rkt") +(require "../parse.rkt") +(require "../run.rkt") +(require "test-runner.rkt") -;; link with runtime for IO operations -(unless (file-exists? "../runtime.o") - (system "make -C .. 
runtime.o")) -(current-objs - (list (path->string (normalize-path "../runtime.o")))) - -(test-runner (λ (e) (match (asm-interp (compile (parse e))) - ['err 'err] - [bs (bits->value bs)]))) - -(test-runner-io (λ (e s) - (match (asm-interp/io (compile (parse e)) s) - [(cons 'err o) (cons 'err o)] - [(cons r o) - (cons (bits->value r) o)]))) +(test (λ (e) (run (compile (parse e))))) +(test/io (λ (in e) (run/io (compile (parse e)) in))) diff --git a/langs/fraud/test/interp.rkt b/langs/fraud/test/interp.rkt index 7ca855b9..74d4a050 100644 --- a/langs/fraud/test/interp.rkt +++ b/langs/fraud/test/interp.rkt @@ -1,8 +1,10 @@ #lang racket -(require "../interp.rkt" - "../interp-io.rkt" - "../parse.rkt" - "test-runner.rkt") +(require "../interp.rkt") +(require "../interp-io.rkt") +(require "../parse.rkt") +(require "test-runner.rkt") + +(test (λ (e) (interp (parse e)))) + +(test/io (λ (in e) (interp/io (parse e) in))) -(test-runner (λ (e) (interp (parse e)))) -(test-runner-io (λ (e s) (interp/io (parse e) s))) diff --git a/langs/fraud/test/test-runner.rkt b/langs/fraud/test/test-runner.rkt index 56632e76..7e78f795 100644 --- a/langs/fraud/test/test-runner.rkt +++ b/langs/fraud/test/test-runner.rkt @@ -1,117 +1,130 @@ #lang racket -(provide test-runner test-runner-io) +(provide test test/io) (require rackunit) -(define (test-runner run) - - ;; Abscond examples - (check-equal? (run 7) 7) - (check-equal? (run -8) -8) +(define (test run) + (begin ;; Abscond + (check-equal? (run 7) 7) + (check-equal? (run -8) -8)) - ;; Blackmail examples - (check-equal? (run '(add1 (add1 7))) 9) - (check-equal? (run '(add1 (sub1 7))) 7) + (begin ;; Blackmail + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7)) - ;; Con examples - (check-equal? (run '(if (zero? 0) 1 2)) 1) - (check-equal? (run '(if (zero? 1) 1 2)) 2) - (check-equal? (run '(if (zero? -7) 1 2)) 2) - (check-equal? (run '(if (zero? 0) - (if (zero? 1) 1 2) - 7)) - 2) - (check-equal? (run '(if (zero? 
(if (zero? 0) 1 0)) - (if (zero? 1) 1 2) - 7)) - 7) + (begin ;; Con + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 1) 1 2) + 7)) + 7)) - ;; Dupe examples - (check-equal? (run #t) #t) - (check-equal? (run #f) #f) - (check-equal? (run '(if #t 1 2)) 1) - (check-equal? (run '(if #f 1 2)) 2) - (check-equal? (run '(if 0 1 2)) 1) - (check-equal? (run '(if #t 3 4)) 3) - (check-equal? (run '(if #f 3 4)) 4) - (check-equal? (run '(if 0 3 4)) 3) - (check-equal? (run '(zero? 4)) #f) - (check-equal? (run '(zero? 0)) #t) - ;; Dodger examples - (check-equal? (run #\a) #\a) - (check-equal? (run #\b) #\b) - (check-equal? (run '(char? #\a)) #t) - (check-equal? (run '(char? #t)) #f) - (check-equal? (run '(char? 8)) #f) - (check-equal? (run '(char->integer #\a)) (char->integer #\a)) - (check-equal? (run '(integer->char 955)) #\λ) - ;; Extort examples - (check-equal? (run '(add1 #f)) 'err) - (check-equal? (run '(sub1 #f)) 'err) - (check-equal? (run '(zero? #f)) 'err) - (check-equal? (run '(char->integer #f)) 'err) - (check-equal? (run '(integer->char #f)) 'err) - (check-equal? (run '(integer->char -1)) 'err) - (check-equal? (run '(write-byte #f)) 'err) - (check-equal? (run '(write-byte -1)) 'err) - (check-equal? (run '(write-byte 256)) 'err) - ;; Fraud examples - (check-equal? (run '(let ((x 7)) x)) 7) - (check-equal? (run '(let ((x 7)) 2)) 2) - (check-equal? (run '(let ((x 7)) (add1 x))) 8) - (check-equal? (run '(let ((x (add1 7))) x)) 8) - (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) - (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) - (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) + (begin ;; Dupe + (check-equal? (run #t) #t) + (check-equal? (run #f) #f) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? 
(run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) + (check-equal? (run '(if #t 3 4)) 3) + (check-equal? (run '(if #f 3 4)) 4) + (check-equal? (run '(if 0 3 4)) 3) + (check-equal? (run '(zero? 4)) #f) + (check-equal? (run '(zero? 0)) #t)) - (check-equal? (run '(let ((x 0)) - (if (zero? x) 7 8))) - 7) - (check-equal? (run '(let ((x 1)) - (add1 (if (zero? x) 7 8)))) - 9) - (check-equal? (run '(+ 3 4)) 7) - (check-equal? (run '(- 3 4)) -1) - (check-equal? (run '(+ (+ 2 1) 4)) 7) - (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) - (check-equal? (run '(let ((x (+ 1 2))) - (let ((z (- 4 x))) - (+ (+ x x) z)))) - 7) + (begin ;; Dodger + (check-equal? (run #\a) #\a) + (check-equal? (run #\b) #\b) + (check-equal? (run '(char? #\a)) #t) + (check-equal? (run '(char? #t)) #f) + (check-equal? (run '(char? 8)) #f) + (check-equal? (run '(char->integer #\a)) (char->integer #\a)) + (check-equal? (run '(integer->char 955)) #\λ)) - (check-equal? (run '(= 5 5)) #t) - (check-equal? (run '(= 4 5)) #f) - (check-equal? (run '(= (add1 4) 5)) #t) - (check-equal? (run '(< 5 5)) #f) - (check-equal? (run '(< 4 5)) #t) - (check-equal? (run '(< (add1 4) 5)) #f)) + (begin ;; Evildoer + (check-equal? (run '(void)) (void)) + (check-equal? (run '(begin 1 2)) 2) + (check-equal? (run '(eof-object? (void))) #f)) + (begin ;; Extort + (check-equal? (run '(add1 #f)) 'err) + (check-equal? (run '(sub1 #f)) 'err) + (check-equal? (run '(zero? #f)) 'err) + (check-equal? (run '(char->integer #f)) 'err) + (check-equal? (run '(integer->char #f)) 'err) + (check-equal? (run '(integer->char -1)) 'err) + (check-equal? (run '(write-byte #f)) 'err) + (check-equal? (run '(write-byte -1)) 'err) + (check-equal? (run '(write-byte 256)) 'err) + (check-equal? (run '(begin (integer->char 97) + (integer->char 98))) + #\b)) -(define (test-runner-io run) - ;; Evildoer examples - (check-equal? (run 7 "") (cons 7 "")) - (check-equal? (run '(write-byte 97) "") (cons (void) "a")) - (check-equal? 
(run '(read-byte) "a") (cons 97 "")) - (check-equal? (run '(begin (write-byte 97) (read-byte)) "b") - (cons 98 "a")) - (check-equal? (run '(read-byte) "") (cons eof "")) - (check-equal? (run '(eof-object? (read-byte)) "") (cons #t "")) - (check-equal? (run '(eof-object? (read-byte)) "a") (cons #f "")) - (check-equal? (run '(begin (write-byte 97) (write-byte 98)) "") - (cons (void) "ab")) + (begin ;; Fraud + (check-equal? (run '(let ((x 7)) x)) 7) + (check-equal? (run '(let ((x 7)) 2)) 2) + (check-equal? (run '(let ((x 7)) (add1 x))) 8) + (check-equal? (run '(let ((x (add1 7))) x)) 8) + (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) + (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) + (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) - (check-equal? (run '(peek-byte) "ab") (cons 97 "")) - (check-equal? (run '(begin (peek-byte) (read-byte)) "ab") (cons 97 "")) - ;; Extort examples - (check-equal? (run '(write-byte #t) "") (cons 'err "")) + (check-equal? (run '(let ((x 0)) + (if (zero? x) 7 8))) + 7) + (check-equal? (run '(let ((x 1)) + (add1 (if (zero? x) 7 8)))) + 9) + (check-equal? (run '(+ 3 4)) 7) + (check-equal? (run '(- 3 4)) -1) + (check-equal? (run '(+ (+ 2 1) 4)) 7) + (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) + (check-equal? (run '(let ((x (+ 1 2))) + (let ((z (- 4 x))) + (+ (+ x x) z)))) + 7) + + (check-equal? (run '(= 5 5)) #t) + (check-equal? (run '(= 4 5)) #f) + (check-equal? (run '(= (add1 4) 5)) #t) + (check-equal? (run '(< 5 5)) #f) + (check-equal? (run '(< 4 5)) #t) + (check-equal? (run '(< (add1 4) 5)) #f))) + +(define (test/io run) + (begin ;; Evildoer + (check-equal? (run "" 7) (cons 7 "")) + (check-equal? (run "" '(write-byte 97)) (cons (void) "a")) + (check-equal? (run "a" '(read-byte)) (cons 97 "")) + (check-equal? (run "b" '(begin (write-byte 97) (read-byte))) + (cons 98 "a")) + (check-equal? (run "" '(read-byte)) (cons eof "")) + (check-equal? (run "" '(eof-object? (read-byte))) (cons #t "")) + (check-equal? 
(run "a" '(eof-object? (read-byte))) (cons #f "")) + (check-equal? (run "" '(begin (write-byte 97) (write-byte 98))) + (cons (void) "ab")) + + (check-equal? (run "ab" '(peek-byte)) (cons 97 "")) + (check-equal? (run "ab" '(begin (peek-byte) (read-byte))) (cons 97 "")) + (check-equal? (run "†" '(read-byte)) (cons 226 "")) + (check-equal? (run "†" '(peek-byte)) (cons 226 ""))) + + (begin ;; Extort + (check-equal? (run "" '(write-byte #t)) (cons 'err ""))) + + (begin ;; Fraud + (check-equal? (run "" '(let ((x 97)) (write-byte x))) (cons (void) "a")) + (check-equal? (run "" + '(let ((x 97)) + (begin (write-byte x) + x))) + (cons 97 "a")) + (check-equal? (run "b" '(let ((x 97)) (begin (read-byte) x))) + (cons 97 "")) + (check-equal? (run "b" '(let ((x 97)) (begin (peek-byte) x))) + (cons 97 "")))) - ;; Fraud examples - (check-equal? (run '(let ((x 97)) (write-byte x)) "") (cons (void) "a")) - (check-equal? (run '(let ((x 97)) - (begin (write-byte x) - x)) - "") - (cons 97 "a")) - (check-equal? (run '(let ((x 97)) (begin (read-byte) x)) "b") - (cons 97 "")) - (check-equal? (run '(let ((x 97)) (begin (peek-byte) x)) "b") - (cons 97 ""))) diff --git a/langs/fraud/test/translate.rkt b/langs/fraud/test/translate.rkt index 044a938d..f83844b4 100644 --- a/langs/fraud/test/translate.rkt +++ b/langs/fraud/test/translate.rkt @@ -4,12 +4,12 @@ (require "../ast.rkt") (require rackunit) (check-equal? (translate (parse '(let ((x 0)) x))) - (Let '_ (Int 0) (Var 0))) + (Let '_ (Lit 0) (Var 0))) (check-equal? (translate (parse '(let ((x 0)) (let ((y 1)) x)))) - (Let '_ (Int 0) (Let '_ (Int 1) (Var 1)))) + (Let '_ (Lit 0) (Let '_ (Lit 1) (Var 1)))) (check-equal? (translate (parse '(let ((x 0)) (let ((y 1)) y)))) - (Let '_ (Int 0) (Let '_ (Int 1) (Var 0)))) + (Let '_ (Lit 0) (Let '_ (Lit 1) (Var 0)))) (check-equal? 
(translate (parse '(let ((x 0)) (let ((y x)) y)))) - (Let '_ (Int 0) (Let '_ (Var 0) (Var 0)))) + (Let '_ (Lit 0) (Let '_ (Var 0) (Var 0)))) diff --git a/langs/fraud/translate.rkt b/langs/fraud/translate.rkt index 1a7e333c..7db32222 100644 --- a/langs/fraud/translate.rkt +++ b/langs/fraud/translate.rkt @@ -3,10 +3,8 @@ (require "ast.rkt") ;; type IExpr = +;; | (Lit Datum) ;; | (Eof) -;; | (Int Integer) -;; | (Bool Boolean) -;; | (Char Character) ;; | (Prim0 Op0) ;; | (Prim1 Op1 IExpr) ;; | (Prim2 Op2 IExpr IExpr) @@ -25,10 +23,8 @@ ;; Expr LEnv -> IExpr (define (translate-e e r) (match e - [(Eof) e] - [(Int i) e] - [(Bool b) e] - [(Char c) e] + [(Eof) e] + [(Lit d) e] [(Prim0 p) e] [(Prim1 p e0) (Prim1 p (translate-e e0 r))] diff --git a/langs/fraud/types.rkt b/langs/fraud/types.rkt index 18a1415a..928a05fe 100644 --- a/langs/fraud/types.rkt +++ b/langs/fraud/types.rkt @@ -1,35 +1,36 @@ #lang racket (provide (all-defined-out)) - -(define int-shift 1) -(define char-shift 2) -(define type-int #b0) -(define mask-int #b1) -(define type-char #b01) -(define mask-char #b11) -(define val-true #b0011) -(define val-false #b0111) -(define val-eof #b1011) -(define val-void #b1111) +(define int-shift 1) +(define mask-int #b1) +(define char-shift 2) +(define type-int #b0) +(define type-char #b01) +(define mask-char #b11) (define (bits->value b) - (cond [(= type-int (bitwise-and b #b1)) + (cond [(= b (value->bits #t)) #t] + [(= b (value->bits #f)) #f] + [(= b (value->bits eof)) eof] + [(= b (value->bits (void))) (void)] + [(int-bits? b) (arithmetic-shift b (- int-shift))] - [(= type-char (bitwise-and b #b11)) + [(char-bits? b) (integer->char (arithmetic-shift b (- char-shift)))] - [(= b val-true) #t] - [(= b val-false) #f] - [(= b val-eof) eof] - [(= b val-void) (void)] [else (error "invalid bits")])) (define (value->bits v) - (cond [(eof-object? v) val-eof] + (cond [(eq? v #t) #b011] + [(eq? v #f) #b111] [(integer? v) (arithmetic-shift v int-shift)] + [(eof-object? 
v) #b1011] + [(void? v) #b1111] [(char? v) (bitwise-ior type-char - (arithmetic-shift (char->integer v) char-shift))] - [(eq? v #t) val-true] - [(eq? v #f) val-false] - [(void? v) val-void])) + (arithmetic-shift (char->integer v) char-shift))])) + +(define (int-bits? v) + (= type-int (bitwise-and v mask-int))) + +(define (char-bits? v) + (= type-char (bitwise-and v mask-char))) diff --git a/langs/fraud/values.c b/langs/fraud/values.c index 9bd2a704..bfdcf630 100644 --- a/langs/fraud/values.c +++ b/langs/fraud/values.c @@ -29,6 +29,10 @@ val_t val_wrap_int(int64_t i) { return (i << int_shift) | int_type_tag; } +val_t val_wrap_byte(unsigned char b) +{ + return (b << int_shift) | int_type_tag; +} int val_unwrap_bool(val_t x) { diff --git a/langs/fraud/values.h b/langs/fraud/values.h index 39cc43df..44f1c536 100644 --- a/langs/fraud/values.h +++ b/langs/fraud/values.h @@ -28,6 +28,7 @@ type_t val_typeof(val_t x); */ int64_t val_unwrap_int(val_t x); val_t val_wrap_int(int64_t i); +val_t val_wrap_byte(unsigned char b); int val_unwrap_bool(val_t x); val_t val_wrap_bool(int b); diff --git a/langs/hoax/Makefile b/langs/hoax/Makefile index 9b74bfc0..db6ac44a 100644 --- a/langs/hoax/Makefile +++ b/langs/hoax/Makefile @@ -1,37 +1,44 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ - values.o \ print.o \ + values.o \ io.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): 
cleaned!" -test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/hoax/ast.rkt b/langs/hoax/ast.rkt deleted file mode 100644 index 50a257d3..00000000 --- a/langs/hoax/ast.rkt +++ /dev/null @@ -1,45 +0,0 @@ -#lang racket -(provide Eof Int Bool Char Str Prim0 Prim1 Prim2 Prim3 If Begin Let Var Empty) - -;; type Expr = (Eof) -;; | (Empty) -;; | (Int Integer) -;; | (Bool Boolean) -;; | (Char Character) -;; | (Str String) -;; | (Prim0 Op0) -;; | (Prim1 Op1 Expr) -;; | (Prim2 Op2 Expr Expr) -;; | (Prim3 Op3 Expr Expr Expr) -;; | (If Expr Expr Expr) -;; | (Begin Expr Expr) -;; | (Let Id Expr Expr) -;; | (Var Id) -;; type Id = Symbol -;; type Op0 = 'read-byte -;; type Op1 = 'add1 | 'sub1 | 'zero? -;; | 'char? | 'integer->char | 'char->integer -;; | 'write-byte | 'eof-object? -;; | 'box | 'car | 'cdr | 'unbox -;; | 'empty? | 'cons? | 'box? -;; | 'vector? | vector-length -;; | 'string? | string-length -;; type Op2 = '+ | '- | '< | '= -;; | 'cons -;; | 'make-vector | 'vector-ref -;; | 'make-string | 'string-ref -;; type Op3 = 'vector-set! 
-(struct Eof () #:prefab) -(struct Empty () #:prefab) -(struct Int (i) #:prefab) -(struct Bool (b) #:prefab) -(struct Char (c) #:prefab) -(struct Str (s) #:prefab) -(struct Prim0 (p) #:prefab) -(struct Prim1 (p e) #:prefab) -(struct Prim2 (p e1 e2) #:prefab) -(struct Prim3 (p e1 e2 e3) #:prefab) -(struct If (e1 e2 e3) #:prefab) -(struct Begin (e1 e2) #:prefab) -(struct Let (x e1 e2) #:prefab) -(struct Var (x) #:prefab) diff --git a/langs/hoax/compile-file.rkt b/langs/hoax/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/hoax/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/hoax/compile.rkt b/langs/hoax/compile.rkt deleted file mode 100644 index 226559b1..00000000 --- a/langs/hoax/compile.rkt +++ /dev/null @@ -1,134 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" "compile-ops.rkt" a86/ast) - -;; Registers used -(define rax 'rax) ; return -(define rbx 'rbx) ; heap -(define rsp 'rsp) ; stack -(define rdi 'rdi) ; arg - -;; type CEnv = [Listof Variable] - -;; Expr -> Asm -(define (compile e) - (prog (Extern 'peek_byte) - (Extern 'read_byte) - (Extern 'write_byte) - (Extern 'raise_error) - (Global 'entry) - (Label 'entry) - (Mov rbx rdi) ; recv heap pointer - (compile-e e '()) - (Ret) - (Label 'raise_error_align) - (Sub rsp 8) - (Jmp 'raise_error))) - -;; Expr CEnv -> Asm -(define (compile-e e c) - (match e - [(Int i) (compile-value i)] - [(Bool b) (compile-value b)] - [(Char c) (compile-value c)] - [(Eof) (compile-value eof)] - [(Empty) (compile-value '())] - [(Var x) (compile-variable x c)] - [(Str s) (compile-string s)] - [(Prim0 p) 
(compile-prim0 p c)] - [(Prim1 p e) (compile-prim1 p e c)] - [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] - [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] - [(If e1 e2 e3) (compile-if e1 e2 e3 c)] - [(Begin e1 e2) (compile-begin e1 e2 c)] - [(Let x e1 e2) (compile-let x e1 e2 c)])) - -;; Value -> Asm -(define (compile-value v) - (seq (Mov rax (imm->bits v)))) - -;; Id CEnv -> Asm -(define (compile-variable x c) - (let ((i (lookup x c))) - (seq (Mov rax (Offset rsp i))))) - -;; String -> Asm -(define (compile-string s) - (let ((len (string-length s))) - (if (zero? len) - (seq (Mov rax type-str)) - (seq (Mov rax len) - (Mov (Offset rbx 0) rax) - (compile-string-chars (string->list s) 8) - (Mov rax rbx) - (Or rax type-str) - (Add rbx - (+ 8 (* 4 (if (odd? len) (add1 len) len)))))))) - -;; [Listof Char] Integer -> Asm -(define (compile-string-chars cs i) - (match cs - ['() (seq)] - [(cons c cs) - (seq (Mov rax (char->integer c)) - (Mov (Offset rbx i) 'eax) - (compile-string-chars cs (+ 4 i)))])) - -;; Op0 CEnv -> Asm -(define (compile-prim0 p c) - (compile-op0 p c)) - -;; Op1 Expr CEnv -> Asm -(define (compile-prim1 p e c) - (seq (compile-e e c) - (compile-op1 p c))) - -;; Op2 Expr Expr CEnv -> Asm -(define (compile-prim2 p e1 e2 c) - (seq (compile-e e1 c) - (Push rax) - (compile-e e2 (cons #f c)) - (compile-op2 p c))) - -;; Op3 Expr Expr Expr CEnv -> Asm -(define (compile-prim3 p e1 e2 e3 c) - (seq (compile-e e1 c) - (Push rax) - (compile-e e2 (cons #f c)) - (Push rax) - (compile-e e3 (cons #f (cons #f c))) - (compile-op3 p c))) - -;; Expr Expr Expr CEnv -> Asm -(define (compile-if e1 e2 e3 c) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1 c) - (Cmp rax val-false) - (Je l1) - (compile-e e2 c) - (Jmp l2) - (Label l1) - (compile-e e3 c) - (Label l2)))) - -;; Expr Expr CEnv -> Asm -(define (compile-begin e1 e2 c) - (seq (compile-e e1 c) - (compile-e e2 c))) - -;; Id Expr Expr CEnv -> Asm -(define (compile-let x e1 e2 c) - (seq (compile-e e1 c) - 
(Push rax) - (compile-e e2 (cons x c)) - (Add rsp 8))) - -;; Id CEnv -> Integer -(define (lookup x cenv) - (match cenv - ['() (error "undefined variable:" x)] - [(cons y rest) - (match (eq? x y) - [#t 0] - [#f (+ 8 (lookup x rest))])])) diff --git a/langs/hoax/info.rkt b/langs/hoax/info.rkt new file mode 100644 index 00000000..41ec40bb --- /dev/null +++ b/langs/hoax/info.rkt @@ -0,0 +1,2 @@ +#lang info +(define pre-install-collection "../installer.rkt") diff --git a/langs/hoax/interp-file.rkt b/langs/hoax/interp-file.rkt deleted file mode 100644 index e6c9b1d3..00000000 --- a/langs/hoax/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/hoax/interp-io.rkt b/langs/hoax/interp-io.rkt deleted file mode 100644 index 12da1b4b..00000000 --- a/langs/hoax/interp-io.rkt +++ /dev/null @@ -1,12 +0,0 @@ -#lang racket -(provide interp/io) -(require "interp.rkt") - -;; Expr String -> (Cons Value String) -;; Interpret e with given string as input, -;; collect output as string (including printed result) -(define (interp/io e input) - (parameterize ((current-output-port (open-output-string)) - (current-input-port (open-input-string input))) - (cons (interp e) - (get-output-string (current-output-port))))) diff --git a/langs/hoax/interp-prims.rkt b/langs/hoax/interp-prims.rkt index 601b240b..c7afbb4e 100644 --- a/langs/hoax/interp-prims.rkt +++ b/langs/hoax/interp-prims.rkt @@ -34,6 +34,7 @@ [(list '< (? integer?) (? integer?)) (< v1 v2)] [(list '= (? integer?) (? integer?)) (= v1 v2)] [(list 'cons v1 v2) (cons v1 v2)] + [(list 'eq? v1 v2) (eq? v1 v2)] [(list 'make-vector (? integer?) 
_) (if (<= 0 v1) (make-vector v1 v2) diff --git a/langs/hoax/interp.rkt b/langs/hoax/interp.rkt deleted file mode 100644 index 89243152..00000000 --- a/langs/hoax/interp.rkt +++ /dev/null @@ -1,72 +0,0 @@ -#lang racket -(provide interp interp-env interp-prim1) -(require "ast.rkt" - "env.rkt" - "interp-prims.rkt") - -;; type Answer = Value | 'err - -;; type Value = -;; | Integer -;; | Boolean -;; | Character -;; | Eof -;; | Void -;; | '() -;; | (cons Value Value) -;; | (box Value) -;; | (vector Value ...) -;; | (string Char ...) - -;; type REnv = (Listof (List Id Value)) - -;; Expr -> Answer -(define (interp e) - (interp-env e '())) - -;; Expr Env -> Answer -(define (interp-env e r) - (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Eof) eof] - [(Empty) '()] - [(Var x) (lookup r x)] - [(Str s) (string-copy s)] - [(Prim0 'void) (void)] - [(Prim0 'read-byte) (read-byte)] - [(Prim0 'peek-byte) (peek-byte)] - [(Prim1 p e) - (match (interp-env e r) - ['err 'err] - [v (interp-prim1 p v)])] - [(Prim2 p e1 e2) - (match (interp-env e1 r) - ['err 'err] - [v1 (match (interp-env e2 r) - ['err 'err] - [v2 (interp-prim2 p v1 v2)])])] - [(Prim3 p e1 e2 e3) - (match (interp-env e1 r) - ['err 'err] - [v1 (match (interp-env e2 r) - ['err 'err] - [v2 (match (interp-env e3 r) - ['err 'err] - [v3 (interp-prim3 p v1 v2 v3)])])])] - [(If p e1 e2) - (match (interp-env p r) - ['err 'err] - [v - (if v - (interp-env e1 r) - (interp-env e2 r))])] - [(Begin e1 e2) - (match (interp-env e1 r) - ['err 'err] - [_ (interp-env e2 r)])] - [(Let x e1 e2) - (match (interp-env e1 r) - ['err 'err] - [v (interp-env e2 (ext r x v))])])) diff --git a/langs/hoax/io.c b/langs/hoax/io.c index 7ef82281..8a417c91 100644 --- a/langs/hoax/io.c +++ b/langs/hoax/io.c @@ -7,14 +7,14 @@ val_t read_byte(void) { char c = getc(in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? 
val_wrap_eof() : val_wrap_byte(c); } val_t peek_byte(void) { char c = getc(in); ungetc(c, in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } diff --git a/langs/hoax/parse.rkt b/langs/hoax/parse.rkt deleted file mode 100644 index 2f954630..00000000 --- a/langs/hoax/parse.rkt +++ /dev/null @@ -1,44 +0,0 @@ -#lang racket -(provide parse) -(require "ast.rkt") - -;; S-Expr -> Expr -(define (parse s) - (match s - [(? integer?) (Int s)] - [(? boolean?) (Bool s)] - [(? char?) (Char s)] - [(? string?) (Str s)] - ['eof (Eof)] - [(? symbol?) (Var s)] - [(list 'quote (list)) (Empty)] - [(list (? (op? op0) p0)) (Prim0 p0)] - [(list (? (op? op1) p1) e) (Prim1 p1 (parse e))] - [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse e1) (parse e2))] - [(list (? (op? op3) p3) e1 e2 e3) - (Prim3 p3 (parse e1) (parse e2) (parse e3))] - [(list 'begin e1 e2) - (Begin (parse e1) (parse e2))] - [(list 'if e1 e2 e3) - (If (parse e1) (parse e2) (parse e3))] - [(list 'let (list (list (? symbol? x) e1)) e2) - (Let x (parse e1) (parse e2))] - [_ (error "Parse error" s)])) - -(define op0 - '(read-byte peek-byte void)) - -(define op1 - '(add1 sub1 zero? char? write-byte eof-object? - integer->char char->integer - box unbox empty? cons? box? car cdr - vector? vector-length string? string-length)) -(define op2 - '(+ - < = cons make-vector vector-ref make-string string-ref)) -(define op3 - '(vector-set!)) - -(define (op? ops) - (λ (x) - (and (symbol? 
x) - (memq x ops)))) diff --git a/langs/hoax/print.c b/langs/hoax/print.c index a88a5779..acb1413b 100644 --- a/langs/hoax/print.c +++ b/langs/hoax/print.c @@ -808,7 +808,7 @@ void print_char(val_char_t c) void print_codepoint(val_char_t c) { - static char buffer[5] = {0}; + char buffer[5] = {0}; utf8_encode_char(c, buffer); printf("%s", buffer); } diff --git a/langs/hoax/test/compile.rkt b/langs/hoax/test/compile.rkt deleted file mode 100644 index 9e845570..00000000 --- a/langs/hoax/test/compile.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(require "test-runner.rkt" - "build-runtime.rkt" - "../parse.rkt" - "../compile.rkt" - "../unload-bits-asm.rkt" - a86/interp) - -(test-runner (λ (e) (unload/free (asm-interp (compile (parse e)))))) -(test-runner-io (λ (e s) - (match (asm-interp/io (compile (parse e)) s) - ['err 'err] - [(cons r o) (cons (unload/free r) o)]))) diff --git a/langs/hoax/test/interp.rkt b/langs/hoax/test/interp.rkt deleted file mode 100644 index 1eaa5864..00000000 --- a/langs/hoax/test/interp.rkt +++ /dev/null @@ -1,9 +0,0 @@ -#lang racket -(require "test-runner.rkt" - "../parse.rkt" - "../interp.rkt" - "../interp-io.rkt") - -(test-runner (λ (e) (interp (parse e)))) - -(test-runner-io (λ (e s) (interp/io (parse e) s))) diff --git a/langs/hoax/test/test-progs.rkt b/langs/hoax/test/test-progs.rkt index 46769a8e..0eca217e 100644 --- a/langs/hoax/test/test-progs.rkt +++ b/langs/hoax/test/test-progs.rkt @@ -2,5 +2,5 @@ ;; run command line compiler and compare against Racket as refernece implementation (require rackunit "../../test-programs/get-progs.rkt" - "build-runtime.rkt") + "../run.rkt") (for-each test-prog (get-progs "hoax")) diff --git a/langs/hoax/test/test-runner.rkt b/langs/hoax/test/test-runner.rkt deleted file mode 100644 index 600d3084..00000000 --- a/langs/hoax/test/test-runner.rkt +++ /dev/null @@ -1,198 +0,0 @@ -#lang racket -(provide test-runner test-runner-io) -(require rackunit) - -(define (test-runner run) - ;; Abscond examples - 
(check-equal? (run 7) 7) - (check-equal? (run -8) -8) - - ;; Blackmail examples - (check-equal? (run '(add1 (add1 7))) 9) - (check-equal? (run '(add1 (sub1 7))) 7) - - ;; Con examples - (check-equal? (run '(if (zero? 0) 1 2)) 1) - (check-equal? (run '(if (zero? 1) 1 2)) 2) - (check-equal? (run '(if (zero? -7) 1 2)) 2) - (check-equal? (run '(if (zero? 0) - (if (zero? 1) 1 2) - 7)) - 2) - (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) - (if (zero? 1) 1 2) - 7)) - 7) - - ;; Dupe examples - (check-equal? (run #t) #t) - (check-equal? (run #f) #f) - (check-equal? (run (if #t 1 2)) 1) - (check-equal? (run (if #f 1 2)) 2) - (check-equal? (run (if 0 1 2)) 1) - (check-equal? (run '(if #t 3 4)) 3) - (check-equal? (run '(if #f 3 4)) 4) - (check-equal? (run '(if 0 3 4)) 3) - (check-equal? (run '(zero? 4)) #f) - (check-equal? (run '(zero? 0)) #t) - - ;; Dodger examples - (check-equal? (run #\a) #\a) - (check-equal? (run #\b) #\b) - (check-equal? (run '(char? #\a)) #t) - (check-equal? (run '(char? #t)) #f) - (check-equal? (run '(char? 8)) #f) - (check-equal? (run '(char->integer #\a)) (char->integer #\a)) - (check-equal? (run '(integer->char 955)) #\λ) - ;; Extort examples - (check-equal? (run '(add1 #f)) 'err) - (check-equal? (run '(sub1 #f)) 'err) - (check-equal? (run '(zero? #f)) 'err) - (check-equal? (run '(char->integer #f)) 'err) - (check-equal? (run '(integer->char #f)) 'err) - (check-equal? (run '(integer->char -1)) 'err) - (check-equal? (run '(write-byte #f)) 'err) - (check-equal? (run '(write-byte -1)) 'err) - (check-equal? (run '(write-byte 256)) 'err) - - ;; Fraud examples - (check-equal? (run '(let ((x 7)) x)) 7) - (check-equal? (run '(let ((x 7)) 2)) 2) - (check-equal? (run '(let ((x 7)) (add1 x))) 8) - (check-equal? (run '(let ((x (add1 7))) x)) 8) - (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) - (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) - (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) - - (check-equal? 
(run '(let ((x 0)) - (if (zero? x) 7 8))) - 7) - (check-equal? (run '(let ((x 1)) - (add1 (if (zero? x) 7 8)))) - 9) - (check-equal? (run '(+ 3 4)) 7) - (check-equal? (run '(- 3 4)) -1) - (check-equal? (run '(+ (+ 2 1) 4)) 7) - (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) - (check-equal? (run '(let ((x (+ 1 2))) - (let ((z (- 4 x))) - (+ (+ x x) z)))) - 7) - (check-equal? (run '(= 5 5)) #t) - (check-equal? (run '(= 4 5)) #f) - (check-equal? (run '(= (add1 4) 5)) #t) - (check-equal? (run '(< 5 5)) #f) - (check-equal? (run '(< 4 5)) #t) - (check-equal? (run '(< (add1 4) 5)) #f) - - ;; Hustle examples - (check-equal? (run ''()) '()) - (check-equal? (run '(box 1)) (box 1)) - (check-equal? (run '(cons 1 2)) (cons 1 2)) - (check-equal? (run '(unbox (box 1))) 1) - (check-equal? (run '(car (cons 1 2))) 1) - (check-equal? (run '(cdr (cons 1 2))) 2) - (check-equal? (run '(cons 1 '())) (list 1)) - (check-equal? (run '(box? (box 7))) #t) - (check-equal? (run '(cons? (box 7))) #f) - (check-equal? (run '(box? (cons 7 8))) #f) - (check-equal? (run '(cons? (cons 7 8))) #t) - (check-equal? (run '(empty? '())) #t) - (check-equal? (run '(empty? 7)) #f) - (check-equal? (run '(let ((x (box 2))) (unbox x))) 2) - (check-equal? (run '(let ((x (cons 2 '()))) (car x))) 2) - (check-equal? (run '(let ((x (cons 1 2))) - (begin (cdr x) - (car x)))) - 1) - (check-equal? (run '(let ((x (cons 1 2))) - (let ((y (box 3))) - (unbox y)))) - 3) - - ;; Hoax examples - (check-equal? (run '(make-vector 0 0)) #()) - (check-equal? (run '(make-vector 1 0)) #(0)) - (check-equal? (run '(make-vector 3 0)) #(0 0 0)) - (check-equal? (run '(make-vector 3 5)) #(5 5 5)) - (check-equal? (run '(vector? (make-vector 0 0))) #t) - (check-equal? (run '(vector? (cons 0 0))) #f) - (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) - (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) - (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) - (check-equal? 
(run '(vector-ref (make-vector 3 5) 2)) 5) - (check-equal? (run '(vector-ref (make-vector 3 5) 3)) 'err) - (check-equal? (run '(let ((x (make-vector 3 5))) - (begin (vector-set! x 0 4) - x))) - #(4 5 5)) - (check-equal? (run '(let ((x (make-vector 3 5))) - (begin (vector-set! x 1 4) - x))) - #(5 4 5)) - (check-equal? (run '(vector-length (make-vector 3 #f))) 3) - (check-equal? (run '(vector-length (make-vector 0 #f))) 0) - (check-equal? (run '"") "") - (check-equal? (run '"fred") "fred") - (check-equal? (run '"wilma") "wilma") - (check-equal? (run '(make-string 0 #\f)) "") - (check-equal? (run '(make-string 3 #\f)) "fff") - (check-equal? (run '(make-string 3 #\g)) "ggg") - (check-equal? (run '(string-length "")) 0) - (check-equal? (run '(string-length "fred")) 4) - (check-equal? (run '(string-ref "fred" 0)) #\f) - (check-equal? (run '(string-ref "fred" 1)) #\r) - (check-equal? (run '(string-ref "fred" 2)) #\e) - (check-equal? (run '(string-ref "fred" 4)) 'err) - (check-equal? (run '(string? "fred")) #t) - (check-equal? (run '(string? (cons 1 2))) #f)) - -(define (test-runner-io run) - ;; Evildoer examples - (check-equal? (run 7 "") (cons 7 "")) - (check-equal? (run '(write-byte 97) "") (cons (void) "a")) - (check-equal? (run '(read-byte) "a") (cons 97 "")) - (check-equal? (run '(begin (write-byte 97) (read-byte)) "b") - (cons 98 "a")) - (check-equal? (run '(read-byte) "") (cons eof "")) - (check-equal? (run '(eof-object? (read-byte)) "") (cons #t "")) - (check-equal? (run '(eof-object? (read-byte)) "a") (cons #f "")) - (check-equal? (run '(begin (write-byte 97) (write-byte 98)) "") - (cons (void) "ab")) - - (check-equal? (run '(peek-byte) "ab") (cons 97 "")) - (check-equal? (run '(begin (peek-byte) (read-byte)) "ab") (cons 97 "")) - ;; Extort examples - (check-equal? (run '(write-byte #t) "") (cons 'err "")) - - ;; Fraud examples - (check-equal? (run '(let ((x 97)) (write-byte x)) "") (cons (void) "a")) - (check-equal? 
(run '(let ((x 97)) - (begin (write-byte x) - x)) - "") - (cons 97 "a")) - (check-equal? (run '(let ((x 97)) (begin (read-byte) x)) "b") - (cons 97 "")) - (check-equal? (run '(let ((x 97)) (begin (peek-byte) x)) "b") - (cons 97 "")) - - ;; Hustle examples - (check-equal? (run '(let ((x 1)) - (begin (write-byte 97) - 1)) - "") - (cons 1 "a")) - - (check-equal? (run '(let ((x 1)) - (let ((y 2)) - (begin (write-byte 97) - 1))) - "") - (cons 1 "a")) - - (check-equal? (run '(let ((x (cons 1 2))) - (begin (write-byte 97) - (car x))) - "") - (cons 1 "a"))) diff --git a/langs/hoax/values.c b/langs/hoax/values.c index a61d65e6..b7e95f40 100644 --- a/langs/hoax/values.c +++ b/langs/hoax/values.c @@ -42,6 +42,10 @@ val_t val_wrap_int(int64_t i) { return (i << int_shift) | int_type_tag; } +val_t val_wrap_byte(unsigned char b) +{ + return (b << int_shift) | int_type_tag; +} int val_unwrap_bool(val_t x) { diff --git a/langs/hoax/values.h b/langs/hoax/values.h index 4cc48bbe..b6ac44f9 100644 --- a/langs/hoax/values.h +++ b/langs/hoax/values.h @@ -49,6 +49,7 @@ type_t val_typeof(val_t x); */ int64_t val_unwrap_int(val_t x); val_t val_wrap_int(int64_t i); +val_t val_wrap_byte(unsigned char b); int val_unwrap_bool(val_t x); val_t val_wrap_bool(int b); diff --git a/langs/hoodwink/Makefile b/langs/hoodwink/Makefile deleted file mode 100644 index 76f979af..00000000 --- a/langs/hoodwink/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -UNAME := $(shell uname) -.PHONY: test - -ifeq ($(UNAME), Darwin) - format=macho64 -else - format=elf64 -endif - -objs = \ - main.o \ - values.o \ - print.o \ - symbol.o \ - io.o - -default: runtime.o - -runtime.o: $(objs) - ld -r $(objs) -o runtime.o - -%.run: %.o runtime.o - gcc runtime.o $< -o $@ - -.c.o: - gcc -fPIC -c -g -o $@ $< - -.s.o: - nasm -g -f $(format) -o $@ $< - -%.s: %.rkt - racket -t compile-file.rkt -m $< > $@ - -clean: - rm *.o *.s *.run - -test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" diff --git 
a/langs/hoodwink/ast.rkt b/langs/hoodwink/ast.rkt deleted file mode 100644 index 3b4aff69..00000000 --- a/langs/hoodwink/ast.rkt +++ /dev/null @@ -1,43 +0,0 @@ -#lang racket -(provide Eof Int Bool Char Str Prim0 Prim1 Prim2 Prim3 If Begin Let Var Empty) - -;; type Expr = (Eof) -;; | (Empty) -;; | (Int Integer) -;; | (Bool Boolean) -;; | (Char Character) -;; | (Str String) -;; | (Prim0 Op0) -;; | (Prim1 Op1 Expr) -;; | (Prim2 Op2 Expr Expr) -;; | (Prim3 Op3 Expr Expr Expr) -;; | (If Expr Expr Expr) -;; | (Begin Expr Expr) -;; | (Let Id Expr Expr) -;; | (Var Id) -;; type Id = Symbol -;; type Op0 = 'read-byte -;; type Op1 = 'add1 | 'sub1 | 'zero? -;; | 'char? | 'integer->char | 'char->integer -;; | 'write-byte | 'eof-object? -;; | 'box | 'car | 'cdr | 'unbox -;; | 'empty? | 'cons? | 'box? -;; | 'vector? | vector-length -;; type Op2 = '+ | '- -;; | 'cons -;; | 'make-vector | 'vector-ref -;; type Op3 = 'vector-set! -(struct Eof () #:prefab) -(struct Empty () #:prefab) -(struct Int (i) #:prefab) -(struct Bool (b) #:prefab) -(struct Char (c) #:prefab) -(struct Str (s) #:prefab) -(struct Prim0 (p) #:prefab) -(struct Prim1 (p e) #:prefab) -(struct Prim2 (p e1 e2) #:prefab) -(struct Prim3 (p e1 e2 e3) #:prefab) -(struct If (e1 e2 e3) #:prefab) -(struct Begin (e1 e2) #:prefab) -(struct Let (x e1 e2) #:prefab) -(struct Var (x) #:prefab) diff --git a/langs/hoodwink/compile-file.rkt b/langs/hoodwink/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/hoodwink/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/hoodwink/compile.rkt b/langs/hoodwink/compile.rkt deleted file mode 100644 index 
0f0baf3e..00000000 --- a/langs/hoodwink/compile.rkt +++ /dev/null @@ -1,135 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" "compile-ops.rkt" a86/ast) - -;; Registers used -(define rax 'rax) ; return -(define rbx 'rbx) ; heap -(define rsp 'rsp) ; stack -(define rdi 'rdi) ; arg - -;; type CEnv = [Listof Variable] - -;; Expr -> Asm -(define (compile e) - (prog (Extern 'peek_byte) - (Extern 'read_byte) - (Extern 'write_byte) - (Extern 'gensym) - (Extern 'raise_error) - (Global 'entry) - (Label 'entry) - (Mov rbx rdi) ; recv heap pointer - (compile-e e '()) - (Ret) - (Label 'raise_error_align) - (Sub rsp 8) - (Jmp 'raise_error))) - -;; Expr CEnv -> Asm -(define (compile-e e c) - (match e - [(Int i) (compile-value i)] - [(Bool b) (compile-value b)] - [(Char c) (compile-value c)] - [(Eof) (compile-value eof)] - [(Empty) (compile-value '())] - [(Var x) (compile-variable x c)] - [(Str s) (compile-string s)] - [(Prim0 p) (compile-prim0 p c)] - [(Prim1 p e) (compile-prim1 p e c)] - [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] - [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] - [(If e1 e2 e3) (compile-if e1 e2 e3 c)] - [(Begin e1 e2) (compile-begin e1 e2 c)] - [(Let x e1 e2) (compile-let x e1 e2 c)])) - -;; Value -> Asm -(define (compile-value v) - (seq (Mov rax (imm->bits v)))) - -;; Id CEnv -> Asm -(define (compile-variable x c) - (let ((i (lookup x c))) - (seq (Mov rax (Offset rsp i))))) - -;; String -> Asm -(define (compile-string s) - (let ((len (string-length s))) - (if (zero? len) - (seq (Mov rax type-str)) - (seq (Mov rax len) - (Mov (Offset rbx 0) rax) - (compile-string-chars (string->list s) 8) - (Mov rax rbx) - (Or rax type-str) - (Add rbx - (+ 8 (* 4 (if (odd? 
len) (add1 len) len)))))))) - -;; [Listof Char] Integer -> Asm -(define (compile-string-chars cs i) - (match cs - ['() (seq)] - [(cons c cs) - (seq (Mov rax (char->integer c)) - (Mov (Offset rbx i) 'eax) - (compile-string-chars cs (+ 4 i)))])) - -;; Op0 CEnv -> Asm -(define (compile-prim0 p c) - (compile-op0 p c)) - -;; Op1 Expr CEnv -> Asm -(define (compile-prim1 p e c) - (seq (compile-e e c) - (compile-op1 p c))) - -;; Op2 Expr Expr CEnv -> Asm -(define (compile-prim2 p e1 e2 c) - (seq (compile-e e1 c) - (Push rax) - (compile-e e2 (cons #f c)) - (compile-op2 p c))) - -;; Op3 Expr Expr Expr CEnv -> Asm -(define (compile-prim3 p e1 e2 e3 c) - (seq (compile-e e1 c) - (Push rax) - (compile-e e2 (cons #f c)) - (Push rax) - (compile-e e3 (cons #f (cons #f c))) - (compile-op3 p c))) - -;; Expr Expr Expr CEnv -> Asm -(define (compile-if e1 e2 e3 c) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1 c) - (Cmp rax val-false) - (Je l1) - (compile-e e2 c) - (Jmp l2) - (Label l1) - (compile-e e3 c) - (Label l2)))) - -;; Expr Expr CEnv -> Asm -(define (compile-begin e1 e2 c) - (seq (compile-e e1 c) - (compile-e e2 c))) - -;; Id Expr Expr CEnv -> Asm -(define (compile-let x e1 e2 c) - (seq (compile-e e1 c) - (Push rax) - (compile-e e2 (cons x c)) - (Add rsp 8))) - -;; Id CEnv -> Integer -(define (lookup x cenv) - (match cenv - ['() (error "undefined variable:" x)] - [(cons y rest) - (match (eq? 
x y) - [#t 0] - [#f (+ 8 (lookup x rest))])])) diff --git a/langs/hoodwink/example.rkt b/langs/hoodwink/example.rkt deleted file mode 100644 index 2ee62b7b..00000000 --- a/langs/hoodwink/example.rkt +++ /dev/null @@ -1,2 +0,0 @@ -#lang racket -(cons '() '()) diff --git a/langs/hoodwink/interp-file.rkt b/langs/hoodwink/interp-file.rkt deleted file mode 100644 index e6c9b1d3..00000000 --- a/langs/hoodwink/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/hoodwink/interp-io.rkt b/langs/hoodwink/interp-io.rkt deleted file mode 100644 index 12da1b4b..00000000 --- a/langs/hoodwink/interp-io.rkt +++ /dev/null @@ -1,12 +0,0 @@ -#lang racket -(provide interp/io) -(require "interp.rkt") - -;; Expr String -> (Cons Value String) -;; Interpret e with given string as input, -;; collect output as string (including printed result) -(define (interp/io e input) - (parameterize ((current-output-port (open-output-string)) - (current-input-port (open-input-string input))) - (cons (interp e) - (get-output-string (current-output-port))))) diff --git a/langs/hoodwink/interp.rkt b/langs/hoodwink/interp.rkt deleted file mode 100644 index 89243152..00000000 --- a/langs/hoodwink/interp.rkt +++ /dev/null @@ -1,72 +0,0 @@ -#lang racket -(provide interp interp-env interp-prim1) -(require "ast.rkt" - "env.rkt" - "interp-prims.rkt") - -;; type Answer = Value | 'err - -;; type Value = -;; | Integer -;; | Boolean -;; | Character -;; | Eof -;; | Void -;; | '() -;; | (cons Value Value) -;; | (box Value) -;; | (vector Value ...) -;; | (string Char ...) 
- -;; type REnv = (Listof (List Id Value)) - -;; Expr -> Answer -(define (interp e) - (interp-env e '())) - -;; Expr Env -> Answer -(define (interp-env e r) - (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Eof) eof] - [(Empty) '()] - [(Var x) (lookup r x)] - [(Str s) (string-copy s)] - [(Prim0 'void) (void)] - [(Prim0 'read-byte) (read-byte)] - [(Prim0 'peek-byte) (peek-byte)] - [(Prim1 p e) - (match (interp-env e r) - ['err 'err] - [v (interp-prim1 p v)])] - [(Prim2 p e1 e2) - (match (interp-env e1 r) - ['err 'err] - [v1 (match (interp-env e2 r) - ['err 'err] - [v2 (interp-prim2 p v1 v2)])])] - [(Prim3 p e1 e2 e3) - (match (interp-env e1 r) - ['err 'err] - [v1 (match (interp-env e2 r) - ['err 'err] - [v2 (match (interp-env e3 r) - ['err 'err] - [v3 (interp-prim3 p v1 v2 v3)])])])] - [(If p e1 e2) - (match (interp-env p r) - ['err 'err] - [v - (if v - (interp-env e1 r) - (interp-env e2 r))])] - [(Begin e1 e2) - (match (interp-env e1 r) - ['err 'err] - [_ (interp-env e2 r)])] - [(Let x e1 e2) - (match (interp-env e1 r) - ['err 'err] - [v (interp-env e2 (ext r x v))])])) diff --git a/langs/hoodwink/parse.rkt b/langs/hoodwink/parse.rkt deleted file mode 100644 index db4ec746..00000000 --- a/langs/hoodwink/parse.rkt +++ /dev/null @@ -1,46 +0,0 @@ -#lang racket -(provide parse) -(require "ast.rkt") - -;; S-Expr -> Expr -(define (parse s) - (match s - [(? integer?) (Int s)] - [(? boolean?) (Bool s)] - [(? char?) (Char s)] - [(? string?) (Str s)] - ['eof (Eof)] - [(? symbol?) (Var s)] - [(list 'quote (list)) (Empty)] - [(list (? (op? op0) p0)) (Prim0 p0)] - [(list (? (op? op1) p1) e) (Prim1 p1 (parse e))] - [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse e1) (parse e2))] - [(list (? (op? op3) p3) e1 e2 e3) - (Prim3 p3 (parse e1) (parse e2) (parse e3))] - [(list 'begin e1 e2) - (Begin (parse e1) (parse e2))] - [(list 'if e1 e2 e3) - (If (parse e1) (parse e2) (parse e3))] - [(list 'let (list (list (? symbol? 
x) e1)) e2) - (Let x (parse e1) (parse e2))] - [_ (error "Parse error" s)])) - -(define op0 - '(read-byte peek-byte void gensym)) - -(define op1 - '(add1 sub1 zero? char? write-byte eof-object? - integer->char char->integer - box unbox empty? cons? box? car cdr - vector? vector-length string? string-length - symbol? string->symbol symbol->string)) -(define op2 - '(+ - cons make-vector vector-ref make-string string-ref - eq?)) -(define op3 - '(vector-set!)) - -(define (op? ops) - (λ (x) - (and (symbol? x) - (memq x ops)))) diff --git a/langs/hoodwink/symbol.c b/langs/hoodwink/symbol.c deleted file mode 100644 index ada4d6ea..00000000 --- a/langs/hoodwink/symbol.c +++ /dev/null @@ -1,33 +0,0 @@ -#include -#include -#include -#include -#include "values.h" - -static uint64_t gensym_ctr = 0; - -val_str_t *str_from_cstr(const char *); - -val_symb_t *gensym(void) -{ - char s[100]; // uint64_t has maximum 20 digits - sprintf(s, "g%" PRIu64, gensym_ctr++); - return (val_symb_t*)str_from_cstr(s); // uninterned symbol -} - -val_str_t *str_from_cstr(const char *s) -{ - int64_t len = strlen(s); - val_str_t *str = - malloc(sizeof(val_str_t) + len * sizeof(val_char_t)); - - if (!str) - return NULL; - - str->len = len; - int i; - for (i = 0; i < len; i++) { - str->codepoints[i] = (val_char_t)s[i]; - } - return str; -} diff --git a/langs/hoodwink/test/compile.rkt b/langs/hoodwink/test/compile.rkt deleted file mode 100644 index 00666520..00000000 --- a/langs/hoodwink/test/compile.rkt +++ /dev/null @@ -1,18 +0,0 @@ -#lang racket -(require "test-runner.rkt" - "../parse.rkt" - "../compile.rkt" - "../unload-bits-asm.rkt" - a86/interp) - -;; link with runtime for IO operations -(unless (file-exists? "../runtime.o") - (system "make -C .. 
runtime.o")) -(current-objs - (list (path->string (normalize-path "../runtime.o")))) - -(test-runner (λ (e) (unload/free (asm-interp (compile (parse e)))))) -(test-runner-io (λ (e s) - (match (asm-interp/io (compile (parse e)) s) - ['err 'err] - [(cons r o) (cons (unload/free r) o)]))) diff --git a/langs/hoodwink/test/test-runner.rkt b/langs/hoodwink/test/test-runner.rkt deleted file mode 100644 index 7b197c86..00000000 --- a/langs/hoodwink/test/test-runner.rkt +++ /dev/null @@ -1,191 +0,0 @@ -#lang racket -(provide test-runner test-runner-io) -(require rackunit) - -(define (test-runner run) - ;; Abscond examples - (check-equal? (run 7) 7) - (check-equal? (run -8) -8) - - ;; Blackmail examples - (check-equal? (run '(add1 (add1 7))) 9) - (check-equal? (run '(add1 (sub1 7))) 7) - - ;; Con examples - (check-equal? (run '(if (zero? 0) 1 2)) 1) - (check-equal? (run '(if (zero? 1) 1 2)) 2) - (check-equal? (run '(if (zero? -7) 1 2)) 2) - (check-equal? (run '(if (zero? 0) - (if (zero? 1) 1 2) - 7)) - 2) - (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) - (if (zero? 1) 1 2) - 7)) - 7) - - ;; Dupe examples - (check-equal? (run #t) #t) - (check-equal? (run #f) #f) - (check-equal? (run (if #t 1 2)) 1) - (check-equal? (run (if #f 1 2)) 2) - (check-equal? (run (if 0 1 2)) 1) - (check-equal? (run '(if #t 3 4)) 3) - (check-equal? (run '(if #f 3 4)) 4) - (check-equal? (run '(if 0 3 4)) 3) - (check-equal? (run '(zero? 4)) #f) - (check-equal? (run '(zero? 0)) #t) - - ;; Dodger examples - (check-equal? (run #\a) #\a) - (check-equal? (run #\b) #\b) - (check-equal? (run '(char? #\a)) #t) - (check-equal? (run '(char? #t)) #f) - (check-equal? (run '(char? 8)) #f) - (check-equal? (run '(char->integer #\a)) (char->integer #\a)) - (check-equal? (run '(integer->char 955)) #\λ) - ;; Extort examples - (check-equal? (run '(add1 #f)) 'err) - (check-equal? (run '(sub1 #f)) 'err) - (check-equal? (run '(zero? #f)) 'err) - (check-equal? (run '(char->integer #f)) 'err) - (check-equal? 
(run '(integer->char #f)) 'err) - (check-equal? (run '(integer->char -1)) 'err) - (check-equal? (run '(write-byte #f)) 'err) - (check-equal? (run '(write-byte -1)) 'err) - (check-equal? (run '(write-byte 256)) 'err) - - ;; Fraud examples - (check-equal? (run '(let ((x 7)) x)) 7) - (check-equal? (run '(let ((x 7)) 2)) 2) - (check-equal? (run '(let ((x 7)) (add1 x))) 8) - (check-equal? (run '(let ((x (add1 7))) x)) 8) - (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) - (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) - (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) - - (check-equal? (run '(let ((x 0)) - (if (zero? x) 7 8))) - 7) - (check-equal? (run '(let ((x 1)) - (add1 (if (zero? x) 7 8)))) - 9) - (check-equal? (run '(+ 3 4)) 7) - (check-equal? (run '(- 3 4)) -1) - (check-equal? (run '(+ (+ 2 1) 4)) 7) - (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) - (check-equal? (run '(let ((x (+ 1 2))) - (let ((z (- 4 x))) - (+ (+ x x) z)))) - 7) - ;; Hustle examples - (check-equal? (run ''()) '()) - (check-equal? (run '(box 1)) (box 1)) - (check-equal? (run '(cons 1 2)) (cons 1 2)) - (check-equal? (run '(unbox (box 1))) 1) - (check-equal? (run '(car (cons 1 2))) 1) - (check-equal? (run '(cdr (cons 1 2))) 2) - (check-equal? (run '(cons 1 '())) (list 1)) - (check-equal? (run '(box? (box 7))) #t) - (check-equal? (run '(cons? (box 7))) #f) - (check-equal? (run '(box? (cons 7 8))) #f) - (check-equal? (run '(cons? (cons 7 8))) #t) - (check-equal? (run '(empty? '())) #t) - (check-equal? (run '(empty? 7)) #f) - (check-equal? (run '(let ((x (box 2))) (unbox x))) 2) - (check-equal? (run '(let ((x (cons 2 '()))) (car x))) 2) - (check-equal? (run '(let ((x (cons 1 2))) - (begin (cdr x) - (car x)))) - 1) - (check-equal? (run '(let ((x (cons 1 2))) - (let ((y (box 3))) - (unbox y)))) - 3) - - ;; Hoax examples - (check-equal? (run '(make-vector 0 0)) #()) - (check-equal? (run '(make-vector 1 0)) #(0)) - (check-equal? (run '(make-vector 3 0)) #(0 0 0)) - (check-equal? 
(run '(make-vector 3 5)) #(5 5 5)) - (check-equal? (run '(vector? (make-vector 0 0))) #t) - (check-equal? (run '(vector? (cons 0 0))) #f) - (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) - (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) - (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) - (check-equal? (run '(vector-ref (make-vector 3 5) 2)) 5) - (check-equal? (run '(vector-ref (make-vector 3 5) 3)) 'err) - (check-equal? (run '(let ((x (make-vector 3 5))) - (begin (vector-set! x 0 4) - x))) - #(4 5 5)) - (check-equal? (run '(let ((x (make-vector 3 5))) - (begin (vector-set! x 1 4) - x))) - #(5 4 5)) - (check-equal? (run '(vector-length (make-vector 3 #f))) 3) - (check-equal? (run '(vector-length (make-vector 0 #f))) 0) - (check-equal? (run '"") "") - (check-equal? (run '"fred") "fred") - (check-equal? (run '"wilma") "wilma") - (check-equal? (run '(make-string 0 #\f)) "") - (check-equal? (run '(make-string 3 #\f)) "fff") - (check-equal? (run '(make-string 3 #\g)) "ggg") - (check-equal? (run '(string-length "")) 0) - (check-equal? (run '(string-length "fred")) 4) - (check-equal? (run '(string-ref "fred" 0)) #\f) - (check-equal? (run '(string-ref "fred" 1)) #\r) - (check-equal? (run '(string-ref "fred" 2)) #\e) - (check-equal? (run '(string-ref "fred" 4)) 'err) - (check-equal? (run '(string? "fred")) #t) - (check-equal? (run '(string? (cons 1 2))) #f)) - -(define (test-runner-io run) - ;; Evildoer examples - (check-equal? (run 7 "") (cons 7 "")) - (check-equal? (run '(write-byte 97) "") (cons (void) "a")) - (check-equal? (run '(read-byte) "a") (cons 97 "")) - (check-equal? (run '(begin (write-byte 97) (read-byte)) "b") - (cons 98 "a")) - (check-equal? (run '(read-byte) "") (cons eof "")) - (check-equal? (run '(eof-object? (read-byte)) "") (cons #t "")) - (check-equal? (run '(eof-object? (read-byte)) "a") (cons #f "")) - (check-equal? (run '(begin (write-byte 97) (write-byte 98)) "") - (cons (void) "ab")) - - (check-equal? 
(run '(peek-byte) "ab") (cons 97 "")) - (check-equal? (run '(begin (peek-byte) (read-byte)) "ab") (cons 97 "")) - ;; Extort examples - (check-equal? (run '(write-byte #t) "") (cons 'err "")) - - ;; Fraud examples - (check-equal? (run '(let ((x 97)) (write-byte x)) "") (cons (void) "a")) - (check-equal? (run '(let ((x 97)) - (begin (write-byte x) - x)) - "") - (cons 97 "a")) - (check-equal? (run '(let ((x 97)) (begin (read-byte) x)) "b") - (cons 97 "")) - (check-equal? (run '(let ((x 97)) (begin (peek-byte) x)) "b") - (cons 97 "")) - - ;; Hustle examples - (check-equal? (run '(let ((x 1)) - (begin (write-byte 97) - 1)) - "") - (cons 1 "a")) - - (check-equal? (run '(let ((x 1)) - (let ((y 2)) - (begin (write-byte 97) - 1))) - "") - (cons 1 "a")) - - (check-equal? (run '(let ((x (cons 1 2))) - (begin (write-byte 97) - (car x))) - "") - (cons 1 "a"))) diff --git a/langs/hoodwink/types.rkt b/langs/hoodwink/types.rkt deleted file mode 100644 index 1ac70167..00000000 --- a/langs/hoodwink/types.rkt +++ /dev/null @@ -1,70 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -(define imm-shift 3) -(define imm-mask #b111) -(define ptr-mask #b111) -(define type-box #b001) -(define type-cons #b010) -(define type-vect #b011) -(define type-str #b100) -(define type-symb #b101) -(define int-shift (+ 1 imm-shift)) -(define char-shift (+ 2 imm-shift)) -(define type-int #b0000) -(define mask-int #b1111) -(define type-char #b01000) -(define mask-char #b11111) -(define val-true #b0011000) -(define val-false #b0111000) -(define val-eof #b1011000) -(define val-void #b1111000) -(define val-empty #b10011000) - -(define (bits->value b) - (cond [(= type-int (bitwise-and b mask-int)) - (arithmetic-shift b (- int-shift))] - [(= type-char (bitwise-and b mask-char)) - (integer->char (arithmetic-shift b (- char-shift)))] - [(= b val-true) #t] - [(= b val-false) #f] - [(= b val-eof) eof] - [(= b val-void) (void)] - [(= b val-empty) '()] - [else (error "invalid bits")])) - -(define (imm->bits v) - 
(cond [(eof-object? v) val-eof] - [(integer? v) (arithmetic-shift v int-shift)] - [(char? v) - (bitwise-ior type-char - (arithmetic-shift (char->integer v) char-shift))] - [(eq? v #t) val-true] - [(eq? v #f) val-false] - [(void? v) val-void] - [(empty? v) val-empty])) - - -(define (imm-bits? v) - (zero? (bitwise-and v imm-mask))) - -(define (int-bits? v) - (zero? (bitwise-and v mask-int))) - -(define (char-bits? v) - (= type-char (bitwise-and v mask-char))) - -(define (cons-bits? v) - (zero? (bitwise-xor (bitwise-and v ptr-mask) type-cons))) - -(define (box-bits? v) - (zero? (bitwise-xor (bitwise-and v ptr-mask) type-box))) - -(define (vect-bits? v) - (zero? (bitwise-xor (bitwise-and v ptr-mask) type-vect))) - -(define (str-bits? v) - (zero? (bitwise-xor (bitwise-and v ptr-mask) type-str))) - -(define (symb-bits? v) - (zero? (bitwise-xor (bitwise-and v ptr-mask) type-symb))) diff --git a/langs/hustle/Makefile b/langs/hustle/Makefile index 9b74bfc0..db6ac44a 100644 --- a/langs/hustle/Makefile +++ b/langs/hustle/Makefile @@ -1,37 +1,44 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ - values.o \ print.o \ + values.o \ io.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
-test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/hustle/ast.rkt b/langs/hustle/ast.rkt deleted file mode 100644 index a16cdb21..00000000 --- a/langs/hustle/ast.rkt +++ /dev/null @@ -1,36 +0,0 @@ -#lang racket -(provide Eof Int Bool Char Prim0 Prim1 Prim2 If Begin Let Var Empty) - -;; type Expr = (Eof) -;; | (Empty) -;; | (Int Integer) -;; | (Bool Boolean) -;; | (Char Character) -;; | (Prim0 Op0) -;; | (Prim1 Op1 Expr) -;; | (Prim2 Op2 Expr Expr) -;; | (If Expr Expr Expr) -;; | (Begin Expr Expr) -;; | (Let Id Expr Expr) -;; | (Var Id) -;; type Id = Symbol -;; type Op0 = 'read-byte -;; type Op1 = 'add1 | 'sub1 | 'zero? -;; | 'char? | 'integer->char | 'char->integer -;; | 'write-byte | 'eof-object? -;; | 'box | 'car | 'cdr | 'unbox -;; | 'empty? | 'cons? | 'box? -;; type Op2 = '+ | '- | '< | '= -;; | 'cons -(struct Eof () #:prefab) -(struct Empty () #:prefab) -(struct Int (i) #:prefab) -(struct Bool (b) #:prefab) -(struct Char (c) #:prefab) -(struct Prim0 (p) #:prefab) -(struct Prim1 (p e) #:prefab) -(struct Prim2 (p e1 e2) #:prefab) -(struct If (e1 e2 e3) #:prefab) -(struct Begin (e1 e2) #:prefab) -(struct Let (x e1 e2) #:prefab) -(struct Var (x) #:prefab) diff --git a/langs/hustle/compile-file.rkt b/langs/hustle/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/hustle/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/hustle/compile-ops.rkt b/langs/hustle/compile-ops.rkt deleted file mode 100644 index a0951d74..00000000 --- 
a/langs/hustle/compile-ops.rkt +++ /dev/null @@ -1,222 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) - -(define rax 'rax) ; return -(define rbx 'rbx) ; heap -(define rdi 'rdi) ; arg -(define r8 'r8) ; scratch in +, - -(define r9 'r9) ; scratch in assert-type -(define rsp 'rsp) ; stack - -;; Op0 CEnv -> Asm -(define (compile-op0 p c) - (match p - ['void (seq (Mov rax val-void))] - ['read-byte (seq (pad-stack c) - (Call 'read_byte) - (unpad-stack c))] - ['peek-byte (seq (pad-stack c) - (Call 'peek_byte) - (unpad-stack c))])) - -;; Op1 CEnv -> Asm -(define (compile-op1 p c) - (match p - ['add1 - (seq (assert-integer rax c) - (Add rax (imm->bits 1)))] - ['sub1 - (seq (assert-integer rax c) - (Sub rax (imm->bits 1)))] - ['zero? - (let ((l1 (gensym))) - (seq (assert-integer rax c) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char? - (let ((l1 (gensym))) - (seq (And rax mask-char) - (Xor rax type-char) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char->integer - (seq (assert-char rax c) - (Sar rax char-shift) - (Sal rax int-shift))] - ['integer->char - (seq (assert-codepoint c) - (Sar rax int-shift) - (Sal rax char-shift) - (Xor rax type-char))] - ['eof-object? (eq-imm val-eof)] - ['write-byte - (seq (assert-byte c) - (pad-stack c) - (Mov rdi rax) - (Call 'write_byte) - (unpad-stack c) - (Mov rax val-void))] - ['box - (seq (Mov (Offset rbx 0) rax) - (Mov rax rbx) - (Or rax type-box) - (Add rbx 8))] - ['unbox - (seq (assert-box rax c) - (Xor rax type-box) - (Mov rax (Offset rax 0)))] - ['car - (seq (assert-cons rax c) - (Xor rax type-cons) - (Mov rax (Offset rax 8)))] - ['cdr - (seq (assert-cons rax c) - (Xor rax type-cons) - (Mov rax (Offset rax 0)))] - ['empty? (eq-imm val-empty)] - ['cons? 
- (let ((l1 (gensym))) - (seq (And rax ptr-mask) - (Xor rax type-cons) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['box? - (let ((l1 (gensym))) - (seq (And rax ptr-mask) - (Xor rax type-box) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))])) - -;; Op2 CEnv -> Asm -(define (compile-op2 p c) - (match p - ['+ - (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) - (Add rax r8))] - ['- - (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) - (Sub r8 rax) - (Mov rax r8))] - ['< - (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) - (Cmp r8 rax) - (Mov rax val-true) - (let ((true (gensym))) - (seq (Jl true) - (Mov rax val-false) - (Label true))))] - ['= - (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) - (Cmp r8 rax) - (Mov rax val-true) - (let ((true (gensym))) - (seq (Je true) - (Mov rax val-false) - (Label true))))] - ['cons - (seq (Mov (Offset rbx 0) rax) - (Pop rax) - (Mov (Offset rbx 8) rax) - (Mov rax rbx) - (Or rax type-cons) - (Add rbx 16))])) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (assert-type mask type) - (λ (arg c) - (seq (Mov r9 arg) - (And r9 mask) - (Cmp r9 type) - (Jne (error-label c))))) - -(define (type-pred mask type) - (let ((l (gensym))) - (seq (And rax mask) - (Cmp rax type) - (Mov rax (imm->bits #t)) - (Je l) - (Mov rax (imm->bits #f)) - (Label l)))) - -(define assert-integer - (assert-type mask-int type-int)) -(define assert-char - (assert-type mask-char type-char)) -(define assert-box - (assert-type ptr-mask type-box)) -(define assert-cons - (assert-type ptr-mask type-cons)) - -(define (assert-codepoint c) - (let ((ok (gensym))) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 1114111)) - (Jg (error-label c)) - (Cmp rax (imm->bits 55295)) - (Jl ok) - (Cmp rax (imm->bits 57344)) - (Jg ok) - (Jmp (error-label c)) - (Label ok)))) - -(define (assert-byte c) - 
(seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 255)) - (Jg (error-label c)))) - -;; Imm -> Asm -(define (eq-imm imm) - (let ((l1 (gensym))) - (seq (Cmp rax imm) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))) - -;; CEnv -> Asm -;; Pad the stack to be aligned for a call -(define (pad-stack c) - (match (even? (length c)) - [#t (seq (Sub rsp 8))] - [#f (seq)])) - -;; CEnv -> Asm -;; Undo the stack alignment after a call -(define (unpad-stack c) - (match (even? (length c)) - [#t (seq (Add rsp 8))] - [#f (seq)])) - -;; CEnv -> Label -;; Determine correct error handler label to jump to. -(define (error-label c) - (match (even? (length c)) - [#t 'raise_error] - [#f 'raise_error_align])) diff --git a/langs/hustle/compile.rkt b/langs/hustle/compile.rkt deleted file mode 100644 index 1dc5bf50..00000000 --- a/langs/hustle/compile.rkt +++ /dev/null @@ -1,101 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" "compile-ops.rkt" a86/ast) - -;; Registers used -(define rax 'rax) ; return -(define rbx 'rbx) ; heap -(define rsp 'rsp) ; stack -(define rdi 'rdi) ; arg - -;; type CEnv = [Listof Variable] - -;; Expr -> Asm -(define (compile e) - (prog (Extern 'peek_byte) - (Extern 'read_byte) - (Extern 'write_byte) - (Extern 'raise_error) - (Global 'entry) - (Label 'entry) - (Mov rbx rdi) ; recv heap pointer - (compile-e e '()) - (Ret) - (Label 'raise_error_align) - (Sub rsp 8) - (Jmp 'raise_error))) - -;; Expr CEnv -> Asm -(define (compile-e e c) - (match e - [(Int i) (compile-value i)] - [(Bool b) (compile-value b)] - [(Char c) (compile-value c)] - [(Eof) (compile-value eof)] - [(Empty) (compile-value '())] - [(Var x) (compile-variable x c)] - [(Prim0 p) (compile-prim0 p c)] - [(Prim1 p e) (compile-prim1 p e c)] - [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] - [(If e1 e2 e3) (compile-if e1 e2 e3 c)] - [(Begin e1 e2) (compile-begin e1 e2 c)] - [(Let x e1 e2) (compile-let x e1 e2 c)])) 
- -;; Value -> Asm -(define (compile-value v) - (seq (Mov rax (imm->bits v)))) - -;; Id CEnv -> Asm -(define (compile-variable x c) - (let ((i (lookup x c))) - (seq (Mov rax (Offset rsp i))))) - -;; Op0 CEnv -> Asm -(define (compile-prim0 p c) - (compile-op0 p c)) - -;; Op1 Expr CEnv -> Asm -(define (compile-prim1 p e c) - (seq (compile-e e c) - (compile-op1 p c))) - -;; Op2 Expr Expr CEnv -> Asm -(define (compile-prim2 p e1 e2 c) - (seq (compile-e e1 c) - (Push rax) - (compile-e e2 (cons #f c)) - (compile-op2 p c))) - -;; Expr Expr Expr CEnv -> Asm -(define (compile-if e1 e2 e3 c) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1 c) - (Cmp rax val-false) - (Je l1) - (compile-e e2 c) - (Jmp l2) - (Label l1) - (compile-e e3 c) - (Label l2)))) - -;; Expr Expr CEnv -> Asm -(define (compile-begin e1 e2 c) - (seq (compile-e e1 c) - (compile-e e2 c))) - -;; Id Expr Expr CEnv -> Asm -(define (compile-let x e1 e2 c) - (seq (compile-e e1 c) - (Push rax) - (compile-e e2 (cons x c)) - (Add rsp 8))) - -;; Id CEnv -> Integer -(define (lookup x cenv) - (match cenv - ['() (error "undefined variable:" x)] - [(cons y rest) - (match (eq? x y) - [#t 0] - [#f (+ 8 (lookup x rest))])])) diff --git a/langs/hustle/correctness.rkt b/langs/hustle/correctness.rkt index 0e5feb83..b117f263 100644 --- a/langs/hustle/correctness.rkt +++ b/langs/hustle/correctness.rkt @@ -4,15 +4,11 @@ "compile.rkt" "types.rkt" "parse.rkt" - "unload-bits-asm.rkt" - a86 rackunit) + "run.rkt" + rackunit) -(unless (file-exists? "runtime.o") - (system "make runtime.o")) -(current-objs - (list (path->string (normalize-path "runtime.o")))) (define (check-compiler e) - (check-equal? (unload/free (asm-interp (compile (parse e)))) + (check-equal? 
(run (compile (parse e))) (interp (parse e)) e)) diff --git a/langs/hustle/info.rkt b/langs/hustle/info.rkt new file mode 100644 index 00000000..41ec40bb --- /dev/null +++ b/langs/hustle/info.rkt @@ -0,0 +1,2 @@ +#lang info +(define pre-install-collection "../installer.rkt") diff --git a/langs/hustle/interp-file.rkt b/langs/hustle/interp-file.rkt deleted file mode 100644 index e6c9b1d3..00000000 --- a/langs/hustle/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/hustle/interp-heap-bits.rkt b/langs/hustle/interp-heap-bits.rkt index 9093a6ec..25862ed8 100644 --- a/langs/hustle/interp-heap-bits.rkt +++ b/langs/hustle/interp-heap-bits.rkt @@ -1,6 +1,6 @@ #lang racket (provide interp interp-env-heap) -(require "types.rkt" +(require (except-in "types.rkt" heap-ref) "env.rkt" "heap-bits.rkt" "interp-prims-heap-bits.rkt" @@ -18,15 +18,13 @@ ;; Expr REnv Heap -> Answer (define (interp-env-heap e r h) (match e - [(Int i) (cons h (imm->bits i))] - [(Bool b) (cons h (imm->bits b))] - [(Char c) (cons h (imm->bits c))] - [(Eof) (cons h (imm->bits eof))] - [(Empty) (cons h (imm->bits '()))] + [(Lit d) (cons h (value->bits d))] + [(Eof) (cons h (value->bits eof))] + [(Empty) (cons h (value->bits '()))] [(Var x) (cons h (lookup r x))] - [(Prim0 'void) (cons h (imm->bits (void)))] - [(Prim0 'read-byte) (cons h (imm->bits (read-byte)))] - [(Prim0 'peek-byte) (cons h (imm->bits (peek-byte)))] + [(Prim0 'void) (cons h (value->bits (void)))] + [(Prim0 'read-byte) (cons h (value->bits (read-byte)))] + [(Prim0 'peek-byte) (cons h (value->bits (peek-byte)))] [(Prim1 p e) (match (interp-env-heap e r h) ['err 'err] @@ -44,7 +42,7 @@ (match 
(interp-env-heap p r h) ['err 'err] [(cons h v) - (if (= v (imm->bits #f)) + (if (= v (value->bits #f)) (interp-env-heap e2 r h) (interp-env-heap e1 r h))])] [(Begin e1 e2) diff --git a/langs/hustle/interp-heap.rkt b/langs/hustle/interp-heap.rkt index e5b18f3c..69a0527c 100644 --- a/langs/hustle/interp-heap.rkt +++ b/langs/hustle/interp-heap.rkt @@ -1,7 +1,6 @@ #lang racket (provide interp interp-env-heap) -(require "heap.rkt" - "env.rkt" +(require "env.rkt" "unload.rkt" "interp-prims-heap.rkt" "ast.rkt") @@ -30,11 +29,9 @@ ;; Expr REnv Heap -> Answer* (define (interp-env-heap e r h) (match e - [(Int i) (cons h i)] - [(Bool b) (cons h b)] - [(Char c) (cons h c)] - [(Eof) (cons h eof)] + [(Lit d) (cons h d)] [(Empty) (cons h '())] + [(Eof) (cons h eof)] [(Var x) (cons h (lookup r x))] [(Prim0 'void) (cons h (void))] [(Prim0 'peek-byte) (cons h (peek-byte))] diff --git a/langs/hustle/interp-prims-heap-bits.rkt b/langs/hustle/interp-prims-heap-bits.rkt index e6bd9be7..a9f26ade 100644 --- a/langs/hustle/interp-prims-heap-bits.rkt +++ b/langs/hustle/interp-prims-heap-bits.rkt @@ -1,24 +1,24 @@ #lang racket (provide interp-prim1 interp-prim2) -(require "types.rkt" +(require (except-in "types.rkt" heap-ref) "heap-bits.rkt") ;; Op1 Value* Heap -> Answer* (define (interp-prim1 p v h) (match (list p v) - [(list 'add1 (? int-bits? i)) (cons h (+ i (imm->bits 1)))] - [(list 'sub1 (? int-bits? i)) (cons h (- i (imm->bits 1)))] - [(list 'zero? (? int-bits? i)) (cons h (imm->bits (zero? i)))] - [(list 'char? v) (cons h (imm->bits (char-bits? v)))] - [(list 'char->integer (? char-bits?)) (cons h (imm->bits (char->integer (bits->value v))))] - [(list 'integer->char (? cp-bits?)) (cons h (imm->bits (integer->char (bits->value v))))] - [(list 'eof-object? v) (cons h (if (= v (imm->bits eof)) val-true val-false))] - [(list 'write-byte (? byte-bits?)) (cons h (begin (write-byte (bits->value v)) val-void))] + [(list 'add1 (? int-bits? 
i)) (cons h (+ i (value->bits 1)))] + [(list 'sub1 (? int-bits? i)) (cons h (- i (value->bits 1)))] + [(list 'zero? (? int-bits? i)) (cons h (value->bits (zero? i)))] + [(list 'char? v) (cons h (value->bits (char-bits? v)))] + [(list 'char->integer (? char-bits?)) (cons h (value->bits (char->integer (bits->value v))))] + [(list 'integer->char (? cp-bits?)) (cons h (value->bits (integer->char (bits->value v))))] + [(list 'eof-object? v) (cons h (value->bits (= v (value->bits eof))))] + [(list 'write-byte (? byte-bits?)) (cons h (begin (write-byte (bits->value v)) (value->bits (void))))] [(list 'box v) (alloc-box v h)] [(list 'unbox (? box-bits? i)) (cons h (heap-ref h i))] [(list 'car (? cons-bits? i)) (cons h (heap-ref h i))] [(list 'cdr (? cons-bits? i)) (cons h (heap-ref h (+ i (arithmetic-shift 1 imm-shift))))] - [(list 'empty? v) (cons h (if (= (imm->bits '()) v) val-true val-false))] + [(list 'empty? v) (cons h (value->bits (= (value->bits '()) v)))] [_ 'err])) ;; Op2 Value* Value* Heap -> Answer* @@ -26,19 +26,19 @@ (match (list p v1 v2) [(list '+ (? int-bits? i1) (? int-bits? i2)) (cons h (+ i1 i2))] [(list '- (? int-bits? i1) (? int-bits? i2)) (cons h (- i1 i2))] - [(list '< (? int-bits? i1) (? int-bits? i2)) (cons h (imm->bits (< i1 i2)))] - [(list '= (? int-bits? i1) (? int-bits? i2)) (cons h (imm->bits (= i1 i2)))] - [(list 'eq? v1 v2) (cons h (= v1 v2))] + [(list '< (? int-bits? i1) (? int-bits? i2)) (cons h (value->bits (< i1 i2)))] + [(list '= (? int-bits? i1) (? int-bits? i2)) (cons h (value->bits (= i1 i2)))] + [(list 'eq? v1 v2) (cons h (value->bits (= v1 v2)))] [(list 'cons v1 v2) (alloc-cons v1 v2 h)] [_ 'err])) ;; Bits -> Boolean (define (byte-bits? i) (and (int-bits? i) - (<= (imm->bits 0) i (imm->bits 255)))) + (<= (value->bits 0) i (value->bits 255)))) ;; Bits -> Boolean (define (cp-bits? v) (and (int-bits? 
v) - (or (<= (imm->bits 0) v (imm->bits 55295)) - (<= (imm->bits 57344) v (imm->bits 1114111))))) + (or (<= (value->bits 0) v (value->bits 55295)) + (<= (value->bits 57344) v (value->bits 1114111))))) diff --git a/langs/hustle/interp-prims-heap.rkt b/langs/hustle/interp-prims-heap.rkt index 4596f44b..53cbd4f4 100644 --- a/langs/hustle/interp-prims-heap.rkt +++ b/langs/hustle/interp-prims-heap.rkt @@ -1,5 +1,5 @@ #lang racket -(provide interp-prim1 interp-prim2 interp-prim3) +(provide interp-prim1 interp-prim2) (require "heap.rkt") ;; Op1 Value* Heap -> Answer* @@ -18,9 +18,6 @@ [(list 'car (list 'cons i)) (cons h (heap-ref h i))] [(list 'cdr (list 'cons i)) (cons h (heap-ref h (add1 i)))] [(list 'empty? v) (cons h (empty? v))] - [(list 'string? (list 'str s)) (cons h #t)] - [(list 'string? v) (cons h #f)] - [(list 'string-length (list 'str a)) (cons h (heap-ref h a))] [_ 'err])) ;; Op2 Value* Value* Heap -> Answer* @@ -35,24 +32,8 @@ [(list (list t1 a1) (list t2 a2)) (cons h (and (eq? t1 t2) (= a1 a2)))] [_ (cons h (eqv? v1 v2))])] [(list 'cons v1 v2) (alloc-cons v1 v2 h)] - [(list 'make-string (? integer? i) (? char? c)) - (if (<= 0 i) - (alloc-str (make-string i c) h) - 'err)] - [(list 'string-ref (list 'str a) (? integer? i)) - (cons h (heap-ref h (+ a i 1)))] [_ 'err])) -;; Op2 Value* Value* Heap -> Answer* -(define (interp-prim3 p v1 v2 v3 h) - (match (list p v1 v2 v3) - [(list 'string-set! (list 'str a) (? integer? i) (? char? c)) - (if (<= 0 v2 (sub1 (heap-ref h a))) - (cons (heap-set h (+ a i 1) c) - (void)) - 'err)] - [_ 'err])) - ;; Any -> Boolean (define (codepoint? v) (and (integer? 
v) diff --git a/langs/hustle/interp.rkt b/langs/hustle/interp.rkt deleted file mode 100644 index 016781b9..00000000 --- a/langs/hustle/interp.rkt +++ /dev/null @@ -1,62 +0,0 @@ -#lang racket -(provide interp interp-env interp-prim1) -(require "ast.rkt" - "env.rkt" - "interp-prims.rkt") - -;; type Answer = Value | 'err - -;; type Value = -;; | Integer -;; | Boolean -;; | Character -;; | Eof -;; | Void -;; | '() -;; | (cons Value Value) -;; | (box Value) - -;; type REnv = (Listof (List Id Value)) - -;; Expr -> Answer -(define (interp e) - (interp-env e '())) - -;; Expr Env -> Answer -(define (interp-env e r) - (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Eof) eof] - [(Empty) '()] - [(Var x) (lookup r x)] - [(Prim0 'void) (void)] - [(Prim0 'read-byte) (read-byte)] - [(Prim0 'peek-byte) (peek-byte)] - [(Prim1 p e) - (match (interp-env e r) - ['err 'err] - [v (interp-prim1 p v)])] - [(Prim2 p e1 e2) - (match (interp-env e1 r) - ['err 'err] - [v1 (match (interp-env e2 r) - ['err 'err] - [v2 (interp-prim2 p v1 v2)])])] - [(If p e1 e2) - (match (interp-env p r) - ['err 'err] - [v - (if v - (interp-env e1 r) - (interp-env e2 r))])] - [(Begin e1 e2) - (match (interp-env e1 r) - ['err 'err] - [_ (interp-env e2 r)])] - [(Let x e1 e2) - (match (interp-env e1 r) - ['err 'err] - [v (interp-env e2 (ext r x v))])])) - diff --git a/langs/hustle/io.c b/langs/hustle/io.c index 7ef82281..8a417c91 100644 --- a/langs/hustle/io.c +++ b/langs/hustle/io.c @@ -7,14 +7,14 @@ val_t read_byte(void) { char c = getc(in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } val_t peek_byte(void) { char c = getc(in); ungetc(c, in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? 
val_wrap_eof() : val_wrap_byte(c); } diff --git a/langs/hustle/parse.rkt b/langs/hustle/parse.rkt deleted file mode 100644 index fa1000bb..00000000 --- a/langs/hustle/parse.rkt +++ /dev/null @@ -1,38 +0,0 @@ -#lang racket -(provide parse) -(require "ast.rkt") - -;; S-Expr -> Expr -(define (parse s) - (match s - [(? integer?) (Int s)] - [(? boolean?) (Bool s)] - [(? char?) (Char s)] - ['eof (Eof)] - [(? symbol?) (Var s)] - [(list 'quote (list)) (Empty)] - [(list (? (op? op0) p0)) (Prim0 p0)] - [(list (? (op? op1) p1) e) (Prim1 p1 (parse e))] - [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse e1) (parse e2))] - [(list 'begin e1 e2) - (Begin (parse e1) (parse e2))] - [(list 'if e1 e2 e3) - (If (parse e1) (parse e2) (parse e3))] - [(list 'let (list (list (? symbol? x) e1)) e2) - (Let x (parse e1) (parse e2))] - [_ (error "Parse error" s)])) - -(define op0 - '(read-byte peek-byte void)) - -(define op1 - '(add1 sub1 zero? char? write-byte eof-object? - integer->char char->integer - box unbox empty? cons? box? car cdr)) -(define op2 - '(+ - < = cons)) - -(define (op? ops) - (λ (x) - (and (symbol? x) - (memq x ops)))) diff --git a/langs/hustle/print.c b/langs/hustle/print.c index 1b265aaf..da04a825 100644 --- a/langs/hustle/print.c +++ b/langs/hustle/print.c @@ -104,7 +104,7 @@ void print_char(val_char_t c) void print_codepoint(val_char_t c) { - static char buffer[5] = {0}; + char buffer[5] = {0}; utf8_encode_char(c, buffer); printf("%s", buffer); } diff --git a/langs/hustle/test/all.rkt b/langs/hustle/test/all.rkt deleted file mode 100644 index 053b183f..00000000 --- a/langs/hustle/test/all.rkt +++ /dev/null @@ -1,146 +0,0 @@ -#lang racket -(require "../compile.rkt" - "../interp.rkt" - "../interp-io.rkt" - "../parse.rkt" - "../types.rkt" - "../unload-bits-asm.rkt" - a86 - rackunit) - -;; link with runtime for IO operations -(unless (file-exists? "../runtime.o") - (system "make -C .. 
runtime.o")) -(current-objs - (list (path->string (normalize-path "../runtime.o")))) - -(define (test-runner run) - - ;; Abscond examples - (check-equal? (run 7) 7) - (check-equal? (run -8) -8) - - ;; Blackmail examples - (check-equal? (run '(add1 (add1 7))) 9) - (check-equal? (run '(add1 (sub1 7))) 7) - - ;; Con examples - (check-equal? (run '(if (zero? 0) 1 2)) 1) - (check-equal? (run '(if (zero? 1) 1 2)) 2) - (check-equal? (run '(if (zero? -7) 1 2)) 2) - (check-equal? (run '(if (zero? 0) - (if (zero? 1) 1 2) - 7)) - 2) - (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) - (if (zero? 1) 1 2) - 7)) - 7) - - ;; Dupe examples - (check-equal? (run #t) #t) - (check-equal? (run #f) #f) - (check-equal? (run (if #t 1 2)) 1) - (check-equal? (run (if #f 1 2)) 2) - (check-equal? (run (if 0 1 2)) 1) - (check-equal? (run '(if #t 3 4)) 3) - (check-equal? (run '(if #f 3 4)) 4) - (check-equal? (run '(if 0 3 4)) 3) - (check-equal? (run '(zero? 4)) #f) - (check-equal? (run '(zero? 0)) #t) - ;; Dodger examples - (check-equal? (run #\a) #\a) - (check-equal? (run #\b) #\b) - (check-equal? (run '(char? #\a)) #t) - (check-equal? (run '(char? #t)) #f) - (check-equal? (run '(char? 8)) #f) - (check-equal? (run '(char->integer #\a)) (char->integer #\a)) - (check-equal? (run '(integer->char 955)) #\λ) - ;; Extort examples - (check-equal? (run '(add1 #f)) 'err) - (check-equal? (run '(sub1 #f)) 'err) - (check-equal? (run '(zero? #f)) 'err) - (check-equal? (run '(char->integer #f)) 'err) - (check-equal? (run '(integer->char #f)) 'err) - (check-equal? (run '(integer->char -1)) 'err) - (check-equal? (run '(write-byte #f)) 'err) - (check-equal? (run '(write-byte -1)) 'err) - (check-equal? (run '(write-byte 256)) 'err) - ;; Fraud examples - (check-equal? (run '(let ((x 7)) x)) 7) - (check-equal? (run '(let ((x 7)) 2)) 2) - (check-equal? (run '(let ((x 7)) (add1 x))) 8) - (check-equal? (run '(let ((x (add1 7))) x)) 8) - (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) - (check-equal? 
(run '(let ((x 7)) (let ((x 2)) x))) 2) - (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) - - (check-equal? (run '(let ((x 0)) - (if (zero? x) 7 8))) - 7) - (check-equal? (run '(let ((x 1)) - (add1 (if (zero? x) 7 8)))) - 9) - (check-equal? (run '(+ 3 4)) 7) - (check-equal? (run '(- 3 4)) -1) - (check-equal? (run '(+ (+ 2 1) 4)) 7) - (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) - (check-equal? (run '(let ((x (+ 1 2))) - (let ((z (- 4 x))) - (+ (+ x x) z)))) - 7) - - ;; Hustle examples - (check-equal? (run '(unbox (box 7))) 7) - (check-equal? (run '(let ((x (box 2))) (unbox x))) 2) - (check-equal? (run '(let ((x (cons 2 '()))) (car x))) 2) - (check-equal? (run '(box? (box 7))) #t) - (check-equal? (run '(cons? (box 7))) #f) - (check-equal? (run '(box? (cons 7 8))) #f) - (check-equal? (run '(cons? (cons 7 8))) #t) - (check-equal? (run '(empty? '())) #t) - (check-equal? (run '(empty? 7)) #f)) - -(test-runner (λ (e) (interp (parse e)))) -(test-runner (λ (e) (unload/free (asm-interp (compile (parse e)))))) - -(define (test-runner-io run) - ;; Evildoer examples - (check-equal? (run 7 "") (cons 7 "")) - (check-equal? (run '(write-byte 97) "") (cons (void) "a")) - (check-equal? (run '(read-byte) "a") (cons 97 "")) - (check-equal? (run '(begin (write-byte 97) (read-byte)) "b") - (cons 98 "a")) - (check-equal? (run '(read-byte) "") (cons eof "")) - (check-equal? (run '(eof-object? (read-byte)) "") (cons #t "")) - (check-equal? (run '(eof-object? (read-byte)) "a") (cons #f "")) - (check-equal? (run '(begin (write-byte 97) (write-byte 98)) "") - (cons (void) "ab")) - - (check-equal? (run '(peek-byte) "ab") (cons 97 "")) - (check-equal? (run '(begin (peek-byte) (read-byte)) "ab") (cons 97 "")) - ;; Extort examples - (check-equal? (run '(write-byte #t) "") (cons 'err "")) - - ;; Fraud examples - (check-equal? (run '(let ((x 97)) (write-byte x)) "") (cons (void) "a")) - (check-equal? 
(run '(let ((x 97)) - (begin (write-byte x) - x)) - "") - (cons 97 "a")) - (check-equal? (run '(let ((x 97)) (begin (read-byte) x)) "b") - (cons 97 "")) - (check-equal? (run '(let ((x 97)) (begin (peek-byte) x)) "b") - (cons 97 ""))) - - -(test-runner-io (λ (e s) (interp/io (parse e) s))) -(test-runner-io (λ (e s) - (match (asm-interp/io (compile (parse e)) s) - [(cons 'err o) (cons 'err o)] - [(cons r o) (cons (unload/free r) o)]))) - -;; run command line compiler and compare against Racket as refernece implementation -(require rackunit "../../test-programs/get-progs.rkt") -(for-each test-prog (get-progs "hustle")) diff --git a/langs/hustle/test/compile.rkt b/langs/hustle/test/compile.rkt deleted file mode 100644 index 00666520..00000000 --- a/langs/hustle/test/compile.rkt +++ /dev/null @@ -1,18 +0,0 @@ -#lang racket -(require "test-runner.rkt" - "../parse.rkt" - "../compile.rkt" - "../unload-bits-asm.rkt" - a86/interp) - -;; link with runtime for IO operations -(unless (file-exists? "../runtime.o") - (system "make -C .. 
runtime.o")) -(current-objs - (list (path->string (normalize-path "../runtime.o")))) - -(test-runner (λ (e) (unload/free (asm-interp (compile (parse e)))))) -(test-runner-io (λ (e s) - (match (asm-interp/io (compile (parse e)) s) - ['err 'err] - [(cons r o) (cons (unload/free r) o)]))) diff --git a/langs/hustle/test/interp-heap-bits.rkt b/langs/hustle/test/interp-heap-bits.rkt index 00fcf6c6..dc527441 100644 --- a/langs/hustle/test/interp-heap-bits.rkt +++ b/langs/hustle/test/interp-heap-bits.rkt @@ -4,6 +4,6 @@ "../interp-heap-bits.rkt" "../interp-io.rkt") -(test-runner (λ (e) (interp (parse e)))) +(test (λ (e) (interp (parse e)))) -(test-runner-io (λ (e s) (interp/io (parse e) s))) +(test/io (λ (s e) (interp/io (parse e) s))) diff --git a/langs/hustle/test/interp-heap.rkt b/langs/hustle/test/interp-heap.rkt index 0955d26b..06f12b82 100644 --- a/langs/hustle/test/interp-heap.rkt +++ b/langs/hustle/test/interp-heap.rkt @@ -4,6 +4,6 @@ "../interp-heap.rkt" "../interp-io.rkt") -(test-runner (λ (e) (interp (parse e)))) +(test (λ (e) (interp (parse e)))) -(test-runner-io (λ (e s) (interp/io (parse e) s))) +(test/io (λ (s e) (interp/io (parse e) s))) diff --git a/langs/hustle/test/test-runner.rkt b/langs/hustle/test/test-runner.rkt deleted file mode 100644 index 49497b5b..00000000 --- a/langs/hustle/test/test-runner.rkt +++ /dev/null @@ -1,151 +0,0 @@ -#lang racket -(provide test-runner test-runner-io) -(require rackunit) - -(define (test-runner run) - ;; Abscond examples - (check-equal? (run 7) 7) - (check-equal? (run -8) -8) - - ;; Blackmail examples - (check-equal? (run '(add1 (add1 7))) 9) - (check-equal? (run '(add1 (sub1 7))) 7) - - ;; Con examples - (check-equal? (run '(if (zero? 0) 1 2)) 1) - (check-equal? (run '(if (zero? 1) 1 2)) 2) - (check-equal? (run '(if (zero? -7) 1 2)) 2) - (check-equal? (run '(if (zero? 0) - (if (zero? 1) 1 2) - 7)) - 2) - (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) - (if (zero? 
1) 1 2) - 7)) - 7) - - ;; Dupe examples - (check-equal? (run #t) #t) - (check-equal? (run #f) #f) - (check-equal? (run '(if #t 1 2)) 1) - (check-equal? (run '(if #f 1 2)) 2) - (check-equal? (run '(if 0 1 2)) 1) - (check-equal? (run '(if #t 3 4)) 3) - (check-equal? (run '(if #f 3 4)) 4) - (check-equal? (run '(if 0 3 4)) 3) - (check-equal? (run '(zero? 4)) #f) - (check-equal? (run '(zero? 0)) #t) - ;; Dodger examples - (check-equal? (run #\a) #\a) - (check-equal? (run #\b) #\b) - (check-equal? (run '(char? #\a)) #t) - (check-equal? (run '(char? #t)) #f) - (check-equal? (run '(char? 8)) #f) - (check-equal? (run '(char->integer #\a)) (char->integer #\a)) - (check-equal? (run '(integer->char 955)) #\λ) - ;; Extort examples - (check-equal? (run '(add1 #f)) 'err) - (check-equal? (run '(sub1 #f)) 'err) - (check-equal? (run '(zero? #f)) 'err) - (check-equal? (run '(char->integer #f)) 'err) - (check-equal? (run '(integer->char #f)) 'err) - (check-equal? (run '(integer->char -1)) 'err) - (check-equal? (run '(write-byte #f)) 'err) - (check-equal? (run '(write-byte -1)) 'err) - (check-equal? (run '(write-byte 256)) 'err) - ;; Fraud examples - (check-equal? (run '(let ((x 7)) x)) 7) - (check-equal? (run '(let ((x 7)) 2)) 2) - (check-equal? (run '(let ((x 7)) (add1 x))) 8) - (check-equal? (run '(let ((x (add1 7))) x)) 8) - (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) - (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) - (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) - - (check-equal? (run '(let ((x 0)) - (if (zero? x) 7 8))) - 7) - (check-equal? (run '(let ((x 1)) - (add1 (if (zero? x) 7 8)))) - 9) - (check-equal? (run '(+ 3 4)) 7) - (check-equal? (run '(- 3 4)) -1) - (check-equal? (run '(+ (+ 2 1) 4)) 7) - (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) - (check-equal? (run '(let ((x (+ 1 2))) - (let ((z (- 4 x))) - (+ (+ x x) z)))) - 7) - (check-equal? (run '(= 5 5)) #t) - (check-equal? (run '(= 4 5)) #f) - (check-equal? 
(run '(= (add1 4) 5)) #t) - (check-equal? (run '(< 5 5)) #f) - (check-equal? (run '(< 4 5)) #t) - (check-equal? (run '(< (add1 4) 5)) #f) - - ;; Hustle examples - (check-equal? (run ''()) '()) - (check-equal? (run '(box 1)) (box 1)) - (check-equal? (run '(cons 1 2)) (cons 1 2)) - (check-equal? (run '(unbox (box 1))) 1) - (check-equal? (run '(car (cons 1 2))) 1) - (check-equal? (run '(cdr (cons 1 2))) 2) - (check-equal? (run '(cons 1 '())) (list 1)) - (check-equal? (run '(let ((x (cons 1 2))) - (begin (cdr x) - (car x)))) - 1) - (check-equal? (run '(let ((x (cons 1 2))) - (let ((y (box 3))) - (unbox y)))) - 3)) - -(define (test-runner-io run) - ;; Evildoer examples - (check-equal? (run 7 "") (cons 7 "")) - (check-equal? (run '(write-byte 97) "") (cons (void) "a")) - (check-equal? (run '(read-byte) "a") (cons 97 "")) - (check-equal? (run '(begin (write-byte 97) (read-byte)) "b") - (cons 98 "a")) - (check-equal? (run '(read-byte) "") (cons eof "")) - (check-equal? (run '(eof-object? (read-byte)) "") (cons #t "")) - (check-equal? (run '(eof-object? (read-byte)) "a") (cons #f "")) - (check-equal? (run '(begin (write-byte 97) (write-byte 98)) "") - (cons (void) "ab")) - - (check-equal? (run '(peek-byte) "ab") (cons 97 "")) - (check-equal? (run '(begin (peek-byte) (read-byte)) "ab") (cons 97 "")) - ;; Extort examples - (check-equal? (run '(write-byte #t) "") (cons 'err "")) - - ;; Fraud examples - (check-equal? (run '(let ((x 97)) (write-byte x)) "") (cons (void) "a")) - (check-equal? (run '(let ((x 97)) - (begin (write-byte x) - x)) - "") - (cons 97 "a")) - (check-equal? (run '(let ((x 97)) (begin (read-byte) x)) "b") - (cons 97 "")) - (check-equal? (run '(let ((x 97)) (begin (peek-byte) x)) "b") - (cons 97 "")) - - ;; Hustle examples - (check-equal? (run '(let ((x 1)) - (begin (write-byte 97) - 1)) - "") - (cons 1 "a")) - - (check-equal? (run '(let ((x 1)) - (let ((y 2)) - (begin (write-byte 97) - 1))) - "") - (cons 1 "a")) - - (check-equal? 
(run '(let ((x (cons 1 2))) - (begin (write-byte 97) - (car x))) - "") - (cons 1 "a"))) diff --git a/langs/hustle/types.rkt b/langs/hustle/types.rkt deleted file mode 100644 index 359165bd..00000000 --- a/langs/hustle/types.rkt +++ /dev/null @@ -1,58 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -(define imm-shift 3) -(define imm-mask #b111) -(define ptr-mask #b111) -(define type-box #b001) -(define type-cons #b010) -(define int-shift (+ 1 imm-shift)) -(define char-shift (+ 2 imm-shift)) -(define type-int #b0000) -(define mask-int #b1111) -(define type-char #b01000) -(define mask-char #b11111) -(define val-true #b0011000) -(define val-false #b0111000) -(define val-eof #b1011000) -(define val-void #b1111000) -(define val-empty #b10011000) - -(define (bits->value b) - (cond [(= type-int (bitwise-and b mask-int)) - (arithmetic-shift b (- int-shift))] - [(= type-char (bitwise-and b mask-char)) - (integer->char (arithmetic-shift b (- char-shift)))] - [(= b val-true) #t] - [(= b val-false) #f] - [(= b val-eof) eof] - [(= b val-void) (void)] - [(= b val-empty) '()] - [else (error "invalid bits")])) - -(define (imm->bits v) - (cond [(eof-object? v) val-eof] - [(integer? v) (arithmetic-shift v int-shift)] - [(char? v) - (bitwise-ior type-char - (arithmetic-shift (char->integer v) char-shift))] - [(eq? v #t) val-true] - [(eq? v #f) val-false] - [(void? v) val-void] - [(empty? v) val-empty])) - - -(define (imm-bits? v) - (zero? (bitwise-and v imm-mask))) - -(define (int-bits? v) - (zero? (bitwise-and v mask-int))) - -(define (char-bits? v) - (= type-char (bitwise-and v mask-char))) - -(define (cons-bits? v) - (zero? (bitwise-xor (bitwise-and v imm-mask) type-cons))) - -(define (box-bits? v) - (zero? 
(bitwise-xor (bitwise-and v imm-mask) type-box))) diff --git a/langs/hustle/unload-bits-asm.rkt b/langs/hustle/unload-bits-asm.rkt deleted file mode 100644 index 6ed2dd86..00000000 --- a/langs/hustle/unload-bits-asm.rkt +++ /dev/null @@ -1,28 +0,0 @@ -#lang racket -(provide unload/free unload-value) -(require "types.rkt" - ffi/unsafe) - -;; Answer* -> Answer -(define (unload/free a) - (match a - ['err 'err] - [(cons h v) (begin0 (unload-value v) - (free h))])) - -;; Value* -> Value -(define (unload-value v) - (match v - [(? imm-bits?) (bits->value v)] - [(? box-bits? i) - (box (unload-value (heap-ref i)))] - [(? cons-bits? i) - (cons (unload-value (heap-ref (+ i (arithmetic-shift 1 imm-shift)))) - (unload-value (heap-ref i)))])) - -(define (untag i) - (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) - (integer-length ptr-mask))) - -(define (heap-ref i) - (ptr-ref (cast (untag i) _int64 _pointer) _uint64)) diff --git a/langs/hustle/unload-bits.rkt b/langs/hustle/unload-bits.rkt index 3f850a07..21d084b6 100644 --- a/langs/hustle/unload-bits.rkt +++ b/langs/hustle/unload-bits.rkt @@ -1,6 +1,6 @@ #lang racket (provide unload unload-value) -(require "types.rkt" +(require (except-in "types.rkt" heap-ref) "heap-bits.rkt") ;; Answer* -> Answer diff --git a/langs/hustle/values.c b/langs/hustle/values.c index 3330f8d2..b96fffbf 100644 --- a/langs/hustle/values.c +++ b/langs/hustle/values.c @@ -38,6 +38,10 @@ val_t val_wrap_int(int64_t i) { return (i << int_shift) | int_type_tag; } +val_t val_wrap_byte(unsigned char b) +{ + return (b << int_shift) | int_type_tag; +} int val_unwrap_bool(val_t x) { diff --git a/langs/hustle/values.h b/langs/hustle/values.h index 92e67e5b..ceab2e0a 100644 --- a/langs/hustle/values.h +++ b/langs/hustle/values.h @@ -39,6 +39,7 @@ type_t val_typeof(val_t x); */ int64_t val_unwrap_int(val_t x); val_t val_wrap_int(int64_t i); +val_t val_wrap_byte(unsigned char b); int val_unwrap_bool(val_t x); val_t val_wrap_bool(int b); diff 
--git a/langs/info.rkt b/langs/info.rkt index 87ff4185..2a677859 100644 --- a/langs/info.rkt +++ b/langs/info.rkt @@ -1,4 +1,15 @@ #lang info (define version "1.0") (define collection 'multi) -(define deps (list)) +(define deps (list "base" "rackunit" "redex-lib")) +(define build-deps + (list "https://github.com/cmsc430/www.git?path=ziggy#ziggy")) + +;; Outlaw is omitted here because it depends on libraries that are a pain +;; to ensure are set up properly and we don't want students to see failing +;; tests at the beginning of the semester, nor do we want to get into +;; setting up libraries only needed in the last week and only if you +;; actually care to run Outlaw. + +;; To test outlaw you should do an explicit: raco test -c outlaw +(define test-omit-paths (list "outlaw")) diff --git a/langs/iniquity-gc/Makefile b/langs/iniquity-gc/Makefile new file mode 100644 index 00000000..47b2b108 --- /dev/null +++ b/langs/iniquity-gc/Makefile @@ -0,0 +1,40 @@ +UNAME := $(shell uname) + +ifeq ($(UNAME), Darwin) + format=macho64 + CC=arch -x86_64 gcc +else + format=elf64 + CC=gcc +endif + +objs = \ + main.o \ + print.o \ + values.o \ + io.o \ + gc.o + +default: runtime.o + +runtime.o: $(objs) + ld -r $(objs) -o runtime.o + +%.run: %.o runtime.o + $(CC) runtime.o $< -o $@ + +.c.o: + $(CC) -fPIC -c -g -o $@ $< + +.s.o: + nasm -g -f $(format) -o $@ $< + +%.s: %.rkt + cat $< | racket -t compile-stdin.rkt -m > $@ + +clean: + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
+ +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/iniquity/ast.rkt b/langs/iniquity-gc/ast.rkt similarity index 100% rename from langs/iniquity/ast.rkt rename to langs/iniquity-gc/ast.rkt diff --git a/langs/iniquity-gc/build-runtime.rkt b/langs/iniquity-gc/build-runtime.rkt new file mode 100644 index 00000000..66aad89f --- /dev/null +++ b/langs/iniquity-gc/build-runtime.rkt @@ -0,0 +1,14 @@ +#lang racket +(require racket/runtime-path) +(provide runtime-path) + +(define-runtime-path here ".") + +(void + (system (string-append "make -C '" + (path->string (normalize-path here)) + "' runtime.o"))) + +(define runtime-path + (path->string + (normalize-path (build-path here "runtime.o")))) diff --git a/langs/iniquity-gc/compile-ops.rkt b/langs/iniquity-gc/compile-ops.rkt new file mode 100644 index 00000000..265e559f --- /dev/null +++ b/langs/iniquity-gc/compile-ops.rkt @@ -0,0 +1,427 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" "types.rkt" a86/ast) + +(define rax 'rax) ; return +(define eax 'eax) ; 32-bit load/store +(define rbx 'rbx) ; heap +(define rdi 'rdi) ; arg +(define rsi 'rsi) ; arg +(define rdx 'rdx) ; arg +(define rcx 'rcx) ; arg +(define r8 'r8) ; scratch +(define r9 'r9) ; scratch +(define r10 'r10) ; scratch +(define r14 'r14) ; stack pad (non-volatile) +(define r15 'r15) ; stack pad (non-volatile) +(define rsp 'rsp) ; stack +(define rbp 'rbp) ; base stack + +;; Op0 -> Asm +(define (compile-op0 p) + (match p + ['void (seq (Mov rax (value->bits (void))))] + ['read-byte (seq pad-stack + (Call 'read_byte) + unpad-stack)] + ['peek-byte (seq pad-stack + (Call 'peek_byte) + unpad-stack)] + ['dump-memory-stats + (seq (Mov rdi rsp) + (Mov rsi rbp) + (Mov rdx rbx) + pad-stack + (Call 'print_memory) + unpad-stack + (Mov rax (value->bits (void))))] + ['collect-garbage + (seq (Mov rdi rsp) + (Mov rsi rbp) + (Mov rdx rbx) + pad-stack + (Call 'collect_garbage) + unpad-stack + (Mov rbx rax) + (Mov rax 
(value->bits (void))))])) + +;; Op1 -> Asm +(define (compile-op1 p) + (match p + ['add1 + (seq (assert-integer rax) + (Add rax (value->bits 1)))] + ['sub1 + (seq (assert-integer rax) + (Sub rax (value->bits 1)))] + ['zero? + (seq (assert-integer rax) + (eq-imm 0))] + ['char? + (type-pred mask-char type-char)] + ['char->integer + (seq (assert-char rax) + (Sar rax char-shift) + (Sal rax int-shift))] + ['integer->char + (seq (assert-codepoint rax) + (Sar rax int-shift) + (Sal rax char-shift) + (Xor rax type-char))] + ['eof-object? (eq-imm eof)] + ['write-byte + (seq (assert-byte rax) + pad-stack + (Mov rdi rax) + (Call 'write_byte) + unpad-stack)] + ['box + (seq (Push rax) + (allocate 1) + (Pop rax) + (Mov (Offset rbx 0) rax) + (Mov rax rbx) + (Or rax type-box) + (Add rbx 8))] + ['unbox + (seq (assert-box rax) + (Xor rax type-box) + (Mov rax (Offset rax 0)))] + ['car + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 8)))] + ['cdr + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 0)))] + ['empty? (eq-imm '())] + ['box? + (type-pred ptr-mask type-box)] + ['cons? + (type-pred ptr-mask type-cons)] + ['vector? + (type-pred ptr-mask type-vect)] + ['string? 
+ (type-pred ptr-mask type-str)] + ['vector-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-vector rax) + (Xor rax type-vect) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))] + ['string-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-string rax) + (Xor rax type-str) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))])) + +(define (allocate n) + (seq (Mov rdi rsp) + (Mov rsi rbp) + (Mov rdx rbx) + (Mov rcx n) + pad-stack + (Call 'alloc_val) + unpad-stack + (Mov rbx rax))) + +;; Op2 -> Asm +(define (compile-op2 p) + (match p + ['+ + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Add rax r8))] + ['- + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Sub r8 rax) + (Mov rax r8))] + ['< + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax (value->bits #t)) + (let ((true (gensym))) + (seq (Jl true) + (Mov rax (value->bits #f)) + (Label true))))] + ['= + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax (value->bits #t)) + (let ((true (gensym))) + (seq (Je true) + (Mov rax (value->bits #f)) + (Label true))))] + ;; tricky: if you have a pointer in a register, GC might collect + ;; what it points to and create a dangling reference + ['cons + (seq (Push rax) + (allocate 2) + (Pop rax) + (Mov (Offset rbx 0) rax) + (Pop rax) + (Mov (Offset rbx 8) rax) + (Mov rax rbx) + (Or rax type-cons) + (Add rbx 16))] + ['eq? 
+ (seq (Pop r8) + (eq r8 rax))] + ['make-vector + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (Cmp r8 0) ; special case empty vector + (Je empty) + + + (Push rax) + (Mov rax r8) + (Sar rax int-shift) + (Add rax 1) + (allocate rax) + (Pop rax) + + + (Mov r9 rbx) + (Or r9 type-vect) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Label loop) + (Mov (Offset rbx 0) rax) + (Add rbx 8) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-vect) + (Label done)))] + ['vector-ref + (seq (Pop r8) + (assert-vector r8) + (assert-integer rax) + (Cmp rax 0) + (Jl 'raise_error_align) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'raise_error_align) + (Sal rax 3) + (Add r8 rax) + (Mov rax (Offset r8 8)))] + + ['make-string + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (assert-char rax) + (Cmp r8 0) ; special case empty string + (Je empty) + + (Push rax) + (Mov rax r8) + (Sar rax int-shift) + (Add rax 1) ; adds 1 + (Sar rax 1) ; when + (Sal rax 1) ; len is odd + (Add rax 1) + (allocate rax) + (Pop rax) + + + (Mov r9 rbx) + (Or r9 type-str) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Sar rax char-shift) + + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd + + (Label loop) + (Mov (Offset rbx 0) eax) + (Add rbx 4) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-str) + (Label done)))] + + + ['string-ref + (seq (Pop r8) + (assert-string r8) + (assert-integer rax) + (Cmp rax 0) + (Jl 'raise_error_align) + (Xor r8 type-str) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'raise_error_align) + (Sal rax 2) + (Add r8 rax) + (Mov 'eax (Offset r8 8)) + (Sal rax 
char-shift) + (Or rax type-char))] + + ['set-box! + (seq (Pop r8) + (assert-box r8) + (Xor r8 type-box) + (Mov (Offset r8 0) rax) + (Mov rax (value->bits (void))))])) + +;; Op3 -> Asm +(define (compile-op3 p) + (match p + ['vector-set! + (seq (Pop r10) + (Pop r8) + (assert-vector r8) + (assert-integer r10) + (Cmp r10 0) + (Jl 'raise_error_align) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar r10 int-shift) ; r10 = index + (Sub r9 1) + (Cmp r9 r10) + (Jl 'raise_error_align) + (Sal r10 3) + (Add r8 r10) + (Mov (Offset r8 8) rax) + (Mov rax (value->bits (void))))])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (assert-type mask type) + (λ (arg) + (seq (Mov r9 arg) + (And r9 mask) + (Cmp r9 type) + (Jne 'raise_error_align)))) + +(define (type-pred mask type) + (let ((l (gensym))) + (seq (And rax mask) + (Cmp rax type) + (Mov rax (value->bits #t)) + (Je l) + (Mov rax (value->bits #f)) + (Label l)))) + +(define assert-integer + (assert-type mask-int type-int)) +(define assert-char + (assert-type mask-char type-char)) +(define assert-box + (assert-type ptr-mask type-box)) +(define assert-cons + (assert-type ptr-mask type-cons)) +(define assert-vector + (assert-type ptr-mask type-vect)) +(define assert-string + (assert-type ptr-mask type-str)) + +(define (assert-codepoint r) + (let ((ok (gensym))) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align) + (Cmp r (value->bits 1114111)) + (Jg 'raise_error_align) + (Cmp r (value->bits 55295)) + (Jl ok) + (Cmp r (value->bits 57344)) + (Jg ok) + (Jmp 'raise_error_align) + (Label ok)))) + +(define (assert-byte r) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align) + (Cmp r (value->bits 255)) + (Jg 'raise_error_align))) + +(define (assert-natural r) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align))) + +;; Value -> Asm +(define (eq-imm imm) + (let ((l1 (gensym))) + (seq (Cmp rax (value->bits imm)) + (Mov rax 
(value->bits #t)) + (Je l1) + (Mov rax (value->bits #f)) + (Label l1)))) + +(define (eq ir1 ir2) + (let ((l1 (gensym))) + (seq (Cmp ir1 ir2) + (Mov rax (value->bits #t)) + (Je l1) + (Mov rax (value->bits #f)) + (Label l1)))) + +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; Asm +;; Undo the stack alignment after a call +(define unpad-stack + (seq (Add rsp r15))) diff --git a/langs/iniquity-gc/compile-stdin.rkt b/langs/iniquity-gc/compile-stdin.rkt new file mode 100644 index 00000000..cfa15106 --- /dev/null +++ b/langs/iniquity-gc/compile-stdin.rkt @@ -0,0 +1,10 @@ +#lang racket +(provide main) +(require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile (parse (read-all))))) diff --git a/langs/iniquity/compile.rkt b/langs/iniquity-gc/compile.rkt similarity index 87% rename from langs/iniquity/compile.rkt rename to langs/iniquity-gc/compile.rkt index f2c2a1ff..258e5a6f 100644 --- a/langs/iniquity/compile.rkt +++ b/langs/iniquity-gc/compile.rkt @@ -17,19 +17,27 @@ (prog (externs) (Global 'entry) (Label 'entry) - (Mov rbx rdi) ; recv heap pointer + (Push 'rbx) + (Push 'rbp) + (Mov 'rbp 'rsp) ; save stack base pointer + (Mov rbx rdi) ; recv heap pointer (compile-e e '()) + (Pop 'rbp) + (Pop 'rbx) (Ret) (compile-defines ds) (Label 'raise_error_align) - (Sub rsp 8) - (Jmp 'raise_error))])) + pad-stack + (Call 'raise_error))])) (define (externs) (seq (Extern 'peek_byte) (Extern 'read_byte) (Extern 'write_byte) - (Extern 'raise_error))) + (Extern 'raise_error) + (Extern 'print_memory) + (Extern 'collect_garbage) + (Extern 'alloc_val))) ;; [Listof Defn] -> Asm (define (compile-defines ds) @@ -69,7 +77,7 @@ ;; Value -> Asm (define (compile-value v) - (seq (Mov rax (imm->bits v)))) + (seq (Mov rax (value->bits v)))) ;; Id 
CEnv -> Asm (define (compile-variable x c) @@ -81,7 +89,8 @@ (let ((len (string-length s))) (if (zero? len) (seq (Mov rax type-str)) - (seq (Mov rax len) + (seq (allocate (add1 (quotient (add1 len) 2))) + (Mov rax len) (Mov (Offset rbx 0) rax) (compile-string-chars (string->list s) 8) (Mov rax rbx) @@ -100,19 +109,19 @@ ;; Op0 CEnv -> Asm (define (compile-prim0 p c) - (compile-op0 p c)) + (compile-op0 p)) ;; Op1 Expr CEnv -> Asm (define (compile-prim1 p e c) (seq (compile-e e c) - (compile-op1 p c))) + (compile-op1 p))) ;; Op2 Expr Expr CEnv -> Asm (define (compile-prim2 p e1 e2 c) (seq (compile-e e1 c) (Push rax) (compile-e e2 (cons #f c)) - (compile-op2 p c))) + (compile-op2 p))) ;; Op3 Expr Expr Expr CEnv -> Asm (define (compile-prim3 p e1 e2 e3 c) @@ -121,14 +130,14 @@ (compile-e e2 (cons #f c)) (Push rax) (compile-e e3 (cons #f (cons #f c))) - (compile-op3 p c))) + (compile-op3 p))) ;; Expr Expr Expr CEnv -> Asm (define (compile-if e1 e2 e3 c) (let ((l1 (gensym 'if)) (l2 (gensym 'if))) (seq (compile-e e1 c) - (Cmp rax val-false) + (Cmp rax (value->bits #f)) (Je l1) (compile-e e2 c) (Jmp l2) @@ -153,19 +162,11 @@ ;; arguments and return address is next frame (define (compile-app f es c) (let ((r (gensym 'ret))) - (seq (pad-stack c) - (Lea rax r) + (seq (Lea rax r) (Push rax) - (compile-es es (static-pad (cons #f c))) + (compile-es es (cons #f c)) (Jmp (symbol->label f)) - (Label r) - (unpad-stack c)))) - -;; CEnv -> CEnv -(define (static-pad c) - (if (odd? 
(length c)) - (cons #f c) - c)) + (Label r)))) ;; [Listof Expr] CEnv -> Asm (define (compile-es es c) diff --git a/langs/hoodwink/env.rkt b/langs/iniquity-gc/env.rkt similarity index 100% rename from langs/hoodwink/env.rkt rename to langs/iniquity-gc/env.rkt diff --git a/langs/iniquity-gc/gc.c b/langs/iniquity-gc/gc.c new file mode 100644 index 00000000..a5f5e48f --- /dev/null +++ b/langs/iniquity-gc/gc.c @@ -0,0 +1,150 @@ +#include +#include +#include +#include +#include "values.h" +#include "runtime.h" + +const char* val_typeof_string(int64_t t) { + switch (val_typeof(t)) { + case T_INT: return "INT"; + case T_BOOL: return "BOOL"; + case T_CHAR: return "CHAR"; + case T_EOF: return "EOF"; + case T_VOID: return "VOID"; + case T_EMPTY: return "EMPTY"; + case T_BOX: return "BOX"; + case T_CONS: return "CONS"; + case T_VECT: return "VECT"; + case T_STR: return "STR"; + default: return "UNKNOWN"; + } +} + +void step(val_t** to_curr, val_t** to_next, int count, int* t_back) { + type_t t; + int i; + int size; + val_t v; + val_t *ptr_v; + for (i = 0; i < count; i++) { + v = **to_curr; + t = val_typeof(v); + switch (t) { + case T_BOX: + case T_CONS: + case T_VECT: + case T_STR: + ptr_v = val_unwrap(v); + if (ptr_v >= from && ptr_v < from + heap_size) { + // this is a pointer to from space so we need to deal with it + if (val_unwrap(*ptr_v) >= to && + val_unwrap(*ptr_v) < to + heap_size) { + // it points to a fwd pointer (points in to to-space), so just set + // curr to what it points to. 
+ **to_curr = *ptr_v; + *to_curr = *to_curr + 1; + } else { + // copy, fwd, update + size = val_size(ptr_v, t); + types[*t_back] = t; // enqueue type + *t_back = *t_back + 1; + memcpy(*to_next, ptr_v, 8 * size); // copy + *ptr_v = val_wrap(*to_next, t); // fwd + **to_curr = val_wrap(*to_next, t); // update + *to_next = *to_next + size; + *to_curr = *to_curr + 1; + } + } else { + // looks like a pointer, but doesn't point to from-space + // leave it alone + *to_curr = *to_curr + 1; + } + break; + default: + // not a pointer + *to_curr = *to_curr + 1; + } + } +} + + +int64_t* collect_garbage(int64_t* rsp, int64_t *rbp, int64_t* rbx) { + + printf("Collect garbage: rsp = %" PRIx64 ", rbp = %" PRIx64 ", rbx = %" PRIx64 "\n", + (int64_t)rsp, (int64_t)rbp, (int64_t)rbx); + + int stack_count = rbp - rsp; + + val_t *tmp; + val_t *to_next = to; + val_t *to_curr = to; + + int t_back = 0; + int t_front = 0; + + // Step through everything on the stack + val_t *rsp_curr = rsp; + step(&rsp_curr, &to_next, stack_count, &t_back); + int vi; + // now play catch up between to_curr and to_next + while (to_curr != to_next) { + switch (types[t_front++]) { + case T_VECT: + vi = to_curr[0]; + to_curr++; + step(&to_curr, &to_next, vi, &t_back); + break; + case T_BOX: + step(&to_curr, &to_next, 1, &t_back); + break; + case T_CONS: + step(&to_curr, &to_next, 2, &t_back); + break; + case T_STR: + to_curr = to_curr + 1 + ((*to_curr + 1) / 2); + break; + default: + to_curr++; + break; + } + } + + tmp = from; + from = to; + to = tmp; + return to_next; +} + + +void print_memory(int64_t* rsp, int64_t* rbp, int64_t* rbx) { + + int stack_count = rbp - rsp; + int heap_count = rbx - from; + + printf("----------------------------------------------------------------\n"); + int i; + + printf("STACK:\n"); + for (i = 0; i < stack_count; i++) { + printf("[%" PRIx64 "] = %016" PRIx64 ", %s\n", + (int64_t)rsp + 8*i, rsp[i], val_typeof_string(rsp[i])); + } + printf("HEAP:\n"); + for (i = 0; i < heap_count; i++) 
{ + printf("[%" PRIx64 "] = %016" PRIx64 ", %s\n", + (int64_t)from + 8*i, from[i], val_typeof_string(from[i])); + } +} + +int64_t* alloc_val(int64_t* rsp, int64_t* rbp, int64_t* rbx, int words) { + if (rbx + words >= from + heap_size) { + rbx = collect_garbage(rsp, rbp, rbx); + if (rbx + words >= from + heap_size) { + printf("OUT OF MEMORY!!\n"); + error_handler(); + } + } + // printf("returning %" PRIx64 "\n", (int64_t)rbx); + return rbx; +} diff --git a/langs/iniquity/interp-io.rkt b/langs/iniquity-gc/interp-io.rkt similarity index 100% rename from langs/iniquity/interp-io.rkt rename to langs/iniquity-gc/interp-io.rkt diff --git a/langs/hustle/interp-prims.rkt b/langs/iniquity-gc/interp-prims.rkt similarity index 54% rename from langs/hustle/interp-prims.rkt rename to langs/iniquity-gc/interp-prims.rkt index 261084ea..4cbabc6c 100644 --- a/langs/hustle/interp-prims.rkt +++ b/langs/iniquity-gc/interp-prims.rkt @@ -1,6 +1,6 @@ #lang racket (require "ast.rkt") -(provide interp-prim1 interp-prim2) +(provide interp-prim1 interp-prim2 interp-prim3) ;; Op1 Value -> Answer (define (interp-prim1 p1 v) @@ -20,6 +20,10 @@ [(list 'empty? v) (empty? v)] [(list 'cons? v) (cons? v)] [(list 'box? v) (box? v)] + [(list 'vector? v) (vector? v)] + [(list 'vector-length (? vector?)) (vector-length v)] + [(list 'string? v) (string? v)] + [(list 'string-length (? string?)) (string-length v)] [_ 'err])) ;; Op2 Value Value -> Answer @@ -28,9 +32,35 @@ [(list '+ (? integer?) (? integer?)) (+ v1 v2)] [(list '- (? integer?) (? integer?)) (- v1 v2)] [(list '< (? integer?) (? integer?)) (< v1 v2)] - [(list '= (? integer?) (? integer?)) (= v1 v2)] + [(list '= (? integer?) (? integer?)) (= v1 v2)] [(list 'cons v1 v2) (cons v1 v2)] - [_ 'err])) + [(list 'eq? v1 v2) (eq? v1 v2)] + [(list 'make-vector (? integer?) _) + (if (<= 0 v1) + (make-vector v1 v2) + 'err)] + [(list 'vector-ref (? vector?) (? 
integer?)) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-ref v1 v2) + 'err)] + [(list 'make-string (? integer?) (? char?)) + (if (<= 0 v1) + (make-string v1 v2) + 'err)] + [(list 'string-ref (? string?) (? integer?)) + (if (<= 0 v2 (sub1 (string-length v1))) + (string-ref v1 v2) + 'err)] + [_ 'err])) + +;; Op3 Value Value Value -> Answer +(define (interp-prim3 p v1 v2 v3) + (match (list p v1 v2 v3) + [(list 'vector-set! (? vector?) (? integer?) _) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-set! v1 v2 v3) + 'err)] + [_ 'err])) ;; Any -> Boolean (define (codepoint? v) diff --git a/langs/iniquity-gc/interp-stdin.rkt b/langs/iniquity-gc/interp-stdin.rkt new file mode 100644 index 00000000..965b9cc4 --- /dev/null +++ b/langs/iniquity-gc/interp-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt" "interp.rkt" "read-all.rkt") + +;; -> Void +;; Parse and interpret contents of stdin, +;; print result on stdout +(define (main) + (read-line) ; ignore #lang racket line + (let ((r (interp (parse (read-all))))) + (unless (void? 
r) + (println r)))) diff --git a/langs/iniquity/interp.rkt b/langs/iniquity-gc/interp.rkt similarity index 91% rename from langs/iniquity/interp.rkt rename to langs/iniquity-gc/interp.rkt index db40c7f5..3576d437 100644 --- a/langs/iniquity/interp.rkt +++ b/langs/iniquity-gc/interp.rkt @@ -21,7 +21,7 @@ ;; type REnv = (Listof (List Id Value)) ;; type Defns = (Listof Defn) -;; Prog Defns -> Answer +;; Prog -> Answer (define (interp p) (match p [(Prog ds e) @@ -36,10 +36,12 @@ [(Eof) eof] [(Empty) '()] [(Var x) (lookup r x)] - [(Str s) (string-copy s)] + [(Str s) s] [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] + [(Prim0 'dump-memory-stats) (dump-memory-stats)] + [(Prim0 'collect-garbage) (collect-garbage)] [(Prim1 p e) (match (interp-env e r ds) ['err 'err] @@ -91,7 +93,9 @@ [(cons e es) (match (interp-env e r ds) ['err 'err] - [v (cons v (interp-env* es r ds))])])) + [v (match (interp-env* es r ds) + ['err 'err] + [vs (cons v vs)])])])) ;; Defns Symbol -> Defn (define (defns-lookup ds f) diff --git a/langs/hoodwink/io.c b/langs/iniquity-gc/io.c similarity index 100% rename from langs/hoodwink/io.c rename to langs/iniquity-gc/io.c diff --git a/langs/iniquity-gc/main.c b/langs/iniquity-gc/main.c new file mode 100644 index 00000000..1157f0b7 --- /dev/null +++ b/langs/iniquity-gc/main.c @@ -0,0 +1,46 @@ +#include +#include +#include "values.h" +#include "print.h" +#include "runtime.h" + +FILE* in; +FILE* out; +void (*error_handler)(); +val_t *heap; +val_t *to; +val_t *from; +type_t *types; + +void error_exit() +{ + printf("err\n"); + exit(1); +} + +void raise_error() +{ + return error_handler(); +} + +int main(int argc, char** argv) +{ + in = stdin; + out = stdout; + error_handler = &error_exit; + heap = malloc(2 * 8 * heap_size); + from = heap; + to = heap + heap_size; + types = malloc(sizeof(type_t) * heap_size); + + val_t result; + + result = entry(heap); + + print_result(result); + if (val_typeof(result) != T_VOID) + 
putchar('\n'); + + free(heap); + return 0; +} diff --git a/langs/jig/parse.rkt b/langs/iniquity-gc/parse.rkt similarity index 92% rename from langs/jig/parse.rkt rename to langs/iniquity-gc/parse.rkt index 49941588..b5d9565b 100644 --- a/langs/jig/parse.rkt +++ b/langs/iniquity-gc/parse.rkt @@ -47,7 +47,7 @@ [_ (error "Parse error" s)])) (define op0 - '(read-byte peek-byte void)) + '(read-byte peek-byte void dump-memory-stats collect-garbage)) (define op1 '(add1 sub1 zero? char? write-byte eof-object? @@ -55,7 +55,7 @@ box unbox empty? cons? box? car cdr vector? vector-length string? string-length)) (define op2 - '(+ - < = cons make-vector vector-ref make-string string-ref)) + '(+ - < = cons eq? make-vector vector-ref make-string string-ref set-box!)) (define op3 '(vector-set!)) diff --git a/langs/hoodwink/print.c b/langs/iniquity-gc/print.c similarity index 96% rename from langs/hoodwink/print.c rename to langs/iniquity-gc/print.c index 365357e2..acb1413b 100644 --- a/langs/hoodwink/print.c +++ b/langs/iniquity-gc/print.c @@ -7,8 +7,8 @@ void print_codepoint(val_char_t); void print_cons(val_cons_t *); void print_vect(val_vect_t*); void print_str(val_str_t*); -void print_symb(val_symb_t*); void print_str_char(val_char_t); +void print_result_interior(val_t); int utf8_encode_char(val_char_t, char *); void print_result(val_t x) @@ -29,47 +29,54 @@ void print_result(val_t x) case T_VOID: break; case T_EMPTY: - printf("'()"); - break; case T_BOX: - printf("#&"); - print_result(val_unwrap_box(x)->val); - break; case T_CONS: - printf("'("); - print_cons(val_unwrap_cons(x)); - printf(")"); - break; - case T_VECT: - print_vect(val_unwrap_vect(x)); + case T_VECT: + printf("'"); + print_result_interior(x); break; case T_STR: putchar('"'); print_str(val_unwrap_str(x)); putchar('"'); - break; - case T_SYMB: - print_symb(val_unwrap_symb(x)); - break; + break; case T_INVALID: printf("internal error"); } } -void print_symb(val_symb_t *s) +void print_result_interior(val_t x) { - 
print_str((val_str_t*) s); + switch (val_typeof(x)) { + case T_EMPTY: + printf("()"); + break; + case T_BOX: + printf("#&"); + print_result_interior(val_unwrap_box(x)->val); + break; + case T_CONS: + printf("("); + print_cons(val_unwrap_cons(x)); + printf(")"); + break; + case T_VECT: + print_vect(val_unwrap_vect(x)); + break; + default: + print_result(x); + } } void print_vect(val_vect_t *v) { uint64_t i; - if (!v) { printf("'#()"); return; } + if (!v) { printf("#()"); return; } - printf("'#("); + printf("#("); for (i = 0; i < v->len; ++i) { - print_result(v->elems[i]); + print_result_interior(v->elems[i]); if (i < v->len - 1) putchar(' '); @@ -79,7 +86,7 @@ void print_vect(val_vect_t *v) void print_cons(val_cons_t *cons) { - print_result(cons->fst); + print_result_interior(cons->fst); switch (val_typeof(cons->snd)) { case T_EMPTY: @@ -91,7 +98,7 @@ void print_cons(val_cons_t *cons) break; default: printf(" . "); - print_result(cons->snd); + print_result_interior(cons->snd); break; } } @@ -801,7 +808,7 @@ void print_char(val_char_t c) void print_codepoint(val_char_t c) { - static char buffer[5] = {0}; + char buffer[5] = {0}; utf8_encode_char(c, buffer); printf("%s", buffer); } diff --git a/langs/hoodwink/print.h b/langs/iniquity-gc/print.h similarity index 100% rename from langs/hoodwink/print.h rename to langs/iniquity-gc/print.h diff --git a/langs/jig/read-all.rkt b/langs/iniquity-gc/read-all.rkt similarity index 57% rename from langs/jig/read-all.rkt rename to langs/iniquity-gc/read-all.rkt index fd03042b..8a3289a5 100644 --- a/langs/jig/read-all.rkt +++ b/langs/iniquity-gc/read-all.rkt @@ -1,8 +1,8 @@ #lang racket (provide read-all) ;; read all s-expression until eof -(define (read-all p) - (let ((r (read p))) +(define (read-all) + (let ((r (read))) (if (eof-object? 
r) '() - (cons r (read-all p))))) + (cons r (read-all))))) diff --git a/langs/iniquity-gc/run.rkt b/langs/iniquity-gc/run.rkt new file mode 100644 index 00000000..eaa53eb9 --- /dev/null +++ b/langs/iniquity-gc/run.rkt @@ -0,0 +1,18 @@ +#lang racket +(provide run run/io) +(require "types.rkt" "build-runtime.rkt" + a86/interp) + +;; Asm -> Answer +(define (run is) + (parameterize ((current-objs (list runtime-path))) + (match (asm-interp is) + ['err 'err] + [b (bits->value b)]))) + +;; Asm String -> (cons Answer String) +(define (run/io is s) + (parameterize ((current-objs (list runtime-path))) + (match (asm-interp/io is s) + [(cons 'err o) (cons 'err o)] + [(cons b o) (cons (bits->value b) o)]))) diff --git a/langs/iniquity-gc/runtime.h b/langs/iniquity-gc/runtime.h new file mode 100644 index 00000000..6588ad1f --- /dev/null +++ b/langs/iniquity-gc/runtime.h @@ -0,0 +1,15 @@ +#ifndef RUNTIME_H +#define RUNTIME_H +int64_t entry(); +extern FILE* in; +extern FILE* out; +extern void (*error_handler)(); + +// in words +#define heap_size 10000 +extern int64_t *heap; +extern val_t *from; +extern val_t *to; + +extern type_t *types; +#endif /* RUNTIME_H */ diff --git a/langs/iniquity-gc/test/all.rkt b/langs/iniquity-gc/test/all.rkt new file mode 100644 index 00000000..f880d506 --- /dev/null +++ b/langs/iniquity-gc/test/all.rkt @@ -0,0 +1,5 @@ +#lang racket + +;; run command line compiler and compare against Racket as refernece implementation +(require rackunit "../../test-programs/get-progs.rkt") +(for-each test-prog (get-progs "iniquity")) diff --git a/langs/iniquity-gc/test/compile.rkt b/langs/iniquity-gc/test/compile.rkt new file mode 100644 index 00000000..9a9d707f --- /dev/null +++ b/langs/iniquity-gc/test/compile.rkt @@ -0,0 +1,8 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../compile.rkt" + "../run.rkt") + +(test-runner (λ p (run (compile (parse p))))) +;(test-runner-io (λ (s . 
p) (run/io (compile (parse p)) s))) diff --git a/langs/iniquity/test/interp.rkt b/langs/iniquity-gc/test/interp.rkt similarity index 100% rename from langs/iniquity/test/interp.rkt rename to langs/iniquity-gc/test/interp.rkt diff --git a/langs/iniquity/test/test-runner.rkt b/langs/iniquity-gc/test/test-runner.rkt similarity index 74% rename from langs/iniquity/test/test-runner.rkt rename to langs/iniquity-gc/test/test-runner.rkt index 21e306e5..7c044cd6 100644 --- a/langs/iniquity/test/test-runner.rkt +++ b/langs/iniquity-gc/test/test-runner.rkt @@ -102,8 +102,12 @@ (let ((y (box 3))) (unbox y)))) 3) + (check-equal? (run '(eq? 1 1)) #t) + (check-equal? (run '(eq? 1 2)) #f) + (check-equal? (run '(eq? (cons 1 2) (cons 1 2))) #f) + (check-equal? (run '(let ((x (cons 1 2))) (eq? x x))) #t) - ;; Hoax examples + ;; Hoax examples (check-equal? (run '(make-vector 0 0)) #()) (check-equal? (run '(make-vector 1 0)) #(0)) (check-equal? (run '(make-vector 3 0)) #(0 0 0)) @@ -122,9 +126,9 @@ (check-equal? (run '(let ((x (make-vector 3 5))) (begin (vector-set! x 1 4) x))) - #(5 4 5)) + #(5 4 5)) (check-equal? (run '(vector-length (make-vector 3 #f))) 3) - (check-equal? (run '(vector-length (make-vector 0 #f))) 0) + (check-equal? (run '(vector-length (make-vector 0 #f))) 0) (check-equal? (run '"") "") (check-equal? (run '"fred") "fred") (check-equal? (run '"wilma") "wilma") @@ -139,6 +143,9 @@ (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") ;; Iniquity tests (check-equal? (run @@ -172,7 +179,41 @@ (cons (add1 (car xs)) (map-add1 (cdr xs))))) '(map-add1 (cons 1 (cons 2 (cons 3 '()))))) - '(2 3 4))) + '(2 3 4)) + (check-equal? (run '(define (f x y) y) + '(f 1 (add1 #f))) + 'err) + + (check-equal? (run '(collect-garbage)) (void)) + (check-equal? (run '(begin (box 0) (collect-garbage))) (void)) + (check-equal? 
(run '(begin (collect-garbage) (box 0))) (box 0)) + (check-equal? (run '(let ((x (box 0))) (collect-garbage))) (void)) + (check-equal? (run '(let ((x (box 0))) + (begin (collect-garbage) + x))) + (box 0)) + ;; GC tests + (check-equal? (run + '(define (n-boxes n) + (if (zero? n) + (void) + (begin (box 0) + (n-boxes (sub1 n))))) + '(n-boxes 10001)) + (void)) + + ;; can't test this in the interpreter, because it doesn't exhaust the heap there. + #; + (check-equal? (run + '(define (nested-boxes n) + (if (zero? n) + (void) + (box (nested-boxes (sub1 n))))) + '(begin (nested-boxes 10001) (void))) + 'err) + ) + + (define (test-runner-io run) ;; Evildoer examples @@ -231,4 +272,41 @@ (begin (write-byte (- 123 i)) (print-alphabet (sub1 i))))) '(print-alphabet 26)) - (cons (void) "abcdefghijklmnopqrstuvwxyz"))) + (cons (void) "abcdefghijklmnopqrstuvwxyz")) + + (check-equal? (run "" + '(define (f x) + (write-byte x)) + '(f 97)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x y) + (write-byte x)) + '(f 97 98)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x) + (let ((y x)) + (write-byte y))) + '(f 97)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x y) + (let ((y x)) + (write-byte y))) + '(f 97 98)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x) + (write-byte x)) + '(let ((z 97)) + (f z))) + (cons (void) "a")) + (check-equal? 
(run "" + '(define (f x y) + (write-byte x)) + '(let ((z 97)) + (f z 98))) + (cons (void) "a"))) + + diff --git a/langs/jig-playground/types.h b/langs/iniquity-gc/types.h similarity index 100% rename from langs/jig-playground/types.h rename to langs/iniquity-gc/types.h diff --git a/langs/iniquity-gc/types.rkt b/langs/iniquity-gc/types.rkt new file mode 100644 index 00000000..9dbc9d59 --- /dev/null +++ b/langs/iniquity-gc/types.rkt @@ -0,0 +1,90 @@ +#lang racket +(provide (all-defined-out)) +(require ffi/unsafe) + +(define imm-shift 3) +(define imm-mask #b111) +(define ptr-mask #b111) +(define type-box #b001) +(define type-cons #b010) +(define type-vect #b011) +(define type-str #b100) +(define int-shift (+ 1 imm-shift)) +(define char-shift (+ 2 imm-shift)) +(define type-int #b0000) +(define mask-int #b1111) +(define type-char #b01000) +(define mask-char #b11111) + +(define (bits->value b) + (cond [(= b (value->bits #t)) #t] + [(= b (value->bits #f)) #f] + [(= b (value->bits eof)) eof] + [(= b (value->bits (void))) (void)] + [(= b (value->bits '())) '()] + [(int-bits? b) + (arithmetic-shift b (- int-shift))] + [(char-bits? b) + (integer->char (arithmetic-shift b (- char-shift)))] + [(box-bits? b) + (box (bits->value (heap-ref b)))] + [(cons-bits? b) + (cons (bits->value (heap-ref (+ b 8))) + (bits->value (heap-ref b)))] + [(vect-bits? b) + (if (zero? (untag b)) + (vector) + (build-vector (heap-ref b) + (lambda (j) + (bits->value (heap-ref (+ b (* 8 (add1 j))))))))] + [(str-bits? b) + (if (zero? (untag b)) + (string) + (build-string (heap-ref b) + (lambda (j) + (char-ref (+ b 8) j))))] + [else (error "invalid bits")])) + +(define (value->bits v) + (cond [(eq? v #t) #b00011000] + [(eq? v #f) #b00111000] + [(eof-object? v) #b01011000] + [(void? v) #b01111000] + [(empty? v) #b10011000] + [(integer? v) + (arithmetic-shift v int-shift)] + [(char? 
v) + (bitwise-ior type-char + (arithmetic-shift (char->integer v) char-shift))] + [else (error "not an immediate value")])) + +(define (imm-bits? v) + (zero? (bitwise-and v imm-mask))) + +(define (int-bits? v) + (= type-int (bitwise-and v mask-int))) + +(define (char-bits? v) + (= type-char (bitwise-and v mask-char))) + +(define (cons-bits? v) + (= type-cons (bitwise-and v imm-mask))) + +(define (box-bits? v) + (= type-box (bitwise-and v imm-mask))) + +(define (vect-bits? v) + (= type-vect (bitwise-and v imm-mask))) + +(define (str-bits? v) + (= type-str (bitwise-and v imm-mask))) + +(define (untag i) + (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) + (integer-length ptr-mask))) + +(define (heap-ref i) + (ptr-ref (cast (untag i) _int64 _pointer) _int64)) + +(define (char-ref i j) + (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j))) diff --git a/langs/iniquity-gc/values.c b/langs/iniquity-gc/values.c new file mode 100644 index 00000000..df54adeb --- /dev/null +++ b/langs/iniquity-gc/values.c @@ -0,0 +1,143 @@ +#include +#include +#include "types.h" +#include "values.h" + +type_t val_typeof(val_t x) +{ + switch (x & ptr_type_mask) { + case box_type_tag: + return T_BOX; + case cons_type_tag: + return T_CONS; + case vect_type_tag: + return T_VECT; + case str_type_tag: + return T_STR; + } + + if ((int_type_mask & x) == int_type_tag) + return T_INT; + if ((char_type_mask & x) == char_type_tag) + return T_CHAR; + + switch (x) { + case val_true: + case val_false: + return T_BOOL; + case val_eof: + return T_EOF; + case val_void: + return T_VOID; + case val_empty: + return T_EMPTY; + } + + return T_INVALID; +} + +val_t* val_unwrap(val_t v) { + return (val_t*)((v >> imm_shift) << imm_shift); +} + +int64_t type_tag(type_t t) { + switch (t) { + case T_BOX: + return box_type_tag; + case T_CONS: + return cons_type_tag; + case T_STR: + return str_type_tag; + case T_VECT: + return vect_type_tag; + default: + printf("type_tag called on 
non-pointer type"); + exit(1); + } +} + +int val_size(val_t *v, type_t t) { + switch (t) { + case T_CONS: return 2; + case T_VECT: return 1 + v[0]; + case T_STR: return 1 + ((v[0] + 1) / 2); + default: return 1; + } +}; + +val_t val_wrap(val_t* v, type_t t) { + return (val_t)((int64_t)v ^ type_tag(t)); +} + +int64_t val_unwrap_int(val_t x) +{ + return x >> int_shift; +} +val_t val_wrap_int(int64_t i) +{ + return (i << int_shift) | int_type_tag; +} + +int val_unwrap_bool(val_t x) +{ + return x == val_true; +} +val_t val_wrap_bool(int b) +{ + return b ? val_true : val_false; +} + +val_char_t val_unwrap_char(val_t x) +{ + return (val_char_t)(x >> char_shift); +} +val_t val_wrap_char(val_char_t c) +{ + return (((val_t)c) << char_shift) | char_type_tag; +} + +val_t val_wrap_eof(void) +{ + return val_eof; +} + +val_t val_wrap_void(void) +{ + return val_void; +} + +val_box_t* val_unwrap_box(val_t x) +{ + return (val_box_t *)(x ^ box_type_tag); +} +val_t val_wrap_box(val_box_t* b) +{ + return ((val_t)b) | box_type_tag; +} + +val_cons_t* val_unwrap_cons(val_t x) +{ + return (val_cons_t *)(x ^ cons_type_tag); +} +val_t val_wrap_cons(val_cons_t *c) +{ + return ((val_t)c) | cons_type_tag; +} + +val_vect_t* val_unwrap_vect(val_t x) +{ + return (val_vect_t *)(x ^ vect_type_tag); +} +val_t val_wrap_vect(val_vect_t *v) +{ + return ((val_t)v) | vect_type_tag; +} + +val_str_t* val_unwrap_str(val_t x) +{ + return (val_str_t *)(x ^ str_type_tag); +} +val_t val_wrap_str(val_str_t *v) +{ + return ((val_t)v) | str_type_tag; +} diff --git a/langs/iniquity-gc/values.h b/langs/iniquity-gc/values.h new file mode 100644 index 00000000..00f7070c --- /dev/null +++ b/langs/iniquity-gc/values.h @@ -0,0 +1,80 @@ +#ifndef VALUES_H +#define VALUES_H + +#include + +/* any abstract value */ +typedef int64_t val_t; + +typedef enum type_t { + T_INVALID = -1, + /* immediates */ + T_INT, + T_BOOL, + T_CHAR, + T_EOF, + T_VOID, + T_EMPTY, + /* pointers */ + T_BOX, + T_CONS, + T_VECT, + T_STR, +} type_t; + 
+typedef uint32_t val_char_t; +typedef struct val_box_t { + val_t val; +} val_box_t; +typedef struct val_cons_t { + val_t snd; + val_t fst; +} val_cons_t; +typedef struct val_vect_t { + uint64_t len; + val_t elems[]; +} val_vect_t; +typedef struct val_str_t { + uint64_t len; + val_char_t codepoints[]; +} val_str_t; + +/* return the type of x */ +type_t val_typeof(val_t x); + +/** + * Wrap/unwrap values + * + * The behavior of unwrap functions are undefined on type mismatch. + */ +val_t* val_unwrap(val_t v); // v is a pointer type value +val_t val_wrap(val_t* v, type_t t); + +int val_size(val_t *v, type_t t); + +int64_t val_unwrap_int(val_t x); +val_t val_wrap_int(int64_t i); + +int val_unwrap_bool(val_t x); +val_t val_wrap_bool(int b); + +val_char_t val_unwrap_char(val_t x); +val_t val_wrap_char(val_char_t b); + +val_t val_wrap_eof(); + +val_t val_wrap_void(); + +val_box_t* val_unwrap_box(val_t x); +val_t val_wrap_box(val_box_t* b); + +val_cons_t* val_unwrap_cons(val_t x); +val_t val_wrap_cons(val_cons_t* c); + +val_vect_t* val_unwrap_vect(val_t x); +val_t val_wrap_vect(val_vect_t* c); + +val_str_t* val_unwrap_str(val_t x); +val_t val_wrap_str(val_str_t* c); + +#endif diff --git a/langs/iniquity-plus/ast.rkt b/langs/iniquity-plus/ast.rkt new file mode 100644 index 00000000..5d675b9c --- /dev/null +++ b/langs/iniquity-plus/ast.rkt @@ -0,0 +1,173 @@ +#lang racket +(provide (all-defined-out)) + +;; type Prog = (Prog (Listof Defn) Expr) +(struct Prog (ds e) #:prefab) + +;; type Defn = (Defn Id Fun) +(struct Defn (f fun) #:prefab) + +;; type Fun = (FunPlain [Listof Id] Expr) +;; | (FunRest [Listof Id] Id Expr) +;; | (FunCase [Listof FunCaseClause]) +;; type FunCaseClause = (FunPlain [Listof Id] Expr) +;; | (FunRest [Listof Id] Id Expr) +(struct FunPlain (xs e) #:prefab) +(struct FunRest (xs x e) #:prefab) +(struct FunCase (cs) #:prefab) + +;; type Expr = (Eof) +;; | (Empty) +;; | (Int Integer) +;; | (Bool Boolean) +;; | (Char Character) +;; | (Str String) +;; | (Prim0 
Op0) +;; | (Prim1 Op1 Expr) +;; | (Prim2 Op2 Expr Expr) +;; | (Prim3 Op3 Expr Expr Expr) +;; | (If Expr Expr Expr) +;; | (Begin Expr Expr) +;; | (Let Id Expr Expr) +;; | (Var Id) +;; | (App Id (Listof Expr)) +;; | (Apply Id (Listof Expr) Expr) +;; type Id = Symbol +;; type Op0 = 'read-byte +;; type Op1 = 'add1 | 'sub1 | 'zero? +;; | 'char? | 'integer->char | 'char->integer +;; | 'write-byte | 'eof-object? +;; | 'box | 'car | 'cdr | 'unbox +;; | 'empty? | 'cons? | 'box? +;; | 'vector? | vector-length +;; | 'string? | string-length +;; type Op2 = '+ | '- | '< | '= +;; | 'cons +;; | 'make-vector | 'vector-ref +;; | 'make-string | 'string-ref +;; type Op3 = 'vector-set! +(struct Eof () #:prefab) +(struct Empty () #:prefab) +(struct Int (i) #:prefab) +(struct Bool (b) #:prefab) +(struct Char (c) #:prefab) +(struct Str (s) #:prefab) +(struct Prim0 (p) #:prefab) +(struct Prim1 (p e) #:prefab) +(struct Prim2 (p e1 e2) #:prefab) +(struct Prim3 (p e1 e2 e3) #:prefab) +(struct If (e1 e2 e3) #:prefab) +(struct Begin (e1 e2) #:prefab) +(struct Let (x e1 e2) #:prefab) +(struct Var (x) #:prefab) +(struct App (f es) #:prefab) +(struct Apply (f es e) #:prefab) + +;; Prog -> Void +(define (check-syntax p) + (match p + [(Prog ds e) + (let ((dr (defined-ids ds))) + (check-syntax-unique-defines ds) + (check-syntax-defines ds dr) + (check-syntax-e e dr '()))])) + +;; [Listof Defn] -> [Listof Id] +(define (defined-ids ds) + (map (λ (d) (match d [(Defn f _) f])) + ds)) + +;; [Listof Defn] -> Void +(define (check-syntax-unique-defines ds) + (unless (= (length ds) + (length (remove-duplicates ds #:key Defn-f))) + (error "duplicate definition for function"))) + +;; [Listof Defn] [Listof Id] -> Void +(define (check-syntax-defines ds r) + (for-each (λ (d) (check-syntax-define d r)) ds)) + +;; Defn [Listof Id] -> Void +(define (check-syntax-define d dr) + (match d + [(Defn f (FunPlain xs e)) + (check-unique (cons f xs)) + (check-syntax-e e dr xs)] + [(Defn f (FunRest xs x e)) + (check-unique 
(cons f (cons x xs))) + (check-syntax-e e dr (cons x xs))] + [(Defn f (FunCase '())) + (void)] + [(Defn f (FunCase (cons c cs))) + (check-syntax-define (Defn f c) dr) + (check-syntax-define (Defn f (FunCase cs)) dr)])) + +;; [Listof Id] -> Void +(define (check-unique xs) + (unless (= (length xs) (length (remove-duplicates xs))) + (error "duplicate identifier"))) + +;; Expr [Listof Id] [Listof Id] -> Void +(define (check-syntax-e e dr r) + (match e + [(Eof) (void)] + [(Empty) (void)] + [(Int i) (void)] + [(Bool b) (void)] + [(Char c) (void)] + [(Str s) (void)] + [(Prim0 p) (void)] + [(Prim1 p e) (check-syntax-e e dr r)] + [(Prim2 p e1 e2) + (check-syntax-e e1 dr r) + (check-syntax-e e2 dr r)] + [(Prim3 p e1 e2 e3) + (check-syntax-e e1 dr r) + (check-syntax-e e2 dr r) + (check-syntax-e e3 dr r)] + [(If e1 e2 e3) + (check-syntax-e e1 dr r) + (check-syntax-e e2 dr r) + (check-syntax-e e3 dr r)] + [(Begin e1 e2) + (check-syntax-e e1 dr r) + (check-syntax-e e2 dr r)] + [(Let x e1 e2) + (check-syntax-e e1 dr r) + (check-syntax-e e2 dr (cons x r))] + [(Var x) + (unless (member x r) + (error "unbound variable"))] + [(App f es) + (unless (member f dr) + (error "undefined function")) + (for-each (λ (e) (check-syntax-e e dr r)) es)] + [(Apply f es e) + (unless (member f dr) + (error "undefined function")) + (check-syntax-e e dr r) + (for-each (λ (e) (check-syntax-e e dr r)) es)])) + +(module+ test + (require rackunit) + (check-exn exn:fail? (λ () (check-syntax-e (Var 'x) '() '()))) + (check-exn exn:fail? (λ () (check-syntax-e (Var 'x) '(x) '()))) + (check-not-exn (λ () (check-syntax-e (Var 'x) '() '(x)))) + (check-not-exn (λ () (check-syntax-e (Let 'x (Int 1) (Var 'x)) '() '()))) + (check-not-exn (λ () (check-syntax-e (Let 'x (Int 1) (Let 'y (Int 2) (Var 'x))) '() '()))) + (check-not-exn (λ () (check-syntax-e (Let 'x (Int 1) (Let 'x (Int 2) (Var 'x))) '() '()))) + (check-not-exn (λ () (check-syntax-e (Let 'x (Int 1) (Let 'y (Int 2) (Var 'y))) '() '()))) + (check-exn exn:fail? 
(λ () (check-syntax (Prog (list (Defn 'f (FunPlain '() (Int 1)))) (Var 'f))))) + (check-exn exn:fail? (λ () (check-syntax (Prog (list (Defn 'f (FunPlain '(f) (Int 1)))) (Int 1))))) + (check-exn exn:fail? (λ () (check-syntax (Prog (list (Defn 'f (FunRest '(f) 'x (Int 1)))) (Int 1))))) + (check-exn exn:fail? (λ () (check-syntax (Prog (list (Defn 'f (FunRest '() 'f (Int 1)))) (Int 1))))) + (check-exn exn:fail? (λ () (check-syntax (Prog (list (Defn 'f (FunPlain '(x x) (Int 1)))) (Int 1))))) + (check-exn exn:fail? + (λ () (check-syntax + (Prog (list (Defn 'f (FunPlain '(x) (Int 1))) + (Defn 'f (FunPlain '(y) (Int 2)))) + (Int 1))))) + (check-exn exn:fail? (λ () (check-syntax (Prog '() (App 'f '()))))) + (check-exn exn:fail? (λ () (check-syntax (Prog '() (Apply 'f '() (Int 1)))))) + (check-not-exn (λ () (check-syntax (Prog (list (Defn 'f (FunPlain '() (Int 1)))) (App 'f '()))))) + (check-not-exn (λ () (check-syntax (Prog (list (Defn 'f (FunPlain '() (Int 1)))) (Apply 'f '() (Int 1))))))) diff --git a/langs/iniquity-plus/env.rkt b/langs/iniquity-plus/env.rkt new file mode 100644 index 00000000..c43be9c3 --- /dev/null +++ b/langs/iniquity-plus/env.rkt @@ -0,0 +1,15 @@ +#lang racket +(provide lookup ext) + +;; Env Variable -> Answer +(define (lookup env x) + (match env + ['() 'err] + [(cons (list y i) env) + (match (symbol=? 
x y) + [#t i] + [#f (lookup env x)])])) + +;; Env Variable Value -> Value +(define (ext r x i) + (cons (list x i) r)) \ No newline at end of file diff --git a/langs/jig/interp-io.rkt b/langs/iniquity-plus/interp-io.rkt similarity index 100% rename from langs/jig/interp-io.rkt rename to langs/iniquity-plus/interp-io.rkt diff --git a/langs/hoodwink/interp-prims.rkt b/langs/iniquity-plus/interp-prims.rkt similarity index 95% rename from langs/hoodwink/interp-prims.rkt rename to langs/iniquity-plus/interp-prims.rkt index fdbdc591..a73bc6af 100644 --- a/langs/hoodwink/interp-prims.rkt +++ b/langs/iniquity-plus/interp-prims.rkt @@ -31,6 +31,8 @@ (match (list p v1 v2) [(list '+ (? integer?) (? integer?)) (+ v1 v2)] [(list '- (? integer?) (? integer?)) (- v1 v2)] + [(list '< (? integer?) (? integer?)) (< v1 v2)] + [(list '= (? integer?) (? integer?)) (= v1 v2)] [(list 'cons v1 v2) (cons v1 v2)] [(list 'make-vector (? integer?) _) (if (<= 0 v1) diff --git a/langs/jig/interp.rkt b/langs/iniquity-plus/interp.rkt similarity index 58% rename from langs/jig/interp.rkt rename to langs/iniquity-plus/interp.rkt index db40c7f5..f3dc29ab 100644 --- a/langs/jig/interp.rkt +++ b/langs/iniquity-plus/interp.rkt @@ -21,8 +21,9 @@ ;; type REnv = (Listof (List Id Value)) ;; type Defns = (Listof Defn) -;; Prog Defns -> Answer +;; Prog -> Answer (define (interp p) + (check-syntax p) (match p [(Prog ds e) (interp-env e '() ds)])) @@ -78,12 +79,56 @@ ['err 'err] [vs (match (defns-lookup ds f) - [(Defn f xs e) - ; check arity matches - (if (= (length xs) (length vs)) - (interp-env e (zip xs vs) ds) + [(Defn _ fun) + (apply-fun fun vs ds)])])] + [(Apply f es e) + (match (interp-env* es r ds) + ['err 'err] + [vs + (match (interp-env e r ds) + ['err 'err] + [ws + (if (list? 
ws) + (match (defns-lookup ds f) + [(Defn _ fun) + (apply-fun fun (append vs ws) ds)]) 'err)])])])) +;; Fun [Listof Values] Defns -> Answer +(define (apply-fun f vs ds) + (match f + [(FunPlain xs e) + ; check arity matches-arity-exactly? + (if (= (length xs) (length vs)) + (interp-env e (zip xs vs) ds) + 'err)] + [(FunRest xs x e) + ; check arity is acceptable + (if (< (length vs) (length xs)) + 'err + (interp-env e + (zip (cons x xs) + (cons (drop vs (length xs)) + (take vs (length xs)))) + ds))] + [(FunCase cs) + (match (select-case-lambda cs (length vs)) + ['err 'err] + [f (apply-fun f vs ds)])])) + +;; [Listof FunCaseClause] Nat -> Fun | 'err +(define (select-case-lambda cs n) + (match cs + ['() 'err] + [(cons (and (FunPlain xs e) f) cs) + (if (= (length xs) n) + f + (select-case-lambda cs n))] + [(cons (and (FunRest xs x e) f) cs) + (if (<= (length xs) n) + f + (select-case-lambda cs n))])) + ;; (Listof Expr) REnv Defns -> (Listof Value) | 'err (define (interp-env* es r ds) (match es @@ -91,11 +136,13 @@ [(cons e es) (match (interp-env e r ds) ['err 'err] - [v (cons v (interp-env* es r ds))])])) + [v (match (interp-env* es r ds) + ['err 'err] + [vs (cons v vs)])])])) ;; Defns Symbol -> Defn (define (defns-lookup ds f) - (findf (match-lambda [(Defn g _ _) (eq? f g)]) + (findf (match-lambda [(Defn g _) (eq? f g)]) ds)) (define (zip xs ys) diff --git a/langs/iniquity-plus/parse.rkt b/langs/iniquity-plus/parse.rkt new file mode 100644 index 00000000..11d03152 --- /dev/null +++ b/langs/iniquity-plus/parse.rkt @@ -0,0 +1,122 @@ +#lang racket +(provide parse parse-define parse-e) +(require "ast.rkt") + +;; [Listof S-Expr] -> Prog +(define (parse s) + (match s + [(cons (and (cons 'define _) d) s) + (match (parse s) + [(Prog ds e) + (Prog (cons (parse-define d) ds) e)])] + [(cons e '()) (Prog '() (parse-e e))] + [_ (error "program parse error")])) + +;; S-Expr -> Defn +(define (parse-define s) + (match s + [(list 'define (? symbol? 
f) + (list-rest 'case-lambda cs)) + (Defn f (FunCase (parse-case-lambda-clauses cs)))] + [(list 'define (cons (? symbol? f) xs) e) + (if (all symbol? xs) + (Defn f (parse-param-list xs e)) + (error "parse definition error"))] + [_ (error "Parse defn error" s)])) + +;; like andmap, but work on improper lists too +(define (all p? xs) + (match xs + ['() #t] + [(cons x xs) (and (p? x) (all p? xs))] + [x (p? x)])) + +;; S-Expr -> [Listof FunCaseClause] +(define (parse-case-lambda-clauses cs) + (match cs + ['() '()] + [(cons c cs) + (cons (parse-case-lambda-clause c) + (parse-case-lambda-clauses cs))] + [_ + (error "parse case-lambda error")])) + +;; S-Expr -> FunRest +(define (parse-case-lambda-clause c) + (match c + [(list (? symbol? x) e) + (FunRest '() x (parse-e e))] + [(list xs e) + (parse-param-list xs e)])) + +;; S-Expr S-Expr -> FunPlain or FunRest +(define (parse-param-list xs e) + (match xs + ['() (FunPlain '() (parse-e e))] + [(cons x xs) + (match (parse-param-list xs e) + [(FunPlain xs e) (FunPlain (cons x xs) e)] + [(FunRest xs y e) (FunRest (cons x xs) y e)])] + [(? symbol? xs) + (FunRest '() xs (parse-e e))] + [_ + (error "parse parameter list error")])) + + +;; S-Expr -> Expr +(define (parse-e s) + (match s + [(? integer?) (Int s)] + [(? boolean?) (Bool s)] + [(? char?) (Char s)] + [(? string?) (Str s)] + ['eof (Eof)] + [(? symbol?) (Var s)] + [(list 'quote (list)) (Empty)] + [(list (? (op? op0) p0)) (Prim0 p0)] + [(list (? (op? op1) p1) e) (Prim1 p1 (parse-e e))] + [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse-e e1) (parse-e e2))] + [(list (? (op? op3) p3) e1 e2 e3) + (Prim3 p3 (parse-e e1) (parse-e e2) (parse-e e3))] + [(list 'begin e1 e2) + (Begin (parse-e e1) (parse-e e2))] + [(list 'if e1 e2 e3) + (If (parse-e e1) (parse-e e2) (parse-e e3))] + [(list 'let (list (list (? symbol? x) e1)) e2) + (Let x (parse-e e1) (parse-e e2))] + [(list-rest 'apply es) + (match es + [(cons (? symbol? 
f) es) + (parse-apply f es)] + [_ (error "parse apply error")])] + [(cons (? symbol? f) es) + (App f (map parse-e es))] + [_ (error "Parse error" s)])) + +;; Id S-Expr -> Expr +(define (parse-apply f es) + (match es + [(list e) (Apply f '() (parse-e e))] + [(cons e es) + (match (parse-apply f es) + [(Apply f es e0) + (Apply f (cons (parse-e e) es) e0)])] + [_ (error "parse apply error")])) + +(define op0 + '(read-byte peek-byte void)) + +(define op1 + '(add1 sub1 zero? char? write-byte eof-object? + integer->char char->integer + box unbox empty? cons? box? car cdr + vector? vector-length string? string-length)) +(define op2 + '(+ - < = cons make-vector vector-ref make-string string-ref)) +(define op3 + '(vector-set!)) + +(define (op? ops) + (λ (x) + (and (symbol? x) + (memq x ops)))) diff --git a/langs/iniquity/read-all.rkt b/langs/iniquity-plus/read-all.rkt similarity index 100% rename from langs/iniquity/read-all.rkt rename to langs/iniquity-plus/read-all.rkt diff --git a/langs/jig/test/interp.rkt b/langs/iniquity-plus/test/interp.rkt similarity index 100% rename from langs/jig/test/interp.rkt rename to langs/iniquity-plus/test/interp.rkt diff --git a/langs/iniquity-plus/test/test-runner.rkt b/langs/iniquity-plus/test/test-runner.rkt new file mode 100644 index 00000000..128ad4a5 --- /dev/null +++ b/langs/iniquity-plus/test/test-runner.rkt @@ -0,0 +1,375 @@ +#lang racket +(provide test-runner test-runner-io) +(require rackunit) + +(define (test-runner run) + ;; Abscond examples + (check-equal? (run 7) 7) + (check-equal? (run -8) -8) + + ;; Blackmail examples + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7) + + ;; Con examples + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 
1) 1 2) + 7)) + 7) + + ;; Dupe examples + (check-equal? (run #t) #t) + (check-equal? (run #f) #f) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) + (check-equal? (run '(if #t 3 4)) 3) + (check-equal? (run '(if #f 3 4)) 4) + (check-equal? (run '(if 0 3 4)) 3) + (check-equal? (run '(zero? 4)) #f) + (check-equal? (run '(zero? 0)) #t) + + ;; Dodger examples + (check-equal? (run #\a) #\a) + (check-equal? (run #\b) #\b) + (check-equal? (run '(char? #\a)) #t) + (check-equal? (run '(char? #t)) #f) + (check-equal? (run '(char? 8)) #f) + (check-equal? (run '(char->integer #\a)) (char->integer #\a)) + (check-equal? (run '(integer->char 955)) #\λ) + + ;; Extort examples + (check-equal? (run '(add1 #f)) 'err) + (check-equal? (run '(sub1 #f)) 'err) + (check-equal? (run '(zero? #f)) 'err) + (check-equal? (run '(char->integer #f)) 'err) + (check-equal? (run '(integer->char #f)) 'err) + (check-equal? (run '(integer->char -1)) 'err) + (check-equal? (run '(write-byte #f)) 'err) + (check-equal? (run '(write-byte -1)) 'err) + (check-equal? (run '(write-byte 256)) 'err) + + ;; Fraud examples + (check-equal? (run '(let ((x 7)) x)) 7) + (check-equal? (run '(let ((x 7)) 2)) 2) + (check-equal? (run '(let ((x 7)) (add1 x))) 8) + (check-equal? (run '(let ((x (add1 7))) x)) 8) + (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) + (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) + (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) + + (check-equal? (run '(let ((x 0)) + (if (zero? x) 7 8))) + 7) + (check-equal? (run '(let ((x 1)) + (add1 (if (zero? x) 7 8)))) + 9) + (check-equal? (run '(+ 3 4)) 7) + (check-equal? (run '(- 3 4)) -1) + (check-equal? (run '(+ (+ 2 1) 4)) 7) + (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) + (check-equal? (run '(let ((x (+ 1 2))) + (let ((z (- 4 x))) + (+ (+ x x) z)))) + 7) + (check-equal? (run '(= 5 5)) #t) + (check-equal? (run '(= 4 5)) #f) + (check-equal? 
(run '(= (add1 4) 5)) #t) + (check-equal? (run '(< 5 5)) #f) + (check-equal? (run '(< 4 5)) #t) + (check-equal? (run '(< (add1 4) 5)) #f) + + ;; Hustle examples + (check-equal? (run ''()) '()) + (check-equal? (run '(box 1)) (box 1)) + (check-equal? (run '(cons 1 2)) (cons 1 2)) + (check-equal? (run '(unbox (box 1))) 1) + (check-equal? (run '(car (cons 1 2))) 1) + (check-equal? (run '(cdr (cons 1 2))) 2) + (check-equal? (run '(cons 1 '())) (list 1)) + (check-equal? (run '(let ((x (cons 1 2))) + (begin (cdr x) + (car x)))) + 1) + (check-equal? (run '(let ((x (cons 1 2))) + (let ((y (box 3))) + (unbox y)))) + 3) + + ;; Hoax examples + (check-equal? (run '(make-vector 0 0)) #()) + (check-equal? (run '(make-vector 1 0)) #(0)) + (check-equal? (run '(make-vector 3 0)) #(0 0 0)) + (check-equal? (run '(make-vector 3 5)) #(5 5 5)) + (check-equal? (run '(vector? (make-vector 0 0))) #t) + (check-equal? (run '(vector? (cons 0 0))) #f) + (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 2)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 3)) 'err) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 0 4) + x))) + #(4 5 5)) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 1 4) + x))) + #(5 4 5)) + (check-equal? (run '(vector-length (make-vector 3 #f))) 3) + (check-equal? (run '(vector-length (make-vector 0 #f))) 0) + (check-equal? (run '"") "") + (check-equal? (run '"fred") "fred") + (check-equal? (run '"wilma") "wilma") + (check-equal? (run '(make-string 0 #\f)) "") + (check-equal? (run '(make-string 3 #\f)) "fff") + (check-equal? (run '(make-string 3 #\g)) "ggg") + (check-equal? (run '(string-length "")) 0) + (check-equal? (run '(string-length "fred")) 4) + (check-equal? (run '(string-ref "fred" 0)) #\f) + (check-equal? 
(run '(string-ref "fred" 1)) #\r) + (check-equal? (run '(string-ref "fred" 2)) #\e) + (check-equal? (run '(string-ref "fred" 4)) 'err) + (check-equal? (run '(string? "fred")) #t) + (check-equal? (run '(string? (cons 1 2))) #f) + + ;; Iniquity tests + (check-equal? (run + '(define (f x) x) + '(f 5)) + 5) + (check-equal? (run + '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) + '(tri 9)) + 45) + + (check-equal? (run + '(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) + '(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + '(even? 101)) + #f) + + (check-equal? (run + '(define (map-add1 xs) + (if (empty? xs) + '() + (cons (add1 (car xs)) + (map-add1 (cdr xs))))) + '(map-add1 (cons 1 (cons 2 (cons 3 '()))))) + '(2 3 4)) + + ;; Iniquity+ + (check-equal? (run '(define (f x) x) + '(f)) + 'err) + (check-equal? (run '(define (f) 1) + '(f 2)) + 'err) + (check-equal? (run '(define (f x) x) + '(let ((y 2)) + (f 1 y))) + 'err) + (check-equal? (run '(define (f . xs) + (if (empty? xs) + #t + (f))) + '(f 1 2 3)) + #t) + (check-equal? (run '(define (list . xs) xs) + '(list (list) (list 1 2 3) (list #t) (list 3 4 5))) + '(() (1 2 3) (#t) (3 4 5))) + (check-equal? (run '(define (f x y . z) (cons x (cons y z))) + '(cons (f 1 2) (cons (f 8 9 10) '()))) + '((1 2) (8 9 10))) + (check-equal? (run '(define (f x . xs) x) + '(f 1)) + 1) + (check-equal? (run '(define (f x . xs) xs) + '(f 1)) + '()) + (check-equal? (run '(define (f x . xs) xs) + '(f)) + 'err) + (check-equal? (run '(define (f x . xs) xs) + '(let ((x 3)) + (f 1 x))) + '(3)) + (check-equal? (run '(define f + (case-lambda)) + '(f)) + 'err) + (check-equal? (run '(define f + (case-lambda)) + '(add1 8)) + 9) + (check-equal? (run '(define f + (case-lambda + [(x) x])) + '(f 1)) + 1) + (check-equal? (run '(define f + (case-lambda + [x #t] + [(x) x])) + '(f 1)) + #t) + (check-equal? (run '(define f + (case-lambda + [(x y) #f] + [(x) x])) + '(cons (f 1) (cons (f 1 2) '()))) + '(1 #f)) + (check-equal? 
(run '(define f + (case-lambda + [x #f] + [y #t])) + '(cons (f 1) (cons (f 1 2) '()))) + '(#f #f)) + (check-equal? (run '(define f + (case-lambda + [(x y . z) z] + [(x) (+ x x)] + [z 2])) + '(cons (f 1 2) + (cons (f 1) + (cons (f 1 2 3) + '())))) + '(() 2 (3))) + + (check-equal? (run '(define (f) 1) + '(apply f '())) + 1) + (check-equal? (run '(define (f . xs) 1) + '(apply f '())) + 1) + (check-equal? (run '(define (f . xs) xs) + '(apply f '())) + '()) + (check-equal? (run '(define (f . xs) xs) + '(apply f (cons 1 (cons 2 (cons 3 '()))))) + '(1 2 3)) + (check-equal? (run '(define (f . xs) xs) + '(apply f 1 2 (cons 3 '()))) + '(1 2 3)) + (check-equal? (run '(define (append . xss) + (if (empty? xss) + '() + (if (empty? (car xss)) + (apply append (cdr xss)) + (cons (car (car xss)) + (apply append (cdr (car xss)) (cdr xss)))))) + '(define (list . xs) xs) + '(define (flatten xs) + (apply append xs)) + '(flatten (list (append) (append (list 1 2 3) (list 4 5) (list 6)) (list 7)))) + '(1 2 3 4 5 6 7))) + +(define (test-runner-io run) + ;; Evildoer examples + (check-equal? (run "" 7) (cons 7 "")) + (check-equal? (run "" '(write-byte 97)) (cons (void) "a")) + (check-equal? (run "a" '(read-byte)) (cons 97 "")) + (check-equal? (run "b" '(begin (write-byte 97) (read-byte))) + (cons 98 "a")) + (check-equal? (run "" '(read-byte)) (cons eof "")) + (check-equal? (run "" '(eof-object? (read-byte))) (cons #t "")) + (check-equal? (run "a" '(eof-object? (read-byte))) (cons #f "")) + (check-equal? (run "" '(begin (write-byte 97) (write-byte 98))) + (cons (void) "ab")) + + (check-equal? (run "ab" '(peek-byte)) (cons 97 "")) + (check-equal? (run "ab" '(begin (peek-byte) (read-byte))) (cons 97 "")) + ;; Extort examples + (check-equal? (run "" '(write-byte #t)) (cons 'err "")) + + ;; Fraud examples + (check-equal? (run "" '(let ((x 97)) (write-byte x))) (cons (void) "a")) + (check-equal? (run "" + '(let ((x 97)) + (begin (write-byte x) + x))) + (cons 97 "a")) + (check-equal? 
(run "b" '(let ((x 97)) (begin (read-byte) x))) + (cons 97 "")) + (check-equal? (run "b" '(let ((x 97)) (begin (peek-byte) x))) + (cons 97 "")) + + ;; Hustle examples + (check-equal? (run "" + '(let ((x 1)) + (begin (write-byte 97) + 1))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x 1)) + (let ((y 2)) + (begin (write-byte 97) + 1)))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x (cons 1 2))) + (begin (write-byte 97) + (car x)))) + (cons 1 "a")) + ;; Iniquity examples + (check-equal? (run "" + '(define (print-alphabet i) + (if (zero? i) + (void) + (begin (write-byte (- 123 i)) + (print-alphabet (sub1 i))))) + '(print-alphabet 26)) + (cons (void) "abcdefghijklmnopqrstuvwxyz")) + + (check-equal? (run "" + '(define (f x) + (write-byte x)) + '(f 97)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x y) + (write-byte x)) + '(f 97 98)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x) + (let ((y x)) + (write-byte y))) + '(f 97)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x y) + (let ((y x)) + (write-byte y))) + '(f 97 98)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x) + (write-byte x)) + '(let ((z 97)) + (f z))) + (cons (void) "a")) + (check-equal? 
(run "" + '(define (f x y) + (write-byte x)) + '(let ((z 97)) + (f z 98))) + (cons (void) "a"))) diff --git a/langs/hoax/types.rkt b/langs/iniquity-plus/types.rkt similarity index 100% rename from langs/hoax/types.rkt rename to langs/iniquity-plus/types.rkt diff --git a/langs/iniquity/Makefile b/langs/iniquity/Makefile index 9b74bfc0..db6ac44a 100644 --- a/langs/iniquity/Makefile +++ b/langs/iniquity/Makefile @@ -1,37 +1,44 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ - values.o \ print.o \ + values.o \ io.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
-test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/iniquity/info.rkt b/langs/iniquity/info.rkt new file mode 100644 index 00000000..41ec40bb --- /dev/null +++ b/langs/iniquity/info.rkt @@ -0,0 +1,2 @@ +#lang info +(define pre-install-collection "../installer.rkt") diff --git a/langs/iniquity/interp-file.rkt b/langs/iniquity/interp-file.rkt deleted file mode 100644 index aabe615b..00000000 --- a/langs/iniquity/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt" "read-all.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read-all p)))) - (close-input-port p)))) diff --git a/langs/iniquity/interp-prims.rkt b/langs/iniquity/interp-prims.rkt index a73bc6af..4cbabc6c 100644 --- a/langs/iniquity/interp-prims.rkt +++ b/langs/iniquity/interp-prims.rkt @@ -34,6 +34,7 @@ [(list '< (? integer?) (? integer?)) (< v1 v2)] [(list '= (? integer?) (? integer?)) (= v1 v2)] [(list 'cons v1 v2) (cons v1 v2)] + [(list 'eq? v1 v2) (eq? v1 v2)] [(list 'make-vector (? integer?) _) (if (<= 0 v1) (make-vector v1 v2) diff --git a/langs/iniquity/io.c b/langs/iniquity/io.c index 7ef82281..8a417c91 100644 --- a/langs/iniquity/io.c +++ b/langs/iniquity/io.c @@ -7,14 +7,14 @@ val_t read_byte(void) { char c = getc(in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } val_t peek_byte(void) { char c = getc(in); ungetc(c, in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? 
val_wrap_eof() : val_wrap_byte(c); } diff --git a/langs/iniquity/main.c b/langs/iniquity/main.c index 1ca6115f..4fcd8b6b 100644 --- a/langs/iniquity/main.c +++ b/langs/iniquity/main.c @@ -32,8 +32,9 @@ int main(int argc, char** argv) result = entry(heap); print_result(result); + if (val_typeof(result) != T_VOID) - putchar('\n'); + putchar('\n'); free(heap); return 0; diff --git a/langs/iniquity/print.c b/langs/iniquity/print.c index a88a5779..acb1413b 100644 --- a/langs/iniquity/print.c +++ b/langs/iniquity/print.c @@ -808,7 +808,7 @@ void print_char(val_char_t c) void print_codepoint(val_char_t c) { - static char buffer[5] = {0}; + char buffer[5] = {0}; utf8_encode_char(c, buffer); printf("%s", buffer); } diff --git a/langs/iniquity/test/compile.rkt b/langs/iniquity/test/compile.rkt deleted file mode 100644 index 81defae6..00000000 --- a/langs/iniquity/test/compile.rkt +++ /dev/null @@ -1,18 +0,0 @@ -#lang racket -(require "test-runner.rkt" - "../parse.rkt" - "../compile.rkt" - "../unload-bits-asm.rkt" - a86/interp) - -;; link with runtime for IO operations -(unless (file-exists? "../runtime.o") - (system "make -C .. runtime.o")) -(current-objs - (list (path->string (normalize-path "../runtime.o")))) - -(test-runner (λ p (unload/free (asm-interp (compile (parse p)))))) -(test-runner-io (λ (s . p) - (match (asm-interp/io (compile (parse p)) s) - ['err 'err] - [(cons r o) (cons (unload/free r) o)]))) diff --git a/langs/iniquity/unload-bits-asm.rkt b/langs/iniquity/unload-bits-asm.rkt deleted file mode 100644 index be9b50c8..00000000 --- a/langs/iniquity/unload-bits-asm.rkt +++ /dev/null @@ -1,43 +0,0 @@ -#lang racket -(provide unload/free unload-value) -(require "types.rkt" - ffi/unsafe) - -;; Answer* -> Answer -(define (unload/free a) - (match a - ['err 'err] - [(cons h v) (begin0 (unload-value v) - (free h))])) - -;; Value* -> Value -(define (unload-value v) - (match v - [(? imm-bits?) (bits->value v)] - [(? box-bits? i) - (box (unload-value (heap-ref i)))] - [(? 
cons-bits? i) - (cons (unload-value (heap-ref (+ i 8))) - (unload-value (heap-ref i)))] - [(? vect-bits? i) - (if (zero? (untag i)) - (vector) - (build-vector (heap-ref i) - (lambda (j) - (unload-value (heap-ref (+ i (* 8 (add1 j))))))))] - [(? str-bits? i) - (if (zero? (untag i)) - (string) - (build-string (heap-ref i) - (lambda (j) - (char-ref (+ i 8) j))))])) - -(define (untag i) - (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) - (integer-length ptr-mask))) - -(define (heap-ref i) - (ptr-ref (cast (untag i) _int64 _pointer) _uint64)) - -(define (char-ref i j) - (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j))) diff --git a/langs/iniquity/values.c b/langs/iniquity/values.c index a61d65e6..62bca18c 100644 --- a/langs/iniquity/values.c +++ b/langs/iniquity/values.c @@ -38,6 +38,10 @@ int64_t val_unwrap_int(val_t x) { return x >> int_shift; } +val_t val_wrap_byte(unsigned char b) +{ + return (b << int_shift) | int_type_tag; +} val_t val_wrap_int(int64_t i) { return (i << int_shift) | int_type_tag; diff --git a/langs/iniquity/values.h b/langs/iniquity/values.h index 4cc48bbe..b6ac44f9 100644 --- a/langs/iniquity/values.h +++ b/langs/iniquity/values.h @@ -49,6 +49,7 @@ type_t val_typeof(val_t x); */ int64_t val_unwrap_int(val_t x); val_t val_wrap_int(int64_t i); +val_t val_wrap_byte(unsigned char b); int val_unwrap_bool(val_t x); val_t val_wrap_bool(int b); diff --git a/langs/installer.rkt b/langs/installer.rkt new file mode 100644 index 00000000..4c52c1c6 --- /dev/null +++ b/langs/installer.rkt @@ -0,0 +1,28 @@ +#lang racket +(provide pre-installer) +(require crook/pi) + +(define (pre-installer cs own) + + (define lang-name + (let-values ([(b f d?) (split-path own)]) + (path->string f))) + + (main (path->string (collection-file-path "src/" "ziggy")) own + ;; NOTE: To re-enable any disabled languages, you must also un-comment + ;; the [pre-install-collection] definition in the [info.rkt] file in + ;; that language's directory. 
+ (cdr (or (assoc lang-name '(#;("abscond" . "A") + #;("blackmail" . "B") + #;("con" . "C") + #;("dupe" . "D0") + ("dodger" . "D1") + ("evildoer" . "E0") + ("extort" . "E1") + ("fraud" . "F") + ("hustle" . "H0") + ("hoax" . "H1") + ("iniquity" . "I") + ("jig" . "J") + ("knock" . "K"))) + (error 'ziggy-pre-installer (format "unsupported lang: ~s" lang-name)))))) diff --git a/langs/jig-playground/compile-ops.rkt b/langs/jig-playground/compile-ops.rkt deleted file mode 100644 index 4f36708e..00000000 --- a/langs/jig-playground/compile-ops.rkt +++ /dev/null @@ -1,338 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) - -(define rax 'rax) ; return -(define eax 'eax) ; 32-bit load/store -(define rbx 'rbx) ; heap -(define rdi 'rdi) ; arg -(define r8 'r8) ; scratch -(define r9 'r9) ; scratch -(define r10 'r10) ; scratch -(define rsp 'rsp) ; stack - -;; Op0 CEnv -> Asm -(define (compile-op0 p c) - (match p - ['void (seq (Mov rax val-void))] - ['read-byte (seq (pad-stack c) - (Call 'read_byte) - (unpad-stack c))] - ['peek-byte (seq (pad-stack c) - (Call 'peek_byte) - (unpad-stack c))])) - -;; Op1 CEnv -> Asm -(define (compile-op1 p c) - (match p - ['add1 - (seq (assert-integer rax c) - (Add rax (imm->bits 1)))] - ['sub1 - (seq (assert-integer rax c) - (Sub rax (imm->bits 1)))] - ['zero? - (seq (assert-integer rax c) - (eq-imm 0))] - ['char? - (type-pred mask-char type-char)] - ['char->integer - (seq (assert-char rax c) - (Sar rax char-shift) - (Sal rax int-shift))] - ['integer->char - (seq (assert-codepoint c) - (Sar rax int-shift) - (Sal rax char-shift) - (Xor rax type-char))] - ['eof-object? 
(eq-imm eof)] - ['write-byte - (seq (assert-byte c) - (pad-stack c) - (Mov rdi rax) - (Call 'write_byte) - (unpad-stack c) - (Mov rax val-void))] - ['box - (seq (Mov (Offset rbx 0) rax) - (Mov rax rbx) - (Or rax type-box) - (Add rbx 8))] - ['unbox - (seq (assert-box rax c) - (Xor rax type-box) - (Mov rax (Offset rax 0)))] - ['car - (seq (assert-cons rax c) - (Xor rax type-cons) - (Mov rax (Offset rax 8)))] - ['cdr - (seq (assert-cons rax c) - (Xor rax type-cons) - (Mov rax (Offset rax 0)))] - ['empty? (eq-imm '())] - ['box? - (type-pred ptr-mask type-box)] - ['cons? - (type-pred ptr-mask type-cons)] - ['vector? - (type-pred ptr-mask type-vect)] - ['string? - (type-pred ptr-mask type-str)] - ['vector-length - (let ((zero (gensym)) - (done (gensym))) - (seq (assert-vector rax c) - (Xor rax type-vect) - (Cmp rax 0) - (Je zero) - (Mov rax (Offset rax 0)) - (Sal rax int-shift) - (Jmp done) - (Label zero) - (Mov rax 0) - (Label done)))] - ['string-length - (let ((zero (gensym)) - (done (gensym))) - (seq (assert-string rax c) - (Xor rax type-str) - (Cmp rax 0) - (Je zero) - (Mov rax (Offset rax 0)) - (Sal rax int-shift) - (Jmp done) - (Label zero) - (Mov rax 0) - (Label done)))])) - -;; Op2 CEnv -> Asm -(define (compile-op2 p c) - (match p - ['+ - (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) - (Add rax r8))] - ['- - (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) - (Sub r8 rax) - (Mov rax r8))] - ['cons - (seq (Mov (Offset rbx 0) rax) - (Pop rax) - (Mov (Offset rbx 8) rax) - (Mov rax rbx) - (Or rax type-cons) - (Add rbx 16))] - ['make-vector - (let ((loop (gensym)) - (done (gensym)) - (empty (gensym))) - (seq (Pop r8) - (assert-natural r8 c) - (Cmp r8 0) ; special case empty vector - (Je empty) - - (Mov r9 rbx) - (Or r9 type-vect) - - (Sar r8 int-shift) - (Mov (Offset rbx 0) r8) - (Add rbx 8) - - (Label loop) - (Mov (Offset rbx 0) rax) - (Add rbx 8) - (Sub r8 1) - (Cmp r8 0) - (Jne loop) - - (Mov rax r9) - (Jmp done) - - (Label empty) - 
(Mov rax type-vect) - (Label done)))] - - ['vector-ref - (seq (Pop r8) - (assert-vector r8 c) - (assert-integer rax c) - (Cmp rax 0) - (Jl (error-label c)) - (Xor r8 type-vect) ; r8 = ptr - (Mov r9 (Offset r8 0)) ; r9 = len - (Sar rax int-shift) ; rax = index - (Sub r9 1) - (Cmp r9 rax) - (Jl (error-label c)) - (Sal rax 3) - (Add r8 rax) - (Mov rax (Offset r8 8)))] - - ['make-string - (let ((loop (gensym)) - (done (gensym)) - (empty (gensym))) - (seq (Pop r8) - (assert-natural r8 c) - (assert-char rax c) - (Cmp r8 0) ; special case empty string - (Je empty) - - (Mov r9 rbx) - (Or r9 type-str) - - (Sar r8 int-shift) - (Mov (Offset rbx 0) r8) - (Add rbx 8) - - (Sar rax char-shift) - - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd - - (Label loop) - (Mov (Offset rbx 0) eax) - (Add rbx 4) - (Sub r8 1) - (Cmp r8 0) - (Jne loop) - - (Mov rax r9) - (Jmp done) - - (Label empty) - (Mov rax type-str) - (Label done)))] - - ['string-ref - (seq (Pop r8) - (assert-string r8 c) - (assert-integer rax c) - (Cmp rax 0) - (Jl (error-label c)) - (Xor r8 type-str) ; r8 = ptr - (Mov r9 (Offset r8 0)) ; r9 = len - (Sar rax int-shift) ; rax = index - (Sub r9 1) - (Cmp r9 rax) - (Jl (error-label c)) - (Sal rax 2) - (Add r8 rax) - (Mov 'eax (Offset r8 8)) - (Sal rax char-shift) - (Or rax type-char))])) - -;; Op3 CEnv -> Asm -(define (compile-op3 p c) - (match p - ['vector-set! 
- (seq (Pop r10) - (Pop r8) - (assert-vector r8 c) - (assert-integer r10 c) - (Cmp r10 0) - (Jl (error-label c)) - (Xor r8 type-vect) ; r8 = ptr - (Mov r9 (Offset r8 0)) ; r9 = len - (Sar r10 int-shift) ; r10 = index - (Sub r9 1) - (Cmp r9 r10) - (Jl (error-label c)) - (Sal r10 3) - (Add r8 r10) - (Mov (Offset r8 8) rax) - (Mov rax val-void))])) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (assert-type mask type) - (λ (arg c) - (seq (Mov r9 arg) - (And r9 mask) - (Cmp r9 type) - (Jne (error-label c))))) - -(define (type-pred mask type) - (let ((l (gensym))) - (seq (And rax mask) - (Cmp rax type) - (Mov rax (imm->bits #t)) - (Je l) - (Mov rax (imm->bits #f)) - (Label l)))) - -(define assert-integer - (assert-type mask-int type-int)) -(define assert-char - (assert-type mask-char type-char)) -(define assert-box - (assert-type ptr-mask type-box)) -(define assert-cons - (assert-type ptr-mask type-cons)) -(define assert-vector - (assert-type ptr-mask type-vect)) -(define assert-string - (assert-type ptr-mask type-str)) - -(define (assert-codepoint c) - (let ((ok (gensym))) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 1114111)) - (Jg (error-label c)) - (Cmp rax (imm->bits 55295)) - (Jl ok) - (Cmp rax (imm->bits 57344)) - (Jg ok) - (Jmp (error-label c)) - (Label ok)))) - -(define (assert-byte c) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 255)) - (Jg (error-label c)))) - -(define (assert-natural r c) - (seq (assert-integer r c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)))) - -;; Value -> Asm -(define (eq-imm imm) - (let ((l1 (gensym))) - (seq (Cmp rax (imm->bits imm)) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))) - -;; CEnv -> Asm -;; Pad the stack to be aligned for a call -(define (pad-stack c) - (match (even? 
(length c)) - [#t (seq (Sub rsp 8))] - [#f (seq)])) - -;; CEnv -> Asm -;; Undo the stack alignment after a call -(define (unpad-stack c) - (match (even? (length c)) - [#t (seq (Add rsp 8))] - [#f (seq)])) - -;; CEnv -> Label -;; Determine correct error handler label to jump to. -(define (error-label c) - (match (even? (length c)) - [#t 'raise_error] - [#f 'raise_error_align])) diff --git a/langs/jig-playground/example.rkt b/langs/jig-playground/example.rkt deleted file mode 100644 index ecab4c55..00000000 --- a/langs/jig-playground/example.rkt +++ /dev/null @@ -1,5 +0,0 @@ -#lang racket -(require "sum.rkt") -(sum (reverse (cons 1 (cons 2 (cons 3 '()))))) - - diff --git a/langs/jig-playground/stdlib-provides.rkt b/langs/jig-playground/stdlib-provides.rkt deleted file mode 100644 index c71feaf9..00000000 --- a/langs/jig-playground/stdlib-provides.rkt +++ /dev/null @@ -1,7 +0,0 @@ -#lang racket -(provide stdlib-provides) -(require "parse.rkt" "ast.rkt") - -(define stdlib-provides - (Module-ps (parse-module-file "stdlib.rkt"))) - diff --git a/langs/jig-playground/sum.rkt b/langs/jig-playground/sum.rkt deleted file mode 100644 index c126e60a..00000000 --- a/langs/jig-playground/sum.rkt +++ /dev/null @@ -1,6 +0,0 @@ -#lang racket -(provide sum) -(define (sum xs) - (if (empty? 
xs) - 0 - (+ (car xs) (sum (cdr xs))))) diff --git a/langs/jig/Makefile b/langs/jig/Makefile index 9b74bfc0..db6ac44a 100644 --- a/langs/jig/Makefile +++ b/langs/jig/Makefile @@ -1,37 +1,44 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif objs = \ main.o \ - values.o \ print.o \ + values.o \ io.o -default: runtime.o +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* runtime.o: $(objs) ld -r $(objs) -o runtime.o %.run: %.o runtime.o - gcc runtime.o $< -o $@ + $(CC) runtime.o $< -o $@ .c.o: - gcc -fPIC -c -g -o $@ $< + $(CC) -fPIC -c -g -o $@ $< .s.o: nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" -test: example.run - @test "$(shell ./example.run)" = "$(shell racket example.rkt)" +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/jig/asm/interp.rkt b/langs/jig/asm/interp.rkt deleted file mode 100644 index 8e05688f..00000000 --- a/langs/jig/asm/interp.rkt +++ /dev/null @@ -1,23 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "printer.rkt" racket/runtime-path) -(define-runtime-path dir "..") - -;; Asm -> Integer -;; Interpret (by assemblying, linking, and exec'ing) x86-64 code -;; Assume: starts with entry point run-time expects -(define (asm-interp a) - (let* ((t.s (make-temporary-file "nasm~a.s")) - (t.run (path-replace-extension t.s #".run"))) - (with-output-to-file t.s - #:exists 'truncate - (λ () - (asm-display a))) - (system (format "(cd ~a && make -s ~a) 2>&1 >/dev/null" dir t.run)) - (delete-file t.s) - (with-input-from-string - (with-output-to-string - (λ () - (system (path->string t.run)) - (delete-file t.run))) - read))) diff --git 
a/langs/jig/asm/printer.rkt b/langs/jig/asm/printer.rkt deleted file mode 100644 index 1abddc83..00000000 --- a/langs/jig/asm/printer.rkt +++ /dev/null @@ -1,82 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -;; Asm -> String -(define (asm->string a) - (foldr (λ (i s) (string-append (instr->string i) s)) "" a)) - -;; Instruction -> String -(define (instr->string i) - (match i - [`(,(? opcode2? o) ,a1 ,a2) - (string-append "\t" - (symbol->string o) " " - (arg->string a1) ", " - (arg->string a2) "\n")] - [`(jmp ,l) - (string-append "\tjmp " (arg->string l) "\n")] - [`(je ,l) - (string-append "\tje " (label->string l) "\n")] - [`(jle ,l) - (string-append "\tjle " (label->string l) "\n")] - [`(jl ,l) - (string-append "\tjl " (label->string l) "\n")] - [`(jg ,l) - (string-append "\tjg " (label->string l) "\n")] - [`(jge ,l) - (string-append "\tjge " (label->string l) "\n")] - [`(jne ,l) - (string-append "\tjne " (label->string l) "\n")] - [`ret "\tret\n"] - [`(neg ,a1) - (string-append "\tneg " (arg->string a1) "\n")] - [`(call ,l) - (string-append "\tcall " (label->string l) "\n")] - [`(push ,r) - (string-append "\tpush " (reg->string r) "\n")] - [`(pop ,r) - (string-append "\tpop " (reg->string r) "\n")] - [l (string-append (label->string l) ":\n")])) - -(define (opcode2? x) - (memq x '(mov add sub cmp and cmovl xor or sal sar lea))) - -;; Arg -> String -(define (arg->string a) - (match a - [(? reg?) (reg->string a)] - [`(offset ,r) - (string-append "[" (arg->string r) "]")] - [`(offset ,r ,i) - (string-append "[" (arg->string r) " + " (number->string (* i 8)) "]")] - [(? integer?) (number->string a)] - [(? symbol?) (label->string a)])) - -;; Any -> Boolean -(define (reg? x) - (and (symbol? 
x) - (memq x '(rax rbx rcx rdx rsp rdi rip rbp rsi r8 r9 r10 r11 r12 r13 r14 r15)))) - -;; Reg -> String -(define (reg->string r) - (symbol->string r)) - -;; Label -> String -;; prefix with _ for Mac -(define label->string - (match (system-type 'os) - ['macosx - (λ (s) (string-append "_" (symbol->string s)))] - [_ symbol->string])) - -;; Asm -> Void -(define (asm-display a) - ;; entry point will be first label - (let ((g (findf symbol? a))) - (display - (string-append "\tglobal " (label->string g) "\n" - "\tdefault rel\n" - "\textern " (label->string 'error) "\n" - "\textern " (label->string 'plus_two) "\n" - "\tsection .text\n" - (asm->string a))))) diff --git a/langs/jig/compile.rkt b/langs/jig/compile.rkt deleted file mode 100644 index e7b10b1b..00000000 --- a/langs/jig/compile.rkt +++ /dev/null @@ -1,227 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" "compile-ops.rkt" a86/ast) - -;; Registers used -(define rax 'rax) ; return -(define rbx 'rbx) ; heap -(define rsp 'rsp) ; stack -(define rdi 'rdi) ; arg -(define r15 'r15) - -;; type CEnv = [Listof Variable] - -;; Prog -> Asm -(define (compile p) - (match p - [(Prog ds e) - (prog (Extern 'peek_byte) - (Extern 'read_byte) - (Extern 'write_byte) - (Extern 'raise_error) - (Global 'entry) - (Label 'entry) - (Mov rbx rdi) ; recv heap pointer - (Pop r15) - (compile-e e '() #t) - (Jmp r15) - (Label 'raise_error_align) - (Sub rsp 8) - (Jmp 'raise_error) - (compile-defines ds))])) - -;; [Listof Defn] -> Asm -(define (compile-defines ds) - (match ds - ['() (seq)] - [(cons d ds) - (seq (compile-define d) - (compile-defines ds))])) - -;; Defn -> Asm -(define (compile-define d) - (match d - [(Defn f xs e) - (seq (Label (symbol->label f)) - (compile-e e (reverse xs) #t) - (Add rsp (* 8 (length xs))) ; pop args - (Jmp r15))])) - -;; Expr CEnv Bool -> Asm -(define (compile-e e c t?) 
- (match e - [(Int i) (compile-value i)] - [(Bool b) (compile-value b)] - [(Char c) (compile-value c)] - [(Eof) (compile-value eof)] - [(Empty) (compile-value '())] - [(Var x) (compile-variable x c)] - [(Str s) (compile-string s)] - [(Prim0 p) (compile-prim0 p c)] - [(Prim1 p e) (compile-prim1 p e c)] - [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] - [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] - [(If e1 e2 e3) (compile-if e1 e2 e3 c t?)] - [(Begin e1 e2) (compile-begin e1 e2 c t?)] - [(Let x e1 e2) (compile-let x e1 e2 c t?)] - [(App f es) (compile-app f es c t?)])) - -;; Value -> Asm -(define (compile-value v) - (seq (Mov rax (imm->bits v)))) - -;; Id CEnv -> Asm -(define (compile-variable x c) - (let ((i (lookup x c))) - (seq (Mov rax (Offset rsp i))))) - -;; String -> Asm -(define (compile-string s) - (let ((len (string-length s))) - (if (zero? len) - (seq (Mov rax type-str)) - (seq (Mov rax len) - (Mov (Offset rbx 0) rax) - (compile-string-chars (string->list s) 8) - (Mov rax rbx) - (Or rax type-str) - (Add rbx - (+ 8 (* 4 (if (odd? len) (add1 len) len)))))))) - -;; [Listof Char] Integer -> Asm -(define (compile-string-chars cs i) - (match cs - ['() (seq)] - [(cons c cs) - (seq (Mov rax (char->integer c)) - (Mov (Offset rbx i) 'eax) - (compile-string-chars cs (+ 4 i)))])) - -;; Op0 CEnv -> Asm -(define (compile-prim0 p c) - (compile-op0 p c)) - -;; Op1 Expr CEnv -> Asm -(define (compile-prim1 p e c) - (seq (compile-e e c #f) - (compile-op1 p c))) - -;; Op2 Expr Expr CEnv -> Asm -(define (compile-prim2 p e1 e2 c) - (seq (compile-e e1 c #f) - (Push rax) - (compile-e e2 (cons #f c) #f) - (compile-op2 p c))) - -;; Op3 Expr Expr Expr CEnv -> Asm -(define (compile-prim3 p e1 e2 e3 c) - (seq (compile-e e1 c #f) - (Push rax) - (compile-e e2 (cons #f c) #f) - (Push rax) - (compile-e e3 (cons #f (cons #f c)) #f) - (compile-op3 p c))) - -;; Expr Expr Expr CEnv Bool -> Asm -(define (compile-if e1 e2 e3 c t?) 
- (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1 c #f) - (Cmp rax val-false) - (Je l1) - (compile-e e2 c t?) - (Jmp l2) - (Label l1) - (compile-e e3 c t?) - (Label l2)))) - -;; Expr Expr CEnv Bool -> Asm -(define (compile-begin e1 e2 c t?) - (seq (compile-e e1 c #f) - (compile-e e2 c t?))) - -;; Id Expr Expr CEnv Bool -> Asm -(define (compile-let x e1 e2 c t?) - (seq (compile-e e1 c #f) - (Push rax) - (compile-e e2 (cons x c) t?) - (Add rsp 8))) - -;; Id [Listof Expr] CEnv Bool -> Asm -(define (compile-app f es c t?) - (if t? - (compile-app-tail f es c) - (compile-app-nontail f es c))) - -;; Id [Listof Expr] CEnv -> Asm -(define (compile-app-tail f es c) - (seq (compile-es es c) - (if (zero? (length c)) - (seq) - (move-args (length es) (length c))) - (Add rsp (* 8 (length c))) - (Jmp (symbol->label f)))) - -;; Integer Integer -> Asm -(define (move-args i off) - (cond [(zero? i) (seq)] - [else - (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) - (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) - (move-args (sub1 i) off))])) - -;; Id [Listof Expr] CEnv -> Asm -;; The return address is placed above the arguments, so callee pops -;; arguments and return address is next frame -(define (compile-app-nontail f es c) - (let ((ret (gensym 'ret)) - (c (cons #f c))) - (seq (pad-stack c) - (Push r15) - (Lea r15 ret) - (compile-es es (static-pad c)) - (Jmp (symbol->label f)) - (Label ret) - (Pop r15) - (unpad-stack c)))) - -(define (static-pad c) - (if (odd? (length c)) - (cons #f c) - c)) - -;; [Listof Expr] CEnv -> Asm -(define (compile-es es c) - (match es - ['() '()] - [(cons e es) - (seq (compile-e e c #f) - (Push rax) - (compile-es es (cons #f c)))])) - -;; Id CEnv -> Integer -(define (lookup x cenv) - (match cenv - ['() (error "undefined variable:" x)] - [(cons y rest) - (match (eq? 
x y) - [#t 0] - [#f (+ 8 (lookup x rest))])])) - -;; Symbol -> Label -;; Produce a symbol that is a valid Nasm label -(define (symbol->label s) - (string->symbol - (string-append - "label_" - (list->string - (map (λ (c) - (if (or (char<=? #\a c #\z) - (char<=? #\A c #\Z) - (char<=? #\0 c #\9) - (memq c '(#\_ #\$ #\# #\@ #\~ #\. #\?))) - c - #\_)) - (string->list (symbol->string s)))) - "_" - (number->string (eq-hash-code s) 16)))) diff --git a/langs/jig/gc-racket.rkt b/langs/jig/gc-racket.rkt deleted file mode 100644 index d12d97a6..00000000 --- a/langs/jig/gc-racket.rkt +++ /dev/null @@ -1,147 +0,0 @@ -#lang racket - -;; This is a sketch of a copying collector written in Racket - -;; RVal ::= -;; | integer -;; | boolean -;; | char -;; | (list 'cons a) -;; | (list 'box a) -;; | (list 'str a) - -;; SVal ::= -;; | rval -;; | integer ; notice the overlap -;; | char - -;; a ::= (list h i) - -(define heap-size 10) -(define to (make-vector heap-size)) -(define from (make-vector heap-size)) - -(define *to-next* 0) -(define *curr* 0) -(define *type-queue* '()) - - -;; [Listof RVal] -> [Listof RVal] -(define (collect roots) - (set! *curr* 0) - (set! *to-next* 0) - (begin0 (move-roots roots) - (move-all) - (let ((tmp to)) - (set! to from) - (set! from tmp)) - (set! *from-next* *to-next*))) - -;; [Listof RVal] -> [Listof RVal] -;; EFFECT: shallowly moves data pointed to by roots to 'to' space, -;; leaving fowarding address in 'from' space -(define (move-roots rs) - (map move-root rs)) - -;; [Listof RVal] -> [Listof RVal] -;; EFFECT: shallowly moves data pointed to by root to 'to' space, -;; leaving fowarding address in 'from' space -(define (move-root r) - (match r - [(list τ (list h i)) - (match (vector-ref h i) - [(list _ (list (? to?) 
j)) ; fwd reference - (list τ (list to j))] - [_ - (begin0 (list τ (list to *to-next*)) - (move-obj τ i))])] - ;; not a pointer - [_ r])) - -;; Type Index -> Void -;; EFFECT: Moves object of type τ at from-i to *to-next* -;; Pushes the type on the type queue so the moved object -;; can be interpreted appropriately later -;; If any objects were not word-aligned records, -;; this would need to be adapted -(define (move-obj τ from-i) - (for ((i (size-of (list τ (list from from-i))))) - (vector-set! to *to-next* (vector-ref from (+ from-i i))) - (when (zero? i) ; fwd pointer - (vector-set! from from-i (list τ (list to *to-next*)))) - (set! *to-next* (add1 *to-next*))) - (push! τ)) - -(define to? (λ (h) (eq? h to))) - -;; -> Void -;; EFFECT: Move all objects starting from *curr*, -;; interpreting bits according to the type queue. -(define (move-all) - (let loop () - (unless (= *curr* *to-next*) - (move-curr) - (loop)))) - -;; -> Void -;; EFFECT: Move object at *curr*, interpreting bits according -;; to front of type queue. -(define (move-curr) - (let ((τ (pop!))) - (match τ - ['box (scan-word)] - ['cons (scan-word) (scan-word)] - ['str (set! *curr* (+ *curr* (add1 (vector-ref to *curr*))))]))) - -;; -> Void -;; *curr* is at the start of a value (i.e. a single word) -(define (scan-word) - (match (vector-ref to *curr*) - [(list τ (list from i)) - (match (vector-ref from i) - [(list _ (list (? to?) j)) ; fwd reference - (vector-set! to *curr* (list τ (list to j)))] - [_ - (vector-set! to *curr* (list τ (list to *to-next*))) - (move-obj τ i)])] - [_ (void)]) - - (set! *curr* (add1 *curr*))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (size-of r) - (match r - [(list 'box _) 1] - [(list 'cons _) 2] - [(list 'str (list h i)) - (add1 (vector-ref h i))])) - -(define (push! τ) - (printf "pushing ~a\n" τ) - (set! *type-queue* - (append *type-queue* (list τ)))) - -(define (pop!) 
- (let ((τ (car *type-queue*))) - (printf "popping ~a\n" τ) - (begin0 τ - (set! *type-queue* (cdr *type-queue*))))) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; an example - -(define roots - (list (list 'cons (list from 0)) - (list 'cons (list from 0)))) - -(vector-set! from 0 (list 'str (list from 3))) -(vector-set! from 1 (list 'box (list from 6))) ;(list 'str (list from 3))) ; sharing a string -(vector-set! from 2 3) ; dead -(vector-set! from 3 2) -(vector-set! from 4 #\a) -(vector-set! from 5 #\b) -(vector-set! from 6 #\c) - -(define *from-next* 7) diff --git a/langs/jig/gc.c b/langs/jig/gc.c deleted file mode 100644 index adc38a19..00000000 --- a/langs/jig/gc.c +++ /dev/null @@ -1,236 +0,0 @@ -#include -#include -#include -#include "types.h" -#include "heap.h" - -#define DEBUG 1 - -#ifdef DEBUG -# define GC_DEBUG(x) x -#else -# define GC_DEBUG(x) -#endif - -void print_result(int64_t result); -void print_mem(int64_t *, int64_t *); - -const char * ptr_type_to_string(int64_t tag); - -int obj_size(int64_t v) { - int type_tag = ptr_type_mask & v; - int64_t * obj = (int64_t *)(v ^ type_tag); - switch (type_tag) { - case box_type_tag: - return 1; - case cons_type_tag: - return 2; - default: - printf("unkown object type in obj_size"); - exit(1); - } -} - -void print_types(type_front, type_rear) { - int j; - printf("TYPES:\n"); - for (j = type_front; j < type_rear; j++) { - printf(" [%d]: %s\n", j, - ((type[j] == box_type_tag) ? "box" : - (type[j] == cons_type_tag) ? "cons" : - "unknown")); - } -} - -void move_obj(char ptr_type, int64_t * addr, int64_t ** to_next) { - GC_DEBUG(printf("move_obj <%s> at [%" PRIx64 "] to [%" PRIx64 "]\n", - ptr_type_to_string((int64_t) ptr_type), - (int64_t) addr, - (int64_t) *to_next)); - int size = - (ptr_type == box_type_tag) ? 1 : - (ptr_type == cons_type_tag) ? 
2 : - -1; - - int i; - for (i = 0; i < size; i++) { - GC_DEBUG(printf(" [%" PRIx64 "] <- [%" PRIx64 "]\n", - (int64_t) *to_next, (int64_t) (addr + i))); - - *to_next[0] = addr[i]; - if (i == 0) { - *addr = (int64_t) *to_next | ptr_type; // fwd - GC_DEBUG(printf(" [%" PRIx64 "] <- [%" PRIx64 "] (fwd)\n", - (int64_t) addr , (int64_t) *to_next)); - - } - *to_next = *to_next + 1; - } -} - -#define is_fwd(a) ((((a & ptr_addr_mask) - (int64_t) &heap[heap_size]) ^ (int64_t) from_side) >= 0) - -void scan_word(int64_t ** curr, int64_t ** to_next, char * type_rear) { - GC_DEBUG(printf("scan_word [%" PRIx64 "]: ", (int64_t) *curr)); - int64_t v = **curr; - if (v & ptr_type_mask) { - int64_t t = ptr_type_mask & v; - int64_t * a = (int64_t *) (ptr_addr_mask & v); - if ((*a & ptr_type_mask) && (is_fwd(*a))) { - GC_DEBUG(printf("&[%" PRIx64 "] -> [%" PRIx64 "]", - (int64_t) a, (int64_t) (*a & ptr_addr_mask))); - *curr[0] = *a; - } else { - GC_DEBUG(printf("&<%s>[%" PRIx64 "]\n", ptr_type_to_string(t), (int64_t) a)); - **curr = ((int64_t) (*to_next)) | t; - move_obj(t, a, to_next); - GC_DEBUG(printf("PUSH!: %s", ptr_type_to_string((int64_t) t))); - type[*type_rear] = t; (*type_rear)++; - } - } else { - GC_DEBUG(print_result(v)); // an immediate - } - GC_DEBUG(printf("\n")); - *curr = *curr + 1; -} - -struct Pair { - int64_t x; - int64_t y; -}; - - -int64_t * collect_garbage_p(int64_t * rdi, int64_t * rbp, int64_t * rsp) { - struct Pair p; - p.x = 9 << int_shift; - p.y = 32 << int_shift; - return rdi; -} - -int64_t * collect_garbage(int64_t * rdi, int64_t * rbp, int64_t * rsp) { - - int64_t * to_space = (from_side < 0) ? heap + heap_size : heap; - int64_t * from_space = (from_side > 0) ? 
heap + heap_size : heap; - int64_t * to_next = to_space; - - char type_front = 0; - char type_rear = 0; - - GC_DEBUG(printf("--------------------------------------------\n")); - GC_DEBUG(printf("TRACING ROOTS\nROOTS:\n")); - GC_DEBUG(print_mem(rsp, rbp)); - GC_DEBUG(printf("FROM:\n")); - GC_DEBUG(print_mem(from_space, rdi)); - - // roots - // shallowly move data pointed to by each root to 'to' space, - // leaving forwarding address in 'from' space. - int64_t * root = rsp; - while (root != rbp) { - GC_DEBUG(printf("scan_root:")); - GC_DEBUG(print_mem(root, root + 1)); - int64_t v = root[0]; - if (ptr_type_mask & v) { - int64_t * a = (int64_t *) (ptr_addr_mask & v); - if ((*a & ptr_type_mask) && (is_fwd(*a))) { - root[0] = *a; - GC_DEBUG(printf("forward pointer, resolving\n")); - } else { - int64_t t = ptr_type_mask & v; - move_obj(t, a, &to_next); - root[0] = *a; - GC_DEBUG(printf("PUSH!: %s\n", ptr_type_to_string((int64_t) t))); - type[type_rear] = t; type_rear++; - } - } else { - GC_DEBUG(printf(" ")); - GC_DEBUG(print_result(root[0])); - } - - GC_DEBUG(print_types(type_front, type_rear)); - GC_DEBUG(printf("ROOT:\n")); - GC_DEBUG(print_mem(rsp, rbp)); - GC_DEBUG(printf("FROM:\n")); - GC_DEBUG(print_mem(from_space, rdi)); - GC_DEBUG(printf("TO:\n")); - GC_DEBUG(print_mem(to_space, to_next)); - - // advance - root = &root[1]; - } - - GC_DEBUG(printf("--------------------------------------------\n")); - GC_DEBUG(printf("TRACING TO SPACE\n")); - - int64_t * curr = to_space; - while (curr != to_next) { - int j; - char t = type[type_front]; type_front++; - - GC_DEBUG(printf("TRACING A %s\n", ptr_type_to_string((int64_t) t))); - - switch (t) { - case box_type_tag: - scan_word(&curr, &to_next, &type_rear); - break; - case cons_type_tag: - scan_word(&curr, &to_next, &type_rear); - scan_word(&curr, &to_next, &type_rear); - break; - default: - printf("unknown type: %d!!!\n", t); - exit(1); - } - - GC_DEBUG(print_types(type_front, type_rear)); - GC_DEBUG(printf("FROM:\n")); 
- GC_DEBUG(print_mem(from_space, rdi)); - GC_DEBUG(printf("TO:\n")); - GC_DEBUG(print_mem(to_space, to_next)); - } - - GC_DEBUG(printf("--------------------------------------------\n")); - GC_DEBUG(printf("DONE\n")); - - GC_DEBUG(printf("ROOT:\n")); - GC_DEBUG(print_mem(rsp, rbp)); - GC_DEBUG(printf("TO:\n")); - GC_DEBUG(print_mem(to_space, to_next)); - - from_side = 0 - from_side; - return to_next; -} - -const char * ptr_type_to_string(int64_t tag) { - switch (tag) { - case box_type_tag: - return "box"; - case cons_type_tag: - return "cons"; - default: - return "unknown"; - } -} - -void print_blobs(void *h, void *end_of_heap, int size_in_bytes, void (* print)()) { - int i; - while (h < end_of_heap) { - (*print)(h); - h = h + size_in_bytes; - } -} - -void print_word(int64_t *a) { - printf(" [%" PRIx64 "]: ", (int64_t)a); - if (ptr_type_mask & *a) { - printf("&<%s>%" PRIx64 "\n", ptr_type_to_string(ptr_type_mask & *a), *a & ptr_addr_mask); - } else { - print_result(*a); - printf("\n"); - } -} - -void print_mem(int64_t *h, int64_t *end_of_heap) { - print_blobs(h, end_of_heap, 8, &print_word); -} - diff --git a/langs/jig/info.rkt b/langs/jig/info.rkt new file mode 100644 index 00000000..41ec40bb --- /dev/null +++ b/langs/jig/info.rkt @@ -0,0 +1,2 @@ +#lang info +(define pre-install-collection "../installer.rkt") diff --git a/langs/jig/interp-file.rkt b/langs/jig/interp-file.rkt deleted file mode 100644 index aabe615b..00000000 --- a/langs/jig/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt" "read-all.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (println (interp (parse (read-all p)))) - (close-input-port p)))) diff --git a/langs/jig/interp-prims.rkt b/langs/jig/interp-prims.rkt index 601b240b..c7afbb4e 100644 --- a/langs/jig/interp-prims.rkt 
+++ b/langs/jig/interp-prims.rkt @@ -34,6 +34,7 @@ [(list '< (? integer?) (? integer?)) (< v1 v2)] [(list '= (? integer?) (? integer?)) (= v1 v2)] [(list 'cons v1 v2) (cons v1 v2)] + [(list 'eq? v1 v2) (eq? v1 v2)] [(list 'make-vector (? integer?) _) (if (<= 0 v1) (make-vector v1 v2) diff --git a/langs/jig/io.c b/langs/jig/io.c index 7ef82281..8a417c91 100644 --- a/langs/jig/io.c +++ b/langs/jig/io.c @@ -7,14 +7,14 @@ val_t read_byte(void) { char c = getc(in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } val_t peek_byte(void) { char c = getc(in); ungetc(c, in); - return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } diff --git a/langs/jig/print.c b/langs/jig/print.c index a88a5779..acb1413b 100644 --- a/langs/jig/print.c +++ b/langs/jig/print.c @@ -808,7 +808,7 @@ void print_char(val_char_t c) void print_codepoint(val_char_t c) { - static char buffer[5] = {0}; + char buffer[5] = {0}; utf8_encode_char(c, buffer); printf("%s", buffer); } diff --git a/langs/jig/types.rkt b/langs/jig/types.rkt deleted file mode 100644 index 806fd02e..00000000 --- a/langs/jig/types.rkt +++ /dev/null @@ -1,66 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -(define imm-shift 3) -(define imm-mask #b111) -(define ptr-mask #b111) -(define type-box #b001) -(define type-cons #b010) -(define type-vect #b011) -(define type-str #b100) -(define int-shift (+ 1 imm-shift)) -(define char-shift (+ 2 imm-shift)) -(define type-int #b0000) -(define mask-int #b1111) -(define type-char #b01000) -(define mask-char #b11111) -(define val-true #b0011000) -(define val-false #b0111000) -(define val-eof #b1011000) -(define val-void #b1111000) -(define val-empty #b10011000) - -(define (bits->value b) - (cond [(= type-int (bitwise-and b mask-int)) - (arithmetic-shift b (- int-shift))] - [(= type-char (bitwise-and b mask-char)) - (integer->char (arithmetic-shift b (- char-shift)))] - 
[(= b val-true) #t] - [(= b val-false) #f] - [(= b val-eof) eof] - [(= b val-void) (void)] - [(= b val-empty) '()] - [else (error "invalid bits")])) - -(define (imm->bits v) - (cond [(eof-object? v) val-eof] - [(integer? v) (arithmetic-shift v int-shift)] - [(char? v) - (bitwise-ior type-char - (arithmetic-shift (char->integer v) char-shift))] - [(eq? v #t) val-true] - [(eq? v #f) val-false] - [(void? v) val-void] - [(empty? v) val-empty])) - - -(define (imm-bits? v) - (zero? (bitwise-and v imm-mask))) - -(define (int-bits? v) - (zero? (bitwise-and v mask-int))) - -(define (char-bits? v) - (= type-char (bitwise-and v mask-char))) - -(define (cons-bits? v) - (zero? (bitwise-xor (bitwise-and v imm-mask) type-cons))) - -(define (box-bits? v) - (zero? (bitwise-xor (bitwise-and v imm-mask) type-box))) - -(define (vect-bits? v) - (zero? (bitwise-xor (bitwise-and v imm-mask) type-vect))) - -(define (str-bits? v) - (zero? (bitwise-xor (bitwise-and v imm-mask) type-str))) diff --git a/langs/jig/unload-bits-asm.rkt b/langs/jig/unload-bits-asm.rkt deleted file mode 100644 index be9b50c8..00000000 --- a/langs/jig/unload-bits-asm.rkt +++ /dev/null @@ -1,43 +0,0 @@ -#lang racket -(provide unload/free unload-value) -(require "types.rkt" - ffi/unsafe) - -;; Answer* -> Answer -(define (unload/free a) - (match a - ['err 'err] - [(cons h v) (begin0 (unload-value v) - (free h))])) - -;; Value* -> Value -(define (unload-value v) - (match v - [(? imm-bits?) (bits->value v)] - [(? box-bits? i) - (box (unload-value (heap-ref i)))] - [(? cons-bits? i) - (cons (unload-value (heap-ref (+ i 8))) - (unload-value (heap-ref i)))] - [(? vect-bits? i) - (if (zero? (untag i)) - (vector) - (build-vector (heap-ref i) - (lambda (j) - (unload-value (heap-ref (+ i (* 8 (add1 j))))))))] - [(? str-bits? i) - (if (zero? 
(untag i)) - (string) - (build-string (heap-ref i) - (lambda (j) - (char-ref (+ i 8) j))))])) - -(define (untag i) - (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) - (integer-length ptr-mask))) - -(define (heap-ref i) - (ptr-ref (cast (untag i) _int64 _pointer) _uint64)) - -(define (char-ref i j) - (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j))) diff --git a/langs/jig/values.c b/langs/jig/values.c index a61d65e6..62bca18c 100644 --- a/langs/jig/values.c +++ b/langs/jig/values.c @@ -38,6 +38,10 @@ int64_t val_unwrap_int(val_t x) { return x >> int_shift; } +val_t val_wrap_byte(unsigned char b) +{ + return (b << int_shift) | int_type_tag; +} val_t val_wrap_int(int64_t i) { return (i << int_shift) | int_type_tag; diff --git a/langs/jig/values.h b/langs/jig/values.h index 4cc48bbe..b6ac44f9 100644 --- a/langs/jig/values.h +++ b/langs/jig/values.h @@ -49,6 +49,7 @@ type_t val_typeof(val_t x); */ int64_t val_unwrap_int(val_t x); val_t val_wrap_int(int64_t i); +val_t val_wrap_byte(unsigned char b); int val_unwrap_bool(val_t x); val_t val_wrap_bool(int b); diff --git a/langs/knock/Makefile b/langs/knock/Makefile index 7b1934bc..db6ac44a 100644 --- a/langs/knock/Makefile +++ b/langs/knock/Makefile @@ -1,32 +1,44 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif -%.run: %.o runtime.o - gcc runtime.o $< -o $@ +objs = \ + main.o \ + print.o \ + values.o \ + io.o + +default: submit.zip -runtime.o: main.o char.o io.o - ld -r main.o char.o io.o -o runtime.o +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* -main.o: main.c types.h runtime.h - gcc -fPIC -c main.c -o main.o +runtime.o: $(objs) + ld -r $(objs) -o runtime.o -char.o: char.c types.h - gcc -fPIC -c char.c -o char.o +%.run: %.o runtime.o + $(CC) runtime.o $< -o $@ -io.o: io.c runtime.h - gcc -fPIC -c io.c -o io.o +.c.o: + $(CC) -fPIC -c 
-g -o $@ $< -%.o: %.s - nasm -f $(format) -o $@ $< +.s.o: + nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" + +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/knock/ast.rkt b/langs/knock/ast.rkt deleted file mode 100644 index 3efcd792..00000000 --- a/langs/knock/ast.rkt +++ /dev/null @@ -1,73 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -;; type Prog = (Prog (Listof Defn) Expr) -(struct Prog (ds e) #:prefab) - -;; type Defn = (Defn Id (Listof Id) Expr) -(struct Defn (f xs e) #:prefab) - -;; type Expr = (Eof) -;; | (Empty) -;; | (Fun Id) <-- New for Knock -;; | (Int Integer) -;; | (Bool Boolean) -;; | (Char Character) -;; | (Prim0 Op0) -;; | (Prim1 Op1 Expr) -;; | (Prim1 Op2 Op2 Expr) -;; | (If Expr Expr Expr) -;; | (Begin Expr Expr) -;; | (Let Id Expr Expr) -;; | (Var Id) -;; | (App Id (Listof Expr)) -;; | (Call Expr (Listof Expr)) <-- New for Knock -;; type Id = Symbol -;; type Op0 = 'read-byte | 'void | 'collect-garbage -;; type Op1 = 'add1 | 'sub1 | 'zero? -;; | 'char? | 'integer->char | 'char->integer -;; | 'write-byte | 'eof-object? -;; | 'box | 'car | 'cdr | 'unbox -;; | 'empty? -;; type Op2 = '+ | '- | 'eq? -;; | 'cons -(struct Eof () #:prefab) -(struct Empty () #:prefab) -(struct Int (i) #:prefab) -(struct Bool (b) #:prefab) -(struct Char (c) #:prefab) -(struct Prim0 (p) #:prefab) -(struct Prim1 (p e) #:prefab) -(struct Prim2 (p e1 e2) #:prefab) -(struct If (e1 e2 e3) #:prefab) -(struct Begin (e1 e2) #:prefab) -(struct Let (x e1 e2) #:prefab) -(struct Var (x) #:prefab) -(struct Fun (f) #:prefab) -(struct App (f es) #:prefab) -(struct FCall (f es) #:prefab) - - -;; Helper functions - -;; Does an Expr represent an immediate (i.e. flat) value? -;; Expr -> Bool -(define (imm? 
e) - (match e - [(Int i) #t] - [(Bool b) #t] - [(Char c) #t] - [(Eof) #t] - [(Empty) #t] - [_ #f])) - -;; Get the 'actual' value out of an immediate. -;; Expr -> Imm -(define (get-imm e) - (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Eof) eof] - [(Empty) '()] - [_ (error (~a "get-imm: " e " is not an immedate!"))])) diff --git a/langs/knock/compile-file.rkt b/langs/knock/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/knock/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/knock/compile.rkt b/langs/knock/compile.rkt deleted file mode 100644 index 1246f406..00000000 --- a/langs/knock/compile.rkt +++ /dev/null @@ -1,472 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) - -;; Registers used -(define rax 'rax) ; return -(define rbx 'rbx) ; heap -(define rdx 'rdx) ; return, 2 -(define r8 'r8) ; scratch in +, - -(define r9 'r9) ; scratch in assert-type and tail-calls -(define rsp 'rsp) ; stack -(define rdi 'rdi) ; arg - -;; type CEnv = [Listof Variable] - -;; Expr -> Asm -(define (compile p) - (match p - [(Prog ds e) - (prog (Extern 'peek_byte) - (Extern 'read_byte) - (Extern 'write_byte) - (Extern 'raise_error) - (Global 'entry) - (Label 'entry) - (Mov rbx rdi) ; recv heap pointer - (compile-e e '(#f)) ; NOT A TAIL CALL! We can't re-use the frame!!! 
- (Mov rdx rbx) ; return heap pointer in second return register - (Ret) - (compile-defines ds))])) - -;; [Listof Defn] -> Asm -(define (compile-defines ds) - (seq - (match ds - ['() (seq)] - [(cons d ds) - (seq (compile-define d) - (compile-defines ds))]))) - -;; Defn -> Asm -(define (compile-define d) - (match d - [(Defn f xs e) - ; leave space for RIP - (let ((env (parity (cons #f (reverse xs))))) - (seq (Label (symbol->label f)) - ; we need the #args on the frame, not the length of the entire - ; env (which may have padding) - (compile-tail-e e env (length xs)) - (Ret)))])) - -(define (parity c) - (if (even? (length c)) - (append c (list #f)) - c)) - -;; Expr Expr Expr CEnv Int -> Asm -(define (compile-tail-e e c s) - (seq - (match e - [(If e1 e2 e3) (compile-tail-if e1 e2 e3 c s)] - [(Let x e1 e2) (compile-tail-let x e1 e2 c s)] - [(App f es) (if (<= (length es) s) - (compile-tail-call f es c) - (compile-app f es c))] - [(FCall e1 es) (if (<= (length es) s) - (compile-tail-fun-call e1 es c) - (compile-fun-call e1 es c))] - [(Begin e1 e2) (compile-tail-begin e1 e2 c s)] - [_ (compile-e e c)]))) - -;; Expr CEnv -> Asm -(define (compile-e e c) - (seq - (match e - [(? imm? 
i) (compile-value (get-imm i))] - [(Var x) (compile-variable x c)] - [(Fun f) (compile-fun f)] - [(App f es) (compile-app f es c)] - [(FCall e1 es) (compile-fun-call e1 es c)] - [(Prim0 p) (compile-prim0 p c)] - [(Prim1 p e) (compile-prim1 p e c)] - [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] - [(If e1 e2 e3) (compile-if e1 e2 e3 c)] - [(Begin e1 e2) (compile-begin e1 e2 c)] - [(Let x e1 e2) (compile-let x e1 e2 c)]))) - -;; Value -> Asm -(define (compile-value v) - (seq (Mov rax (imm->bits v)))) - -;; Id CEnv -> Asm -(define (compile-variable x c) - (let ((i (lookup x c))) - (seq (Mov rax (Offset rsp i))))) - -;; Id CEnv -> Asm -(define (compile-fun f) - ; Load the address of the label into rax - (seq (Lea rax (symbol->label f)) - ; Copy the value onto the heap - (Mov (Offset rbx 0) rax) - ; Copy the heap address into rax - (Mov rax rbx) - ; Tag the value as a proc - (Or rax type-proc) - ; Bump the heap pointer - (Add rbx 8))) - -;; Op0 CEnv -> Asm -(define (compile-prim0 p c) - (match p - ['void (seq (Mov rax val-void))] - ['read-byte (seq (pad-stack c) - (Call 'read_byte) - (unpad-stack c))] - ['peek-byte (seq (pad-stack c) - (Call 'peek_byte) - (unpad-stack c))])) - -;; Op1 Expr CEnv -> Asm -(define (compile-prim1 p e c) - (seq (compile-e e c) - (match p - ['add1 - (seq (assert-integer rax) - (Add rax (imm->bits 1)))] - ['sub1 - (seq (assert-integer rax) - (Sub rax (imm->bits 1)))] - ['zero? - (let ((l1 (gensym))) - (seq (assert-integer rax) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char? - (let ((l1 (gensym))) - (seq (And rax mask-char) - (Xor rax type-char) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char->integer - (seq (assert-char rax) - (Sar rax char-shift) - (Sal rax int-shift))] - ['integer->char - (seq assert-codepoint - (Sar rax int-shift) - (Sal rax char-shift) - (Xor rax type-char))] - ['eof-object? 
(eq-imm val-eof)] - ['write-byte - (seq assert-byte - (pad-stack c) - (Mov rdi rax) - (Call 'write_byte) - (unpad-stack c) - (Mov rax val-void))] - ['box - (seq (Mov (Offset rbx 0) rax) - (Mov rax rbx) - (Or rax type-box) - (Add rbx 8))] - ['unbox - (seq (assert-box rax) - (Xor rax type-box) - (Mov rax (Offset rax 0)))] - ['car - (seq (assert-cons rax) - (Xor rax type-cons) - (Mov rax (Offset rax 8)))] - ['cdr - (seq (assert-cons rax) - (Xor rax type-cons) - (Mov rax (Offset rax 0)))] - ['empty? (eq-imm val-empty)]))) - -;; Op2 Expr Expr CEnv -> Asm -(define (compile-prim2 p e1 e2 c) - (seq (compile-e e1 c) - (Push rax) - (compile-e e2 (cons #f c)) - (match p - ['+ - (seq (Pop r8) - (assert-integer r8) - (assert-integer rax) - (Add rax r8))] - ['- - (seq (Pop r8) - (assert-integer r8) - (assert-integer rax) - (Sub r8 rax) - (Mov rax r8))] - ['eq? - (let ((l (gensym))) - (seq (Cmp rax (Offset rsp 0)) - (Sub rsp 8) - (Mov rax val-true) - (Je l) - (Mov rax val-false) - (Label l)))] - ['cons - (seq (Mov (Offset rbx 0) rax) - (Pop rax) - (Mov (Offset rbx 8) rax) - (Mov rax rbx) - (Or rax type-cons) - (Add rbx 16))]))) - -;; Id [Listof Expr] CEnv -> Asm -;; Here's why this code is so gross: you have to align the stack for the call -;; but you have to do it *before* evaluating the arguments es, because you need -;; es's values to be just above 'rsp when the call is made. But if you push -;; a frame in order to align the call, you've got to compile es in a static -;; environment that accounts for that frame, hence: -(define (compile-app f es c) - (if (even? 
(+ (length es) (length c))) - (seq (compile-es es c) - (Call (symbol->label f)) - (Add rsp (* 8 (length es)))) ; pop args - (seq (Sub rsp 8) ; adjust stack - (compile-es es (cons #f c)) - (Call (symbol->label f)) - (Add rsp (* 8 (add1 (length es))))))) ; pop args and pad - - -;; Variable (Listof Expr) CEnv -> Asm -;; Compile a call in tail position -(define (compile-tail-call f es c) - (let ((cnt (length es))) - (seq (compile-es es c) - (move-args cnt (+ cnt (in-frame c))) - (Add rsp (* 8 (+ cnt (in-frame c)))) - (Jmp (symbol->label f))))) - -;; Similar to `compile-app` we have to be concerned about 16-byte alignment -;; of `rsp`. However, the wrinkle is that we also have the function pointer -;; on the stack, so we have to do the calculation with an `extended` env: `env` -(define (compile-fun-call e es c) - (let ((d (length es)) - (env (cons #f c))) - ; We have to computer the function pointer either way. - (seq (compile-e e c) - (assert-proc rax) - (Push rax) - - ; Then we worry about alignment - (if (even? 
(+ d (length env))) - - ; We will be 16-byte aligned - (seq (compile-es es env) - (Mov rax (Offset rsp (* 8 d))) - (Xor rax type-proc) - (Call (Offset rax 0)) - (Add rsp (* 8 (add1 d)))) - - ; We won't be 16-byte aligned, and need to adjust `rsp` - (seq (Sub rsp 8) - (compile-es es env) - (Mov rax (Offset rsp (* 8 (add1 d)))) - (Xor rax type-proc) - (Call (Offset rax 0)) - ; pop arguments, padding, and function pointer - (Add rsp (* 8 (+ 2 d)))))))) - -;; Variable (Listof Expr) CEnv -> Asm -;; Compile a call in tail position -(define (compile-tail-fun-call f es c) - (let ((cnt (length es))) - (seq (compile-e f c) - (assert-proc rax) - (Push rax) - (compile-es es (cons #f c)) - (move-args cnt (+ cnt (add1 (in-frame c)))) - (Mov rax (Offset rsp (* 8 cnt))) - (Xor rax type-proc) - (Add rsp (* 8 (+ cnt (add1 (in-frame c))))) - (Jmp (Offset rax 0))))) - -;; Integer Integer -> Asm -;; Move i arguments upward on stack by offset off -(define (move-args i cnt) - (match i - [0 (seq)] - [_ (seq - ; mov first arg to temp reg - (Mov r9 (Offset rsp (* 8 (sub1 i)))) - ; mov value to correct place on the old frame - (Mov (Offset rsp (* 8 (+ i cnt))) r9) - ; Now do the next one - (move-args (sub1 i) cnt))])) - -;; [Listof Expr] CEnv -> Asm -(define (compile-es es c) - (match es - ['() '()] - [(cons e es) - (seq (compile-e e c) - (Push rax) - (compile-es es (cons #f c)))])) - -;; Imm -> Asm -(define (eq-imm imm) - (let ((l1 (gensym))) - (seq (Cmp rax imm) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))) - -;; Expr Expr Expr CEnv -> Asm -(define (compile-if e1 e2 e3 c) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1 c) - (Cmp rax val-false) - (Je l1) - (compile-e e2 c) - (Jmp l2) - (Label l1) - (compile-e e3 c) - (Label l2)))) - -;; Expr Expr Expr CEnv -> Asm -(define (compile-tail-if e1 e2 e3 c s) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1 c) - (Cmp rax val-false) - (Je l1) - (compile-tail-e e2 c s) - (Jmp l2) - (Label 
l1) - (compile-tail-e e3 c s) - (Label l2)))) - -;; Expr Expr CEnv -> Asm -(define (compile-begin e1 e2 c) - (seq (compile-e e1 c) - (compile-e e2 c))) - -;; Expr Expr CEnv -> Asm -(define (compile-tail-begin e1 e2 c s) - (seq (compile-e e1 c) - (compile-tail-e e2 c s))) - -;; Id Expr Expr CEnv -> Asm -(define (compile-let x e1 e2 c) - (seq (compile-e e1 c) - (Push rax) - (compile-e e2 (cons x c)) - (Add rsp 8))) - -;; Id Expr Expr CEnv -> Asm -(define (compile-tail-let x e1 e2 c s) - (seq (compile-e e1 c) - (Push rax) - (compile-tail-e e2 (cons x c) s) - (Add rsp 8))) - -;; CEnv -> Asm -;; Pad the stack to be aligned for a call with stack arguments -(define (pad-stack-call c i) - (match (even? (+ (length c) i)) - [#f (seq (Sub rsp 8) (% "padding stack"))] - [#t (seq)])) - -;; CEnv -> Asm -;; Pad the stack to be aligned for a call -(define (pad-stack c) - (pad-stack-call c 0)) - -;; CEnv -> Asm -;; Undo the stack alignment after a call -(define (unpad-stack-call c i) - (match (even? (+ (length c) i)) - [#f (seq (Add rsp 8) (% "unpadding"))] - [#t (seq)])) - -;; CEnv -> Asm -;; Undo the stack alignment after a call -(define (unpad-stack c) - (unpad-stack-call c 0)) - -;; Id CEnv -> Integer -(define (lookup x cenv) - (match cenv - ['() (error "undefined variable:" x " Env: " cenv)] - [(cons y rest) - (match (eq? 
x y) - [#t 0] - [#f (+ 8 (lookup x rest))])])) - -(define (in-frame cenv) - (match cenv - ['() 0] - [(cons #f rest) 0] - [(cons y rest) (+ 1 (in-frame rest))])) - -(define (assert-type mask type) - (λ (arg) - (seq (Mov r9 arg) - (And r9 mask) - (Cmp r9 type) - (Jne 'raise_error)))) - -(define (type-pred mask type) - (let ((l (gensym))) - (seq (And rax mask) - (Cmp rax type) - (Mov rax (imm->bits #t)) - (Je l) - (Mov rax (imm->bits #f)) - (Label l)))) - -(define assert-integer - (assert-type mask-int type-int)) -(define assert-char - (assert-type mask-char type-char)) -(define assert-box - (assert-type ptr-mask type-box)) -(define assert-cons - (assert-type ptr-mask type-cons)) -(define assert-proc - (assert-type ptr-mask type-proc)) - -(define assert-codepoint - (let ((ok (gensym))) - (seq (assert-integer rax) - (Cmp rax (imm->bits 0)) - (Jl 'raise_error) - (Cmp rax (imm->bits 1114111)) - (Jg 'raise_error) - (Cmp rax (imm->bits 55295)) - (Jl ok) - (Cmp rax (imm->bits 57344)) - (Jg ok) - (Jmp 'raise_error) - (Label ok)))) - -(define assert-byte - (seq (assert-integer rax) - (Cmp rax (imm->bits 0)) - (Jl 'raise_error) - (Cmp rax (imm->bits 255)) - (Jg 'raise_error))) - -;; Symbol -> Label -;; Produce a symbol that is a valid Nasm label -(define (symbol->label s) - (string->symbol - (string-append - "label_" - (list->string - (map (λ (c) - (if (or (char<=? #\a c #\z) - (char<=? #\A c #\Z) - (char<=? #\0 c #\9) - (memq c '(#\_ #\$ #\# #\@ #\~ #\. 
#\?))) - c - #\_)) - (string->list (symbol->string s)))) - "_" - (number->string (eq-hash-code s) 16)))) diff --git a/langs/knock/gc-racket.rkt b/langs/knock/gc-racket.rkt deleted file mode 100644 index d12d97a6..00000000 --- a/langs/knock/gc-racket.rkt +++ /dev/null @@ -1,147 +0,0 @@ -#lang racket - -;; This is a sketch of a copying collector written in Racket - -;; RVal ::= -;; | integer -;; | boolean -;; | char -;; | (list 'cons a) -;; | (list 'box a) -;; | (list 'str a) - -;; SVal ::= -;; | rval -;; | integer ; notice the overlap -;; | char - -;; a ::= (list h i) - -(define heap-size 10) -(define to (make-vector heap-size)) -(define from (make-vector heap-size)) - -(define *to-next* 0) -(define *curr* 0) -(define *type-queue* '()) - - -;; [Listof RVal] -> [Listof RVal] -(define (collect roots) - (set! *curr* 0) - (set! *to-next* 0) - (begin0 (move-roots roots) - (move-all) - (let ((tmp to)) - (set! to from) - (set! from tmp)) - (set! *from-next* *to-next*))) - -;; [Listof RVal] -> [Listof RVal] -;; EFFECT: shallowly moves data pointed to by roots to 'to' space, -;; leaving fowarding address in 'from' space -(define (move-roots rs) - (map move-root rs)) - -;; [Listof RVal] -> [Listof RVal] -;; EFFECT: shallowly moves data pointed to by root to 'to' space, -;; leaving fowarding address in 'from' space -(define (move-root r) - (match r - [(list τ (list h i)) - (match (vector-ref h i) - [(list _ (list (? to?) j)) ; fwd reference - (list τ (list to j))] - [_ - (begin0 (list τ (list to *to-next*)) - (move-obj τ i))])] - ;; not a pointer - [_ r])) - -;; Type Index -> Void -;; EFFECT: Moves object of type τ at from-i to *to-next* -;; Pushes the type on the type queue so the moved object -;; can be interpreted appropriately later -;; If any objects were not word-aligned records, -;; this would need to be adapted -(define (move-obj τ from-i) - (for ((i (size-of (list τ (list from from-i))))) - (vector-set! to *to-next* (vector-ref from (+ from-i i))) - (when (zero? 
i) ; fwd pointer - (vector-set! from from-i (list τ (list to *to-next*)))) - (set! *to-next* (add1 *to-next*))) - (push! τ)) - -(define to? (λ (h) (eq? h to))) - -;; -> Void -;; EFFECT: Move all objects starting from *curr*, -;; interpreting bits according to the type queue. -(define (move-all) - (let loop () - (unless (= *curr* *to-next*) - (move-curr) - (loop)))) - -;; -> Void -;; EFFECT: Move object at *curr*, interpreting bits according -;; to front of type queue. -(define (move-curr) - (let ((τ (pop!))) - (match τ - ['box (scan-word)] - ['cons (scan-word) (scan-word)] - ['str (set! *curr* (+ *curr* (add1 (vector-ref to *curr*))))]))) - -;; -> Void -;; *curr* is at the start of a value (i.e. a single word) -(define (scan-word) - (match (vector-ref to *curr*) - [(list τ (list from i)) - (match (vector-ref from i) - [(list _ (list (? to?) j)) ; fwd reference - (vector-set! to *curr* (list τ (list to j)))] - [_ - (vector-set! to *curr* (list τ (list to *to-next*))) - (move-obj τ i)])] - [_ (void)]) - - (set! *curr* (add1 *curr*))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (size-of r) - (match r - [(list 'box _) 1] - [(list 'cons _) 2] - [(list 'str (list h i)) - (add1 (vector-ref h i))])) - -(define (push! τ) - (printf "pushing ~a\n" τ) - (set! *type-queue* - (append *type-queue* (list τ)))) - -(define (pop!) - (let ((τ (car *type-queue*))) - (printf "popping ~a\n" τ) - (begin0 τ - (set! *type-queue* (cdr *type-queue*))))) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; an example - -(define roots - (list (list 'cons (list from 0)) - (list 'cons (list from 0)))) - -(vector-set! from 0 (list 'str (list from 3))) -(vector-set! from 1 (list 'box (list from 6))) ;(list 'str (list from 3))) ; sharing a string -(vector-set! from 2 3) ; dead -(vector-set! from 3 2) -(vector-set! from 4 #\a) -(vector-set! from 5 #\b) -(vector-set! 
from 6 #\c) - -(define *from-next* 7) diff --git a/langs/knock/info.rkt b/langs/knock/info.rkt new file mode 100644 index 00000000..41ec40bb --- /dev/null +++ b/langs/knock/info.rkt @@ -0,0 +1,2 @@ +#lang info +(define pre-install-collection "../installer.rkt") diff --git a/langs/knock/interp-file.rkt b/langs/knock/interp-file.rkt deleted file mode 100644 index 69340664..00000000 --- a/langs/knock/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/knock/interp-prims.rkt b/langs/knock/interp-prims.rkt index a1ed9ce4..15039f9f 100644 --- a/langs/knock/interp-prims.rkt +++ b/langs/knock/interp-prims.rkt @@ -1,6 +1,6 @@ #lang racket (require "ast.rkt") -(provide interp-prim1 interp-prim2) +(provide interp-prim1 interp-prim2 interp-prim3) ;; Op1 Value -> Answer (define (interp-prim1 p1 v) @@ -18,6 +18,12 @@ [(list 'car (? pair?)) (car v)] [(list 'cdr (? pair?)) (cdr v)] [(list 'empty? v) (empty? v)] + [(list 'cons? v) (cons? v)] + [(list 'box? v) (box? v)] + [(list 'vector? v) (vector? v)] + [(list 'vector-length (? vector?)) (vector-length v)] + [(list 'string? v) (string? v)] + [(list 'string-length (? string?)) (string-length v)] [_ 'err])) ;; Op2 Value Value -> Answer @@ -25,9 +31,36 @@ (match (list p v1 v2) [(list '+ (? integer?) (? integer?)) (+ v1 v2)] [(list '- (? integer?) (? integer?)) (- v1 v2)] - [(list 'eq? v1 v2) (eqv? v1 v2)] + [(list '< (? integer?) (? integer?)) (< v1 v2)] + [(list '= (? integer?) (? integer?)) (= v1 v2)] [(list 'cons v1 v2) (cons v1 v2)] - [_ 'err])) + [(list 'eq? v1 v2) (eq? v1 v2)] + [(list 'make-vector (? integer?) _) + (if (<= 0 v1) + (make-vector v1 v2) + 'err)] + [(list 'vector-ref (? 
vector?) (? integer?)) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-ref v1 v2) + 'err)] + [(list 'make-string (? integer?) (? char?)) + (if (<= 0 v1) + (make-string v1 v2) + 'err)] + [(list 'string-ref (? string?) (? integer?)) + (if (<= 0 v2 (sub1 (string-length v1))) + (string-ref v1 v2) + 'err)] + [_ 'err])) + +;; Op3 Value Value Value -> Answer +(define (interp-prim3 p v1 v2 v3) + (match (list p v1 v2 v3) + [(list 'vector-set! (? vector?) (? integer?) _) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-set! v1 v2 v3) + 'err)] + [_ 'err])) ;; Any -> Boolean (define (codepoint? v) diff --git a/langs/knock/interp.rkt b/langs/knock/interp.rkt deleted file mode 100644 index 28cd9b4e..00000000 --- a/langs/knock/interp.rkt +++ /dev/null @@ -1,110 +0,0 @@ -#lang racket -(provide interp interp-env interp-prim1) -(require "ast.rkt" - "env.rkt" - "interp-prims.rkt") - -;; type Answer = Value | 'err - -;; type Value = -;; | Integer -;; | Boolean -;; | Character -;; | (Fun f) -;; | Eof -;; | Void -;; | '() -;; | (cons Value Value) -;; | (box Value) - -;; type REnv = (Listof (List Id Value)) -;; type Defns = (Listof Defn) - -;; Prog Defns -> Answer -(define (interp p) - (match p - [(Prog ds e) - (interp-env e '() ds)])) - -;; Expr Env Defns -> Answer -(define (interp-env e r ds) - (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Eof) eof] - [(Empty) '()] - [(Var x) (lookup r x)] - [(Prim0 'void) (void)] - [(Prim0 'read-byte) (read-byte)] - [(Prim0 'peek-byte) (peek-byte)] - [(Prim1 p e) - (match (interp-env e r ds) - ['err 'err] - [v (interp-prim1 p v)])] - [(Prim2 p e1 e2) - (match (interp-env e1 r ds) - ['err 'err] - [v1 (match (interp-env e2 r ds) - ['err 'err] - [v2 (interp-prim2 p v1 v2)])])] - [(If p e1 e2) - (match (interp-env p r ds) - ['err 'err] - [v - (if v - (interp-env e1 r ds) - (interp-env e2 r ds))])] - [(Begin e1 e2) - (match (interp-env e1 r ds) - ['err 'err] - [_ (interp-env e2 r ds)])] - [(Let x e1 e2) - (match (interp-env e1 r ds) - 
['err 'err] - [v (interp-env e2 (ext r x v) ds)])] - [(App f es) - (match (interp-env* es r ds) - [(list vs ...) - (match (defns-lookup ds f) - [(Defn f xs body) - ; arity check - (if (= (length vs) (length xs)) - (interp-env body (zip xs vs) ds) - 'err)])])] - [(Fun f) - (match (defns-lookup ds f) - [(Defn f xs body) - (lambda (es r) - (match (interp-env* es r ds) - [(list vs ...) - (if (= (length vs) (length xs)) - (interp-env body (zip xs vs) ds) - 'err)]))] - [_ 'err])] - [(FCall f es) - (match (interp-env f r ds) - [(? procedure? f) (f es r)] - [_ 'err])] - [_ 'err])) - -;; (Listof Expr) REnv Defns -> (Listof Value) | 'err -(define (interp-env* es r ds) - (match es - ['() '()] - [(cons e es) - (match (interp-env e r ds) - ['err 'err] - [v (cons v (interp-env* es r ds))])])) - -;; Defns Symbol -> Defn -(define (defns-lookup ds f) - (findf (match-lambda [(Defn g _ _) (eq? f g)]) - ds)) - -(define (zip xs ys) - (match* (xs ys) - [('() '()) '()] - [((cons x xs) (cons y ys)) - (cons (list x y) - (zip xs ys))])) diff --git a/langs/knock/io.c b/langs/knock/io.c index 8c6b713f..8a417c91 100644 --- a/langs/knock/io.c +++ b/langs/knock/io.c @@ -1,25 +1,25 @@ #include #include #include "types.h" +#include "values.h" #include "runtime.h" -int64_t read_byte(void) { +val_t read_byte(void) +{ char c = getc(in); - return (c == EOF) ? - val_eof : - (int64_t)(c << int_shift); + return (c == EOF) ? val_wrap_eof() : val_wrap_byte(c); } -int64_t peek_byte(void) { +val_t peek_byte(void) +{ char c = getc(in); ungetc(c, in); - return (c == EOF) ? - val_eof : - (int64_t)(c << int_shift); + return (c == EOF) ? 
val_wrap_eof() : val_wrap_byte(c); + } -int64_t write_byte(int64_t c) { - int64_t codepoint = c >> int_shift; - putc((char) codepoint, out); - return 0; +val_t write_byte(val_t c) +{ + putc((char) val_unwrap_int(c), out); + return val_wrap_void(); } diff --git a/langs/knock/main.c b/langs/knock/main.c index 0c743c30..1ca6115f 100644 --- a/langs/knock/main.c +++ b/langs/knock/main.c @@ -1,83 +1,40 @@ #include -#include #include -#include "types.h" +#include "values.h" +#include "print.h" #include "runtime.h" FILE* in; FILE* out; void (*error_handler)(); -int64_t *heap; +val_t *heap; -void print_result(int64_t); - -void error_exit() { +void error_exit() +{ printf("err\n"); exit(1); } -void raise_error() { +void raise_error() +{ return error_handler(); } -int main(int argc, char** argv) { +int main(int argc, char** argv) +{ in = stdin; out = stdout; error_handler = &error_exit; heap = malloc(8 * heap_size); - int64_t result = entry(heap); - // See if we need to print the initial tick - if (cons_type_tag == (ptr_type_mask & result)) printf("'"); - print_result(result); - if (result != val_void) printf("\n"); - free(heap); - return 0; -} -void print_char(int64_t); -void print_cons(int64_t); + val_t result; -void print_result(int64_t result) { - if (cons_type_tag == (ptr_type_mask & result)) { - printf("("); - print_cons(result); - printf(")"); - } else if (box_type_tag == (ptr_type_mask & result)) { - printf("#&"); - print_result (*((int64_t *)(result ^ box_type_tag))); - } else if (proc_type_tag == (ptr_type_mask & result)) { - printf(""); - } else if (int_type_tag == (int_type_mask & result)) { - printf("%" PRId64, result >> int_shift); - } else if (char_type_tag == (char_type_mask & result)) { - print_char(result); - } else { - switch (result) { - case val_true: - printf("#t"); break; - case val_false: - printf("#f"); break; - case val_eof: - printf("#"); break; - case val_empty: - printf("()"); break; - case val_void: - /* nothing */ break; - } - } -} + result = 
entry(heap); + + print_result(result); + if (val_typeof(result) != T_VOID) + putchar('\n'); -void print_cons(int64_t a) { - int64_t car = *((int64_t *)((a + 8) ^ cons_type_tag)); - int64_t cdr = *((int64_t *)((a + 0) ^ cons_type_tag)); - print_result(car); - if (cdr == val_empty) { - // nothing - } else if (cons_type_tag == (ptr_type_mask & cdr)) { - printf(" "); - print_cons(cdr); - } else { - printf(" . "); - print_result(cdr); - } + free(heap); + return 0; } diff --git a/langs/knock/parse.rkt b/langs/knock/parse.rkt deleted file mode 100644 index 1e54805c..00000000 --- a/langs/knock/parse.rkt +++ /dev/null @@ -1,56 +0,0 @@ -#lang racket -(provide parse parse-e) -(require "ast.rkt") - -;; S-Expr -> Prog -(define (parse s) - (match s - [(list 'begin (and ds (list 'define _ _)) ... e) - (Prog (map parse-d ds) (parse-e e))] - [e (Prog '() (parse-e e))])) - -;; S-Expr -> Defn -(define (parse-d s) - (match s - [(list 'define (list (? symbol? f) (? symbol? xs) ...) e) - (Defn f xs (parse-e e))] - [_ (error "Parse defn error" s)])) - -;; S-Expr -> Expr -(define (parse-e s) - (match s - [(? integer?) (Int s)] - [(? boolean?) (Bool s)] - [(? char?) (Char s)] - ['eof (Eof)] - [(? symbol?) (Var s)] - [(list 'quote (list)) (Empty)] - [(list 'fun f) (Fun f)] - [(list (? (op? op0) p0)) (Prim0 p0)] - [(list (? (op? op1) p1) e) (Prim1 p1 (parse-e e))] - [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse-e e1) (parse-e e2))] - [(list 'begin e1 e2) - (Begin (parse-e e1) (parse-e e2))] - [(list 'if e1 e2 e3) - (If (parse-e e1) (parse-e e2) (parse-e e3))] - [(list 'let (list (list (? symbol? x) e1)) e2) - (Let x (parse-e e1) (parse-e e2))] - [`(call ,e1 ,@es) - (FCall (parse-e e1) (map parse-e es))] - [(cons (? symbol? f) es) - (App f (map parse-e es))] - [_ (error "Parse error" s)])) - -(define op0 - '(read-byte peek-byte void)) -(define op1 - '(add1 sub1 zero? char? write-byte eof-object? - integer->char char->integer box unbox empty? car cdr - string? 
string-length)) -(define op2 - '(+ - eq? cons string-ref make-string)) - -(define (op? ops) - (λ (x) - (and (symbol? x) - (memq x ops)))) diff --git a/langs/shakedown/char.c b/langs/knock/print.c similarity index 80% rename from langs/shakedown/char.c rename to langs/knock/print.c index 631e02ea..acb1413b 100644 --- a/langs/shakedown/char.c +++ b/langs/knock/print.c @@ -1,43 +1,131 @@ #include #include -#include "types.h" +#include "values.h" -void print_string_char_u(int64_t v) { - printf("\\u%04X", (int)(v >> imm_shift)); +void print_char(val_char_t); +void print_codepoint(val_char_t); +void print_cons(val_cons_t *); +void print_vect(val_vect_t*); +void print_str(val_str_t*); +void print_str_char(val_char_t); +void print_result_interior(val_t); +int utf8_encode_char(val_char_t, char *); + +void print_result(val_t x) +{ + switch (val_typeof(x)) { + case T_INT: + printf("%" PRId64, val_unwrap_int(x)); + break; + case T_BOOL: + printf(val_unwrap_bool(x) ? "#t" : "#f"); + break; + case T_CHAR: + print_char(val_unwrap_char(x)); + break; + case T_EOF: + printf("#"); + break; + case T_VOID: + break; + case T_EMPTY: + case T_BOX: + case T_CONS: + case T_VECT: + printf("'"); + print_result_interior(x); + break; + case T_STR: + putchar('"'); + print_str(val_unwrap_str(x)); + putchar('"'); + break; + case T_INVALID: + printf("internal error"); + } } -void print_string_char_U(int64_t v) { - printf("\\U%08X", (int)(v >> imm_shift)); +void print_result_interior(val_t x) +{ + switch (val_typeof(x)) { + case T_EMPTY: + printf("()"); + break; + case T_BOX: + printf("#&"); + print_result_interior(val_unwrap_box(x)->val); + break; + case T_CONS: + printf("("); + print_cons(val_unwrap_cons(x)); + printf(")"); + break; + case T_VECT: + print_vect(val_unwrap_vect(x)); + break; + default: + print_result(x); + } } -void print_codepoint(int64_t v) { - int64_t codepoint = v >> imm_shift; - // Print using UTF-8 encoding of codepoint - // https://en.wikipedia.org/wiki/UTF-8 - if (codepoint 
< 128) { - printf("%c", (char) codepoint); - } else if (codepoint < 2048) { - printf("%c%c", - (char)(codepoint >> 6) | 192, - ((char)codepoint & 63) | 128); - } else if (codepoint < 65536) { - printf("%c%c%c", - (char)(codepoint >> 12) | 224, - ((char)(codepoint >> 6) & 63) | 128, - ((char)codepoint & 63) | 128); - } else { - printf("%c%c%c%c", - (char)(codepoint >> 18) | 240, - ((char)(codepoint >> 12) & 63) | 128, - ((char)(codepoint >> 6) & 63) | 128, - ((char)codepoint & 63) | 128); +void print_vect(val_vect_t *v) +{ + uint64_t i; + + if (!v) { printf("#()"); return; } + + printf("#("); + for (i = 0; i < v->len; ++i) { + print_result_interior(v->elems[i]); + + if (i < v->len - 1) + putchar(' '); + } + printf(")"); +} + +void print_cons(val_cons_t *cons) +{ + print_result_interior(cons->fst); + + switch (val_typeof(cons->snd)) { + case T_EMPTY: + // nothing + break; + case T_CONS: + printf(" "); + print_cons(val_unwrap_cons(cons->snd)); + break; + default: + printf(" . "); + print_result_interior(cons->snd); + break; } } -void print_string_char(int64_t v) { - switch (v >> imm_shift) { +void print_str(val_str_t* s) +{ + if (!s) return; + uint64_t i; + for (i = 0; i < s->len; ++i) + print_str_char(s->codepoints[i]); +} + +void print_str_char_u(val_char_t c) +{ + printf("\\u%04X", c); +} + +void print_str_char_U(val_char_t c) +{ + printf("\\U%08X", c); +} + +void print_str_char(val_char_t c) +{ + switch (c) { case 0 ... 6: - print_string_char_u(v); + print_str_char_u(c); break; case 7: printf("\\a"); @@ -61,13 +149,13 @@ void print_string_char(int64_t v) { printf("\\r"); break; case 14 ... 26: - print_string_char_u(v); + print_str_char_u(c); break; case 27: printf("\\e"); break; case 28 ... 31: - print_string_char_u(v); + print_str_char_u(c); break; case 34: printf("\\\""); @@ -451,7 +539,7 @@ void print_string_char(int64_t v) { case 65511 ... 65511: case 65519 ... 65531: case 65534 ... 65535: - print_string_char_u(v); + print_str_char_u(c); break; case 65548 ... 
65548: case 65575 ... 65575: @@ -683,10 +771,69 @@ void print_string_char(int64_t v) { case 178206 ... 194559: case 195102 ... 917759: case 918000 ... 1114110: - print_string_char_U(v); + print_str_char_U(c); break; default: - print_codepoint(v); + print_codepoint(c); break; } } + +void print_char(val_char_t c) +{ + printf("#\\"); + switch (c) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(c); + } +} + +void print_codepoint(val_char_t c) +{ + char buffer[5] = {0}; + utf8_encode_char(c, buffer); + printf("%s", buffer); +} + +int utf8_encode_char(val_char_t c, char *buffer) +{ + // Output to buffer using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (c < 128) { + buffer[0] = (char) c; + return 1; + } else if (c < 2048) { + buffer[0] = (char)(c >> 6) | 192; + buffer[1] = ((char) c & 63) | 128; + return 2; + } else if (c < 65536) { + buffer[0] = (char)(c >> 12) | 224; + buffer[1] = ((char)(c >> 6) & 63) | 128; + buffer[2] = ((char) c & 63) | 128; + return 3; + } else { + buffer[0] = (char)(c >> 18) | 240; + buffer[1] = ((char)(c >> 12) & 63) | 128; + buffer[2] = ((char)(c >> 6) & 63) | 128; + buffer[3] = ((char) c & 63) | 128; + return 4; + } +} diff --git a/langs/jig-playground/print.h b/langs/knock/print.h similarity index 100% rename from langs/jig-playground/print.h rename to langs/knock/print.h diff --git a/langs/hoax/test/build-runtime.rkt b/langs/knock/test/build-runtime.rkt similarity index 100% rename from langs/hoax/test/build-runtime.rkt rename to langs/knock/test/build-runtime.rkt diff --git a/langs/knock/test/compile.rkt b/langs/knock/test/compile.rkt deleted file mode 100644 index 00666520..00000000 --- 
a/langs/knock/test/compile.rkt +++ /dev/null @@ -1,18 +0,0 @@ -#lang racket -(require "test-runner.rkt" - "../parse.rkt" - "../compile.rkt" - "../unload-bits-asm.rkt" - a86/interp) - -;; link with runtime for IO operations -(unless (file-exists? "../runtime.o") - (system "make -C .. runtime.o")) -(current-objs - (list (path->string (normalize-path "../runtime.o")))) - -(test-runner (λ (e) (unload/free (asm-interp (compile (parse e)))))) -(test-runner-io (λ (e s) - (match (asm-interp/io (compile (parse e)) s) - ['err 'err] - [(cons r o) (cons (unload/free r) o)]))) diff --git a/langs/knock/test/test-runner.rkt b/langs/knock/test/test-runner.rkt deleted file mode 100644 index 36634471..00000000 --- a/langs/knock/test/test-runner.rkt +++ /dev/null @@ -1,196 +0,0 @@ -#lang racket -(provide test-runner test-runner-io) -(require rackunit) - -(define (test-runner run) - ;; Abscond examples - (check-equal? (run 7) 7) - (check-equal? (run -8) -8) - - ;; Blackmail examples - (check-equal? (run '(add1 (add1 7))) 9) - (check-equal? (run '(add1 (sub1 7))) 7) - - ;; Con examples - (check-equal? (run '(if (zero? 0) 1 2)) 1) - (check-equal? (run '(if (zero? 1) 1 2)) 2) - (check-equal? (run '(if (zero? -7) 1 2)) 2) - (check-equal? (run '(if (zero? 0) - (if (zero? 1) 1 2) - 7)) - 2) - (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) - (if (zero? 1) 1 2) - 7)) - 7) - - ;; Dupe examples - (check-equal? (run #t) #t) - (check-equal? (run #f) #f) - (check-equal? (run (if #t 1 2)) 1) - (check-equal? (run (if #f 1 2)) 2) - (check-equal? (run (if 0 1 2)) 1) - (check-equal? (run '(if #t 3 4)) 3) - (check-equal? (run '(if #f 3 4)) 4) - (check-equal? (run '(if 0 3 4)) 3) - (check-equal? (run '(zero? 4)) #f) - (check-equal? (run '(zero? 0)) #t) - ;; Dodger examples - (check-equal? (run #\a) #\a) - (check-equal? (run #\b) #\b) - (check-equal? (run '(char? #\a)) #t) - (check-equal? (run '(char? #t)) #f) - (check-equal? (run '(char? 8)) #f) - (check-equal? 
(run '(char->integer #\a)) (char->integer #\a)) - (check-equal? (run '(integer->char 955)) #\λ) - ;; Extort examples - (check-equal? (run '(add1 #f)) 'err) - (check-equal? (run '(sub1 #f)) 'err) - (check-equal? (run '(zero? #f)) 'err) - (check-equal? (run '(char->integer #f)) 'err) - (check-equal? (run '(integer->char #f)) 'err) - (check-equal? (run '(integer->char -1)) 'err) - (check-equal? (run '(write-byte #f)) 'err) - (check-equal? (run '(write-byte -1)) 'err) - (check-equal? (run '(write-byte 256)) 'err) - ;; Fraud examples - (check-equal? (run '(let ((x 7)) x)) 7) - (check-equal? (run '(let ((x 7)) 2)) 2) - (check-equal? (run '(let ((x 7)) (add1 x))) 8) - (check-equal? (run '(let ((x (add1 7))) x)) 8) - (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) - (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) - (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) - - (check-equal? (run '(let ((x 0)) - (if (zero? x) 7 8))) - 7) - (check-equal? (run '(let ((x 1)) - (add1 (if (zero? x) 7 8)))) - 9) - (check-equal? (run '(+ 3 4)) 7) - (check-equal? (run '(- 3 4)) -1) - (check-equal? (run '(+ (+ 2 1) 4)) 7) - (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) - (check-equal? (run '(let ((x (+ 1 2))) - (let ((z (- 4 x))) - (+ (+ x x) z)))) - 7) - ;; Hustle examples - (check-equal? (run ''()) '()) - (check-equal? (run '(box 1)) (box 1)) - (check-equal? (run '(cons 1 2)) (cons 1 2)) - (check-equal? (run '(unbox (box 1))) 1) - (check-equal? (run '(car (cons 1 2))) 1) - (check-equal? (run '(cdr (cons 1 2))) 2) - (check-equal? (run '(cons 1 '())) (list 1)) - (check-equal? (run '(let ((x (cons 1 2))) - (begin (cdr x) - (car x)))) - 1) - (check-equal? (run '(let ((x (cons 1 2))) - (let ((y (box 3))) - (unbox y)))) - 3) - ;; Iniquity tests - (check-equal? (run - '(begin (define (f x) x) - (f 5))) - 5) - (check-equal? (run - '(begin (define (tri x) - (if (zero? x) - 0 - (+ x (tri (sub1 x))))) - (tri 9))) - 45) - ;; Knock tests - (check-equal? 
(run - '(begin (define (f x) x) - (call (fun f) 42))) - 42) - (check-equal? (run - '(begin (define (f x) x) - (define (g x) x) - (call (car (cons (fun f) (cons (fun g) '()))) 42))) - 42) -#| - (check-equal? (run - '(begin (define (even? x) - (if (zero? x) - #t - (odd? (sub1 x)))) - (define (odd? x) - (if (zero? x) - #f - (even? (sub1 x)))) - (even? 101))) - #f) - - (check-equal? (run - '(begin (define (map-add1 xs) - (if (empty? xs) - '() - (cons (add1 (car xs)) - (map-add1 (cdr xs))))) - (map-add1 (cons 1 (cons 2 (cons 3 '())))))) - '(2 3 4))|#) - -(define (test-runner-io run) - ;; Evildoer examples - (check-equal? (run 7 "") (cons 7 "")) - (check-equal? (run '(write-byte 97) "") (cons (void) "a")) - (check-equal? (run '(read-byte) "a") (cons 97 "")) - (check-equal? (run '(begin (write-byte 97) (read-byte)) "b") - (cons 98 "a")) - (check-equal? (run '(read-byte) "") (cons eof "")) - (check-equal? (run '(eof-object? (read-byte)) "") (cons #t "")) - (check-equal? (run '(eof-object? (read-byte)) "a") (cons #f "")) - (check-equal? (run '(begin (write-byte 97) (write-byte 98)) "") - (cons (void) "ab")) - - (check-equal? (run '(peek-byte) "ab") (cons 97 "")) - (check-equal? (run '(begin (peek-byte) (read-byte)) "ab") (cons 97 "")) - ;; Extort examples - (check-equal? (run '(write-byte #t) "") (cons 'err "")) - - ;; Fraud examples - (check-equal? (run '(let ((x 97)) (write-byte x)) "") (cons (void) "a")) - (check-equal? (run '(let ((x 97)) - (begin (write-byte x) - x)) - "") - (cons 97 "a")) - (check-equal? (run '(let ((x 97)) (begin (read-byte) x)) "b") - (cons 97 "")) - (check-equal? (run '(let ((x 97)) (begin (peek-byte) x)) "b") - (cons 97 "")) - - ;; Hustle examples - (check-equal? (run '(let ((x 1)) - (begin (write-byte 97) - 1)) - "") - (cons 1 "a")) - - (check-equal? (run '(let ((x 1)) - (let ((y 2)) - (begin (write-byte 97) - 1))) - "") - (cons 1 "a")) - - (check-equal? 
(run '(let ((x (cons 1 2))) - (begin (write-byte 97) - (car x))) - "") - (cons 1 "a")) - ;; Iniquity examples - (check-equal? (run '(begin (define (print-alphabet i) - (if (zero? i) - (void) - (begin (write-byte (- 123 i)) - (print-alphabet (sub1 i))))) - (print-alphabet 26)) - "") - (cons (void) "abcdefghijklmnopqrstuvwxyz"))) diff --git a/langs/knock/types.h b/langs/knock/types.h index cffe4ea4..b79f45b2 100644 --- a/langs/knock/types.h +++ b/langs/knock/types.h @@ -1,3 +1,6 @@ +#ifndef TYPES_H +#define TYPES_H + /* Bit layout of values @@ -13,18 +16,13 @@ - Eof: #b10 11 000 - Void: #b11 11 000 - Empty: #b100 11 000 - - Pointers are - - Box: end in #b001 - - Cons: end in #b010 - - Proc: end in #b100 */ #define imm_shift 3 #define ptr_type_mask ((1 << imm_shift) - 1) -#define ptr_addr_mask ~ptr_type_mask #define box_type_tag 1 #define cons_type_tag 2 -#define proc_type_tag 4 +#define vect_type_tag 3 +#define str_type_tag 4 #define int_shift (1 + imm_shift) #define int_type_mask ((1 << int_shift) - 1) #define int_type_tag (0 << (int_shift - 1)) @@ -38,3 +36,5 @@ #define val_eof ((2 << char_shift) | nonchar_type_tag) #define val_void ((3 << char_shift) | nonchar_type_tag) #define val_empty ((4 << char_shift) | nonchar_type_tag) + +#endif diff --git a/langs/knock/types.rkt b/langs/knock/types.rkt deleted file mode 100644 index 033f7f57..00000000 --- a/langs/knock/types.rkt +++ /dev/null @@ -1,59 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -(define imm-shift 3) -(define imm-mask #b111) -(define ptr-mask #b111) -(define type-box #b001) -(define type-cons #b010) -(define type-proc #b100) -(define int-shift (+ 1 imm-shift)) -(define char-shift (+ 2 imm-shift)) -(define type-int #b0000) -(define mask-int #b1111) -(define type-char #b01000) -(define mask-char #b11111) -(define val-true #b0011000) -(define val-false #b0111000) -(define val-eof #b1011000) -(define val-void #b1111000) -(define val-empty #b10011000) - -(define (bits->imm b) - (cond [(= type-int 
(bitwise-and b mask-int)) - (arithmetic-shift b (- int-shift))] - [(= type-char (bitwise-and b mask-char)) - (integer->char (arithmetic-shift b (- char-shift)))] - [(= b val-true) #t] - [(= b val-false) #f] - [(= b val-eof) eof] - [(= b val-void) (void)] - [(= b val-empty) '()] - [else (error "invalid bits")])) - -(define (imm->bits v) - (cond [(eof-object? v) val-eof] - [(integer? v) (arithmetic-shift v int-shift)] - [(char? v) - (bitwise-ior type-char - (arithmetic-shift (char->integer v) char-shift))] - [(eq? v #t) val-true] - [(eq? v #f) val-false] - [(void? v) val-void] - [(empty? v) val-empty])) - - -(define (imm-bits? v) - (zero? (bitwise-and v imm-mask))) - -(define (int-bits? v) - (zero? (bitwise-and v mask-int))) - -(define (char-bits? v) - (= type-char (bitwise-and v mask-char))) - -(define (cons-bits? v) - (zero? (bitwise-xor (bitwise-and v imm-mask) type-cons))) - -(define (box-bits? v) - (zero? (bitwise-xor (bitwise-and v imm-mask) type-box))) diff --git a/langs/knock/unload-bits-asm.rkt b/langs/knock/unload-bits-asm.rkt deleted file mode 100644 index 4946fe26..00000000 --- a/langs/knock/unload-bits-asm.rkt +++ /dev/null @@ -1,28 +0,0 @@ -#lang racket -(provide unload/free unload-value) -(require "types.rkt" - ffi/unsafe) - -;; Answer* -> Answer -(define (unload/free a) - (match a - ['err 'err] - [(cons h v) (begin0 (unload-value v) - (free h))])) - -;; Value* -> Value -(define (unload-value v) - (match v - [(? imm-bits?) (bits->imm v)] - [(? box-bits? i) - (box (unload-value (heap-ref i)))] - [(? cons-bits? 
i) - (cons (unload-value (heap-ref (+ i (arithmetic-shift 1 imm-shift)))) - (unload-value (heap-ref i)))])) - -(define (untag i) - (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) - (integer-length ptr-mask))) - -(define (heap-ref i) - (ptr-ref (cast (untag i) _int64 _pointer) _uint64)) diff --git a/langs/knock/values.c b/langs/knock/values.c new file mode 100644 index 00000000..62bca18c --- /dev/null +++ b/langs/knock/values.c @@ -0,0 +1,112 @@ +#include "types.h" +#include "values.h" + +type_t val_typeof(val_t x) +{ + switch (x & ptr_type_mask) { + case box_type_tag: + return T_BOX; + case cons_type_tag: + return T_CONS; + case vect_type_tag: + return T_VECT; + case str_type_tag: + return T_STR; + } + + if ((int_type_mask & x) == int_type_tag) + return T_INT; + if ((char_type_mask & x) == char_type_tag) + return T_CHAR; + + switch (x) { + case val_true: + case val_false: + return T_BOOL; + case val_eof: + return T_EOF; + case val_void: + return T_VOID; + case val_empty: + return T_EMPTY; + } + + return T_INVALID; +} + +int64_t val_unwrap_int(val_t x) +{ + return x >> int_shift; +} +val_t val_wrap_byte(unsigned char b) +{ + return (b << int_shift) | int_type_tag; +} +val_t val_wrap_int(int64_t i) +{ + return (i << int_shift) | int_type_tag; +} + +int val_unwrap_bool(val_t x) +{ + return x == val_true; +} +val_t val_wrap_bool(int b) +{ + return b ? 
val_true : val_false; +} + +val_char_t val_unwrap_char(val_t x) +{ + return (val_char_t)(x >> char_shift); +} +val_t val_wrap_char(val_char_t c) +{ + return (((val_t)c) << char_shift) | char_type_tag; +} + +val_t val_wrap_eof(void) +{ + return val_eof; +} + +val_t val_wrap_void(void) +{ + return val_void; +} + +val_box_t* val_unwrap_box(val_t x) +{ + return (val_box_t *)(x ^ box_type_tag); +} +val_t val_wrap_box(val_box_t* b) +{ + return ((val_t)b) | box_type_tag; +} + +val_cons_t* val_unwrap_cons(val_t x) +{ + return (val_cons_t *)(x ^ cons_type_tag); +} +val_t val_wrap_cons(val_cons_t *c) +{ + return ((val_t)c) | cons_type_tag; +} + +val_vect_t* val_unwrap_vect(val_t x) +{ + return (val_vect_t *)(x ^ vect_type_tag); +} +val_t val_wrap_vect(val_vect_t *v) +{ + return ((val_t)v) | vect_type_tag; +} + +val_str_t* val_unwrap_str(val_t x) +{ + return (val_str_t *)(x ^ str_type_tag); +} +val_t val_wrap_str(val_str_t *v) +{ + return ((val_t)v) | str_type_tag; +} diff --git a/langs/knock/values.h b/langs/knock/values.h new file mode 100644 index 00000000..b6ac44f9 --- /dev/null +++ b/langs/knock/values.h @@ -0,0 +1,76 @@ +#ifndef VALUES_H +#define VALUES_H + +#include + +/* any abstract value */ +typedef int64_t val_t; + +typedef enum type_t { + T_INVALID = -1, + /* immediates */ + T_INT, + T_BOOL, + T_CHAR, + T_EOF, + T_VOID, + T_EMPTY, + /* pointers */ + T_BOX, + T_CONS, + T_VECT, + T_STR, +} type_t; + +typedef uint32_t val_char_t; +typedef struct val_box_t { + val_t val; +} val_box_t; +typedef struct val_cons_t { + val_t snd; + val_t fst; +} val_cons_t; +typedef struct val_vect_t { + uint64_t len; + val_t elems[]; +} val_vect_t; +typedef struct val_str_t { + uint64_t len; + val_char_t codepoints[]; +} val_str_t; + +/* return the type of x */ +type_t val_typeof(val_t x); + +/** + * Wrap/unwrap values + * + * The behavior of unwrap functions are undefined on type mismatch. 
+ */ +int64_t val_unwrap_int(val_t x); +val_t val_wrap_int(int64_t i); +val_t val_wrap_byte(unsigned char b); + +int val_unwrap_bool(val_t x); +val_t val_wrap_bool(int b); + +val_char_t val_unwrap_char(val_t x); +val_t val_wrap_char(val_char_t b); + +val_t val_wrap_eof(); + +val_t val_wrap_void(); + +val_box_t* val_unwrap_box(val_t x); +val_t val_wrap_box(val_box_t* b); + +val_cons_t* val_unwrap_cons(val_t x); +val_t val_wrap_cons(val_cons_t* c); + +val_vect_t* val_unwrap_vect(val_t x); +val_t val_wrap_vect(val_vect_t* c); + +val_str_t* val_unwrap_str(val_t x); +val_t val_wrap_str(val_str_t* c); + +#endif diff --git a/langs/loot/Makefile b/langs/loot/Makefile index 7b1934bc..db6ac44a 100644 --- a/langs/loot/Makefile +++ b/langs/loot/Makefile @@ -1,32 +1,44 @@ UNAME := $(shell uname) -.PHONY: test ifeq ($(UNAME), Darwin) format=macho64 + CC=arch -x86_64 gcc else format=elf64 + CC=gcc endif -%.run: %.o runtime.o - gcc runtime.o $< -o $@ +objs = \ + main.o \ + print.o \ + values.o \ + io.o + +default: submit.zip -runtime.o: main.o char.o io.o - ld -r main.o char.o io.o -o runtime.o +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* -main.o: main.c types.h runtime.h - gcc -fPIC -c main.c -o main.o +runtime.o: $(objs) + ld -r $(objs) -o runtime.o -char.o: char.c types.h - gcc -fPIC -c char.c -o char.o +%.run: %.o runtime.o + $(CC) runtime.o $< -o $@ -io.o: io.c runtime.h - gcc -fPIC -c io.c -o io.o +.c.o: + $(CC) -fPIC -c -g -o $@ $< -%.o: %.s - nasm -f $(format) -o $@ $< +.s.o: + nasm -g -f $(format) -o $@ $< %.s: %.rkt - racket -t compile-file.rkt -m $< > $@ + cat $< | racket -t compile-stdin.rkt -m > $@ clean: - rm *.o *.s *.run + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
+ +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/loot/ast.rkt b/langs/loot/ast.rkt index a84a0600..b430b22d 100644 --- a/langs/loot/ast.rkt +++ b/langs/loot/ast.rkt @@ -7,228 +7,69 @@ ;; type Defn = (Defn Id (Listof Id) Expr) (struct Defn (f xs e) #:prefab) -;; Differences from Knock -;; -;; * We _remove_: -;; - `Fun` -;; - `Call` -;; -;; * We add -;; - `Lam` -;; -;; * We change: -;; - `App` -;; ;; type Expr = (Eof) ;; | (Empty) ;; | (Int Integer) ;; | (Bool Boolean) ;; | (Char Character) +;; | (Str String) ;; | (Prim0 Op0) ;; | (Prim1 Op1 Expr) -;; | (Prim1 Op2 Op2 Expr) +;; | (Prim2 Op2 Expr Expr) +;; | (Prim3 Op3 Expr Expr Expr) ;; | (If Expr Expr Expr) ;; | (Begin Expr Expr) ;; | (Let Id Expr Expr) -;; | LetRec (Binding list) Expr <--- New for Loot (See the lecture notes!) -;; | Lam Name [Variable] Expr <--- New for Loot ;; | (Var Id) -;; | (App Expr (Listof Expr)) <--- Changed from Knock +;; | (Match Expr (Listof Pat) (Listof Expr)) +;; | (App Expr (Listof Expr)) +;; | (Lam Id (Listof Id) Expr) ;; type Id = Symbol -;; type Op0 = 'read-byte | 'void | 'collect-garbage +;; type Op0 = 'read-byte ;; type Op1 = 'add1 | 'sub1 | 'zero? ;; | 'char? | 'integer->char | 'char->integer ;; | 'write-byte | 'eof-object? ;; | 'box | 'car | 'cdr | 'unbox -;; | 'empty? -;; type Op2 = '+ | '- | 'eq? +;; | 'empty? | 'cons? | 'box? +;; | 'vector? | 'vector-length +;; | 'string? 
| 'string-length +;; type Op2 = '+ | '- | '< | '= ;; | 'cons -(struct Eof () #:prefab) -(struct Empty () #:prefab) -(struct Int (i) #:prefab) -(struct Bool (b) #:prefab) -(struct Char (c) #:prefab) -(struct Prim0 (p) #:prefab) -(struct Prim1 (p e) #:prefab) -(struct Prim2 (p e1 e2) #:prefab) -(struct If (e1 e2 e3) #:prefab) -(struct Begin (e1 e2) #:prefab) -(struct Let (x e1 e2) #:prefab) -(struct LetRec (bs e1) #:prefab) -(struct Lam (n xs e) #:prefab) -(struct Var (x) #:prefab) -(struct App (f es) #:prefab) - - -;; For pedagogical purposes -(struct Closure (xs e r) #:prefab) -(struct RecClosure (f r) #:prefab) - -;; Helper functions - -;; Does an Expr represent an immediate (i.e. flat) value? -;; Expr -> Bool -(define (imm? e) - (match e - [(Int i) #t] - [(Bool b) #t] - [(Char c) #t] - [(Eof) #t] - [(Empty) #t] - [_ #f])) - -;; Get the 'actual' value out of an immediate. -;; Expr -> Imm -(define (get-imm e) - (match e - [(Int i) i] - [(Bool b) b] - [(Char c) c] - [(Eof) eof] - [(Empty) '()] - [_ (error (~a "get-imm: " e " is not an immedate!"))])) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Free Variables -;; -;; Expr -> [Var] -(define (fvs e) - (define (fvs e) - (match e - [(Prim1 p e) (fvs e)] - [(Prim2 p e1 e2) (append (fvs e1) (fvs e2))] - [(If e1 e2 e3) (append (fvs e1) (fvs e2) (fvs e3))] - [(Begin e1 e2) (append (fvs e1) (fvs e2))] - [(Let x e1 e2) (append (fvs e1) (remq* (list x) (fvs e2)))] - [(LetRec bs e1) (let ((bound (map car bs)) - (def-fvs (append-map fvs-bind bs))) - (remq* bound (append def-fvs (fvs e1))))] - [(Lam n xs e1) (remq* xs (fvs e1))] - [(Var x) (list x)] - [(App f es) (append (fvs f) (append-map fvs es))] - [_ '()])) - (remove-duplicates (fvs e))) - - - - - -(define (fvs-bind d) - (match d - [(list x e1) (fvs e1)])) - - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Desugaring Definitions -;; -;; Now that we have lambdas, we can actually treat 
user-defined functions -;; as syntactic sugar for lambdas. For example: -;; -;; (begin -;; (define (f x) (+ x x)) -;; (f 42)) -;; -;; Can be transformed to: -;; -;; (let ((f (lambda (x) (+ x x)))) -;; (f 42)) -;; -;; That's not _quite_ enough, as top-level functions can refer to each other: -;; -;; (begin -;; (define (f x) (+ x x)) -;; (define (g y) (+ (f y) y)) -;; (g 42)) -;; -;; Becomes: -;; -;; (letrec ((f (lambda (x) (+ x x))) -;; (g (lambda (y) (+ (f y) y)))) -;; (g 42)) -;; -;; Since we can represent our programs using this 'more fundamental' feature -;; we can always _desugar_ from the nice-to-write version to the more -;; fundamental version. -;; -;; Prog -> Prog -(define (desugar e+) - (match e+ - [(Prog '() e) (Prog '() (desugar e))] - [(Prog ds e) (let ((defs (map desugar ds))) - (Prog '() (LetRec defs e)))] - [(Defn f xs e) (list f (Lam f xs e))] - [(Prim1 p e) (Prim1 p (desugar e))] - [(Prim2 p e1 e2) (Prim2 p (desugar e1) (desugar e2))] - [(If e1 e2 e3) (If (desugar e1) (desugar e2) (desugar e3))] - [(Begin e1 e2) (Begin (desugar e1) (desugar e2))] - [(Let x e1 e2) (Let x (desugar e1) (desugar e2))] - [(LetRec bs e1) (LetRec (map (lambda (xs) (map desugar xs)) bs) (desugar e1))] - [(Lam n xs e) (Lam (gensym 'lam) xs (desugar e))] - [(App f es) (App (desugar f) (map desugar es))] - [_ e+])) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Labelling Lambdas -;; -;; Each lambda in a program needs to have a unique name so that we know what -;; code we need to jump to when that lambda is 'called'. -;; Luckily, `gensym` provides all the functionality that we need here. -;; -;; The flat values are easy: no possibility of there being a lambda, so -;; we just return the unaltered expression. 
For everything else we traverse -;; down the structure, the only case that actually 'does' anything is -;; for `Lam` -;; -;; Prog -> Prog -(define (label-λ e) - (match e - [(Prog ds e) (Prog (map label-λ ds) (label-λ e))] - [(Defn f xs e) (Defn f xs (label-λ e))] - [(Prim1 p e) (Prim1 p (label-λ e))] - [(Prim2 p e1 e2) (Prim2 p (label-λ e1) (label-λ e2))] - [(If e1 e2 e3) (If (label-λ e1) (label-λ e2) (label-λ e3))] - [(Begin e1 e2) (Begin (label-λ e1) (label-λ e2))] - [(Let x e1 e2) (Let x (label-λ e1) (label-λ e2))] - [(LetRec bs e1) (LetRec (map (lambda (xs) (map label-λ xs)) bs) (label-λ e1))] - [(Lam '() xs e) (Lam (gensym 'lam) xs (label-λ e))] - [(Lam n xs e) (Lam (gensym n) xs (label-λ e))] - [(App f es) (App (label-λ f) (map label-λ es))] - [_ e])) - -;; For those that struggle with typing unicode -(define label-lambda label-λ) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Collecting all Lambdas -;; -;; While the lambdas could be _written_ anywhere in the source code, we do need -;; to write the generated target code somewhere reliable. There are a few ways -;; to do this, but we've decided to take the most straightforward route: collect -;; the lambdas and treat them as 'additional' function definitions. -;; -;; In order to do this we'll need a list of all the lambdas in a program. -;; This function traverses our program and collects all the lambdas. 
-;; -;; Prog -> [Expr] -(define (λs e) - (match e - [(Prog ds e) (append (append-map λs ds) (λs e))] - [(Defn f xs e) (λs e)] - [(Prim1 p e) (λs e)] - [(Prim2 p e1 e2) (append (λs e1) (λs e2))] - [(If e1 e2 e3) (append (λs e1) (λs e2) (λs e3))] - [(Begin e1 e2) (append (λs e1) (λs e2))] - [(Let x e1 e2) (append (λs e1) (λs e2))] - [(LetRec bs e1) (append (append-map lambda-defs bs) (λs e1))] - [(Lam n xs e1) (cons e (λs e1))] - [(App f es) (append (λs f) (append-map λs es))] - [_ '()])) - -(define (lambda-defs d) - (match d - [(list x e) (λs e)])) - -;; For those that struggle with typing unicode -(define lambdas λs) +;; | 'make-vector | 'vector-ref +;; | 'make-string | 'string-ref +;; type Op3 = 'vector-set! +;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() + +(struct Eof () #:prefab) +(struct Empty () #:prefab) +(struct Int (i) #:prefab) +(struct Bool (b) #:prefab) +(struct Char (c) #:prefab) +(struct Str (s) #:prefab) +(struct Prim0 (p) #:prefab) +(struct Prim1 (p e) #:prefab) +(struct Prim2 (p e1 e2) #:prefab) +(struct Prim3 (p e1 e2 e3) #:prefab) +(struct If (e1 e2 e3) #:prefab) +(struct Begin (e1 e2) #:prefab) +(struct Let (x e1 e2) #:prefab) +(struct Var (x) #:prefab) +(struct App (e es) #:prefab) +(struct Lam (f xs e) #:prefab) +(struct Match (e ps es) #:prefab) + +(struct PVar (x) #:prefab) +(struct PWild () #:prefab) +(struct PLit (x) #:prefab) +(struct PBox (p) #:prefab) +(struct PCons (p1 p2) #:prefab) +(struct PAnd (p1 p2) #:prefab) diff --git a/langs/loot/build-list-cons-rec.rkt b/langs/loot/build-list-cons-rec.rkt new file mode 100644 index 00000000..5c845e07 --- /dev/null +++ b/langs/loot/build-list-cons-rec.rkt @@ -0,0 +1,8 @@ +#lang racket +(define (repeat n v) + (if (zero? 
n) + '() + (cons v (repeat (sub1 n) v)))) + +(repeat 10 42) + diff --git a/langs/loot/build-runtime.rkt b/langs/loot/build-runtime.rkt new file mode 100644 index 00000000..66aad89f --- /dev/null +++ b/langs/loot/build-runtime.rkt @@ -0,0 +1,14 @@ +#lang racket +(require racket/runtime-path) +(provide runtime-path) + +(define-runtime-path here ".") + +(void + (system (string-append "make -C '" + (path->string (normalize-path here)) + "' runtime.o"))) + +(define runtime-path + (path->string + (normalize-path (build-path here "runtime.o")))) diff --git a/langs/loot/compile-file.rkt b/langs/loot/compile-file.rkt deleted file mode 100644 index 988e3121..00000000 --- a/langs/loot/compile-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "compile.rkt" a86/printer) - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) - (close-input-port p)))) diff --git a/langs/hoax/compile-ops.rkt b/langs/loot/compile-ops.rkt similarity index 61% rename from langs/hoax/compile-ops.rkt rename to langs/loot/compile-ops.rkt index e3b9e00e..ff8a6b2d 100644 --- a/langs/hoax/compile-ops.rkt +++ b/langs/loot/compile-ops.rkt @@ -9,68 +9,68 @@ (define r8 'r8) ; scratch (define r9 'r9) ; scratch (define r10 'r10) ; scratch +(define r15 'r15) ; stack pad (non-volatile) (define rsp 'rsp) ; stack -;; Op0 CEnv -> Asm -(define (compile-op0 p c) +;; Op0 -> Asm +(define (compile-op0 p) (match p - ['void (seq (Mov rax val-void))] - ['read-byte (seq (pad-stack c) + ['void (seq (Mov rax (value->bits (void))))] + ['read-byte (seq pad-stack (Call 'read_byte) - (unpad-stack c))] - ['peek-byte (seq (pad-stack c) + unpad-stack)] + ['peek-byte (seq pad-stack (Call 'peek_byte) - (unpad-stack c))])) + unpad-stack)])) -;; Op1 CEnv -> Asm -(define (compile-op1 p c) +;; Op1 -> Asm 
+(define (compile-op1 p) (match p ['add1 - (seq (assert-integer rax c) - (Add rax (imm->bits 1)))] + (seq (assert-integer rax) + (Add rax (value->bits 1)))] ['sub1 - (seq (assert-integer rax c) - (Sub rax (imm->bits 1)))] + (seq (assert-integer rax) + (Sub rax (value->bits 1)))] ['zero? - (seq (assert-integer rax c) - (eq-imm 0))] + (seq (assert-integer rax) + (eq-value 0))] ['char? (type-pred mask-char type-char)] ['char->integer - (seq (assert-char rax c) + (seq (assert-char rax) (Sar rax char-shift) (Sal rax int-shift))] ['integer->char - (seq (assert-codepoint c) + (seq (assert-codepoint rax) (Sar rax int-shift) (Sal rax char-shift) (Xor rax type-char))] - ['eof-object? (eq-imm eof)] + ['eof-object? (eq-value eof)] ['write-byte - (seq (assert-byte c) - (pad-stack c) + (seq (assert-byte rax) + pad-stack (Mov rdi rax) (Call 'write_byte) - (unpad-stack c) - (Mov rax val-void))] + unpad-stack)] ['box (seq (Mov (Offset rbx 0) rax) (Mov rax rbx) (Or rax type-box) (Add rbx 8))] ['unbox - (seq (assert-box rax c) + (seq (assert-box rax) (Xor rax type-box) (Mov rax (Offset rax 0)))] ['car - (seq (assert-cons rax c) + (seq (assert-cons rax) (Xor rax type-cons) (Mov rax (Offset rax 8)))] ['cdr - (seq (assert-cons rax c) + (seq (assert-cons rax) (Xor rax type-cons) (Mov rax (Offset rax 0)))] - ['empty? (eq-imm '())] + ['empty? (eq-value '())] ['box? (type-pred ptr-mask type-box)] ['cons? 
@@ -82,7 +82,7 @@ ['vector-length (let ((zero (gensym)) (done (gensym))) - (seq (assert-vector rax c) + (seq (assert-vector rax) (Xor rax type-vect) (Cmp rax 0) (Je zero) @@ -95,7 +95,7 @@ ['string-length (let ((zero (gensym)) (done (gensym))) - (seq (assert-string rax c) + (seq (assert-string rax) (Xor rax type-str) (Cmp rax 0) (Je zero) @@ -106,40 +106,32 @@ (Mov rax 0) (Label done)))])) -;; Op2 CEnv -> Asm -(define (compile-op2 p c) +;; Op2 -> Asm +(define (compile-op2 p) (match p ['+ (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Add rax r8))] ['- (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Sub r8 rax) (Mov rax r8))] ['< (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Cmp r8 rax) - (Mov rax val-true) - (let ((true (gensym))) - (seq (Jl true) - (Mov rax val-false) - (Label true))))] + (if-lt))] ['= (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Cmp r8 rax) - (Mov rax val-true) - (let ((true (gensym))) - (seq (Je true) - (Mov rax val-false) - (Label true))))] + (if-equal))] ['cons (seq (Mov (Offset rbx 0) rax) (Pop rax) @@ -147,12 +139,16 @@ (Mov rax rbx) (Or rax type-cons) (Add rbx 16))] + ['eq? 
+ (seq (Pop r8) + (Cmp rax r8) + (if-equal))] ['make-vector (let ((loop (gensym)) (done (gensym)) (empty (gensym))) (seq (Pop r8) - (assert-natural r8 c) + (assert-natural r8) (Cmp r8 0) ; special case empty vector (Je empty) @@ -179,16 +175,18 @@ ['vector-ref (seq (Pop r8) - (assert-vector r8 c) - (assert-integer rax c) + (assert-vector r8) + (assert-integer rax) + (Cmp r8 type-vect) + (Je 'raise_error_align) ; special case for empty vector (Cmp rax 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-vect) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar rax int-shift) ; rax = index (Sub r9 1) (Cmp r9 rax) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal rax 3) (Add r8 rax) (Mov rax (Offset r8 8)))] @@ -198,8 +196,8 @@ (done (gensym)) (empty (gensym))) (seq (Pop r8) - (assert-natural r8 c) - (assert-char rax c) + (assert-natural r8) + (assert-char rax) (Cmp r8 0) ; special case empty string (Je empty) @@ -212,9 +210,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) @@ -232,60 +230,62 @@ ['string-ref (seq (Pop r8) - (assert-string r8 c) - (assert-integer rax c) + (assert-string r8) + (assert-integer rax) + (Cmp r8 type-str) + (Je 'raise_error_align) ; special case for empty string (Cmp rax 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-str) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar rax int-shift) ; rax = index (Sub r9 1) (Cmp r9 rax) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal rax 2) (Add r8 rax) (Mov 'eax (Offset r8 8)) (Sal rax char-shift) (Or rax type-char))])) -;; Op3 CEnv -> Asm -(define (compile-op3 p c) +;; Op3 -> Asm +(define (compile-op3 p) (match p ['vector-set! 
(seq (Pop r10) (Pop r8) - (assert-vector r8 c) - (assert-integer r10 c) + (assert-vector r8) + (assert-integer r10) (Cmp r10 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-vect) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar r10 int-shift) ; r10 = index (Sub r9 1) (Cmp r9 r10) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal r10 3) (Add r8 r10) (Mov (Offset r8 8) rax) - (Mov rax val-void))])) + (Mov rax (value->bits (void))))])) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (assert-type mask type) - (λ (arg c) + (λ (arg) (seq (Mov r9 arg) (And r9 mask) (Cmp r9 type) - (Jne (error-label c))))) + (Jne 'raise_error_align)))) (define (type-pred mask type) (let ((l (gensym))) (seq (And rax mask) (Cmp rax type) - (Mov rax (imm->bits #t)) + (Mov rax (value->bits #t)) (Je l) - (Mov rax (imm->bits #f)) + (Mov rax (value->bits #f)) (Label l)))) (define assert-integer @@ -300,59 +300,58 @@ (assert-type ptr-mask type-vect)) (define assert-string (assert-type ptr-mask type-str)) +(define assert-proc + (assert-type ptr-mask type-proc)) -(define (assert-codepoint c) +(define (assert-codepoint r) (let ((ok (gensym))) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 1114111)) - (Jg (error-label c)) - (Cmp rax (imm->bits 55295)) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align) + (Cmp r (value->bits 1114111)) + (Jg 'raise_error_align) + (Cmp r (value->bits 55295)) (Jl ok) - (Cmp rax (imm->bits 57344)) + (Cmp r (value->bits 57344)) (Jg ok) - (Jmp (error-label c)) + (Jmp 'raise_error_align) (Label ok)))) -(define (assert-byte c) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 255)) - (Jg (error-label c)))) +(define (assert-byte r) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align) + (Cmp r (value->bits 255)) + (Jg 'raise_error_align))) + +(define (assert-natural r) + (seq (assert-integer r) + (Cmp r 
(value->bits 0)) + (Jl 'raise_error_align))) + +;; -> Asm +;; set rax to #t or #f based on given comparison +(define (if-compare c) + (seq (Mov rax (value->bits #f)) + (Mov r9 (value->bits #t)) + (c rax r9))) -(define (assert-natural r c) - (seq (assert-integer r c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)))) +(define (if-equal) (if-compare Cmove)) +(define (if-lt) (if-compare Cmovl)) ;; Value -> Asm -(define (eq-imm imm) - (let ((l1 (gensym))) - (seq (Cmp rax (imm->bits imm)) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))) - -;; CEnv -> Asm -;; Pad the stack to be aligned for a call -(define (pad-stack c) - (match (even? (length c)) - [#t (seq (Sub rsp 8))] - [#f (seq)])) - -;; CEnv -> Asm +(define (eq-value v) + (seq (Cmp rax (value->bits v)) + (if-equal))) + +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; Asm ;; Undo the stack alignment after a call -(define (unpad-stack c) - (match (even? (length c)) - [#t (seq (Add rsp 8))] - [#f (seq)])) - -;; CEnv -> Label -;; Determine correct error handler label to jump to. -(define (error-label c) - (match (even? 
(length c)) - [#t 'raise_error] - [#f 'raise_error_align])) +(define unpad-stack + (seq (Add rsp r15))) diff --git a/langs/loot/compile-stdin.rkt b/langs/loot/compile-stdin.rkt new file mode 100644 index 00000000..cfa15106 --- /dev/null +++ b/langs/loot/compile-stdin.rkt @@ -0,0 +1,10 @@ +#lang racket +(provide main) +(require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile (parse (read-all))))) diff --git a/langs/loot/compile.rkt b/langs/loot/compile.rkt index 6d469a9a..f571bca6 100644 --- a/langs/loot/compile.rkt +++ b/langs/loot/compile.rkt @@ -1,595 +1,415 @@ #lang racket (provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) +(require "ast.rkt" "types.rkt" "lambdas.rkt" "fv.rkt" "compile-ops.rkt" a86/ast) ;; Registers used (define rax 'rax) ; return (define rbx 'rbx) ; heap -(define rcx 'rcx) ; scratch -(define rdx 'rdx) ; return, 2 -(define r8 'r8) ; scratch in +, - -(define r9 'r9) ; scratch in assert-type and tail-calls (define rsp 'rsp) ; stack (define rdi 'rdi) ; arg +(define r15 'r15) ; stack pad (non-volatile) -;; type CEnv = [Listof Variable] +;; type CEnv = (Listof [Maybe Id]) -;; Expr -> Asm +;; Prog -> Asm (define (compile p) - (match (label-λ (desugar p)) ; <-- changed! - [(Prog '() e) - (prog (Extern 'peek_byte) - (Extern 'read_byte) - (Extern 'write_byte) - (Extern 'raise_error) + (match p + [(Prog ds e) + (prog (externs) (Global 'entry) (Label 'entry) - (Mov rbx rdi) - (compile-e e '(#f)) - (Mov rdx rbx) + (Push rbx) ; save callee-saved register + (Push r15) + (Mov rbx rdi) ; recv heap pointer + (compile-defines-values ds) + (compile-e e (reverse (define-ids ds)) #f) + (Add rsp (* 8 (length ds))) ;; pop function definitions + (Pop r15) ; restore callee-save register + (Pop rbx) (Ret) - (compile-λ-definitions (λs e)))])) ; <-- changed! 
+ (compile-defines ds) + (compile-lambda-defines (lambdas p)) + (Label 'raise_error_align) + pad-stack + (Call 'raise_error))])) + +(define (externs) + (seq (Extern 'peek_byte) + (Extern 'read_byte) + (Extern 'write_byte) + (Extern 'raise_error))) + +;; [Listof Defn] -> [Listof Id] +(define (define-ids ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (cons f (define-ids ds))])) ;; [Listof Defn] -> Asm -(define (compile-λ-definitions ds) - (seq - (match ds - ['() (seq)] - [(cons d ds) - (seq (compile-λ-definition d) - (compile-λ-definitions ds))]))) - -;; This is the code generation for the lambdas themselves. -;; It's not very different from generating code for user-defined functions, -;; because lambdas _are_ user defined functions, they just don't have a name -;; +(define (compile-defines ds) + (match ds + ['() (seq)] + [(cons d ds) + (seq (compile-define d) + (compile-defines ds))])) + ;; Defn -> Asm -(define (compile-λ-definition l) - (match l - [(Lam '() xs e) (error "Lambdas must be labelled before code gen (contact your compiler writer)")] - [(Lam f xs e) - (let* ((free (remq* xs (fvs e))) - ; leave space for RIP - (env (parity (cons #f (cons #f (reverse (append xs free))))))) - (seq (Label (symbol->label f)) - (%% "Compiling the body of the function") - (%% (~a "free vars: " free)) - (%% (~a "args: " xs)) - (%% (~a "env: " env)) - ; we need the #args on the frame, not the length of the entire - ; env (which may have padding) - ; Ignore tail calls for now - (compile-e e env) - (Ret)))])) - -(define (parity c) - (if (even? 
(length c)) - (append c (list #f)) - c)) - -;; Expr Expr Expr CEnv Int -> Asm -(define (compile-tail-e e c s) - (seq - (match e - [(If e1 e2 e3) (compile-tail-if e1 e2 e3 c s)] - [(Let x e1 e2) (compile-tail-let x e1 e2 c s)] - [(LetRec bs e1) (compile-tail-letrec (map car bs) (map cadr bs) e1 c)] - [(App f es) (if (<= (length es) s) - (compile-tail-call f es c) - (compile-call f es c))] - [(Begin e1 e2) (compile-tail-begin e1 e2 c s)] - [_ (compile-e e c)]))) - -;; Expr CEnv -> Asm -(define (compile-e e c) - (seq - (match e - [(? imm? i) (compile-value (get-imm i))] - [(Var x) (compile-variable x c)] - [(App f es) (compile-call f es c)] - [(Lam l xs e0) (compile-λ xs l (fvs e) c)] ; why do we ignore e0? - [(Prim0 p) (compile-prim0 p c)] - [(Prim1 p e) (compile-prim1 p e c)] - [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] - [(If e1 e2 e3) (compile-if e1 e2 e3 c)] - [(Begin e1 e2) (compile-begin e1 e2 c)] - [(LetRec bs e1) (compile-letrec (map car bs) (map cadr bs) e1 c)] - [(Let x e1 e2) (compile-let x e1 e2 c)]))) +(define (compile-define d) + (match d + [(Defn f xs e) + (compile-lambda-define (Lam f xs e))])) + +;; [Listof Lam] -> Asm +(define (compile-lambda-defines ls) + (match ls + ['() (seq)] + [(cons l ls) + (seq (compile-lambda-define l) + (compile-lambda-defines ls))])) + +;; Lam -> Asm +(define (compile-lambda-define l) + (let ((fvs (fv l))) + (match l + [(Lam f xs e) + (let ((env (append (reverse fvs) (reverse xs) (list #f)))) + (seq (Label (symbol->label f)) + (Mov rax (Offset rsp (* 8 (length xs)))) + (Xor rax type-proc) + (copy-env-to-stack fvs 8) + (compile-e e env #t) + (Add rsp (* 8 (length env))) ; pop env + (Ret)))]))) + +;; [Listof Id] Int -> Asm +;; Copy the closure environment at given offset to stack +(define (copy-env-to-stack fvs off) + (match fvs + ['() (seq)] + [(cons _ fvs) + (seq (Mov r9 (Offset rax off)) + (Push r9) + (copy-env-to-stack fvs (+ 8 off)))])) + +;; Expr CEnv Bool -> Asm +(define (compile-e e c t?) 
+ (match e + [(Int i) (compile-value i)] + [(Bool b) (compile-value b)] + [(Char c) (compile-value c)] + [(Eof) (compile-value eof)] + [(Empty) (compile-value '())] + [(Var x) (compile-variable x c)] + [(Str s) (compile-string s)] + [(Prim0 p) (compile-prim0 p c)] + [(Prim1 p e) (compile-prim1 p e c)] + [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] + [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] + [(If e1 e2 e3) (compile-if e1 e2 e3 c t?)] + [(Begin e1 e2) (compile-begin e1 e2 c t?)] + [(Let x e1 e2) (compile-let x e1 e2 c t?)] + [(App e es) (compile-app e es c t?)] + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) ;; Value -> Asm (define (compile-value v) - (seq (Mov rax (imm->bits v)))) + (seq (Mov rax (value->bits v)))) ;; Id CEnv -> Asm (define (compile-variable x c) - (let ((i (lookup x c))) + (let ((i (lookup x c))) (seq (Mov rax (Offset rsp i))))) -;; (Listof Variable) Label (Listof Variable) CEnv -> Asm -(define (compile-λ xs f ys c) - (seq - ; Save label address - (Lea rax (symbol->label f)) - (Mov (Offset rbx 0) rax) - - ; Save the environment - (%% "Begin saving the env") - (%% (~a "free vars: " ys)) - (Mov r8 (length ys)) - - (Mov (Offset rbx 8) r8) - (Mov r9 rbx) - (Add r9 16) - (copy-env-to-heap ys c 0) - (%% "end saving the env") - - ; Return a pointer to the closure - (Mov rax rbx) - (Or rax type-proc) - (Add rbx (* 8 (+ 2 (length ys)))))) - -;; (Listof Variable) CEnv Natural -> Asm -;; Pointer to beginning of environment in r9 -(define (copy-env-to-heap fvs c i) - (match fvs +;; String -> Asm +(define (compile-string s) + (let ((len (string-length s))) + (if (zero? len) + (seq (Mov rax type-str)) + (seq (Mov rax len) + (Mov (Offset rbx 0) rax) + (compile-string-chars (string->list s) 8) + (Mov rax rbx) + (Or rax type-str) + (Add rbx + (+ 8 (* 4 (if (odd? 
len) (add1 len) len)))))))) + +;; [Listof Char] Integer -> Asm +(define (compile-string-chars cs i) + (match cs ['() (seq)] - [(cons x fvs) - (seq - ; Move the stack item in question to a temp register - (Mov r8 (Offset rsp (lookup x c))) - - ; Put the iterm in the heap - (Mov (Offset r9 i) r8) - - ; Do it again for the rest of the items, incrementing how - ; far away from r9 the next item should be - (copy-env-to-heap fvs c (+ 8 i)))])) + [(cons c cs) + (seq (Mov rax (char->integer c)) + (Mov (Offset rbx i) 'eax) + (compile-string-chars cs (+ 4 i)))])) ;; Op0 CEnv -> Asm (define (compile-prim0 p c) - (match p - ['void (seq (Mov rax val-void))] - ['read-byte (seq (pad-stack c) - (Call 'read_byte) - (unpad-stack c))] - ['peek-byte (seq (pad-stack c) - (Call 'peek_byte) - (unpad-stack c))])) + (compile-op0 p)) ;; Op1 Expr CEnv -> Asm (define (compile-prim1 p e c) - (seq (compile-e e c) - (match p - ['add1 - (seq (assert-integer rax) - (Add rax (imm->bits 1)))] - ['sub1 - (seq (assert-integer rax) - (Sub rax (imm->bits 1)))] - ['zero? - (let ((l1 (gensym))) - (seq (assert-integer rax) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char? - (let ((l1 (gensym))) - (seq (And rax mask-char) - (Xor rax type-char) - (Cmp rax 0) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))] - ['char->integer - (seq (assert-char rax) - (Sar rax char-shift) - (Sal rax int-shift))] - ['integer->char - (seq assert-codepoint - (Sar rax int-shift) - (Sal rax char-shift) - (Xor rax type-char))] - ['eof-object? 
(eq-imm val-eof)] - ['write-byte - (seq assert-byte - (pad-stack c) - (Mov rdi rax) - (Call 'write_byte) - (unpad-stack c) - (Mov rax val-void))] - ['box - (seq (Mov (Offset rbx 0) rax) - (Mov rax rbx) - (Or rax type-box) - (Add rbx 8))] - ['unbox - (seq (assert-box rax) - (Xor rax type-box) - (Mov rax (Offset rax 0)))] - ['car - (seq (assert-cons rax) - (Xor rax type-cons) - (Mov rax (Offset rax 8)))] - ['cdr - (seq (assert-cons rax) - (Xor rax type-cons) - (Mov rax (Offset rax 0)))] - ['empty? (eq-imm val-empty)]))) + (seq (compile-e e c #f) + (compile-op1 p))) ;; Op2 Expr Expr CEnv -> Asm (define (compile-prim2 p e1 e2 c) - (seq (compile-e e1 c) + (seq (compile-e e1 c #f) (Push rax) - (compile-e e2 (cons #f c)) - (match p - ['+ - (seq (Pop r8) - (assert-integer r8) - (assert-integer rax) - (Add rax r8))] - ['- - (seq (Pop r8) - (assert-integer r8) - (assert-integer rax) - (Sub r8 rax) - (Mov rax r8))] - ['eq? - (let ((l (gensym))) - (seq (Cmp rax (Offset rsp 0)) - (Sub rsp 8) - (Mov rax val-true) - (Je l) - (Mov rax val-false) - (Label l)))] - ['cons - (seq (Mov (Offset rbx 0) rax) - (Pop rax) - (Mov (Offset rbx 8) rax) - (Mov rax rbx) - (Or rax type-cons) - (Add rbx 16))]))) - -;; Id [Listof Expr] CEnv -> Asm -;; Here's (part of) why this code is so gross: you have to align the stack for -;; the call but you have to do it *before* evaluating the arguments es, -;; because you need es's values to be just above 'rsp when the call is made. -;; But if you push a frame in order to align the call, you've got to compile es -;; in a static environment that accounts for that frame, hence: -(define (compile-call f es c) - (let* ((cnt (length es)) - (aligned (even? 
(+ cnt (length c)))) - (i (if aligned 1 2)) - (c+ (if aligned - c - (cons #f c))) - (c++ (cons #f c+))) - (seq - - (%% "Begin compile-call") - ; Adjust the stack for alignment, if necessary - (if aligned - (seq) - (Sub rsp 8)) - - ; Generate the code for the thing being called - ; and push the result on the stack - (compile-e f c+) - (%% "Push function on stack") - (Push rax) - - ; Generate the code for the arguments - ; all results will be put on the stack (compile-es does this) - (compile-es es c++) - - ; Get the function being called off the stack - ; Ensure it's a proc and remove the tag - ; Remember it points to the _closure_ - (%% "Get function off stack") - (Mov rax (Offset rsp (* 8 cnt))) - (assert-proc rax) - (Xor rax type-proc) - - (%% "Get closure env") - (copy-closure-env-to-stack) - (%% "finish closure env") - - ; get the size of the env and save it on the stack - (Mov rcx (Offset rax 8)) - (Push rcx) - - ; Actually call the function - (Mov rax (Offset rax 0)) - (Call rax) - - ; Get the size of the env off the stack - (Pop rcx) - (Sal rcx 3) - - ; pop args - ; First the number of arguments + alignment + the closure - ; then captured values - (Add rsp (* 8 (+ i cnt))) - (Add rsp rcx)))) - - -;; LExpr (Listof LExpr) CEnv -> Asm -(define (compile-tail-call e0 es c) - (let ((cnt (length es))) - (seq - (%% (~a "Begin compile-tail-call: function = " e0)) - ; Generate the code for the thing being called - ; and push the result on the stack - (compile-e e0 c) - (%% "Push function on stack") - (Push rax) - - ; Generate the code for the arguments - ; all results will be put on the stack (compile-es does this) - (compile-es es (cons #f c)) - - ; Reuse the stack frame (as it's a tail call) - (move-args cnt (+ cnt (+ 2 (in-frame c)))) - - ; Get the function being called off the stack - ; Ensure it's a proc and remove the tag - ; Remember it points to the _closure_ - (Mov rax (Offset rsp (* 8 cnt))) - (assert-proc rax) - (Xor rax type-proc) - - ; Bump stack pointer 
(this is where the tail-call - ; savings kick in) - (Add rsp (* 8 (+ cnt (+ 2 (in-frame c))))) - - (copy-closure-env-to-stack) - - (Jmp (Offset rax 0))))) - -;; -> Asm -;; Copy closure's (in rax) env to stack in rcx -(define (copy-closure-env-to-stack) - (let ((copy-loop (symbol->label (gensym 'copy_closure))) - (copy-done (symbol->label (gensym 'copy_done)))) - (seq - - (Mov r8 (Offset rax 8)) ; length - (Mov r9 rax) - (Add r9 16) ; start of env - (Label copy-loop) - (Cmp r8 0) - (Je copy-done) - (Mov rcx (Offset r9 0)) - (Push rcx) ; Move val onto stack - (Sub r8 1) - (Add r9 8) - (Jmp copy-loop) - (Label copy-done)))) - -;; Integer Integer -> Asm -;; Move i arguments upward on stack by offset off -(define (move-args i cnt) - (match i - [0 (seq)] - [_ (seq - ; mov first arg to temp reg - (Mov r9 (Offset rsp (* 8 (sub1 i)))) - ; mov value to correct place on the old frame - (Mov (Offset rsp (* 8 (+ i cnt))) r9) - ; Now do the next one - (move-args (sub1 i) cnt))])) - -;; [Listof Expr] CEnv -> Asm -(define (compile-es es c) - (match es - ['() '()] - [(cons e es) - (seq (compile-e e c) - (Push rax) - (compile-es es (cons #f c)))])) + (compile-e e2 (cons #f c) #f) + (compile-op2 p))) -;; Imm -> Asm -(define (eq-imm imm) - (let ((l1 (gensym))) - (seq (Cmp rax imm) - (Mov rax val-true) - (Je l1) - (Mov rax val-false) - (Label l1)))) - -;; Expr Expr Expr CEnv -> Asm -(define (compile-if e1 e2 e3 c) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1 c) - (Cmp rax val-false) - (Je l1) - (compile-e e2 c) - (Jmp l2) - (Label l1) - (compile-e e3 c) - (Label l2)))) +;; Op3 Expr Expr Expr CEnv -> Asm +(define (compile-prim3 p e1 e2 e3 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (Push rax) + (compile-e e3 (cons #f (cons #f c)) #f) + (compile-op3 p))) -;; Expr Expr Expr CEnv -> Asm -(define (compile-tail-if e1 e2 e3 c s) +;; Expr Expr Expr CEnv Bool -> Asm +(define (compile-if e1 e2 e3 c t?) 
(let ((l1 (gensym 'if)) (l2 (gensym 'if))) - (seq (compile-e e1 c) - (Cmp rax val-false) + (seq (compile-e e1 c #f) + (Cmp rax (value->bits #f)) (Je l1) - (compile-tail-e e2 c s) + (compile-e e2 c t?) (Jmp l2) (Label l1) - (compile-tail-e e3 c s) + (compile-e e3 c t?) (Label l2)))) -;; Expr Expr CEnv -> Asm -(define (compile-begin e1 e2 c) - (seq (compile-e e1 c) - (compile-e e2 c))) +;; Expr Expr CEnv Bool -> Asm +(define (compile-begin e1 e2 c t?) + (seq (compile-e e1 c #f) + (compile-e e2 c t?))) -;; Expr Expr CEnv -> Asm -(define (compile-tail-begin e1 e2 c s) - (seq (compile-e e1 c) - (compile-tail-e e2 c s))) - -;; Id Expr Expr CEnv -> Asm -(define (compile-let x e1 e2 c) - (seq (compile-e e1 c) +;; Id Expr Expr CEnv Bool -> Asm +(define (compile-let x e1 e2 c t?) + (seq (compile-e e1 c #f) (Push rax) - (compile-e e2 (cons x c)) + (compile-e e2 (cons x c) t?) (Add rsp 8))) -;; Id Expr Expr CEnv -> Asm -(define (compile-tail-let x e1 e2 c s) - (seq (compile-e e1 c) - (Push rax) - (compile-tail-e e2 (cons x c) s) - (Add rsp 8))) +;; Id [Listof Expr] CEnv Bool -> Asm +(define (compile-app f es c t?) + ;(compile-app-nontail f es c) + (if t? 
+ (compile-app-tail f es c) + (compile-app-nontail f es c))) + +;; Expr [Listof Expr] CEnv -> Asm +(define (compile-app-tail e es c) + (seq (compile-es (cons e es) c) + (move-args (add1 (length es)) (length c)) + (Add rsp (* 8 (length c))) + (Mov rax (Offset rsp (* 8 (length es)))) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) + (Jmp rax))) -;; (Listof Variable) (Listof Lambda) Expr CEnv -> Asm -(define (compile-letrec fs ls e c) - (seq - (%% (~a "Start compile letrec with" fs)) - (compile-letrec-λs ls c) - (compile-letrec-init fs ls (append (reverse fs) c)) - (%% "Finish compile-letrec-init") - (compile-e e (append (reverse fs) c)) - (Add rsp (* 8 (length fs))))) - -;; (Listof Variable) (Listof Lambda) Expr CEnv -> Asm -(define (compile-tail-letrec fs ls e c) - (seq - (compile-letrec-λs ls c) - (compile-letrec-init fs ls (append (reverse fs) c)) - (%% "Finish compile-letrec-init") - (compile-tail-e e (append (reverse fs) c)) - (Add rsp (* 8 (length fs))))) - -;; (Listof Lambda) CEnv -> Asm -;; Create a bunch of uninitialized closures and push them on the stack -(define (compile-letrec-λs ls c) - (match ls +;; Integer Integer -> Asm +(define (move-args i off) + (cond [(zero? off) (seq)] + [(zero? 
i) (seq)] + [else + (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) + (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) + (move-args (sub1 i) off))])) + +;; Expr [Listof Expr] CEnv -> Asm +;; The return address is placed above the arguments, so callee pops +;; arguments and return address is next frame +(define (compile-app-nontail e es c) + (let ((r (gensym 'ret)) + (i (* 8 (length es)))) + (seq (Lea rax r) + (Push rax) + (compile-es (cons e es) (cons #f c)) + (Mov rax (Offset rsp i)) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) ; fetch the code label + (Jmp rax) + (Label r)))) + +;; Defns -> Asm +;; Compile the closures for ds and push them on the stack +(define (compile-defines-values ds) + (seq (alloc-defines ds 0) + (init-defines ds (reverse (define-ids ds)) 8) + (add-rbx-defines ds 0))) + +;; Defns Int -> Asm +;; Allocate closures for ds at given offset, but don't write environment yet +(define (alloc-defines ds off) + (match ds ['() (seq)] - [(cons l ls) - (match l - [(Lam lab as body) - (let ((ys (fvs l))) - (seq - (Lea rax (symbol->label lab)) - (Mov (Offset rbx 0) rax) - (Mov rax (length ys)) - (Mov (Offset rbx 8) rax) - (Mov rax rbx) - (Or rax type-proc) - (Add rbx (* 8 (+ 2 (length ys)))) - (Push rax) - (compile-letrec-λs ls (cons #f c))))])])) - -;; (Listof Variable) (Listof Lambda) CEnv -> Asm -(define (compile-letrec-init fs ls c) - (match fs + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx off) rax) + (Mov rax rbx) + (Add rax off) + (Or rax type-proc) + (Push rax) + (alloc-defines ds (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns CEnv Int -> Asm +;; Initialize the environment for each closure for ds at given offset +(define (init-defines ds c off) + (match ds ['() (seq)] - [(cons f fs) - (let ((ys (fvs (first ls)))) - (seq - (Mov r9 (Offset rsp (lookup f c))) - (Xor r9 type-proc) - (Add r9 16) ; move past label and length - (copy-env-to-heap ys c 0) - 
(compile-letrec-init fs (rest ls) c)))])) - -;; CEnv -> Asm -;; Pad the stack to be aligned for a call with stack arguments -(define (pad-stack-call c i) - (match (even? (+ (length c) i)) - [#f (seq (Sub rsp 8) (% "padding stack"))] - [#t (seq)])) - -;; CEnv -> Asm -;; Pad the stack to be aligned for a call -(define (pad-stack c) - (pad-stack-call c 0)) - -;; CEnv -> Asm -;; Undo the stack alignment after a call -(define (unpad-stack-call c i) - (match (even? (+ (length c) i)) - [#f (seq (Add rsp 8) (% "unpadding"))] - [#t (seq)])) - -;; CEnv -> Asm -;; Undo the stack alignment after a call -(define (unpad-stack c) - (unpad-stack-call c 0)) + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (free-vars-to-heap fvs c off) + (init-defines ds c (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns Int -> Asm +;; Compute adjustment to rbx for allocation of all ds +(define (add-rbx-defines ds n) + (match ds + ['() (seq (Add rbx (* n 8)))] + [(cons (Defn f xs e) ds) + (add-rbx-defines ds (+ n (add1 (length (fv (Lam f xs e))))))])) + +;; Id [Listof Id] Expr CEnv -> Asm +(define (compile-lam f xs e c) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx 0) rax) + (free-vars-to-heap fvs c 8) + (Mov rax rbx) ; return value + (Or rax type-proc) + (Add rbx (* 8 (add1 (length fvs))))))) + +;; [Listof Id] CEnv Int -> Asm +;; Copy the values of given free variables into the heap at given offset +(define (free-vars-to-heap fvs c off) + (match fvs + ['() (seq)] + [(cons x fvs) + (seq (Mov r8 (Offset rsp (lookup x c))) + (Mov (Offset rbx off) r8) + (free-vars-to-heap fvs c (+ off 8)))])) + +;; [Listof Expr] CEnv -> Asm +(define (compile-es es c) + (match es + ['() '()] + [(cons e es) + (seq (compile-e e c #f) + (Push rax) + (compile-es es (cons #f c)))])) + +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) 
+ (let ((done (gensym))) + (seq (compile-e e c #f) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) + (Add rsp (* 8 (length cm))) + (Jmp done) + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) cm)] + [(PVar x) + (list (seq (Push rax)) (cons x cm))] + [(PLit l) + (let ((ok (gensym))) + (list (seq (Cmp rax (value->bits l)) + (Je ok) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 cm1) + (let ((ok (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Je ok) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next) + (Label ok) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 cm2) + (let ((ok (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Je ok) + (Add rsp (* 8 (length 
cm))) ; haven't pushed anything yet + (Jmp next) + (Label ok) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + cm2))])])])) ;; Id CEnv -> Integer (define (lookup x cenv) (match cenv - ['() (error (~a "undefined variable:" x " Env: " cenv))] + ['() (error "undefined variable:" x)] [(cons y rest) (match (eq? x y) [#t 0] [#f (+ 8 (lookup x rest))])])) -(define (in-frame cenv) - (match cenv - ['() 0] - [(cons #f rest) 0] - [(cons y rest) (+ 1 (in-frame rest))])) - -(define (assert-type mask type) - (λ (arg) - (seq (%% "Begin Assert") - (Mov r9 arg) - (And r9 mask) - (Cmp r9 type) - (Jne 'raise_error) - (%% "End Assert")))) - -(define (type-pred mask type) - (let ((l (gensym))) - (seq (And rax mask) - (Cmp rax type) - (Mov rax (imm->bits #t)) - (Je l) - (Mov rax (imm->bits #f)) - (Label l)))) - -(define assert-integer - (assert-type mask-int type-int)) -(define assert-char - (assert-type mask-char type-char)) -(define assert-box - (assert-type ptr-mask type-box)) -(define assert-cons - (assert-type ptr-mask type-cons)) -(define assert-proc - (assert-type ptr-mask type-proc)) - -(define assert-codepoint - (let ((ok (gensym))) - (seq (assert-integer rax) - (Cmp rax (imm->bits 0)) - (Jl 'raise_error) - (Cmp rax (imm->bits 1114111)) - (Jg 'raise_error) - (Cmp rax (imm->bits 55295)) - (Jl ok) - (Cmp rax (imm->bits 57344)) - (Jg ok) - (Jmp 'raise_error) - (Label ok)))) - -(define assert-byte - (seq (assert-integer rax) - (Cmp rax (imm->bits 0)) - (Jl 'raise_error) - (Cmp rax (imm->bits 255)) - (Jg 'raise_error))) - ;; Symbol -> Label ;; Produce a symbol that is a valid Nasm label (define (symbol->label s) diff --git a/langs/loot/env.rkt b/langs/loot/env.rkt index d085b4ca..c43be9c3 100644 --- a/langs/loot/env.rkt +++ b/langs/loot/env.rkt @@ -12,4 +12,4 @@ ;; Env Variable Value -> Value (define (ext r x i) - (cons (list x i) r)) + (cons 
(list x i) r)) \ No newline at end of file diff --git a/langs/loot/example.knock b/langs/loot/example.knock deleted file mode 100644 index 0a8d0bb1..00000000 --- a/langs/loot/example.knock +++ /dev/null @@ -1,5 +0,0 @@ -#lang racket -(begin - (define (double x y) (+ (+ y y) (+ x x))) - (define (f a b c d) (call (fun double) 2 2)) - (f 1 2 3 4)) diff --git a/langs/loot/example.rkt b/langs/loot/example.rkt index e27d0ec3..24c4fbd0 100644 --- a/langs/loot/example.rkt +++ b/langs/loot/example.rkt @@ -1,3 +1,3 @@ #lang racket -(begin (define (f x) (if (zero? x) 0 (f (sub1 x)))) - (f 1)) +((lambda (x y) y) 1 2) + diff --git a/langs/loot/fv.rkt b/langs/loot/fv.rkt new file mode 100644 index 00000000..2377b7e5 --- /dev/null +++ b/langs/loot/fv.rkt @@ -0,0 +1,35 @@ +#lang racket +(require "ast.rkt") +(provide fv) + +;; Expr -> [Listof Id] +;; List all of the free variables in e +(define (fv e) + (remove-duplicates (fv* e))) + +(define (fv* e) + (match e + [(Var x) (list x)] + [(Prim1 p e) (fv* e)] + [(Prim2 p e1 e2) (append (fv* e1) (fv* e2))] + [(Prim3 p e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(If e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(Begin e1 e2) (append (fv* e1) (fv* e2))] + [(Let x e1 e2) (append (fv* e1) (remq* (list x) (fv* e2)))] + [(App e1 es) (append (fv* e1) (append-map fv* es))] + [(Lam f xs e) (remq* xs (fv* e))] + [(Match e ps es) (append (fv* e) (append-map fv-clause* ps es))] + [_ '()])) + +;; Pat Expr -> [Listof Id] +(define (fv-clause* p e) + (remq* (bv-pat* p) (fv* e))) + +;; Pat -> [Listof Id] +(define (bv-pat* p) + (match p + [(PVar x) (list x)] + [(PCons p1 p2) (append (bv-pat* p1) (bv-pat* p2))] + [(PAnd p1 p2) (append (bv-pat* p1) (bv-pat* p2))] + [(PBox p) (bv-pat* p)] + [_ '()])) diff --git a/langs/loot/gc-racket.rkt b/langs/loot/gc-racket.rkt deleted file mode 100644 index d12d97a6..00000000 --- a/langs/loot/gc-racket.rkt +++ /dev/null @@ -1,147 +0,0 @@ -#lang racket - -;; This is a sketch of a copying collector written in 
Racket - -;; RVal ::= -;; | integer -;; | boolean -;; | char -;; | (list 'cons a) -;; | (list 'box a) -;; | (list 'str a) - -;; SVal ::= -;; | rval -;; | integer ; notice the overlap -;; | char - -;; a ::= (list h i) - -(define heap-size 10) -(define to (make-vector heap-size)) -(define from (make-vector heap-size)) - -(define *to-next* 0) -(define *curr* 0) -(define *type-queue* '()) - - -;; [Listof RVal] -> [Listof RVal] -(define (collect roots) - (set! *curr* 0) - (set! *to-next* 0) - (begin0 (move-roots roots) - (move-all) - (let ((tmp to)) - (set! to from) - (set! from tmp)) - (set! *from-next* *to-next*))) - -;; [Listof RVal] -> [Listof RVal] -;; EFFECT: shallowly moves data pointed to by roots to 'to' space, -;; leaving fowarding address in 'from' space -(define (move-roots rs) - (map move-root rs)) - -;; [Listof RVal] -> [Listof RVal] -;; EFFECT: shallowly moves data pointed to by root to 'to' space, -;; leaving fowarding address in 'from' space -(define (move-root r) - (match r - [(list τ (list h i)) - (match (vector-ref h i) - [(list _ (list (? to?) j)) ; fwd reference - (list τ (list to j))] - [_ - (begin0 (list τ (list to *to-next*)) - (move-obj τ i))])] - ;; not a pointer - [_ r])) - -;; Type Index -> Void -;; EFFECT: Moves object of type τ at from-i to *to-next* -;; Pushes the type on the type queue so the moved object -;; can be interpreted appropriately later -;; If any objects were not word-aligned records, -;; this would need to be adapted -(define (move-obj τ from-i) - (for ((i (size-of (list τ (list from from-i))))) - (vector-set! to *to-next* (vector-ref from (+ from-i i))) - (when (zero? i) ; fwd pointer - (vector-set! from from-i (list τ (list to *to-next*)))) - (set! *to-next* (add1 *to-next*))) - (push! τ)) - -(define to? (λ (h) (eq? h to))) - -;; -> Void -;; EFFECT: Move all objects starting from *curr*, -;; interpreting bits according to the type queue. 
-(define (move-all) - (let loop () - (unless (= *curr* *to-next*) - (move-curr) - (loop)))) - -;; -> Void -;; EFFECT: Move object at *curr*, interpreting bits according -;; to front of type queue. -(define (move-curr) - (let ((τ (pop!))) - (match τ - ['box (scan-word)] - ['cons (scan-word) (scan-word)] - ['str (set! *curr* (+ *curr* (add1 (vector-ref to *curr*))))]))) - -;; -> Void -;; *curr* is at the start of a value (i.e. a single word) -(define (scan-word) - (match (vector-ref to *curr*) - [(list τ (list from i)) - (match (vector-ref from i) - [(list _ (list (? to?) j)) ; fwd reference - (vector-set! to *curr* (list τ (list to j)))] - [_ - (vector-set! to *curr* (list τ (list to *to-next*))) - (move-obj τ i)])] - [_ (void)]) - - (set! *curr* (add1 *curr*))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (size-of r) - (match r - [(list 'box _) 1] - [(list 'cons _) 2] - [(list 'str (list h i)) - (add1 (vector-ref h i))])) - -(define (push! τ) - (printf "pushing ~a\n" τ) - (set! *type-queue* - (append *type-queue* (list τ)))) - -(define (pop!) - (let ((τ (car *type-queue*))) - (printf "popping ~a\n" τ) - (begin0 τ - (set! *type-queue* (cdr *type-queue*))))) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; an example - -(define roots - (list (list 'cons (list from 0)) - (list 'cons (list from 0)))) - -(vector-set! from 0 (list 'str (list from 3))) -(vector-set! from 1 (list 'box (list from 6))) ;(list 'str (list from 3))) ; sharing a string -(vector-set! from 2 3) ; dead -(vector-set! from 3 2) -(vector-set! from 4 #\a) -(vector-set! from 5 #\b) -(vector-set! 
from 6 #\c) - -(define *from-next* 7) diff --git a/langs/loot/gc.c b/langs/loot/gc.c deleted file mode 100644 index adc38a19..00000000 --- a/langs/loot/gc.c +++ /dev/null @@ -1,236 +0,0 @@ -#include -#include -#include -#include "types.h" -#include "heap.h" - -#define DEBUG 1 - -#ifdef DEBUG -# define GC_DEBUG(x) x -#else -# define GC_DEBUG(x) -#endif - -void print_result(int64_t result); -void print_mem(int64_t *, int64_t *); - -const char * ptr_type_to_string(int64_t tag); - -int obj_size(int64_t v) { - int type_tag = ptr_type_mask & v; - int64_t * obj = (int64_t *)(v ^ type_tag); - switch (type_tag) { - case box_type_tag: - return 1; - case cons_type_tag: - return 2; - default: - printf("unkown object type in obj_size"); - exit(1); - } -} - -void print_types(type_front, type_rear) { - int j; - printf("TYPES:\n"); - for (j = type_front; j < type_rear; j++) { - printf(" [%d]: %s\n", j, - ((type[j] == box_type_tag) ? "box" : - (type[j] == cons_type_tag) ? "cons" : - "unknown")); - } -} - -void move_obj(char ptr_type, int64_t * addr, int64_t ** to_next) { - GC_DEBUG(printf("move_obj <%s> at [%" PRIx64 "] to [%" PRIx64 "]\n", - ptr_type_to_string((int64_t) ptr_type), - (int64_t) addr, - (int64_t) *to_next)); - int size = - (ptr_type == box_type_tag) ? 1 : - (ptr_type == cons_type_tag) ? 
2 : - -1; - - int i; - for (i = 0; i < size; i++) { - GC_DEBUG(printf(" [%" PRIx64 "] <- [%" PRIx64 "]\n", - (int64_t) *to_next, (int64_t) (addr + i))); - - *to_next[0] = addr[i]; - if (i == 0) { - *addr = (int64_t) *to_next | ptr_type; // fwd - GC_DEBUG(printf(" [%" PRIx64 "] <- [%" PRIx64 "] (fwd)\n", - (int64_t) addr , (int64_t) *to_next)); - - } - *to_next = *to_next + 1; - } -} - -#define is_fwd(a) ((((a & ptr_addr_mask) - (int64_t) &heap[heap_size]) ^ (int64_t) from_side) >= 0) - -void scan_word(int64_t ** curr, int64_t ** to_next, char * type_rear) { - GC_DEBUG(printf("scan_word [%" PRIx64 "]: ", (int64_t) *curr)); - int64_t v = **curr; - if (v & ptr_type_mask) { - int64_t t = ptr_type_mask & v; - int64_t * a = (int64_t *) (ptr_addr_mask & v); - if ((*a & ptr_type_mask) && (is_fwd(*a))) { - GC_DEBUG(printf("&[%" PRIx64 "] -> [%" PRIx64 "]", - (int64_t) a, (int64_t) (*a & ptr_addr_mask))); - *curr[0] = *a; - } else { - GC_DEBUG(printf("&<%s>[%" PRIx64 "]\n", ptr_type_to_string(t), (int64_t) a)); - **curr = ((int64_t) (*to_next)) | t; - move_obj(t, a, to_next); - GC_DEBUG(printf("PUSH!: %s", ptr_type_to_string((int64_t) t))); - type[*type_rear] = t; (*type_rear)++; - } - } else { - GC_DEBUG(print_result(v)); // an immediate - } - GC_DEBUG(printf("\n")); - *curr = *curr + 1; -} - -struct Pair { - int64_t x; - int64_t y; -}; - - -int64_t * collect_garbage_p(int64_t * rdi, int64_t * rbp, int64_t * rsp) { - struct Pair p; - p.x = 9 << int_shift; - p.y = 32 << int_shift; - return rdi; -} - -int64_t * collect_garbage(int64_t * rdi, int64_t * rbp, int64_t * rsp) { - - int64_t * to_space = (from_side < 0) ? heap + heap_size : heap; - int64_t * from_space = (from_side > 0) ? 
heap + heap_size : heap; - int64_t * to_next = to_space; - - char type_front = 0; - char type_rear = 0; - - GC_DEBUG(printf("--------------------------------------------\n")); - GC_DEBUG(printf("TRACING ROOTS\nROOTS:\n")); - GC_DEBUG(print_mem(rsp, rbp)); - GC_DEBUG(printf("FROM:\n")); - GC_DEBUG(print_mem(from_space, rdi)); - - // roots - // shallowly move data pointed to by each root to 'to' space, - // leaving forwarding address in 'from' space. - int64_t * root = rsp; - while (root != rbp) { - GC_DEBUG(printf("scan_root:")); - GC_DEBUG(print_mem(root, root + 1)); - int64_t v = root[0]; - if (ptr_type_mask & v) { - int64_t * a = (int64_t *) (ptr_addr_mask & v); - if ((*a & ptr_type_mask) && (is_fwd(*a))) { - root[0] = *a; - GC_DEBUG(printf("forward pointer, resolving\n")); - } else { - int64_t t = ptr_type_mask & v; - move_obj(t, a, &to_next); - root[0] = *a; - GC_DEBUG(printf("PUSH!: %s\n", ptr_type_to_string((int64_t) t))); - type[type_rear] = t; type_rear++; - } - } else { - GC_DEBUG(printf(" ")); - GC_DEBUG(print_result(root[0])); - } - - GC_DEBUG(print_types(type_front, type_rear)); - GC_DEBUG(printf("ROOT:\n")); - GC_DEBUG(print_mem(rsp, rbp)); - GC_DEBUG(printf("FROM:\n")); - GC_DEBUG(print_mem(from_space, rdi)); - GC_DEBUG(printf("TO:\n")); - GC_DEBUG(print_mem(to_space, to_next)); - - // advance - root = &root[1]; - } - - GC_DEBUG(printf("--------------------------------------------\n")); - GC_DEBUG(printf("TRACING TO SPACE\n")); - - int64_t * curr = to_space; - while (curr != to_next) { - int j; - char t = type[type_front]; type_front++; - - GC_DEBUG(printf("TRACING A %s\n", ptr_type_to_string((int64_t) t))); - - switch (t) { - case box_type_tag: - scan_word(&curr, &to_next, &type_rear); - break; - case cons_type_tag: - scan_word(&curr, &to_next, &type_rear); - scan_word(&curr, &to_next, &type_rear); - break; - default: - printf("unknown type: %d!!!\n", t); - exit(1); - } - - GC_DEBUG(print_types(type_front, type_rear)); - GC_DEBUG(printf("FROM:\n")); 
- GC_DEBUG(print_mem(from_space, rdi)); - GC_DEBUG(printf("TO:\n")); - GC_DEBUG(print_mem(to_space, to_next)); - } - - GC_DEBUG(printf("--------------------------------------------\n")); - GC_DEBUG(printf("DONE\n")); - - GC_DEBUG(printf("ROOT:\n")); - GC_DEBUG(print_mem(rsp, rbp)); - GC_DEBUG(printf("TO:\n")); - GC_DEBUG(print_mem(to_space, to_next)); - - from_side = 0 - from_side; - return to_next; -} - -const char * ptr_type_to_string(int64_t tag) { - switch (tag) { - case box_type_tag: - return "box"; - case cons_type_tag: - return "cons"; - default: - return "unknown"; - } -} - -void print_blobs(void *h, void *end_of_heap, int size_in_bytes, void (* print)()) { - int i; - while (h < end_of_heap) { - (*print)(h); - h = h + size_in_bytes; - } -} - -void print_word(int64_t *a) { - printf(" [%" PRIx64 "]: ", (int64_t)a); - if (ptr_type_mask & *a) { - printf("&<%s>%" PRIx64 "\n", ptr_type_to_string(ptr_type_mask & *a), *a & ptr_addr_mask); - } else { - print_result(*a); - printf("\n"); - } -} - -void print_mem(int64_t *h, int64_t *end_of_heap) { - print_blobs(h, end_of_heap, 8, &print_word); -} - diff --git a/langs/loot/interp-defun.rkt b/langs/loot/interp-defun.rkt index 9783f592..b5cb4521 100644 --- a/langs/loot/interp-defun.rkt +++ b/langs/loot/interp-defun.rkt @@ -1,5 +1,5 @@ #lang racket -(provide interp interp-env interp-prim1 apply-function) +(provide interp interp-env (struct-out Closure) zip) (require "ast.rkt" "env.rkt" "interp-prims.rkt") @@ -10,110 +10,148 @@ ;; | Integer ;; | Boolean ;; | Character -;; | (Fun f) ;; | Eof ;; | Void ;; | '() ;; | (cons Value Value) ;; | (box Value) -;; | Function - -;; type Function = -;; | `(closure ,Formals ,Expr ,Env) -;; | `(rec-closure ,Lambda ,(-> Env)) +;; | (vector Value ...) +;; | (string Char ...) 
+;; | (Closure [Listof Id] Expr Env) +(struct Closure (xs e r) #:prefab) ;; type REnv = (Listof (List Id Value)) ;; type Defns = (Listof Defn) -;; Prog Defns -> Answer +;; Prog -> Answer (define (interp p) - (interp-env (desugar p) '())) + (match p + [(Prog ds e) + (interp-env e '() ds)])) ;; Expr Env Defns -> Answer -(define (interp-env e r) +(define (interp-env e r ds) (match e - [(Prog '() e) (interp-env e r)] [(Int i) i] [(Bool b) b] [(Char c) c] [(Eof) eof] [(Empty) '()] - [(Var x) (lookup r x)] + [(Var x) (interp-var x r ds)] + [(Str s) (string-copy s)] [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] [(Prim1 p e) - (match (interp-env e r) + (match (interp-env e r ds) ['err 'err] [v (interp-prim1 p v)])] [(Prim2 p e1 e2) - (match (interp-env e1 r) + (match (interp-env e1 r ds) ['err 'err] - [v1 (match (interp-env e2 r) + [v1 (match (interp-env e2 r ds) ['err 'err] [v2 (interp-prim2 p v1 v2)])])] + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (match (interp-env e3 r ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] [(If p e1 e2) - (match (interp-env p r) + (match (interp-env p r ds) ['err 'err] [v (if v - (interp-env e1 r) - (interp-env e2 r))])] + (interp-env e1 r ds) + (interp-env e2 r ds))])] [(Begin e1 e2) - (match (interp-env e1 r) + (match (interp-env e1 r ds) ['err 'err] - [_ (interp-env e2 r)])] + [_ (interp-env e2 r ds)])] [(Let x e1 e2) - (match (interp-env e1 r) + (match (interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (Closure xs e r)] + [(App e es) + (match (interp-env e r ds) ['err 'err] - [v (interp-env e2 (ext r x v))])] - [(LetRec bs e) - (letrec ((r* (λ () - (append - (zip (map car bs) - ;; η-expansion to delay evaluating r* - ;; relies on RHSs being functions - (map (λ (l) (RecClosure l r*)) - (map cadr bs))) - r)))) - (interp-env e (r*)))] - [(Lam _ xs e1) - (Closure xs e r)] - 
[(App f es) - (match (interp-env* (cons f es) r) - [(list (? function? f) vs ...) - (apply apply-function f vs)] - [e e])] - [_ 'err])) + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (match f + [(Closure xs e r) + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err)] + [_ 'err])])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) -(define (function? f) - (match f - [(Closure _ _ _) #t] - [(RecClosure _ _) #t] - [(? procedure?) #t] - [_ #f])) +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) -;; Function Value ... -> Answer -(define (apply-function f . vs) - (match f - [(Closure xs e r) - (if (= (length xs) (length vs)) - (interp-env e (append (zip xs vs) r)) - 'errwat)] - [(RecClosure (Lam '() xs e) r*) - ; You've got to apply the the r* thunk - (apply apply-function (Closure xs e (r*)) vs)] - [(? procedure? f) (apply f vs)])) +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? 
l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) ;; (Listof Expr) REnv Defns -> (Listof Value) | 'err -(define (interp-env* es r) +(define (interp-env* es r ds) (match es ['() '()] [(cons e es) - (match (interp-env e r) - ['err 'errsdf] - [v (cons v (interp-env* es r))])])) + (match (interp-env e r ds) + ['err 'err] + [v (match (interp-env* es r ds) + ['err 'err] + [vs (cons v vs)])])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) (define (zip xs ys) (match* (xs ys) diff --git a/langs/loot/interp-file.rkt b/langs/loot/interp-file.rkt deleted file mode 100644 index 69340664..00000000 --- a/langs/loot/interp-file.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang racket -(provide main) -(require "parse.rkt" "interp.rkt") - -;; String -> Void -;; Parse and interpret contents of given filename, -;; print result on stdout -(define (main fn) - (let ((p (open-input-file fn))) - (begin - (read-line p) ; ignore #lang racket line - (displayln (interp (parse (read p)))) - (close-input-port p)))) diff --git a/langs/loot/interp-prims.rkt b/langs/loot/interp-prims.rkt index a1ed9ce4..15039f9f 100644 --- a/langs/loot/interp-prims.rkt +++ b/langs/loot/interp-prims.rkt @@ -1,6 +1,6 @@ #lang racket (require "ast.rkt") -(provide interp-prim1 interp-prim2) +(provide interp-prim1 interp-prim2 interp-prim3) ;; Op1 Value -> Answer (define (interp-prim1 p1 v) @@ -18,6 +18,12 @@ [(list 'car (? pair?)) (car v)] [(list 'cdr (? pair?)) (cdr v)] [(list 'empty? v) (empty? 
v)] + [(list 'cons? v) (cons? v)] + [(list 'box? v) (box? v)] + [(list 'vector? v) (vector? v)] + [(list 'vector-length (? vector?)) (vector-length v)] + [(list 'string? v) (string? v)] + [(list 'string-length (? string?)) (string-length v)] [_ 'err])) ;; Op2 Value Value -> Answer @@ -25,9 +31,36 @@ (match (list p v1 v2) [(list '+ (? integer?) (? integer?)) (+ v1 v2)] [(list '- (? integer?) (? integer?)) (- v1 v2)] - [(list 'eq? v1 v2) (eqv? v1 v2)] + [(list '< (? integer?) (? integer?)) (< v1 v2)] + [(list '= (? integer?) (? integer?)) (= v1 v2)] [(list 'cons v1 v2) (cons v1 v2)] - [_ 'err])) + [(list 'eq? v1 v2) (eq? v1 v2)] + [(list 'make-vector (? integer?) _) + (if (<= 0 v1) + (make-vector v1 v2) + 'err)] + [(list 'vector-ref (? vector?) (? integer?)) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-ref v1 v2) + 'err)] + [(list 'make-string (? integer?) (? char?)) + (if (<= 0 v1) + (make-string v1 v2) + 'err)] + [(list 'string-ref (? string?) (? integer?)) + (if (<= 0 v2 (sub1 (string-length v1))) + (string-ref v1 v2) + 'err)] + [_ 'err])) + +;; Op3 Value Value Value -> Answer +(define (interp-prim3 p v1 v2 v3) + (match (list p v1 v2 v3) + [(list 'vector-set! (? vector?) (? integer?) _) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-set! v1 v2 v3) + 'err)] + [_ 'err])) ;; Any -> Boolean (define (codepoint? v) diff --git a/langs/loot/interp-stdin.rkt b/langs/loot/interp-stdin.rkt new file mode 100644 index 00000000..965b9cc4 --- /dev/null +++ b/langs/loot/interp-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt" "interp.rkt" "read-all.rkt") + +;; -> Void +;; Parse and interpret contents of stdin, +;; print result on stdout +(define (main) + (read-line) ; ignore #lang racket line + (let ((r (interp (parse (read-all))))) + (unless (void? 
r) + (println r)))) diff --git a/langs/loot/interp.rkt b/langs/loot/interp.rkt index 5dce29b9..a0697318 100644 --- a/langs/loot/interp.rkt +++ b/langs/loot/interp.rkt @@ -1,5 +1,5 @@ #lang racket -(provide interp interp-env interp-prim1) +(provide interp interp-env) (require "ast.rkt" "env.rkt" "interp-prims.rkt") @@ -10,89 +10,147 @@ ;; | Integer ;; | Boolean ;; | Character -;; | (Fun f) ;; | Eof ;; | Void ;; | '() ;; | (cons Value Value) ;; | (box Value) +;; | (vector Value ...) +;; | (string Char ...) +;; | (Value ... -> Answer) ;; type REnv = (Listof (List Id Value)) ;; type Defns = (Listof Defn) -;; Prog Defns -> Answer +;; Prog -> Answer (define (interp p) - (interp-env (desugar p) '())) + (match p + [(Prog ds e) + (interp-env e '() ds)])) ;; Expr Env Defns -> Answer -(define (interp-env e r) +(define (interp-env e r ds) (match e - [(Prog '() e) (interp-env e r)] [(Int i) i] [(Bool b) b] [(Char c) c] [(Eof) eof] [(Empty) '()] - [(Var x) (lookup r x)] + [(Var x) (interp-var x r ds)] + [(Str s) (string-copy s)] [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] [(Prim1 p e) - (match (interp-env e r) + (match (interp-env e r ds) ['err 'err] [v (interp-prim1 p v)])] [(Prim2 p e1 e2) - (match (interp-env e1 r) + (match (interp-env e1 r ds) ['err 'err] - [v1 (match (interp-env e2 r) + [v1 (match (interp-env e2 r ds) ['err 'err] [v2 (interp-prim2 p v1 v2)])])] + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (match (interp-env e3 r ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] [(If p e1 e2) - (match (interp-env p r) + (match (interp-env p r ds) ['err 'err] [v (if v - (interp-env e1 r) - (interp-env e2 r))])] + (interp-env e1 r ds) + (interp-env e2 r ds))])] [(Begin e1 e2) - (match (interp-env e1 r) + (match (interp-env e1 r ds) ['err 'err] - [_ (interp-env e2 r)])] + [_ (interp-env e2 r ds)])] [(Let x e1 e2) - (match (interp-env e1 r) + (match 
(interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (λ vs + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err))] + [(App e es) + (match (interp-env e r ds) ['err 'err] - [v (interp-env e2 (ext r x v))])] - [(LetRec bs e) - (letrec ((r* (λ () - (append - (zip (map car bs) - ;; η-expansion to delay evaluating r* - ;; relies on RHSs being functions - (map (λ (l) (λ vs (apply (interp-env l (r*)) vs))) - (map cadr bs))) - r)))) - (interp-env e (r*)))] - [(Lam _ xs e1) - (lambda vs - (if (= (length vs) (length xs)) - (interp-env e1 (append (zip xs vs) r)) - 'err))] - [(App f es) - (match (interp-env* (cons f es) r) - [(list f vs ...) - (if (procedure? f) - (apply f vs) - 'err)])] - [_ 'err])) + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (if (procedure? f) + (apply f vs) + 'err)])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? 
l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) + +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) ;; (Listof Expr) REnv Defns -> (Listof Value) | 'err -(define (interp-env* es r) +(define (interp-env* es r ds) (match es ['() '()] [(cons e es) - (match (interp-env e r) + (match (interp-env e r ds) ['err 'err] - [v (cons v (interp-env* es r))])])) + [v (match (interp-env* es r ds) + ['err 'err] + [vs (cons v vs)])])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) (define (zip xs ys) (match* (xs ys) diff --git a/langs/loot/io.c b/langs/loot/io.c index 8c6b713f..7ef82281 100644 --- a/langs/loot/io.c +++ b/langs/loot/io.c @@ -1,25 +1,25 @@ #include #include #include "types.h" +#include "values.h" #include "runtime.h" -int64_t read_byte(void) { +val_t read_byte(void) +{ char c = getc(in); - return (c == EOF) ? - val_eof : - (int64_t)(c << int_shift); + return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); } -int64_t peek_byte(void) { +val_t peek_byte(void) +{ char c = getc(in); ungetc(c, in); - return (c == EOF) ? - val_eof : - (int64_t)(c << int_shift); + return (c == EOF) ? 
val_wrap_eof() : val_wrap_int(c); + } -int64_t write_byte(int64_t c) { - int64_t codepoint = c >> int_shift; - putc((char) codepoint, out); - return 0; +val_t write_byte(val_t c) +{ + putc((char) val_unwrap_int(c), out); + return val_wrap_void(); } diff --git a/langs/loot/label.rkt b/langs/loot/label.rkt deleted file mode 100644 index a64947d0..00000000 --- a/langs/loot/label.rkt +++ /dev/null @@ -1,58 +0,0 @@ -#lang racket -(require "ast.rkt") -(provide (all-defined-out)) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Labelling Lambdas -;; -;; Each lambda in a program needs to have a unique name so that we know what -;; code we need to jump to when that lambda is 'called'. -;; Luckily, `gensym` provides all the functionality that we need here. -;; -;; The flat values are easy: no possibility of there being a lambda, so -;; we just return the unaltered expression. For everything else we traverse -;; down the structure, the only case that actually 'does' anything is -;; for `Lam` -(define (label-λ e) - (match e - [(Prog ds e) (Prog (map label-λ ds) (label-λ e))] - [(Defn f xs e) (Defn f xs (label-λ e))] - [(Prim1 p e) (Prim1 p (label-λ e))] - [(Prim2 p e1 e2) (Prim2 p (label-λ e1) (label-λ e2))] - [(If e1 e2 e3) (If (label-λ e1) (label-λ e2) (label-λ e3))] - [(Begin e1 e2) (Begin (label-λ e1) (label-λ e2))] - [(Let x e1 e2) (Let x (label-λ e1) (label-λ e2))] - [(LetRec bs e1) (LetRec (map (lambda (xs) (map label-λ xs)) bs) (label-λ e1))] - [(Lam n xs e) (Lam (gensym 'lam) xs (label-λ e))] - [(App f es) (App (label-λ f) (map label-λ es))] - [_ e])) - -;; For those that struggle with typing unicode -(define label-lambda label-λ) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Collecting all Lambdas -;; -;; While the lambdas could be _written_ anywhere in the source code, we do need -;; to write the generated target code somewhere reliable. 
There are a few ways -;; to do this, but we've decided to take the most straightforward route: collect -;; the lambdas and treat them as 'additional' function definitions. -;; -;; In order to do this we'll need a list of all the lambdas in a program. -;; This function traverses our program and collects all the lambdas. -(define (λs e) - (match e - [(Prog ds e) (append (append-map λs ds) (λs e))] - [(Defn f xs e) (λs e)] - [(Prim1 p e) (λs e)] - [(Prim2 p e1 e2) (append (λs e1) (λs e2))] - [(If e1 e2 e3) (append (λs e1) (λs e2) (λs e3))] - [(Begin e1 e2) (append (λs e1) (λs e2))] - [(Let x e1 e2) (append (λs e1) (λs e2))] - [(LetRec bs e1) (append (append-map (lambda (xs) ((compose cdr map) λs xs)) bs) (λs e1))] - [(Lam n xs e1) (cons e (λs e1))] - [(App f es) (append (λs f) (append-map λs es))] - [_ '()])) - -;; For those that struggle with typing unicode -(define lambdas λs) diff --git a/langs/loot/lambdas.rkt b/langs/loot/lambdas.rkt new file mode 100644 index 00000000..0a246408 --- /dev/null +++ b/langs/loot/lambdas.rkt @@ -0,0 +1,35 @@ +#lang racket +(require "ast.rkt") +(provide lambdas) + + +;; Prog -> [Listof Lam] +;; List all of the lambda expressions in p +(define (lambdas p) + (match p + [(Prog ds e) + (append (lambdas-ds ds) (lambdas-e e))])) + +;; Defns -> [Listof Lam] +;; List all of the lambda expressions in ds +(define (lambdas-ds ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (append (lambdas-e e) + (lambdas-ds ds))])) + +;; Expr -> [Listof Lam] +;; List all of the lambda expressions in e +(define (lambdas-e e) + (match e + [(Prim1 p e) (lambdas-e e)] + [(Prim2 p e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Prim3 p e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(If e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(Begin e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Let x e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(App e1 es) (append (lambdas-e e1) (append-map lambdas-e es))] + [(Lam 
f xs e1) (cons e (lambdas-e e1))] + [(Match e ps es) (append (lambdas-e e) (append-map lambdas-e es))] + [_ '()])) diff --git a/langs/loot/main.c b/langs/loot/main.c index 0c743c30..1ca6115f 100644 --- a/langs/loot/main.c +++ b/langs/loot/main.c @@ -1,83 +1,40 @@ #include -#include #include -#include "types.h" +#include "values.h" +#include "print.h" #include "runtime.h" FILE* in; FILE* out; void (*error_handler)(); -int64_t *heap; +val_t *heap; -void print_result(int64_t); - -void error_exit() { +void error_exit() +{ printf("err\n"); exit(1); } -void raise_error() { +void raise_error() +{ return error_handler(); } -int main(int argc, char** argv) { +int main(int argc, char** argv) +{ in = stdin; out = stdout; error_handler = &error_exit; heap = malloc(8 * heap_size); - int64_t result = entry(heap); - // See if we need to print the initial tick - if (cons_type_tag == (ptr_type_mask & result)) printf("'"); - print_result(result); - if (result != val_void) printf("\n"); - free(heap); - return 0; -} -void print_char(int64_t); -void print_cons(int64_t); + val_t result; -void print_result(int64_t result) { - if (cons_type_tag == (ptr_type_mask & result)) { - printf("("); - print_cons(result); - printf(")"); - } else if (box_type_tag == (ptr_type_mask & result)) { - printf("#&"); - print_result (*((int64_t *)(result ^ box_type_tag))); - } else if (proc_type_tag == (ptr_type_mask & result)) { - printf(""); - } else if (int_type_tag == (int_type_mask & result)) { - printf("%" PRId64, result >> int_shift); - } else if (char_type_tag == (char_type_mask & result)) { - print_char(result); - } else { - switch (result) { - case val_true: - printf("#t"); break; - case val_false: - printf("#f"); break; - case val_eof: - printf("#"); break; - case val_empty: - printf("()"); break; - case val_void: - /* nothing */ break; - } - } -} + result = entry(heap); + + print_result(result); + if (val_typeof(result) != T_VOID) + putchar('\n'); -void print_cons(int64_t a) { - int64_t car = 
*((int64_t *)((a + 8) ^ cons_type_tag)); - int64_t cdr = *((int64_t *)((a + 0) ^ cons_type_tag)); - print_result(car); - if (cdr == val_empty) { - // nothing - } else if (cons_type_tag == (ptr_type_mask & cdr)) { - printf(" "); - print_cons(cdr); - } else { - printf(" . "); - print_result(cdr); - } + free(heap); + return 0; } diff --git a/langs/iniquity/compile-file.rkt b/langs/loot/parse-file.rkt similarity index 83% rename from langs/iniquity/compile-file.rkt rename to langs/loot/parse-file.rkt index 3593dc7d..a5021320 100644 --- a/langs/iniquity/compile-file.rkt +++ b/langs/loot/parse-file.rkt @@ -9,5 +9,5 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read-all p))))) + (displayln (parse (read-all p))) (close-input-port p)))) diff --git a/langs/loot/parse.rkt b/langs/loot/parse.rkt index 44551ac7..3277428d 100644 --- a/langs/loot/parse.rkt +++ b/langs/loot/parse.rkt @@ -1,65 +1,96 @@ #lang racket -(provide parse parse-e) +(provide parse parse-define parse-e) (require "ast.rkt") -;; S-Expr -> Prog +;; [Listof S-Expr] -> Prog (define (parse s) (match s - [(list 'begin (and ds (list 'define _ _)) ... e) - (Prog (map parse-d ds) (parse-e e))] - [e (Prog '() (parse-e e))])) + [(cons (and (cons 'define _) d) s) + (match (parse s) + [(Prog ds e) + (Prog (cons (parse-define d) ds) e)])] + [(cons e '()) (Prog '() (parse-e e))] + [_ (error "program parse error")])) ;; S-Expr -> Defn -(define (parse-d s) +(define (parse-define s) (match s - [(list 'define (list (? symbol? f) (? symbol? xs) ...) e) - (Defn f xs (parse-e e))] + [(list 'define (list-rest (? symbol? f) xs) e) + (if (andmap symbol? xs) + (Defn f xs (parse-e e)) + (error "parse definition error"))] [_ (error "Parse defn error" s)])) ;; S-Expr -> Expr (define (parse-e s) (match s - [(? integer?) (Int s)] + [(? exact-integer?) (Int s)] [(? boolean?) (Bool s)] [(? char?) (Char s)] + [(? string?) (Str s)] ['eof (Eof)] [(? symbol?) 
(Var s)] [(list 'quote (list)) (Empty)] [(list (? (op? op0) p0)) (Prim0 p0)] [(list (? (op? op1) p1) e) (Prim1 p1 (parse-e e))] [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse-e e1) (parse-e e2))] + [(list (? (op? op3) p3) e1 e2 e3) + (Prim3 p3 (parse-e e1) (parse-e e2) (parse-e e3))] [(list 'begin e1 e2) (Begin (parse-e e1) (parse-e e2))] [(list 'if e1 e2 e3) (If (parse-e e1) (parse-e e2) (parse-e e3))] [(list 'let (list (list (? symbol? x) e1)) e2) (Let x (parse-e e1) (parse-e e2))] - [(list 'letrec bs e1) - (LetRec (parse-bindings bs) (parse-e e1))] - [(list 'λ (? symbol-list? as) e1) (Lam '() as (parse-e e1))] - [(list 'lambda (? symbol-list? as) e1) (Lam '() as (parse-e e1))] - [(cons f es) - (App (parse-e f) (map parse-e es))] + [(cons 'match (cons e ms)) + (parse-match (parse-e e) ms)] + [(list (or 'lambda 'λ) xs e) + (if (and (list? xs) + (andmap symbol? xs)) + (Lam (gensym 'lambda) xs (parse-e e)) + (error "parse lambda error"))] + [(cons e es) + (App (parse-e e) (map parse-e es))] [_ (error "Parse error" s)])) -(define (parse-bindings bs) - (match bs - ['() '()] - [(cons (list (? symbol? x) e1) rest) - (cons (list x (parse-e e1)) (parse-bindings rest))])) +(define (parse-match e ms) + (match ms + ['() (Match e '() '())] + [(cons (list p r) ms) + (match (parse-match e ms) + [(Match e ps es) + (Match e + (cons (parse-pat p) ps) + (cons (parse-e r) es))])])) -(define (symbol-list? xs) - (match xs - [(list (? symbol?) ...) xs])) +(define (parse-pat p) + (match p + [(? boolean?) (PLit p)] + [(? exact-integer?) (PLit p)] + [(? char?) (PLit p)] + ['_ (PWild)] + [(? symbol?) (PVar p)] + [(list 'quote (list)) + (PLit '())] + [(list 'box p) + (PBox (parse-pat p))] + [(list 'cons p1 p2) + (PCons (parse-pat p1) (parse-pat p2))] + [(list 'and p1 p2) + (PAnd (parse-pat p1) (parse-pat p2))])) (define op0 '(read-byte peek-byte void)) + (define op1 '(add1 sub1 zero? char? write-byte eof-object? - integer->char char->integer box unbox empty? car cdr - string? 
string-length)) + integer->char char->integer + box unbox empty? cons? box? car cdr + vector? vector-length string? string-length)) (define op2 - '(+ - eq? cons string-ref make-string)) + '(+ - < = cons eq? make-vector vector-ref make-string string-ref)) +(define op3 + '(vector-set!)) (define (op? ops) (λ (x) diff --git a/langs/loot/print.c b/langs/loot/print.c new file mode 100644 index 00000000..6cb5b1b0 --- /dev/null +++ b/langs/loot/print.c @@ -0,0 +1,842 @@ +#include +#include +#include "values.h" + +void print_char(val_char_t); +void print_codepoint(val_char_t); +void print_cons(val_cons_t *); +void print_vect(val_vect_t*); +void print_str(val_str_t*); +void print_str_char(val_char_t); +void print_result_interior(val_t); +int utf8_encode_char(val_char_t, char *); + +void print_result(val_t x) +{ + switch (val_typeof(x)) { + case T_INT: + printf("%" PRId64, val_unwrap_int(x)); + break; + case T_BOOL: + printf(val_unwrap_bool(x) ? "#t" : "#f"); + break; + case T_CHAR: + print_char(val_unwrap_char(x)); + break; + case T_EOF: + printf("#"); + break; + case T_VOID: + break; + case T_EMPTY: + case T_BOX: + case T_CONS: + case T_VECT: + printf("'"); + print_result_interior(x); + break; + case T_STR: + putchar('"'); + print_str(val_unwrap_str(x)); + putchar('"'); + break; + case T_PROC: + printf("#"); + break; + case T_INVALID: + printf("internal error"); + } +} + +void print_result_interior(val_t x) +{ + switch (val_typeof(x)) { + case T_EMPTY: + printf("()"); + break; + case T_BOX: + printf("#&"); + print_result_interior(val_unwrap_box(x)->val); + break; + case T_CONS: + printf("("); + print_cons(val_unwrap_cons(x)); + printf(")"); + break; + case T_VECT: + print_vect(val_unwrap_vect(x)); + break; + default: + print_result(x); + } +} + +void print_vect(val_vect_t *v) +{ + uint64_t i; + + if (!v) { printf("#()"); return; } + + printf("#("); + for (i = 0; i < v->len; ++i) { + print_result_interior(v->elems[i]); + + if (i < v->len - 1) + putchar(' '); + } + 
printf(")"); +} + +void print_cons(val_cons_t *cons) +{ + print_result_interior(cons->fst); + + switch (val_typeof(cons->snd)) { + case T_EMPTY: + // nothing + break; + case T_CONS: + printf(" "); + print_cons(val_unwrap_cons(cons->snd)); + break; + default: + printf(" . "); + print_result_interior(cons->snd); + break; + } +} + +void print_str(val_str_t* s) +{ + if (!s) return; + uint64_t i; + for (i = 0; i < s->len; ++i) + print_str_char(s->codepoints[i]); +} + +void print_str_char_u(val_char_t c) +{ + printf("\\u%04X", c); +} + +void print_str_char_U(val_char_t c) +{ + printf("\\U%08X", c); +} + +void print_str_char(val_char_t c) +{ + switch (c) { + case 0 ... 6: + print_str_char_u(c); + break; + case 7: + printf("\\a"); + break; + case 8: + printf("\\b"); + break; + case 9: + printf("\\t"); + break; + case 10: + printf("\\n"); + break; + case 11: + printf("\\v"); + break; + case 12: + printf("\\f"); + break; + case 13: + printf("\\r"); + break; + case 14 ... 26: + print_str_char_u(c); + break; + case 27: + printf("\\e"); + break; + case 28 ... 31: + print_str_char_u(c); + break; + case 34: + printf("\\\""); + break; + case 39: + printf("'"); + break; + case 92: + printf("\\\\"); + break; + case 127 ... 159: + case 173 ... 173: + case 888 ... 889: + case 896 ... 899: + case 907 ... 907: + case 909 ... 909: + case 930 ... 930: + case 1328 ... 1328: + case 1367 ... 1368: + case 1376 ... 1376: + case 1416 ... 1416: + case 1419 ... 1420: + case 1424 ... 1424: + case 1480 ... 1487: + case 1515 ... 1519: + case 1525 ... 1541: + case 1564 ... 1565: + case 1757 ... 1757: + case 1806 ... 1807: + case 1867 ... 1868: + case 1970 ... 1983: + case 2043 ... 2047: + case 2094 ... 2095: + case 2111 ... 2111: + case 2140 ... 2141: + case 2143 ... 2207: + case 2227 ... 2275: + case 2436 ... 2436: + case 2445 ... 2446: + case 2449 ... 2450: + case 2473 ... 2473: + case 2481 ... 2481: + case 2483 ... 2485: + case 2490 ... 2491: + case 2501 ... 2502: + case 2505 ... 
2506: + case 2511 ... 2518: + case 2520 ... 2523: + case 2526 ... 2526: + case 2532 ... 2533: + case 2556 ... 2560: + case 2564 ... 2564: + case 2571 ... 2574: + case 2577 ... 2578: + case 2601 ... 2601: + case 2609 ... 2609: + case 2612 ... 2612: + case 2615 ... 2615: + case 2618 ... 2619: + case 2621 ... 2621: + case 2627 ... 2630: + case 2633 ... 2634: + case 2638 ... 2640: + case 2642 ... 2648: + case 2653 ... 2653: + case 2655 ... 2661: + case 2678 ... 2688: + case 2692 ... 2692: + case 2702 ... 2702: + case 2706 ... 2706: + case 2729 ... 2729: + case 2737 ... 2737: + case 2740 ... 2740: + case 2746 ... 2747: + case 2758 ... 2758: + case 2762 ... 2762: + case 2766 ... 2767: + case 2769 ... 2783: + case 2788 ... 2789: + case 2802 ... 2816: + case 2820 ... 2820: + case 2829 ... 2830: + case 2833 ... 2834: + case 2857 ... 2857: + case 2865 ... 2865: + case 2868 ... 2868: + case 2874 ... 2875: + case 2885 ... 2886: + case 2889 ... 2890: + case 2894 ... 2901: + case 2904 ... 2907: + case 2910 ... 2910: + case 2916 ... 2917: + case 2936 ... 2945: + case 2948 ... 2948: + case 2955 ... 2957: + case 2961 ... 2961: + case 2966 ... 2968: + case 2971 ... 2971: + case 2973 ... 2973: + case 2976 ... 2978: + case 2981 ... 2983: + case 2987 ... 2989: + case 3002 ... 3005: + case 3011 ... 3013: + case 3017 ... 3017: + case 3022 ... 3023: + case 3025 ... 3030: + case 3032 ... 3045: + case 3067 ... 3071: + case 3076 ... 3076: + case 3085 ... 3085: + case 3089 ... 3089: + case 3113 ... 3113: + case 3130 ... 3132: + case 3141 ... 3141: + case 3145 ... 3145: + case 3150 ... 3156: + case 3159 ... 3159: + case 3162 ... 3167: + case 3172 ... 3173: + case 3184 ... 3191: + case 3200 ... 3200: + case 3204 ... 3204: + case 3213 ... 3213: + case 3217 ... 3217: + case 3241 ... 3241: + case 3252 ... 3252: + case 3258 ... 3259: + case 3269 ... 3269: + case 3273 ... 3273: + case 3278 ... 3284: + case 3287 ... 3293: + case 3295 ... 3295: + case 3300 ... 3301: + case 3312 ... 
3312: + case 3315 ... 3328: + case 3332 ... 3332: + case 3341 ... 3341: + case 3345 ... 3345: + case 3387 ... 3388: + case 3397 ... 3397: + case 3401 ... 3401: + case 3407 ... 3414: + case 3416 ... 3423: + case 3428 ... 3429: + case 3446 ... 3448: + case 3456 ... 3457: + case 3460 ... 3460: + case 3479 ... 3481: + case 3506 ... 3506: + case 3516 ... 3516: + case 3518 ... 3519: + case 3527 ... 3529: + case 3531 ... 3534: + case 3541 ... 3541: + case 3543 ... 3543: + case 3552 ... 3557: + case 3568 ... 3569: + case 3573 ... 3584: + case 3643 ... 3646: + case 3676 ... 3712: + case 3715 ... 3715: + case 3717 ... 3718: + case 3721 ... 3721: + case 3723 ... 3724: + case 3726 ... 3731: + case 3736 ... 3736: + case 3744 ... 3744: + case 3748 ... 3748: + case 3750 ... 3750: + case 3752 ... 3753: + case 3756 ... 3756: + case 3770 ... 3770: + case 3774 ... 3775: + case 3781 ... 3781: + case 3783 ... 3783: + case 3790 ... 3791: + case 3802 ... 3803: + case 3808 ... 3839: + case 3912 ... 3912: + case 3949 ... 3952: + case 3992 ... 3992: + case 4029 ... 4029: + case 4045 ... 4045: + case 4059 ... 4095: + case 4294 ... 4294: + case 4296 ... 4300: + case 4302 ... 4303: + case 4681 ... 4681: + case 4686 ... 4687: + case 4695 ... 4695: + case 4697 ... 4697: + case 4702 ... 4703: + case 4745 ... 4745: + case 4750 ... 4751: + case 4785 ... 4785: + case 4790 ... 4791: + case 4799 ... 4799: + case 4801 ... 4801: + case 4806 ... 4807: + case 4823 ... 4823: + case 4881 ... 4881: + case 4886 ... 4887: + case 4955 ... 4956: + case 4989 ... 4991: + case 5018 ... 5023: + case 5109 ... 5119: + case 5789 ... 5791: + case 5881 ... 5887: + case 5901 ... 5901: + case 5909 ... 5919: + case 5943 ... 5951: + case 5972 ... 5983: + case 5997 ... 5997: + case 6001 ... 6001: + case 6004 ... 6015: + case 6110 ... 6111: + case 6122 ... 6127: + case 6138 ... 6143: + case 6158 ... 6159: + case 6170 ... 6175: + case 6264 ... 6271: + case 6315 ... 6319: + case 6390 ... 6399: + case 6431 ... 
6431: + case 6444 ... 6447: + case 6460 ... 6463: + case 6465 ... 6467: + case 6510 ... 6511: + case 6517 ... 6527: + case 6572 ... 6575: + case 6602 ... 6607: + case 6619 ... 6621: + case 6684 ... 6685: + case 6751 ... 6751: + case 6781 ... 6782: + case 6794 ... 6799: + case 6810 ... 6815: + case 6830 ... 6831: + case 6847 ... 6911: + case 6988 ... 6991: + case 7037 ... 7039: + case 7156 ... 7163: + case 7224 ... 7226: + case 7242 ... 7244: + case 7296 ... 7359: + case 7368 ... 7375: + case 7415 ... 7415: + case 7418 ... 7423: + case 7670 ... 7675: + case 7958 ... 7959: + case 7966 ... 7967: + case 8006 ... 8007: + case 8014 ... 8015: + case 8024 ... 8024: + case 8026 ... 8026: + case 8028 ... 8028: + case 8030 ... 8030: + case 8062 ... 8063: + case 8117 ... 8117: + case 8133 ... 8133: + case 8148 ... 8149: + case 8156 ... 8156: + case 8176 ... 8177: + case 8181 ... 8181: + case 8191 ... 8191: + case 8203 ... 8207: + case 8232 ... 8238: + case 8288 ... 8303: + case 8306 ... 8307: + case 8335 ... 8335: + case 8349 ... 8351: + case 8382 ... 8399: + case 8433 ... 8447: + case 8586 ... 8591: + case 9211 ... 9215: + case 9255 ... 9279: + case 9291 ... 9311: + case 11124 ... 11125: + case 11158 ... 11159: + case 11194 ... 11196: + case 11209 ... 11209: + case 11218 ... 11263: + case 11311 ... 11311: + case 11359 ... 11359: + case 11508 ... 11512: + case 11558 ... 11558: + case 11560 ... 11564: + case 11566 ... 11567: + case 11624 ... 11630: + case 11633 ... 11646: + case 11671 ... 11679: + case 11687 ... 11687: + case 11695 ... 11695: + case 11703 ... 11703: + case 11711 ... 11711: + case 11719 ... 11719: + case 11727 ... 11727: + case 11735 ... 11735: + case 11743 ... 11743: + case 11843 ... 11903: + case 11930 ... 11930: + case 12020 ... 12031: + case 12246 ... 12271: + case 12284 ... 12287: + case 12352 ... 12352: + case 12439 ... 12440: + case 12544 ... 12548: + case 12590 ... 12592: + case 12687 ... 12687: + case 12731 ... 12735: + case 12772 ... 
12783: + case 12831 ... 12831: + case 13055 ... 13055: + case 19894 ... 19903: + case 40909 ... 40959: + case 42125 ... 42127: + case 42183 ... 42191: + case 42540 ... 42559: + case 42654 ... 42654: + case 42744 ... 42751: + case 42895 ... 42895: + case 42926 ... 42927: + case 42930 ... 42998: + case 43052 ... 43055: + case 43066 ... 43071: + case 43128 ... 43135: + case 43205 ... 43213: + case 43226 ... 43231: + case 43260 ... 43263: + case 43348 ... 43358: + case 43389 ... 43391: + case 43470 ... 43470: + case 43482 ... 43485: + case 43519 ... 43519: + case 43575 ... 43583: + case 43598 ... 43599: + case 43610 ... 43611: + case 43715 ... 43738: + case 43767 ... 43776: + case 43783 ... 43784: + case 43791 ... 43792: + case 43799 ... 43807: + case 43815 ... 43815: + case 43823 ... 43823: + case 43872 ... 43875: + case 43878 ... 43967: + case 44014 ... 44015: + case 44026 ... 44031: + case 55204 ... 55215: + case 55239 ... 55242: + case 55292 ... 55295: + case 57344 ... 63743: + case 64110 ... 64111: + case 64218 ... 64255: + case 64263 ... 64274: + case 64280 ... 64284: + case 64311 ... 64311: + case 64317 ... 64317: + case 64319 ... 64319: + case 64322 ... 64322: + case 64325 ... 64325: + case 64450 ... 64466: + case 64832 ... 64847: + case 64912 ... 64913: + case 64968 ... 65007: + case 65022 ... 65023: + case 65050 ... 65055: + case 65070 ... 65071: + case 65107 ... 65107: + case 65127 ... 65127: + case 65132 ... 65135: + case 65141 ... 65141: + case 65277 ... 65280: + case 65471 ... 65473: + case 65480 ... 65481: + case 65488 ... 65489: + case 65496 ... 65497: + case 65501 ... 65503: + case 65511 ... 65511: + case 65519 ... 65531: + case 65534 ... 65535: + print_str_char_u(c); + break; + case 65548 ... 65548: + case 65575 ... 65575: + case 65595 ... 65595: + case 65598 ... 65598: + case 65614 ... 65615: + case 65630 ... 65663: + case 65787 ... 65791: + case 65795 ... 65798: + case 65844 ... 65846: + case 65933 ... 65935: + case 65948 ... 65951: + case 65953 ... 
65999: + case 66046 ... 66175: + case 66205 ... 66207: + case 66257 ... 66271: + case 66300 ... 66303: + case 66340 ... 66351: + case 66379 ... 66383: + case 66427 ... 66431: + case 66462 ... 66462: + case 66500 ... 66503: + case 66518 ... 66559: + case 66718 ... 66719: + case 66730 ... 66815: + case 66856 ... 66863: + case 66916 ... 66926: + case 66928 ... 67071: + case 67383 ... 67391: + case 67414 ... 67423: + case 67432 ... 67583: + case 67590 ... 67591: + case 67593 ... 67593: + case 67638 ... 67638: + case 67641 ... 67643: + case 67645 ... 67646: + case 67670 ... 67670: + case 67743 ... 67750: + case 67760 ... 67839: + case 67868 ... 67870: + case 67898 ... 67902: + case 67904 ... 67967: + case 68024 ... 68029: + case 68032 ... 68095: + case 68100 ... 68100: + case 68103 ... 68107: + case 68116 ... 68116: + case 68120 ... 68120: + case 68148 ... 68151: + case 68155 ... 68158: + case 68168 ... 68175: + case 68185 ... 68191: + case 68256 ... 68287: + case 68327 ... 68330: + case 68343 ... 68351: + case 68406 ... 68408: + case 68438 ... 68439: + case 68467 ... 68471: + case 68498 ... 68504: + case 68509 ... 68520: + case 68528 ... 68607: + case 68681 ... 69215: + case 69247 ... 69631: + case 69710 ... 69713: + case 69744 ... 69758: + case 69821 ... 69821: + case 69826 ... 69839: + case 69865 ... 69871: + case 69882 ... 69887: + case 69941 ... 69941: + case 69956 ... 69967: + case 70007 ... 70015: + case 70089 ... 70092: + case 70094 ... 70095: + case 70107 ... 70112: + case 70133 ... 70143: + case 70162 ... 70162: + case 70206 ... 70319: + case 70379 ... 70383: + case 70394 ... 70400: + case 70404 ... 70404: + case 70413 ... 70414: + case 70417 ... 70418: + case 70441 ... 70441: + case 70449 ... 70449: + case 70452 ... 70452: + case 70458 ... 70459: + case 70469 ... 70470: + case 70473 ... 70474: + case 70478 ... 70486: + case 70488 ... 70492: + case 70500 ... 70501: + case 70509 ... 70511: + case 70517 ... 70783: + case 70856 ... 70863: + case 70874 ... 
71039: + case 71094 ... 71095: + case 71114 ... 71167: + case 71237 ... 71247: + case 71258 ... 71295: + case 71352 ... 71359: + case 71370 ... 71839: + case 71923 ... 71934: + case 71936 ... 72383: + case 72441 ... 73727: + case 74649 ... 74751: + case 74863 ... 74863: + case 74869 ... 77823: + case 78895 ... 92159: + case 92729 ... 92735: + case 92767 ... 92767: + case 92778 ... 92781: + case 92784 ... 92879: + case 92910 ... 92911: + case 92918 ... 92927: + case 92998 ... 93007: + case 93018 ... 93018: + case 93026 ... 93026: + case 93048 ... 93052: + case 93072 ... 93951: + case 94021 ... 94031: + case 94079 ... 94094: + case 94112 ... 110591: + case 110594 ... 113663: + case 113771 ... 113775: + case 113789 ... 113791: + case 113801 ... 113807: + case 113818 ... 113819: + case 113824 ... 118783: + case 119030 ... 119039: + case 119079 ... 119080: + case 119155 ... 119162: + case 119262 ... 119295: + case 119366 ... 119551: + case 119639 ... 119647: + case 119666 ... 119807: + case 119893 ... 119893: + case 119965 ... 119965: + case 119968 ... 119969: + case 119971 ... 119972: + case 119975 ... 119976: + case 119981 ... 119981: + case 119994 ... 119994: + case 119996 ... 119996: + case 120004 ... 120004: + case 120070 ... 120070: + case 120075 ... 120076: + case 120085 ... 120085: + case 120093 ... 120093: + case 120122 ... 120122: + case 120127 ... 120127: + case 120133 ... 120133: + case 120135 ... 120137: + case 120145 ... 120145: + case 120486 ... 120487: + case 120780 ... 120781: + case 120832 ... 124927: + case 125125 ... 125126: + case 125143 ... 126463: + case 126468 ... 126468: + case 126496 ... 126496: + case 126499 ... 126499: + case 126501 ... 126502: + case 126504 ... 126504: + case 126515 ... 126515: + case 126520 ... 126520: + case 126522 ... 126522: + case 126524 ... 126529: + case 126531 ... 126534: + case 126536 ... 126536: + case 126538 ... 126538: + case 126540 ... 126540: + case 126544 ... 126544: + case 126547 ... 126547: + case 126549 ... 
126550: + case 126552 ... 126552: + case 126554 ... 126554: + case 126556 ... 126556: + case 126558 ... 126558: + case 126560 ... 126560: + case 126563 ... 126563: + case 126565 ... 126566: + case 126571 ... 126571: + case 126579 ... 126579: + case 126584 ... 126584: + case 126589 ... 126589: + case 126591 ... 126591: + case 126602 ... 126602: + case 126620 ... 126624: + case 126628 ... 126628: + case 126634 ... 126634: + case 126652 ... 126703: + case 126706 ... 126975: + case 127020 ... 127023: + case 127124 ... 127135: + case 127151 ... 127152: + case 127168 ... 127168: + case 127184 ... 127184: + case 127222 ... 127231: + case 127245 ... 127247: + case 127279 ... 127279: + case 127340 ... 127343: + case 127387 ... 127461: + case 127491 ... 127503: + case 127547 ... 127551: + case 127561 ... 127567: + case 127570 ... 127743: + case 127789 ... 127791: + case 127870 ... 127871: + case 127951 ... 127955: + case 127992 ... 127999: + case 128255 ... 128255: + case 128331 ... 128335: + case 128378 ... 128378: + case 128420 ... 128420: + case 128579 ... 128580: + case 128720 ... 128735: + case 128749 ... 128751: + case 128756 ... 128767: + case 128884 ... 128895: + case 128981 ... 129023: + case 129036 ... 129039: + case 129096 ... 129103: + case 129114 ... 129119: + case 129160 ... 129167: + case 129198 ... 131071: + case 173783 ... 173823: + case 177973 ... 177983: + case 178206 ... 194559: + case 195102 ... 917759: + case 918000 ... 
1114110: + print_str_char_U(c); + break; + default: + print_codepoint(c); + break; + } +} + +void print_char(val_char_t c) +{ + printf("#\\"); + switch (c) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(c); + } +} + +void print_codepoint(val_char_t c) +{ + char buffer[5] = {0}; + utf8_encode_char(c, buffer); + printf("%s", buffer); +} + +int utf8_encode_char(val_char_t c, char *buffer) +{ + // Output to buffer using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (c < 128) { + buffer[0] = (char) c; + return 1; + } else if (c < 2048) { + buffer[0] = (char)(c >> 6) | 192; + buffer[1] = ((char) c & 63) | 128; + return 2; + } else if (c < 65536) { + buffer[0] = (char)(c >> 12) | 224; + buffer[1] = ((char)(c >> 6) & 63) | 128; + buffer[2] = ((char) c & 63) | 128; + return 3; + } else { + buffer[0] = (char)(c >> 18) | 240; + buffer[1] = ((char)(c >> 12) & 63) | 128; + buffer[2] = ((char)(c >> 6) & 63) | 128; + buffer[3] = ((char) c & 63) | 128; + return 4; + } +} diff --git a/langs/loot/print.h b/langs/loot/print.h new file mode 100644 index 00000000..c22081a2 --- /dev/null +++ b/langs/loot/print.h @@ -0,0 +1,8 @@ +#ifndef PRINT_H +#define PRINT_H + +#include "values.h" + +void print_result(val_t); + +#endif diff --git a/langs/loot/read-all.rkt b/langs/loot/read-all.rkt new file mode 100644 index 00000000..8a3289a5 --- /dev/null +++ b/langs/loot/read-all.rkt @@ -0,0 +1,8 @@ +#lang racket +(provide read-all) +;; read all s-expression until eof +(define (read-all) + (let ((r (read))) + (if (eof-object? 
r) + '() + (cons r (read-all))))) diff --git a/langs/loot/run.rkt b/langs/loot/run.rkt new file mode 100644 index 00000000..eaa53eb9 --- /dev/null +++ b/langs/loot/run.rkt @@ -0,0 +1,18 @@ +#lang racket +(provide run run/io) +(require "types.rkt" "build-runtime.rkt" + a86/interp) + +;; Asm -> Answer +(define (run is) + (parameterize ((current-objs (list runtime-path))) + (match (asm-interp is) + ['err 'err] + [b (bits->value b)]))) + +;; Asm String -> (cons Answer String) +(define (run/io is s) + (parameterize ((current-objs (list runtime-path))) + (match (asm-interp/io is s) + [(cons 'err o) (cons 'err o)] + [(cons b o) (cons (bits->value b) o)]))) diff --git a/langs/loot/test/build-runtime.rkt b/langs/loot/test/build-runtime.rkt new file mode 100644 index 00000000..7023ee0b --- /dev/null +++ b/langs/loot/test/build-runtime.rkt @@ -0,0 +1,8 @@ +#lang racket +(require a86/interp) + +;; link with runtime for IO operations +(unless (file-exists? "../runtime.o") + (system "make -C .. runtime.o")) +(current-objs + (list (path->string (normalize-path "../runtime.o")))) diff --git a/langs/loot/test/compile.rkt b/langs/loot/test/compile.rkt index 00666520..ee289de8 100644 --- a/langs/loot/test/compile.rkt +++ b/langs/loot/test/compile.rkt @@ -2,17 +2,7 @@ (require "test-runner.rkt" "../parse.rkt" "../compile.rkt" - "../unload-bits-asm.rkt" - a86/interp) + "../run.rkt") -;; link with runtime for IO operations -(unless (file-exists? "../runtime.o") - (system "make -C .. runtime.o")) -(current-objs - (list (path->string (normalize-path "../runtime.o")))) - -(test-runner (λ (e) (unload/free (asm-interp (compile (parse e)))))) -(test-runner-io (λ (e s) - (match (asm-interp/io (compile (parse e)) s) - ['err 'err] - [(cons r o) (cons (unload/free r) o)]))) +(test-runner (λ p (run (compile (parse p))))) +(test-runner-io (λ (s . 
p) (run/io (compile (parse p)) s))) diff --git a/langs/loot/test/interp-defun.rkt b/langs/loot/test/interp-defun.rkt new file mode 100644 index 00000000..68ef4191 --- /dev/null +++ b/langs/loot/test/interp-defun.rkt @@ -0,0 +1,24 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../interp-defun.rkt" + "../interp-io.rkt") + +(define (closure->proc xs e r) + ;; Could make this better by calling the interpreter, + ;; but it's only used in tests where all we care about + ;; is that you get a procedure. + (lambda _ + (error "This function is not callable."))) + +(test-runner + (λ p + (match (interp (parse p)) + [(Closure xs e r) (closure->proc xs e r)] + [v v]))) +(test-runner-io + (λ (s . p) + (match (interp/io (parse p) s) + [(cons (Closure xs e r) o) + (cons (closure->proc xs e r) o)] + [r r]))) diff --git a/langs/loot/test/interp.rkt b/langs/loot/test/interp.rkt index 70d041fe..cd7b654e 100644 --- a/langs/loot/test/interp.rkt +++ b/langs/loot/test/interp.rkt @@ -4,5 +4,5 @@ "../interp.rkt" "../interp-io.rkt") -(test-runner (λ (e) (interp (parse e)))) -(test-runner-io (λ (e s) (interp/io (parse e) s))) +(test-runner (λ p (interp (parse p)))) +(test-runner-io (λ (s . p) (interp/io (parse p) s))) diff --git a/langs/loot/test/test-runner.rkt b/langs/loot/test/test-runner.rkt index 4c143796..5789da91 100644 --- a/langs/loot/test/test-runner.rkt +++ b/langs/loot/test/test-runner.rkt @@ -27,14 +27,15 @@ ;; Dupe examples (check-equal? (run #t) #t) (check-equal? (run #f) #f) - (check-equal? (run (if #t 1 2)) 1) - (check-equal? (run (if #f 1 2)) 2) - (check-equal? (run (if 0 1 2)) 1) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) (check-equal? (run '(if #t 3 4)) 3) (check-equal? (run '(if #f 3 4)) 4) (check-equal? (run '(if 0 3 4)) 3) (check-equal? (run '(zero? 4)) #f) (check-equal? (run '(zero? 0)) #t) + ;; Dodger examples (check-equal? (run #\a) #\a) (check-equal? 
(run #\b) #\b) @@ -43,6 +44,7 @@ (check-equal? (run '(char? 8)) #f) (check-equal? (run '(char->integer #\a)) (char->integer #\a)) (check-equal? (run '(integer->char 955)) #\λ) + ;; Extort examples (check-equal? (run '(add1 #f)) 'err) (check-equal? (run '(sub1 #f)) 'err) @@ -53,6 +55,7 @@ (check-equal? (run '(write-byte #f)) 'err) (check-equal? (run '(write-byte -1)) 'err) (check-equal? (run '(write-byte 256)) 'err) + ;; Fraud examples (check-equal? (run '(let ((x 7)) x)) 7) (check-equal? (run '(let ((x 7)) 2)) 2) @@ -76,9 +79,17 @@ (let ((z (- 4 x))) (+ (+ x x) z)))) 7) + (check-equal? (run '(= 5 5)) #t) + (check-equal? (run '(= 4 5)) #f) + (check-equal? (run '(= (add1 4) 5)) #t) + (check-equal? (run '(< 5 5)) #f) + (check-equal? (run '(< 4 5)) #t) + (check-equal? (run '(< (add1 4) 5)) #f) + ;; Hustle examples (check-equal? (run ''()) '()) (check-equal? (run '(box 1)) (box 1)) + (check-equal? (run '(box -1)) (box -1)) (check-equal? (run '(cons 1 2)) (cons 1 2)) (check-equal? (run '(unbox (box 1))) 1) (check-equal? (run '(car (cons 1 2))) 1) @@ -92,95 +103,253 @@ (let ((y (box 3))) (unbox y)))) 3) - ;; Iniquity tests + (check-equal? (run '(eq? 1 1)) #t) + (check-equal? (run '(eq? 1 2)) #f) + (check-equal? (run '(eq? (cons 1 2) (cons 1 2))) #f) + (check-equal? (run '(let ((x (cons 1 2))) (eq? x x))) #t) + + ;; Hoax examples + (check-equal? (run '(make-vector 0 0)) #()) + (check-equal? (run '(make-vector 1 0)) #(0)) + (check-equal? (run '(make-vector 3 0)) #(0 0 0)) + (check-equal? (run '(make-vector 3 5)) #(5 5 5)) + (check-equal? (run '(vector? (make-vector 0 0))) #t) + (check-equal? (run '(vector? (cons 0 0))) #f) + (check-equal? (run '(vector-ref (make-vector 0 #f) 0)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 2)) 5) + (check-equal? 
(run '(vector-ref (make-vector 3 5) 3)) 'err) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 0 4) + x))) + #(4 5 5)) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 1 4) + x))) + #(5 4 5)) + (check-equal? (run '(vector-length (make-vector 3 #f))) 3) + (check-equal? (run '(vector-length (make-vector 0 #f))) 0) + (check-equal? (run '"") "") + (check-equal? (run '"fred") "fred") + (check-equal? (run '"wilma") "wilma") + (check-equal? (run '(make-string 0 #\f)) "") + (check-equal? (run '(make-string 3 #\f)) "fff") + (check-equal? (run '(make-string 3 #\g)) "ggg") + (check-equal? (run '(string-length "")) 0) + (check-equal? (run '(string-length "fred")) 4) + (check-equal? (run '(string-ref "" 0)) 'err) + (check-equal? (run '(string-ref (make-string 0 #\a) 0)) 'err) + (check-equal? (run '(string-ref "fred" 0)) #\f) + (check-equal? (run '(string-ref "fred" 1)) #\r) + (check-equal? (run '(string-ref "fred" 2)) #\e) + (check-equal? (run '(string-ref "fred" 4)) 'err) + (check-equal? (run '(string? "fred")) #t) + (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") + + ;; Iniquity tests (check-equal? (run - '(begin (define (f x) x) - (f 5))) + '(define (f x) x) + '(f 5)) 5) + (check-equal? (run - '(begin (define (tri x) - (if (zero? x) - 0 - (+ x (tri (sub1 x))))) - (tri 9))) + '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) + '(tri 9)) 45) -#| + (check-equal? (run - '(begin (define (even? x) - (if (zero? x) - #t - (odd? (sub1 x)))) - (define (odd? x) - (if (zero? x) - #f - (even? (sub1 x)))) - (even? 101))) + '(define (f x) x) + '(define (g x) (f x)) + '(g 5)) + 5) + (check-equal? (run + '(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) + '(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + '(even? 101)) #f) - (check-equal? (run - '(begin (define (map-add1 xs) - (if (empty? 
xs) - '() - (cons (add1 (car xs)) - (map-add1 (cdr xs))))) - (map-add1 (cons 1 (cons 2 (cons 3 '())))))) - '(2 3 4))|#) + '(define (map-add1 xs) + (if (empty? xs) + '() + (cons (add1 (car xs)) + (map-add1 (cdr xs))))) + '(map-add1 (cons 1 (cons 2 (cons 3 '()))))) + '(2 3 4)) + (check-equal? (run + '(define (f x) + 10) + '(f 1)) + 10) + (check-equal? (run + '(define (f x) + 10) + '(let ((x 2)) (f 1))) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(f 1 2)) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(let ((z 2)) (f 1 2))) + 10) + (check-equal? (run '(define (f x y) y) + '(f 1 (add1 #f))) + 'err) + + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? (run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) + + ;; Loot examples + (check-true (procedure? (run '(λ (x) x)))) + (check-equal? (run '((λ (x) x) 5)) + 5) + + (check-equal? (run '(let ((f (λ (x) x))) (f 5))) + 5) + (check-equal? (run '(let ((f (λ (x y) x))) (f 5 7))) + 5) + (check-equal? (run '(let ((f (λ (x y) y))) (f 5 7))) + 7) + (check-equal? 
(run '((let ((x 1)) + (let ((y 2)) + (lambda (z) (cons x (cons y (cons z '())))))) + 3)) + '(1 2 3)) + (check-equal? (run '(define (adder n) + (λ (x) (+ x n))) + '((adder 5) 10)) + 15) + (check-equal? (run '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))))) + 36)) + 666) + (check-equal? (run '(define (tri n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))) + '(tri 36)) + 666) + (check-equal? (run '(define (tri n) + (match n + [0 0] + [m (+ m (tri (sub1 m)))])) + '(tri 36)) + 666) + (check-equal? (run '((match 8 [8 (lambda (x) x)]) 12)) + 12)) (define (test-runner-io run) ;; Evildoer examples - (check-equal? (run 7 "") (cons 7 "")) - (check-equal? (run '(write-byte 97) "") (cons (void) "a")) - (check-equal? (run '(read-byte) "a") (cons 97 "")) - (check-equal? (run '(begin (write-byte 97) (read-byte)) "b") + (check-equal? (run "" 7) (cons 7 "")) + (check-equal? (run "" '(write-byte 97)) (cons (void) "a")) + (check-equal? (run "a" '(read-byte)) (cons 97 "")) + (check-equal? (run "b" '(begin (write-byte 97) (read-byte))) (cons 98 "a")) - (check-equal? (run '(read-byte) "") (cons eof "")) - (check-equal? (run '(eof-object? (read-byte)) "") (cons #t "")) - (check-equal? (run '(eof-object? (read-byte)) "a") (cons #f "")) - (check-equal? (run '(begin (write-byte 97) (write-byte 98)) "") + (check-equal? (run "" '(read-byte)) (cons eof "")) + (check-equal? (run "" '(eof-object? (read-byte))) (cons #t "")) + (check-equal? (run "a" '(eof-object? (read-byte))) (cons #f "")) + (check-equal? (run "" '(begin (write-byte 97) (write-byte 98))) (cons (void) "ab")) - (check-equal? (run '(peek-byte) "ab") (cons 97 "")) - (check-equal? (run '(begin (peek-byte) (read-byte)) "ab") (cons 97 "")) + (check-equal? (run "ab" '(peek-byte)) (cons 97 "")) + (check-equal? (run "ab" '(begin (peek-byte) (read-byte))) (cons 97 "")) ;; Extort examples - (check-equal? 
(run '(write-byte #t) "") (cons 'err "")) + (check-equal? (run "" '(write-byte #t)) (cons 'err "")) ;; Fraud examples - (check-equal? (run '(let ((x 97)) (write-byte x)) "") (cons (void) "a")) - (check-equal? (run '(let ((x 97)) + (check-equal? (run "" '(let ((x 97)) (write-byte x))) (cons (void) "a")) + (check-equal? (run "" + '(let ((x 97)) (begin (write-byte x) - x)) - "") + x))) (cons 97 "a")) - (check-equal? (run '(let ((x 97)) (begin (read-byte) x)) "b") + (check-equal? (run "b" '(let ((x 97)) (begin (read-byte) x))) (cons 97 "")) - (check-equal? (run '(let ((x 97)) (begin (peek-byte) x)) "b") + (check-equal? (run "b" '(let ((x 97)) (begin (peek-byte) x))) (cons 97 "")) ;; Hustle examples - (check-equal? (run '(let ((x 1)) + (check-equal? (run "" + '(let ((x 1)) (begin (write-byte 97) - 1)) - "") + 1))) (cons 1 "a")) - (check-equal? (run '(let ((x 1)) + (check-equal? (run "" + '(let ((x 1)) (let ((y 2)) (begin (write-byte 97) - 1))) - "") + 1)))) (cons 1 "a")) - (check-equal? (run '(let ((x (cons 1 2))) + (check-equal? (run "" + '(let ((x (cons 1 2))) (begin (write-byte 97) - (car x))) - "") + (car x)))) (cons 1 "a")) ;; Iniquity examples - (check-equal? (run '(begin (define (print-alphabet i) - (if (zero? i) - (void) - (begin (write-byte (- 123 i)) - (print-alphabet (sub1 i))))) - (print-alphabet 26)) - "") - (cons (void) "abcdefghijklmnopqrstuvwxyz"))) + #| + (check-equal? (run "" + '(define (print-alphabet i) + (if (zero? i) + (void) + (begin (write-byte (- 123 i)) + (print-alphabet (sub1 i))))) + '(print-alphabet 26)) + (cons (void) "abcdefghijklmnopqrstuvwxyz")) +|#) diff --git a/langs/loot/tri.rkt b/langs/loot/tri.rkt new file mode 100644 index 00000000..c55b3bbb --- /dev/null +++ b/langs/loot/tri.rkt @@ -0,0 +1,10 @@ +#lang racket +(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? 
n) + 0 + (+ n (tri (sub1 n))))))) + 36) diff --git a/langs/loot/types.h b/langs/loot/types.h index cffe4ea4..90668957 100644 --- a/langs/loot/types.h +++ b/langs/loot/types.h @@ -1,3 +1,6 @@ +#ifndef TYPES_H +#define TYPES_H + /* Bit layout of values @@ -13,18 +16,14 @@ - Eof: #b10 11 000 - Void: #b11 11 000 - Empty: #b100 11 000 - - Pointers are - - Box: end in #b001 - - Cons: end in #b010 - - Proc: end in #b100 */ #define imm_shift 3 #define ptr_type_mask ((1 << imm_shift) - 1) -#define ptr_addr_mask ~ptr_type_mask #define box_type_tag 1 #define cons_type_tag 2 -#define proc_type_tag 4 +#define vect_type_tag 3 +#define str_type_tag 4 +#define proc_type_tag 5 #define int_shift (1 + imm_shift) #define int_type_mask ((1 << int_shift) - 1) #define int_type_tag (0 << (int_shift - 1)) @@ -38,3 +37,5 @@ #define val_eof ((2 << char_shift) | nonchar_type_tag) #define val_void ((3 << char_shift) | nonchar_type_tag) #define val_empty ((4 << char_shift) | nonchar_type_tag) + +#endif diff --git a/langs/loot/types.rkt b/langs/loot/types.rkt index 033f7f57..19b9df2e 100644 --- a/langs/loot/types.rkt +++ b/langs/loot/types.rkt @@ -1,59 +1,97 @@ #lang racket (provide (all-defined-out)) +(require ffi/unsafe) (define imm-shift 3) (define imm-mask #b111) (define ptr-mask #b111) (define type-box #b001) (define type-cons #b010) -(define type-proc #b100) +(define type-vect #b011) +(define type-str #b100) +(define type-proc #b101) (define int-shift (+ 1 imm-shift)) (define char-shift (+ 2 imm-shift)) (define type-int #b0000) (define mask-int #b1111) (define type-char #b01000) (define mask-char #b11111) -(define val-true #b0011000) -(define val-false #b0111000) -(define val-eof #b1011000) -(define val-void #b1111000) -(define val-empty #b10011000) - -(define (bits->imm b) - (cond [(= type-int (bitwise-and b mask-int)) + +(define (bits->value b) + (cond [(= b (value->bits #t)) #t] + [(= b (value->bits #f)) #f] + [(= b (value->bits eof)) eof] + [(= b (value->bits (void))) (void)] + [(= b 
(value->bits '())) '()] + [(int-bits? b) (arithmetic-shift b (- int-shift))] - [(= type-char (bitwise-and b mask-char)) + [(char-bits? b) (integer->char (arithmetic-shift b (- char-shift)))] - [(= b val-true) #t] - [(= b val-false) #f] - [(= b val-eof) eof] - [(= b val-void) (void)] - [(= b val-empty) '()] + [(box-bits? b) + (box (bits->value (heap-ref b)))] + [(cons-bits? b) + (cons (bits->value (heap-ref (+ b 8))) + (bits->value (heap-ref b)))] + [(vect-bits? b) + (if (zero? (untag b)) + (vector) + (build-vector (heap-ref b) + (lambda (j) + (bits->value (heap-ref (+ b (* 8 (add1 j))))))))] + [(str-bits? b) + (if (zero? (untag b)) + (string) + (build-string (heap-ref b) + (lambda (j) + (char-ref (+ b 8) j))))] + [(proc-bits? b) + (lambda _ + (error "This function is not callable."))] [else (error "invalid bits")])) -(define (imm->bits v) - (cond [(eof-object? v) val-eof] - [(integer? v) (arithmetic-shift v int-shift)] +(define (value->bits v) + (cond [(eq? v #t) #b00011000] + [(eq? v #f) #b00111000] + [(eof-object? v) #b01011000] + [(void? v) #b01111000] + [(empty? v) #b10011000] + [(integer? v) + (arithmetic-shift v int-shift)] [(char? v) (bitwise-ior type-char (arithmetic-shift (char->integer v) char-shift))] - [(eq? v #t) val-true] - [(eq? v #f) val-false] - [(void? v) val-void] - [(empty? v) val-empty])) - + [else (error "not an immediate value")])) (define (imm-bits? v) (zero? (bitwise-and v imm-mask))) (define (int-bits? v) - (zero? (bitwise-and v mask-int))) + (= type-int (bitwise-and v mask-int))) (define (char-bits? v) (= type-char (bitwise-and v mask-char))) (define (cons-bits? v) - (zero? (bitwise-xor (bitwise-and v imm-mask) type-cons))) + (= type-cons (bitwise-and v imm-mask))) (define (box-bits? v) - (zero? (bitwise-xor (bitwise-and v imm-mask) type-box))) + (= type-box (bitwise-and v imm-mask))) + +(define (vect-bits? v) + (= type-vect (bitwise-and v imm-mask))) + +(define (str-bits? v) + (= type-str (bitwise-and v imm-mask))) + +(define (proc-bits? 
v) + (= type-proc (bitwise-and v imm-mask))) + +(define (untag i) + (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) + (integer-length ptr-mask))) + +(define (heap-ref i) + (ptr-ref (cast (untag i) _int64 _pointer) _int64)) + +(define (char-ref i j) + (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j))) diff --git a/langs/loot/unload-bits-asm.rkt b/langs/loot/unload-bits-asm.rkt deleted file mode 100644 index e2283dc5..00000000 --- a/langs/loot/unload-bits-asm.rkt +++ /dev/null @@ -1,27 +0,0 @@ -#lang racket -(provide unload/free unload-value) -(require "types.rkt" - ffi/unsafe) - -;; Answer* -> Answer -(define (unload/free a) - (match a - ['err 'err] - [(cons h v) (unload-value v)])) - -;; Value* -> Value -(define (unload-value v) - (match v - [(? imm-bits?) (bits->imm v)] - [(? box-bits? i) - (box (unload-value (heap-ref i)))] - [(? cons-bits? i) - (cons (unload-value (heap-ref (+ i (arithmetic-shift 1 imm-shift)))) - (unload-value (heap-ref i)))])) - -(define (untag i) - (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) - (integer-length ptr-mask))) - -(define (heap-ref i) - (ptr-ref (cast (untag i) _int64 _pointer) _uint64)) diff --git a/langs/loot/values.c b/langs/loot/values.c new file mode 100644 index 00000000..c6b27244 --- /dev/null +++ b/langs/loot/values.c @@ -0,0 +1,110 @@ +#include "types.h" +#include "values.h" + +type_t val_typeof(val_t x) +{ + switch (x & ptr_type_mask) { + case box_type_tag: + return T_BOX; + case cons_type_tag: + return T_CONS; + case vect_type_tag: + return T_VECT; + case str_type_tag: + return T_STR; + case proc_type_tag: + return T_PROC; + } + + if ((int_type_mask & x) == int_type_tag) + return T_INT; + if ((char_type_mask & x) == char_type_tag) + return T_CHAR; + + switch (x) { + case val_true: + case val_false: + return T_BOOL; + case val_eof: + return T_EOF; + case val_void: + return T_VOID; + case val_empty: + return T_EMPTY; + } + + return T_INVALID; +} + +int64_t 
val_unwrap_int(val_t x) +{ + return x >> int_shift; +} +val_t val_wrap_int(int64_t i) +{ + return (i << int_shift) | int_type_tag; +} + +int val_unwrap_bool(val_t x) +{ + return x == val_true; +} +val_t val_wrap_bool(int b) +{ + return b ? val_true : val_false; +} + +val_char_t val_unwrap_char(val_t x) +{ + return (val_char_t)(x >> char_shift); +} +val_t val_wrap_char(val_char_t c) +{ + return (((val_t)c) << char_shift) | char_type_tag; +} + +val_t val_wrap_eof(void) +{ + return val_eof; +} + +val_t val_wrap_void(void) +{ + return val_void; +} + +val_box_t* val_unwrap_box(val_t x) +{ + return (val_box_t *)(x ^ box_type_tag); +} +val_t val_wrap_box(val_box_t* b) +{ + return ((val_t)b) | box_type_tag; +} + +val_cons_t* val_unwrap_cons(val_t x) +{ + return (val_cons_t *)(x ^ cons_type_tag); +} +val_t val_wrap_cons(val_cons_t *c) +{ + return ((val_t)c) | cons_type_tag; +} + +val_vect_t* val_unwrap_vect(val_t x) +{ + return (val_vect_t *)(x ^ vect_type_tag); +} +val_t val_wrap_vect(val_vect_t *v) +{ + return ((val_t)v) | vect_type_tag; +} + +val_str_t* val_unwrap_str(val_t x) +{ + return (val_str_t *)(x ^ str_type_tag); +} +val_t val_wrap_str(val_str_t *v) +{ + return ((val_t)v) | str_type_tag; +} diff --git a/langs/loot/values.h b/langs/loot/values.h new file mode 100644 index 00000000..54fb2039 --- /dev/null +++ b/langs/loot/values.h @@ -0,0 +1,76 @@ +#ifndef VALUES_H +#define VALUES_H + +#include + +/* any abstract value */ +typedef int64_t val_t; + +typedef enum type_t { + T_INVALID = -1, + /* immediates */ + T_INT, + T_BOOL, + T_CHAR, + T_EOF, + T_VOID, + T_EMPTY, + /* pointers */ + T_BOX, + T_CONS, + T_VECT, + T_STR, + T_PROC, +} type_t; + +typedef uint32_t val_char_t; +typedef struct val_box_t { + val_t val; +} val_box_t; +typedef struct val_cons_t { + val_t snd; + val_t fst; +} val_cons_t; +typedef struct val_vect_t { + uint64_t len; + val_t elems[]; +} val_vect_t; +typedef struct val_str_t { + uint64_t len; + val_char_t codepoints[]; +} val_str_t; + +/* return 
the type of x */ +type_t val_typeof(val_t x); + +/** + * Wrap/unwrap values + * + * The behavior of unwrap functions are undefined on type mismatch. + */ +int64_t val_unwrap_int(val_t x); +val_t val_wrap_int(int64_t i); + +int val_unwrap_bool(val_t x); +val_t val_wrap_bool(int b); + +val_char_t val_unwrap_char(val_t x); +val_t val_wrap_char(val_char_t b); + +val_t val_wrap_eof(); + +val_t val_wrap_void(); + +val_box_t* val_unwrap_box(val_t x); +val_t val_wrap_box(val_box_t* b); + +val_cons_t* val_unwrap_cons(val_t x); +val_t val_wrap_cons(val_cons_t* c); + +val_vect_t* val_unwrap_vect(val_t x); +val_t val_wrap_vect(val_vect_t* c); + +val_str_t* val_unwrap_str(val_t x); +val_t val_wrap_str(val_str_t* c); + +#endif diff --git a/langs/jig-playground/Makefile b/langs/modules-in-progress/Makefile similarity index 73% rename from langs/jig-playground/Makefile rename to langs/modules-in-progress/Makefile index 0370d4c5..bd832496 100644 --- a/langs/jig-playground/Makefile +++ b/langs/modules-in-progress/Makefile @@ -3,6 +3,8 @@ UNAME := $(shell uname) .SECONDARY: $(wild *.make) +BUILD_DIR := . + ifeq ($(UNAME), Darwin) format=macho64 else @@ -31,12 +33,14 @@ runtime.o: $(objs) racket -t compile-file.rkt -m $< > $@ %.make: %.rkt + echo "MAKING" racket -t build.rkt -m $< > $@ %.run: %.make make $@ clean: - -rm *.o *.s *.run *.make + @$(RM) *.o *.s *.run $(BUILD_DIR)/*.make ||: + @echo "$(shell basename $(shell pwd)): cleaned!" --include $(wildcard *.make) +-include $(wildcard $(BUILD_DIR)/*.make) diff --git a/langs/jig-playground/README.md b/langs/modules-in-progress/README.md similarity index 95% rename from langs/jig-playground/README.md rename to langs/modules-in-progress/README.md index cc6adbd5..5210cbcf 100644 --- a/langs/jig-playground/README.md +++ b/langs/modules-in-progress/README.md @@ -1,6 +1,13 @@ Standard Library and Modules ============================ +This is a work in progress on a uniform approach to a standard library +and modules. 
It's mostly OK, but needs work on the file system +aspects and the Makefile is a little brittle. Not sure it fits in +with the `test-progs` approach to testing either. + + + Standard Library ---------------- diff --git a/langs/jig-playground/ast.rkt b/langs/modules-in-progress/ast.rkt similarity index 100% rename from langs/jig-playground/ast.rkt rename to langs/modules-in-progress/ast.rkt diff --git a/langs/jig-playground/build.rkt b/langs/modules-in-progress/build.rkt similarity index 100% rename from langs/jig-playground/build.rkt rename to langs/modules-in-progress/build.rkt diff --git a/langs/jig-playground/compile-file.rkt b/langs/modules-in-progress/compile-file.rkt similarity index 79% rename from langs/jig-playground/compile-file.rkt rename to langs/modules-in-progress/compile-file.rkt index 7550bddb..c786fb19 100644 --- a/langs/jig-playground/compile-file.rkt +++ b/langs/modules-in-progress/compile-file.rkt @@ -7,3 +7,4 @@ ;; emit asm code on stdout (define (main fn) (displayln (asm-string (compile-module (parse-module-file fn))))) +;(parse-module-file (simplify-path (path->complete-path fn)))) diff --git a/langs/jig/compile-ops.rkt b/langs/modules-in-progress/compile-ops.rkt similarity index 71% rename from langs/jig/compile-ops.rkt rename to langs/modules-in-progress/compile-ops.rkt index c685d9e2..bd96e16d 100644 --- a/langs/jig/compile-ops.rkt +++ b/langs/modules-in-progress/compile-ops.rkt @@ -9,49 +9,50 @@ (define r8 'r8) ; scratch (define r9 'r9) ; scratch (define r10 'r10) ; scratch +(define r15 'r15) ; stack pad (non-volatile) (define rsp 'rsp) ; stack -;; Op0 CEnv -> Asm -(define (compile-op0 p c) +;; Op0 -> Asm +(define (compile-op0 p) (match p ['void (seq (Mov rax val-void))] - ['read-byte (seq (pad-stack c) + ['read-byte (seq pad-stack (Call 'read_byte) - (unpad-stack c))] - ['peek-byte (seq (pad-stack c) + unpad-stack)] + ['peek-byte (seq pad-stack (Call 'peek_byte) - (unpad-stack c))])) + unpad-stack)])) -;; Op1 CEnv -> Asm -(define 
(compile-op1 p c) +;; Op1 -> Asm +(define (compile-op1 p) (match p ['add1 - (seq (assert-integer rax c) + (seq (assert-integer rax) (Add rax (imm->bits 1)))] ['sub1 - (seq (assert-integer rax c) + (seq (assert-integer rax) (Sub rax (imm->bits 1)))] ['zero? - (seq (assert-integer rax c) + (seq (assert-integer rax) (eq-imm 0))] ['char? (type-pred mask-char type-char)] ['char->integer - (seq (assert-char rax c) + (seq (assert-char rax) (Sar rax char-shift) (Sal rax int-shift))] ['integer->char - (seq (assert-codepoint c) + (seq (assert-codepoint rax) (Sar rax int-shift) (Sal rax char-shift) (Xor rax type-char))] ['eof-object? (eq-imm eof)] ['write-byte - (seq (assert-byte c) - (pad-stack c) + (seq (assert-byte rax) + pad-stack (Mov rdi rax) (Call 'write_byte) - (unpad-stack c) + unpad-stack (Mov rax val-void))] ['box (seq (Mov (Offset rbx 0) rax) @@ -59,15 +60,15 @@ (Or rax type-box) (Add rbx 8))] ['unbox - (seq (assert-box rax c) + (seq (assert-box rax) (Xor rax type-box) (Mov rax (Offset rax 0)))] ['car - (seq (assert-cons rax c) + (seq (assert-cons rax) (Xor rax type-cons) (Mov rax (Offset rax 8)))] ['cdr - (seq (assert-cons rax c) + (seq (assert-cons rax) (Xor rax type-cons) (Mov rax (Offset rax 0)))] ['empty? 
(eq-imm '())] @@ -82,7 +83,7 @@ ['vector-length (let ((zero (gensym)) (done (gensym))) - (seq (assert-vector rax c) + (seq (assert-vector rax) (Xor rax type-vect) (Cmp rax 0) (Je zero) @@ -95,7 +96,7 @@ ['string-length (let ((zero (gensym)) (done (gensym))) - (seq (assert-string rax c) + (seq (assert-string rax) (Xor rax type-str) (Cmp rax 0) (Je zero) @@ -106,24 +107,24 @@ (Mov rax 0) (Label done)))])) -;; Op2 CEnv -> Asm -(define (compile-op2 p c) +;; Op2 -> Asm +(define (compile-op2 p) (match p ['+ (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Add rax r8))] ['- (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Sub r8 rax) (Mov rax r8))] ['< (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Cmp r8 rax) (Mov rax val-true) (let ((true (gensym))) @@ -132,8 +133,8 @@ (Label true))))] ['= (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Cmp r8 rax) (Mov rax val-true) (let ((true (gensym))) @@ -152,7 +153,7 @@ (done (gensym)) (empty (gensym))) (seq (Pop r8) - (assert-natural r8 c) + (assert-natural r8) (Cmp r8 0) ; special case empty vector (Je empty) @@ -179,16 +180,16 @@ ['vector-ref (seq (Pop r8) - (assert-vector r8 c) - (assert-integer rax c) + (assert-vector r8) + (assert-integer rax) (Cmp rax 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-vect) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar rax int-shift) ; rax = index (Sub r9 1) (Cmp r9 rax) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal rax 3) (Add r8 rax) (Mov rax (Offset r8 8)))] @@ -198,8 +199,8 @@ (done (gensym)) (empty (gensym))) (seq (Pop r8) - (assert-natural r8 c) - (assert-char rax c) + (assert-natural r8) + (assert-char rax) (Cmp r8 0) ; special case empty string (Je empty) @@ -232,38 +233,38 @@ ['string-ref (seq (Pop r8) - 
(assert-string r8 c) - (assert-integer rax c) + (assert-string r8) + (assert-integer rax) (Cmp rax 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-str) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar rax int-shift) ; rax = index (Sub r9 1) (Cmp r9 rax) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal rax 2) (Add r8 rax) (Mov 'eax (Offset r8 8)) (Sal rax char-shift) (Or rax type-char))])) -;; Op3 CEnv -> Asm -(define (compile-op3 p c) +;; Op3 -> Asm +(define (compile-op3 p) (match p ['vector-set! (seq (Pop r10) (Pop r8) - (assert-vector r8 c) - (assert-integer r10 c) + (assert-vector r8) + (assert-integer r10) (Cmp r10 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-vect) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar r10 int-shift) ; r10 = index (Sub r9 1) (Cmp r9 r10) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal r10 3) (Add r8 r10) (Mov (Offset r8 8) rax) @@ -273,11 +274,11 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (assert-type mask type) - (λ (arg c) + (λ (arg) (seq (Mov r9 arg) (And r9 mask) (Cmp r9 type) - (Jne (error-label c))))) + (Jne 'raise_error_align)))) (define (type-pred mask type) (let ((l (gensym))) @@ -301,31 +302,31 @@ (define assert-string (assert-type ptr-mask type-str)) -(define (assert-codepoint c) +(define (assert-codepoint r) (let ((ok (gensym))) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 1114111)) - (Jg (error-label c)) - (Cmp rax (imm->bits 55295)) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align) + (Cmp r (imm->bits 1114111)) + (Jg 'raise_error_align) + (Cmp r (imm->bits 55295)) (Jl ok) - (Cmp rax (imm->bits 57344)) + (Cmp r (imm->bits 57344)) (Jg ok) - (Jmp (error-label c)) + (Jmp 'raise_error_align) (Label ok)))) -(define (assert-byte c) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 255)) - (Jg (error-label c)))) +(define (assert-byte r) + 
(seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align) + (Cmp r (imm->bits 255)) + (Jg 'raise_error_align))) -(define (assert-natural r c) - (seq (assert-integer r c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)))) +(define (assert-natural r) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align))) ;; Value -> Asm (define (eq-imm imm) @@ -336,23 +337,14 @@ (Mov rax val-false) (Label l1)))) -;; CEnv -> Asm -;; Pad the stack to be aligned for a call -(define (pad-stack c) - (match (odd? (length c)) - [#t (seq (Sub rsp 8))] - [#f (seq)])) +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) -;; CEnv -> Asm +;; Asm ;; Undo the stack alignment after a call -(define (unpad-stack c) - (match (odd? (length c)) - [#t (seq (Add rsp 8))] - [#f (seq)])) - -;; CEnv -> Label -;; Determine correct error handler label to jump to. -(define (error-label c) - (match (odd? (length c)) - [#t 'raise_error] - [#f 'raise_error_align])) +(define unpad-stack + (seq (Add rsp r15))) diff --git a/langs/jig-playground/compile.rkt b/langs/modules-in-progress/compile.rkt similarity index 88% rename from langs/jig-playground/compile.rkt rename to langs/modules-in-progress/compile.rkt index fab064d4..2d2e778b 100644 --- a/langs/jig-playground/compile.rkt +++ b/langs/modules-in-progress/compile.rkt @@ -20,8 +20,10 @@ (compile-main ds) (compile-defines ds) (Label 'raise_error_align) - (Sub rsp 8) - (Jmp 'raise_error))])) + pad-stack + (Call 'raise_error))])) + +(define compile compile-module) ;; Module -> Module ;; Remove anything from requires that is provided by this module @@ -49,10 +51,10 @@ [(cons _ ds) (main? 
ds)])) (define (compile-provides ps) - (map (lambda (p) (Global (symbol->label p))) ps)) + (map (λ (p) (Global (symbol->label p))) ps)) (define (compile-requires rs) - (map (lambda (r) (Extern (symbol->label r))) rs)) + (map (λ (r) (Extern (symbol->label r))) rs)) #| ;; Prog -> Asm @@ -154,19 +156,19 @@ ;; Op0 CEnv -> Asm (define (compile-prim0 p c) - (compile-op0 p c)) + (compile-op0 p)) ;; Op1 Expr CEnv -> Asm (define (compile-prim1 p e c) (seq (compile-e e c #f) - (compile-op1 p c))) + (compile-op1 p))) ;; Op2 Expr Expr CEnv -> Asm (define (compile-prim2 p e1 e2 c) (seq (compile-e e1 c #f) (Push rax) (compile-e e2 (cons #f c) #f) - (compile-op2 p c))) + (compile-op2 p))) ;; Op3 Expr Expr Expr CEnv -> Asm (define (compile-prim3 p e1 e2 e3 c) @@ -174,8 +176,8 @@ (Push rax) (compile-e e2 (cons #f c) #f) (Push rax) - (compile-e e3 (cons #f (cons #f c))) - (compile-op3 p c))) + (compile-e e3 (cons #f (cons #f c)) #f) + (compile-op3 p))) ;; Expr Expr Expr CEnv Bool -> Asm (define (compile-if e1 e2 e3 c t?) @@ -211,15 +213,14 @@ ;; Id [Listof Expr] CEnv -> Asm (define (compile-app-tail f es c) (seq (compile-es es c) - (if (zero? (length c)) - (seq) - (move-args (length es) (length c))) + (move-args (length es) (length c)) (Add rsp (* 8 (length c))) (Jmp (symbol->label f)))) ;; Integer Integer -> Asm (define (move-args i off) - (cond [(zero? i) (seq)] + (cond [(zero? off) (seq)] + [(zero? i) (seq)] [else (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) @@ -229,20 +230,12 @@ ;; The return address is placed above the arguments, so callee pops ;; arguments and return address is next frame (define (compile-app-nontail f es c) - (let ((ret (gensym 'ret))) - (if (odd? 
(length c)) - (seq (Lea r8 ret) - (Push r8) - (compile-es es (cons #f c)) - (Jmp (symbol->label f)) - (Label ret)) - (seq (Sub rsp 8) - (Lea r8 ret) - (Push r8) - (compile-es es (cons #f (cons #f c))) - (Jmp (symbol->label f)) - (Label ret) - (Add rsp 8))))) + (let ((r (gensym 'ret))) + (seq (Lea rax r) + (Push rax) + (compile-es es (cons #f c)) + (Jmp (symbol->label f)) + (Label r)))) ;; [Listof Expr] CEnv -> Asm (define (compile-es es c) diff --git a/langs/jig-playground/io.c b/langs/modules-in-progress/io.c similarity index 100% rename from langs/jig-playground/io.c rename to langs/modules-in-progress/io.c diff --git a/langs/hoodwink/main.c b/langs/modules-in-progress/main.c similarity index 100% rename from langs/hoodwink/main.c rename to langs/modules-in-progress/main.c diff --git a/langs/jig-playground/parse.rkt b/langs/modules-in-progress/parse.rkt similarity index 84% rename from langs/jig-playground/parse.rkt rename to langs/modules-in-progress/parse.rkt index 17cf36e3..d4f931e8 100644 --- a/langs/jig-playground/parse.rkt +++ b/langs/modules-in-progress/parse.rkt @@ -1,12 +1,15 @@ #lang racket -(provide parse-e parse-define parse-module parse-module-file) +(provide parse parse-e parse-define parse-module parse-module-file) (require "ast.rkt") +;; Need to pass around the module's file name in order to resolve +;; paths in requires. 
+ ;; String -> Module (define (parse-module-file fn) (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (begin0 (parse-module (read-all p)) + (begin0 (parse-module (read-all p) fn) (close-input-port p))))) ;; Port -> SExpr @@ -16,25 +19,27 @@ '() (cons r (read-all p))))) -;; S-Expr -> Module -(define (parse-module m) - (match (parse-module* m) +;; S-Expr Path -> Module +(define (parse-module m p) + (match (parse-module* m p) [(list ps rs ds #f) (Module ps rs ds)] [(list ps rs ds e) (Module (cons 'main ps) rs (cons (Defn 'main '() e) ds))])) -(define (parse-module* m) +(define parse (lambda (m) (parse-module m ""))) + +(define (parse-module* m p) (match m ['() (list '() '() '() #f)] [(cons x m) - (match (parse-module* m) + (match (parse-module* m p) [(list ps rs ds e) (match x [(cons 'provide _) (list (append (parse-provide x) ps) rs ds e)] [(cons 'require _) - (list ps (append (parse-require x) rs) ds e)] + (list ps (append (parse-require x p) rs) ds e)] [(cons 'define _) (list ps rs (cons (parse-define x) ds) e)] [_ @@ -100,7 +105,7 @@ box unbox empty? cons? box? car cdr vector? vector-length string? 
string-length)) (define op2 - '(+ - cons make-vector vector-ref make-string string-ref)) + '(+ - < = cons make-vector vector-ref make-string string-ref)) (define op3 '(vector-set!)) diff --git a/langs/jig-playground/print.c b/langs/modules-in-progress/print.c similarity index 100% rename from langs/jig-playground/print.c rename to langs/modules-in-progress/print.c diff --git a/langs/modules-in-progress/print.h b/langs/modules-in-progress/print.h new file mode 100644 index 00000000..c22081a2 --- /dev/null +++ b/langs/modules-in-progress/print.h @@ -0,0 +1,8 @@ +#ifndef PRINT_H +#define PRINT_H + +#include "values.h" + +void print_result(val_t); + +#endif diff --git a/langs/jig-playground/runtime.h b/langs/modules-in-progress/runtime.h similarity index 100% rename from langs/jig-playground/runtime.h rename to langs/modules-in-progress/runtime.h diff --git a/langs/modules-in-progress/stdlib-provides.rkt b/langs/modules-in-progress/stdlib-provides.rkt new file mode 100644 index 00000000..45671c5c --- /dev/null +++ b/langs/modules-in-progress/stdlib-provides.rkt @@ -0,0 +1,9 @@ +#lang racket +(provide stdlib-provides) +(require "parse.rkt" "ast.rkt" racket/runtime-path) + +(define-runtime-path stdlib "stdlib.rkt") + +(define stdlib-provides + (Module-ps (parse-module-file stdlib))) + diff --git a/langs/jig-playground/stdlib.rkt b/langs/modules-in-progress/stdlib.rkt similarity index 100% rename from langs/jig-playground/stdlib.rkt rename to langs/modules-in-progress/stdlib.rkt diff --git a/langs/modules-in-progress/test/build-runtime.rkt b/langs/modules-in-progress/test/build-runtime.rkt new file mode 100644 index 00000000..7023ee0b --- /dev/null +++ b/langs/modules-in-progress/test/build-runtime.rkt @@ -0,0 +1,8 @@ +#lang racket +(require a86/interp) + +;; link with runtime for IO operations +(unless (file-exists? "../runtime.o") + (system "make -C .. 
runtime.o")) +(current-objs + (list (path->string (normalize-path "../runtime.o")))) diff --git a/langs/jig/test/test-runner.rkt b/langs/modules-in-progress/test/test-runner.rkt similarity index 100% rename from langs/jig/test/test-runner.rkt rename to langs/modules-in-progress/test/test-runner.rkt diff --git a/langs/hoodwink/types.h b/langs/modules-in-progress/types.h similarity index 97% rename from langs/hoodwink/types.h rename to langs/modules-in-progress/types.h index e9a5025b..b79f45b2 100644 --- a/langs/hoodwink/types.h +++ b/langs/modules-in-progress/types.h @@ -23,7 +23,6 @@ #define cons_type_tag 2 #define vect_type_tag 3 #define str_type_tag 4 -#define symb_type_tag 5 #define int_shift (1 + imm_shift) #define int_type_mask ((1 << int_shift) - 1) #define int_type_tag (0 << (int_shift - 1)) diff --git a/langs/iniquity/types.rkt b/langs/modules-in-progress/types.rkt similarity index 100% rename from langs/iniquity/types.rkt rename to langs/modules-in-progress/types.rkt diff --git a/langs/hoax/unload-bits-asm.rkt b/langs/modules-in-progress/unload-bits-asm.rkt similarity index 100% rename from langs/hoax/unload-bits-asm.rkt rename to langs/modules-in-progress/unload-bits-asm.rkt diff --git a/langs/jig-playground/values.c b/langs/modules-in-progress/values.c similarity index 100% rename from langs/jig-playground/values.c rename to langs/modules-in-progress/values.c diff --git a/langs/jig-playground/values.h b/langs/modules-in-progress/values.h similarity index 100% rename from langs/jig-playground/values.h rename to langs/modules-in-progress/values.h diff --git a/langs/mountebank/Makefile b/langs/mountebank/Makefile new file mode 100644 index 00000000..ed8a85f4 --- /dev/null +++ b/langs/mountebank/Makefile @@ -0,0 +1,45 @@ +UNAME := $(shell uname) + +ifeq ($(UNAME), Darwin) + format=macho64 + CC=arch -x86_64 gcc +else + format=elf64 + CC=gcc +endif + +objs = \ + main.o \ + print.o \ + values.o \ + io.o \ + symbol.o + +default: submit.zip + +submit.zip: + zip 
submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* + +runtime.o: $(objs) + ld -r $(objs) -o runtime.o + +%.run: %.o runtime.o + $(CC) runtime.o $< -o $@ + +.c.o: + $(CC) -fPIC -c -g -o $@ $< + +.s.o: + nasm -g -f $(format) -o $@ $< + +%.s: %.rkt + cat $< | racket -t compile-stdin.rkt -m > $@ + +clean: + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" + +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/mountebank/ast.rkt b/langs/mountebank/ast.rkt new file mode 100644 index 00000000..a9467599 --- /dev/null +++ b/langs/mountebank/ast.rkt @@ -0,0 +1,81 @@ +#lang racket +(provide (all-defined-out)) + +;; type Prog = (Prog (Listof Defn) Expr) +(struct Prog (ds e) #:prefab) + +;; type Defn = (Defn Id (Listof Id) Expr) +(struct Defn (f xs e) #:prefab) + +;; type Expr = (Eof) +;; | (Quote Datum) +;; | (Prim0 Op0) +;; | (Prim1 Op1 Expr) +;; | (Prim2 Op2 Expr Expr) +;; | (Prim3 Op3 Expr Expr Expr) +;; | (If Expr Expr Expr) +;; | (Begin Expr Expr) +;; | (Let Id Expr Expr) +;; | (Var Id) +;; | (Match Expr (Listof Pat) (Listof Expr)) +;; | (App Expr (Listof Expr)) +;; | (Lam Id (Listof Id) Expr) +;; type Datum = Integer +;; | Char +;; | Boolean +;; | String +;; | Symbol +;; | (Boxof Datum) +;; | (Listof Datum) +;; | (Vectorof Datum) +;; type Id = Symbol +;; type Op0 = 'read-byte +;; type Op1 = 'add1 | 'sub1 | 'zero? +;; | 'char? | 'integer->char | 'char->integer +;; | 'write-byte | 'eof-object? +;; | 'box | 'car | 'cdr | 'unbox +;; | 'empty? | 'cons? | 'box? +;; | 'vector? | 'vector-length +;; | 'string? | 'string-length +;; | 'symbol? | 'string->symbol +;; | 'string->symbol | 'string->uninterned-symbol +;; type Op2 = '+ | '- | '< | '= +;; | 'cons | 'eq? +;; | 'make-vector | 'vector-ref +;; | 'make-string | 'string-ref +;; type Op3 = 'vector-set! 
+;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; | (PSymb Symbol) +;; | (PStr String) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() + +(struct Eof () #:prefab) +(struct Prim0 (p) #:prefab) +(struct Prim1 (p e) #:prefab) +(struct Prim2 (p e1 e2) #:prefab) +(struct Prim3 (p e1 e2 e3) #:prefab) +(struct If (e1 e2 e3) #:prefab) +(struct Begin (e1 e2) #:prefab) +(struct Let (x e1 e2) #:prefab) +(struct Var (x) #:prefab) +(struct App (e es) #:prefab) +(struct Lam (f xs e) #:prefab) +(struct Quote (d) #:prefab) +(struct Match (e ps es) #:prefab) + +(struct PVar (x) #:prefab) +(struct PWild () #:prefab) +(struct PLit (x) #:prefab) +(struct PBox (p) #:prefab) +(struct PCons (p1 p2) #:prefab) +(struct PAnd (p1 p2) #:prefab) +(struct PSymb (s) #:prefab) +(struct PStr (s) #:prefab) diff --git a/langs/mountebank/build-runtime.rkt b/langs/mountebank/build-runtime.rkt new file mode 100644 index 00000000..66aad89f --- /dev/null +++ b/langs/mountebank/build-runtime.rkt @@ -0,0 +1,14 @@ +#lang racket +(require racket/runtime-path) +(provide runtime-path) + +(define-runtime-path here ".") + +(void + (system (string-append "make -C '" + (path->string (normalize-path here)) + "' runtime.o"))) + +(define runtime-path + (path->string + (normalize-path (build-path here "runtime.o")))) diff --git a/langs/mountebank/char.c b/langs/mountebank/char.c new file mode 100644 index 00000000..d11f16e0 --- /dev/null +++ b/langs/mountebank/char.c @@ -0,0 +1,57 @@ +#include +#include +#include "types.h" + +void print_codepoint(int64_t); + +void print_char (int64_t v) { + int64_t codepoint = v >> char_shift; + printf("#\\"); + switch (codepoint) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + 
printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(v); + } +} + +void print_codepoint(int64_t v) { + int64_t codepoint = v >> char_shift; + // Print using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (codepoint < 128) { + printf("%c", (char) codepoint); + } else if (codepoint < 2048) { + printf("%c%c", + (char)(codepoint >> 6) | 192, + ((char)codepoint & 63) | 128); + } else if (codepoint < 65536) { + printf("%c%c%c", + (char)(codepoint >> 12) | 224, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } else { + printf("%c%c%c%c", + (char)(codepoint >> 18) | 240, + ((char)(codepoint >> 12) & 63) | 128, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } +} + diff --git a/langs/mountebank/compile-datum.rkt b/langs/mountebank/compile-datum.rkt new file mode 100644 index 00000000..90f8170f --- /dev/null +++ b/langs/mountebank/compile-datum.rkt @@ -0,0 +1,88 @@ +#lang racket +(provide compile-datum) +(require "types.rkt" + "utils.rkt" + a86/ast) + +;; Registers used +(define rax 'rax) ; return + +;; Datum -> Asm +(define (compile-datum d) + (cond + [(string? d) (seq (Lea rax (load-string d)))] + [(symbol? d) (seq (Lea rax (load-symbol d)))] + [(compound? d) (compile-compound-datum d)] + [else (compile-atom d)])) + +(define (load-symbol s) + (Plus (symbol->data-label s) type-symb)) + +(define (load-string s) + (Plus (symbol->data-label (string->symbol s)) type-str)) + +;; Value -> Asm +(define (compile-atom v) + (seq (Mov rax (value->bits v)))) + +;; Datum -> Boolean +(define (compound? d) + (or (box? d) + (cons? d) + (vector? d))) + +;; Datum -> Asm +(define (compile-compound-datum d) + (match (compile-quoted d) + [(cons l is) + (seq (Data) + is + (Text) + (Lea rax l))])) + +;; Datum -> (cons AsmExpr Asm) +(define (compile-quoted c) + (cond + [(vector? c) (compile-datum-vector (vector->list c))] + [(box? c) (compile-datum-box (unbox c))] + [(cons? 
c) (compile-datum-cons (car c) (cdr c))] + [(symbol? c) (cons (load-symbol c) '())] + [(string? c) (cons (load-string c) '())] + [else (cons (value->bits c) '())])) + +;; Datum -> (cons AsmExpr Asm) +(define (compile-datum-box c) + (match (compile-quoted c) + [(cons l1 is1) + (let ((l (gensym 'box))) + (cons (Plus l type-box) + (seq (Label l) + (Dq l1) + is1)))])) + +;; Datum Datum -> (cons AsmExpr Asm) +(define (compile-datum-cons c1 c2) + (match (compile-quoted c1) + [(cons l1 is1) + (match (compile-quoted c2) + [(cons l2 is2) + (let ((l (gensym 'cons))) + (cons (Plus l type-cons) + (seq (Label l) + (Dq l2) + (Dq l1) + is1 + is2)))])])) + +;; [Listof Datum] -> (cons AsmExpr Asm) +(define (compile-datum-vector ds) + (match ds + ['() (cons type-vect '())] + [_ + (let ((l (gensym 'vector)) + (cds (map compile-quoted ds))) + (cons (Plus l type-vect) + (seq (Label l) + (Dq (length ds)) + (map (λ (cd) (Dq (car cd))) cds) + (append-map cdr cds))))])) diff --git a/langs/mountebank/compile-define.rkt b/langs/mountebank/compile-define.rkt new file mode 100644 index 00000000..354e6f26 --- /dev/null +++ b/langs/mountebank/compile-define.rkt @@ -0,0 +1,69 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "fv.rkt" + "utils.rkt" + "compile-expr.rkt" + a86/ast) + +;; [Listof Defn] -> [Listof Id] +(define (define-ids ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (cons f (define-ids ds))])) + +;; [Listof Defn] -> Asm +(define (compile-defines ds) + (match ds + ['() (seq)] + [(cons d ds) + (seq (compile-define d) + (compile-defines ds))])) + +;; Defn -> Asm +(define (compile-define d) + (match d + [(Defn f xs e) + (compile-lambda-define (Lam f xs e))])) + +;; Defns -> Asm +;; Compile the closures for ds and push them on the stack +(define (compile-defines-values ds) + (seq (alloc-defines ds 0) + (init-defines ds (reverse (define-ids ds)) 8) + (add-rbx-defines ds 0))) + +;; Defns Int -> Asm +;; Allocate closures for ds at given offset, but 
don't write environment yet +(define (alloc-defines ds off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx off) rax) + (Mov rax rbx) + (Add rax off) + (Or rax type-proc) + (Push rax) + (alloc-defines ds (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns CEnv Int -> Asm +;; Initialize the environment for each closure for ds at given offset +(define (init-defines ds c off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (free-vars-to-heap fvs c off) + (init-defines ds c (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns Int -> Asm +;; Compute adjustment to rbx for allocation of all ds +(define (add-rbx-defines ds n) + (match ds + ['() (seq (Add rbx (* n 8)))] + [(cons (Defn f xs e) ds) + (add-rbx-defines ds (+ n (add1 (length (fv (Lam f xs e))))))])) diff --git a/langs/mountebank/compile-expr.rkt b/langs/mountebank/compile-expr.rkt new file mode 100644 index 00000000..ed9bba27 --- /dev/null +++ b/langs/mountebank/compile-expr.rkt @@ -0,0 +1,317 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-ops.rkt" + "compile-datum.rkt" + a86/ast) + +;; Registers used +(define rax 'rax) ; return +(define rbx 'rbx) ; heap +(define rsp 'rsp) ; stack +(define rdi 'rdi) ; arg + +;; Expr CEnv Bool -> Asm +(define (compile-e e c t?) 
+ (match e + [(Quote d) (compile-datum d)] + [(Eof) (seq (Mov rax (value->bits eof)))] + [(Var x) (compile-variable x c)] + [(Prim0 p) (compile-prim0 p c)] + [(Prim1 p e) (compile-prim1 p e c)] + [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] + [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] + [(If e1 e2 e3) (compile-if e1 e2 e3 c t?)] + [(Begin e1 e2) (compile-begin e1 e2 c t?)] + [(Let x e1 e2) (compile-let x e1 e2 c t?)] + [(App e es) (compile-app e es c t?)] + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) + +;; Id CEnv -> Asm +(define (compile-variable x c) + (match (lookup x c) + [#f (error "unbound variable")] ;(seq (Lea rax (symbol->label x)))] + [i (seq (Mov rax (Offset rsp i)))])) + +;; Op0 CEnv -> Asm +(define (compile-prim0 p c) + (compile-op0 p)) + +;; Op1 Expr CEnv -> Asm +(define (compile-prim1 p e c) + (seq (compile-e e c #f) + (compile-op1 p))) + +;; Op2 Expr Expr CEnv -> Asm +(define (compile-prim2 p e1 e2 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (compile-op2 p))) + +;; Op3 Expr Expr Expr CEnv -> Asm +(define (compile-prim3 p e1 e2 e3 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (Push rax) + (compile-e e3 (cons #f (cons #f c)) #f) + (compile-op3 p))) + +;; Expr Expr Expr CEnv Bool -> Asm +(define (compile-if e1 e2 e3 c t?) + (let ((l1 (gensym 'if)) + (l2 (gensym 'if))) + (seq (compile-e e1 c #f) + (Cmp rax (value->bits #f)) + (Je l1) + (compile-e e2 c t?) + (Jmp l2) + (Label l1) + (compile-e e3 c t?) + (Label l2)))) + +;; Expr Expr CEnv Bool -> Asm +(define (compile-begin e1 e2 c t?) + (seq (compile-e e1 c #f) + (compile-e e2 c t?))) + +;; Id Expr Expr CEnv Bool -> Asm +(define (compile-let x e1 e2 c t?) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons x c) t?) + (Add rsp 8))) + +;; Id [Listof Expr] CEnv Bool -> Asm +(define (compile-app f es c t?) + ;(compile-app-nontail f es c) + (if t? 
+ (compile-app-tail f es c) + (compile-app-nontail f es c))) + +;; Expr [Listof Expr] CEnv -> Asm +(define (compile-app-tail e es c) + (seq (compile-es (cons e es) c) + (move-args (add1 (length es)) (length c)) + (Add rsp (* 8 (length c))) + (Mov rax (Offset rsp (* 8 (length es)))) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) + (Jmp rax))) + +;; Integer Integer -> Asm +(define (move-args i off) + (cond [(zero? off) (seq)] + [(zero? i) (seq)] + [else + (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) + (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) + (move-args (sub1 i) off))])) + +;; Expr [Listof Expr] CEnv -> Asm +;; The return address is placed above the arguments, so callee pops +;; arguments and return address is next frame +(define (compile-app-nontail e es c) + (let ((r (gensym 'ret)) + (i (* 8 (length es)))) + (seq (Lea rax r) + (Push rax) + (compile-es (cons e es) (cons #f c)) + (Mov rax (Offset rsp i)) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) ; fetch the code label + (Jmp rax) + (Label r)))) + +;; Id [Listof Id] Expr CEnv -> Asm +(define (compile-lam f xs e c) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx 0) rax) + (free-vars-to-heap fvs c 8) + (Mov rax rbx) ; return value + (Or rax type-proc) + (Add rbx (* 8 (add1 (length fvs))))))) + +;; [Listof Id] CEnv Int -> Asm +;; Copy the values of given free variables into the heap at given offset +(define (free-vars-to-heap fvs c off) + (match fvs + ['() (seq)] + [(cons x fvs) + (seq (Mov r8 (Offset rsp (lookup x c))) + (Mov (Offset rbx off) r8) + (free-vars-to-heap fvs c (+ off 8)))])) + +;; [Listof Lam] -> Asm +(define (compile-lambda-defines ls) + (match ls + ['() (seq)] + [(cons l ls) + (seq (compile-lambda-define l) + (compile-lambda-defines ls))])) + +;; Lam -> Asm +(define (compile-lambda-define l) + (let ((fvs (fv l))) + (match l + [(Lam f xs e) + (let ((env (append (reverse fvs) (reverse xs) (list #f)))) + (seq (Label 
(symbol->label f)) + (Mov rax (Offset rsp (* 8 (length xs)))) + (Xor rax type-proc) + (copy-env-to-stack fvs 8) + (compile-e e env #t) + (Add rsp (* 8 (length env))) ; pop env + (Ret)))]))) + +;; [Listof Id] Int -> Asm +;; Copy the closure environment at given offset to stack +(define (copy-env-to-stack fvs off) + (match fvs + ['() (seq)] + [(cons _ fvs) + (seq (Mov r9 (Offset rax off)) + (Push r9) + (copy-env-to-stack fvs (+ 8 off)))])) + +;; [Listof Expr] CEnv -> Asm +(define (compile-es es c) + (match es + ['() '()] + [(cons e es) + (seq (compile-e e c #f) + (Push rax) + (compile-es es (cons #f c)))])) + +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c #f) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) 
+ (Add rsp (* 8 (length cm))) + (Jmp done) + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) cm)] + [(PVar x) + (list (seq (Push rax)) (cons x cm))] + [(PStr s) + (let ((ok (gensym)) + (fail (gensym))) + (list (seq (Lea rdi (symbol->data-label (string->symbol s))) + (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-str) + (Je ok) + (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok) + (Xor rax type-str) + (Mov rsi rax) + pad-stack + (Call 'symb_cmp) + unpad-stack + (Cmp rax 0) + (Jne fail)) + cm))] + [(PSymb s) + (let ((ok (gensym))) + (list (seq (Lea r9 (Plus (symbol->data-label s) type-symb)) + (Cmp rax r9) + (Je ok) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok)) + cm))] + [(PLit l) + (let ((ok (gensym))) + (list (seq (Cmp rax (value->bits l)) + (Je ok) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 cm1) + (let ((ok (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Je ok) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next) + (Label ok) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 cm2) + (let ((ok (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Je ok) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next) + (Label ok) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length 
cm1)) (length cm))))) + i2) + cm2))])])])) diff --git a/langs/mountebank/compile-literals.rkt b/langs/mountebank/compile-literals.rkt new file mode 100644 index 00000000..e2484e16 --- /dev/null +++ b/langs/mountebank/compile-literals.rkt @@ -0,0 +1,122 @@ +#lang racket +(provide compile-literals init-symbol-table literals) +(require "ast.rkt" + "utils.rkt" + a86/ast) + +(define rdi 'rdi) + +;; Prog -> Asm +(define (compile-literals p) + (append-map compile-literal (literals p))) + +;; Symbol -> Asm +(define (compile-literal s) + (let ((str (symbol->string s))) + (seq (Label (symbol->data-label s)) + (Dq (string-length str)) + (compile-string-chars (string->list str)) + (if (odd? (string-length str)) + (seq (Dd 0)) + (seq))))) + +;; Prog -> Asm +;; Call intern_symbol on every symbol in the program +(define (init-symbol-table p) + (match (symbols p) + ['() (seq)] + [ss (seq (Sub 'rsp 8) + (append-map init-symbol ss) + (Add 'rsp 8))])) + +;; Symbol -> Asm +(define (init-symbol s) + (seq (Lea rdi (symbol->data-label s)) + (Call 'intern_symbol))) + +;; Prog -> [Listof Symbol] +(define (literals p) + (remove-duplicates + (map to-symbol (literals* p)))) + +;; Prog -> [Listof Symbol] +(define (symbols p) + (remove-duplicates (filter symbol? (literals* p)))) + +;; (U String Symbol) -> Symbol +(define (to-symbol s) + (if (string? 
s) + (string->symbol s) + s)) + +;; Prog -> [Listof (U Symbol String)] +(define (literals* p) + (match p + [(Prog ds e) + (append (append-map literals-d ds) (literals-e e))])) + +;; Defn -> [Listof (U Symbol String)] +(define (literals-d d) + (match d + [(Defn f xs e) + (literals-e e)])) + +;; Expr -> [Listof (U Symbol String)] +(define (literals-e e) + (match e + [(Quote d) (literals-datum d)] + [(Prim1 p e) + (literals-e e)] + [(Prim2 p e1 e2) + (append (literals-e e1) (literals-e e2))] + [(Prim3 p e1 e2 e3) + (append (literals-e e1) (literals-e e2) (literals-e e3))] + [(If e1 e2 e3) + (append (literals-e e1) (literals-e e2) (literals-e e3))] + [(Begin e1 e2) + (append (literals-e e1) (literals-e e2))] + [(Let x e1 e2) + (append (literals-e e1) (literals-e e2))] + [(App e1 es) + (append (literals-e e1) (append-map literals-e es))] + [(Lam f xs e) + (literals-e e)] + [(Match e ps es) + (append (literals-e e) (append-map literals-match-clause ps es))] + [_ '()])) + +;; Pat Expr -> [Listof Symbol] +(define (literals-match-clause p e) + (append (literals-pat p) (literals-e e))) + +;; Pat -> [Listof (U Symbol String)] +(define (literals-pat p) + (match p + [(PSymb s) (list s)] + [(PStr s) (list s)] + [(PBox p) (literals-pat p)] + [(PCons p1 p2) (append (literals-pat p1) (literals-pat p2))] + [(PAnd p1 p2) (append (literals-pat p1) (literals-pat p2))] + [_ '()])) + +;; Datum -> [Listof (U Symbol String)] +(define (literals-datum d) + (cond + [(string? d) (list d)] + [(symbol? d) (list d)] + [(cons? d) + (append (literals-datum (car d)) + (literals-datum (cdr d)))] + [(box? d) + (literals-datum (unbox d))] + [(vector? 
d) + (append-map literals-datum (vector->list d))] + [else '()])) + +;; [Listof Char] -> Asm +(define (compile-string-chars cs) + (match cs + ['() (seq)] + [(cons c cs) + (seq (Dd (char->integer c)) + (compile-string-chars cs))])) diff --git a/langs/iniquity/compile-ops.rkt b/langs/mountebank/compile-ops.rkt similarity index 55% rename from langs/iniquity/compile-ops.rkt rename to langs/mountebank/compile-ops.rkt index ba204845..8a52cd49 100644 --- a/langs/iniquity/compile-ops.rkt +++ b/langs/mountebank/compile-ops.rkt @@ -1,73 +1,76 @@ #lang racket (provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) +(require "ast.rkt" "types.rkt" "utils.rkt" a86/ast) (define rax 'rax) ; return (define eax 'eax) ; 32-bit load/store (define rbx 'rbx) ; heap -(define rdi 'rdi) ; arg +(define rdi 'rdi) ; arg1 +(define rsi 'rsi) ; arg2 +(define rdx 'rdx) ; arg3 (define r8 'r8) ; scratch (define r9 'r9) ; scratch (define r10 'r10) ; scratch +(define r12 'r12) ; save across call to memcpy +(define r15 'r15) ; stack pad (non-volatile) (define rsp 'rsp) ; stack -;; Op0 CEnv -> Asm -(define (compile-op0 p c) +;; Op0 -> Asm +(define (compile-op0 p) (match p - ['void (seq (Mov rax val-void))] - ['read-byte (seq (pad-stack c) + ['void (seq (Mov rax (value->bits (void))))] + ['read-byte (seq pad-stack (Call 'read_byte) - (unpad-stack c))] - ['peek-byte (seq (pad-stack c) + unpad-stack)] + ['peek-byte (seq pad-stack (Call 'peek_byte) - (unpad-stack c))])) + unpad-stack)])) -;; Op1 CEnv -> Asm -(define (compile-op1 p c) +;; Op1 -> Asm +(define (compile-op1 p) (match p ['add1 - (seq (assert-integer rax c) - (Add rax (imm->bits 1)))] + (seq (assert-integer rax) + (Add rax (value->bits 1)))] ['sub1 - (seq (assert-integer rax c) - (Sub rax (imm->bits 1)))] + (seq (assert-integer rax) + (Sub rax (value->bits 1)))] ['zero? - (seq (assert-integer rax c) + (seq (assert-integer rax) (eq-imm 0))] ['char? 
(type-pred mask-char type-char)] ['char->integer - (seq (assert-char rax c) + (seq (assert-char rax) (Sar rax char-shift) (Sal rax int-shift))] ['integer->char - (seq (assert-codepoint c) + (seq (assert-codepoint rax) (Sar rax int-shift) (Sal rax char-shift) (Xor rax type-char))] ['eof-object? (eq-imm eof)] ['write-byte - (seq (assert-byte c) - (pad-stack c) + (seq (assert-byte rax) + pad-stack (Mov rdi rax) (Call 'write_byte) - (unpad-stack c) - (Mov rax val-void))] + unpad-stack)] ['box (seq (Mov (Offset rbx 0) rax) (Mov rax rbx) (Or rax type-box) (Add rbx 8))] ['unbox - (seq (assert-box rax c) + (seq (assert-box rax) (Xor rax type-box) (Mov rax (Offset rax 0)))] ['car - (seq (assert-cons rax c) + (seq (assert-cons rax) (Xor rax type-cons) (Mov rax (Offset rax 8)))] ['cdr - (seq (assert-cons rax c) + (seq (assert-cons rax) (Xor rax type-cons) (Mov rax (Offset rax 0)))] ['empty? (eq-imm '())] @@ -79,10 +82,12 @@ (type-pred ptr-mask type-vect)] ['string? (type-pred ptr-mask type-str)] + ['symbol? 
+ (type-pred ptr-mask type-symb)] ['vector-length (let ((zero (gensym)) (done (gensym))) - (seq (assert-vector rax c) + (seq (assert-vector rax) (Xor rax type-vect) (Cmp rax 0) (Je zero) @@ -95,7 +100,7 @@ ['string-length (let ((zero (gensym)) (done (gensym))) - (seq (assert-string rax c) + (seq (assert-string rax) (Xor rax type-str) (Cmp rax 0) (Je zero) @@ -104,42 +109,78 @@ (Jmp done) (Label zero) (Mov rax 0) - (Label done)))])) - -;; Op2 CEnv -> Asm -(define (compile-op2 p c) + (Label done)))] + ['string->symbol + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + pad-stack + (Call 'intern_symbol) + unpad-stack + (Or rax type-symb))] + ['symbol->string + (seq (assert-symbol rax) + (Xor rax type-symb) + char-array-copy + (Or rax type-str))] + ['string->uninterned-symbol + (seq (assert-string rax) + (Xor rax type-str) + char-array-copy + (Or rax type-symb))])) + +;; Asm +;; Copy sized array of characters pointed to by rax +(define char-array-copy + (seq (Mov rdi rbx) ; dst + (Mov rsi rax) ; src + (Mov rdx (Offset rax 0)) ; len + (Add rdx 1) ; #words = 1 + (len+1)/2 + (Sar rdx 1) + (Add rdx 1) + (Sal rdx 3) ; #bytes = 8*#words + (Mov r12 rdx) ; save rdx before destroyed + pad-stack + (Call 'memcpy) + unpad-stack + ; rbx should be preserved by memcpy + ;(Mov rbx rax) ; dst is returned, install as heap pointer + (Add rbx r12))) + +;; Op2 -> Asm +(define (compile-op2 p) (match p ['+ (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Add rax r8))] ['- (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Sub r8 rax) (Mov rax r8))] ['< (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Cmp r8 rax) - (Mov rax val-true) + (Mov rax (value->bits #t)) (let ((true (gensym))) (seq (Jl true) - (Mov rax val-false) + (Mov rax (value->bits #f)) (Label true))))] ['= (seq (Pop r8) - (assert-integer r8 
c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Cmp r8 rax) - (Mov rax val-true) + (Mov rax (value->bits #t)) (let ((true (gensym))) (seq (Je true) - (Mov rax val-false) - (Label true))))] + (Mov rax (value->bits #f)) + (Label true))))] ['cons (seq (Mov (Offset rbx 0) rax) (Pop rax) @@ -147,12 +188,15 @@ (Mov rax rbx) (Or rax type-cons) (Add rbx 16))] + ['eq? + (seq (Pop r8) + (eq r8 rax))] ['make-vector (let ((loop (gensym)) (done (gensym)) (empty (gensym))) (seq (Pop r8) - (assert-natural r8 c) + (assert-natural r8) (Cmp r8 0) ; special case empty vector (Je empty) @@ -179,16 +223,18 @@ ['vector-ref (seq (Pop r8) - (assert-vector r8 c) - (assert-integer rax c) + (assert-vector r8) + (assert-integer rax) + (Cmp r8 type-vect) + (Je 'raise_error_align) ; special case for empty vector (Cmp rax 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-vect) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar rax int-shift) ; rax = index (Sub r9 1) (Cmp r9 rax) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal rax 3) (Add r8 rax) (Mov rax (Offset r8 8)))] @@ -198,8 +244,8 @@ (done (gensym)) (empty (gensym))) (seq (Pop r8) - (assert-natural r8 c) - (assert-char rax c) + (assert-natural r8) + (assert-char rax) (Cmp r8 0) ; special case empty string (Je empty) @@ -212,9 +258,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) @@ -232,60 +278,62 @@ ['string-ref (seq (Pop r8) - (assert-string r8 c) - (assert-integer rax c) + (assert-string r8) + (assert-integer rax) + (Cmp r8 type-str) + (Je 'raise_error_align) ; special case for empty string (Cmp rax 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-str) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar rax int-shift) ; rax = index (Sub r9 1) (Cmp r9 rax) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal rax 2) (Add r8 rax) 
(Mov 'eax (Offset r8 8)) (Sal rax char-shift) (Or rax type-char))])) -;; Op3 CEnv -> Asm -(define (compile-op3 p c) +;; Op3 -> Asm +(define (compile-op3 p) (match p ['vector-set! (seq (Pop r10) (Pop r8) - (assert-vector r8 c) - (assert-integer r10 c) + (assert-vector r8) + (assert-integer r10) (Cmp r10 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-vect) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar r10 int-shift) ; r10 = index (Sub r9 1) (Cmp r9 r10) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal r10 3) (Add r8 r10) (Mov (Offset r8 8) rax) - (Mov rax val-void))])) + (Mov rax (value->bits (void))))])) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (assert-type mask type) - (λ (arg c) + (λ (arg) (seq (Mov r9 arg) (And r9 mask) (Cmp r9 type) - (Jne (error-label c))))) + (Jne 'raise_error_align)))) (define (type-pred mask type) (let ((l (gensym))) (seq (And rax mask) (Cmp rax type) - (Mov rax (imm->bits #t)) + (Mov rax (value->bits #t)) (Je l) - (Mov rax (imm->bits #f)) + (Mov rax (value->bits #f)) (Label l)))) (define assert-integer @@ -300,59 +348,50 @@ (assert-type ptr-mask type-vect)) (define assert-string (assert-type ptr-mask type-str)) +(define assert-symbol + (assert-type ptr-mask type-symb)) +(define assert-proc + (assert-type ptr-mask type-proc)) -(define (assert-codepoint c) +(define (assert-codepoint r) (let ((ok (gensym))) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 1114111)) - (Jg (error-label c)) - (Cmp rax (imm->bits 55295)) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align) + (Cmp r (value->bits 1114111)) + (Jg 'raise_error_align) + (Cmp r (value->bits 55295)) (Jl ok) - (Cmp rax (imm->bits 57344)) + (Cmp r (value->bits 57344)) (Jg ok) - (Jmp (error-label c)) + (Jmp 'raise_error_align) (Label ok)))) -(define (assert-byte c) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 255)) - (Jg 
(error-label c)))) +(define (assert-byte r) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align) + (Cmp r (value->bits 255)) + (Jg 'raise_error_align))) -(define (assert-natural r c) - (seq (assert-integer r c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)))) +(define (assert-natural r) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align))) ;; Value -> Asm (define (eq-imm imm) (let ((l1 (gensym))) - (seq (Cmp rax (imm->bits imm)) - (Mov rax val-true) + (seq (Cmp rax (value->bits imm)) + (Mov rax (value->bits #t)) (Je l1) - (Mov rax val-false) + (Mov rax (value->bits #f)) (Label l1)))) -;; CEnv -> Asm -;; Pad the stack to be aligned for a call -(define (pad-stack c) - (match (even? (length c)) - [#t (seq (Sub rsp 8))] - [#f (seq)])) - -;; CEnv -> Asm -;; Undo the stack alignment after a call -(define (unpad-stack c) - (match (even? (length c)) - [#t (seq (Add rsp 8))] - [#f (seq)])) - -;; CEnv -> Label -;; Determine correct error handler label to jump to. -(define (error-label c) - (match (even? 
(length c)) - [#t 'raise_error] - [#f 'raise_error_align])) +(define (eq ir1 ir2) + (let ((l1 (gensym))) + (seq (Cmp ir1 ir2) + (Mov rax (value->bits #t)) + (Je l1) + (Mov rax (value->bits #f)) + (Label l1)))) diff --git a/langs/mountebank/compile-stdin.rkt b/langs/mountebank/compile-stdin.rkt new file mode 100644 index 00000000..cfa15106 --- /dev/null +++ b/langs/mountebank/compile-stdin.rkt @@ -0,0 +1,10 @@ +#lang racket +(provide main) +(require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile (parse (read-all))))) diff --git a/langs/mountebank/compile.rkt b/langs/mountebank/compile.rkt new file mode 100644 index 00000000..7ab2e884 --- /dev/null +++ b/langs/mountebank/compile.rkt @@ -0,0 +1,53 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-define.rkt" + "compile-expr.rkt" + "compile-literals.rkt" + a86/ast) + +;; Registers used +(define rbx 'rbx) ; heap +(define rsp 'rsp) ; stack +(define rdi 'rdi) ; arg +(define r15 'r15) ; stack pad (non-volatile) + +;; type CEnv = (Listof [Maybe Id]) + +;; Prog -> Asm +(define (compile p) + (match p + [(Prog ds e) + (prog (externs) + (Global 'entry) + (Label 'entry) + (Push rbx) ; save callee-saved register + (Push r15) + (Mov rbx rdi) ; recv heap pointer + (init-symbol-table p) + (compile-defines-values ds) + (compile-e e (reverse (define-ids ds)) #f) + (Add rsp (* 8 (length ds))) ;; pop function definitions + (Pop r15) ; restore callee-save register + (Pop rbx) + (Ret) + (compile-defines ds) + (compile-lambda-defines (lambdas p)) + (Label 'raise_error_align) + pad-stack + (Call 'raise_error) + (Data) + (compile-literals p))])) + +(define (externs) + (seq (Extern 'peek_byte) + (Extern 'read_byte) + (Extern 'write_byte) + (Extern 'raise_error) + (Extern 'intern_symbol) + (Extern 
'symb_cmp) + (Extern 'memcpy))) diff --git a/langs/mountebank/env.rkt b/langs/mountebank/env.rkt new file mode 100644 index 00000000..c43be9c3 --- /dev/null +++ b/langs/mountebank/env.rkt @@ -0,0 +1,15 @@ +#lang racket +(provide lookup ext) + +;; Env Variable -> Answer +(define (lookup env x) + (match env + ['() 'err] + [(cons (list y i) env) + (match (symbol=? x y) + [#t i] + [#f (lookup env x)])])) + +;; Env Variable Value -> Value +(define (ext r x i) + (cons (list x i) r)) \ No newline at end of file diff --git a/langs/mountebank/fv.rkt b/langs/mountebank/fv.rkt new file mode 100644 index 00000000..2377b7e5 --- /dev/null +++ b/langs/mountebank/fv.rkt @@ -0,0 +1,35 @@ +#lang racket +(require "ast.rkt") +(provide fv) + +;; Expr -> [Listof Id] +;; List all of the free variables in e +(define (fv e) + (remove-duplicates (fv* e))) + +(define (fv* e) + (match e + [(Var x) (list x)] + [(Prim1 p e) (fv* e)] + [(Prim2 p e1 e2) (append (fv* e1) (fv* e2))] + [(Prim3 p e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(If e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(Begin e1 e2) (append (fv* e1) (fv* e2))] + [(Let x e1 e2) (append (fv* e1) (remq* (list x) (fv* e2)))] + [(App e1 es) (append (fv* e1) (append-map fv* es))] + [(Lam f xs e) (remq* xs (fv* e))] + [(Match e ps es) (append (fv* e) (append-map fv-clause* ps es))] + [_ '()])) + +;; Pat Expr -> [Listof Id] +(define (fv-clause* p e) + (remq* (bv-pat* p) (fv* e))) + +;; Pat -> [Listof Id] +(define (bv-pat* p) + (match p + [(PVar x) (list x)] + [(PCons p1 p2) (append (bv-pat* p1) (bv-pat* p2))] + [(PAnd p1 p2) (append (bv-pat* p1) (bv-pat* p2))] + [(PBox p) (bv-pat* p)] + [_ '()])) diff --git a/langs/mountebank/heap.h b/langs/mountebank/heap.h new file mode 100644 index 00000000..8f2f5e23 --- /dev/null +++ b/langs/mountebank/heap.h @@ -0,0 +1,9 @@ +#include + +extern int64_t heap[]; +extern int from_side; + +extern char type[]; + +// in words +#define heap_size 1001 diff --git a/langs/mountebank/interp-defun.rkt 
b/langs/mountebank/interp-defun.rkt new file mode 100644 index 00000000..c4bcc05e --- /dev/null +++ b/langs/mountebank/interp-defun.rkt @@ -0,0 +1,156 @@ +#lang racket +(provide interp interp-env (struct-out Closure) zip) +(require "ast.rkt" + "env.rkt" + "interp-prims.rkt") + +;; type Answer = Value | 'err + +;; type Value = +;; | Datum +;; | Eof +;; | Void +;; | (cons Value Value) +;; | (box Value) +;; | (vector Value ...) +;; | (string Char ...) +;; | (Closure [Listof Id] Expr Env) +(struct Closure (xs e r) #:prefab) + +;; type REnv = (Listof (List Id Value)) +;; type Defns = (Listof Defn) + +;; Prog -> Answer +(define (interp p) + (match p + [(Prog ds e) + (interp-env e '() ds)])) + +;; Expr Env Defns -> Answer +(define (interp-env e r ds) + (match e + [(Quote d) d] + [(Eof) eof] + [(Var x) (interp-var x r ds)] + [(Prim0 'void) (void)] + [(Prim0 'read-byte) (read-byte)] + [(Prim0 'peek-byte) (peek-byte)] + [(Prim1 p e) + (match (interp-env e r ds) + ['err 'err] + [v (interp-prim1 p v)])] + [(Prim2 p e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (interp-prim2 p v1 v2)])])] + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (match (interp-env e3 r ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] + [(If p e1 e2) + (match (interp-env p r ds) + ['err 'err] + [v + (if v + (interp-env e1 r ds) + (interp-env e2 r ds))])] + [(Begin e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [_ (interp-env e2 r ds)])] + [(Let x e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (Closure xs e r)] + [(App e es) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (match f + [(Closure xs e r) + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err)] + [_ 'err])])])] + [(Match e ps es) + (match 
(interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PSymb s) (and (eq? s v) r)] + [(PStr s) (and (string? v) (string=? s v) r)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) + +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) + +;; (Listof Expr) REnv Defns -> (Listof Value) | 'err +(define (interp-env* es r ds) + (match es + ['() '()] + [(cons e es) + (match (interp-env e r ds) + ['err 'err] + [v (match (interp-env* es r ds) + ['err 'err] + [vs (cons v vs)])])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? 
f g)]) + ds)) + +(define (zip xs ys) + (match* (xs ys) + [('() '()) '()] + [((cons x xs) (cons y ys)) + (cons (list x y) + (zip xs ys))])) diff --git a/langs/knock/interp-io.rkt b/langs/mountebank/interp-io.rkt similarity index 100% rename from langs/knock/interp-io.rkt rename to langs/mountebank/interp-io.rkt diff --git a/langs/mountebank/interp-prims.rkt b/langs/mountebank/interp-prims.rkt new file mode 100644 index 00000000..7797de69 --- /dev/null +++ b/langs/mountebank/interp-prims.rkt @@ -0,0 +1,74 @@ +#lang racket +(require "ast.rkt") +(provide interp-prim1 interp-prim2 interp-prim3) + +;; Op1 Value -> Answer +(define (interp-prim1 p1 v) + (match (list p1 v) + [(list 'add1 (? integer?)) (add1 v)] + [(list 'sub1 (? integer?)) (sub1 v)] + [(list 'zero? (? integer?)) (zero? v)] + [(list 'char? v) (char? v)] + [(list 'char->integer (? char?)) (char->integer v)] + [(list 'integer->char (? codepoint?)) (integer->char v)] + [(list 'eof-object? v) (eof-object? v)] + [(list 'write-byte (? byte?)) (write-byte v)] + [(list 'box v) (box v)] + [(list 'unbox (? box?)) (unbox v)] + [(list 'car (? pair?)) (car v)] + [(list 'cdr (? pair?)) (cdr v)] + [(list 'empty? v) (empty? v)] + [(list 'cons? v) (cons? v)] + [(list 'box? v) (box? v)] + [(list 'vector? v) (vector? v)] + [(list 'vector-length (? vector?)) (vector-length v)] + [(list 'string? v) (string? v)] + [(list 'string-length (? string?)) (string-length v)] + [(list 'symbol? v) (symbol? v)] + [(list 'symbol->string (? symbol?)) (symbol->string v)] + [(list 'string->symbol (? string?)) (string->symbol v)] + [(list 'string->uninterned-symbol (? string?)) + (string->uninterned-symbol v)] + [_ 'err])) + +;; Op2 Value Value -> Answer +(define (interp-prim2 p v1 v2) + (match (list p v1 v2) + [(list '+ (? integer?) (? integer?)) (+ v1 v2)] + [(list '- (? integer?) (? integer?)) (- v1 v2)] + [(list '< (? integer?) (? integer?)) (< v1 v2)] + [(list '= (? integer?) (? 
integer?)) (= v1 v2)] + [(list 'cons v1 v2) (cons v1 v2)] + [(list 'eq? v1 v2) (eq? v1 v2)] + [(list 'make-vector (? integer?) _) + (if (<= 0 v1) + (make-vector v1 v2) + 'err)] + [(list 'vector-ref (? vector?) (? integer?)) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-ref v1 v2) + 'err)] + [(list 'make-string (? integer?) (? char?)) + (if (<= 0 v1) + (make-string v1 v2) + 'err)] + [(list 'string-ref (? string?) (? integer?)) + (if (<= 0 v2 (sub1 (string-length v1))) + (string-ref v1 v2) + 'err)] + [_ 'err])) + +;; Op3 Value Value Value -> Answer +(define (interp-prim3 p v1 v2 v3) + (match (list p v1 v2 v3) + [(list 'vector-set! (? vector?) (? integer?) _) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-set! v1 v2 v3) + 'err)] + [_ 'err])) + +;; Any -> Boolean +(define (codepoint? v) + (and (integer? v) + (or (<= 0 v 55295) + (<= 57344 v 1114111)))) diff --git a/langs/mountebank/interp-stdin.rkt b/langs/mountebank/interp-stdin.rkt new file mode 100644 index 00000000..965b9cc4 --- /dev/null +++ b/langs/mountebank/interp-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt" "interp.rkt" "read-all.rkt") + +;; -> Void +;; Parse and interpret contents of stdin, +;; print result on stdout +(define (main) + (read-line) ; ignore #lang racket line + (let ((r (interp (parse (read-all))))) + (unless (void? r) + (println r)))) diff --git a/langs/mountebank/interp.rkt b/langs/mountebank/interp.rkt new file mode 100644 index 00000000..3accf29b --- /dev/null +++ b/langs/mountebank/interp.rkt @@ -0,0 +1,155 @@ +#lang racket +(provide interp interp-env) +(require "ast.rkt" + "env.rkt" + "interp-prims.rkt") + +;; type Answer = Value | 'err + +;; type Value = +;; | Datum +;; | Eof +;; | Void +;; | (cons Value Value) +;; | (box Value) +;; | (vector Value ...) +;; | (string Char ...) +;; | (Value ... 
-> Answer) + +;; type REnv = (Listof (List Id Value)) +;; type Defns = (Listof Defn) + +;; Prog -> Answer +(define (interp p) + (match p + [(Prog ds e) + (interp-env e '() ds)])) + +;; Expr Env Defns -> Answer +(define (interp-env e r ds) + (match e + [(Quote d) d] + [(Eof) eof] + [(Var x) (interp-var x r ds)] + [(Prim0 'void) (void)] + [(Prim0 'read-byte) (read-byte)] + [(Prim0 'peek-byte) (peek-byte)] + [(Prim1 p e) + (match (interp-env e r ds) + ['err 'err] + [v (interp-prim1 p v)])] + [(Prim2 p e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (interp-prim2 p v1 v2)])])] + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (match (interp-env e3 r ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] + [(If p e1 e2) + (match (interp-env p r ds) + ['err 'err] + [v + (if v + (interp-env e1 r ds) + (interp-env e2 r ds))])] + [(Begin e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [_ (interp-env e2 r ds)])] + [(Let x e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (λ vs + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err))] + [(App e es) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (if (procedure? f) + (apply f vs) + 'err)])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PSymb s) (and (eq? s v) r)] + [(PStr s) (and (string? v) (string=? 
s v) r)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) + +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) + +;; (Listof Expr) REnv Defns -> (Listof Value) | 'err +(define (interp-env* es r ds) + (match es + ['() '()] + [(cons e es) + (match (interp-env e r ds) + ['err 'err] + [v (match (interp-env* es r ds) + ['err 'err] + [vs (cons v vs)])])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) + +(define (zip xs ys) + (match* (xs ys) + [('() '()) '()] + [((cons x xs) (cons y ys)) + (cons (list x y) + (zip xs ys))])) diff --git a/langs/mountebank/io.c b/langs/mountebank/io.c new file mode 100644 index 00000000..7ef82281 --- /dev/null +++ b/langs/mountebank/io.c @@ -0,0 +1,25 @@ +#include +#include +#include "types.h" +#include "values.h" +#include "runtime.h" + +val_t read_byte(void) +{ + char c = getc(in); + return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); +} + +val_t peek_byte(void) +{ + char c = getc(in); + ungetc(c, in); + return (c == EOF) ? 
val_wrap_eof() : val_wrap_int(c); + +} + +val_t write_byte(val_t c) +{ + putc((char) val_unwrap_int(c), out); + return val_wrap_void(); +} diff --git a/langs/mountebank/lambdas.rkt b/langs/mountebank/lambdas.rkt new file mode 100644 index 00000000..0a246408 --- /dev/null +++ b/langs/mountebank/lambdas.rkt @@ -0,0 +1,35 @@ +#lang racket +(require "ast.rkt") +(provide lambdas) + + +;; Prog -> [Listof Lam] +;; List all of the lambda expressions in p +(define (lambdas p) + (match p + [(Prog ds e) + (append (lambdas-ds ds) (lambdas-e e))])) + +;; Defns -> [Listof Lam] +;; List all of the lambda expressions in ds +(define (lambdas-ds ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (append (lambdas-e e) + (lambdas-ds ds))])) + +;; Expr -> [Listof Lam] +;; List all of the lambda expressions in e +(define (lambdas-e e) + (match e + [(Prim1 p e) (lambdas-e e)] + [(Prim2 p e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Prim3 p e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(If e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(Begin e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Let x e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(App e1 es) (append (lambdas-e e1) (append-map lambdas-e es))] + [(Lam f xs e1) (cons e (lambdas-e e1))] + [(Match e ps es) (append (lambdas-e e) (append-map lambdas-e es))] + [_ '()])) diff --git a/langs/jig-playground/main.c b/langs/mountebank/main.c similarity index 100% rename from langs/jig-playground/main.c rename to langs/mountebank/main.c diff --git a/langs/jig/compile-file.rkt b/langs/mountebank/parse-file.rkt similarity index 83% rename from langs/jig/compile-file.rkt rename to langs/mountebank/parse-file.rkt index 3593dc7d..a5021320 100644 --- a/langs/jig/compile-file.rkt +++ b/langs/mountebank/parse-file.rkt @@ -9,5 +9,5 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read-all p))))) + (displayln (parse 
(read-all p))) (close-input-port p)))) diff --git a/langs/mountebank/parse.rkt b/langs/mountebank/parse.rkt new file mode 100644 index 00000000..29d4db48 --- /dev/null +++ b/langs/mountebank/parse.rkt @@ -0,0 +1,112 @@ +#lang racket +(provide parse parse-define parse-e) +(require "ast.rkt") + +;; [Listof S-Expr] -> Prog +(define (parse s) + (match s + [(cons (and (cons 'define _) d) s) + (match (parse s) + [(Prog ds e) + (Prog (cons (parse-define d) ds) e)])] + [(cons e '()) (Prog '() (parse-e e))] + [_ (error "program parse error")])) + +;; S-Expr -> Defn +(define (parse-define s) + (match s + [(list 'define (list-rest (? symbol? f) xs) e) + (if (andmap symbol? xs) + (Defn f xs (parse-e e)) + (error "parse definition error"))] + [_ (error "Parse defn error" s)])) + +;; S-Expr -> Expr +(define (parse-e s) + (match s + [(? self-quoting?) (Quote s)] + [(list 'quote d) (Quote d)] + ['eof (Eof)] + [(? symbol?) (Var s)] + [(list (? (op? op0) p0)) (Prim0 p0)] + [(list (? (op? op1) p1) e) (Prim1 p1 (parse-e e))] + [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse-e e1) (parse-e e2))] + [(list (? (op? op3) p3) e1 e2 e3) + (Prim3 p3 (parse-e e1) (parse-e e2) (parse-e e3))] + [(list 'begin e1 e2) + (Begin (parse-e e1) (parse-e e2))] + [(list 'if e1 e2 e3) + (If (parse-e e1) (parse-e e2) (parse-e e3))] + [(list 'let (list (list (? symbol? x) e1)) e2) + (Let x (parse-e e1) (parse-e e2))] + [(cons 'match (cons e ms)) + (parse-match (parse-e e) ms)] + [(list (or 'lambda 'λ) xs e) + (if (and (list? xs) + (andmap symbol? xs)) + (Lam (gensym 'lambda) xs (parse-e e)) + (error "parse lambda error"))] + [(cons e es) + (App (parse-e e) (map parse-e es))] + [_ (error "Parse error" s)])) + +(define (parse-match e ms) + (match ms + ['() (Match e '() '())] + [(cons (list p r) ms) + (match (parse-match e ms) + [(Match e ps es) + (Match e + (cons (parse-pat p) ps) + (cons (parse-e r) es))])])) + +(define (parse-pat p) + (match p + [(? boolean?) (PLit p)] + [(? exact-integer?) (PLit p)] + [(? 
char?) (PLit p)] + ['_ (PWild)] + [(? symbol?) (PVar p)] + [(? string?) (PStr p)] + [(list 'quote (? symbol? s)) + (PSymb s)] + [(list 'quote (list)) + (PLit '())] + [(list 'box p) + (PBox (parse-pat p))] + [(list 'cons p1 p2) + (PCons (parse-pat p1) (parse-pat p2))] + [(list 'and p1 p2) + (PAnd (parse-pat p1) (parse-pat p2))] + [(cons 'list '()) + (PLit '())] + [(cons 'list (cons p1 ps)) + (PCons (parse-pat p1) + (parse-pat (cons 'list ps)))])) + +(define (self-quoting? x) + (or (integer? x) + (boolean? x) + (char? x) + (string? x) + (box? x) + (vector? x))) + +(define op0 + '(read-byte peek-byte void)) + +(define op1 + '(add1 sub1 zero? char? write-byte eof-object? + integer->char char->integer + box unbox empty? cons? box? car cdr + vector? vector-length string? string-length + symbol? symbol->string string->symbol string->uninterned-symbol)) +(define op2 + '(+ - < = cons eq? make-vector vector-ref make-string string-ref)) +(define op3 + '(vector-set!)) + +(define (op? ops) + (λ (x) + (and (symbol? x) + (memq x ops)))) diff --git a/langs/mountebank/print.c b/langs/mountebank/print.c new file mode 100644 index 00000000..2bcb21dc --- /dev/null +++ b/langs/mountebank/print.c @@ -0,0 +1,855 @@ +#include +#include +#include "values.h" + +void print_char(val_char_t); +void print_codepoint(val_char_t); +void print_cons(val_cons_t *); +void print_vect(val_vect_t*); +void print_str(val_str_t*); +void print_symb(val_symb_t*); +void print_str_char(val_char_t); +void print_result_interior(val_t); +int utf8_encode_char(val_char_t, char *); + +void print_result(val_t x) +{ + switch (val_typeof(x)) { + case T_INT: + printf("%" PRId64, val_unwrap_int(x)); + break; + case T_BOOL: + printf(val_unwrap_bool(x) ? 
"#t" : "#f"); + break; + case T_CHAR: + print_char(val_unwrap_char(x)); + break; + case T_EOF: + printf("#"); + break; + case T_VOID: + break; + case T_EMPTY: + case T_BOX: + case T_CONS: + case T_VECT: + printf("'"); + print_result_interior(x); + break; + case T_STR: + putchar('"'); + print_str(val_unwrap_str(x)); + putchar('"'); + break; + case T_SYMB: + printf("'"); + print_result_interior(x); + break; + case T_PROC: + printf("#"); + break; + case T_INVALID: + printf("internal error"); + } +} + +void print_symb(val_symb_t *s) +{ + print_str((val_str_t*) s); +} + +void print_result_interior(val_t x) +{ + switch (val_typeof(x)) { + case T_EMPTY: + printf("()"); + break; + case T_BOX: + printf("#&"); + print_result_interior(val_unwrap_box(x)->val); + break; + case T_CONS: + printf("("); + print_cons(val_unwrap_cons(x)); + printf(")"); + break; + case T_SYMB: + print_symb(val_unwrap_symb(x)); + break; + case T_VECT: + print_vect(val_unwrap_vect(x)); + break; + default: + print_result(x); + } +} + +void print_vect(val_vect_t *v) +{ + uint64_t i; + + if (!v) { printf("#()"); return; } + + printf("#("); + for (i = 0; i < v->len; ++i) { + print_result_interior(v->elems[i]); + + if (i < v->len - 1) + putchar(' '); + } + printf(")"); +} + +void print_cons(val_cons_t *cons) +{ + print_result_interior(cons->fst); + + switch (val_typeof(cons->snd)) { + case T_EMPTY: + // nothing + break; + case T_CONS: + printf(" "); + print_cons(val_unwrap_cons(cons->snd)); + break; + default: + printf(" . "); + print_result_interior(cons->snd); + break; + } +} + +void print_str(val_str_t* s) +{ + if (!s) return; + uint64_t i; + for (i = 0; i < s->len; ++i) + print_str_char(s->codepoints[i]); +} + +void print_str_char_u(val_char_t c) +{ + printf("\\u%04X", c); +} + +void print_str_char_U(val_char_t c) +{ + printf("\\U%08X", c); +} + +void print_str_char(val_char_t c) +{ + switch (c) { + case 0 ... 
6: + print_str_char_u(c); + break; + case 7: + printf("\\a"); + break; + case 8: + printf("\\b"); + break; + case 9: + printf("\\t"); + break; + case 10: + printf("\\n"); + break; + case 11: + printf("\\v"); + break; + case 12: + printf("\\f"); + break; + case 13: + printf("\\r"); + break; + case 14 ... 26: + print_str_char_u(c); + break; + case 27: + printf("\\e"); + break; + case 28 ... 31: + print_str_char_u(c); + break; + case 34: + printf("\\\""); + break; + case 39: + printf("'"); + break; + case 92: + printf("\\\\"); + break; + case 127 ... 159: + case 173 ... 173: + case 888 ... 889: + case 896 ... 899: + case 907 ... 907: + case 909 ... 909: + case 930 ... 930: + case 1328 ... 1328: + case 1367 ... 1368: + case 1376 ... 1376: + case 1416 ... 1416: + case 1419 ... 1420: + case 1424 ... 1424: + case 1480 ... 1487: + case 1515 ... 1519: + case 1525 ... 1541: + case 1564 ... 1565: + case 1757 ... 1757: + case 1806 ... 1807: + case 1867 ... 1868: + case 1970 ... 1983: + case 2043 ... 2047: + case 2094 ... 2095: + case 2111 ... 2111: + case 2140 ... 2141: + case 2143 ... 2207: + case 2227 ... 2275: + case 2436 ... 2436: + case 2445 ... 2446: + case 2449 ... 2450: + case 2473 ... 2473: + case 2481 ... 2481: + case 2483 ... 2485: + case 2490 ... 2491: + case 2501 ... 2502: + case 2505 ... 2506: + case 2511 ... 2518: + case 2520 ... 2523: + case 2526 ... 2526: + case 2532 ... 2533: + case 2556 ... 2560: + case 2564 ... 2564: + case 2571 ... 2574: + case 2577 ... 2578: + case 2601 ... 2601: + case 2609 ... 2609: + case 2612 ... 2612: + case 2615 ... 2615: + case 2618 ... 2619: + case 2621 ... 2621: + case 2627 ... 2630: + case 2633 ... 2634: + case 2638 ... 2640: + case 2642 ... 2648: + case 2653 ... 2653: + case 2655 ... 2661: + case 2678 ... 2688: + case 2692 ... 2692: + case 2702 ... 2702: + case 2706 ... 2706: + case 2729 ... 2729: + case 2737 ... 2737: + case 2740 ... 2740: + case 2746 ... 2747: + case 2758 ... 2758: + case 2762 ... 2762: + case 2766 ... 
2767: + case 2769 ... 2783: + case 2788 ... 2789: + case 2802 ... 2816: + case 2820 ... 2820: + case 2829 ... 2830: + case 2833 ... 2834: + case 2857 ... 2857: + case 2865 ... 2865: + case 2868 ... 2868: + case 2874 ... 2875: + case 2885 ... 2886: + case 2889 ... 2890: + case 2894 ... 2901: + case 2904 ... 2907: + case 2910 ... 2910: + case 2916 ... 2917: + case 2936 ... 2945: + case 2948 ... 2948: + case 2955 ... 2957: + case 2961 ... 2961: + case 2966 ... 2968: + case 2971 ... 2971: + case 2973 ... 2973: + case 2976 ... 2978: + case 2981 ... 2983: + case 2987 ... 2989: + case 3002 ... 3005: + case 3011 ... 3013: + case 3017 ... 3017: + case 3022 ... 3023: + case 3025 ... 3030: + case 3032 ... 3045: + case 3067 ... 3071: + case 3076 ... 3076: + case 3085 ... 3085: + case 3089 ... 3089: + case 3113 ... 3113: + case 3130 ... 3132: + case 3141 ... 3141: + case 3145 ... 3145: + case 3150 ... 3156: + case 3159 ... 3159: + case 3162 ... 3167: + case 3172 ... 3173: + case 3184 ... 3191: + case 3200 ... 3200: + case 3204 ... 3204: + case 3213 ... 3213: + case 3217 ... 3217: + case 3241 ... 3241: + case 3252 ... 3252: + case 3258 ... 3259: + case 3269 ... 3269: + case 3273 ... 3273: + case 3278 ... 3284: + case 3287 ... 3293: + case 3295 ... 3295: + case 3300 ... 3301: + case 3312 ... 3312: + case 3315 ... 3328: + case 3332 ... 3332: + case 3341 ... 3341: + case 3345 ... 3345: + case 3387 ... 3388: + case 3397 ... 3397: + case 3401 ... 3401: + case 3407 ... 3414: + case 3416 ... 3423: + case 3428 ... 3429: + case 3446 ... 3448: + case 3456 ... 3457: + case 3460 ... 3460: + case 3479 ... 3481: + case 3506 ... 3506: + case 3516 ... 3516: + case 3518 ... 3519: + case 3527 ... 3529: + case 3531 ... 3534: + case 3541 ... 3541: + case 3543 ... 3543: + case 3552 ... 3557: + case 3568 ... 3569: + case 3573 ... 3584: + case 3643 ... 3646: + case 3676 ... 3712: + case 3715 ... 3715: + case 3717 ... 3718: + case 3721 ... 3721: + case 3723 ... 3724: + case 3726 ... 
3731: + case 3736 ... 3736: + case 3744 ... 3744: + case 3748 ... 3748: + case 3750 ... 3750: + case 3752 ... 3753: + case 3756 ... 3756: + case 3770 ... 3770: + case 3774 ... 3775: + case 3781 ... 3781: + case 3783 ... 3783: + case 3790 ... 3791: + case 3802 ... 3803: + case 3808 ... 3839: + case 3912 ... 3912: + case 3949 ... 3952: + case 3992 ... 3992: + case 4029 ... 4029: + case 4045 ... 4045: + case 4059 ... 4095: + case 4294 ... 4294: + case 4296 ... 4300: + case 4302 ... 4303: + case 4681 ... 4681: + case 4686 ... 4687: + case 4695 ... 4695: + case 4697 ... 4697: + case 4702 ... 4703: + case 4745 ... 4745: + case 4750 ... 4751: + case 4785 ... 4785: + case 4790 ... 4791: + case 4799 ... 4799: + case 4801 ... 4801: + case 4806 ... 4807: + case 4823 ... 4823: + case 4881 ... 4881: + case 4886 ... 4887: + case 4955 ... 4956: + case 4989 ... 4991: + case 5018 ... 5023: + case 5109 ... 5119: + case 5789 ... 5791: + case 5881 ... 5887: + case 5901 ... 5901: + case 5909 ... 5919: + case 5943 ... 5951: + case 5972 ... 5983: + case 5997 ... 5997: + case 6001 ... 6001: + case 6004 ... 6015: + case 6110 ... 6111: + case 6122 ... 6127: + case 6138 ... 6143: + case 6158 ... 6159: + case 6170 ... 6175: + case 6264 ... 6271: + case 6315 ... 6319: + case 6390 ... 6399: + case 6431 ... 6431: + case 6444 ... 6447: + case 6460 ... 6463: + case 6465 ... 6467: + case 6510 ... 6511: + case 6517 ... 6527: + case 6572 ... 6575: + case 6602 ... 6607: + case 6619 ... 6621: + case 6684 ... 6685: + case 6751 ... 6751: + case 6781 ... 6782: + case 6794 ... 6799: + case 6810 ... 6815: + case 6830 ... 6831: + case 6847 ... 6911: + case 6988 ... 6991: + case 7037 ... 7039: + case 7156 ... 7163: + case 7224 ... 7226: + case 7242 ... 7244: + case 7296 ... 7359: + case 7368 ... 7375: + case 7415 ... 7415: + case 7418 ... 7423: + case 7670 ... 7675: + case 7958 ... 7959: + case 7966 ... 7967: + case 8006 ... 8007: + case 8014 ... 8015: + case 8024 ... 8024: + case 8026 ... 
8026: + case 8028 ... 8028: + case 8030 ... 8030: + case 8062 ... 8063: + case 8117 ... 8117: + case 8133 ... 8133: + case 8148 ... 8149: + case 8156 ... 8156: + case 8176 ... 8177: + case 8181 ... 8181: + case 8191 ... 8191: + case 8203 ... 8207: + case 8232 ... 8238: + case 8288 ... 8303: + case 8306 ... 8307: + case 8335 ... 8335: + case 8349 ... 8351: + case 8382 ... 8399: + case 8433 ... 8447: + case 8586 ... 8591: + case 9211 ... 9215: + case 9255 ... 9279: + case 9291 ... 9311: + case 11124 ... 11125: + case 11158 ... 11159: + case 11194 ... 11196: + case 11209 ... 11209: + case 11218 ... 11263: + case 11311 ... 11311: + case 11359 ... 11359: + case 11508 ... 11512: + case 11558 ... 11558: + case 11560 ... 11564: + case 11566 ... 11567: + case 11624 ... 11630: + case 11633 ... 11646: + case 11671 ... 11679: + case 11687 ... 11687: + case 11695 ... 11695: + case 11703 ... 11703: + case 11711 ... 11711: + case 11719 ... 11719: + case 11727 ... 11727: + case 11735 ... 11735: + case 11743 ... 11743: + case 11843 ... 11903: + case 11930 ... 11930: + case 12020 ... 12031: + case 12246 ... 12271: + case 12284 ... 12287: + case 12352 ... 12352: + case 12439 ... 12440: + case 12544 ... 12548: + case 12590 ... 12592: + case 12687 ... 12687: + case 12731 ... 12735: + case 12772 ... 12783: + case 12831 ... 12831: + case 13055 ... 13055: + case 19894 ... 19903: + case 40909 ... 40959: + case 42125 ... 42127: + case 42183 ... 42191: + case 42540 ... 42559: + case 42654 ... 42654: + case 42744 ... 42751: + case 42895 ... 42895: + case 42926 ... 42927: + case 42930 ... 42998: + case 43052 ... 43055: + case 43066 ... 43071: + case 43128 ... 43135: + case 43205 ... 43213: + case 43226 ... 43231: + case 43260 ... 43263: + case 43348 ... 43358: + case 43389 ... 43391: + case 43470 ... 43470: + case 43482 ... 43485: + case 43519 ... 43519: + case 43575 ... 43583: + case 43598 ... 43599: + case 43610 ... 43611: + case 43715 ... 43738: + case 43767 ... 43776: + case 43783 ... 
43784: + case 43791 ... 43792: + case 43799 ... 43807: + case 43815 ... 43815: + case 43823 ... 43823: + case 43872 ... 43875: + case 43878 ... 43967: + case 44014 ... 44015: + case 44026 ... 44031: + case 55204 ... 55215: + case 55239 ... 55242: + case 55292 ... 55295: + case 57344 ... 63743: + case 64110 ... 64111: + case 64218 ... 64255: + case 64263 ... 64274: + case 64280 ... 64284: + case 64311 ... 64311: + case 64317 ... 64317: + case 64319 ... 64319: + case 64322 ... 64322: + case 64325 ... 64325: + case 64450 ... 64466: + case 64832 ... 64847: + case 64912 ... 64913: + case 64968 ... 65007: + case 65022 ... 65023: + case 65050 ... 65055: + case 65070 ... 65071: + case 65107 ... 65107: + case 65127 ... 65127: + case 65132 ... 65135: + case 65141 ... 65141: + case 65277 ... 65280: + case 65471 ... 65473: + case 65480 ... 65481: + case 65488 ... 65489: + case 65496 ... 65497: + case 65501 ... 65503: + case 65511 ... 65511: + case 65519 ... 65531: + case 65534 ... 65535: + print_str_char_u(c); + break; + case 65548 ... 65548: + case 65575 ... 65575: + case 65595 ... 65595: + case 65598 ... 65598: + case 65614 ... 65615: + case 65630 ... 65663: + case 65787 ... 65791: + case 65795 ... 65798: + case 65844 ... 65846: + case 65933 ... 65935: + case 65948 ... 65951: + case 65953 ... 65999: + case 66046 ... 66175: + case 66205 ... 66207: + case 66257 ... 66271: + case 66300 ... 66303: + case 66340 ... 66351: + case 66379 ... 66383: + case 66427 ... 66431: + case 66462 ... 66462: + case 66500 ... 66503: + case 66518 ... 66559: + case 66718 ... 66719: + case 66730 ... 66815: + case 66856 ... 66863: + case 66916 ... 66926: + case 66928 ... 67071: + case 67383 ... 67391: + case 67414 ... 67423: + case 67432 ... 67583: + case 67590 ... 67591: + case 67593 ... 67593: + case 67638 ... 67638: + case 67641 ... 67643: + case 67645 ... 67646: + case 67670 ... 67670: + case 67743 ... 67750: + case 67760 ... 67839: + case 67868 ... 67870: + case 67898 ... 67902: + case 67904 ... 
67967: + case 68024 ... 68029: + case 68032 ... 68095: + case 68100 ... 68100: + case 68103 ... 68107: + case 68116 ... 68116: + case 68120 ... 68120: + case 68148 ... 68151: + case 68155 ... 68158: + case 68168 ... 68175: + case 68185 ... 68191: + case 68256 ... 68287: + case 68327 ... 68330: + case 68343 ... 68351: + case 68406 ... 68408: + case 68438 ... 68439: + case 68467 ... 68471: + case 68498 ... 68504: + case 68509 ... 68520: + case 68528 ... 68607: + case 68681 ... 69215: + case 69247 ... 69631: + case 69710 ... 69713: + case 69744 ... 69758: + case 69821 ... 69821: + case 69826 ... 69839: + case 69865 ... 69871: + case 69882 ... 69887: + case 69941 ... 69941: + case 69956 ... 69967: + case 70007 ... 70015: + case 70089 ... 70092: + case 70094 ... 70095: + case 70107 ... 70112: + case 70133 ... 70143: + case 70162 ... 70162: + case 70206 ... 70319: + case 70379 ... 70383: + case 70394 ... 70400: + case 70404 ... 70404: + case 70413 ... 70414: + case 70417 ... 70418: + case 70441 ... 70441: + case 70449 ... 70449: + case 70452 ... 70452: + case 70458 ... 70459: + case 70469 ... 70470: + case 70473 ... 70474: + case 70478 ... 70486: + case 70488 ... 70492: + case 70500 ... 70501: + case 70509 ... 70511: + case 70517 ... 70783: + case 70856 ... 70863: + case 70874 ... 71039: + case 71094 ... 71095: + case 71114 ... 71167: + case 71237 ... 71247: + case 71258 ... 71295: + case 71352 ... 71359: + case 71370 ... 71839: + case 71923 ... 71934: + case 71936 ... 72383: + case 72441 ... 73727: + case 74649 ... 74751: + case 74863 ... 74863: + case 74869 ... 77823: + case 78895 ... 92159: + case 92729 ... 92735: + case 92767 ... 92767: + case 92778 ... 92781: + case 92784 ... 92879: + case 92910 ... 92911: + case 92918 ... 92927: + case 92998 ... 93007: + case 93018 ... 93018: + case 93026 ... 93026: + case 93048 ... 93052: + case 93072 ... 93951: + case 94021 ... 94031: + case 94079 ... 94094: + case 94112 ... 110591: + case 110594 ... 113663: + case 113771 ... 
113775: + case 113789 ... 113791: + case 113801 ... 113807: + case 113818 ... 113819: + case 113824 ... 118783: + case 119030 ... 119039: + case 119079 ... 119080: + case 119155 ... 119162: + case 119262 ... 119295: + case 119366 ... 119551: + case 119639 ... 119647: + case 119666 ... 119807: + case 119893 ... 119893: + case 119965 ... 119965: + case 119968 ... 119969: + case 119971 ... 119972: + case 119975 ... 119976: + case 119981 ... 119981: + case 119994 ... 119994: + case 119996 ... 119996: + case 120004 ... 120004: + case 120070 ... 120070: + case 120075 ... 120076: + case 120085 ... 120085: + case 120093 ... 120093: + case 120122 ... 120122: + case 120127 ... 120127: + case 120133 ... 120133: + case 120135 ... 120137: + case 120145 ... 120145: + case 120486 ... 120487: + case 120780 ... 120781: + case 120832 ... 124927: + case 125125 ... 125126: + case 125143 ... 126463: + case 126468 ... 126468: + case 126496 ... 126496: + case 126499 ... 126499: + case 126501 ... 126502: + case 126504 ... 126504: + case 126515 ... 126515: + case 126520 ... 126520: + case 126522 ... 126522: + case 126524 ... 126529: + case 126531 ... 126534: + case 126536 ... 126536: + case 126538 ... 126538: + case 126540 ... 126540: + case 126544 ... 126544: + case 126547 ... 126547: + case 126549 ... 126550: + case 126552 ... 126552: + case 126554 ... 126554: + case 126556 ... 126556: + case 126558 ... 126558: + case 126560 ... 126560: + case 126563 ... 126563: + case 126565 ... 126566: + case 126571 ... 126571: + case 126579 ... 126579: + case 126584 ... 126584: + case 126589 ... 126589: + case 126591 ... 126591: + case 126602 ... 126602: + case 126620 ... 126624: + case 126628 ... 126628: + case 126634 ... 126634: + case 126652 ... 126703: + case 126706 ... 126975: + case 127020 ... 127023: + case 127124 ... 127135: + case 127151 ... 127152: + case 127168 ... 127168: + case 127184 ... 127184: + case 127222 ... 127231: + case 127245 ... 127247: + case 127279 ... 
127279: + case 127340 ... 127343: + case 127387 ... 127461: + case 127491 ... 127503: + case 127547 ... 127551: + case 127561 ... 127567: + case 127570 ... 127743: + case 127789 ... 127791: + case 127870 ... 127871: + case 127951 ... 127955: + case 127992 ... 127999: + case 128255 ... 128255: + case 128331 ... 128335: + case 128378 ... 128378: + case 128420 ... 128420: + case 128579 ... 128580: + case 128720 ... 128735: + case 128749 ... 128751: + case 128756 ... 128767: + case 128884 ... 128895: + case 128981 ... 129023: + case 129036 ... 129039: + case 129096 ... 129103: + case 129114 ... 129119: + case 129160 ... 129167: + case 129198 ... 131071: + case 173783 ... 173823: + case 177973 ... 177983: + case 178206 ... 194559: + case 195102 ... 917759: + case 918000 ... 1114110: + print_str_char_U(c); + break; + default: + print_codepoint(c); + break; + } +} + +void print_char(val_char_t c) +{ + printf("#\\"); + switch (c) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(c); + } +} + +void print_codepoint(val_char_t c) +{ + char buffer[5] = {0}; + utf8_encode_char(c, buffer); + printf("%s", buffer); +} + +int utf8_encode_char(val_char_t c, char *buffer) +{ + // Output to buffer using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (c < 128) { + buffer[0] = (char) c; + return 1; + } else if (c < 2048) { + buffer[0] = (char)(c >> 6) | 192; + buffer[1] = ((char) c & 63) | 128; + return 2; + } else if (c < 65536) { + buffer[0] = (char)(c >> 12) | 224; + buffer[1] = ((char)(c >> 6) & 63) | 128; + buffer[2] = ((char) c & 63) | 128; + return 3; + } else { + buffer[0] = (char)(c >> 18) | 240; + buffer[1] = ((char)(c >> 12) & 63) | 128; + 
buffer[2] = ((char)(c >> 6) & 63) | 128; + buffer[3] = ((char) c & 63) | 128; + return 4; + } +} diff --git a/langs/mountebank/print.h b/langs/mountebank/print.h new file mode 100644 index 00000000..c22081a2 --- /dev/null +++ b/langs/mountebank/print.h @@ -0,0 +1,8 @@ +#ifndef PRINT_H +#define PRINT_H + +#include "values.h" + +void print_result(val_t); + +#endif diff --git a/langs/mountebank/read-all.rkt b/langs/mountebank/read-all.rkt new file mode 100644 index 00000000..8a3289a5 --- /dev/null +++ b/langs/mountebank/read-all.rkt @@ -0,0 +1,8 @@ +#lang racket +(provide read-all) +;; read all s-expression until eof +(define (read-all) + (let ((r (read))) + (if (eof-object? r) + '() + (cons r (read-all))))) diff --git a/langs/mountebank/run.rkt b/langs/mountebank/run.rkt new file mode 100644 index 00000000..eaa53eb9 --- /dev/null +++ b/langs/mountebank/run.rkt @@ -0,0 +1,18 @@ +#lang racket +(provide run run/io) +(require "types.rkt" "build-runtime.rkt" + a86/interp) + +;; Asm -> Answer +(define (run is) + (parameterize ((current-objs (list runtime-path))) + (match (asm-interp is) + ['err 'err] + [b (bits->value b)]))) + +;; Asm String -> (cons Answer String) +(define (run/io is s) + (parameterize ((current-objs (list runtime-path))) + (match (asm-interp/io is s) + [(cons 'err o) (cons 'err o)] + [(cons b o) (cons (bits->value b) o)]))) diff --git a/langs/hoodwink/runtime.h b/langs/mountebank/runtime.h similarity index 73% rename from langs/hoodwink/runtime.h rename to langs/mountebank/runtime.h index 813214d6..f594f0f6 100644 --- a/langs/hoodwink/runtime.h +++ b/langs/mountebank/runtime.h @@ -1,15 +1,11 @@ #ifndef RUNTIME_H #define RUNTIME_H - -#include "values.h" - -val_t entry(); +int64_t entry(); extern FILE* in; extern FILE* out; extern void (*error_handler)(); // in words #define heap_size 10000 -extern val_t *heap; - +extern int64_t *heap; #endif /* RUNTIME_H */ diff --git a/langs/mountebank/simple-interp.rkt b/langs/mountebank/simple-interp.rkt new file mode 
100644 index 00000000..e2eb2b62 --- /dev/null +++ b/langs/mountebank/simple-interp.rkt @@ -0,0 +1,59 @@ +#lang racket + +;; type Expr = Number +;; | Boolean +;; | (list Op1 Expr) +;; | (list Op2 Expr) +;; | (list 'if Expr Expr Expr) +;; | (list Expr Expr) +;; | (list 'λ (list Id) Expr) +;; | Id + +;; type Id = Symbol +;; type Op1 = 'sub1 | 'zero? +;; type Op2 = '+ + +;; type Value = Number +;; | Boolean +;; | (Value -> Value) + +;; Expr Env -> Value +(define (interp e r) + (match e + [(list '+ e1 e2) + (+ (interp e1 r) (interp e2 r))] + [(list 'sub1 e1) + (sub1 (interp e1 r))] + [(list 'zero? e1) + (zero? (interp e1 r))] + [(list 'if e1 e2 e3) + (if (interp e1 r) + (interp e2 r) + (interp e3 r))] + [(list 'λ (list x) e1) + (λ (v) (interp e1 (cons (cons x v) r)))] + [(list e1 e2) + ((interp e1 r) (interp e2 r))] + [_ + (if (symbol? e) + (lookup e r) + e)])) + +;; Id Env -> Value +(define (lookup x r) + (match r + [(cons (cons y v) r) + (if (eq? x y) + v + (lookup x r))])) + +(interp '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? 
n) + 0 + (+ n (tri (sub1 n))))))) + 36) + '()) diff --git a/langs/mountebank/symbol.c b/langs/mountebank/symbol.c new file mode 100644 index 00000000..bcff4f3f --- /dev/null +++ b/langs/mountebank/symbol.c @@ -0,0 +1,55 @@ +#include +#include +#include "values.h" + +int symb_cmp(const val_symb_t *, const val_symb_t *); + +// binary tree node +struct Node { + val_symb_t* elem; + struct Node* left; + struct Node* right; +}; + +static struct Node *symbol_tbl = NULL; + +val_symb_t *intern_symbol(val_symb_t* symb) +{ + struct Node **curr = &symbol_tbl; + + while (*curr) { + struct Node *t = *curr; + int r = symb_cmp(symb, t->elem); + if (r == 0) { + // found it, so return saved pointer + return t->elem; + } else if (r < 0) { + curr = &t->left; + } else { + curr = &t->right; + } + } + + // wasn't found, so insert it and return pointer + *curr = calloc(1, sizeof(struct Node)); + (*curr)->elem = symb; + return (*curr)->elem; +} + +int symb_cmp(const val_symb_t *s1, const val_symb_t *s2) +{ + if (s1 == s2) return 0; + + int64_t len1 = s1->len; + int64_t len2 = s2->len; + + int64_t len = len1 < len2 ? len1 : len2; + int i; + + for (i = 0; i < len; i++) { + if (s1->codepoints[i] != s2->codepoints[i]) + return s1->codepoints[i] - s2->codepoints[i]; + } + + return len1 - len2; +} diff --git a/langs/mountebank/test/build-runtime.rkt b/langs/mountebank/test/build-runtime.rkt new file mode 100644 index 00000000..7023ee0b --- /dev/null +++ b/langs/mountebank/test/build-runtime.rkt @@ -0,0 +1,8 @@ +#lang racket +(require a86/interp) + +;; link with runtime for IO operations +(unless (file-exists? "../runtime.o") + (system "make -C .. 
runtime.o")) +(current-objs + (list (path->string (normalize-path "../runtime.o")))) diff --git a/langs/mountebank/test/compile.rkt b/langs/mountebank/test/compile.rkt new file mode 100644 index 00000000..ee289de8 --- /dev/null +++ b/langs/mountebank/test/compile.rkt @@ -0,0 +1,8 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../compile.rkt" + "../run.rkt") + +(test-runner (λ p (run (compile (parse p))))) +(test-runner-io (λ (s . p) (run/io (compile (parse p)) s))) diff --git a/langs/mountebank/test/interp-defun.rkt b/langs/mountebank/test/interp-defun.rkt new file mode 100644 index 00000000..68ef4191 --- /dev/null +++ b/langs/mountebank/test/interp-defun.rkt @@ -0,0 +1,24 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../interp-defun.rkt" + "../interp-io.rkt") + +(define (closure->proc xs e r) + ;; Could make this better by calling the interpreter, + ;; but it's only used in tests where all we care about + ;; is that you get a procedure. + (lambda _ + (error "This function is not callable."))) + +(test-runner + (λ p + (match (interp (parse p)) + [(Closure xs e r) (closure->proc xs e r)] + [v v]))) +(test-runner-io + (λ (s . p) + (match (interp/io (parse p) s) + [(cons (Closure xs e r) o) + (cons (closure->proc xs e r) o)] + [r r]))) diff --git a/langs/hustle/test/interp.rkt b/langs/mountebank/test/interp.rkt similarity index 55% rename from langs/hustle/test/interp.rkt rename to langs/mountebank/test/interp.rkt index 1eaa5864..cd7b654e 100644 --- a/langs/hustle/test/interp.rkt +++ b/langs/mountebank/test/interp.rkt @@ -4,6 +4,5 @@ "../interp.rkt" "../interp-io.rkt") -(test-runner (λ (e) (interp (parse e)))) - -(test-runner-io (λ (e s) (interp/io (parse e) s))) +(test-runner (λ p (interp (parse p)))) +(test-runner-io (λ (s . 
p) (interp/io (parse p) s))) diff --git a/langs/mountebank/test/test-runner.rkt b/langs/mountebank/test/test-runner.rkt new file mode 100644 index 00000000..d4cb5b2e --- /dev/null +++ b/langs/mountebank/test/test-runner.rkt @@ -0,0 +1,452 @@ +#lang racket +(provide test-runner test-runner-io) +(require rackunit) + +(define (test-runner run) + ;; Abscond examples + (check-equal? (run 7) 7) + (check-equal? (run -8) -8) + + ;; Blackmail examples + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7) + + ;; Con examples + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 1) 1 2) + 7)) + 7) + + ;; Dupe examples + (check-equal? (run #t) #t) + (check-equal? (run #f) #f) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) + (check-equal? (run '(if #t 3 4)) 3) + (check-equal? (run '(if #f 3 4)) 4) + (check-equal? (run '(if 0 3 4)) 3) + (check-equal? (run '(zero? 4)) #f) + (check-equal? (run '(zero? 0)) #t) + + ;; Dodger examples + (check-equal? (run #\a) #\a) + (check-equal? (run #\b) #\b) + (check-equal? (run '(char? #\a)) #t) + (check-equal? (run '(char? #t)) #f) + (check-equal? (run '(char? 8)) #f) + (check-equal? (run '(char->integer #\a)) (char->integer #\a)) + (check-equal? (run '(integer->char 955)) #\λ) + + ;; Extort examples + (check-equal? (run '(add1 #f)) 'err) + (check-equal? (run '(sub1 #f)) 'err) + (check-equal? (run '(zero? #f)) 'err) + (check-equal? (run '(char->integer #f)) 'err) + (check-equal? (run '(integer->char #f)) 'err) + (check-equal? (run '(integer->char -1)) 'err) + (check-equal? (run '(write-byte #f)) 'err) + (check-equal? (run '(write-byte -1)) 'err) + (check-equal? (run '(write-byte 256)) 'err) + + ;; Fraud examples + (check-equal? 
(run '(let ((x 7)) x)) 7) + (check-equal? (run '(let ((x 7)) 2)) 2) + (check-equal? (run '(let ((x 7)) (add1 x))) 8) + (check-equal? (run '(let ((x (add1 7))) x)) 8) + (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) + (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) + (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) + + (check-equal? (run '(let ((x 0)) + (if (zero? x) 7 8))) + 7) + (check-equal? (run '(let ((x 1)) + (add1 (if (zero? x) 7 8)))) + 9) + (check-equal? (run '(+ 3 4)) 7) + (check-equal? (run '(- 3 4)) -1) + (check-equal? (run '(+ (+ 2 1) 4)) 7) + (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) + (check-equal? (run '(let ((x (+ 1 2))) + (let ((z (- 4 x))) + (+ (+ x x) z)))) + 7) + (check-equal? (run '(= 5 5)) #t) + (check-equal? (run '(= 4 5)) #f) + (check-equal? (run '(= (add1 4) 5)) #t) + (check-equal? (run '(< 5 5)) #f) + (check-equal? (run '(< 4 5)) #t) + (check-equal? (run '(< (add1 4) 5)) #f) + + ;; Hustle examples + (check-equal? (run ''()) '()) + (check-equal? (run '(box 1)) (box 1)) + (check-equal? (run '(box -1)) (box -1)) + (check-equal? (run '(cons 1 2)) (cons 1 2)) + (check-equal? (run '(unbox (box 1))) 1) + (check-equal? (run '(car (cons 1 2))) 1) + (check-equal? (run '(cdr (cons 1 2))) 2) + (check-equal? (run '(cons 1 '())) (list 1)) + (check-equal? (run '(let ((x (cons 1 2))) + (begin (cdr x) + (car x)))) + 1) + (check-equal? (run '(let ((x (cons 1 2))) + (let ((y (box 3))) + (unbox y)))) + 3) + (check-equal? (run '(eq? 1 1)) #t) + (check-equal? (run '(eq? 1 2)) #f) + (check-equal? (run '(eq? (cons 1 2) (cons 1 2))) #f) + (check-equal? (run '(let ((x (cons 1 2))) (eq? x x))) #t) + + ;; Hoax examples + (check-equal? (run '(make-vector 0 0)) #()) + (check-equal? (run '(make-vector 1 0)) #(0)) + (check-equal? (run '(make-vector 3 0)) #(0 0 0)) + (check-equal? (run '(make-vector 3 5)) #(5 5 5)) + (check-equal? (run '(vector? (make-vector 0 0))) #t) + (check-equal? (run '(vector? (cons 0 0))) #f) + (check-equal? 
(run '(vector-ref (make-vector 0 #f) 0)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 2)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 3)) 'err) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 0 4) + x))) + #(4 5 5)) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 1 4) + x))) + #(5 4 5)) + (check-equal? (run '(vector-length (make-vector 3 #f))) 3) + (check-equal? (run '(vector-length (make-vector 0 #f))) 0) + (check-equal? (run '"") "") + (check-equal? (run '"fred") "fred") + (check-equal? (run '"wilma") "wilma") + (check-equal? (run '(make-string 0 #\f)) "") + (check-equal? (run '(make-string 3 #\f)) "fff") + (check-equal? (run '(make-string 3 #\g)) "ggg") + (check-equal? (run '(string-length "")) 0) + (check-equal? (run '(string-length "fred")) 4) + (check-equal? (run '(string-ref "" 0)) 'err) + (check-equal? (run '(string-ref (make-string 0 #\a) 0)) 'err) + (check-equal? (run '(string-ref "fred" 0)) #\f) + (check-equal? (run '(string-ref "fred" 1)) #\r) + (check-equal? (run '(string-ref "fred" 2)) #\e) + (check-equal? (run '(string-ref "fred" 4)) 'err) + (check-equal? (run '(string? "fred")) #t) + (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") + + ;; Iniquity tests + (check-equal? (run + '(define (f x) x) + '(f 5)) + 5) + + (check-equal? (run + '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) + '(tri 9)) + 45) + + (check-equal? (run + '(define (f x) x) + '(define (g x) (f x)) + '(g 5)) + 5) + (check-equal? (run + '(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) + '(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + '(even? 101)) + #f) + (check-equal? (run + '(define (map-add1 xs) + (if (empty? 
xs) + '() + (cons (add1 (car xs)) + (map-add1 (cdr xs))))) + '(map-add1 (cons 1 (cons 2 (cons 3 '()))))) + '(2 3 4)) + (check-equal? (run + '(define (f x) + 10) + '(f 1)) + 10) + (check-equal? (run + '(define (f x) + 10) + '(let ((x 2)) (f 1))) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(f 1 2)) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(let ((z 2)) (f 1 2))) + 10) + (check-equal? (run '(define (f x y) y) + '(f 1 (add1 #f))) + 'err) + + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? (run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) + + ;; Loot examples + (check-true (procedure? (run '(λ (x) x)))) + (check-equal? (run '((λ (x) x) 5)) + 5) + + (check-equal? (run '(let ((f (λ (x) x))) (f 5))) + 5) + (check-equal? (run '(let ((f (λ (x y) x))) (f 5 7))) + 5) + (check-equal? (run '(let ((f (λ (x y) y))) (f 5 7))) + 7) + (check-equal? (run '((let ((x 1)) + (let ((y 2)) + (lambda (z) (cons x (cons y (cons z '())))))) + 3)) + '(1 2 3)) + (check-equal? (run '(define (adder n) + (λ (x) (+ x n))) + '((adder 5) 10)) + 15) + (check-equal? 
(run '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))))) + 36)) + 666) + (check-equal? (run '(define (tri n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))) + '(tri 36)) + 666) + (check-equal? (run '(define (tri n) + (match n + [0 0] + [m (+ m (tri (sub1 m)))])) + '(tri 36)) + 666) + (check-equal? (run '((match 8 [8 (lambda (x) x)]) 12)) + 12) + + ;; Mug examples + (check-equal? (run '(symbol? 'foo)) #t) + (check-equal? (run '(symbol? (string->symbol "foo"))) #t) + (check-equal? (run '(eq? 'foo 'foo)) #t) + (check-equal? (run '(eq? (string->symbol "foo") + (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'foo (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) + #t) + (check-equal? (run '(symbol? 'g0)) #t) + (check-equal? (run '(symbol? "g0")) #f) + (check-equal? (run '(symbol? (string->symbol "g0"))) #t) + (check-equal? (run '(symbol? (string->uninterned-symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->uninterned-symbol "g0"))) #f) + (check-equal? (run '(eq? (string->uninterned-symbol "g0") (string->uninterned-symbol "g0"))) + #f) + (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f) + (check-equal? (run '(string? (symbol->string 'foo))) #t) + (check-equal? (run '(eq? (symbol->string 'foo) "foo")) #f) + (check-equal? (run ''foo) 'foo) + (check-equal? (run '(eq? (match #t [_ "foo"]) "bar")) #f) + (check-equal? (run '(eq? (match #t [_ 'foo]) 'bar)) #f) + (check-equal? (run '(match 'foo ['bar #t] [_ #f])) #f) + (check-equal? (run '(match 'foo ['foo #t] [_ #f])) #t) + (check-equal? (run '(match "foo" ["foo" #t] [_ #f])) #t) + (check-equal? (run '(match "foo" ["bar" #t] [_ #f])) #f) + (check-equal? 
(run '(match (cons '+ (cons 1 (cons 2 '()))) + [(cons '+ (cons x (cons y '()))) + (+ x y)])) + 3) + + ;; Mountebank examples + (check-equal? (run '#()) + #()) + (check-equal? (run ''#()) + #()) + (check-equal? (run ''#t) + #t) + (check-equal? (run ''7) + 7) + (check-equal? (run ''(1 2 3)) + '(1 2 3)) + (check-equal? (run ''(1 . 2)) + '(1 . 2)) + (check-equal? (run ''(("1") (#() #(1 #(2))) (#&(1)) (#f) (4) (5))) + '(("1") (#() #(1 #(2))) (#&(1)) (#f) (4) (5))) + (check-equal? (run '(define (f) (cons 1 2)) + '(eq? (f) (f))) + #f) + (check-equal? (run '(define (f) '(1 . 2)) + '(eq? (f) (f))) + #t) + (check-equal? (run '(let ((x '(foo . foo))) + (eq? (car x) (cdr x)))) + #t) + (check-equal? + (run '(define (eval e r) + (match e + [(list 'zero? e) + (zero? (eval e r))] + [(list 'sub1 e) + (sub1 (eval e r))] + [(list '+ e1 e2) + (+ (eval e1 r) (eval e2 r))] + [(list 'if e1 e2 e3) + (if (eval e1 r) + (eval e2 r) + (eval e3 r))] + [(list 'λ (list x) e) + (lambda (v) (eval e (cons (cons x v) r)))] + [(list e1 e2) + ((eval e1 r) (eval e2 r))] + [_ + (if (symbol? e) + (lookup r e) + e)])) + '(define (lookup r x) + (match r + [(cons (cons y v) r) + (if (eq? x y) + v + (lookup r x))])) + '(eval '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))))) + 36) + '())) + 666)) + +(define (test-runner-io run) + ;; Evildoer examples + (check-equal? (run "" 7) (cons 7 "")) + (check-equal? (run "" '(write-byte 97)) (cons (void) "a")) + (check-equal? (run "a" '(read-byte)) (cons 97 "")) + (check-equal? (run "b" '(begin (write-byte 97) (read-byte))) + (cons 98 "a")) + (check-equal? (run "" '(read-byte)) (cons eof "")) + (check-equal? (run "" '(eof-object? (read-byte))) (cons #t "")) + (check-equal? (run "a" '(eof-object? (read-byte))) (cons #f "")) + (check-equal? (run "" '(begin (write-byte 97) (write-byte 98))) + (cons (void) "ab")) + + (check-equal? 
(run "ab" '(peek-byte)) (cons 97 "")) + (check-equal? (run "ab" '(begin (peek-byte) (read-byte))) (cons 97 "")) + ;; Extort examples + (check-equal? (run "" '(write-byte #t)) (cons 'err "")) + + ;; Fraud examples + (check-equal? (run "" '(let ((x 97)) (write-byte x))) (cons (void) "a")) + (check-equal? (run "" + '(let ((x 97)) + (begin (write-byte x) + x))) + (cons 97 "a")) + (check-equal? (run "b" '(let ((x 97)) (begin (read-byte) x))) + (cons 97 "")) + (check-equal? (run "b" '(let ((x 97)) (begin (peek-byte) x))) + (cons 97 "")) + + ;; Hustle examples + (check-equal? (run "" + '(let ((x 1)) + (begin (write-byte 97) + 1))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x 1)) + (let ((y 2)) + (begin (write-byte 97) + 1)))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x (cons 1 2))) + (begin (write-byte 97) + (car x)))) + (cons 1 "a")) + ;; Iniquity examples + #| + (check-equal? (run "" + '(define (print-alphabet i) + (if (zero? i) + (void) + (begin (write-byte (- 123 i)) + (print-alphabet (sub1 i))))) + '(print-alphabet 26)) + (cons (void) "abcdefghijklmnopqrstuvwxyz")) +|#) diff --git a/langs/mountebank/types.h b/langs/mountebank/types.h new file mode 100644 index 00000000..4093c4f7 --- /dev/null +++ b/langs/mountebank/types.h @@ -0,0 +1,42 @@ +#ifndef TYPES_H +#define TYPES_H + +/* + Bit layout of values + + Values are either: + - Immediates: end in #b000 + - Pointers + + Immediates are either + - Integers: end in #b0 000 + - Characters: end in #b01 000 + - True: #b11 000 + - False: #b1 11 000 + - Eof: #b10 11 000 + - Void: #b11 11 000 + - Empty: #b100 11 000 +*/ +#define imm_shift 3 +#define ptr_type_mask ((1 << imm_shift) - 1) +#define box_type_tag 1 +#define cons_type_tag 2 +#define vect_type_tag 3 +#define str_type_tag 4 +#define proc_type_tag 5 +#define symb_type_tag 6 +#define int_shift (1 + imm_shift) +#define int_type_mask ((1 << int_shift) - 1) +#define int_type_tag (0 << (int_shift - 1)) +#define nonint_type_tag (1 << (int_shift - 1)) 
+#define char_shift (int_shift + 1) +#define char_type_mask ((1 << char_shift) - 1) +#define char_type_tag ((0 << (char_shift - 1)) | nonint_type_tag) +#define nonchar_type_tag ((1 << (char_shift - 1)) | nonint_type_tag) +#define val_true ((0 << char_shift) | nonchar_type_tag) +#define val_false ((1 << char_shift) | nonchar_type_tag) +#define val_eof ((2 << char_shift) | nonchar_type_tag) +#define val_void ((3 << char_shift) | nonchar_type_tag) +#define val_empty ((4 << char_shift) | nonchar_type_tag) + +#endif diff --git a/langs/mountebank/types.rkt b/langs/mountebank/types.rkt new file mode 100644 index 00000000..f4cbf7d8 --- /dev/null +++ b/langs/mountebank/types.rkt @@ -0,0 +1,109 @@ +#lang racket +(provide (all-defined-out)) +(require ffi/unsafe) + +(define imm-shift 3) +(define imm-mask #b111) +(define ptr-mask #b111) +(define type-box #b001) +(define type-cons #b010) +(define type-vect #b011) +(define type-str #b100) +(define type-proc #b101) +(define type-symb #b110) +(define int-shift (+ 1 imm-shift)) +(define char-shift (+ 2 imm-shift)) +(define type-int #b0000) +(define mask-int #b1111) +(define type-char #b01000) +(define mask-char #b11111) + +(define (bits->value b) + (cond [(= b (value->bits #t)) #t] + [(= b (value->bits #f)) #f] + [(= b (value->bits eof)) eof] + [(= b (value->bits (void))) (void)] + [(= b (value->bits '())) '()] + [(int-bits? b) + (arithmetic-shift b (- int-shift))] + [(char-bits? b) + (integer->char (arithmetic-shift b (- char-shift)))] + [(box-bits? b) + (box (bits->value (heap-ref b)))] + [(cons-bits? b) + (cons (bits->value (heap-ref (+ b 8))) + (bits->value (heap-ref b)))] + [(vect-bits? b) + (if (zero? (untag b)) + (vector) + (build-vector (heap-ref b) + (lambda (j) + (bits->value (heap-ref (+ b (* 8 (add1 j))))))))] + [(str-bits? b) + (if (zero? (untag b)) + (string) + (build-string (heap-ref b) + (lambda (j) + (char-ref (+ b 8) j))))] + [(symb-bits? b) + (string->symbol + (if (zero? 
(untag b)) + (string) + (build-string (heap-ref b) + (lambda (j) + (char-ref (+ b 8) j)))))] + [(proc-bits? b) + (lambda _ + (error "This function is not callable."))] + [else (error "invalid bits")])) + +(define (value->bits v) + (cond [(eq? v #t) #b00011000] + [(eq? v #f) #b00111000] + [(eof-object? v) #b01011000] + [(void? v) #b01111000] + [(empty? v) #b10011000] + [(integer? v) + (arithmetic-shift v int-shift)] + [(char? v) + (bitwise-ior type-char + (arithmetic-shift (char->integer v) char-shift))] + [else (error "not an immediate value")])) + + +(define (imm-bits? v) + (zero? (bitwise-and v imm-mask))) + +(define (int-bits? v) + (= type-int (bitwise-and v mask-int))) + +(define (char-bits? v) + (= type-char (bitwise-and v mask-char))) + +(define (cons-bits? v) + (= type-cons (bitwise-and v imm-mask))) + +(define (box-bits? v) + (= type-box (bitwise-and v imm-mask))) + +(define (vect-bits? v) + (= type-vect (bitwise-and v imm-mask))) + +(define (str-bits? v) + (= type-str (bitwise-and v imm-mask))) + +(define (proc-bits? v) + (= type-proc (bitwise-and v imm-mask))) + +(define (symb-bits? v) + (= type-symb (bitwise-and v imm-mask))) + +(define (untag i) + (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) + (integer-length ptr-mask))) + +(define (heap-ref i) + (ptr-ref (cast (untag i) _int64 _pointer) _int64)) + +(define (char-ref i j) + (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j))) diff --git a/langs/mountebank/utils.rkt b/langs/mountebank/utils.rkt new file mode 100644 index 00000000..612b7381 --- /dev/null +++ b/langs/mountebank/utils.rkt @@ -0,0 +1,33 @@ +#lang racket +(provide symbol->data-label lookup pad-stack unpad-stack) +(require a86/ast) + +(define rsp 'rsp) +(define r15 'r15) + +(define (symbol->data-label s) + (symbol->label + (string->symbol (string-append "data_" (symbol->string s))))) + +;; Id CEnv -> [Maybe Integer] +(define (lookup x cenv) + (match cenv + ['() #f] + [(cons y rest) + (match (eq? 
x y) + [#t 0] + [#f (match (lookup x rest) + [#f #f] + [i (+ 8 i)])])])) + +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; Asm +;; Undo the stack alignment after a call +(define unpad-stack + (seq (Add rsp r15))) diff --git a/langs/hoodwink/values.c b/langs/mountebank/values.c similarity index 97% rename from langs/hoodwink/values.c rename to langs/mountebank/values.c index 5f4df97e..32e922bd 100644 --- a/langs/hoodwink/values.c +++ b/langs/mountebank/values.c @@ -14,6 +14,8 @@ type_t val_typeof(val_t x) return T_STR; case symb_type_tag: return T_SYMB; + case proc_type_tag: + return T_PROC; } if ((int_type_mask & x) == int_type_tag) diff --git a/langs/hoodwink/values.h b/langs/mountebank/values.h similarity index 93% rename from langs/hoodwink/values.h rename to langs/mountebank/values.h index b3805ddf..c1de09d6 100644 --- a/langs/hoodwink/values.h +++ b/langs/mountebank/values.h @@ -21,6 +21,7 @@ typedef enum type_t { T_VECT, T_STR, T_SYMB, + T_PROC, } type_t; typedef uint32_t val_char_t; @@ -39,7 +40,10 @@ typedef struct val_str_t { uint64_t len; val_char_t codepoints[]; } val_str_t; -typedef val_str_t val_symb_t; +typedef struct val_symb_t { + uint64_t len; + val_char_t codepoints[]; +} val_symb_t; /* return the type of x */ type_t val_typeof(val_t x); diff --git a/langs/mug/Makefile b/langs/mug/Makefile new file mode 100644 index 00000000..ed8a85f4 --- /dev/null +++ b/langs/mug/Makefile @@ -0,0 +1,45 @@ +UNAME := $(shell uname) + +ifeq ($(UNAME), Darwin) + format=macho64 + CC=arch -x86_64 gcc +else + format=elf64 + CC=gcc +endif + +objs = \ + main.o \ + print.o \ + values.o \ + io.o \ + symbol.o + +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* + +runtime.o: $(objs) + ld -r $(objs) -o runtime.o + +%.run: %.o runtime.o + $(CC) runtime.o $< -o $@ + +.c.o: + $(CC) -fPIC -c -g -o $@ 
$< + +.s.o: + nasm -g -f $(format) -o $@ $< + +%.s: %.rkt + cat $< | racket -t compile-stdin.rkt -m > $@ + +clean: + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" + +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/jig/ast.rkt b/langs/mug/ast.rkt similarity index 58% rename from langs/jig/ast.rkt rename to langs/mug/ast.rkt index 29e4e06d..537deedb 100644 --- a/langs/jig/ast.rkt +++ b/langs/mug/ast.rkt @@ -13,6 +13,7 @@ ;; | (Bool Boolean) ;; | (Char Character) ;; | (Str String) +;; | (Symb Symbol) ;; | (Prim0 Op0) ;; | (Prim1 Op1 Expr) ;; | (Prim2 Op2 Expr Expr) @@ -21,7 +22,9 @@ ;; | (Begin Expr Expr) ;; | (Let Id Expr Expr) ;; | (Var Id) -;; | (App Id (Listof Expr)) +;; | (Match Expr (Listof Pat) (Listof Expr)) +;; | (App Expr (Listof Expr)) +;; | (Lam Id (Listof Id) Expr) ;; type Id = Symbol ;; type Op0 = 'read-byte ;; type Op1 = 'add1 | 'sub1 | 'zero? @@ -29,19 +32,35 @@ ;; | 'write-byte | 'eof-object? ;; | 'box | 'car | 'cdr | 'unbox ;; | 'empty? | 'cons? | 'box? -;; | 'vector? | vector-length -;; | 'string? | string-length +;; | 'vector? | 'vector-length +;; | 'string? | 'string-length +;; | 'symbol? | 'symbol->string +;; | 'string->symbol | 'string->uninterned-symbol ;; type Op2 = '+ | '- | '< | '= ;; | 'cons ;; | 'make-vector | 'vector-ref ;; | 'make-string | 'string-ref ;; type Op3 = 'vector-set! 
+;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; | (PSymb Symbol) +;; | (PStr String) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() + (struct Eof () #:prefab) (struct Empty () #:prefab) (struct Int (i) #:prefab) (struct Bool (b) #:prefab) (struct Char (c) #:prefab) (struct Str (s) #:prefab) +(struct Symb (s) #:prefab) (struct Prim0 (p) #:prefab) (struct Prim1 (p e) #:prefab) (struct Prim2 (p e1 e2) #:prefab) @@ -50,4 +69,15 @@ (struct Begin (e1 e2) #:prefab) (struct Let (x e1 e2) #:prefab) (struct Var (x) #:prefab) -(struct App (f es) #:prefab) +(struct App (e es) #:prefab) +(struct Lam (f xs e) #:prefab) +(struct Match (e ps es) #:prefab) + +(struct PVar (x) #:prefab) +(struct PWild () #:prefab) +(struct PLit (x) #:prefab) +(struct PBox (p) #:prefab) +(struct PCons (p1 p2) #:prefab) +(struct PAnd (p1 p2) #:prefab) +(struct PSymb (s) #:prefab) +(struct PStr (s) #:prefab) diff --git a/langs/mug/build-runtime.rkt b/langs/mug/build-runtime.rkt new file mode 100644 index 00000000..66aad89f --- /dev/null +++ b/langs/mug/build-runtime.rkt @@ -0,0 +1,14 @@ +#lang racket +(require racket/runtime-path) +(provide runtime-path) + +(define-runtime-path here ".") + +(void + (system (string-append "make -C '" + (path->string (normalize-path here)) + "' runtime.o"))) + +(define runtime-path + (path->string + (normalize-path (build-path here "runtime.o")))) diff --git a/langs/mug/char.c b/langs/mug/char.c new file mode 100644 index 00000000..d11f16e0 --- /dev/null +++ b/langs/mug/char.c @@ -0,0 +1,57 @@ +#include +#include +#include "types.h" + +void print_codepoint(int64_t); + +void print_char (int64_t v) { + int64_t codepoint = v >> char_shift; + printf("#\\"); + switch (codepoint) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); 
break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(v); + } +} + +void print_codepoint(int64_t v) { + int64_t codepoint = v >> char_shift; + // Print using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (codepoint < 128) { + printf("%c", (char) codepoint); + } else if (codepoint < 2048) { + printf("%c%c", + (char)(codepoint >> 6) | 192, + ((char)codepoint & 63) | 128); + } else if (codepoint < 65536) { + printf("%c%c%c", + (char)(codepoint >> 12) | 224, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } else { + printf("%c%c%c%c", + (char)(codepoint >> 18) | 240, + ((char)(codepoint >> 12) & 63) | 128, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } +} + diff --git a/langs/mug/compile-define.rkt b/langs/mug/compile-define.rkt new file mode 100644 index 00000000..354e6f26 --- /dev/null +++ b/langs/mug/compile-define.rkt @@ -0,0 +1,69 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "fv.rkt" + "utils.rkt" + "compile-expr.rkt" + a86/ast) + +;; [Listof Defn] -> [Listof Id] +(define (define-ids ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (cons f (define-ids ds))])) + +;; [Listof Defn] -> Asm +(define (compile-defines ds) + (match ds + ['() (seq)] + [(cons d ds) + (seq (compile-define d) + (compile-defines ds))])) + +;; Defn -> Asm +(define (compile-define d) + (match d + [(Defn f xs e) + (compile-lambda-define (Lam f xs e))])) + +;; Defns -> Asm +;; Compile the closures for ds and push them on the stack +(define (compile-defines-values ds) + (seq (alloc-defines ds 0) + (init-defines ds (reverse (define-ids ds)) 8) + (add-rbx-defines ds 0))) + +;; Defns Int -> Asm +;; Allocate closures for ds at given offset, but don't write environment yet +(define (alloc-defines ds off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs 
e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx off) rax) + (Mov rax rbx) + (Add rax off) + (Or rax type-proc) + (Push rax) + (alloc-defines ds (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns CEnv Int -> Asm +;; Initialize the environment for each closure for ds at given offset +(define (init-defines ds c off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (free-vars-to-heap fvs c off) + (init-defines ds c (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns Int -> Asm +;; Compute adjustment to rbx for allocation of all ds +(define (add-rbx-defines ds n) + (match ds + ['() (seq (Add rbx (* n 8)))] + [(cons (Defn f xs e) ds) + (add-rbx-defines ds (+ n (add1 (length (fv (Lam f xs e))))))])) diff --git a/langs/mug/compile-expr.rkt b/langs/mug/compile-expr.rkt new file mode 100644 index 00000000..40dee014 --- /dev/null +++ b/langs/mug/compile-expr.rkt @@ -0,0 +1,333 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-ops.rkt" + a86/ast) + +;; Registers used +(define rax 'rax) ; return +(define rbx 'rbx) ; heap +(define rsp 'rsp) ; stack +(define rdi 'rdi) ; arg + +;; Expr CEnv Bool -> Asm +(define (compile-e e c t?) 
+ (match e + [(Int i) (compile-value i)] + [(Bool b) (compile-value b)] + [(Char c) (compile-value c)] + [(Eof) (compile-value eof)] + [(Empty) (compile-value '())] + [(Str s) (compile-string s)] + [(Symb s) (compile-symbol s)] + [(Var x) (compile-variable x c)] + [(Prim0 p) (compile-prim0 p c)] + [(Prim1 p e) (compile-prim1 p e c)] + [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] + [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] + [(If e1 e2 e3) (compile-if e1 e2 e3 c t?)] + [(Begin e1 e2) (compile-begin e1 e2 c t?)] + [(Let x e1 e2) (compile-let x e1 e2 c t?)] + [(App e es) (compile-app e es c t?)] + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) + +;; Symbol -> Asm +(define (compile-symbol s) + (seq (Lea rax (Plus (symbol->data-label s) type-symb)))) + +;; String -> Asm +(define (compile-string s) + (seq (Lea rax (Plus (symbol->data-label (string->symbol s)) type-str)))) + +;; Value -> Asm +(define (compile-value v) + (seq (Mov rax (value->bits v)))) + +;; Id CEnv -> Asm +(define (compile-variable x c) + (match (lookup x c) + [#f (error "unbound variable")] ;(seq (Lea rax (symbol->label x)))] + [i (seq (Mov rax (Offset rsp i)))])) + +;; Op0 CEnv -> Asm +(define (compile-prim0 p c) + (compile-op0 p)) + +;; Op1 Expr CEnv -> Asm +(define (compile-prim1 p e c) + (seq (compile-e e c #f) + (compile-op1 p))) + +;; Op2 Expr Expr CEnv -> Asm +(define (compile-prim2 p e1 e2 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (compile-op2 p))) + +;; Op3 Expr Expr Expr CEnv -> Asm +(define (compile-prim3 p e1 e2 e3 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (Push rax) + (compile-e e3 (cons #f (cons #f c)) #f) + (compile-op3 p))) + +;; Expr Expr Expr CEnv Bool -> Asm +(define (compile-if e1 e2 e3 c t?) + (let ((l1 (gensym 'if)) + (l2 (gensym 'if))) + (seq (compile-e e1 c #f) + (Cmp rax (value->bits #f)) + (Je l1) + (compile-e e2 c t?) 
+ (Jmp l2) + (Label l1) + (compile-e e3 c t?) + (Label l2)))) + +;; Expr Expr CEnv Bool -> Asm +(define (compile-begin e1 e2 c t?) + (seq (compile-e e1 c #f) + (compile-e e2 c t?))) + +;; Id Expr Expr CEnv Bool -> Asm +(define (compile-let x e1 e2 c t?) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons x c) t?) + (Add rsp 8))) + +;; Id [Listof Expr] CEnv Bool -> Asm +(define (compile-app f es c t?) + ;(compile-app-nontail f es c) + (if t? + (compile-app-tail f es c) + (compile-app-nontail f es c))) + +;; Expr [Listof Expr] CEnv -> Asm +(define (compile-app-tail e es c) + (seq (compile-es (cons e es) c) + (move-args (add1 (length es)) (length c)) + (Add rsp (* 8 (length c))) + (Mov rax (Offset rsp (* 8 (length es)))) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) + (Jmp rax))) + +;; Integer Integer -> Asm +(define (move-args i off) + (cond [(zero? off) (seq)] + [(zero? i) (seq)] + [else + (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) + (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) + (move-args (sub1 i) off))])) + +;; Expr [Listof Expr] CEnv -> Asm +;; The return address is placed above the arguments, so callee pops +;; arguments and return address is next frame +(define (compile-app-nontail e es c) + (let ((r (gensym 'ret)) + (i (* 8 (length es)))) + (seq (Lea rax r) + (Push rax) + (compile-es (cons e es) (cons #f c)) + (Mov rax (Offset rsp i)) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) ; fetch the code label + (Jmp rax) + (Label r)))) + +;; Id [Listof Id] Expr CEnv -> Asm +(define (compile-lam f xs e c) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx 0) rax) + (free-vars-to-heap fvs c 8) + (Mov rax rbx) ; return value + (Or rax type-proc) + (Add rbx (* 8 (add1 (length fvs))))))) + +;; [Listof Id] CEnv Int -> Asm +;; Copy the values of given free variables into the heap at given offset +(define (free-vars-to-heap fvs c off) + (match fvs + ['() (seq)] + [(cons x fvs) + (seq 
(Mov r8 (Offset rsp (lookup x c))) + (Mov (Offset rbx off) r8) + (free-vars-to-heap fvs c (+ off 8)))])) + +;; [Listof Lam] -> Asm +(define (compile-lambda-defines ls) + (match ls + ['() (seq)] + [(cons l ls) + (seq (compile-lambda-define l) + (compile-lambda-defines ls))])) + +;; Lam -> Asm +(define (compile-lambda-define l) + (let ((fvs (fv l))) + (match l + [(Lam f xs e) + (let ((env (append (reverse fvs) (reverse xs) (list #f)))) + (seq (Label (symbol->label f)) + (Mov rax (Offset rsp (* 8 (length xs)))) + (Xor rax type-proc) + (copy-env-to-stack fvs 8) + (compile-e e env #t) + (Add rsp (* 8 (length env))) ; pop env + (Ret)))]))) + +;; [Listof Id] Int -> Asm +;; Copy the closure environment at given offset to stack +(define (copy-env-to-stack fvs off) + (match fvs + ['() (seq)] + [(cons _ fvs) + (seq (Mov r9 (Offset rax off)) + (Push r9) + (copy-env-to-stack fvs (+ 8 off)))])) + +;; [Listof Expr] CEnv -> Asm +(define (compile-es es c) + (match es + ['() '()] + [(cons e es) + (seq (compile-e e c #f) + (Push rax) + (compile-es es (cons #f c)))])) + +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c #f) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) 
+ (Add rsp (* 8 (length cm))) + (Jmp done) + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) cm)] + [(PVar x) + (list (seq (Push rax)) (cons x cm))] + [(PStr s) + (let ((ok (gensym)) + (fail (gensym))) + (list (seq (Lea rdi (symbol->data-label (string->symbol s))) + (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-str) + (Je ok) + (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok) + (Xor rax type-str) + (Mov rsi rax) + pad-stack + (Call 'symb_cmp) + unpad-stack + (Cmp rax 0) + (Jne fail)) + cm))] + [(PSymb s) + (let ((ok (gensym))) + (list (seq (Lea r9 (Plus (symbol->data-label s) type-symb)) + (Cmp rax r9) + (Je ok) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok)) + cm))] + [(PLit l) + (let ((ok (gensym))) + (list (seq (Cmp rax (value->bits l)) + (Je ok) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 cm1) + (let ((ok (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Je ok) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next) + (Label ok) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 cm2) + (let ((ok (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Je ok) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next) + (Label ok) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length 
cm1)) (length cm))))) + i2) + cm2))])])])) diff --git a/langs/mug/compile-literals.rkt b/langs/mug/compile-literals.rkt new file mode 100644 index 00000000..04359610 --- /dev/null +++ b/langs/mug/compile-literals.rkt @@ -0,0 +1,109 @@ +#lang racket +(provide compile-literals init-symbol-table literals compile-string-chars) +(require "ast.rkt" + "utils.rkt" + a86/ast) + +(define rdi 'rdi) + +;; Prog -> Asm +(define (compile-literals p) + (append-map compile-literal (literals p))) + +;; Symbol -> Asm +(define (compile-literal s) + (let ((str (symbol->string s))) + (seq (Label (symbol->data-label s)) + (Dq (string-length str)) + (compile-string-chars (string->list str)) + (if (odd? (string-length str)) + (seq (Dd 0)) + (seq))))) + +;; Prog -> Asm +;; Call intern_symbol on every symbol in the program +(define (init-symbol-table p) + (match (symbols p) + ['() (seq)] + [ss (seq (Sub 'rsp 8) + (append-map init-symbol ss) + (Add 'rsp 8))])) + +;; Symbol -> Asm +(define (init-symbol s) + (seq (Lea rdi (symbol->data-label s)) + (Call 'intern_symbol))) + +;; Prog -> [Listof Symbol] +(define (literals p) + (remove-duplicates + (map to-symbol (literals* p)))) + +;; Prog -> [Listof Symbol] +(define (symbols p) + (remove-duplicates (filter symbol? (literals* p)))) + +;; (U String Symbol) -> Symbol +(define (to-symbol s) + (if (string? 
s) + (string->symbol s) + s)) + +;; Prog -> [Listof (U Symbol String)] +(define (literals* p) + (match p + [(Prog ds e) + (append (append-map literals-d ds) (literals-e e))])) + +;; Defn -> [Listof (U Symbol String)] +(define (literals-d d) + (match d + [(Defn f xs e) + (literals-e e)])) + +;; Expr -> [Listof (U Symbol String)] +(define (literals-e e) + (match e + [(Str s) (list s)] + [(Symb s) (list s)] + [(Prim1 p e) + (literals-e e)] + [(Prim2 p e1 e2) + (append (literals-e e1) (literals-e e2))] + [(Prim3 p e1 e2 e3) + (append (literals-e e1) (literals-e e2) (literals-e e3))] + [(If e1 e2 e3) + (append (literals-e e1) (literals-e e2) (literals-e e3))] + [(Begin e1 e2) + (append (literals-e e1) (literals-e e2))] + [(Let x e1 e2) + (append (literals-e e1) (literals-e e2))] + [(App e1 es) + (append (literals-e e1) (append-map literals-e es))] + [(Lam f xs e) + (literals-e e)] + [(Match e ps es) + (append (literals-e e) (append-map literals-match-clause ps es))] + [_ '()])) + +;; Pat Expr -> [Listof (U Symbol String)] +(define (literals-match-clause p e) + (append (literals-pat p) (literals-e e))) + +;; Pat -> [Listof (U Symbol String)] +(define (literals-pat p) + (match p + [(PSymb s) (list s)] + [(PStr s) (list s)] + [(PBox p) (literals-pat p)] + [(PCons p1 p2) (append (literals-pat p1) (literals-pat p2))] + [(PAnd p1 p2) (append (literals-pat p1) (literals-pat p2))] + [_ '()])) + +;; [Listof Char] -> Asm +(define (compile-string-chars cs) + (match cs + ['() (seq)] + [(cons c cs) + (seq (Dd (char->integer c)) + (compile-string-chars cs))])) diff --git a/langs/hoodwink/compile-ops.rkt b/langs/mug/compile-ops.rkt similarity index 54% rename from langs/hoodwink/compile-ops.rkt rename to langs/mug/compile-ops.rkt index 2df0ab77..8a52cd49 100644 --- a/langs/hoodwink/compile-ops.rkt +++ b/langs/mug/compile-ops.rkt @@ -1,77 +1,76 @@ #lang racket (provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) +(require "ast.rkt" "types.rkt" "utils.rkt" a86/ast) 
(define rax 'rax) ; return (define eax 'eax) ; 32-bit load/store (define rbx 'rbx) ; heap -(define rdi 'rdi) ; arg +(define rdi 'rdi) ; arg1 +(define rsi 'rsi) ; arg2 +(define rdx 'rdx) ; arg3 (define r8 'r8) ; scratch (define r9 'r9) ; scratch (define r10 'r10) ; scratch +(define r12 'r12) ; save across call to memcpy +(define r15 'r15) ; stack pad (non-volatile) (define rsp 'rsp) ; stack -;; Op0 CEnv -> Asm -(define (compile-op0 p c) +;; Op0 -> Asm +(define (compile-op0 p) (match p - ['void (seq (Mov rax val-void))] - ['read-byte (seq (pad-stack c) + ['void (seq (Mov rax (value->bits (void))))] + ['read-byte (seq pad-stack (Call 'read_byte) - (unpad-stack c))] - ['peek-byte (seq (pad-stack c) + unpad-stack)] + ['peek-byte (seq pad-stack (Call 'peek_byte) - (unpad-stack c))] - ['gensym (seq (pad-stack c) - (Call 'gensym) - (unpad-stack c) - (Or rax type-symb))])) + unpad-stack)])) -;; Op1 CEnv -> Asm -(define (compile-op1 p c) +;; Op1 -> Asm +(define (compile-op1 p) (match p ['add1 - (seq (assert-integer rax c) - (Add rax (imm->bits 1)))] + (seq (assert-integer rax) + (Add rax (value->bits 1)))] ['sub1 - (seq (assert-integer rax c) - (Sub rax (imm->bits 1)))] + (seq (assert-integer rax) + (Sub rax (value->bits 1)))] ['zero? - (seq (assert-integer rax c) + (seq (assert-integer rax) (eq-imm 0))] ['char? (type-pred mask-char type-char)] ['char->integer - (seq (assert-char rax c) + (seq (assert-char rax) (Sar rax char-shift) (Sal rax int-shift))] ['integer->char - (seq (assert-codepoint c) + (seq (assert-codepoint rax) (Sar rax int-shift) (Sal rax char-shift) (Xor rax type-char))] ['eof-object? 
(eq-imm eof)] ['write-byte - (seq (assert-byte c) - (pad-stack c) + (seq (assert-byte rax) + pad-stack (Mov rdi rax) (Call 'write_byte) - (unpad-stack c) - (Mov rax val-void))] + unpad-stack)] ['box (seq (Mov (Offset rbx 0) rax) (Mov rax rbx) (Or rax type-box) (Add rbx 8))] ['unbox - (seq (assert-box rax c) + (seq (assert-box rax) (Xor rax type-box) (Mov rax (Offset rax 0)))] ['car - (seq (assert-cons rax c) + (seq (assert-cons rax) (Xor rax type-cons) (Mov rax (Offset rax 8)))] ['cdr - (seq (assert-cons rax c) + (seq (assert-cons rax) (Xor rax type-cons) (Mov rax (Offset rax 0)))] ['empty? (eq-imm '())] @@ -83,10 +82,12 @@ (type-pred ptr-mask type-vect)] ['string? (type-pred ptr-mask type-str)] + ['symbol? + (type-pred ptr-mask type-symb)] ['vector-length (let ((zero (gensym)) (done (gensym))) - (seq (assert-vector rax c) + (seq (assert-vector rax) (Xor rax type-vect) (Cmp rax 0) (Je zero) @@ -99,7 +100,7 @@ ['string-length (let ((zero (gensym)) (done (gensym))) - (seq (assert-string rax c) + (seq (assert-string rax) (Xor rax type-str) (Cmp rax 0) (Je zero) @@ -109,29 +110,77 @@ (Label zero) (Mov rax 0) (Label done)))] - ['symbol? 
- (type-pred ptr-mask type-symb)] ['string->symbol - (seq (assert-string rax c) - )] + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + pad-stack + (Call 'intern_symbol) + unpad-stack + (Or rax type-symb))] ['symbol->string - (seq (assert-symbol rax c) - )])) - -;; Op2 CEnv -> Asm -(define (compile-op2 p c) + (seq (assert-symbol rax) + (Xor rax type-symb) + char-array-copy + (Or rax type-str))] + ['string->uninterned-symbol + (seq (assert-string rax) + (Xor rax type-str) + char-array-copy + (Or rax type-symb))])) + +;; Asm +;; Copy sized array of characters pointed to by rax +(define char-array-copy + (seq (Mov rdi rbx) ; dst + (Mov rsi rax) ; src + (Mov rdx (Offset rax 0)) ; len + (Add rdx 1) ; #words = 1 + (len+1)/2 + (Sar rdx 1) + (Add rdx 1) + (Sal rdx 3) ; #bytes = 8*#words + (Mov r12 rdx) ; save rdx before destroyed + pad-stack + (Call 'memcpy) + unpad-stack + ; rbx should be preserved by memcpy + ;(Mov rbx rax) ; dst is returned, install as heap pointer + (Add rbx r12))) + +;; Op2 -> Asm +(define (compile-op2 p) (match p ['+ (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Add rax r8))] ['- (seq (Pop r8) - (assert-integer r8 c) - (assert-integer rax c) + (assert-integer r8) + (assert-integer rax) (Sub r8 rax) (Mov rax r8))] + ['< + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax (value->bits #t)) + (let ((true (gensym))) + (seq (Jl true) + (Mov rax (value->bits #f)) + (Label true))))] + ['= + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax (value->bits #t)) + (let ((true (gensym))) + (seq (Je true) + (Mov rax (value->bits #f)) + (Label true))))] ['cons (seq (Mov (Offset rbx 0) rax) (Pop rax) @@ -139,12 +188,15 @@ (Mov rax rbx) (Or rax type-cons) (Add rbx 16))] + ['eq? 
+ (seq (Pop r8) + (eq r8 rax))] ['make-vector (let ((loop (gensym)) (done (gensym)) (empty (gensym))) (seq (Pop r8) - (assert-natural r8 c) + (assert-natural r8) (Cmp r8 0) ; special case empty vector (Je empty) @@ -171,16 +223,18 @@ ['vector-ref (seq (Pop r8) - (assert-vector r8 c) - (assert-integer rax c) + (assert-vector r8) + (assert-integer rax) + (Cmp r8 type-vect) + (Je 'raise_error_align) ; special case for empty vector (Cmp rax 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-vect) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar rax int-shift) ; rax = index (Sub r9 1) (Cmp r9 rax) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal rax 3) (Add r8 rax) (Mov rax (Offset r8 8)))] @@ -190,8 +244,8 @@ (done (gensym)) (empty (gensym))) (seq (Pop r8) - (assert-natural r8 c) - (assert-char rax c) + (assert-natural r8) + (assert-char rax) (Cmp r8 0) ; special case empty string (Je empty) @@ -204,9 +258,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) @@ -224,68 +278,62 @@ ['string-ref (seq (Pop r8) - (assert-string r8 c) - (assert-integer rax c) + (assert-string r8) + (assert-integer rax) + (Cmp r8 type-str) + (Je 'raise_error_align) ; special case for empty string (Cmp rax 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-str) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar rax int-shift) ; rax = index (Sub r9 1) (Cmp r9 rax) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal rax 2) (Add r8 rax) (Mov 'eax (Offset r8 8)) (Sal rax char-shift) - (Or rax type-char))] - - ['eq? - (seq (Pop r8) - (eq r8 rax))] - ['string->symbol - (seq)] - ['symbol->string - (seq)])) + (Or rax type-char))])) -;; Op3 CEnv -> Asm -(define (compile-op3 p c) +;; Op3 -> Asm +(define (compile-op3 p) (match p ['vector-set! 
(seq (Pop r10) (Pop r8) - (assert-vector r8 c) - (assert-integer r10 c) + (assert-vector r8) + (assert-integer r10) (Cmp r10 0) - (Jl (error-label c)) + (Jl 'raise_error_align) (Xor r8 type-vect) ; r8 = ptr (Mov r9 (Offset r8 0)) ; r9 = len (Sar r10 int-shift) ; r10 = index (Sub r9 1) (Cmp r9 r10) - (Jl (error-label c)) + (Jl 'raise_error_align) (Sal r10 3) (Add r8 r10) (Mov (Offset r8 8) rax) - (Mov rax val-void))])) + (Mov rax (value->bits (void))))])) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (assert-type mask type) - (λ (arg c) + (λ (arg) (seq (Mov r9 arg) (And r9 mask) (Cmp r9 type) - (Jne (error-label c))))) + (Jne 'raise_error_align)))) (define (type-pred mask type) (let ((l (gensym))) (seq (And rax mask) (Cmp rax type) - (Mov rax (imm->bits #t)) + (Mov rax (value->bits #t)) (Je l) - (Mov rax (imm->bits #f)) + (Mov rax (value->bits #f)) (Label l)))) (define assert-integer @@ -301,63 +349,49 @@ (define assert-string (assert-type ptr-mask type-str)) (define assert-symbol - (assert-type ptr-mask type-str)) + (assert-type ptr-mask type-symb)) +(define assert-proc + (assert-type ptr-mask type-proc)) -(define (assert-codepoint c) +(define (assert-codepoint r) (let ((ok (gensym))) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 1114111)) - (Jg (error-label c)) - (Cmp rax (imm->bits 55295)) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align) + (Cmp r (value->bits 1114111)) + (Jg 'raise_error_align) + (Cmp r (value->bits 55295)) (Jl ok) - (Cmp rax (imm->bits 57344)) + (Cmp r (value->bits 57344)) (Jg ok) - (Jmp (error-label c)) + (Jmp 'raise_error_align) (Label ok)))) -(define (assert-byte c) - (seq (assert-integer rax c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)) - (Cmp rax (imm->bits 255)) - (Jg (error-label c)))) +(define (assert-byte r) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align) + (Cmp r (value->bits 255)) + (Jg 'raise_error_align))) 
-(define (assert-natural r c) - (seq (assert-integer r c) - (Cmp rax (imm->bits 0)) - (Jl (error-label c)))) +(define (assert-natural r) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align))) ;; Value -> Asm (define (eq-imm imm) - (eq rax (imm->bits imm))) + (let ((l1 (gensym))) + (seq (Cmp rax (value->bits imm)) + (Mov rax (value->bits #t)) + (Je l1) + (Mov rax (value->bits #f)) + (Label l1)))) (define (eq ir1 ir2) (let ((l1 (gensym))) (seq (Cmp ir1 ir2) - (Mov rax val-true) + (Mov rax (value->bits #t)) (Je l1) - (Mov rax val-false) + (Mov rax (value->bits #f)) (Label l1)))) - -;; CEnv -> Asm -;; Pad the stack to be aligned for a call -(define (pad-stack c) - (match (even? (length c)) - [#t (seq (Sub rsp 8))] - [#f (seq)])) - -;; CEnv -> Asm -;; Undo the stack alignment after a call -(define (unpad-stack c) - (match (even? (length c)) - [#t (seq (Add rsp 8))] - [#f (seq)])) - -;; CEnv -> Label -;; Determine correct error handler label to jump to. -(define (error-label c) - (match (even? 
(length c)) - [#t 'raise_error] - [#f 'raise_error_align])) diff --git a/langs/mug/compile-stdin.rkt b/langs/mug/compile-stdin.rkt new file mode 100644 index 00000000..cfa15106 --- /dev/null +++ b/langs/mug/compile-stdin.rkt @@ -0,0 +1,10 @@ +#lang racket +(provide main) +(require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile (parse (read-all))))) diff --git a/langs/mug/compile.rkt b/langs/mug/compile.rkt new file mode 100644 index 00000000..7ab2e884 --- /dev/null +++ b/langs/mug/compile.rkt @@ -0,0 +1,53 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-define.rkt" + "compile-expr.rkt" + "compile-literals.rkt" + a86/ast) + +;; Registers used +(define rbx 'rbx) ; heap +(define rsp 'rsp) ; stack +(define rdi 'rdi) ; arg +(define r15 'r15) ; stack pad (non-volatile) + +;; type CEnv = (Listof [Maybe Id]) + +;; Prog -> Asm +(define (compile p) + (match p + [(Prog ds e) + (prog (externs) + (Global 'entry) + (Label 'entry) + (Push rbx) ; save callee-saved register + (Push r15) + (Mov rbx rdi) ; recv heap pointer + (init-symbol-table p) + (compile-defines-values ds) + (compile-e e (reverse (define-ids ds)) #f) + (Add rsp (* 8 (length ds))) ;; pop function definitions + (Pop r15) ; restore callee-save register + (Pop rbx) + (Ret) + (compile-defines ds) + (compile-lambda-defines (lambdas p)) + (Label 'raise_error_align) + pad-stack + (Call 'raise_error) + (Data) + (compile-literals p))])) + +(define (externs) + (seq (Extern 'peek_byte) + (Extern 'read_byte) + (Extern 'write_byte) + (Extern 'raise_error) + (Extern 'intern_symbol) + (Extern 'symb_cmp) + (Extern 'memcpy))) diff --git a/langs/mug/env.rkt b/langs/mug/env.rkt new file mode 100644 index 00000000..c43be9c3 --- /dev/null +++ b/langs/mug/env.rkt @@ -0,0 +1,15 @@ +#lang racket 
+(provide lookup ext) + +;; Env Variable -> Answer +(define (lookup env x) + (match env + ['() 'err] + [(cons (list y i) env) + (match (symbol=? x y) + [#t i] + [#f (lookup env x)])])) + +;; Env Variable Value -> Env +(define (ext r x i) + (cons (list x i) r)) \ No newline at end of file diff --git a/langs/mug/fv.rkt b/langs/mug/fv.rkt new file mode 100644 index 00000000..2377b7e5 --- /dev/null +++ b/langs/mug/fv.rkt @@ -0,0 +1,35 @@ +#lang racket +(require "ast.rkt") +(provide fv) + +;; Expr -> [Listof Id] +;; List all of the free variables in e +(define (fv e) + (remove-duplicates (fv* e))) + +(define (fv* e) + (match e + [(Var x) (list x)] + [(Prim1 p e) (fv* e)] + [(Prim2 p e1 e2) (append (fv* e1) (fv* e2))] + [(Prim3 p e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(If e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(Begin e1 e2) (append (fv* e1) (fv* e2))] + [(Let x e1 e2) (append (fv* e1) (remq* (list x) (fv* e2)))] + [(App e1 es) (append (fv* e1) (append-map fv* es))] + [(Lam f xs e) (remq* xs (fv* e))] + [(Match e ps es) (append (fv* e) (append-map fv-clause* ps es))] + [_ '()])) + +;; Pat Expr -> [Listof Id] +(define (fv-clause* p e) + (remq* (bv-pat* p) (fv* e))) + +;; Pat -> [Listof Id] +(define (bv-pat* p) + (match p + [(PVar x) (list x)] + [(PCons p1 p2) (append (bv-pat* p1) (bv-pat* p2))] + [(PAnd p1 p2) (append (bv-pat* p1) (bv-pat* p2))] + [(PBox p) (bv-pat* p)] + [_ '()])) diff --git a/langs/mug/heap.h b/langs/mug/heap.h new file mode 100644 index 00000000..8f2f5e23 --- /dev/null +++ b/langs/mug/heap.h @@ -0,0 +1,9 @@ +#include + +extern int64_t heap[]; +extern int from_side; + +extern char type[]; + +// in words +#define heap_size 1001 diff --git a/langs/mug/interp-defun.rkt b/langs/mug/interp-defun.rkt new file mode 100644 index 00000000..7d59532f --- /dev/null +++ b/langs/mug/interp-defun.rkt @@ -0,0 +1,164 @@ +#lang racket +(provide interp interp-env (struct-out Closure) zip) +(require "ast.rkt" + "env.rkt" + "interp-prims.rkt") + +;; 
type Answer = Value | 'err + +;; type Value = +;; | Integer +;; | Boolean +;; | Character +;; | Eof +;; | Void +;; | '() +;; | (cons Value Value) +;; | (box Value) +;; | (vector Value ...) +;; | (string Char ...) +;; | (Closure [Listof Id] Expr Env) +(struct Closure (xs e r) #:prefab) + +;; type REnv = (Listof (List Id Value)) +;; type Defns = (Listof Defn) + +;; Prog -> Answer +(define (interp p) + (match p + [(Prog ds e) + (interp-env e '() ds)])) + +;; Expr Env Defns -> Answer +(define (interp-env e r ds) + (match e + [(Int i) i] + [(Bool b) b] + [(Char c) c] + [(Eof) eof] + [(Empty) '()] + [(Var x) (interp-var x r ds)] + [(Str s) s] + [(Symb s) s] + [(Prim0 'void) (void)] + [(Prim0 'read-byte) (read-byte)] + [(Prim0 'peek-byte) (peek-byte)] + [(Prim1 p e) + (match (interp-env e r ds) + ['err 'err] + [v (interp-prim1 p v)])] + [(Prim2 p e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (interp-prim2 p v1 v2)])])] + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (match (interp-env e3 r ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] + [(If p e1 e2) + (match (interp-env p r ds) + ['err 'err] + [v + (if v + (interp-env e1 r ds) + (interp-env e2 r ds))])] + [(Begin e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [_ (interp-env e2 r ds)])] + [(Let x e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (Closure xs e r)] + [(App e es) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (match f + [(Closure xs e r) + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err)] + [_ 'err])])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r 
ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PSymb s) (and (eq? s v) r)] + [(PStr s) (and (string? v) (string=? s v) r)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) + +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) + +;; (Listof Expr) REnv Defns -> (Listof Value) | 'err +(define (interp-env* es r ds) + (match es + ['() '()] + [(cons e es) + (match (interp-env e r ds) + ['err 'err] + [v (match (interp-env* es r ds) + ['err 'err] + [vs (cons v vs)])])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) + +(define (zip xs ys) + (match* (xs ys) + [('() '()) '()] + [((cons x xs) (cons y ys)) + (cons (list x y) + (zip xs ys))])) diff --git a/langs/mug/interp-env.rkt b/langs/mug/interp-env.rkt deleted file mode 100644 index c63284ee..00000000 --- a/langs/mug/interp-env.rkt +++ /dev/null @@ -1,190 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require (only-in "syntax.rkt" prim?)) - -;; type Expr = -;; | Integer -;; | Boolean -;; | Character -;; | String -;; | Symbol -;; | Variable -;; | '() -;; | `(box ,Expr) -;; | `(if ,Expr ,Expr ,Expr) -;; | `(let ,(List Variable Expr) ... ,Expr) -;; | `(letrec ,(List Variable Lambda) ... ,Expr) -;; | `(λ ,Bindings ,Expr) -;; | `(apply ,Expr ,Expr) -;; | `(,Prim ,Expr ...) 
-;; | `(,Expr ,Expr ...) - -;; type Value = -;; | Integer -;; | Boolean -;; | Character -;; | String -;; | Symbol -;; | '() -;; | (Box Value) -;; | (Cons Value Value) -;; | Function - -;; type Function = -;; | (Values ... -> Answer) - -;; type Answer = Value | 'err - -;; type REnv = (Listof (List Variable Value)) - -;; Expr REnv -> Answer -(define (interp-env e r) - (match e - ;; produce fresh strings each time a literal is eval'd - [(? string? s) (string-copy s)] - [(? value? v) v] - [''() '()] - [`',(? symbol? s) s] - [`(if ,e0 ,e1 ,e2) - (match (interp-env e0 r) - ['err 'err] - [v - (if v - (interp-env e1 r) - (interp-env e2 r))])] - [(? symbol? x) - (lookup r x)] - [`(let (,`(,xs ,es) ...) ,e) - (match (interp-env* es r) - ['err 'err] - [vs - (interp-env e (append (zip xs vs) r))])] - [`(letrec (,`(,xs ,es) ...) ,e) - (letrec ((r* (λ () - (append - (zip xs - (map (λ (l) (λ vs (apply (interp-env l (r*)) vs))) - es)) - r)))) - (interp-env e (r*)))] - [`(λ (,xs ...) ,e) - (λ vs - (if (= (length vs) (length xs)) - (interp-env e (append (zip xs vs) r)) - 'err))] - [`(λ (,xs ... . ,x) ,e) - (λ vs - (if (>= (length vs) (length xs)) - (interp-env e (append (zip/remainder xs vs x) r)) - 'err))] - [`(apply ,e0 ,e1) - (let ((v0 (interp-env e0 r)) - (vs (interp-env e1 r))) - (if (list? vs) - (apply v0 vs) - 'err))] - [`(,(? prim? p) ,es ...) - (let ((as (interp-env* es r))) - (interp-prim p as))] - [`(,e ,es ...) - (match (interp-env* (cons e es) r) - [(list f vs ...) - (if (procedure? f) - (apply f vs) - 'err)] - ['err 'err])] - [_ 'err])) - -;; (Listof Expr) REnv (Listof Defn) -> (Listof Value) | 'err -(define (interp-env* es r) - (match es - ['() '()] - [(cons e es) - (match (interp-env e r) - ['err 'err] - [v (cons v (interp-env* es r))])])) - -;; Any -> Boolean -(define (value? x) - (or (integer? x) - (boolean? x) - (char? x) - (string? x))) - -;; Prim (Listof Answer) -> Answer -(define (interp-prim p as) - (match (cons p as) - [(list p (? value?) ... 'err _ ...) 
'err] - [(list '- (? integer? i0)) (- i0)] - [(list '- (? integer? i0) (? integer? i1)) (- i0 i1)] - [(list 'abs (? integer? i0)) (abs i0)] - [(list 'add1 (? integer? i0)) (+ i0 1)] - [(list 'sub1 (? integer? i0)) (- i0 1)] - [(list 'zero? (? integer? i0)) (zero? i0)] - [(list 'char? v0) (char? v0)] - [(list 'integer? v0) (integer? v0)] - [(list 'boolean? v0) (boolean? v0)] - [(list 'integer->char (? codepoint? i0)) (integer->char i0)] - [(list 'char->integer (? char? c)) (char->integer c)] - [(list '+ (? integer? i0) (? integer? i1)) (+ i0 i1)] - [(list 'cons v0 v1) (cons v0 v1)] - [(list 'car (? cons? v0)) (car v0)] - [(list 'cdr (? cons? v0)) (cdr v0)] - [(list 'string? v0) (string? v0)] - [(list 'box? v0) (box? v0)] - [(list 'empty? v0) (empty? v0)] - [(list 'cons? v0) (cons? v0)] - [(list 'cons v0 v1) (cons v0 v1)] - [(list 'box v0) (box v0)] - [(list 'unbox (? box? v0)) (unbox v0)] - [(list 'string-length (? string? v0)) (string-length v0)] - [(list 'make-string (? natural? v0) (? char? v1)) (make-string v0 v1)] - [(list 'string-ref (? string? v0) (? natural? v1)) - (if (< v1 (string-length v0)) - (string-ref v0 v1) - 'err)] - [(list '= (? integer? v0) (? integer? v1)) (= v0 v1)] - [(list '< (? integer? v0) (? integer? v1)) (< v0 v1)] - [(list '<= (? integer? v0) (? integer? v1)) (<= v0 v1)] - [(list 'char=? (? char? v0) (? char? v1)) (char=? v0 v1)] - [(list 'boolean=? (? boolean? v0) (? boolean? v1)) (boolean=? v0 v1)] - [(list 'eq? v0 v1) (eq? v0 v1)] - [(list 'gensym) (gensym)] - [(list 'symbol? v0) (symbol? v0)] - [(list 'procedure? v0) (procedure? v0)] - [_ 'err])) - -;; REnv Variable -> Answer -(define (lookup env x) - (match env - ['() 'err] - [(cons (list y v) env) - (match (symbol=? x y) - [#t v] - [#f (lookup env x)])])) - -;; REnv Variable Value -> Value -(define (ext r x v) - (cons (list x v) r)) - -;; Any -> Boolean -(define (codepoint? x) - (and (integer? 
x) - (<= 0 x #x10FFFF) - (not (<= #xD800 x #xDFFF)))) - -;; (Listof A) (Listof B) -> (Listof (List A B)) -(define (zip xs ys) - (match* (xs ys) - [('() '()) '()] - [((cons x xs) (cons y ys)) - (cons (list x y) (zip xs ys))])) - -;; like zip but ys can be longer and remainder is associated with r -(define (zip/remainder xs ys r) - (match* (xs ys) - [('() ys) (list (list r ys))] - [((cons x xs) (cons y ys)) - (cons (list x y) (zip/remainder xs ys r))])) - - diff --git a/langs/hustle/interp-io.rkt b/langs/mug/interp-io.rkt similarity index 50% rename from langs/hustle/interp-io.rkt rename to langs/mug/interp-io.rkt index 12da1b4b..93f7d3c6 100644 --- a/langs/hustle/interp-io.rkt +++ b/langs/mug/interp-io.rkt @@ -2,11 +2,11 @@ (provide interp/io) (require "interp.rkt") -;; Expr String -> (Cons Value String) +;; Expr String -> (Cons Value String) ;; Interpret e with given string as input, ;; collect output as string (including printed result) -(define (interp/io e input) +(define (interp/io e in) (parameterize ((current-output-port (open-output-string)) - (current-input-port (open-input-string input))) - (cons (interp e) - (get-output-string (current-output-port))))) + (current-input-port (open-input-string in))) + (cons (interp e) + (get-output-string (current-output-port))))) diff --git a/langs/mug/interp-prims.rkt b/langs/mug/interp-prims.rkt new file mode 100644 index 00000000..7797de69 --- /dev/null +++ b/langs/mug/interp-prims.rkt @@ -0,0 +1,74 @@ +#lang racket +(require "ast.rkt") +(provide interp-prim1 interp-prim2 interp-prim3) + +;; Op1 Value -> Answer +(define (interp-prim1 p1 v) + (match (list p1 v) + [(list 'add1 (? integer?)) (add1 v)] + [(list 'sub1 (? integer?)) (sub1 v)] + [(list 'zero? (? integer?)) (zero? v)] + [(list 'char? v) (char? v)] + [(list 'char->integer (? char?)) (char->integer v)] + [(list 'integer->char (? codepoint?)) (integer->char v)] + [(list 'eof-object? v) (eof-object? v)] + [(list 'write-byte (? 
byte?)) (write-byte v)] + [(list 'box v) (box v)] + [(list 'unbox (? box?)) (unbox v)] + [(list 'car (? pair?)) (car v)] + [(list 'cdr (? pair?)) (cdr v)] + [(list 'empty? v) (empty? v)] + [(list 'cons? v) (cons? v)] + [(list 'box? v) (box? v)] + [(list 'vector? v) (vector? v)] + [(list 'vector-length (? vector?)) (vector-length v)] + [(list 'string? v) (string? v)] + [(list 'string-length (? string?)) (string-length v)] + [(list 'symbol? v) (symbol? v)] + [(list 'symbol->string (? symbol?)) (symbol->string v)] + [(list 'string->symbol (? string?)) (string->symbol v)] + [(list 'string->uninterned-symbol (? string?)) + (string->uninterned-symbol v)] + [_ 'err])) + +;; Op2 Value Value -> Answer +(define (interp-prim2 p v1 v2) + (match (list p v1 v2) + [(list '+ (? integer?) (? integer?)) (+ v1 v2)] + [(list '- (? integer?) (? integer?)) (- v1 v2)] + [(list '< (? integer?) (? integer?)) (< v1 v2)] + [(list '= (? integer?) (? integer?)) (= v1 v2)] + [(list 'cons v1 v2) (cons v1 v2)] + [(list 'eq? v1 v2) (eq? v1 v2)] + [(list 'make-vector (? integer?) _) + (if (<= 0 v1) + (make-vector v1 v2) + 'err)] + [(list 'vector-ref (? vector?) (? integer?)) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-ref v1 v2) + 'err)] + [(list 'make-string (? integer?) (? char?)) + (if (<= 0 v1) + (make-string v1 v2) + 'err)] + [(list 'string-ref (? string?) (? integer?)) + (if (<= 0 v2 (sub1 (string-length v1))) + (string-ref v1 v2) + 'err)] + [_ 'err])) + +;; Op3 Value Value Value -> Answer +(define (interp-prim3 p v1 v2 v3) + (match (list p v1 v2 v3) + [(list 'vector-set! (? vector?) (? integer?) _) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-set! v1 v2 v3) + 'err)] + [_ 'err])) + +;; Any -> Boolean +(define (codepoint? v) + (and (integer? 
v) + (or (<= 0 v 55295) + (<= 57344 v 1114111)))) diff --git a/langs/mug/interp-stdin.rkt b/langs/mug/interp-stdin.rkt new file mode 100644 index 00000000..965b9cc4 --- /dev/null +++ b/langs/mug/interp-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide main) +(require "parse.rkt" "interp.rkt" "read-all.rkt") + +;; -> Void +;; Parse and interpret contents of stdin, +;; print result on stdout +(define (main) + (read-line) ; ignore #lang racket line + (let ((r (interp (parse (read-all))))) + (unless (void? r) + (println r)))) diff --git a/langs/mug/interp.rkt b/langs/mug/interp.rkt index a1e7a2f1..c295ca6e 100644 --- a/langs/mug/interp.rkt +++ b/langs/mug/interp.rkt @@ -1,75 +1,163 @@ #lang racket -(provide (all-defined-out)) -(require "syntax.rkt" - "interp-env.rkt") +(provide interp interp-env) +(require "ast.rkt" + "env.rkt" + "interp-prims.rkt") +;; type Answer = Value | 'err -(define (interp e) - (interp-env (desugar e) stdlib)) +;; type Value = +;; | Integer +;; | Boolean +;; | Character +;; | Eof +;; | Void +;; | '() +;; | (cons Value Value) +;; | (box Value) +;; | (vector Value ...) +;; | (string Char ...) +;; | (Value ... -> Answer) -(define stdlib - `((append ,append) - (list? ,list?) - (first ,first) - (second ,second) - (rest ,rest) - (reverse ,reverse) - (not ,not) - (compose ,compose) - (symbol=? ,symbol=?) - (memq ,memq) - (length ,length) - (remq* ,remq*) - (remove-duplicates ,remove-duplicates) - (remove ,remove) - (member ,member) - (equal? ,equal?))) +;; type REnv = (Listof (List Id Value)) +;; type Defns = (Listof Defn) +;; Prog -> Answer +(define (interp p) + (match p + [(Prog ds e) + (interp-env e '() ds)])) -;; Expr REnv Natural -> Answer -(define (interp-qq d r n) - ;(println `(interp-qq ,d ,n)) - (match d - [`(,'unquote ,e) - (if (zero? n) - (interp-env (desugar e) r) ;! - (cons 'unquote (interp-qq-list e r (sub1 n))))] - [`(,'unquote-splicing ,e) 'err] - [`(,'quasiquote ,d) - (cons 'quasiquote (interp-qq-list d r (add1 n)))] - [`(,x . 
,y) - (match (interp-qq-list x r n) +;; Expr Env Defns -> Answer +(define (interp-env e r ds) + (match e + [(Int i) i] + [(Bool b) b] + [(Char c) c] + [(Eof) eof] + [(Empty) '()] + [(Var x) (interp-var x r ds)] + [(Str s) s] + [(Symb s) s] + [(Prim0 'void) (void)] + [(Prim0 'read-byte) (read-byte)] + [(Prim0 'peek-byte) (peek-byte)] + [(Prim1 p e) + (match (interp-env e r ds) ['err 'err] - [xv (match (interp-qq y r n) + [v (interp-prim1 p v)])] + [(Prim2 p e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) ['err 'err] - ['() xv] - [yv (if (list? xv) - (append xv yv) - 'err)])])] - [d d])) - -;; Expr REnv Natural -> Answer -(define (interp-qq-list d r n) - ;(println `(interp-qq-list ,d ,n)) - (match d - [`(,'unquote ,e) - (if (zero? n) - (match (interp-env (desugar e) r) ;! - ['err 'err] - [v (list v)]) - (list (cons 'unquote (interp-qq-list e r (sub1 n)))))] - [`(,'unquote-splicing ,e) - (if (zero? n) - (interp-env e r) - (list (cons 'unquote-splicing (interp-qq-list e r (sub1 n)))))] - [`(,'quasiquote ,d) - (list (cons 'quasiquote (interp-qq-list d r (add1 n))))] - [`(,x . 
,y) - (match (interp-qq-list x r n) + [v2 (interp-prim2 p v1 v2)])])] + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r ds) ['err 'err] - [xv (match (interp-qq y r n) + [v1 (match (interp-env e2 r ds) ['err 'err] - [yv (list (append xv yv))])])] - [d (list d)])) + [v2 (match (interp-env e3 r ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] + [(If p e1 e2) + (match (interp-env p r ds) + ['err 'err] + [v + (if v + (interp-env e1 r ds) + (interp-env e2 r ds))])] + [(Begin e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [_ (interp-env e2 r ds)])] + [(Let x e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (λ vs + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err))] + [(App e es) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (if (procedure? f) + (apply f vs) + 'err)])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PSymb s) (and (eq? s v) r)] + [(PStr s) (and (string? v) (string=? s v) r)] + [(PLit l) (and (eqv? 
l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) + +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) + +;; (Listof Expr) REnv Defns -> (Listof Value) | 'err +(define (interp-env* es r ds) + (match es + ['() '()] + [(cons e es) + (match (interp-env e r ds) + ['err 'err] + [v (match (interp-env* es r ds) + ['err 'err] + [vs (cons v vs)])])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) - +(define (zip xs ys) + (match* (xs ys) + [('() '()) '()] + [((cons x xs) (cons y ys)) + (cons (list x y) + (zip xs ys))])) diff --git a/langs/mug/io.c b/langs/mug/io.c new file mode 100644 index 00000000..7ef82281 --- /dev/null +++ b/langs/mug/io.c @@ -0,0 +1,25 @@ +#include +#include +#include "types.h" +#include "values.h" +#include "runtime.h" + +val_t read_byte(void) +{ + char c = getc(in); + return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); +} + +val_t peek_byte(void) +{ + char c = getc(in); + ungetc(c, in); + return (c == EOF) ? 
val_wrap_eof() : val_wrap_int(c); + +} + +val_t write_byte(val_t c) +{ + putc((char) val_unwrap_int(c), out); + return val_wrap_void(); +} diff --git a/langs/mug/lambdas.rkt b/langs/mug/lambdas.rkt new file mode 100644 index 00000000..0a246408 --- /dev/null +++ b/langs/mug/lambdas.rkt @@ -0,0 +1,35 @@ +#lang racket +(require "ast.rkt") +(provide lambdas) + + +;; Prog -> [Listof Lam] +;; List all of the lambda expressions in p +(define (lambdas p) + (match p + [(Prog ds e) + (append (lambdas-ds ds) (lambdas-e e))])) + +;; Defns -> [Listof Lam] +;; List all of the lambda expressions in ds +(define (lambdas-ds ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (append (lambdas-e e) + (lambdas-ds ds))])) + +;; Expr -> [Listof Lam] +;; List all of the lambda expressions in e +(define (lambdas-e e) + (match e + [(Prim1 p e) (lambdas-e e)] + [(Prim2 p e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Prim3 p e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(If e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(Begin e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Let x e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(App e1 es) (append (lambdas-e e1) (append-map lambdas-e es))] + [(Lam f xs e1) (cons e (lambdas-e e1))] + [(Match e ps es) (append (lambdas-e e) (append-map lambdas-e es))] + [_ '()])) diff --git a/langs/mug/main.c b/langs/mug/main.c new file mode 100644 index 00000000..1ca6115f --- /dev/null +++ b/langs/mug/main.c @@ -0,0 +1,40 @@ +#include +#include +#include "values.h" +#include "print.h" +#include "runtime.h" + +FILE* in; +FILE* out; +void (*error_handler)(); +val_t *heap; + +void error_exit() +{ + printf("err\n"); + exit(1); +} + +void raise_error() +{ + return error_handler(); +} + +int main(int argc, char** argv) +{ + in = stdin; + out = stdout; + error_handler = &error_exit; + heap = malloc(8 * heap_size); + + val_t result; + + result = entry(heap); + + print_result(result); + if (val_typeof(result) != 
T_VOID) + putchar('\n'); + + free(heap); + return 0; +} diff --git a/langs/blackmail/compile-file.rkt b/langs/mug/parse-file.rkt similarity index 70% rename from langs/blackmail/compile-file.rkt rename to langs/mug/parse-file.rkt index 988e3121..a5021320 100644 --- a/langs/blackmail/compile-file.rkt +++ b/langs/mug/parse-file.rkt @@ -1,6 +1,6 @@ #lang racket (provide main) -(require "parse.rkt" "compile.rkt" a86/printer) +(require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer) ;; String -> Void ;; Compile contents of given file name, @@ -9,5 +9,5 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (displayln (asm-string (compile (parse (read p))))) + (displayln (parse (read-all p))) (close-input-port p)))) diff --git a/langs/iniquity/parse.rkt b/langs/mug/parse.rkt similarity index 57% rename from langs/iniquity/parse.rkt rename to langs/mug/parse.rkt index 49941588..5de4e9b4 100644 --- a/langs/iniquity/parse.rkt +++ b/langs/mug/parse.rkt @@ -24,13 +24,14 @@ ;; S-Expr -> Expr (define (parse-e s) (match s - [(? integer?) (Int s)] + [(? exact-integer?) (Int s)] [(? boolean?) (Bool s)] [(? char?) (Char s)] [(? string?) (Str s)] ['eof (Eof)] [(? symbol?) (Var s)] [(list 'quote (list)) (Empty)] + [(list 'quote (? symbol? s)) (Symb s)] [(list (? (op? op0) p0)) (Prim0 p0)] [(list (? (op? op1) p1) e) (Prim1 p1 (parse-e e))] [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse-e e1) (parse-e e2))] @@ -42,20 +43,56 @@ (If (parse-e e1) (parse-e e2) (parse-e e3))] [(list 'let (list (list (? symbol? x) e1)) e2) (Let x (parse-e e1) (parse-e e2))] - [(cons (? symbol? f) es) - (App f (map parse-e es))] + [(cons 'match (cons e ms)) + (parse-match (parse-e e) ms)] + [(list (or 'lambda 'λ) xs e) + (if (and (list? xs) + (andmap symbol? 
xs)) + (Lam (gensym 'lambda) xs (parse-e e)) + (error "parse lambda error"))] + [(cons e es) + (App (parse-e e) (map parse-e es))] [_ (error "Parse error" s)])) +(define (parse-match e ms) + (match ms + ['() (Match e '() '())] + [(cons (list p r) ms) + (match (parse-match e ms) + [(Match e ps es) + (Match e + (cons (parse-pat p) ps) + (cons (parse-e r) es))])])) + +(define (parse-pat p) + (match p + [(? boolean?) (PLit p)] + [(? integer?) (PLit p)] + [(? char?) (PLit p)] + ['_ (PWild)] + [(? symbol?) (PVar p)] + [(? string?) (PStr p)] + [(list 'quote (? symbol? s)) + (PSymb s)] + [(list 'quote (list)) + (PLit '())] + [(list 'box p) + (PBox (parse-pat p))] + [(list 'cons p1 p2) + (PCons (parse-pat p1) (parse-pat p2))] + [(list 'and p1 p2) + (PAnd (parse-pat p1) (parse-pat p2))])) + (define op0 '(read-byte peek-byte void)) - (define op1 '(add1 sub1 zero? char? write-byte eof-object? integer->char char->integer box unbox empty? cons? box? car cdr - vector? vector-length string? string-length)) + vector? vector-length string? string-length + symbol? symbol->string string->symbol string->uninterned-symbol)) (define op2 - '(+ - < = cons make-vector vector-ref make-string string-ref)) + '(+ - < = cons eq? make-vector vector-ref make-string string-ref)) (define op3 '(vector-set!)) diff --git a/langs/mug/pat.rkt b/langs/mug/pat.rkt deleted file mode 100644 index 2a5c11f6..00000000 --- a/langs/mug/pat.rkt +++ /dev/null @@ -1,200 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -;; type Expr+ = -;; .... -;; | Match - -;; type Match = (match ,Expr+ ,(list Pat Expr+) ...) - -;; type Pat = -;; | #t -;; | #f -;; | Integer -;; | String -;; | Variable -;; | `_ -;; | `'() -;; | `(quote ,Symbol) -;; | `(cons ,Pat ,Pat) -;; | `(list ,Pat ...) -;; | `(? ,Expr ,Pat ...) 
- -;; Match -> Expr -;; Rewrite match expression into an equivalent cond expression -(define (match->cond m) - (match m - [(cons 'match (cons e mcs)) - (let ((x (gensym))) - `(let ((,x ,e)) - (cond ,@(map (λ (mc) - (match mc - [(list p e) - (list (pat-match p x) (pat-bind p x e))])) - mcs) - ;; fall through to error - [else (car '())])))])) - -;; Example -#; -(define (sum bt) - (match bt - ['leaf 0] - [(list 'node v l r) - (+ v - (+ (sum l) - (sum r)))])) -#; -(define (sum^ bt) - (cond - [(eq? 'leaf bt) 0] - [(and (list? bt) - (= 4 (length bt)) - (eq? 'node (first bt))) - (let ((v (second bt)) - (l (third bt)) - (r (fourth bt))) - (+ v - (+ (sum l) - (sum r))))])) - -#; -`(define (sum bt) - ,(match->cond - '(match bt - ['leaf 0] - [(list 'node v l r) - (+ v - (+ (sum l) - (sum r)))]))) - -;; Two tasks: -;; 1. rewrite patterns into Boolean valued expressions that answer -;; whether the pattern matches the scrutiny -;; 2. rewrite pattern and RHS in to expressions in which the pattern variables -;; of pattern are bound to the appropriately deconstructed parts of the scrutiny - -;; Assume: the scrutiny is a variable. -;; (It's easy to establish this assumption in general.) - -;; Two functions: - -#; -;; Pat Variable -> Expr -;; Produces an expression determining if p matches v -(define (pat-match p v) ...) - -#; -;; Pat Variable Expr -> Expr -;; Produce an expression that deconstructs v and binds pattern variables -;; of p in scope of e. -;; ASSUME: v matches p -(define (pat-bind p v e) ...) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Pattern matching - -;; Pat Variable -> Expr -;; Produces an expression determining if p matches v -(define (pat-match p v) - (match p - [#t `(eq? #t ,v)] - [#f `(eq? #f ,v)] - [(? integer? i) `(eq? ,i ,v)] - [(? string? s) - `(and (string? ,v) - (string=? ,s ,v))] - [(list 'quote '()) `(eq? '() ,v)] - [(? symbol?) #t] - [(list 'quote (? symbol? s)) `(eq? 
,v ',s)] - [(list 'cons p1 p2) - (let ((v1 (gensym)) - (v2 (gensym))) - `(and (cons? ,v) - (let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - (and ,(pat-match p1 v1) - ,(pat-match p2 v2)))))] - [(cons 'list ps) - `(and (list? ,v) - (= (length ,v) ,(length ps)) - ,(pat-match-list ps v))] - [(cons '? (cons e ps)) - `(and (,e ,v) - ,(pats-match ps v))])) - -;; (Listof Pat) Variable -> Expr -;; Produces an expression determining if every ps matches x -(define (pats-match ps v) - (match ps - ['() #t] - [(cons p ps) - `(and ,(pat-match p v) - ,(pats-match ps v))])) - -;; (Listof Pat) Variable -> Expr -;; Produces an expression determining if each ps matches each element of list v -(define (pat-match-list ps v) - (match ps - ['() #t] - [(cons p ps) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - (and ,(pat-match p v1) - ,(pat-match-list ps v2))))])) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Pattern binding - -;; Pat Variable Expr -> Expr -;; Produce an expression that deconstructs v and binds pattern variables -;; of p in scope of e. -;; ASSUME: v matches p -(define (pat-bind p v e) - (match p - [#t e] - [#f e] - [(? integer?) e] - [(? string?) e] - [(list 'quote '()) e] - ['_ e] - [(? symbol? x) `(let ((,x ,v)) ,e)] - [(list 'quote (? symbol?)) e] - [(list 'cons p1 p2) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - ,(pat-bind p1 v1 - (pat-bind p2 v2 e))))] - [(cons 'list ps) - (pat-bind-list ps v e)] - [(cons '? (cons _ ps)) - (pats-bind ps v e)])) - -;; (Listof Pat) Variable Expr -> Expr -;; Produce an expression that doconstructs v and binds pattern variables -;; of ps (each matched against v) in scope of e. 
-;; ASSUME: v matches every element of ps -(define (pats-bind ps v e) - (match ps - ['() e] - [(cons p ps) - (pat-bind p v (pats-bind ps v e))])) - -;; (Listof Pat) Variable Expr -> Expr -;; Produce an expression that deconstructs list v and binds pattern variables -;; of ps (matched element-wise against v) in scope of e. -;; ASSUME: elemens of v matches elements of ps -(define (pat-bind-list ps v e) - (match ps - ['() e] - [(cons p ps) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - ,(pat-bind p v1 (pat-bind-list ps v2 e))))])) diff --git a/langs/mug/print.c b/langs/mug/print.c new file mode 100644 index 00000000..2bcb21dc --- /dev/null +++ b/langs/mug/print.c @@ -0,0 +1,855 @@ +#include +#include +#include "values.h" + +void print_char(val_char_t); +void print_codepoint(val_char_t); +void print_cons(val_cons_t *); +void print_vect(val_vect_t*); +void print_str(val_str_t*); +void print_symb(val_symb_t*); +void print_str_char(val_char_t); +void print_result_interior(val_t); +int utf8_encode_char(val_char_t, char *); + +void print_result(val_t x) +{ + switch (val_typeof(x)) { + case T_INT: + printf("%" PRId64, val_unwrap_int(x)); + break; + case T_BOOL: + printf(val_unwrap_bool(x) ? 
"#t" : "#f"); + break; + case T_CHAR: + print_char(val_unwrap_char(x)); + break; + case T_EOF: + printf("#"); + break; + case T_VOID: + break; + case T_EMPTY: + case T_BOX: + case T_CONS: + case T_VECT: + printf("'"); + print_result_interior(x); + break; + case T_STR: + putchar('"'); + print_str(val_unwrap_str(x)); + putchar('"'); + break; + case T_SYMB: + printf("'"); + print_result_interior(x); + break; + case T_PROC: + printf("#"); + break; + case T_INVALID: + printf("internal error"); + } +} + +void print_symb(val_symb_t *s) +{ + print_str((val_str_t*) s); +} + +void print_result_interior(val_t x) +{ + switch (val_typeof(x)) { + case T_EMPTY: + printf("()"); + break; + case T_BOX: + printf("#&"); + print_result_interior(val_unwrap_box(x)->val); + break; + case T_CONS: + printf("("); + print_cons(val_unwrap_cons(x)); + printf(")"); + break; + case T_SYMB: + print_symb(val_unwrap_symb(x)); + break; + case T_VECT: + print_vect(val_unwrap_vect(x)); + break; + default: + print_result(x); + } +} + +void print_vect(val_vect_t *v) +{ + uint64_t i; + + if (!v) { printf("#()"); return; } + + printf("#("); + for (i = 0; i < v->len; ++i) { + print_result_interior(v->elems[i]); + + if (i < v->len - 1) + putchar(' '); + } + printf(")"); +} + +void print_cons(val_cons_t *cons) +{ + print_result_interior(cons->fst); + + switch (val_typeof(cons->snd)) { + case T_EMPTY: + // nothing + break; + case T_CONS: + printf(" "); + print_cons(val_unwrap_cons(cons->snd)); + break; + default: + printf(" . "); + print_result_interior(cons->snd); + break; + } +} + +void print_str(val_str_t* s) +{ + if (!s) return; + uint64_t i; + for (i = 0; i < s->len; ++i) + print_str_char(s->codepoints[i]); +} + +void print_str_char_u(val_char_t c) +{ + printf("\\u%04X", c); +} + +void print_str_char_U(val_char_t c) +{ + printf("\\U%08X", c); +} + +void print_str_char(val_char_t c) +{ + switch (c) { + case 0 ... 
6: + print_str_char_u(c); + break; + case 7: + printf("\\a"); + break; + case 8: + printf("\\b"); + break; + case 9: + printf("\\t"); + break; + case 10: + printf("\\n"); + break; + case 11: + printf("\\v"); + break; + case 12: + printf("\\f"); + break; + case 13: + printf("\\r"); + break; + case 14 ... 26: + print_str_char_u(c); + break; + case 27: + printf("\\e"); + break; + case 28 ... 31: + print_str_char_u(c); + break; + case 34: + printf("\\\""); + break; + case 39: + printf("'"); + break; + case 92: + printf("\\\\"); + break; + case 127 ... 159: + case 173 ... 173: + case 888 ... 889: + case 896 ... 899: + case 907 ... 907: + case 909 ... 909: + case 930 ... 930: + case 1328 ... 1328: + case 1367 ... 1368: + case 1376 ... 1376: + case 1416 ... 1416: + case 1419 ... 1420: + case 1424 ... 1424: + case 1480 ... 1487: + case 1515 ... 1519: + case 1525 ... 1541: + case 1564 ... 1565: + case 1757 ... 1757: + case 1806 ... 1807: + case 1867 ... 1868: + case 1970 ... 1983: + case 2043 ... 2047: + case 2094 ... 2095: + case 2111 ... 2111: + case 2140 ... 2141: + case 2143 ... 2207: + case 2227 ... 2275: + case 2436 ... 2436: + case 2445 ... 2446: + case 2449 ... 2450: + case 2473 ... 2473: + case 2481 ... 2481: + case 2483 ... 2485: + case 2490 ... 2491: + case 2501 ... 2502: + case 2505 ... 2506: + case 2511 ... 2518: + case 2520 ... 2523: + case 2526 ... 2526: + case 2532 ... 2533: + case 2556 ... 2560: + case 2564 ... 2564: + case 2571 ... 2574: + case 2577 ... 2578: + case 2601 ... 2601: + case 2609 ... 2609: + case 2612 ... 2612: + case 2615 ... 2615: + case 2618 ... 2619: + case 2621 ... 2621: + case 2627 ... 2630: + case 2633 ... 2634: + case 2638 ... 2640: + case 2642 ... 2648: + case 2653 ... 2653: + case 2655 ... 2661: + case 2678 ... 2688: + case 2692 ... 2692: + case 2702 ... 2702: + case 2706 ... 2706: + case 2729 ... 2729: + case 2737 ... 2737: + case 2740 ... 2740: + case 2746 ... 2747: + case 2758 ... 2758: + case 2762 ... 2762: + case 2766 ... 
2767: + case 2769 ... 2783: + case 2788 ... 2789: + case 2802 ... 2816: + case 2820 ... 2820: + case 2829 ... 2830: + case 2833 ... 2834: + case 2857 ... 2857: + case 2865 ... 2865: + case 2868 ... 2868: + case 2874 ... 2875: + case 2885 ... 2886: + case 2889 ... 2890: + case 2894 ... 2901: + case 2904 ... 2907: + case 2910 ... 2910: + case 2916 ... 2917: + case 2936 ... 2945: + case 2948 ... 2948: + case 2955 ... 2957: + case 2961 ... 2961: + case 2966 ... 2968: + case 2971 ... 2971: + case 2973 ... 2973: + case 2976 ... 2978: + case 2981 ... 2983: + case 2987 ... 2989: + case 3002 ... 3005: + case 3011 ... 3013: + case 3017 ... 3017: + case 3022 ... 3023: + case 3025 ... 3030: + case 3032 ... 3045: + case 3067 ... 3071: + case 3076 ... 3076: + case 3085 ... 3085: + case 3089 ... 3089: + case 3113 ... 3113: + case 3130 ... 3132: + case 3141 ... 3141: + case 3145 ... 3145: + case 3150 ... 3156: + case 3159 ... 3159: + case 3162 ... 3167: + case 3172 ... 3173: + case 3184 ... 3191: + case 3200 ... 3200: + case 3204 ... 3204: + case 3213 ... 3213: + case 3217 ... 3217: + case 3241 ... 3241: + case 3252 ... 3252: + case 3258 ... 3259: + case 3269 ... 3269: + case 3273 ... 3273: + case 3278 ... 3284: + case 3287 ... 3293: + case 3295 ... 3295: + case 3300 ... 3301: + case 3312 ... 3312: + case 3315 ... 3328: + case 3332 ... 3332: + case 3341 ... 3341: + case 3345 ... 3345: + case 3387 ... 3388: + case 3397 ... 3397: + case 3401 ... 3401: + case 3407 ... 3414: + case 3416 ... 3423: + case 3428 ... 3429: + case 3446 ... 3448: + case 3456 ... 3457: + case 3460 ... 3460: + case 3479 ... 3481: + case 3506 ... 3506: + case 3516 ... 3516: + case 3518 ... 3519: + case 3527 ... 3529: + case 3531 ... 3534: + case 3541 ... 3541: + case 3543 ... 3543: + case 3552 ... 3557: + case 3568 ... 3569: + case 3573 ... 3584: + case 3643 ... 3646: + case 3676 ... 3712: + case 3715 ... 3715: + case 3717 ... 3718: + case 3721 ... 3721: + case 3723 ... 3724: + case 3726 ... 
3731: + case 3736 ... 3736: + case 3744 ... 3744: + case 3748 ... 3748: + case 3750 ... 3750: + case 3752 ... 3753: + case 3756 ... 3756: + case 3770 ... 3770: + case 3774 ... 3775: + case 3781 ... 3781: + case 3783 ... 3783: + case 3790 ... 3791: + case 3802 ... 3803: + case 3808 ... 3839: + case 3912 ... 3912: + case 3949 ... 3952: + case 3992 ... 3992: + case 4029 ... 4029: + case 4045 ... 4045: + case 4059 ... 4095: + case 4294 ... 4294: + case 4296 ... 4300: + case 4302 ... 4303: + case 4681 ... 4681: + case 4686 ... 4687: + case 4695 ... 4695: + case 4697 ... 4697: + case 4702 ... 4703: + case 4745 ... 4745: + case 4750 ... 4751: + case 4785 ... 4785: + case 4790 ... 4791: + case 4799 ... 4799: + case 4801 ... 4801: + case 4806 ... 4807: + case 4823 ... 4823: + case 4881 ... 4881: + case 4886 ... 4887: + case 4955 ... 4956: + case 4989 ... 4991: + case 5018 ... 5023: + case 5109 ... 5119: + case 5789 ... 5791: + case 5881 ... 5887: + case 5901 ... 5901: + case 5909 ... 5919: + case 5943 ... 5951: + case 5972 ... 5983: + case 5997 ... 5997: + case 6001 ... 6001: + case 6004 ... 6015: + case 6110 ... 6111: + case 6122 ... 6127: + case 6138 ... 6143: + case 6158 ... 6159: + case 6170 ... 6175: + case 6264 ... 6271: + case 6315 ... 6319: + case 6390 ... 6399: + case 6431 ... 6431: + case 6444 ... 6447: + case 6460 ... 6463: + case 6465 ... 6467: + case 6510 ... 6511: + case 6517 ... 6527: + case 6572 ... 6575: + case 6602 ... 6607: + case 6619 ... 6621: + case 6684 ... 6685: + case 6751 ... 6751: + case 6781 ... 6782: + case 6794 ... 6799: + case 6810 ... 6815: + case 6830 ... 6831: + case 6847 ... 6911: + case 6988 ... 6991: + case 7037 ... 7039: + case 7156 ... 7163: + case 7224 ... 7226: + case 7242 ... 7244: + case 7296 ... 7359: + case 7368 ... 7375: + case 7415 ... 7415: + case 7418 ... 7423: + case 7670 ... 7675: + case 7958 ... 7959: + case 7966 ... 7967: + case 8006 ... 8007: + case 8014 ... 8015: + case 8024 ... 8024: + case 8026 ... 
8026: + case 8028 ... 8028: + case 8030 ... 8030: + case 8062 ... 8063: + case 8117 ... 8117: + case 8133 ... 8133: + case 8148 ... 8149: + case 8156 ... 8156: + case 8176 ... 8177: + case 8181 ... 8181: + case 8191 ... 8191: + case 8203 ... 8207: + case 8232 ... 8238: + case 8288 ... 8303: + case 8306 ... 8307: + case 8335 ... 8335: + case 8349 ... 8351: + case 8382 ... 8399: + case 8433 ... 8447: + case 8586 ... 8591: + case 9211 ... 9215: + case 9255 ... 9279: + case 9291 ... 9311: + case 11124 ... 11125: + case 11158 ... 11159: + case 11194 ... 11196: + case 11209 ... 11209: + case 11218 ... 11263: + case 11311 ... 11311: + case 11359 ... 11359: + case 11508 ... 11512: + case 11558 ... 11558: + case 11560 ... 11564: + case 11566 ... 11567: + case 11624 ... 11630: + case 11633 ... 11646: + case 11671 ... 11679: + case 11687 ... 11687: + case 11695 ... 11695: + case 11703 ... 11703: + case 11711 ... 11711: + case 11719 ... 11719: + case 11727 ... 11727: + case 11735 ... 11735: + case 11743 ... 11743: + case 11843 ... 11903: + case 11930 ... 11930: + case 12020 ... 12031: + case 12246 ... 12271: + case 12284 ... 12287: + case 12352 ... 12352: + case 12439 ... 12440: + case 12544 ... 12548: + case 12590 ... 12592: + case 12687 ... 12687: + case 12731 ... 12735: + case 12772 ... 12783: + case 12831 ... 12831: + case 13055 ... 13055: + case 19894 ... 19903: + case 40909 ... 40959: + case 42125 ... 42127: + case 42183 ... 42191: + case 42540 ... 42559: + case 42654 ... 42654: + case 42744 ... 42751: + case 42895 ... 42895: + case 42926 ... 42927: + case 42930 ... 42998: + case 43052 ... 43055: + case 43066 ... 43071: + case 43128 ... 43135: + case 43205 ... 43213: + case 43226 ... 43231: + case 43260 ... 43263: + case 43348 ... 43358: + case 43389 ... 43391: + case 43470 ... 43470: + case 43482 ... 43485: + case 43519 ... 43519: + case 43575 ... 43583: + case 43598 ... 43599: + case 43610 ... 43611: + case 43715 ... 43738: + case 43767 ... 43776: + case 43783 ... 
43784: + case 43791 ... 43792: + case 43799 ... 43807: + case 43815 ... 43815: + case 43823 ... 43823: + case 43872 ... 43875: + case 43878 ... 43967: + case 44014 ... 44015: + case 44026 ... 44031: + case 55204 ... 55215: + case 55239 ... 55242: + case 55292 ... 55295: + case 57344 ... 63743: + case 64110 ... 64111: + case 64218 ... 64255: + case 64263 ... 64274: + case 64280 ... 64284: + case 64311 ... 64311: + case 64317 ... 64317: + case 64319 ... 64319: + case 64322 ... 64322: + case 64325 ... 64325: + case 64450 ... 64466: + case 64832 ... 64847: + case 64912 ... 64913: + case 64968 ... 65007: + case 65022 ... 65023: + case 65050 ... 65055: + case 65070 ... 65071: + case 65107 ... 65107: + case 65127 ... 65127: + case 65132 ... 65135: + case 65141 ... 65141: + case 65277 ... 65280: + case 65471 ... 65473: + case 65480 ... 65481: + case 65488 ... 65489: + case 65496 ... 65497: + case 65501 ... 65503: + case 65511 ... 65511: + case 65519 ... 65531: + case 65534 ... 65535: + print_str_char_u(c); + break; + case 65548 ... 65548: + case 65575 ... 65575: + case 65595 ... 65595: + case 65598 ... 65598: + case 65614 ... 65615: + case 65630 ... 65663: + case 65787 ... 65791: + case 65795 ... 65798: + case 65844 ... 65846: + case 65933 ... 65935: + case 65948 ... 65951: + case 65953 ... 65999: + case 66046 ... 66175: + case 66205 ... 66207: + case 66257 ... 66271: + case 66300 ... 66303: + case 66340 ... 66351: + case 66379 ... 66383: + case 66427 ... 66431: + case 66462 ... 66462: + case 66500 ... 66503: + case 66518 ... 66559: + case 66718 ... 66719: + case 66730 ... 66815: + case 66856 ... 66863: + case 66916 ... 66926: + case 66928 ... 67071: + case 67383 ... 67391: + case 67414 ... 67423: + case 67432 ... 67583: + case 67590 ... 67591: + case 67593 ... 67593: + case 67638 ... 67638: + case 67641 ... 67643: + case 67645 ... 67646: + case 67670 ... 67670: + case 67743 ... 67750: + case 67760 ... 67839: + case 67868 ... 67870: + case 67898 ... 67902: + case 67904 ... 
67967: + case 68024 ... 68029: + case 68032 ... 68095: + case 68100 ... 68100: + case 68103 ... 68107: + case 68116 ... 68116: + case 68120 ... 68120: + case 68148 ... 68151: + case 68155 ... 68158: + case 68168 ... 68175: + case 68185 ... 68191: + case 68256 ... 68287: + case 68327 ... 68330: + case 68343 ... 68351: + case 68406 ... 68408: + case 68438 ... 68439: + case 68467 ... 68471: + case 68498 ... 68504: + case 68509 ... 68520: + case 68528 ... 68607: + case 68681 ... 69215: + case 69247 ... 69631: + case 69710 ... 69713: + case 69744 ... 69758: + case 69821 ... 69821: + case 69826 ... 69839: + case 69865 ... 69871: + case 69882 ... 69887: + case 69941 ... 69941: + case 69956 ... 69967: + case 70007 ... 70015: + case 70089 ... 70092: + case 70094 ... 70095: + case 70107 ... 70112: + case 70133 ... 70143: + case 70162 ... 70162: + case 70206 ... 70319: + case 70379 ... 70383: + case 70394 ... 70400: + case 70404 ... 70404: + case 70413 ... 70414: + case 70417 ... 70418: + case 70441 ... 70441: + case 70449 ... 70449: + case 70452 ... 70452: + case 70458 ... 70459: + case 70469 ... 70470: + case 70473 ... 70474: + case 70478 ... 70486: + case 70488 ... 70492: + case 70500 ... 70501: + case 70509 ... 70511: + case 70517 ... 70783: + case 70856 ... 70863: + case 70874 ... 71039: + case 71094 ... 71095: + case 71114 ... 71167: + case 71237 ... 71247: + case 71258 ... 71295: + case 71352 ... 71359: + case 71370 ... 71839: + case 71923 ... 71934: + case 71936 ... 72383: + case 72441 ... 73727: + case 74649 ... 74751: + case 74863 ... 74863: + case 74869 ... 77823: + case 78895 ... 92159: + case 92729 ... 92735: + case 92767 ... 92767: + case 92778 ... 92781: + case 92784 ... 92879: + case 92910 ... 92911: + case 92918 ... 92927: + case 92998 ... 93007: + case 93018 ... 93018: + case 93026 ... 93026: + case 93048 ... 93052: + case 93072 ... 93951: + case 94021 ... 94031: + case 94079 ... 94094: + case 94112 ... 110591: + case 110594 ... 113663: + case 113771 ... 
113775: + case 113789 ... 113791: + case 113801 ... 113807: + case 113818 ... 113819: + case 113824 ... 118783: + case 119030 ... 119039: + case 119079 ... 119080: + case 119155 ... 119162: + case 119262 ... 119295: + case 119366 ... 119551: + case 119639 ... 119647: + case 119666 ... 119807: + case 119893 ... 119893: + case 119965 ... 119965: + case 119968 ... 119969: + case 119971 ... 119972: + case 119975 ... 119976: + case 119981 ... 119981: + case 119994 ... 119994: + case 119996 ... 119996: + case 120004 ... 120004: + case 120070 ... 120070: + case 120075 ... 120076: + case 120085 ... 120085: + case 120093 ... 120093: + case 120122 ... 120122: + case 120127 ... 120127: + case 120133 ... 120133: + case 120135 ... 120137: + case 120145 ... 120145: + case 120486 ... 120487: + case 120780 ... 120781: + case 120832 ... 124927: + case 125125 ... 125126: + case 125143 ... 126463: + case 126468 ... 126468: + case 126496 ... 126496: + case 126499 ... 126499: + case 126501 ... 126502: + case 126504 ... 126504: + case 126515 ... 126515: + case 126520 ... 126520: + case 126522 ... 126522: + case 126524 ... 126529: + case 126531 ... 126534: + case 126536 ... 126536: + case 126538 ... 126538: + case 126540 ... 126540: + case 126544 ... 126544: + case 126547 ... 126547: + case 126549 ... 126550: + case 126552 ... 126552: + case 126554 ... 126554: + case 126556 ... 126556: + case 126558 ... 126558: + case 126560 ... 126560: + case 126563 ... 126563: + case 126565 ... 126566: + case 126571 ... 126571: + case 126579 ... 126579: + case 126584 ... 126584: + case 126589 ... 126589: + case 126591 ... 126591: + case 126602 ... 126602: + case 126620 ... 126624: + case 126628 ... 126628: + case 126634 ... 126634: + case 126652 ... 126703: + case 126706 ... 126975: + case 127020 ... 127023: + case 127124 ... 127135: + case 127151 ... 127152: + case 127168 ... 127168: + case 127184 ... 127184: + case 127222 ... 127231: + case 127245 ... 127247: + case 127279 ... 
127279: + case 127340 ... 127343: + case 127387 ... 127461: + case 127491 ... 127503: + case 127547 ... 127551: + case 127561 ... 127567: + case 127570 ... 127743: + case 127789 ... 127791: + case 127870 ... 127871: + case 127951 ... 127955: + case 127992 ... 127999: + case 128255 ... 128255: + case 128331 ... 128335: + case 128378 ... 128378: + case 128420 ... 128420: + case 128579 ... 128580: + case 128720 ... 128735: + case 128749 ... 128751: + case 128756 ... 128767: + case 128884 ... 128895: + case 128981 ... 129023: + case 129036 ... 129039: + case 129096 ... 129103: + case 129114 ... 129119: + case 129160 ... 129167: + case 129198 ... 131071: + case 173783 ... 173823: + case 177973 ... 177983: + case 178206 ... 194559: + case 195102 ... 917759: + case 918000 ... 1114110: + print_str_char_U(c); + break; + default: + print_codepoint(c); + break; + } +} + +void print_char(val_char_t c) +{ + printf("#\\"); + switch (c) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(c); + } +} + +void print_codepoint(val_char_t c) +{ + char buffer[5] = {0}; + utf8_encode_char(c, buffer); + printf("%s", buffer); +} + +int utf8_encode_char(val_char_t c, char *buffer) +{ + // Output to buffer using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (c < 128) { + buffer[0] = (char) c; + return 1; + } else if (c < 2048) { + buffer[0] = (char)(c >> 6) | 192; + buffer[1] = ((char) c & 63) | 128; + return 2; + } else if (c < 65536) { + buffer[0] = (char)(c >> 12) | 224; + buffer[1] = ((char)(c >> 6) & 63) | 128; + buffer[2] = ((char) c & 63) | 128; + return 3; + } else { + buffer[0] = (char)(c >> 18) | 240; + buffer[1] = ((char)(c >> 12) & 63) | 128; + 
buffer[2] = ((char)(c >> 6) & 63) | 128; + buffer[3] = ((char) c & 63) | 128; + return 4; + } +} diff --git a/langs/mug/print.h b/langs/mug/print.h new file mode 100644 index 00000000..c22081a2 --- /dev/null +++ b/langs/mug/print.h @@ -0,0 +1,8 @@ +#ifndef PRINT_H +#define PRINT_H + +#include "values.h" + +void print_result(val_t); + +#endif diff --git a/langs/mug/read-all.rkt b/langs/mug/read-all.rkt new file mode 100644 index 00000000..8a3289a5 --- /dev/null +++ b/langs/mug/read-all.rkt @@ -0,0 +1,8 @@ +#lang racket +(provide read-all) +;; read all s-expression until eof +(define (read-all) + (let ((r (read))) + (if (eof-object? r) + '() + (cons r (read-all))))) diff --git a/langs/mug/run.rkt b/langs/mug/run.rkt new file mode 100644 index 00000000..eaa53eb9 --- /dev/null +++ b/langs/mug/run.rkt @@ -0,0 +1,18 @@ +#lang racket +(provide run run/io) +(require "types.rkt" "build-runtime.rkt" + a86/interp) + +;; Asm -> Answer +(define (run is) + (parameterize ((current-objs (list runtime-path))) + (match (asm-interp is) + ['err 'err] + [b (bits->value b)]))) + +;; Asm String -> (cons Answer String) +(define (run/io is s) + (parameterize ((current-objs (list runtime-path))) + (match (asm-interp/io is s) + [(cons 'err o) (cons 'err o)] + [(cons b o) (cons (bits->value b) o)]))) diff --git a/langs/mug/runtime.h b/langs/mug/runtime.h new file mode 100644 index 00000000..f594f0f6 --- /dev/null +++ b/langs/mug/runtime.h @@ -0,0 +1,11 @@ +#ifndef RUNTIME_H +#define RUNTIME_H +int64_t entry(); +extern FILE* in; +extern FILE* out; +extern void (*error_handler)(); + +// in words +#define heap_size 10000 +extern int64_t *heap; +#endif /* RUNTIME_H */ diff --git a/langs/mug/symbol.c b/langs/mug/symbol.c new file mode 100644 index 00000000..bcff4f3f --- /dev/null +++ b/langs/mug/symbol.c @@ -0,0 +1,55 @@ +#include +#include +#include "values.h" + +int symb_cmp(const val_symb_t *, const val_symb_t *); + +// binary tree node +struct Node { + val_symb_t* elem; + struct Node* left; + 
struct Node* right; +}; + +static struct Node *symbol_tbl = NULL; + +val_symb_t *intern_symbol(val_symb_t* symb) +{ + struct Node **curr = &symbol_tbl; + + while (*curr) { + struct Node *t = *curr; + int r = symb_cmp(symb, t->elem); + if (r == 0) { + // found it, so return saved pointer + return t->elem; + } else if (r < 0) { + curr = &t->left; + } else { + curr = &t->right; + } + } + + // wasn't found, so insert it and return pointer + *curr = calloc(1, sizeof(struct Node)); + (*curr)->elem = symb; + return (*curr)->elem; +} + +int symb_cmp(const val_symb_t *s1, const val_symb_t *s2) +{ + if (s1 == s2) return 0; + + int64_t len1 = s1->len; + int64_t len2 = s2->len; + + int64_t len = len1 < len2 ? len1 : len2; + int i; + + for (i = 0; i < len; i++) { + if (s1->codepoints[i] != s2->codepoints[i]) + return s1->codepoints[i] - s2->codepoints[i]; + } + + return len1 - len2; +} diff --git a/langs/mug/syntax.rkt b/langs/mug/syntax.rkt deleted file mode 100644 index bad6439e..00000000 --- a/langs/mug/syntax.rkt +++ /dev/null @@ -1,194 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "pat.rkt") - -;; type Expr+ = -;; .... exprs with match, cond, begin/define, quote etc. - -;; type S-Expr = -;; | Boolean -;; | Integer -;; | String -;; | '() -;; | (Cons S-Expr S-Expr) - -;; Expr+ -> Expr -(define (desugar e+) - (match e+ - [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) - `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) - ,(desugar e))] - [(? symbol? x) x] - [(? imm? i) i] - [`',(? symbol? s) `',s] - [`',d (quote->expr d)] - [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] - [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] - [`(letrec ,bs ,e0) - `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) - ,(desugar e0))] - [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] - [`(match . ,_) (desugar (match->cond e+))] - [`(cond . 
,_) (desugar (cond->if e+))] - [`(and . ,_) (desugar (and->if e+))] - [`(or . ,_) (desugar (or->if e+))] - [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) - -;; S-Expr -> Expr -;; Produce an expression that evaluates to given s-expression, without -;; use of quote (except for symbols and empty list) -(define (quote->expr d) - (match d - [(? boolean?) d] - [(? integer?) d] - [(? string?) d] - [(? char?) d] - [(? symbol?) (list 'quote d)] - [(cons x y) (list 'cons (quote->expr x) (quote->expr y))] - ['() ''()])) - -(define (quasiquote->expr d) - (match d - [(? boolean?) d] - [(? integer?) d] - [(? string?) d] - [(? char?) d] - [(? symbol?) (list 'quote d)] - [(cons 'quasiquote d) - (quasiquote->expr (quasiquote->expr d))] - [(cons 'unquote d) d] - [(cons 'unquote-splicing d) 'ERROR] - [(cons x y) - `(append ,(quasiquote->list-expr x) - ,(quasiquote->expr y))] - ['() ''()])) - -(define (quasiquote->list-expr d) - (match d - [(? symbol?) (list 'quote d)] - ['() ''()] - [(cons 'quasiquote d) - (quasiquote->expr (quasiquote->expr d))] - [(cons 'unquote d) `(list ,d)] - [(cons 'unquote-splicing d) d] - [(cons x y) - `(list (append ,(quasiquote->list-expr x) - ,(quasiquote->expr y)))] - [_ `'(,d)])) - -;; Expr -> Expr -(define (cond->if c) - (match c - [`(cond (else ,e)) e] - [`(cond (,c ,e) . ,r) - `(if ,c ,e (cond ,@r))])) - -;; Expr -> Expr -(define (and->if c) - (match c - [`(and) #t] - [`(and ,e) e] - [`(and ,e . ,r) - `(if ,e (and ,@r) #f)])) - -;; Expr -> Expr -(define (or->if c) - (match c - [`(or) #f] - [`(or ,e) e] - [`(or ,e . ,r) - (let ((x (gensym))) - `(let ((,x ,e)) - (if ,x ,x (or ,@r))))])) - - -(define (qq-expand x depth) - (match x - [(cons 'quasiquote r) - `(cons 'quasiquote ,(qq-expand r (add1 depth)))] - [(cons 'unquote r) - (cond [(> depth 0) - `(cons ','unquote ,(qq-expand r (sub1 depth)))] - [(and (not (empty? r)) - (empty? 
(cdr r))) - (car r)] - [else - (error "Illegal")])] - [(cons 'unqupte-splicing r) - (error "Illegal")] - [(cons a b) - `(append ,(qq-expand-list a depth) - ,(qq-expand b depth))] - [_ `',x])) - -(define (qq-expand-list x depth) - (match x - [(cons 'quasiquote r) - `(list (cons 'quasiquote ,(qq-expand r (add1 depth))))] - [(cons 'unquote r) - (cond [(> depth 0) `(list (cons ','unquote ,(qq-expand r (sub1 depth))))] - [else `(list . ,r)])] - [(cons 'unquote-splicing r) - (cond [(> depth 0) `(list (cons ','unquote-splicing ,(qq-expand r (sub1 depth))))] - [else `(append . ,r)])] - [_ - `'(,x)])) - - - -;; Any -> Boolean -(define (imm? x) - (or (integer? x) - (boolean? x) - (char? x) - (equal? ''() x))) - -;; Expr -> LExpr -(define (label-λ e) - (match e - [(? symbol? x) x] - [(? imm? i) i] - [`(,(? prim? p) . ,es) `(,p ,@(map label-λ es))] - [`(if ,e0 ,e1 ,e2) `(if ,(label-λ e0) ,(label-λ e1) ,(label-λ e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(label-λ e0))) ,(label-λ e1))] - [`(letrec ,bs ,e0) `(letrec ,(map (λ (b) (list (first b) (label-λ (second b)))) bs) - ,(label-λ e0))] - [`(λ ,xs ,e0) `(λ ,xs ',(gensym) ,(label-λ e0))] - [`(,e . ,es) `(,(label-λ e) ,@(map label-λ es))])) - -;; LExpr -> (Listof LExpr) -;; Extract all the lambda expressions -(define (λs e) - (match e - [(? symbol? x) '()] - [(? imm? i) '()] - [`(,(? prim? p) . ,es) (append-map λs es)] - [`(if ,e0 ,e1 ,e2) (append (λs e0) (λs e1) (λs e2))] - [`(let ((,x ,e0)) ,e1) (append (λs e0) (λs e1))] - [`(letrec ,bs ,e0) (append (apply append (map (compose λs second) bs)) (λs e0))] - [`(λ ,xs ,l ,e0) (cons e (λs e0))] - [`(,e . ,es) (append (λs e) (apply append (map λs es)))])) - -;; LExpr -> (Listof Variable) -(define (fvs e) - (define (fvs e) - (match e - [(? symbol? x) (list x)] - [(? imm? i) '()] - [`(,(? prim? p) . 
,es) (append-map fvs es)] - [`(if ,e0 ,e1 ,e2) (append (fvs e0) (fvs e1) (fvs e2))] - [`(let ((,x ,e0)) ,e1) (append (fvs e0) (remq* (list x) (fvs e1)))] - [`(letrec ,bs ,e0) (remq* (map first bs) - (apply append (fvs e0) (map fvs (map second bs))))] - [`(λ ,xs ,l ,e0) (remq* xs (fvs e0))] - [`(,e . ,es) (append (fvs e) (apply append (map fvs es)))])) - (remove-duplicates (fvs e))) - -;; Any -> Boolean -(define (prim? x) - (and (symbol? x) - (memq x '(add1 sub1 zero? abs - char? boolean? integer? integer->char char->integer - string? box? empty? cons cons? box unbox car cdr string-length - make-string string-ref = < <= char=? boolean=? + eq? gensym symbol? - procedure?)))) diff --git a/langs/mug/test/build-runtime.rkt b/langs/mug/test/build-runtime.rkt new file mode 100644 index 00000000..7023ee0b --- /dev/null +++ b/langs/mug/test/build-runtime.rkt @@ -0,0 +1,8 @@ +#lang racket +(require a86/interp) + +;; link with runtime for IO operations +(unless (file-exists? "../runtime.o") + (system "make -C .. runtime.o")) +(current-objs + (list (path->string (normalize-path "../runtime.o")))) diff --git a/langs/mug/test/compile.rkt b/langs/mug/test/compile.rkt new file mode 100644 index 00000000..ee289de8 --- /dev/null +++ b/langs/mug/test/compile.rkt @@ -0,0 +1,8 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../compile.rkt" + "../run.rkt") + +(test-runner (λ p (run (compile (parse p))))) +(test-runner-io (λ (s . p) (run/io (compile (parse p)) s))) diff --git a/langs/mug/test/interp-defun.rkt b/langs/mug/test/interp-defun.rkt new file mode 100644 index 00000000..68ef4191 --- /dev/null +++ b/langs/mug/test/interp-defun.rkt @@ -0,0 +1,24 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../interp-defun.rkt" + "../interp-io.rkt") + +(define (closure->proc xs e r) + ;; Could make this better by calling the interpreter, + ;; but it's only used in tests where all we care about + ;; is that you get a procedure. 
+ (lambda _ + (error "This function is not callable."))) + +(test-runner + (λ p + (match (interp (parse p)) + [(Closure xs e r) (closure->proc xs e r)] + [v v]))) +(test-runner-io + (λ (s . p) + (match (interp/io (parse p) s) + [(cons (Closure xs e r) o) + (cons (closure->proc xs e r) o)] + [r r]))) diff --git a/langs/hoodwink/test/interp.rkt b/langs/mug/test/interp.rkt similarity index 55% rename from langs/hoodwink/test/interp.rkt rename to langs/mug/test/interp.rkt index 1eaa5864..cd7b654e 100644 --- a/langs/hoodwink/test/interp.rkt +++ b/langs/mug/test/interp.rkt @@ -4,6 +4,5 @@ "../interp.rkt" "../interp-io.rkt") -(test-runner (λ (e) (interp (parse e)))) - -(test-runner-io (λ (e s) (interp/io (parse e) s))) +(test-runner (λ p (interp (parse p)))) +(test-runner-io (λ (s . p) (interp/io (parse p) s))) diff --git a/langs/mug/test/test-runner.rkt b/langs/mug/test/test-runner.rkt new file mode 100644 index 00000000..d4d68ed2 --- /dev/null +++ b/langs/mug/test/test-runner.rkt @@ -0,0 +1,389 @@ +#lang racket +(provide test-runner test-runner-io) +(require rackunit) + +(define (test-runner run) + ;; Abscond examples + (check-equal? (run 7) 7) + (check-equal? (run -8) -8) + + ;; Blackmail examples + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7) + + ;; Con examples + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 1) 1 2) + 7)) + 7) + + ;; Dupe examples + (check-equal? (run #t) #t) + (check-equal? (run #f) #f) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) + (check-equal? (run '(if #t 3 4)) 3) + (check-equal? (run '(if #f 3 4)) 4) + (check-equal? (run '(if 0 3 4)) 3) + (check-equal? (run '(zero? 4)) #f) + (check-equal? (run '(zero? 
0)) #t) + + ;; Dodger examples + (check-equal? (run #\a) #\a) + (check-equal? (run #\b) #\b) + (check-equal? (run '(char? #\a)) #t) + (check-equal? (run '(char? #t)) #f) + (check-equal? (run '(char? 8)) #f) + (check-equal? (run '(char->integer #\a)) (char->integer #\a)) + (check-equal? (run '(integer->char 955)) #\λ) + + ;; Extort examples + (check-equal? (run '(add1 #f)) 'err) + (check-equal? (run '(sub1 #f)) 'err) + (check-equal? (run '(zero? #f)) 'err) + (check-equal? (run '(char->integer #f)) 'err) + (check-equal? (run '(integer->char #f)) 'err) + (check-equal? (run '(integer->char -1)) 'err) + (check-equal? (run '(write-byte #f)) 'err) + (check-equal? (run '(write-byte -1)) 'err) + (check-equal? (run '(write-byte 256)) 'err) + + ;; Fraud examples + (check-equal? (run '(let ((x 7)) x)) 7) + (check-equal? (run '(let ((x 7)) 2)) 2) + (check-equal? (run '(let ((x 7)) (add1 x))) 8) + (check-equal? (run '(let ((x (add1 7))) x)) 8) + (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) + (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) + (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) + + (check-equal? (run '(let ((x 0)) + (if (zero? x) 7 8))) + 7) + (check-equal? (run '(let ((x 1)) + (add1 (if (zero? x) 7 8)))) + 9) + (check-equal? (run '(+ 3 4)) 7) + (check-equal? (run '(- 3 4)) -1) + (check-equal? (run '(+ (+ 2 1) 4)) 7) + (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) + (check-equal? (run '(let ((x (+ 1 2))) + (let ((z (- 4 x))) + (+ (+ x x) z)))) + 7) + (check-equal? (run '(= 5 5)) #t) + (check-equal? (run '(= 4 5)) #f) + (check-equal? (run '(= (add1 4) 5)) #t) + (check-equal? (run '(< 5 5)) #f) + (check-equal? (run '(< 4 5)) #t) + (check-equal? (run '(< (add1 4) 5)) #f) + + ;; Hustle examples + (check-equal? (run ''()) '()) + (check-equal? (run '(box 1)) (box 1)) + (check-equal? (run '(box -1)) (box -1)) + (check-equal? (run '(cons 1 2)) (cons 1 2)) + (check-equal? (run '(unbox (box 1))) 1) + (check-equal? 
(run '(car (cons 1 2))) 1) + (check-equal? (run '(cdr (cons 1 2))) 2) + (check-equal? (run '(cons 1 '())) (list 1)) + (check-equal? (run '(let ((x (cons 1 2))) + (begin (cdr x) + (car x)))) + 1) + (check-equal? (run '(let ((x (cons 1 2))) + (let ((y (box 3))) + (unbox y)))) + 3) + (check-equal? (run '(eq? 1 1)) #t) + (check-equal? (run '(eq? 1 2)) #f) + (check-equal? (run '(eq? (cons 1 2) (cons 1 2))) #f) + (check-equal? (run '(let ((x (cons 1 2))) (eq? x x))) #t) + + ;; Hoax examples + (check-equal? (run '(make-vector 0 0)) #()) + (check-equal? (run '(make-vector 1 0)) #(0)) + (check-equal? (run '(make-vector 3 0)) #(0 0 0)) + (check-equal? (run '(make-vector 3 5)) #(5 5 5)) + (check-equal? (run '(vector? (make-vector 0 0))) #t) + (check-equal? (run '(vector? (cons 0 0))) #f) + (check-equal? (run '(vector-ref (make-vector 0 #f) 0)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 2)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 3)) 'err) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 0 4) + x))) + #(4 5 5)) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 1 4) + x))) + #(5 4 5)) + (check-equal? (run '(vector-length (make-vector 3 #f))) 3) + (check-equal? (run '(vector-length (make-vector 0 #f))) 0) + (check-equal? (run '"") "") + (check-equal? (run '"fred") "fred") + (check-equal? (run '"wilma") "wilma") + (check-equal? (run '(make-string 0 #\f)) "") + (check-equal? (run '(make-string 3 #\f)) "fff") + (check-equal? (run '(make-string 3 #\g)) "ggg") + (check-equal? (run '(string-length "")) 0) + (check-equal? (run '(string-length "fred")) 4) + (check-equal? (run '(string-ref "" 0)) 'err) + (check-equal? (run '(string-ref (make-string 0 #\a) 0)) 'err) + (check-equal? (run '(string-ref "fred" 0)) #\f) + (check-equal? 
(run '(string-ref "fred" 1)) #\r) + (check-equal? (run '(string-ref "fred" 2)) #\e) + (check-equal? (run '(string-ref "fred" 4)) 'err) + (check-equal? (run '(string? "fred")) #t) + (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") + + ;; Iniquity tests + (check-equal? (run + '(define (f x) x) + '(f 5)) + 5) + + (check-equal? (run + '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) + '(tri 9)) + 45) + + (check-equal? (run + '(define (f x) x) + '(define (g x) (f x)) + '(g 5)) + 5) + (check-equal? (run + '(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) + '(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + '(even? 101)) + #f) + (check-equal? (run + '(define (map-add1 xs) + (if (empty? xs) + '() + (cons (add1 (car xs)) + (map-add1 (cdr xs))))) + '(map-add1 (cons 1 (cons 2 (cons 3 '()))))) + '(2 3 4)) + (check-equal? (run + '(define (f x) + 10) + '(f 1)) + 10) + (check-equal? (run + '(define (f x) + 10) + '(let ((x 2)) (f 1))) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(f 1 2)) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(let ((z 2)) (f 1 2))) + 10) + (check-equal? (run '(define (f x y) y) + '(f 1 (add1 #f))) + 'err) + + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? 
(run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? (run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) + + ;; Loot examples + (check-true (procedure? (run '(λ (x) x)))) + (check-equal? (run '((λ (x) x) 5)) + 5) + + (check-equal? (run '(let ((f (λ (x) x))) (f 5))) + 5) + (check-equal? (run '(let ((f (λ (x y) x))) (f 5 7))) + 5) + (check-equal? (run '(let ((f (λ (x y) y))) (f 5 7))) + 7) + (check-equal? (run '((let ((x 1)) + (let ((y 2)) + (lambda (z) (cons x (cons y (cons z '())))))) + 3)) + '(1 2 3)) + (check-equal? (run '(define (adder n) + (λ (x) (+ x n))) + '((adder 5) 10)) + 15) + (check-equal? (run '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))))) + 36)) + 666) + (check-equal? (run '(define (tri n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))) + '(tri 36)) + 666) + (check-equal? (run '(define (tri n) + (match n + [0 0] + [m (+ m (tri (sub1 m)))])) + '(tri 36)) + 666) + (check-equal? (run '((match 8 [8 (lambda (x) x)]) 12)) + 12) + + ;; Mug examples + (check-equal? (run '(symbol? 'foo)) #t) + (check-equal? (run '(symbol? (string->symbol "foo"))) #t) + (check-equal? (run '(eq? 'foo 'foo)) #t) + (check-equal? (run '(eq? (string->symbol "foo") + (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'foo (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) + #t) + (check-equal? (run '(symbol? 'g0)) #t) + (check-equal? (run '(symbol? "g0")) #f) + (check-equal? (run '(symbol? (string->symbol "g0"))) #t) + (check-equal? (run '(symbol? (string->uninterned-symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->uninterned-symbol "g0"))) #f) + (check-equal? 
(run '(eq? (string->uninterned-symbol "g0") (string->uninterned-symbol "g0"))) + #f) + (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f) + (check-equal? (run '(string? (symbol->string 'foo))) #t) + (check-equal? (run '(eq? (symbol->string 'foo) "foo")) #f) + (check-equal? (run ''foo) 'foo) + (check-equal? (run '(eq? (match #t [_ "foo"]) "bar")) #f) + (check-equal? (run '(eq? (match #t [_ 'foo]) 'bar)) #f) + (check-equal? (run '(match 'foo ['bar #t] [_ #f])) #f) + (check-equal? (run '(match 'foo ['foo #t] [_ #f])) #t) + (check-equal? (run '(match "foo" ["foo" #t] [_ #f])) #t) + (check-equal? (run '(match "foo" ["bar" #t] [_ #f])) #f) + (check-equal? (run '(match (cons '+ (cons 1 (cons 2 '()))) + [(cons '+ (cons x (cons y '()))) + (+ x y)])) + 3)) + +(define (test-runner-io run) + ;; Evildoer examples + (check-equal? (run "" 7) (cons 7 "")) + (check-equal? (run "" '(write-byte 97)) (cons (void) "a")) + (check-equal? (run "a" '(read-byte)) (cons 97 "")) + (check-equal? (run "b" '(begin (write-byte 97) (read-byte))) + (cons 98 "a")) + (check-equal? (run "" '(read-byte)) (cons eof "")) + (check-equal? (run "" '(eof-object? (read-byte))) (cons #t "")) + (check-equal? (run "a" '(eof-object? (read-byte))) (cons #f "")) + (check-equal? (run "" '(begin (write-byte 97) (write-byte 98))) + (cons (void) "ab")) + + (check-equal? (run "ab" '(peek-byte)) (cons 97 "")) + (check-equal? (run "ab" '(begin (peek-byte) (read-byte))) (cons 97 "")) + ;; Extort examples + (check-equal? (run "" '(write-byte #t)) (cons 'err "")) + + ;; Fraud examples + (check-equal? (run "" '(let ((x 97)) (write-byte x))) (cons (void) "a")) + (check-equal? (run "" + '(let ((x 97)) + (begin (write-byte x) + x))) + (cons 97 "a")) + (check-equal? (run "b" '(let ((x 97)) (begin (read-byte) x))) + (cons 97 "")) + (check-equal? (run "b" '(let ((x 97)) (begin (peek-byte) x))) + (cons 97 "")) + + ;; Hustle examples + (check-equal? 
(run "" + '(let ((x 1)) + (begin (write-byte 97) + 1))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x 1)) + (let ((y 2)) + (begin (write-byte 97) + 1)))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x (cons 1 2))) + (begin (write-byte 97) + (car x)))) + (cons 1 "a")) + ;; Iniquity examples + #| + (check-equal? (run "" + '(define (print-alphabet i) + (if (zero? i) + (void) + (begin (write-byte (- 123 i)) + (print-alphabet (sub1 i))))) + '(print-alphabet 26)) + (cons (void) "abcdefghijklmnopqrstuvwxyz")) +|#) diff --git a/langs/mug/types.h b/langs/mug/types.h new file mode 100644 index 00000000..4093c4f7 --- /dev/null +++ b/langs/mug/types.h @@ -0,0 +1,42 @@ +#ifndef TYPES_H +#define TYPES_H + +/* + Bit layout of values + + Values are either: + - Immediates: end in #b000 + - Pointers + + Immediates are either + - Integers: end in #b0 000 + - Characters: end in #b01 000 + - True: #b11 000 + - False: #b1 11 000 + - Eof: #b10 11 000 + - Void: #b11 11 000 + - Empty: #b100 11 000 +*/ +#define imm_shift 3 +#define ptr_type_mask ((1 << imm_shift) - 1) +#define box_type_tag 1 +#define cons_type_tag 2 +#define vect_type_tag 3 +#define str_type_tag 4 +#define proc_type_tag 5 +#define symb_type_tag 6 +#define int_shift (1 + imm_shift) +#define int_type_mask ((1 << int_shift) - 1) +#define int_type_tag (0 << (int_shift - 1)) +#define nonint_type_tag (1 << (int_shift - 1)) +#define char_shift (int_shift + 1) +#define char_type_mask ((1 << char_shift) - 1) +#define char_type_tag ((0 << (char_shift - 1)) | nonint_type_tag) +#define nonchar_type_tag ((1 << (char_shift - 1)) | nonint_type_tag) +#define val_true ((0 << char_shift) | nonchar_type_tag) +#define val_false ((1 << char_shift) | nonchar_type_tag) +#define val_eof ((2 << char_shift) | nonchar_type_tag) +#define val_void ((3 << char_shift) | nonchar_type_tag) +#define val_empty ((4 << char_shift) | nonchar_type_tag) + +#endif diff --git a/langs/mug/types.rkt b/langs/mug/types.rkt new file mode 100644 index 
00000000..1bb4f590 --- /dev/null +++ b/langs/mug/types.rkt @@ -0,0 +1,108 @@ +#lang racket +(provide (all-defined-out)) +(require ffi/unsafe) + +(define imm-shift 3) +(define imm-mask #b111) +(define ptr-mask #b111) +(define type-box #b001) +(define type-cons #b010) +(define type-vect #b011) +(define type-str #b100) +(define type-proc #b101) +(define type-symb #b110) +(define int-shift (+ 1 imm-shift)) +(define char-shift (+ 2 imm-shift)) +(define type-int #b0000) +(define mask-int #b1111) +(define type-char #b01000) +(define mask-char #b11111) + +(define (bits->value b) + (cond [(= b (value->bits #t)) #t] + [(= b (value->bits #f)) #f] + [(= b (value->bits eof)) eof] + [(= b (value->bits (void))) (void)] + [(= b (value->bits '())) '()] + [(int-bits? b) + (arithmetic-shift b (- int-shift))] + [(char-bits? b) + (integer->char (arithmetic-shift b (- char-shift)))] + [(box-bits? b) + (box (bits->value (heap-ref b)))] + [(cons-bits? b) + (cons (bits->value (heap-ref (+ b 8))) + (bits->value (heap-ref b)))] + [(vect-bits? b) + (if (zero? (untag b)) + (vector) + (build-vector (heap-ref b) + (lambda (j) + (bits->value (heap-ref (+ b (* 8 (add1 j))))))))] + [(str-bits? b) + (if (zero? (untag b)) + (string) + (build-string (heap-ref b) + (lambda (j) + (char-ref (+ b 8) j))))] + [(symb-bits? b) + (string->symbol + (if (zero? (untag b)) + (string) + (build-string (heap-ref b) + (lambda (j) + (char-ref (+ b 8) j)))))] + [(proc-bits? b) + (lambda _ + (error "This function is not callable."))] + [else (error "invalid bits")])) + +(define (value->bits v) + (cond [(eq? v #t) #b00011000] + [(eq? v #f) #b00111000] + [(eof-object? v) #b01011000] + [(void? v) #b01111000] + [(empty? v) #b10011000] + [(integer? v) + (arithmetic-shift v int-shift)] + [(char? v) + (bitwise-ior type-char + (arithmetic-shift (char->integer v) char-shift))] + [else (error "not an immediate value")])) + +(define (imm-bits? v) + (zero? (bitwise-and v imm-mask))) + +(define (int-bits? 
v) + (= type-int (bitwise-and v mask-int))) + +(define (char-bits? v) + (= type-char (bitwise-and v mask-char))) + +(define (cons-bits? v) + (= type-cons (bitwise-and v imm-mask))) + +(define (box-bits? v) + (= type-box (bitwise-and v imm-mask))) + +(define (vect-bits? v) + (= type-vect (bitwise-and v imm-mask))) + +(define (str-bits? v) + (= type-str (bitwise-and v imm-mask))) + +(define (proc-bits? v) + (= type-proc (bitwise-and v imm-mask))) + +(define (symb-bits? v) + (= type-symb (bitwise-and v imm-mask))) + +(define (untag i) + (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) + (integer-length ptr-mask))) + +(define (heap-ref i) + (ptr-ref (cast (untag i) _int64 _pointer) _int64)) + +(define (char-ref i j) + (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j))) diff --git a/langs/mug/utils.rkt b/langs/mug/utils.rkt new file mode 100644 index 00000000..612b7381 --- /dev/null +++ b/langs/mug/utils.rkt @@ -0,0 +1,33 @@ +#lang racket +(provide symbol->data-label lookup pad-stack unpad-stack) +(require a86/ast) + +(define rsp 'rsp) +(define r15 'r15) + +(define (symbol->data-label s) + (symbol->label + (string->symbol (string-append "data_" (symbol->string s))))) + +;; Id CEnv -> [Maybe Integer] +(define (lookup x cenv) + (match cenv + ['() #f] + [(cons y rest) + (match (eq? 
x y) + [#t 0] + [#f (match (lookup x rest) + [#f #f] + [i (+ 8 i)])])])) + +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; Asm +;; Undo the stack alignment after a call +(define unpad-stack + (seq (Add rsp r15))) diff --git a/langs/mug/values.c b/langs/mug/values.c new file mode 100644 index 00000000..32e922bd --- /dev/null +++ b/langs/mug/values.c @@ -0,0 +1,121 @@ +#include "types.h" +#include "values.h" + +type_t val_typeof(val_t x) +{ + switch (x & ptr_type_mask) { + case box_type_tag: + return T_BOX; + case cons_type_tag: + return T_CONS; + case vect_type_tag: + return T_VECT; + case str_type_tag: + return T_STR; + case symb_type_tag: + return T_SYMB; + case proc_type_tag: + return T_PROC; + } + + if ((int_type_mask & x) == int_type_tag) + return T_INT; + if ((char_type_mask & x) == char_type_tag) + return T_CHAR; + + switch (x) { + case val_true: + case val_false: + return T_BOOL; + case val_eof: + return T_EOF; + case val_void: + return T_VOID; + case val_empty: + return T_EMPTY; + } + + return T_INVALID; +} + +int64_t val_unwrap_int(val_t x) +{ + return x >> int_shift; +} +val_t val_wrap_int(int64_t i) +{ + return (i << int_shift) | int_type_tag; +} + +int val_unwrap_bool(val_t x) +{ + return x == val_true; +} +val_t val_wrap_bool(int b) +{ + return b ? 
val_true : val_false; +} + +val_char_t val_unwrap_char(val_t x) +{ + return (val_char_t)(x >> char_shift); +} +val_t val_wrap_char(val_char_t c) +{ + return (((val_t)c) << char_shift) | char_type_tag; +} + +val_t val_wrap_eof(void) +{ + return val_eof; +} + +val_t val_wrap_void(void) +{ + return val_void; +} + +val_box_t* val_unwrap_box(val_t x) +{ + return (val_box_t *)(x ^ box_type_tag); +} +val_t val_wrap_box(val_box_t* b) +{ + return ((val_t)b) | box_type_tag; +} + +val_cons_t* val_unwrap_cons(val_t x) +{ + return (val_cons_t *)(x ^ cons_type_tag); +} +val_t val_wrap_cons(val_cons_t *c) +{ + return ((val_t)c) | cons_type_tag; +} + +val_vect_t* val_unwrap_vect(val_t x) +{ + return (val_vect_t *)(x ^ vect_type_tag); +} +val_t val_wrap_vect(val_vect_t *v) +{ + return ((val_t)v) | vect_type_tag; +} + +val_str_t* val_unwrap_str(val_t x) +{ + return (val_str_t *)(x ^ str_type_tag); +} +val_t val_wrap_str(val_str_t *v) +{ + return ((val_t)v) | str_type_tag; +} + +val_symb_t* val_unwrap_symb(val_t x) +{ + return (val_symb_t *)(x ^ symb_type_tag); +} +val_t val_wrap_symb(val_symb_t *v) +{ + return ((val_t)v) | symb_type_tag; +} diff --git a/langs/mug/values.h b/langs/mug/values.h new file mode 100644 index 00000000..c1de09d6 --- /dev/null +++ b/langs/mug/values.h @@ -0,0 +1,84 @@ +#ifndef VALUES_H +#define VALUES_H + +#include + +/* any abstract value */ +typedef int64_t val_t; + +typedef enum type_t { + T_INVALID = -1, + /* immediates */ + T_INT, + T_BOOL, + T_CHAR, + T_EOF, + T_VOID, + T_EMPTY, + /* pointers */ + T_BOX, + T_CONS, + T_VECT, + T_STR, + T_SYMB, + T_PROC, +} type_t; + +typedef uint32_t val_char_t; +typedef struct val_box_t { + val_t val; +} val_box_t; +typedef struct val_cons_t { + val_t snd; + val_t fst; +} val_cons_t; +typedef struct val_vect_t { + uint64_t len; + val_t elems[]; +} val_vect_t; +typedef struct val_str_t { + uint64_t len; + val_char_t codepoints[]; +} val_str_t; +typedef struct val_symb_t { + uint64_t len; + val_char_t codepoints[]; +} 
val_symb_t; + +/* return the type of x */ +type_t val_typeof(val_t x); + +/** + * Wrap/unwrap values + * + * The behavior of unwrap functions are undefined on type mismatch. + */ +int64_t val_unwrap_int(val_t x); +val_t val_wrap_int(int64_t i); + +int val_unwrap_bool(val_t x); +val_t val_wrap_bool(int b); + +val_char_t val_unwrap_char(val_t x); +val_t val_wrap_char(val_char_t b); + +val_t val_wrap_eof(); + +val_t val_wrap_void(); + +val_box_t* val_unwrap_box(val_t x); +val_t val_wrap_box(val_box_t* b); + +val_cons_t* val_unwrap_cons(val_t x); +val_t val_wrap_cons(val_cons_t* c); + +val_vect_t* val_unwrap_vect(val_t x); +val_t val_wrap_vect(val_vect_t* c); + +val_str_t* val_unwrap_str(val_t x); +val_t val_wrap_str(val_str_t* c); + +val_symb_t* val_unwrap_symb(val_t x); +val_t val_wrap_symb(val_symb_t* c); + +#endif diff --git a/langs/neerdowell/Makefile b/langs/neerdowell/Makefile new file mode 100644 index 00000000..ed8a85f4 --- /dev/null +++ b/langs/neerdowell/Makefile @@ -0,0 +1,45 @@ +UNAME := $(shell uname) + +ifeq ($(UNAME), Darwin) + format=macho64 + CC=arch -x86_64 gcc +else + format=elf64 + CC=gcc +endif + +objs = \ + main.o \ + print.o \ + values.o \ + io.o \ + symbol.o + +default: submit.zip + +submit.zip: + zip submit.zip -r * \ + -x \*.[os] -x \*~ -x \*zip \ + -x \*Zone.Identifier -x \*\*compiled\*\* + +runtime.o: $(objs) + ld -r $(objs) -o runtime.o + +%.run: %.o runtime.o + $(CC) runtime.o $< -o $@ + +.c.o: + $(CC) -fPIC -c -g -o $@ $< + +.s.o: + nasm -g -f $(format) -o $@ $< + +%.s: %.rkt + cat $< | racket -t compile-stdin.rkt -m > $@ + +clean: + @$(RM) *.o *.s *.run ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
+ +%.test: %.run %.rkt + @test "$(shell ./$(<))" = "$(shell racket $(word 2,$^))" diff --git a/langs/neerdowell/ast.rkt b/langs/neerdowell/ast.rkt new file mode 100644 index 00000000..03a88ae7 --- /dev/null +++ b/langs/neerdowell/ast.rkt @@ -0,0 +1,80 @@ +#lang racket +(provide (all-defined-out)) + +;; type Prog = (Prog (Listof Defn) Expr) +(struct Prog (ds e) #:prefab) + +;; type Defn = (Defn Id (Listof Id) Expr) +(struct Defn (f xs e) #:prefab) + +;; type Expr = (Eof) +;; | (Quote Datum) +;; | (Prim Op (Listof Expr)) +;; | (If Expr Expr Expr) +;; | (Begin Expr Expr) +;; | (Let Id Expr Expr) +;; | (Var Id) +;; | (Match Expr (Listof Pat) (Listof Expr)) +;; | (App Expr (Listof Expr)) +;; | (Lam Id (Listof Id) Expr) +;; type Datum = Integer +;; | Char +;; | Boolean +;; | String +;; | Symbol +;; | (Boxof Datum) +;; | (Listof Datum) +;; | (Vectorof Datum) +;; type Id = Symbol +;; type Op = Op0 | Op1 | Op2 | Op3 +;; type Op0 = 'read-byte +;; type Op1 = 'add1 | 'sub1 | 'zero? +;; | 'char? | 'integer->char | 'char->integer +;; | 'write-byte | 'eof-object? +;; | 'box | 'car | 'cdr | 'unbox +;; | 'empty? | 'cons? | 'box? +;; | 'vector? | 'vector-length +;; | 'string? | 'string-length +;; | 'symbol? | 'symbol->string +;; | 'string->symbol | 'string->uninterned-symbol +;; type Op2 = '+ | '- | '< | '= +;; | 'cons +;; | 'make-vector | 'vector-ref +;; | 'make-string | 'string-ref +;; | 'struct? +;; type Op3 = 'vector-set! 
| 'struct-ref +;; type OpN = 'make-struct +;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; | (PSymb Symbol) +;; | (PStr String) +;; | (PStruct Id (Listof Pat)) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() + +(struct Eof () #:prefab) +(struct Prim (p es) #:prefab) +(struct If (e1 e2 e3) #:prefab) +(struct Begin (e1 e2) #:prefab) +(struct Let (x e1 e2) #:prefab) +(struct Var (x) #:prefab) +(struct App (e es) #:prefab) +(struct Lam (f xs e) #:prefab) +(struct Quote (d) #:prefab) +(struct Match (e ps es) #:prefab) + +(struct PVar (x) #:prefab) +(struct PWild () #:prefab) +(struct PLit (x) #:prefab) +(struct PBox (p) #:prefab) +(struct PCons (p1 p2) #:prefab) +(struct PAnd (p1 p2) #:prefab) +(struct PSymb (s) #:prefab) +(struct PStr (s) #:prefab) +(struct PStruct (n ps) #:prefab) diff --git a/langs/neerdowell/build-runtime.rkt b/langs/neerdowell/build-runtime.rkt new file mode 100644 index 00000000..66aad89f --- /dev/null +++ b/langs/neerdowell/build-runtime.rkt @@ -0,0 +1,14 @@ +#lang racket +(require racket/runtime-path) +(provide runtime-path) + +(define-runtime-path here ".") + +(void + (system (string-append "make -C '" + (path->string (normalize-path here)) + "' runtime.o"))) + +(define runtime-path + (path->string + (normalize-path (build-path here "runtime.o")))) diff --git a/langs/neerdowell/char.c b/langs/neerdowell/char.c new file mode 100644 index 00000000..d11f16e0 --- /dev/null +++ b/langs/neerdowell/char.c @@ -0,0 +1,57 @@ +#include +#include +#include "types.h" + +void print_codepoint(int64_t); + +void print_char (int64_t v) { + int64_t codepoint = v >> char_shift; + printf("#\\"); + switch (codepoint) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: 
+ printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(v); + } +} + +void print_codepoint(int64_t v) { + int64_t codepoint = v >> char_shift; + // Print using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (codepoint < 128) { + printf("%c", (char) codepoint); + } else if (codepoint < 2048) { + printf("%c%c", + (char)(codepoint >> 6) | 192, + ((char)codepoint & 63) | 128); + } else if (codepoint < 65536) { + printf("%c%c%c", + (char)(codepoint >> 12) | 224, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } else { + printf("%c%c%c%c", + (char)(codepoint >> 18) | 240, + ((char)(codepoint >> 12) & 63) | 128, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } +} + diff --git a/langs/neerdowell/compile-datum.rkt b/langs/neerdowell/compile-datum.rkt new file mode 100644 index 00000000..90f8170f --- /dev/null +++ b/langs/neerdowell/compile-datum.rkt @@ -0,0 +1,88 @@ +#lang racket +(provide compile-datum) +(require "types.rkt" + "utils.rkt" + a86/ast) + +;; Registers used +(define rax 'rax) ; return + +;; Datum -> Asm +(define (compile-datum d) + (cond + [(string? d) (seq (Lea rax (load-string d)))] + [(symbol? d) (seq (Lea rax (load-symbol d)))] + [(compound? d) (compile-compound-datum d)] + [else (compile-atom d)])) + +(define (load-symbol s) + (Plus (symbol->data-label s) type-symb)) + +(define (load-string s) + (Plus (symbol->data-label (string->symbol s)) type-str)) + +;; Value -> Asm +(define (compile-atom v) + (seq (Mov rax (value->bits v)))) + +;; Datum -> Boolean +(define (compound? d) + (or (box? d) + (cons? d) + (vector? d))) + +;; Datum -> Asm +(define (compile-compound-datum d) + (match (compile-quoted d) + [(cons l is) + (seq (Data) + is + (Text) + (Lea rax l))])) + +;; Datum -> (cons AsmExpr Asm) +(define (compile-quoted c) + (cond + [(vector? c) (compile-datum-vector (vector->list c))] + [(box? c) (compile-datum-box (unbox c))] + [(cons? 
c) (compile-datum-cons (car c) (cdr c))] + [(symbol? c) (cons (load-symbol c) '())] + [(string? c) (cons (load-string c) '())] + [else (cons (value->bits c) '())])) + +;; Datum -> (cons AsmExpr Asm) +(define (compile-datum-box c) + (match (compile-quoted c) + [(cons l1 is1) + (let ((l (gensym 'box))) + (cons (Plus l type-box) + (seq (Label l) + (Dq l1) + is1)))])) + +;; Datum Datum -> (cons AsmExpr Asm) +(define (compile-datum-cons c1 c2) + (match (compile-quoted c1) + [(cons l1 is1) + (match (compile-quoted c2) + [(cons l2 is2) + (let ((l (gensym 'cons))) + (cons (Plus l type-cons) + (seq (Label l) + (Dq l2) + (Dq l1) + is1 + is2)))])])) + +;; [Listof Datum] -> (cons AsmExpr Asm) +(define (compile-datum-vector ds) + (match ds + ['() (cons type-vect '())] + [_ + (let ((l (gensym 'vector)) + (cds (map compile-quoted ds))) + (cons (Plus l type-vect) + (seq (Label l) + (Dq (length ds)) + (map (λ (cd) (Dq (car cd))) cds) + (append-map cdr cds))))])) diff --git a/langs/neerdowell/compile-define.rkt b/langs/neerdowell/compile-define.rkt new file mode 100644 index 00000000..354e6f26 --- /dev/null +++ b/langs/neerdowell/compile-define.rkt @@ -0,0 +1,69 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "fv.rkt" + "utils.rkt" + "compile-expr.rkt" + a86/ast) + +;; [Listof Defn] -> [Listof Id] +(define (define-ids ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (cons f (define-ids ds))])) + +;; [Listof Defn] -> Asm +(define (compile-defines ds) + (match ds + ['() (seq)] + [(cons d ds) + (seq (compile-define d) + (compile-defines ds))])) + +;; Defn -> Asm +(define (compile-define d) + (match d + [(Defn f xs e) + (compile-lambda-define (Lam f xs e))])) + +;; Defns -> Asm +;; Compile the closures for ds and push them on the stack +(define (compile-defines-values ds) + (seq (alloc-defines ds 0) + (init-defines ds (reverse (define-ids ds)) 8) + (add-rbx-defines ds 0))) + +;; Defns Int -> Asm +;; Allocate closures for ds at given offset, but 
don't write environment yet +(define (alloc-defines ds off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx off) rax) + (Mov rax rbx) + (Add rax off) + (Or rax type-proc) + (Push rax) + (alloc-defines ds (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns CEnv Int -> Asm +;; Initialize the environment for each closure for ds at given offset +(define (init-defines ds c off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (free-vars-to-heap fvs c off) + (init-defines ds c (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns Int -> Asm +;; Compute adjustment to rbx for allocation of all ds +(define (add-rbx-defines ds n) + (match ds + ['() (seq (Add rbx (* n 8)))] + [(cons (Defn f xs e) ds) + (add-rbx-defines ds (+ n (add1 (length (fv (Lam f xs e))))))])) diff --git a/langs/neerdowell/compile-expr.rkt b/langs/neerdowell/compile-expr.rkt new file mode 100644 index 00000000..e54a789d --- /dev/null +++ b/langs/neerdowell/compile-expr.rkt @@ -0,0 +1,347 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-ops.rkt" + "compile-datum.rkt" + a86/ast) + +;; Registers used +(define rax 'rax) ; return +(define rbx 'rbx) ; heap +(define rsp 'rsp) ; stack +(define rdi 'rdi) ; arg + +;; Expr CEnv Bool -> Asm +(define (compile-e e c t?) 
+ (match e + [(Quote d) (compile-datum d)] + [(Eof) (seq (Mov rax (value->bits eof)))] + [(Var x) (compile-variable x c)] + [(Prim p es) (compile-prim p es c)] + [(If e1 e2 e3) (compile-if e1 e2 e3 c t?)] + [(Begin e1 e2) (compile-begin e1 e2 c t?)] + [(Let x e1 e2) (compile-let x e1 e2 c t?)] + [(App e es) (compile-app e es c t?)] + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) + +;; Id CEnv -> Asm +(define (compile-variable x c) + (match (lookup x c) + [#f (error "unbound variable")] ;(seq (Lea rax (symbol->label x)))] + [i (seq (Mov rax (Offset rsp i)))])) + +;; Op (Listof Expr) CEnv -> Asm +(define (compile-prim p es c) + (seq (compile-es* es c) + (match p + ['make-struct (compile-make-struct (length es))] + [_ (compile-op p)]))) + +;; Expr Expr Expr CEnv Bool -> Asm +(define (compile-if e1 e2 e3 c t?) + (let ((l1 (gensym 'if)) + (l2 (gensym 'if))) + (seq (compile-e e1 c #f) + (Cmp rax (value->bits #f)) + (Je l1) + (compile-e e2 c t?) + (Jmp l2) + (Label l1) + (compile-e e3 c t?) + (Label l2)))) + +;; Expr Expr CEnv Bool -> Asm +(define (compile-begin e1 e2 c t?) + (seq (compile-e e1 c #f) + (compile-e e2 c t?))) + +;; Id Expr Expr CEnv Bool -> Asm +(define (compile-let x e1 e2 c t?) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons x c) t?) + (Add rsp 8))) + +;; Id [Listof Expr] CEnv Bool -> Asm +(define (compile-app f es c t?) + ;(compile-app-nontail f es c) + (if t? + (compile-app-tail f es c) + (compile-app-nontail f es c))) + +;; Expr [Listof Expr] CEnv -> Asm +(define (compile-app-tail e es c) + (seq (compile-es (cons e es) c) + (move-args (add1 (length es)) (length c)) + (Add rsp (* 8 (length c))) + (Mov rax (Offset rsp (* 8 (length es)))) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) + (Jmp rax))) + +;; Integer Integer -> Asm +(define (move-args i off) + (cond [(zero? off) (seq)] + [(zero? 
i) (seq)] + [else + (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) + (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) + (move-args (sub1 i) off))])) + +;; Expr [Listof Expr] CEnv -> Asm +;; The return address is placed above the arguments, so callee pops +;; arguments and return address is next frame +(define (compile-app-nontail e es c) + (let ((r (gensym 'ret)) + (i (* 8 (length es)))) + (seq (Lea rax r) + (Push rax) + (compile-es (cons e es) (cons #f c)) + (Mov rax (Offset rsp i)) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) ; fetch the code label + (Jmp rax) + (Label r)))) + +;; Id [Listof Id] Expr CEnv -> Asm +(define (compile-lam f xs e c) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx 0) rax) + (free-vars-to-heap fvs c 8) + (Mov rax rbx) ; return value + (Or rax type-proc) + (Add rbx (* 8 (add1 (length fvs))))))) + +;; [Listof Id] CEnv Int -> Asm +;; Copy the values of given free variables into the heap at given offset +(define (free-vars-to-heap fvs c off) + (match fvs + ['() (seq)] + [(cons x fvs) + (seq (Mov r8 (Offset rsp (lookup x c))) + (Mov (Offset rbx off) r8) + (free-vars-to-heap fvs c (+ off 8)))])) + +;; [Listof Lam] -> Asm +(define (compile-lambda-defines ls) + (match ls + ['() (seq)] + [(cons l ls) + (seq (compile-lambda-define l) + (compile-lambda-defines ls))])) + +;; Lam -> Asm +(define (compile-lambda-define l) + (let ((fvs (fv l))) + (match l + [(Lam f xs e) + (let ((env (append (reverse fvs) (reverse xs) (list #f)))) + (seq (Label (symbol->label f)) + (Mov rax (Offset rsp (* 8 (length xs)))) + (Xor rax type-proc) + (copy-env-to-stack fvs 8) + (compile-e e env #t) + (Add rsp (* 8 (length env))) ; pop env + (Ret)))]))) + +;; [Listof Id] Int -> Asm +;; Copy the closure environment at given offset to stack +(define (copy-env-to-stack fvs off) + (match fvs + ['() (seq)] + [(cons _ fvs) + (seq (Mov r9 (Offset rax off)) + (Push r9) + (copy-env-to-stack fvs (+ 8 off)))])) + +;; [Listof Expr] 
CEnv -> Asm +(define (compile-es es c) + (match es + ['() '()] + [(cons e es) + (seq (compile-e e c #f) + (Push rax) + (compile-es es (cons #f c)))])) + +;; [Listof Expr] CEnv -> Asm +;; Like compile-es, but leave last subexpression in rax (if exists) +(define (compile-es* es c) + (match es + ['() '()] + [(cons e '()) + (compile-e e c #f)] + [(cons e es) + (seq (compile-e e c #f) + (Push rax) + (compile-es* es (cons #f c)))])) + +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c #f) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) 
+ (Add rsp (* 8 (length cm))) + (Jmp done) + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) cm)] + [(PVar x) + (list (seq (Push rax)) (cons x cm))] + [(PStr s) + (let ((ok (gensym)) + (fail (gensym))) + (list (seq (Lea rdi (symbol->data-label (string->symbol s))) + (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-str) + (Je ok) + (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok) + (Xor rax type-str) + (Mov rsi rax) + pad-stack + (Call 'symb_cmp) + unpad-stack + (Cmp rax 0) + (Jne fail)) + cm))] + [(PSymb s) + (let ((ok (gensym))) + (list (seq (Lea r9 (Plus (symbol->data-label s) type-symb)) + (Cmp rax r9) + (Je ok) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok)) + cm))] + [(PLit l) + (let ((ok (gensym))) + (list (seq (Cmp rax (value->bits l)) + (Je ok) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 cm1) + (let ((ok (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Je ok) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next) + (Label ok) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 cm2) + (let ((ok (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Je ok) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next) + (Label ok) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length 
cm1)) (length cm))))) + i2) + cm2))])])] + [(PStruct n ps) + (match (compile-struct-patterns ps (cons #f cm) next 1 (add1 (length cm))) + [(list i cm1) + (let ((ok (gensym)) + (fail (gensym))) + (list + (seq (%%% "struct") + (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-struct) + (Je ok) + (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok) + (Xor rax type-struct) + (Mov r8 (Offset rax 0)) + (Lea r9 (Plus (symbol->data-label n) type-symb)) + (Cmp r8 r9) + (Jne fail) + (Push rax) + i) + cm1))])])) + +;; [Listof Pat] CEnv Symbol Nat Nat -> (list Asm CEnv) +(define (compile-struct-patterns ps cm next i cm0-len) + (match ps + ['() (list (seq) cm)] + [(cons p ps) + (match (compile-pattern p cm next) + [(list i1 cm1) + (match (compile-struct-patterns ps cm1 next (add1 i) cm0-len) + [(list is cmn) + (list + (seq (Mov rax (Offset rax (* 8 i))) + i1 + (Mov rax (Offset rsp (* 8 (- (length cm1) cm0-len)))) + is) + cmn)])])])) diff --git a/langs/neerdowell/compile-literals.rkt b/langs/neerdowell/compile-literals.rkt new file mode 100644 index 00000000..7530b302 --- /dev/null +++ b/langs/neerdowell/compile-literals.rkt @@ -0,0 +1,119 @@ +#lang racket +(provide compile-literals init-symbol-table literals) +(require "ast.rkt" + "utils.rkt" + a86/ast) + +(define rdi 'rdi) + +;; Prog -> Asm +(define (compile-literals p) + (append-map compile-literal (literals p))) + +;; Symbol -> Asm +(define (compile-literal s) + (let ((str (symbol->string s))) + (seq (Label (symbol->data-label s)) + (Dq (string-length str)) + (compile-string-chars (string->list str)) + (if (odd? 
(string-length str)) + (seq (Dd 0)) + (seq))))) + +;; Prog -> Asm +;; Call intern_symbol on every symbol in the program +(define (init-symbol-table p) + (match (symbols p) + ['() (seq)] + [ss (seq (Sub 'rsp 8) + (append-map init-symbol ss) + (Add 'rsp 8))])) + +;; Symbol -> Asm +(define (init-symbol s) + (seq (Lea rdi (symbol->data-label s)) + (Call 'intern_symbol))) + +;; Prog -> [Listof Symbol] +(define (literals p) + (remove-duplicates + (map to-symbol (literals* p)))) + +;; Prog -> [Listof Symbol] +(define (symbols p) + (remove-duplicates (filter symbol? (literals* p)))) + +;; (U String Symbol) -> Symbol +(define (to-symbol s) + (if (string? s) + (string->symbol s) + s)) + +;; Prog -> [Listof (U Symbol String)] +(define (literals* p) + (match p + [(Prog ds e) + (append (append-map literals-d ds) (literals-e e))])) + +;; Defn -> [Listof (U Symbol String)] +(define (literals-d d) + (match d + [(Defn f xs e) + (literals-e e)])) + +;; Expr -> [Listof (U Symbol String)] +(define (literals-e e) + (match e + [(Quote d) (literals-datum d)] + [(Prim p es) + (append-map literals-e es)] + [(If e1 e2 e3) + (append (literals-e e1) (literals-e e2) (literals-e e3))] + [(Begin e1 e2) + (append (literals-e e1) (literals-e e2))] + [(Let x e1 e2) + (append (literals-e e1) (literals-e e2))] + [(App e1 es) + (append (literals-e e1) (append-map literals-e es))] + [(Lam f xs e) + (literals-e e)] + [(Match e ps es) + (append (literals-e e) (append-map literals-match-clause ps es))] + [_ '()])) + +;; Pat Expr -> [Listof Symbol] +(define (literals-match-clause p e) + (append (literals-pat p) (literals-e e))) + +;; Pat -> [Listof (U Symbol String)] +(define (literals-pat p) + (match p + [(PSymb s) (list s)] + [(PStr s) (list s)] + [(PBox p) (literals-pat p)] + [(PCons p1 p2) (append (literals-pat p1) (literals-pat p2))] + [(PAnd p1 p2) (append (literals-pat p1) (literals-pat p2))] + [(PStruct t ps) (append-map literals-pat ps)] + [_ '()])) + +;; Datum -> [Listof (U Symbol String)] 
+(define (literals-datum d) + (cond + [(string? d) (list d)] + [(symbol? d) (list d)] + [(cons? d) + (append (literals-datum (car d)) + (literals-datum (cdr d)))] + [(box? d) + (literals-datum (unbox d))] + [(vector? d) + (append-map literals-datum (vector->list d))] + [else '()])) + +;; [Listof Char] -> Asm +(define (compile-string-chars cs) + (match cs + ['() (seq)] + [(cons c cs) + (seq (Dd (char->integer c)) + (compile-string-chars cs))])) diff --git a/langs/neerdowell/compile-ops.rkt b/langs/neerdowell/compile-ops.rkt new file mode 100644 index 00000000..23386fc7 --- /dev/null +++ b/langs/neerdowell/compile-ops.rkt @@ -0,0 +1,444 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" "types.rkt" "utils.rkt" a86/ast) + +(define rax 'rax) ; return +(define eax 'eax) ; 32-bit load/store +(define rbx 'rbx) ; heap +(define rdi 'rdi) ; arg1 +(define rsi 'rsi) ; arg2 +(define rdx 'rdx) ; arg3 +(define r8 'r8) ; scratch +(define r9 'r9) ; scratch +(define r10 'r10) ; scratch +(define r12 'r12) ; save across call to memcpy +(define r15 'r15) ; stack pad (non-volatile) +(define rsp 'rsp) ; stack + +;; Op -> Asm +(define (compile-op p) + (match p + ;; Op0 + ['void (seq (Mov rax (value->bits (void))))] + ['read-byte (seq pad-stack + (Call 'read_byte) + unpad-stack)] + ['peek-byte (seq pad-stack + (Call 'peek_byte) + unpad-stack)] + ;; Op1 + ['add1 + (seq (assert-integer rax) + (Add rax (value->bits 1)))] + ['sub1 + (seq (assert-integer rax) + (Sub rax (value->bits 1)))] + ['zero? + (seq (assert-integer rax) + (eq-imm 0))] + ['char? + (type-pred mask-char type-char)] + ['char->integer + (seq (assert-char rax) + (Sar rax char-shift) + (Sal rax int-shift))] + ['integer->char + (seq (assert-codepoint rax) + (Sar rax int-shift) + (Sal rax char-shift) + (Xor rax type-char))] + ['eof-object? 
(eq-imm eof)] + ['write-byte + (seq (assert-byte rax) + pad-stack + (Mov rdi rax) + (Call 'write_byte) + unpad-stack)] + ['box + (seq (Mov (Offset rbx 0) rax) + (Mov rax rbx) + (Or rax type-box) + (Add rbx 8))] + ['unbox + (seq (assert-box rax) + (Xor rax type-box) + (Mov rax (Offset rax 0)))] + ['car + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 8)))] + ['cdr + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 0)))] + ['empty? (eq-imm '())] + ['box? + (type-pred ptr-mask type-box)] + ['cons? + (type-pred ptr-mask type-cons)] + ['vector? + (type-pred ptr-mask type-vect)] + ['string? + (type-pred ptr-mask type-str)] + ['symbol? + (type-pred ptr-mask type-symb)] + ['vector-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-vector rax) + (Xor rax type-vect) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))] + ['string-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-string rax) + (Xor rax type-str) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))] + ['string->symbol + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + pad-stack + (Call 'intern_symbol) + unpad-stack + (Or rax type-symb))] + ['symbol->string + (seq (assert-symbol rax) + (Xor rax type-symb) + char-array-copy + (Or rax type-str))] + ['string->uninterned-symbol + (seq (assert-string rax) + (Xor rax type-str) + char-array-copy + (Or rax type-symb))] + + ;; Op2 + ['+ + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Add rax r8))] + ['- + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Sub r8 rax) + (Mov rax r8))] + ['< + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax (value->bits #t)) + (let ((true (gensym))) + (seq (Jl true) + (Mov rax (value->bits #f)) + (Label true))))] + ['= + (seq (Pop r8) 
+ (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax (value->bits #t)) + (let ((true (gensym))) + (seq (Je true) + (Mov rax (value->bits #f)) + (Label true))))] + ['cons + (seq (Mov (Offset rbx 0) rax) + (Pop rax) + (Mov (Offset rbx 8) rax) + (Mov rax rbx) + (Or rax type-cons) + (Add rbx 16))] + ['eq? + (seq (Pop r8) + (eq r8 rax))] + ['make-vector + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (Cmp r8 0) ; special case empty vector + (Je empty) + + (Mov r9 rbx) + (Or r9 type-vect) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Label loop) + (Mov (Offset rbx 0) rax) + (Add rbx 8) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-vect) + (Label done)))] + + ['vector-ref + (seq (Pop r8) + (assert-vector r8) + (assert-integer rax) + (Cmp r8 type-vect) + (Je 'raise_error_align) ; special case for empty vector + (Cmp rax 0) + (Jl 'raise_error_align) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'raise_error_align) + (Sal rax 3) + (Add r8 rax) + (Mov rax (Offset r8 8)))] + + ['make-string + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (assert-char rax) + (Cmp r8 0) ; special case empty string + (Je empty) + + (Mov r9 rbx) + (Or r9 type-str) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Sar rax char-shift) + + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd + + (Label loop) + (Mov (Offset rbx 0) eax) + (Add rbx 4) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-str) + (Label done)))] + + ['string-ref + (seq (Pop r8) + (assert-string r8) + (assert-integer rax) + (Cmp r8 type-str) + (Je 'raise_error_align) ; special case for empty string + (Cmp rax 0) + (Jl 'raise_error_align) + (Xor r8 
type-str) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'raise_error_align) + (Sal rax 2) + (Add r8 rax) + (Mov 'eax (Offset r8 8)) + (Sal rax char-shift) + (Or rax type-char))] + + ['struct? + (let ((f (gensym)) + (t (gensym))) + (seq (Pop r8) + ; (assert-symbol r8) ; don't need to do this we generated the code + (Mov r9 rax) + (And r9 ptr-mask) + (Cmp r9 type-struct) + (Jne f) + (Xor rax type-struct) + (Mov rax (Offset rax 0)) + (Cmp r8 rax) + (Mov rax (value->bits #t)) + (Jne f) + (Jmp t) + (Label f) + (Mov rax (value->bits #f)) + (Label t)))] + + ;; Op3 + ['vector-set! + (seq (Pop r10) + (Pop r8) + (assert-vector r8) + (assert-integer r10) + (Cmp r10 0) + (Jl 'raise_error_align) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar r10 int-shift) ; r10 = index + (Sub r9 1) + (Cmp r9 r10) + (Jl 'raise_error_align) + (Sal r10 3) + (Add r8 r10) + (Mov (Offset r8 8) rax) + (Mov rax (value->bits (void))))] + + ['struct-ref ; symbol, int, struct + (seq (Pop r8) + (Pop 'r11) + (assert-struct rax) + ;(assert-integer r8) + (Xor rax type-struct) + (Mov r10 (Offset rax 0)) + (Cmp 'r11 r10) + (Jne 'raise_error_align) + (Sar r8 int-shift) + (Add r8 1) + (Sal r8 3) + (Add rax r8) + (Mov rax (Offset rax 0)))])) + +;; Nat -> Asm +;; Emit instructions for creating a structure of length n +;; using values on top of stack +(define (compile-make-struct n) + (seq (compile-make-struct/a n 1) + (Mov rax rbx) + (Or rax type-struct) + (Add rbx (* 8 n)))) + +;; Nat Nat -> Asm +;; Pop elements off stack, writing them to heap +(define (compile-make-struct/a n i) + (if (= n i) + (seq (Mov (Offset rbx (* 8 (- n i))) rax)) + (seq (Mov (Offset rbx (* 8 (- n i))) rax) + (Pop rax) + (compile-make-struct/a n (add1 i))))) + +;; Asm +;; Copy sized array of characters pointed to by rax +(define char-array-copy + (seq (Mov rdi rbx) ; dst + (Mov rsi rax) ; src + (Mov rdx (Offset rax 0)) ; len + (Add rdx 1) ; 
#words = 1 + (len+1)/2 + (Sar rdx 1) + (Add rdx 1) + (Sal rdx 3) ; #bytes = 8*#words + (Mov r12 rdx) ; save rdx before destroyed + pad-stack + (Call 'memcpy) + unpad-stack + ; rbx should be preserved by memcpy + ;(Mov rbx rax) ; dst is returned, install as heap pointer + (Add rbx r12))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (assert-type mask type) + (λ (arg) + (seq (Mov r9 arg) + (And r9 mask) + (Cmp r9 type) + (Jne 'raise_error_align)))) + +(define (type-pred mask type) + (let ((l (gensym))) + (seq (And rax mask) + (Cmp rax type) + (Mov rax (value->bits #t)) + (Je l) + (Mov rax (value->bits #f)) + (Label l)))) + +(define assert-integer + (assert-type mask-int type-int)) +(define assert-char + (assert-type mask-char type-char)) +(define assert-box + (assert-type ptr-mask type-box)) +(define assert-cons + (assert-type ptr-mask type-cons)) +(define assert-vector + (assert-type ptr-mask type-vect)) +(define assert-string + (assert-type ptr-mask type-str)) +(define assert-symbol + (assert-type ptr-mask type-symb)) +(define assert-proc + (assert-type ptr-mask type-proc)) +(define assert-struct + (assert-type ptr-mask type-struct)) + +(define (assert-codepoint r) + (let ((ok (gensym))) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align) + (Cmp r (value->bits 1114111)) + (Jg 'raise_error_align) + (Cmp r (value->bits 55295)) + (Jl ok) + (Cmp r (value->bits 57344)) + (Jg ok) + (Jmp 'raise_error_align) + (Label ok)))) + +(define (assert-byte r) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align) + (Cmp r (value->bits 255)) + (Jg 'raise_error_align))) + +(define (assert-natural r) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'raise_error_align))) + +;; Value -> Asm +(define (eq-imm imm) + (let ((l1 (gensym))) + (seq (Cmp rax (value->bits imm)) + (Mov rax (value->bits #t)) + (Je l1) + (Mov rax (value->bits #f)) + (Label l1)))) + +(define (eq ir1 ir2) + (let ((l1 (gensym))) + (seq (Cmp ir1 ir2) + 
(Mov rax (value->bits #t)) + (Je l1) + (Mov rax (value->bits #f)) + (Label l1)))) diff --git a/langs/neerdowell/compile-stdin.rkt b/langs/neerdowell/compile-stdin.rkt new file mode 100644 index 00000000..cfa15106 --- /dev/null +++ b/langs/neerdowell/compile-stdin.rkt @@ -0,0 +1,10 @@ +#lang racket +(provide main) +(require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile (parse (read-all))))) diff --git a/langs/neerdowell/compile.rkt b/langs/neerdowell/compile.rkt new file mode 100644 index 00000000..7ab2e884 --- /dev/null +++ b/langs/neerdowell/compile.rkt @@ -0,0 +1,53 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-define.rkt" + "compile-expr.rkt" + "compile-literals.rkt" + a86/ast) + +;; Registers used +(define rbx 'rbx) ; heap +(define rsp 'rsp) ; stack +(define rdi 'rdi) ; arg +(define r15 'r15) ; stack pad (non-volatile) + +;; type CEnv = (Listof [Maybe Id]) + +;; Prog -> Asm +(define (compile p) + (match p + [(Prog ds e) + (prog (externs) + (Global 'entry) + (Label 'entry) + (Push rbx) ; save callee-saved register + (Push r15) + (Mov rbx rdi) ; recv heap pointer + (init-symbol-table p) + (compile-defines-values ds) + (compile-e e (reverse (define-ids ds)) #f) + (Add rsp (* 8 (length ds))) ;; pop function definitions + (Pop r15) ; restore callee-save register + (Pop rbx) + (Ret) + (compile-defines ds) + (compile-lambda-defines (lambdas p)) + (Label 'raise_error_align) + pad-stack + (Call 'raise_error) + (Data) + (compile-literals p))])) + +(define (externs) + (seq (Extern 'peek_byte) + (Extern 'read_byte) + (Extern 'write_byte) + (Extern 'raise_error) + (Extern 'intern_symbol) + (Extern 'symb_cmp) + (Extern 'memcpy))) diff --git a/langs/neerdowell/env.rkt b/langs/neerdowell/env.rkt new file mode 100644 index 
00000000..c43be9c3 --- /dev/null +++ b/langs/neerdowell/env.rkt @@ -0,0 +1,15 @@ +#lang racket +(provide lookup ext) + +;; Env Variable -> Answer +(define (lookup env x) + (match env + ['() 'err] + [(cons (list y i) env) + (match (symbol=? x y) + [#t i] + [#f (lookup env x)])])) + +;; Env Variable Value -> Value +(define (ext r x i) + (cons (list x i) r)) \ No newline at end of file diff --git a/langs/neerdowell/fv.rkt b/langs/neerdowell/fv.rkt new file mode 100644 index 00000000..6361687d --- /dev/null +++ b/langs/neerdowell/fv.rkt @@ -0,0 +1,34 @@ +#lang racket +(require "ast.rkt") +(provide fv) + +;; Expr -> [Listof Id] +;; List all of the free variables in e +(define (fv e) + (remove-duplicates (fv* e))) + +(define (fv* e) + (match e + [(Var x) (list x)] + [(Prim p es) (append-map fv* es)] + [(If e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(Begin e1 e2) (append (fv* e1) (fv* e2))] + [(Let x e1 e2) (append (fv* e1) (remq* (list x) (fv* e2)))] + [(App e1 es) (append (fv* e1) (append-map fv* es))] + [(Lam f xs e) (remq* xs (fv* e))] + [(Match e ps es) (append (fv* e) (append-map fv-clause* ps es))] + [_ '()])) + +;; Pat Expr -> [Listof Id] +(define (fv-clause* p e) + (remq* (bv-pat* p) (fv* e))) + +;; Pat -> [Listof Id] +(define (bv-pat* p) + (match p + [(PVar x) (list x)] + [(PCons p1 p2) (append (bv-pat* p1) (bv-pat* p2))] + [(PAnd p1 p2) (append (bv-pat* p1) (bv-pat* p2))] + [(PBox p) (bv-pat* p)] + [(PStruct n ps) (append-map bv-pat* ps)] + [_ '()])) diff --git a/langs/neerdowell/heap.h b/langs/neerdowell/heap.h new file mode 100644 index 00000000..8f2f5e23 --- /dev/null +++ b/langs/neerdowell/heap.h @@ -0,0 +1,9 @@ +#include + +extern int64_t heap[]; +extern int from_side; + +extern char type[]; + +// in words +#define heap_size 1001 diff --git a/langs/neerdowell/interp-defun.rkt b/langs/neerdowell/interp-defun.rkt new file mode 100644 index 00000000..6692231b --- /dev/null +++ b/langs/neerdowell/interp-defun.rkt @@ -0,0 +1,162 @@ +#lang racket 
+(provide interp interp-env (struct-out Closure) zip) +(require "ast.rkt" + "env.rkt" + "interp-prims.rkt") + +;; type Answer = Value | 'err + +;; type Value = +;; | Integer +;; | Boolean +;; | Character +;; | Eof +;; | Void +;; | '() +;; | (cons Value Value) +;; | (box Value) +;; | (vector Value ...) +;; | (string Char ...) +;; | (Closure [Listof Id] Expr Env) +(struct Closure (xs e r) #:prefab) + +;; type REnv = (Listof (List Id Value)) +;; type Defns = (Listof Defn) + +;; Prog -> Answer +(define (interp p) + (match p + [(Prog ds e) + (interp-env e '() ds)])) + +;; Expr Env Defns -> Answer +(define (interp-env e r ds) + (match e + [(Quote d) d] + [(Eof) eof] + [(Var x) (interp-var x r ds)] + [(Prim 'void '()) (void)] + [(Prim 'read-byte '()) (read-byte)] + [(Prim 'peek-byte '()) (peek-byte)] + [(Prim p es) + (match (interp-env* es r ds) + ['err 'err] + [vs (interp-prim p vs)])] + [(If p e1 e2) + (match (interp-env p r ds) + ['err 'err] + [v + (if v + (interp-env e1 r ds) + (interp-env e2 r ds))])] + [(Begin e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [_ (interp-env e2 r ds)])] + [(Let x e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (Closure xs e r)] + [(App e es) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (match f + [(Closure xs e r) + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err)] + [_ 'err])])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + 
;; [Listof Pat] [Listof Value] Env -> [Maybe Env]
;; Match the patterns ps against the values vs pairwise, left to right,
;; threading the environment so bindings from earlier patterns are kept.
;; Produces the extended environment, or #f as soon as one pattern
;; fails to match.
(define (interp-match-pats ps vs r)
  (match ps
    ;; No patterns left: succeed.  Any remaining values are ignored,
    ;; as before.
    ['() r]
    [(cons p ps)
     (match vs
       [(cons v vs)
        (match (interp-match-pat p v r)
          [#f #f]
          [r1 (interp-match-pats ps vs r1)])]
       ;; More patterns than values (e.g. a struct pattern with too many
       ;; sub-patterns): this is a failed match, not a crash of the
       ;; interpreter with a "no matching clause" exception.
       [_ #f])]))
;; [Listof X] [Listof Y] -> [Listof (List X Y)]
;; Pair up the elements of xs and ys position by position.
;; The two lists must have the same length; there is deliberately no
;; clause for lists of different lengths, so that case raises a match
;; failure.
(define (zip xs ys)
  (match (cons xs ys)
    [(cons '() '()) '()]
    [(cons (cons x xs-rest) (cons y ys-rest))
     (list* (list x y)
            (zip xs-rest ys-rest))]))
;; Any -> Boolean
;; Is v an integer in one of the two ranges 0..#xD7FF or
;; #xE000..#x10FFFF?  The gap between them (#xD800..#xDFFF) is rejected.
(define (codepoint? v)
  (cond
    [(not (integer? v)) #f]
    [(<= 0 v #xD7FF) #t]
    [else (<= #xE000 v #x10FFFF)]))
;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer
;; Try the clauses in order: the first pattern in ps that matches v
;; selects the corresponding expression in es, which is evaluated in r
;; extended with the pattern's bindings.  Produces 'err when the clauses
;; run out without a match.  ps and es must have the same length.
(define (interp-match v ps es r ds)
  (match (list ps es)
    [(list '() '()) 'err]
    [(list (cons pat more-pats) (cons rhs more-rhss))
     ;; interp-match-pat yields #f on failure, otherwise an environment
     (let ([r* (interp-match-pat pat v r)])
       (if r*
           (interp-env rhs r* ds)
           (interp-match v more-pats more-rhss r ds)))]))
l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])] + [(PStruct t ps) + (match v + [(StructVal n vs) + (and (eq? t n) + (interp-match-pats ps (vector->list vs) r))] + [_ #f])])) + +;; [Listof Pat] [Listof Val] Env -> [Maybe Env] +(define (interp-match-pats ps vs r) + (match ps + ['() r] + [(cons p ps) + (match vs + [(cons v vs) + (match (interp-match-pat p v r) + [#f #f] + [r1 (interp-match-pats ps vs r1)])])])) + +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) + +;; (Listof Expr) REnv Defns -> (Listof Value) | 'err +(define (interp-env* es r ds) + (match es + ['() '()] + [(cons e es) + (match (interp-env e r ds) + ['err 'err] + [v (match (interp-env* es r ds) + ['err 'err] + [vs (cons v vs)])])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) + +(define (zip xs ys) + (match* (xs ys) + [('() '()) '()] + [((cons x xs) (cons y ys)) + (cons (list x y) + (zip xs ys))])) diff --git a/langs/neerdowell/io.c b/langs/neerdowell/io.c new file mode 100644 index 00000000..7ef82281 --- /dev/null +++ b/langs/neerdowell/io.c @@ -0,0 +1,25 @@ +#include +#include +#include "types.h" +#include "values.h" +#include "runtime.h" + +val_t read_byte(void) +{ + char c = getc(in); + return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); +} + +val_t peek_byte(void) +{ + char c = getc(in); + ungetc(c, in); + return (c == EOF) ? 
val_wrap_eof() : val_wrap_int(c); + +} + +val_t write_byte(val_t c) +{ + putc((char) val_unwrap_int(c), out); + return val_wrap_void(); +} diff --git a/langs/neerdowell/lambdas.rkt b/langs/neerdowell/lambdas.rkt new file mode 100644 index 00000000..092952aa --- /dev/null +++ b/langs/neerdowell/lambdas.rkt @@ -0,0 +1,32 @@ +#lang racket +(require "ast.rkt") +(provide lambdas) + +;; Prog -> [Listof Lam] +;; List all of the lambda expressions in p +(define (lambdas p) + (match p + [(Prog ds e) + (append (lambdas-ds ds) (lambdas-e e))])) + +;; Defns -> [Listof Lam] +;; List all of the lambda expressions in ds +(define (lambdas-ds ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (append (lambdas-e e) + (lambdas-ds ds))])) + +;; Expr -> [Listof Lam] +;; List all of the lambda expressions in e +(define (lambdas-e e) + (match e + [(Prim p es) (append-map lambdas-e es)] + [(If e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(Begin e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Let x e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(App e1 es) (append (lambdas-e e1) (append-map lambdas-e es))] + [(Lam f xs e1) (cons e (lambdas-e e1))] + [(Match e ps es) (append (lambdas-e e) (append-map lambdas-e es))] + [_ '()])) diff --git a/langs/neerdowell/main.c b/langs/neerdowell/main.c new file mode 100644 index 00000000..1ca6115f --- /dev/null +++ b/langs/neerdowell/main.c @@ -0,0 +1,40 @@ +#include +#include +#include "values.h" +#include "print.h" +#include "runtime.h" + +FILE* in; +FILE* out; +void (*error_handler)(); +val_t *heap; + +void error_exit() +{ + printf("err\n"); + exit(1); +} + +void raise_error() +{ + return error_handler(); +} + +int main(int argc, char** argv) +{ + in = stdin; + out = stdout; + error_handler = &error_exit; + heap = malloc(8 * heap_size); + + val_t result; + + result = entry(heap); + + print_result(result); + if (val_typeof(result) != T_VOID) + putchar('\n'); + + free(heap); + return 0; +} diff --git 
;; S-Expr -> [Listof Defn]
;; Parse (struct n (f ...)) into the definitions it stands for:
;; the constructor n, the predicate n?, and one accessor n-f per field.
;; Signals "parse struct definition error" for anything else.
(define (parse-struct s)
  (match s
    [(list 'struct (? symbol? n) flds)
     ;; flds may be any s-expression here; check that it is a list before
     ;; andmap so that input such as (struct p x) is reported as a parse
     ;; error rather than an andmap contract violation.
     (if (and (list? flds)
              (andmap symbol? flds))
         (list* (make-struct-defn-construct n flds)
                (make-struct-defn-predicate n)
                ;; the accessor generator uses the number of fields still
                ;; to come as each field's index, so the fields are passed
                ;; in reverse order
                (make-struct-defn-accessors n (reverse flds)))
         (error "parse struct definition error"))]
    [_ (error "parse struct definition error")]))
;; S-Expr -> Defn
;; Parse (define (f x ...) e) into a Defn for function f with
;; parameters x ... and body e.  Signals an error for any other shape.
(define (parse-define s)
  (match s
    [(list 'define (list-rest (? symbol? f) xs) e)
     ;; list-rest also matches a dotted header such as (f . xs), in which
     ;; case xs is not a list; check list? first so that this is reported
     ;; as a parse error rather than an andmap contract violation.
     (if (and (list? xs)
              (andmap symbol? xs))
         (Defn f xs (parse-e e))
         (error "parse definition error"))]
    [_ (error "parse defn error" s)]))
;; Any -> Boolean
;; Is x a datum that the parser treats as a literal without an explicit
;; quote?  That is: an exact integer, boolean, character, string, box,
;; or vector.
(define (self-quoting? x)
  (ormap (λ (pred) (pred x))
         (list exact-integer?
               boolean?
               char?
               string?
               box?
               vector?)))
"#t" : "#f"); + break; + case T_CHAR: + print_char(val_unwrap_char(x)); + break; + case T_EOF: + printf("#"); + break; + case T_VOID: + break; + case T_EMPTY: + case T_BOX: + case T_CONS: + case T_VECT: + printf("'"); + print_result_interior(x); + break; + case T_STR: + putchar('"'); + print_str(val_unwrap_str(x)); + putchar('"'); + break; + case T_SYMB: + printf("'"); + print_result_interior(x); + break; + case T_PROC: + printf("#"); + break; + case T_STRUCT: + print_struct(val_unwrap_struct(x)); + break; + case T_INVALID: + printf("internal error"); + } +} + +void print_struct(val_struct_t *s) { + printf("#<"); + print_result_interior(s->name); + printf(">"); +} + +void print_symb(val_symb_t *s) +{ + print_str((val_str_t*) s); +} + +void print_result_interior(val_t x) +{ + switch (val_typeof(x)) { + case T_EMPTY: + printf("()"); + break; + case T_BOX: + printf("#&"); + print_result_interior(val_unwrap_box(x)->val); + break; + case T_CONS: + printf("("); + print_cons(val_unwrap_cons(x)); + printf(")"); + break; + case T_SYMB: + print_symb(val_unwrap_symb(x)); + break; + case T_VECT: + print_vect(val_unwrap_vect(x)); + break; + default: + print_result(x); + } +} + +void print_vect(val_vect_t *v) +{ + uint64_t i; + + if (!v) { printf("#()"); return; } + + printf("#("); + for (i = 0; i < v->len; ++i) { + print_result_interior(v->elems[i]); + + if (i < v->len - 1) + putchar(' '); + } + printf(")"); +} + +void print_cons(val_cons_t *cons) +{ + print_result_interior(cons->fst); + + switch (val_typeof(cons->snd)) { + case T_EMPTY: + // nothing + break; + case T_CONS: + printf(" "); + print_cons(val_unwrap_cons(cons->snd)); + break; + default: + printf(" . 
"); + print_result_interior(cons->snd); + break; + } +} + +void print_str(val_str_t* s) +{ + if (!s) return; + uint64_t i; + for (i = 0; i < s->len; ++i) + print_str_char(s->codepoints[i]); +} + +void print_str_char_u(val_char_t c) +{ + printf("\\u%04X", c); +} + +void print_str_char_U(val_char_t c) +{ + printf("\\U%08X", c); +} + +void print_str_char(val_char_t c) +{ + switch (c) { + case 0 ... 6: + print_str_char_u(c); + break; + case 7: + printf("\\a"); + break; + case 8: + printf("\\b"); + break; + case 9: + printf("\\t"); + break; + case 10: + printf("\\n"); + break; + case 11: + printf("\\v"); + break; + case 12: + printf("\\f"); + break; + case 13: + printf("\\r"); + break; + case 14 ... 26: + print_str_char_u(c); + break; + case 27: + printf("\\e"); + break; + case 28 ... 31: + print_str_char_u(c); + break; + case 34: + printf("\\\""); + break; + case 39: + printf("'"); + break; + case 92: + printf("\\\\"); + break; + case 127 ... 159: + case 173 ... 173: + case 888 ... 889: + case 896 ... 899: + case 907 ... 907: + case 909 ... 909: + case 930 ... 930: + case 1328 ... 1328: + case 1367 ... 1368: + case 1376 ... 1376: + case 1416 ... 1416: + case 1419 ... 1420: + case 1424 ... 1424: + case 1480 ... 1487: + case 1515 ... 1519: + case 1525 ... 1541: + case 1564 ... 1565: + case 1757 ... 1757: + case 1806 ... 1807: + case 1867 ... 1868: + case 1970 ... 1983: + case 2043 ... 2047: + case 2094 ... 2095: + case 2111 ... 2111: + case 2140 ... 2141: + case 2143 ... 2207: + case 2227 ... 2275: + case 2436 ... 2436: + case 2445 ... 2446: + case 2449 ... 2450: + case 2473 ... 2473: + case 2481 ... 2481: + case 2483 ... 2485: + case 2490 ... 2491: + case 2501 ... 2502: + case 2505 ... 2506: + case 2511 ... 2518: + case 2520 ... 2523: + case 2526 ... 2526: + case 2532 ... 2533: + case 2556 ... 2560: + case 2564 ... 2564: + case 2571 ... 2574: + case 2577 ... 2578: + case 2601 ... 2601: + case 2609 ... 2609: + case 2612 ... 2612: + case 2615 ... 2615: + case 2618 ... 
2619: + case 2621 ... 2621: + case 2627 ... 2630: + case 2633 ... 2634: + case 2638 ... 2640: + case 2642 ... 2648: + case 2653 ... 2653: + case 2655 ... 2661: + case 2678 ... 2688: + case 2692 ... 2692: + case 2702 ... 2702: + case 2706 ... 2706: + case 2729 ... 2729: + case 2737 ... 2737: + case 2740 ... 2740: + case 2746 ... 2747: + case 2758 ... 2758: + case 2762 ... 2762: + case 2766 ... 2767: + case 2769 ... 2783: + case 2788 ... 2789: + case 2802 ... 2816: + case 2820 ... 2820: + case 2829 ... 2830: + case 2833 ... 2834: + case 2857 ... 2857: + case 2865 ... 2865: + case 2868 ... 2868: + case 2874 ... 2875: + case 2885 ... 2886: + case 2889 ... 2890: + case 2894 ... 2901: + case 2904 ... 2907: + case 2910 ... 2910: + case 2916 ... 2917: + case 2936 ... 2945: + case 2948 ... 2948: + case 2955 ... 2957: + case 2961 ... 2961: + case 2966 ... 2968: + case 2971 ... 2971: + case 2973 ... 2973: + case 2976 ... 2978: + case 2981 ... 2983: + case 2987 ... 2989: + case 3002 ... 3005: + case 3011 ... 3013: + case 3017 ... 3017: + case 3022 ... 3023: + case 3025 ... 3030: + case 3032 ... 3045: + case 3067 ... 3071: + case 3076 ... 3076: + case 3085 ... 3085: + case 3089 ... 3089: + case 3113 ... 3113: + case 3130 ... 3132: + case 3141 ... 3141: + case 3145 ... 3145: + case 3150 ... 3156: + case 3159 ... 3159: + case 3162 ... 3167: + case 3172 ... 3173: + case 3184 ... 3191: + case 3200 ... 3200: + case 3204 ... 3204: + case 3213 ... 3213: + case 3217 ... 3217: + case 3241 ... 3241: + case 3252 ... 3252: + case 3258 ... 3259: + case 3269 ... 3269: + case 3273 ... 3273: + case 3278 ... 3284: + case 3287 ... 3293: + case 3295 ... 3295: + case 3300 ... 3301: + case 3312 ... 3312: + case 3315 ... 3328: + case 3332 ... 3332: + case 3341 ... 3341: + case 3345 ... 3345: + case 3387 ... 3388: + case 3397 ... 3397: + case 3401 ... 3401: + case 3407 ... 3414: + case 3416 ... 3423: + case 3428 ... 3429: + case 3446 ... 3448: + case 3456 ... 3457: + case 3460 ... 
3460: + case 3479 ... 3481: + case 3506 ... 3506: + case 3516 ... 3516: + case 3518 ... 3519: + case 3527 ... 3529: + case 3531 ... 3534: + case 3541 ... 3541: + case 3543 ... 3543: + case 3552 ... 3557: + case 3568 ... 3569: + case 3573 ... 3584: + case 3643 ... 3646: + case 3676 ... 3712: + case 3715 ... 3715: + case 3717 ... 3718: + case 3721 ... 3721: + case 3723 ... 3724: + case 3726 ... 3731: + case 3736 ... 3736: + case 3744 ... 3744: + case 3748 ... 3748: + case 3750 ... 3750: + case 3752 ... 3753: + case 3756 ... 3756: + case 3770 ... 3770: + case 3774 ... 3775: + case 3781 ... 3781: + case 3783 ... 3783: + case 3790 ... 3791: + case 3802 ... 3803: + case 3808 ... 3839: + case 3912 ... 3912: + case 3949 ... 3952: + case 3992 ... 3992: + case 4029 ... 4029: + case 4045 ... 4045: + case 4059 ... 4095: + case 4294 ... 4294: + case 4296 ... 4300: + case 4302 ... 4303: + case 4681 ... 4681: + case 4686 ... 4687: + case 4695 ... 4695: + case 4697 ... 4697: + case 4702 ... 4703: + case 4745 ... 4745: + case 4750 ... 4751: + case 4785 ... 4785: + case 4790 ... 4791: + case 4799 ... 4799: + case 4801 ... 4801: + case 4806 ... 4807: + case 4823 ... 4823: + case 4881 ... 4881: + case 4886 ... 4887: + case 4955 ... 4956: + case 4989 ... 4991: + case 5018 ... 5023: + case 5109 ... 5119: + case 5789 ... 5791: + case 5881 ... 5887: + case 5901 ... 5901: + case 5909 ... 5919: + case 5943 ... 5951: + case 5972 ... 5983: + case 5997 ... 5997: + case 6001 ... 6001: + case 6004 ... 6015: + case 6110 ... 6111: + case 6122 ... 6127: + case 6138 ... 6143: + case 6158 ... 6159: + case 6170 ... 6175: + case 6264 ... 6271: + case 6315 ... 6319: + case 6390 ... 6399: + case 6431 ... 6431: + case 6444 ... 6447: + case 6460 ... 6463: + case 6465 ... 6467: + case 6510 ... 6511: + case 6517 ... 6527: + case 6572 ... 6575: + case 6602 ... 6607: + case 6619 ... 6621: + case 6684 ... 6685: + case 6751 ... 6751: + case 6781 ... 6782: + case 6794 ... 6799: + case 6810 ... 
6815: + case 6830 ... 6831: + case 6847 ... 6911: + case 6988 ... 6991: + case 7037 ... 7039: + case 7156 ... 7163: + case 7224 ... 7226: + case 7242 ... 7244: + case 7296 ... 7359: + case 7368 ... 7375: + case 7415 ... 7415: + case 7418 ... 7423: + case 7670 ... 7675: + case 7958 ... 7959: + case 7966 ... 7967: + case 8006 ... 8007: + case 8014 ... 8015: + case 8024 ... 8024: + case 8026 ... 8026: + case 8028 ... 8028: + case 8030 ... 8030: + case 8062 ... 8063: + case 8117 ... 8117: + case 8133 ... 8133: + case 8148 ... 8149: + case 8156 ... 8156: + case 8176 ... 8177: + case 8181 ... 8181: + case 8191 ... 8191: + case 8203 ... 8207: + case 8232 ... 8238: + case 8288 ... 8303: + case 8306 ... 8307: + case 8335 ... 8335: + case 8349 ... 8351: + case 8382 ... 8399: + case 8433 ... 8447: + case 8586 ... 8591: + case 9211 ... 9215: + case 9255 ... 9279: + case 9291 ... 9311: + case 11124 ... 11125: + case 11158 ... 11159: + case 11194 ... 11196: + case 11209 ... 11209: + case 11218 ... 11263: + case 11311 ... 11311: + case 11359 ... 11359: + case 11508 ... 11512: + case 11558 ... 11558: + case 11560 ... 11564: + case 11566 ... 11567: + case 11624 ... 11630: + case 11633 ... 11646: + case 11671 ... 11679: + case 11687 ... 11687: + case 11695 ... 11695: + case 11703 ... 11703: + case 11711 ... 11711: + case 11719 ... 11719: + case 11727 ... 11727: + case 11735 ... 11735: + case 11743 ... 11743: + case 11843 ... 11903: + case 11930 ... 11930: + case 12020 ... 12031: + case 12246 ... 12271: + case 12284 ... 12287: + case 12352 ... 12352: + case 12439 ... 12440: + case 12544 ... 12548: + case 12590 ... 12592: + case 12687 ... 12687: + case 12731 ... 12735: + case 12772 ... 12783: + case 12831 ... 12831: + case 13055 ... 13055: + case 19894 ... 19903: + case 40909 ... 40959: + case 42125 ... 42127: + case 42183 ... 42191: + case 42540 ... 42559: + case 42654 ... 42654: + case 42744 ... 42751: + case 42895 ... 42895: + case 42926 ... 42927: + case 42930 ... 
42998: + case 43052 ... 43055: + case 43066 ... 43071: + case 43128 ... 43135: + case 43205 ... 43213: + case 43226 ... 43231: + case 43260 ... 43263: + case 43348 ... 43358: + case 43389 ... 43391: + case 43470 ... 43470: + case 43482 ... 43485: + case 43519 ... 43519: + case 43575 ... 43583: + case 43598 ... 43599: + case 43610 ... 43611: + case 43715 ... 43738: + case 43767 ... 43776: + case 43783 ... 43784: + case 43791 ... 43792: + case 43799 ... 43807: + case 43815 ... 43815: + case 43823 ... 43823: + case 43872 ... 43875: + case 43878 ... 43967: + case 44014 ... 44015: + case 44026 ... 44031: + case 55204 ... 55215: + case 55239 ... 55242: + case 55292 ... 55295: + case 57344 ... 63743: + case 64110 ... 64111: + case 64218 ... 64255: + case 64263 ... 64274: + case 64280 ... 64284: + case 64311 ... 64311: + case 64317 ... 64317: + case 64319 ... 64319: + case 64322 ... 64322: + case 64325 ... 64325: + case 64450 ... 64466: + case 64832 ... 64847: + case 64912 ... 64913: + case 64968 ... 65007: + case 65022 ... 65023: + case 65050 ... 65055: + case 65070 ... 65071: + case 65107 ... 65107: + case 65127 ... 65127: + case 65132 ... 65135: + case 65141 ... 65141: + case 65277 ... 65280: + case 65471 ... 65473: + case 65480 ... 65481: + case 65488 ... 65489: + case 65496 ... 65497: + case 65501 ... 65503: + case 65511 ... 65511: + case 65519 ... 65531: + case 65534 ... 65535: + print_str_char_u(c); + break; + case 65548 ... 65548: + case 65575 ... 65575: + case 65595 ... 65595: + case 65598 ... 65598: + case 65614 ... 65615: + case 65630 ... 65663: + case 65787 ... 65791: + case 65795 ... 65798: + case 65844 ... 65846: + case 65933 ... 65935: + case 65948 ... 65951: + case 65953 ... 65999: + case 66046 ... 66175: + case 66205 ... 66207: + case 66257 ... 66271: + case 66300 ... 66303: + case 66340 ... 66351: + case 66379 ... 66383: + case 66427 ... 66431: + case 66462 ... 66462: + case 66500 ... 66503: + case 66518 ... 66559: + case 66718 ... 66719: + case 66730 ... 
66815: + case 66856 ... 66863: + case 66916 ... 66926: + case 66928 ... 67071: + case 67383 ... 67391: + case 67414 ... 67423: + case 67432 ... 67583: + case 67590 ... 67591: + case 67593 ... 67593: + case 67638 ... 67638: + case 67641 ... 67643: + case 67645 ... 67646: + case 67670 ... 67670: + case 67743 ... 67750: + case 67760 ... 67839: + case 67868 ... 67870: + case 67898 ... 67902: + case 67904 ... 67967: + case 68024 ... 68029: + case 68032 ... 68095: + case 68100 ... 68100: + case 68103 ... 68107: + case 68116 ... 68116: + case 68120 ... 68120: + case 68148 ... 68151: + case 68155 ... 68158: + case 68168 ... 68175: + case 68185 ... 68191: + case 68256 ... 68287: + case 68327 ... 68330: + case 68343 ... 68351: + case 68406 ... 68408: + case 68438 ... 68439: + case 68467 ... 68471: + case 68498 ... 68504: + case 68509 ... 68520: + case 68528 ... 68607: + case 68681 ... 69215: + case 69247 ... 69631: + case 69710 ... 69713: + case 69744 ... 69758: + case 69821 ... 69821: + case 69826 ... 69839: + case 69865 ... 69871: + case 69882 ... 69887: + case 69941 ... 69941: + case 69956 ... 69967: + case 70007 ... 70015: + case 70089 ... 70092: + case 70094 ... 70095: + case 70107 ... 70112: + case 70133 ... 70143: + case 70162 ... 70162: + case 70206 ... 70319: + case 70379 ... 70383: + case 70394 ... 70400: + case 70404 ... 70404: + case 70413 ... 70414: + case 70417 ... 70418: + case 70441 ... 70441: + case 70449 ... 70449: + case 70452 ... 70452: + case 70458 ... 70459: + case 70469 ... 70470: + case 70473 ... 70474: + case 70478 ... 70486: + case 70488 ... 70492: + case 70500 ... 70501: + case 70509 ... 70511: + case 70517 ... 70783: + case 70856 ... 70863: + case 70874 ... 71039: + case 71094 ... 71095: + case 71114 ... 71167: + case 71237 ... 71247: + case 71258 ... 71295: + case 71352 ... 71359: + case 71370 ... 71839: + case 71923 ... 71934: + case 71936 ... 72383: + case 72441 ... 73727: + case 74649 ... 74751: + case 74863 ... 74863: + case 74869 ... 
77823: + case 78895 ... 92159: + case 92729 ... 92735: + case 92767 ... 92767: + case 92778 ... 92781: + case 92784 ... 92879: + case 92910 ... 92911: + case 92918 ... 92927: + case 92998 ... 93007: + case 93018 ... 93018: + case 93026 ... 93026: + case 93048 ... 93052: + case 93072 ... 93951: + case 94021 ... 94031: + case 94079 ... 94094: + case 94112 ... 110591: + case 110594 ... 113663: + case 113771 ... 113775: + case 113789 ... 113791: + case 113801 ... 113807: + case 113818 ... 113819: + case 113824 ... 118783: + case 119030 ... 119039: + case 119079 ... 119080: + case 119155 ... 119162: + case 119262 ... 119295: + case 119366 ... 119551: + case 119639 ... 119647: + case 119666 ... 119807: + case 119893 ... 119893: + case 119965 ... 119965: + case 119968 ... 119969: + case 119971 ... 119972: + case 119975 ... 119976: + case 119981 ... 119981: + case 119994 ... 119994: + case 119996 ... 119996: + case 120004 ... 120004: + case 120070 ... 120070: + case 120075 ... 120076: + case 120085 ... 120085: + case 120093 ... 120093: + case 120122 ... 120122: + case 120127 ... 120127: + case 120133 ... 120133: + case 120135 ... 120137: + case 120145 ... 120145: + case 120486 ... 120487: + case 120780 ... 120781: + case 120832 ... 124927: + case 125125 ... 125126: + case 125143 ... 126463: + case 126468 ... 126468: + case 126496 ... 126496: + case 126499 ... 126499: + case 126501 ... 126502: + case 126504 ... 126504: + case 126515 ... 126515: + case 126520 ... 126520: + case 126522 ... 126522: + case 126524 ... 126529: + case 126531 ... 126534: + case 126536 ... 126536: + case 126538 ... 126538: + case 126540 ... 126540: + case 126544 ... 126544: + case 126547 ... 126547: + case 126549 ... 126550: + case 126552 ... 126552: + case 126554 ... 126554: + case 126556 ... 126556: + case 126558 ... 126558: + case 126560 ... 126560: + case 126563 ... 126563: + case 126565 ... 126566: + case 126571 ... 126571: + case 126579 ... 126579: + case 126584 ... 126584: + case 126589 ... 
126589: + case 126591 ... 126591: + case 126602 ... 126602: + case 126620 ... 126624: + case 126628 ... 126628: + case 126634 ... 126634: + case 126652 ... 126703: + case 126706 ... 126975: + case 127020 ... 127023: + case 127124 ... 127135: + case 127151 ... 127152: + case 127168 ... 127168: + case 127184 ... 127184: + case 127222 ... 127231: + case 127245 ... 127247: + case 127279 ... 127279: + case 127340 ... 127343: + case 127387 ... 127461: + case 127491 ... 127503: + case 127547 ... 127551: + case 127561 ... 127567: + case 127570 ... 127743: + case 127789 ... 127791: + case 127870 ... 127871: + case 127951 ... 127955: + case 127992 ... 127999: + case 128255 ... 128255: + case 128331 ... 128335: + case 128378 ... 128378: + case 128420 ... 128420: + case 128579 ... 128580: + case 128720 ... 128735: + case 128749 ... 128751: + case 128756 ... 128767: + case 128884 ... 128895: + case 128981 ... 129023: + case 129036 ... 129039: + case 129096 ... 129103: + case 129114 ... 129119: + case 129160 ... 129167: + case 129198 ... 131071: + case 173783 ... 173823: + case 177973 ... 177983: + case 178206 ... 194559: + case 195102 ... 917759: + case 918000 ... 
1114110: + print_str_char_U(c); + break; + default: + print_codepoint(c); + break; + } +} + +void print_char(val_char_t c) +{ + printf("#\\"); + switch (c) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(c); + } +} + +void print_codepoint(val_char_t c) +{ + char buffer[5] = {0}; + utf8_encode_char(c, buffer); + printf("%s", buffer); +} + +int utf8_encode_char(val_char_t c, char *buffer) +{ + // Output to buffer using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (c < 128) { + buffer[0] = (char) c; + return 1; + } else if (c < 2048) { + buffer[0] = (char)(c >> 6) | 192; + buffer[1] = ((char) c & 63) | 128; + return 2; + } else if (c < 65536) { + buffer[0] = (char)(c >> 12) | 224; + buffer[1] = ((char)(c >> 6) & 63) | 128; + buffer[2] = ((char) c & 63) | 128; + return 3; + } else { + buffer[0] = (char)(c >> 18) | 240; + buffer[1] = ((char)(c >> 12) & 63) | 128; + buffer[2] = ((char)(c >> 6) & 63) | 128; + buffer[3] = ((char) c & 63) | 128; + return 4; + } +} diff --git a/langs/neerdowell/print.h b/langs/neerdowell/print.h new file mode 100644 index 00000000..c22081a2 --- /dev/null +++ b/langs/neerdowell/print.h @@ -0,0 +1,8 @@ +#ifndef PRINT_H +#define PRINT_H + +#include "values.h" + +void print_result(val_t); + +#endif diff --git a/langs/neerdowell/read-all.rkt b/langs/neerdowell/read-all.rkt new file mode 100644 index 00000000..8a3289a5 --- /dev/null +++ b/langs/neerdowell/read-all.rkt @@ -0,0 +1,8 @@ +#lang racket +(provide read-all) +;; read all s-expression until eof +(define (read-all) + (let ((r (read))) + (if (eof-object? 
r) + '() + (cons r (read-all))))) diff --git a/langs/neerdowell/run.rkt b/langs/neerdowell/run.rkt new file mode 100644 index 00000000..eaa53eb9 --- /dev/null +++ b/langs/neerdowell/run.rkt @@ -0,0 +1,18 @@ +#lang racket +(provide run run/io) +(require "types.rkt" "build-runtime.rkt" + a86/interp) + +;; Asm -> Answer +(define (run is) + (parameterize ((current-objs (list runtime-path))) + (match (asm-interp is) + ['err 'err] + [b (bits->value b)]))) + +;; Asm String -> (cons Answer String) +(define (run/io is s) + (parameterize ((current-objs (list runtime-path))) + (match (asm-interp/io is s) + [(cons 'err o) (cons 'err o)] + [(cons b o) (cons (bits->value b) o)]))) diff --git a/langs/neerdowell/runtime.h b/langs/neerdowell/runtime.h new file mode 100644 index 00000000..f594f0f6 --- /dev/null +++ b/langs/neerdowell/runtime.h @@ -0,0 +1,11 @@ +#ifndef RUNTIME_H +#define RUNTIME_H +int64_t entry(); +extern FILE* in; +extern FILE* out; +extern void (*error_handler)(); + +// in words +#define heap_size 10000 +extern int64_t *heap; +#endif /* RUNTIME_H */ diff --git a/langs/neerdowell/symbol.c b/langs/neerdowell/symbol.c new file mode 100644 index 00000000..bcff4f3f --- /dev/null +++ b/langs/neerdowell/symbol.c @@ -0,0 +1,55 @@ +#include +#include +#include "values.h" + +int symb_cmp(const val_symb_t *, const val_symb_t *); + +// binary tree node +struct Node { + val_symb_t* elem; + struct Node* left; + struct Node* right; +}; + +static struct Node *symbol_tbl = NULL; + +val_symb_t *intern_symbol(val_symb_t* symb) +{ + struct Node **curr = &symbol_tbl; + + while (*curr) { + struct Node *t = *curr; + int r = symb_cmp(symb, t->elem); + if (r == 0) { + // found it, so return saved pointer + return t->elem; + } else if (r < 0) { + curr = &t->left; + } else { + curr = &t->right; + } + } + + // wasn't found, so insert it and return pointer + *curr = calloc(1, sizeof(struct Node)); + (*curr)->elem = symb; + return (*curr)->elem; +} + +int symb_cmp(const val_symb_t *s1, const 
val_symb_t *s2) +{ + if (s1 == s2) return 0; + + int64_t len1 = s1->len; + int64_t len2 = s2->len; + + int64_t len = len1 < len2 ? len1 : len2; + int i; + + for (i = 0; i < len; i++) { + if (s1->codepoints[i] != s2->codepoints[i]) + return s1->codepoints[i] - s2->codepoints[i]; + } + + return len1 - len2; +} diff --git a/langs/neerdowell/test/build-runtime.rkt b/langs/neerdowell/test/build-runtime.rkt new file mode 100644 index 00000000..7023ee0b --- /dev/null +++ b/langs/neerdowell/test/build-runtime.rkt @@ -0,0 +1,8 @@ +#lang racket +(require a86/interp) + +;; link with runtime for IO operations +(unless (file-exists? "../runtime.o") + (system "make -C .. runtime.o")) +(current-objs + (list (path->string (normalize-path "../runtime.o")))) diff --git a/langs/neerdowell/test/compile.rkt b/langs/neerdowell/test/compile.rkt new file mode 100644 index 00000000..ee289de8 --- /dev/null +++ b/langs/neerdowell/test/compile.rkt @@ -0,0 +1,8 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../compile.rkt" + "../run.rkt") + +(test-runner (λ p (run (compile (parse p))))) +(test-runner-io (λ (s . p) (run/io (compile (parse p)) s))) diff --git a/langs/neerdowell/test/interp-defun.rkt b/langs/neerdowell/test/interp-defun.rkt new file mode 100644 index 00000000..68ef4191 --- /dev/null +++ b/langs/neerdowell/test/interp-defun.rkt @@ -0,0 +1,24 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../interp-defun.rkt" + "../interp-io.rkt") + +(define (closure->proc xs e r) + ;; Could make this better by calling the interpreter, + ;; but it's only used in tests where all we care about + ;; is that you get a procedure. + (lambda _ + (error "This function is not callable."))) + +(test-runner + (λ p + (match (interp (parse p)) + [(Closure xs e r) (closure->proc xs e r)] + [v v]))) +(test-runner-io + (λ (s . 
p) + (match (interp/io (parse p) s) + [(cons (Closure xs e r) o) + (cons (closure->proc xs e r) o)] + [r r]))) diff --git a/langs/knock/test/interp.rkt b/langs/neerdowell/test/interp.rkt similarity index 55% rename from langs/knock/test/interp.rkt rename to langs/neerdowell/test/interp.rkt index 70d041fe..cd7b654e 100644 --- a/langs/knock/test/interp.rkt +++ b/langs/neerdowell/test/interp.rkt @@ -4,5 +4,5 @@ "../interp.rkt" "../interp-io.rkt") -(test-runner (λ (e) (interp (parse e)))) -(test-runner-io (λ (e s) (interp/io (parse e) s))) +(test-runner (λ p (interp (parse p)))) +(test-runner-io (λ (s . p) (interp/io (parse p) s))) diff --git a/langs/neerdowell/test/test-runner.rkt b/langs/neerdowell/test/test-runner.rkt new file mode 100644 index 00000000..a0fc4437 --- /dev/null +++ b/langs/neerdowell/test/test-runner.rkt @@ -0,0 +1,550 @@ +#lang racket +(provide test-runner test-runner-io) +(require rackunit) + +(define (test-runner run) + ;; Abscond examples + (check-equal? (run 7) 7) + (check-equal? (run -8) -8) + + ;; Blackmail examples + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7) + + ;; Con examples + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 1) 1 2) + 7)) + 7) + + ;; Dupe examples + (check-equal? (run #t) #t) + (check-equal? (run #f) #f) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) + (check-equal? (run '(if #t 3 4)) 3) + (check-equal? (run '(if #f 3 4)) 4) + (check-equal? (run '(if 0 3 4)) 3) + (check-equal? (run '(zero? 4)) #f) + (check-equal? (run '(zero? 0)) #t) + + ;; Dodger examples + (check-equal? (run #\a) #\a) + (check-equal? (run #\b) #\b) + (check-equal? (run '(char? #\a)) #t) + (check-equal? (run '(char? 
#t)) #f) + (check-equal? (run '(char? 8)) #f) + (check-equal? (run '(char->integer #\a)) (char->integer #\a)) + (check-equal? (run '(integer->char 955)) #\λ) + + ;; Extort examples + (check-equal? (run '(add1 #f)) 'err) + (check-equal? (run '(sub1 #f)) 'err) + (check-equal? (run '(zero? #f)) 'err) + (check-equal? (run '(char->integer #f)) 'err) + (check-equal? (run '(integer->char #f)) 'err) + (check-equal? (run '(integer->char -1)) 'err) + (check-equal? (run '(write-byte #f)) 'err) + (check-equal? (run '(write-byte -1)) 'err) + (check-equal? (run '(write-byte 256)) 'err) + + ;; Fraud examples + (check-equal? (run '(let ((x 7)) x)) 7) + (check-equal? (run '(let ((x 7)) 2)) 2) + (check-equal? (run '(let ((x 7)) (add1 x))) 8) + (check-equal? (run '(let ((x (add1 7))) x)) 8) + (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) + (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) + (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) + + (check-equal? (run '(let ((x 0)) + (if (zero? x) 7 8))) + 7) + (check-equal? (run '(let ((x 1)) + (add1 (if (zero? x) 7 8)))) + 9) + (check-equal? (run '(+ 3 4)) 7) + (check-equal? (run '(- 3 4)) -1) + (check-equal? (run '(+ (+ 2 1) 4)) 7) + (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) + (check-equal? (run '(let ((x (+ 1 2))) + (let ((z (- 4 x))) + (+ (+ x x) z)))) + 7) + (check-equal? (run '(= 5 5)) #t) + (check-equal? (run '(= 4 5)) #f) + (check-equal? (run '(= (add1 4) 5)) #t) + (check-equal? (run '(< 5 5)) #f) + (check-equal? (run '(< 4 5)) #t) + (check-equal? (run '(< (add1 4) 5)) #f) + + ;; Hustle examples + (check-equal? (run ''()) '()) + (check-equal? (run '(box 1)) (box 1)) + (check-equal? (run '(box -1)) (box -1)) + (check-equal? (run '(cons 1 2)) (cons 1 2)) + (check-equal? (run '(unbox (box 1))) 1) + (check-equal? (run '(car (cons 1 2))) 1) + (check-equal? (run '(cdr (cons 1 2))) 2) + (check-equal? (run '(cons 1 '())) (list 1)) + (check-equal? 
(run '(let ((x (cons 1 2))) + (begin (cdr x) + (car x)))) + 1) + (check-equal? (run '(let ((x (cons 1 2))) + (let ((y (box 3))) + (unbox y)))) + 3) + (check-equal? (run '(eq? 1 1)) #t) + (check-equal? (run '(eq? 1 2)) #f) + (check-equal? (run '(eq? (cons 1 2) (cons 1 2))) #f) + (check-equal? (run '(let ((x (cons 1 2))) (eq? x x))) #t) + + ;; Hoax examples + (check-equal? (run '(make-vector 0 0)) #()) + (check-equal? (run '(make-vector 1 0)) #(0)) + (check-equal? (run '(make-vector 3 0)) #(0 0 0)) + (check-equal? (run '(make-vector 3 5)) #(5 5 5)) + (check-equal? (run '(vector? (make-vector 0 0))) #t) + (check-equal? (run '(vector? (cons 0 0))) #f) + (check-equal? (run '(vector-ref (make-vector 0 #f) 0)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 2)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 3)) 'err) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 0 4) + x))) + #(4 5 5)) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 1 4) + x))) + #(5 4 5)) + (check-equal? (run '(vector-length (make-vector 3 #f))) 3) + (check-equal? (run '(vector-length (make-vector 0 #f))) 0) + (check-equal? (run '"") "") + (check-equal? (run '"fred") "fred") + (check-equal? (run '"wilma") "wilma") + (check-equal? (run '(make-string 0 #\f)) "") + (check-equal? (run '(make-string 3 #\f)) "fff") + (check-equal? (run '(make-string 3 #\g)) "ggg") + (check-equal? (run '(string-length "")) 0) + (check-equal? (run '(string-length "fred")) 4) + (check-equal? (run '(string-ref "" 0)) 'err) + (check-equal? (run '(string-ref (make-string 0 #\a) 0)) 'err) + (check-equal? (run '(string-ref "fred" 0)) #\f) + (check-equal? (run '(string-ref "fred" 1)) #\r) + (check-equal? (run '(string-ref "fred" 2)) #\e) + (check-equal? 
(run '(string-ref "fred" 4)) 'err) + (check-equal? (run '(string? "fred")) #t) + (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") + + ;; Iniquity tests + (check-equal? (run + '(define (f x) x) + '(f 5)) + 5) + + (check-equal? (run + '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) + '(tri 9)) + 45) + + (check-equal? (run + '(define (f x) x) + '(define (g x) (f x)) + '(g 5)) + 5) + (check-equal? (run + '(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) + '(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + '(even? 101)) + #f) + (check-equal? (run + '(define (map-add1 xs) + (if (empty? xs) + '() + (cons (add1 (car xs)) + (map-add1 (cdr xs))))) + '(map-add1 (cons 1 (cons 2 (cons 3 '()))))) + '(2 3 4)) + (check-equal? (run + '(define (f x) + 10) + '(f 1)) + 10) + (check-equal? (run + '(define (f x) + 10) + '(let ((x 2)) (f 1))) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(f 1 2)) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(let ((z 2)) (f 1 2))) + 10) + (check-equal? (run '(define (f x y) y) + '(f 1 (add1 #f))) + 'err) + + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? 
(run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) + + ;; Loot examples + (check-true (procedure? (run '(λ (x) x)))) + (check-equal? (run '((λ (x) x) 5)) + 5) + + (check-equal? (run '(let ((f (λ (x) x))) (f 5))) + 5) + (check-equal? (run '(let ((f (λ (x y) x))) (f 5 7))) + 5) + (check-equal? (run '(let ((f (λ (x y) y))) (f 5 7))) + 7) + (check-equal? (run '((let ((x 1)) + (let ((y 2)) + (lambda (z) (cons x (cons y (cons z '())))))) + 3)) + '(1 2 3)) + (check-equal? (run '(define (adder n) + (λ (x) (+ x n))) + '((adder 5) 10)) + 15) + (check-equal? (run '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))))) + 36)) + 666) + (check-equal? (run '(define (tri n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))) + '(tri 36)) + 666) + (check-equal? (run '(define (tri n) + (match n + [0 0] + [m (+ m (tri (sub1 m)))])) + '(tri 36)) + 666) + (check-equal? (run '((match 8 [8 (lambda (x) x)]) 12)) + 12) + + ;; Mug examples + (check-equal? (run '(symbol? 'foo)) #t) + (check-equal? (run '(symbol? (string->symbol "foo"))) #t) + (check-equal? (run '(eq? 'foo 'foo)) #t) + (check-equal? (run '(eq? (string->symbol "foo") + (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'foo (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) + #t) + (check-equal? (run '(symbol? 'g0)) #t) + (check-equal? (run '(symbol? "g0")) #f) + (check-equal? (run '(symbol? (string->symbol "g0"))) #t) + (check-equal? (run '(symbol? (string->uninterned-symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->uninterned-symbol "g0"))) #f) + (check-equal? (run '(eq? 
(string->uninterned-symbol "g0") (string->uninterned-symbol "g0"))) + #f) + (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f) + (check-equal? (run '(string? (symbol->string 'foo))) #t) + (check-equal? (run '(eq? (symbol->string 'foo) "foo")) #f) + (check-equal? (run ''foo) 'foo) + (check-equal? (run '(eq? (match #t [_ "foo"]) "bar")) #f) + (check-equal? (run '(eq? (match #t [_ 'foo]) 'bar)) #f) + (check-equal? (run '(match 'foo ['bar #t] [_ #f])) #f) + (check-equal? (run '(match 'foo ['foo #t] [_ #f])) #t) + (check-equal? (run '(match "foo" ["foo" #t] [_ #f])) #t) + (check-equal? (run '(match "foo" ["bar" #t] [_ #f])) #f) + (check-equal? (run '(match (cons '+ (cons 1 (cons 2 '()))) + [(cons '+ (cons x (cons y '()))) + (+ x y)])) + 3) + + ;; Mountebank examples + (check-equal? (run '#()) + #()) + (check-equal? (run ''#()) + #()) + (check-equal? (run ''#t) + #t) + (check-equal? (run ''7) + 7) + (check-equal? (run ''(1 2 3)) + '(1 2 3)) + (check-equal? (run ''(1 . 2)) + '(1 . 2)) + (check-equal? (run ''(("1") (#() #(1 #(2))) (#&(1)) (#f) (4) (5))) + '(("1") (#() #(1 #(2))) (#&(1)) (#f) (4) (5))) + (check-equal? (run '(define (f) (cons 1 2)) + '(eq? (f) (f))) + #f) + (check-equal? (run '(define (f) '(1 . 2)) + '(eq? (f) (f))) + #t) + (check-equal? (run '(let ((x '(foo . foo))) + (eq? (car x) (cdr x)))) + #t) + (check-equal? + (run '(define (eval e r) + (match e + [(list 'zero? e) + (zero? (eval e r))] + [(list 'sub1 e) + (sub1 (eval e r))] + [(list '+ e1 e2) + (+ (eval e1 r) (eval e2 r))] + [(list 'if e1 e2 e3) + (if (eval e1 r) + (eval e2 r) + (eval e3 r))] + [(list 'λ (list x) e) + (lambda (v) (eval e (cons (cons x v) r)))] + [(list e1 e2) + ((eval e1 r) (eval e2 r))] + [_ + (if (symbol? e) + (lookup r e) + e)])) + '(define (lookup r x) + (match r + [(cons (cons y v) r) + (if (eq? x y) + v + (lookup r x))])) + '(eval '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? 
n) + 0 + (+ n (tri (sub1 n))))))) + 36) + '())) + 666) + + ;; Neerdowell examples + (check-equal? (run '(struct foo ()) + '(foo? (foo))) + #t) + (check-equal? (run '(struct foo (x)) + '(foo? (foo 1))) + #t) + (check-equal? (run '(struct foo ()) + '(struct bar ()) + '(foo? (bar))) + #f) + (check-equal? (run '(struct foo ()) + '(struct bar ()) + '(bar? (bar))) + #t) + (check-equal? (run '(struct foo ()) + '(struct bar ()) + '(bar? #())) + #f) + (check-equal? (run '(struct foo (x)) + '(foo-x (foo 3))) + 3) + (check-equal? (run '(struct foo (x)) + '(let ((x (foo 3))) + (foo-x x))) + 3) + (check-equal? (run '(struct foo (x)) + '(let ((x (foo 3))) + (foo-x x))) + 3) + (check-equal? (run '(struct foo (x)) + '(let ((x (foo (foo 3)))) + (foo? (foo-x x)))) + #t) + (check-equal? (run '(struct foo (x y z)) + '(let ((x (foo 1 2 3))) + (cons (foo-x x) + (cons (foo-y x) + (cons (foo-z x) + '()))))) + '(1 2 3)) + (check-equal? (run '(struct foo ()) + '(eq? (foo) (foo))) + #f) + (check-equal? (run '(struct foo (x)) + '(foo-x #t)) + 'err) + (check-equal? (run '(struct foo (x)) + '(struct bar (y)) + '(match (bar 5) + [(foo x) #f] + [(bar x) x])) + 5) + (check-equal? (run '(struct nil ()) + '(struct pair (x y)) + '(define (len x) + (match x + [(nil) 0] + [(pair _ x) (add1 (len x))])) + '(len (pair 1 (pair 2 (pair 3 (nil)))))) + 3) + (check-equal? (run '(match (cons (cons 1 2) '()) + [(cons (cons x y) '()) y])) + 2) + (check-equal? (run '(struct foo (p q)) + '(match (cons (foo 1 2) '()) + [(cons (foo x y) _) y])) + 2) + (check-equal? (run '(struct foo (p q)) + '(match (cons (foo 1 2) '()) + [(cons (foo x 3) _) x] + [_ 9])) + 9) + (check-equal? (run '(struct foo (x q)) + '(define (get z) + (match z + ['() #f] + [(cons (foo x q) y) x])) + '(get (cons (foo 7 2) '()))) + 7) + (check-equal? 
(run '(struct posn (x y)) + '(define (posn-xs ps) + (match ps + ['() '()] + [(cons (posn x y) ps) + (cons x (posn-xs ps))])) + '(posn-xs (cons (posn 3 4) (cons (posn 5 6) (cons (posn 7 8) '()))))) + '(3 5 7)) + (check-equal? (run '(struct Foo (x y z)) + '(match (Foo 1 2 3) + [(Foo x y z) z])) + 3) + (check-equal? (run '(struct Boo (x)) + '(match 8 + [(Boo 'y) 0] + [_ 1])) + 1)) + +(define (test-runner-io run) + ;; Evildoer examples + (check-equal? (run "" 7) (cons 7 "")) + (check-equal? (run "" '(write-byte 97)) (cons (void) "a")) + (check-equal? (run "a" '(read-byte)) (cons 97 "")) + (check-equal? (run "b" '(begin (write-byte 97) (read-byte))) + (cons 98 "a")) + (check-equal? (run "" '(read-byte)) (cons eof "")) + (check-equal? (run "" '(eof-object? (read-byte))) (cons #t "")) + (check-equal? (run "a" '(eof-object? (read-byte))) (cons #f "")) + (check-equal? (run "" '(begin (write-byte 97) (write-byte 98))) + (cons (void) "ab")) + + (check-equal? (run "ab" '(peek-byte)) (cons 97 "")) + (check-equal? (run "ab" '(begin (peek-byte) (read-byte))) (cons 97 "")) + ;; Extort examples + (check-equal? (run "" '(write-byte #t)) (cons 'err "")) + + ;; Fraud examples + (check-equal? (run "" '(let ((x 97)) (write-byte x))) (cons (void) "a")) + (check-equal? (run "" + '(let ((x 97)) + (begin (write-byte x) + x))) + (cons 97 "a")) + (check-equal? (run "b" '(let ((x 97)) (begin (read-byte) x))) + (cons 97 "")) + (check-equal? (run "b" '(let ((x 97)) (begin (peek-byte) x))) + (cons 97 "")) + + ;; Hustle examples + (check-equal? (run "" + '(let ((x 1)) + (begin (write-byte 97) + 1))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x 1)) + (let ((y 2)) + (begin (write-byte 97) + 1)))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x (cons 1 2))) + (begin (write-byte 97) + (car x)))) + (cons 1 "a")) + ;; Iniquity examples + #| + (check-equal? (run "" + '(define (print-alphabet i) + (if (zero? 
i) + (void) + (begin (write-byte (- 123 i)) + (print-alphabet (sub1 i))))) + '(print-alphabet 26)) + (cons (void) "abcdefghijklmnopqrstuvwxyz")) +|#) diff --git a/langs/neerdowell/types.h b/langs/neerdowell/types.h new file mode 100644 index 00000000..ec7db8b2 --- /dev/null +++ b/langs/neerdowell/types.h @@ -0,0 +1,43 @@ +#ifndef TYPES_H +#define TYPES_H + +/* + Bit layout of values + + Values are either: + - Immediates: end in #b000 + - Pointers + + Immediates are either + - Integers: end in #b0 000 + - Characters: end in #b01 000 + - True: #b11 000 + - False: #b1 11 000 + - Eof: #b10 11 000 + - Void: #b11 11 000 + - Empty: #b100 11 000 +*/ +#define imm_shift 3 +#define ptr_type_mask ((1 << imm_shift) - 1) +#define box_type_tag 1 +#define cons_type_tag 2 +#define vect_type_tag 3 +#define str_type_tag 4 +#define proc_type_tag 5 +#define symb_type_tag 6 +#define struct_type_tag 7 +#define int_shift (1 + imm_shift) +#define int_type_mask ((1 << int_shift) - 1) +#define int_type_tag (0 << (int_shift - 1)) +#define nonint_type_tag (1 << (int_shift - 1)) +#define char_shift (int_shift + 1) +#define char_type_mask ((1 << char_shift) - 1) +#define char_type_tag ((0 << (char_shift - 1)) | nonint_type_tag) +#define nonchar_type_tag ((1 << (char_shift - 1)) | nonint_type_tag) +#define val_true ((0 << char_shift) | nonchar_type_tag) +#define val_false ((1 << char_shift) | nonchar_type_tag) +#define val_eof ((2 << char_shift) | nonchar_type_tag) +#define val_void ((3 << char_shift) | nonchar_type_tag) +#define val_empty ((4 << char_shift) | nonchar_type_tag) + +#endif diff --git a/langs/neerdowell/types.rkt b/langs/neerdowell/types.rkt new file mode 100644 index 00000000..9375b78d --- /dev/null +++ b/langs/neerdowell/types.rkt @@ -0,0 +1,116 @@ +#lang racket +(provide (all-defined-out)) +(require ffi/unsafe) + +(define imm-shift 3) +(define imm-mask #b111) +(define ptr-mask #b111) +(define type-box #b001) +(define type-cons #b010) +(define type-vect #b011) +(define type-str 
#b100) +(define type-proc #b101) +(define type-symb #b110) +(define type-struct #b111) +(define int-shift (+ 1 imm-shift)) +(define char-shift (+ 2 imm-shift)) +(define type-int #b0000) +(define mask-int #b1111) +(define type-char #b01000) +(define mask-char #b11111) + +(struct struct-val () #:transparent) + +(define (bits->value b) + (cond [(= b (value->bits #t)) #t] + [(= b (value->bits #f)) #f] + [(= b (value->bits eof)) eof] + [(= b (value->bits (void))) (void)] + [(= b (value->bits '())) '()] + [(int-bits? b) + (arithmetic-shift b (- int-shift))] + [(char-bits? b) + (integer->char (arithmetic-shift b (- char-shift)))] + [(box-bits? b) + (box (bits->value (heap-ref b)))] + [(cons-bits? b) + (cons (bits->value (heap-ref (+ b 8))) + (bits->value (heap-ref b)))] + [(vect-bits? b) + (if (zero? (untag b)) + (vector) + (build-vector (heap-ref b) + (lambda (j) + (bits->value (heap-ref (+ b (* 8 (add1 j))))))))] + [(str-bits? b) + (if (zero? (untag b)) + (string) + (build-string (heap-ref b) + (lambda (j) + (char-ref (+ b 8) j))))] + [(symb-bits? b) + (string->symbol + (if (zero? (untag b)) + (string) + (build-string (heap-ref b) + (lambda (j) + (char-ref (+ b 8) j)))))] + [(struct-bits? b) + (struct-val)] + [(proc-bits? b) + (lambda _ + (error "This function is not callable."))] + [else (error "invalid bits")])) + +(define (value->bits v) + (cond [(eq? v #t) #b00011000] + [(eq? v #f) #b00111000] + [(eof-object? v) #b01011000] + [(void? v) #b01111000] + [(empty? v) #b10011000] + [(integer? v) + (arithmetic-shift v int-shift)] + [(char? v) + (bitwise-ior type-char + (arithmetic-shift (char->integer v) char-shift))] + [else (error "not an immediate value")])) + +(define (imm-bits? v) + (zero? (bitwise-and v imm-mask))) + +(define (int-bits? v) + (= type-int (bitwise-and v mask-int))) + +(define (char-bits? v) + (= type-char (bitwise-and v mask-char))) + +(define (cons-bits? v) + (= type-cons (bitwise-and v imm-mask))) + +(define (box-bits? 
v) + (= type-box (bitwise-and v imm-mask))) + +(define (vect-bits? v) + (= type-vect (bitwise-and v imm-mask))) + +(define (str-bits? v) + (= type-str (bitwise-and v imm-mask))) + +(define (proc-bits? v) + (= type-proc (bitwise-and v imm-mask))) + +(define (symb-bits? v) + (= type-symb (bitwise-and v imm-mask))) + +(define (struct-bits? v) + (= type-struct (bitwise-and v imm-mask))) + +(define (untag i) + (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) + (integer-length ptr-mask))) + +(define (heap-ref i) + (ptr-ref (cast (untag i) _int64 _pointer) _int64)) + +(define (char-ref i j) + (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j))) diff --git a/langs/neerdowell/utils.rkt b/langs/neerdowell/utils.rkt new file mode 100644 index 00000000..612b7381 --- /dev/null +++ b/langs/neerdowell/utils.rkt @@ -0,0 +1,33 @@ +#lang racket +(provide symbol->data-label lookup pad-stack unpad-stack) +(require a86/ast) + +(define rsp 'rsp) +(define r15 'r15) + +(define (symbol->data-label s) + (symbol->label + (string->symbol (string-append "data_" (symbol->string s))))) + +;; Id CEnv -> [Maybe Integer] +(define (lookup x cenv) + (match cenv + ['() #f] + [(cons y rest) + (match (eq? 
x y) + [#t 0] + [#f (match (lookup x rest) + [#f #f] + [i (+ 8 i)])])])) + +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; Asm +;; Undo the stack alignment after a call +(define unpad-stack + (seq (Add rsp r15))) diff --git a/langs/neerdowell/values.c b/langs/neerdowell/values.c new file mode 100644 index 00000000..6627fc25 --- /dev/null +++ b/langs/neerdowell/values.c @@ -0,0 +1,132 @@ +#include "types.h" +#include "values.h" + +type_t val_typeof(val_t x) +{ + switch (x & ptr_type_mask) { + case box_type_tag: + return T_BOX; + case cons_type_tag: + return T_CONS; + case vect_type_tag: + return T_VECT; + case str_type_tag: + return T_STR; + case symb_type_tag: + return T_SYMB; + case proc_type_tag: + return T_PROC; + case struct_type_tag: + return T_STRUCT; + } + + if ((int_type_mask & x) == int_type_tag) + return T_INT; + if ((char_type_mask & x) == char_type_tag) + return T_CHAR; + + switch (x) { + case val_true: + case val_false: + return T_BOOL; + case val_eof: + return T_EOF; + case val_void: + return T_VOID; + case val_empty: + return T_EMPTY; + } + + return T_INVALID; +} + +int64_t val_unwrap_int(val_t x) +{ + return x >> int_shift; +} +val_t val_wrap_int(int64_t i) +{ + return (i << int_shift) | int_type_tag; +} + +int val_unwrap_bool(val_t x) +{ + return x == val_true; +} +val_t val_wrap_bool(int b) +{ + return b ? 
val_true : val_false; +} + +val_char_t val_unwrap_char(val_t x) +{ + return (val_char_t)(x >> char_shift); +} +val_t val_wrap_char(val_char_t c) +{ + return (((val_t)c) << char_shift) | char_type_tag; +} + +val_t val_wrap_eof(void) +{ + return val_eof; +} + +val_t val_wrap_void(void) +{ + return val_void; +} + +val_box_t* val_unwrap_box(val_t x) +{ + return (val_box_t *)(x ^ box_type_tag); +} +val_t val_wrap_box(val_box_t* b) +{ + return ((val_t)b) | box_type_tag; +} + +val_cons_t* val_unwrap_cons(val_t x) +{ + return (val_cons_t *)(x ^ cons_type_tag); +} +val_t val_wrap_cons(val_cons_t *c) +{ + return ((val_t)c) | cons_type_tag; +} + +val_vect_t* val_unwrap_vect(val_t x) +{ + return (val_vect_t *)(x ^ vect_type_tag); +} +val_t val_wrap_vect(val_vect_t *v) +{ + return ((val_t)v) | vect_type_tag; +} + +val_str_t* val_unwrap_str(val_t x) +{ + return (val_str_t *)(x ^ str_type_tag); +} +val_t val_wrap_str(val_str_t *v) +{ + return ((val_t)v) | str_type_tag; +} + +val_symb_t* val_unwrap_symb(val_t x) +{ + return (val_symb_t *)(x ^ symb_type_tag); +} +val_t val_wrap_symb(val_symb_t *v) +{ + return ((val_t)v) | symb_type_tag; +} + +val_struct_t* val_unwrap_struct(val_t x) +{ + return (val_struct_t *)(x ^ struct_type_tag); +} +val_t val_wrap_struct(val_struct_t* v) +{ + return ((val_t)v) | struct_type_tag; +} diff --git a/langs/neerdowell/values.h b/langs/neerdowell/values.h new file mode 100644 index 00000000..1f1dafa1 --- /dev/null +++ b/langs/neerdowell/values.h @@ -0,0 +1,91 @@ +#ifndef VALUES_H +#define VALUES_H + +#include + +/* any abstract value */ +typedef int64_t val_t; + +typedef enum type_t { + T_INVALID = -1, + /* immediates */ + T_INT, + T_BOOL, + T_CHAR, + T_EOF, + T_VOID, + T_EMPTY, + /* pointers */ + T_BOX, + T_CONS, + T_VECT, + T_STR, + T_SYMB, + T_PROC, + T_STRUCT, +} type_t; + +typedef uint32_t val_char_t; +typedef struct val_box_t { + val_t val; +} val_box_t; +typedef struct val_cons_t { + val_t snd; + val_t fst; +} val_cons_t; +typedef struct 
val_vect_t { + uint64_t len; + val_t elems[]; +} val_vect_t; +typedef struct val_str_t { + uint64_t len; + val_char_t codepoints[]; +} val_str_t; +typedef struct val_symb_t { + uint64_t len; + val_char_t codepoints[]; +} val_symb_t; +typedef struct val_struct_t { + val_t name; + val_t* vals; +} val_struct_t; +/* return the type of x */ +type_t val_typeof(val_t x); + +/** + * Wrap/unwrap values + * + * The behavior of unwrap functions are undefined on type mismatch. + */ +int64_t val_unwrap_int(val_t x); +val_t val_wrap_int(int64_t i); + +int val_unwrap_bool(val_t x); +val_t val_wrap_bool(int b); + +val_char_t val_unwrap_char(val_t x); +val_t val_wrap_char(val_char_t b); + +val_t val_wrap_eof(); + +val_t val_wrap_void(); + +val_box_t* val_unwrap_box(val_t x); +val_t val_wrap_box(val_box_t* b); + +val_cons_t* val_unwrap_cons(val_t x); +val_t val_wrap_cons(val_cons_t* c); + +val_vect_t* val_unwrap_vect(val_t x); +val_t val_wrap_vect(val_vect_t* c); + +val_str_t* val_unwrap_str(val_t x); +val_t val_wrap_str(val_str_t* c); + +val_symb_t* val_unwrap_symb(val_t x); +val_t val_wrap_symb(val_symb_t* c); + +val_struct_t* val_unwrap_struct(val_t x); +val_t val_wrap_struct(val_struct_t* c); + +#endif diff --git a/langs/outlaw/Makefile b/langs/outlaw/Makefile new file mode 100644 index 00000000..26a65a57 --- /dev/null +++ b/langs/outlaw/Makefile @@ -0,0 +1,91 @@ +# NOTES: +# - You will need a static version of libunistring to link against; on Mac +# ld will always choose .dylib over .a to link, so either rename or remove +# the .dylib versions. + +UNAME := $(shell uname) +.PHONY: test + + +# When on GRACE, we pass options to find libunistring in the course's +# public directory. 
+ifeq ($(shell hostname | egrep "grace.\.umd\.edu"),) +else + CMSC430_LIB := /afs/glue/class/fall2022/cmsc/430/0101/public + link_opts := -L$(CMSC430_LIB)/lib/ -Wl,-rpath=$(CMSC430_LIB)/lib/ + include := -I$(CMSC430_LIB)/include/ +endif + +ifeq ($(UNAME), Darwin) + format=macho64 + CC=arch -x86_64 gcc +else + format=elf64 + CC=gcc +endif + +objs = \ + main.o \ + values.o \ + print.o \ + symbol.o \ + string.o \ + io.o \ + error.o \ + os.o \ + stdlib.o + +default: runtime.o + +outlaw.rkt: compile-stdin.rkt \ + ast.rkt \ + parse.rkt \ + a86/ast.rkt \ + registers.rkt \ + types.rkt \ + lambdas.rkt \ + fv.rkt \ + utils.rkt \ + compile-ops.rkt \ + compile-datum.rkt \ + compile-expr.rkt \ + compile-define.rkt \ + compile-literals.rkt \ + compile.rkt \ + read-all.rkt \ + a86/printer.rkt \ + compile-stdin.rkt + (racket -t combine.rkt -m compile-stdin.rkt stdlib.rkt ;\ + printf "(main)\n") \ + > outlaw.rkt + +runtime.o: $(objs) + ld -r $(objs) -o runtime.o + +%.run: %.o runtime.o + $(CC) $(link_opts) runtime.o $< -lunistring -o $@ + +.c.o: + $(CC) $(include) -fPIC -c -g -o $@ $< + +.s.o: + nasm -g -f $(format) -o $@ $< + +stdlib.s: stdlib.rkt + cat stdlib.rkt | racket -t compile-library.rkt -m > stdlib.s + +%.s: %.rkt + cat $< | racket -t compile-stdin.rkt -m > $@ + +clean: + @$(RM) *.o *.s *.run outlaw.rkt ||: + @echo "$(shell basename $(shell pwd)): cleaned!" 
+ +outlaw2.s: outlaw.rkt outlaw.run + cat outlaw.rkt | ./outlaw.run > outlaw2.s + +self-host-test: outlaw.s outlaw2.s + cmp -s outlaw.s outlaw2.s + +test: example.run + @test "$(shell ./example.run)" = "$(shell racket example.rkt)" diff --git a/langs/outlaw/a.rkt b/langs/outlaw/a.rkt new file mode 100644 index 00000000..5e2d80fd --- /dev/null +++ b/langs/outlaw/a.rkt @@ -0,0 +1,8 @@ +#lang racket +(provide a) +(require "b.rkt") + +(define (a x) + (+ (b x) (b x))) + +(a 10) diff --git a/langs/outlaw/a86/ast.rkt b/langs/outlaw/a86/ast.rkt new file mode 100644 index 00000000..2eb42d75 --- /dev/null +++ b/langs/outlaw/a86/ast.rkt @@ -0,0 +1,80 @@ +#lang racket +(provide (all-defined-out)) + +(struct Text ()) +(struct Data ()) + +(struct Global (x)) +(struct Label (x)) +(struct Call (x)) +(struct Ret ()) +(struct Mov (dst src)) +(struct Add (dst src)) +(struct Sub (dst src)) +(struct Cmp (a1 a2)) +(struct Jmp (x)) +(struct Je (x)) +(struct Jne (x)) +(struct Jl (x)) +(struct Jle (x)) +(struct Jg (x)) +(struct Jge (x)) +(struct And (dst src)) +(struct Or (dst src)) +(struct Xor (dst src)) +(struct Sal (dst i)) +(struct Sar (dst i)) +(struct Push (a1)) +(struct Pop (a1)) +(struct Lea (dst x)) +(struct Div (den)) + +(struct Offset (r i)) +(struct Extern (x)) + +(struct Equ (x v)) +(struct Const (x)) +(struct Dd (x)) +(struct Dq (x)) +(struct Plus (e1 e2)) + +;; (U Instruction Asm) ... -> Asm +;; Convenient for sequencing instructions or groups of instructions +(define (seq . xs) + (foldr (λ (x is) + (if (list? x) + (append x is) + (cons x is))) + '() + xs)) + +(define registers + '(cl eax rax rbx rcx rdx rbp rsp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15)) + +;; Any -> Boolean +(define (register? x) + (and (memq x registers) #t)) + +;; Any -> Boolean +(define (exp? x) + (or (Offset? x) + (and (Plus? x) + (exp? (Plus-e1 x)) + (exp? (Plus-e2 x))) + (symbol? x) + (integer? x))) + +(define offset? Offset?) + +;; Any -> Boolean +(define (label? x) + (and (symbol? x) + (not (register? 
x)))) + +;; Any -> Boolean +(define (instruction? x) + (ormap (λ (p) (p x)) + (list Text? Data? Global? Label? Extern? Call? Ret? Mov? + Add? Sub? Cmp? Jmp? Je? Jne? Jl? Jle? Jg? Jge? + And? Or? Xor? Sal? Sar? Push? Pop? Lea? Div? Equ? + Dd? Dq?))) diff --git a/langs/outlaw/a86/callback.rkt b/langs/outlaw/a86/callback.rkt new file mode 100644 index 00000000..c7ff5af4 --- /dev/null +++ b/langs/outlaw/a86/callback.rkt @@ -0,0 +1,46 @@ +#lang racket +;; based on racket/draw/unsafe/callback +(provide guard-foreign-escape) +(require ffi/unsafe + ffi/unsafe/atomic) + +(define callback-atomic? (eq? 'chez-scheme (system-type 'vm))) + +(define-syntax-rule (guard-foreign-escape e0 e ...) + (call-guarding-foreign-escape (lambda () e0 e ...))) + +(define (call-guarding-foreign-escape thunk) + (if callback-atomic? + ((call-with-c-return + (lambda () + (with-handlers ([(lambda (x) #t) + (lambda (x) + ;; Deliver an exception re-raise after returning back + ;; from `call-with-c-return`: + (lambda () + (when (in-atomic-mode?) + (end-atomic)) ; error happened during atomic mode + ;(enable-interrupts) ; ... with interrupts disabled + (void/reference-sink call-with-c-return-box) + (raise x)))]) + (let ([vs (call-with-values thunk list)]) + ;; Deliver successful values after returning back from + ;; `call-with-c-return`: + (lambda () + (void/reference-sink call-with-c-return-box) + (apply values vs))))))) + (thunk))) + +(define call-with-c-return-box (box #f)) + +;; `call-with-c-return` looks like a foreign function, due to a cast +;; to and from a callback, so returning from `call-with-c-return` will +;; pop and C frame stacks (via longjmp internally) that were pushed +;; since `call-with-c-return` was called. +(define call-with-c-return + (and callback-atomic? + (cast (lambda (thunk) (thunk)) + (_fun #:atomic? 
#t + #:keep call-with-c-return-box + _racket -> _racket) + (_fun _racket -> _racket)))) \ No newline at end of file diff --git a/langs/outlaw/a86/interp.rkt b/langs/outlaw/a86/interp.rkt new file mode 100644 index 00000000..891281b2 --- /dev/null +++ b/langs/outlaw/a86/interp.rkt @@ -0,0 +1,191 @@ +#lang racket +(provide/contract + [current-objs (parameter/c (listof path-string?))] + [asm-interp (-> (listof instruction?) any/c)] + [asm-interp/io (-> (listof instruction?) string? any/c)]) + +(require "printer.rkt" "ast.rkt" "callback.rkt" + (rename-in ffi/unsafe [-> _->])) + +;; Assembly code is linked with object files in this parameter +(define current-objs + (make-parameter '())) + +;; Asm -> Value +;; Interpret (by assemblying, linking, and loading) x86-64 code +;; Assume: entry point is "entry" +(define (asm-interp a) + (asm-interp/io a #f)) + +(define fopen + (get-ffi-obj "fopen" (ffi-lib #f) (_fun _path _string/utf-8 _-> _pointer))) + +(define fflush + (get-ffi-obj "fflush" (ffi-lib #f) (_fun _pointer _-> _void))) + +(define fclose + (get-ffi-obj "fclose" (ffi-lib #f) (_fun _pointer _-> _void))) + +(define fmt (if (eq? (system-type 'os) 'macosx) 'macho64 'elf64)) + +;; Asm String -> (cons Value String) +;; Like asm-interp, but uses given string for input and returns +;; result with string output +(define (asm-interp/io a input) + (define t.s (make-temporary-file "nasm~a.s")) + (define t.o (path-replace-extension t.s #".o")) + (define t.so (path-replace-extension t.s #".so")) + (define t.in (path-replace-extension t.s #".in")) + (define t.out (path-replace-extension t.s #".out")) + + (with-output-to-file t.s + #:exists 'truncate + (λ () + (begin (current-shared? #t) + (asm-display a)))) + + (nasm t.s t.o) + (ld t.o t.so) + + (define libt.so (ffi-lib t.so)) + + (define init-label + (match (findf Label? 
a) + [(Label l) l] + [_ (error "no initial label found")])) + + (define entry + (get-ffi-obj init-label libt.so (_fun _pointer _-> _int64))) + + ;; install our own `error_handler` procedure to prevent `exit` calls + ;; from interpreted code bringing down the parent process. All of + ;; these hooks into the runtime need a better API and documentation, + ;; but this is a rough hack to make Extort work for now. + (when (ffi-obj-ref "error_handler" libt.so (thunk #f)) + (set-ffi-obj! "error_handler" libt.so _pointer + (function-ptr (λ () (raise 'err)) (_fun _-> _void)))) + + + (define current-heap #f) + + ;; allocate a heap + (when (ffi-obj-ref "heap" libt.so (thunk #f)) + (set! current-heap (make-c-parameter "heap" libt.so _pointer)) + + (if (ffi-obj-ref "from" libt.so (thunk #f)) + (begin + (current-heap + ; IMPROVE ME: hard-coded heap size + (malloc _int64 20000 'raw)) + (set-ffi-obj! "from" libt.so _pointer (current-heap)) + (set-ffi-obj! "to" libt.so _pointer (ptr-add (current-heap) 10000 _int64)) + (set-ffi-obj! "types" libt.so _pointer (malloc _int32 10000))) + (current-heap + ; IMPROVE ME: hard-coded heap size + (malloc _int64 10000 'raw)))) + + (delete-file t.s) + (delete-file t.o) + (delete-file t.so) + (if input + (let () + (unless (and (ffi-obj-ref "in" libt.so (thunk #f)) + (ffi-obj-ref "out" libt.so (thunk #f))) + (error "asm-interp/io: running in IO mode without IO linkage")) + + (with-output-to-file t.in #:exists 'truncate + (thunk (display input))) + + (define current-in + (make-c-parameter "in" libt.so _pointer)) + (define current-out + (make-c-parameter "out" libt.so _pointer)) + + (current-in (fopen t.in "r")) + (current-out (fopen t.out "w")) + + (define result + (begin0 + (with-handlers ((symbol? 
identity)) + (guard-foreign-escape + (if current-heap + (cons (current-heap) (entry (current-heap))) + (entry #f)))) + #; + (when current-heap + (free (current-heap))))) + + (fflush (current-out)) + (fclose (current-in)) + (fclose (current-out)) + + (define output (file->string t.out)) + (delete-file t.in) + (delete-file t.out) + (cons result output)) + + (begin0 + (with-handlers ((symbol? identity)) + (guard-foreign-escape + (if current-heap + (cons (current-heap) (entry (current-heap))) + (entry #f)))) + #; + (when current-heap + (free (current-heap)))))) + + +(define (string-splice xs) + (apply string-append + (add-between (map (lambda (s) (string-append "\"" s "\"")) xs) + " "))) + +;;; Utilities for calling nasm and linker with informative error messages + +(struct exn:nasm exn:fail:user ()) +(define nasm-msg + (string-append + "assembly error: make sure to use `prog` to construct an assembly program\n" + "if you did and still get this error; please share with course staff.")) + +(define (nasm:error msg) + (raise (exn:nasm (format "~a\n\n~a" nasm-msg msg) + (current-continuation-marks)))) + +;; run nasm on t.s to create t.o +(define (nasm t.s t.o) + (define err-port (open-output-string)) + (unless (parameterize ((current-error-port err-port)) + (system (format "nasm -f ~a ~a -o ~a" fmt t.s t.o))) + (nasm:error (get-output-string err-port)))) + +(struct exn:ld exn:fail:user ()) +(define (ld:error msg) + (raise (exn:ld (format "link error: ~a" msg) + (current-continuation-marks)))) + +(define (ld:undef-symbol s) + (ld:error + (string-append + (format "symbol ~a not defined in linked objects: ~a\n" s (current-objs)) + "use `current-objs` to link in object containing symbol definition."))) + +;; link together t.o with current-objs to create shared t.so +(define (ld t.o t.so) + (define err-port (open-output-string)) + (define objs (string-splice (current-objs))) + (define -z-defs-maybe + (if (eq? 
(system-type 'os) 'macosx) + "" + "-z defs ")) + (unless (parameterize ((current-error-port err-port)) + (system (format "gcc ~a-v -L~a -shared ~a ~a -o ~a -lunistring" + -z-defs-maybe + (getenv "LINK_DIR") + t.o objs t.so))) + (define err-msg + (get-output-string err-port)) + (match (or (regexp-match #rx"Undefined.*\"(.*)\"" err-msg) ; mac + (regexp-match #rx"undefined reference to `(.*)'" err-msg)) ; linux + [(list _ symbol) (ld:undef-symbol symbol)] + [_ (ld:error (format "unknown link error.\n\n~a" err-msg))]))) diff --git a/langs/outlaw/a86/printer.rkt b/langs/outlaw/a86/printer.rkt new file mode 100644 index 00000000..a25d443b --- /dev/null +++ b/langs/outlaw/a86/printer.rkt @@ -0,0 +1,239 @@ +#lang racket +(provide asm-string current-shared? asm-display) +(require "ast.rkt") + +(define current-shared? + (let ((x (box #f))) + (case-lambda + [() (unbox x)] + [(y) (set-box! x y)]))) + +;; Any -> Boolean +(define (reg? x) + (register? x)) + +;; Reg -> String +(define (reg->string r) + (symbol->string r)) + +;; Label -> String +(define label-symbol->string + (match (system-type) + ['macosx + (λ (s) (string-append "_" (symbol->string s)))] + [_ symbol->string])) + +;; Label -> String +;; prefix with _ for Mac +(define label-symbol->string/rel + (match (system-type) + ['macosx + (λ (s) (string-append "_" (symbol->string s)))] + [_ + (λ (s) + (if (current-shared?) + (if (memq s (unbox external-labels)) + ; hack for ELF64 shared libraries in service of + ; calling external functions in asm-interp + (string-append (symbol->string s) " wrt ..plt") + (symbol->string s)) + (symbol->string s)))])) + +;; (U Label Reg) -> String +(define (jump-target->string t) + (match t + [(? reg?) (reg->string t)] + [(Offset (? reg? r) i) + (string-append "[" (reg->string r) " + " (number->string i) "]")] + [_ (label-symbol->string/rel t)])) + +;; Arg -> String +(define (arg->string a) + (match a + [(? reg?) (reg->string a)] + [(? integer?) (number->string a)] + [(Offset (? reg? 
r) i) + (string-append "[" (reg->string r) " + " (number->string i) "]")] + [(Offset (? label? l) i) + (string-append "[" (label-symbol->string l) " + " (number->string i) "]")] + [(Const l) + (symbol->string l)] + [(? exp?) (exp->string a)])) + +;; Exp -> String +(define (exp->string e) + (match e + [(? integer?) (number->string e)] + [(Plus e1 e2) + (string-append "(" (exp->string e1) " + " (exp->string e2) ")")] + [_ (label-symbol->string/rel e)])) + +(define tab (make-string 8 #\space)) + +(define external-labels (box '())) + +(define (external-label-shared? x) + (and (label? x) + (current-shared?) + (eq? 'unix (system-type)) + (memq x (unbox external-labels)))) + +(define (mov->string a1 a2) + (match a2 + ;; to handle loading external data + ;; when 1) ELF, 2) building a shared object + [(Offset (? external-label-shared? l) i) + (string-append tab "mov " + (arg->string a1) ", " + "[" (label-symbol->string l) " + " (number->string i) " wrt ..gotpc]\n" + tab "mov " + (arg->string a1) ", " + "[" (arg->string a1) "]")] + ;; the usual case + [_ + (string-append tab "mov " + (arg->string a1) ", " + (arg->string a2))])) + +;; Instruction -> String +(define (instr->string i) + (match i + [(Text) (string-append tab "section .text")] + [(Data) (string-append tab "section .data align=8")] ; 8-byte aligned data + [(Ret) (string-append tab "ret")] + [(Label l) (string-append (label-symbol->string l) ":")] + [(Global x) (string-append tab "global " (label-symbol->string x))] + [(Extern l) (let ((r (string-append tab "extern " (label-symbol->string l)))) + (begin + (set-box! 
external-labels (cons l (unbox external-labels))) + r))] + [(Mov a1 a2) + (mov->string a1 a2)] + [(Add a1 a2) + (string-append tab "add " + (arg->string a1) ", " + (arg->string a2))] + [(Sub a1 a2) + (string-append tab "sub " + (arg->string a1) ", " + (arg->string a2))] + [(Cmp a1 a2) + (string-append tab "cmp " + (arg->string a1) ", " + (arg->string a2))] + [(Sal a1 a2) + (string-append tab "sal " + (arg->string a1) ", " + (arg->string a2))] + [(Sar a1 a2) + (string-append tab "sar " + (arg->string a1) ", " + (arg->string a2))] + [(And a1 a2) + (string-append tab "and " + (arg->string a1) ", " + (arg->string a2))] + [(Or a1 a2) + (string-append tab "or " + (arg->string a1) ", " + (arg->string a2))] + [(Xor a1 a2) + (string-append tab "xor " + (arg->string a1) ", " + (arg->string a2))] + [(Jmp l) + (string-append tab "jmp " + (jump-target->string l))] + [(Je l) + (string-append tab "je " + (jump-target->string l))] + [(Jne l) + (string-append tab "jne " + (jump-target->string l))] + [(Jl l) + (string-append tab "jl " + (jump-target->string l))] + [(Jle l) + (string-append tab "jle " + (jump-target->string l))] + [(Jg l) + (string-append tab "jg " + (jump-target->string l))] + [(Jge l) + (string-append tab "jge " + (jump-target->string l))] + [(Call l) + (string-append tab "call " + (jump-target->string l))] + [(Push a) + (string-append tab "push " + (arg->string a))] + [(Pop r) + (string-append tab "pop " + (reg->string r))] + [(Lea d (? offset? 
x)) + (string-append tab "lea " + (arg->string d) ", " + (arg->string x))] + [(Lea d x) + (string-append tab "lea " + (arg->string d) ", [rel " + (exp->string x) "]")] + [(Div r) + (string-append tab "div " + (arg->string r))] + [(Equ x c) + (string-append tab + (symbol->string x) + " equ " + (number->string c))] + + [(Dd x) + (string-append tab "dd " (arg->string x))] + [(Dq x) + (string-append tab "dq " (arg->string x))])) + +(define (instrs->string a) + (match a + ['() ""] + [(cons i a) + (string-append (instr->string i) "\n" (instrs->string a))])) + +;; Asm -> String +(define (asm-string a) + (begin + (set-box! external-labels '()) + ;; entry point will be first label + (match (findf Label? a) + [(Label g) + (string-append + tab "global " (label-symbol->string g) "\n" + tab "default rel\n" + tab "section .text\n" + (instrs->string a))] + [_ + (instrs->string a)]))) + +(define (asm-display a) + (begin + (set-box! external-labels '()) + ;; entry point will be first label + (match (findf Label? 
a) + [(Label g) + (begin + (write-string + (string-append + tab "global " (label-symbol->string g) "\n" + tab "default rel\n" + tab "section .text\n")) + (asm-display-instrs a))] + [_ + (asm-display-instrs a)]))) + +(define (asm-display-instrs a) + (match a + ['() (void)] + [(cons i a) + (begin (write-string (instr->string i)) + (write-string "\n") + (asm-display-instrs a))])) diff --git a/langs/outlaw/ast.rkt b/langs/outlaw/ast.rkt new file mode 100644 index 00000000..25e7495b --- /dev/null +++ b/langs/outlaw/ast.rkt @@ -0,0 +1,95 @@ +#lang racket +(provide (all-defined-out)) + +;; type Prog = (Prog (Listof Defn)) +(struct Prog (ds)) + +;; type Lib = (Lib (Listof Id) (Listof Defn)) +(struct Lib (ids ds)) + +;; type Defn = (Defn Id Lambda) +(struct Defn (f l)) + +;; type Expr = (Eof) +;; | (Quote Datum) +;; | (Prim Op (Listof Expr)) +;; | (If Expr Expr Expr) +;; | (Begin [Listof Expr]) +;; | (Let (Listof Id) (Listof Expr) Expr) +;; | (Var Id) +;; | (Match Expr (Listof Pat) (Listof Expr)) +;; | (App Expr (Listof Expr)) +;; | Lambda +;; | (Apply Expr (Listof Expr)) +;; type Lambda = (Lam Id (Listof Id) Expr) +;; | (LamRest Id (Listof Id) Id Expr) +;; | (LamCase Id (Listof LamCaseClause)) +;; type LamCaseClause = +;; | (Lam Id (Listof Id) Expr) +;; | (LamRest Id (Listof Id) Expr) +;; type Datum = Integer +;; | Char +;; | Boolean +;; | String +;; | Symbol +;; | (Boxof Datum) +;; | (Listof Datum) +;; | (Vectorof Datum) +;; type Id = Symbol +;; type Op = Op0 | Op1 | Op2 | Op3 +;; type Op0 = 'read-byte +;; type Op1 = 'add1 | 'sub1 | 'zero? +;; | 'char? | 'integer->char | 'char->integer +;; | 'write-byte | 'eof-object? +;; | 'box | 'car | 'cdr | 'unbox +;; | 'empty? | 'cons? | 'box? +;; | 'vector? | 'vector-length +;; | 'string? | 'string-length +;; | 'symbol? | 'symbol->string +;; | 'string->symbol | 'string->uninterned-symbol +;; type Op2 = '+ | '- | '< | '= +;; | 'cons +;; | 'make-vector | 'vector-ref +;; | 'make-string | 'string-ref +;; | 'struct? 
+;; type Op3 = 'vector-set! | 'struct-ref +;; type OpN = 'make-struct +;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; | (PSymb Symbol) +;; | (PStr String) +;; | (PStruct Id (Listof Pat)) +;; | (PPred Expr) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() + +(struct Eof ()) +(struct Prim (p es)) +(struct If (e1 e2 e3)) +(struct Begin (es)) +(struct Let (xs es e)) +(struct Var (x)) +(struct App (e es)) +(struct Lam (f xs e)) +(struct LamRest (f xs x e)) +(struct LamCase (f cs)) +(struct Apply (e es el)) +(struct Quote (d)) +(struct Match (e ps es)) + +(struct PVar (x)) +(struct PWild ()) +(struct PLit (x)) +(struct PBox (p)) +(struct PCons (p1 p2)) +(struct PAnd (p1 p2)) +(struct PSymb (s)) +(struct PStr (s)) +(struct PStruct (n ps)) +(struct PPred (e)) diff --git a/langs/outlaw/b.rkt b/langs/outlaw/b.rkt new file mode 100644 index 00000000..7a621d35 --- /dev/null +++ b/langs/outlaw/b.rkt @@ -0,0 +1,6 @@ +#lang racket +(provide b) +(require "c.rkt") + +(define (b x) + (add1 (c x))) diff --git a/langs/outlaw/build-runtime.rkt b/langs/outlaw/build-runtime.rkt new file mode 100644 index 00000000..1cc4da53 --- /dev/null +++ b/langs/outlaw/build-runtime.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide runtime-path) + +(require racket/runtime-path) +(define-runtime-path here ".") + +(system (string-append "make -C '" + (path->string (normalize-path here)) + "' runtime.o")) + +(define runtime-path + (normalize-path (build-path here "runtime.o"))) \ No newline at end of file diff --git a/langs/outlaw/c.rkt b/langs/outlaw/c.rkt new file mode 100644 index 00000000..041b7c98 --- /dev/null +++ b/langs/outlaw/c.rkt @@ -0,0 +1,4 @@ +#lang racket +(provide c) +(define (c x) + (+ x 5)) diff --git a/langs/outlaw/char.c b/langs/outlaw/char.c new file mode 100644 index 00000000..d11f16e0 --- /dev/null +++ b/langs/outlaw/char.c @@ -0,0 +1,57 @@ +#include +#include +#include "types.h" + +void 
print_codepoint(int64_t); + +void print_char (int64_t v) { + int64_t codepoint = v >> char_shift; + printf("#\\"); + switch (codepoint) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(v); + } +} + +void print_codepoint(int64_t v) { + int64_t codepoint = v >> char_shift; + // Print using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (codepoint < 128) { + printf("%c", (char) codepoint); + } else if (codepoint < 2048) { + printf("%c%c", + (char)(codepoint >> 6) | 192, + ((char)codepoint & 63) | 128); + } else if (codepoint < 65536) { + printf("%c%c%c", + (char)(codepoint >> 12) | 224, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } else { + printf("%c%c%c%c", + (char)(codepoint >> 18) | 240, + ((char)(codepoint >> 12) & 63) | 128, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } +} + diff --git a/langs/outlaw/combine.rkt b/langs/outlaw/combine.rkt new file mode 100644 index 00000000..6deb4855 --- /dev/null +++ b/langs/outlaw/combine.rkt @@ -0,0 +1,83 @@ +#lang racket +(provide main) + +;; This is a utility for smashing together racket files into a single +;; monolithic program. + +;; For example: + +;; racket -t combine.rkt -m compile-stdin.rkt stdlib.rkt > outlaw.rkt + +;; creates a file with all the source code needed for the Outlaw +;; compiler. + +;; It will append the source code of all the files fn requires +;; and comment out their requires and provides, excluding +;; any files given in the ignores list. + +;; String String ... -> Void +;; Combine all the files fn depends upon, print to stdout +;; as one monolithic program, excluding ignores. +(define (main fn . 
ignores) + (printf "#lang racket\n") + (let ((fs (remove* ignores (all-files fn)))) + (for-each (lambda (f) + (displayln (string-append ";; " f))) + fs) + (print-files fs))) + +;; Port -> [Listof S-Expr] +;; read all s-expression until eof +(define (read-all p) + (let ((r (read p))) + (if (eof-object? r) + '() + (cons r (read-all p))))) + +(define (print-files fs) + (match fs + ['() (void)] + [(cons f fs) + (displayln (make-string 12 #\;)) + (displayln (string-append ";; " f "\n")) + (print-file f) + (print-files fs)])) + +(define (print-file f) + (let ((p (open-input-file f))) + (read-line p) ; ignore #lang + (define (loop) + (let ((l (read-line p))) + (if (eof-object? l) + (begin (newline) + (close-input-port p)) + (begin + (when (regexp-match? #rx"^\\(require|^\\(provide" l) + (display "#;")) + (displayln l) + (loop))))) + (loop))) + +(define (all-files fn) + (remove-duplicates (all-files* fn '()))) + +(define (all-files* fn seen) + (if (member fn seen) + '() + (let ((p (open-input-file fn))) + (read-line p) ; ignore #lang + (begin0 + (let ((rs (get-requires (read-all p)))) + (append (append-map (λ (f) (all-files* f (cons fn seen))) rs) + (list fn))) + (close-input-port p))))) + +(define (get-requires s) + (match s + ['() '()] + [(cons (cons 'require rs) s) + (append (filter string? rs) (get-requires s))] + [(cons _ s) + (get-requires s)])) + + diff --git a/langs/outlaw/compile-datum.rkt b/langs/outlaw/compile-datum.rkt new file mode 100644 index 00000000..a47bf28b --- /dev/null +++ b/langs/outlaw/compile-datum.rkt @@ -0,0 +1,87 @@ +#lang racket +(provide compile-datum) +(require "stdlib.rkt" + "types.rkt" + "utils.rkt" + "a86/ast.rkt" + "registers.rkt") + +;; Datum -> Asm +(define (compile-datum d) + (cond + [(string? d) (seq (Lea rax (load-string d)))] + [(symbol? d) (seq (Lea rax (load-symbol d)))] + [(compound? 
d) (compile-compound-datum d)] + [else (compile-atom d)])) + +(define (load-symbol s) + (Plus (symbol->data-label s) type-symb)) + +(define (load-string s) + (Plus (symbol->data-label (string->symbol s)) type-str)) + +;; Value -> Asm +(define (compile-atom v) + (seq (Mov rax (imm->bits v)))) + +;; Datum -> Boolean +(define (compound? d) + (or (box? d) + (cons? d) + (vector? d))) + +;; Datum -> Asm +(define (compile-compound-datum d) + (match (compile-quoted d) + [(cons l is) + (seq (Data) + is + (Text) + (Lea rax l))])) + +;; Datum -> (cons AsmExpr Asm) +(define (compile-quoted c) + (cond + [(vector? c) (compile-datum-vector (vector->list c))] + [(box? c) (compile-datum-box (unbox c))] + [(cons? c) (compile-datum-cons (car c) (cdr c))] + [(symbol? c) (cons (load-symbol c) '())] + [(string? c) (cons (load-string c) '())] + [else (cons (imm->bits c) '())])) + +;; Datum -> (cons AsmExpr Asm) +(define (compile-datum-box c) + (match (compile-quoted c) + [(cons l1 is1) + (let ((l (gensym 'box))) + (cons (Plus l type-box) + (seq (Label l) + (Dq l1) + is1)))])) + +;; Datum Datum -> (cons AsmExpr Asm) +(define (compile-datum-cons c1 c2) + (match (compile-quoted c1) + [(cons l1 is1) + (match (compile-quoted c2) + [(cons l2 is2) + (let ((l (gensym 'cons))) + (cons (Plus l type-cons) + (seq (Label l) + (Dq l2) + (Dq l1) + is1 + is2)))])])) + +;; [Listof Datum] -> (cons AsmExpr Asm) +(define (compile-datum-vector ds) + (match ds + ['() (cons type-vect '())] + [_ + (let ((l (gensym 'vector)) + (cds (map compile-quoted ds))) + (cons (Plus l type-vect) + (seq (Label l) + (Dq (length ds)) + (map (λ (cd) (Dq (car cd))) cds) + (append-map cdr cds))))])) diff --git a/langs/outlaw/compile-define.rkt b/langs/outlaw/compile-define.rkt new file mode 100644 index 00000000..263a4e4b --- /dev/null +++ b/langs/outlaw/compile-define.rkt @@ -0,0 +1,113 @@ +#lang racket +(provide (all-defined-out)) +(require "stdlib.rkt" + "ast.rkt" + "types.rkt" + "fv.rkt" + "utils.rkt" + "compile-expr.rkt" + 
"a86/ast.rkt" + "registers.rkt") + +;; [Listof Defn] -> [Listof Id] +(define (define-ids ds) + (match ds + ['() '()] + [(cons (Defn f l) ds) + (cons f (define-ids ds))])) + +;; [Listof Defn] GEnv -> Asm +(define (compile-defines ds g) + (match ds + ['() (seq)] + [(cons d ds) + (seq (compile-define d g) + (compile-defines ds g))])) + +;; Defn GEnv -> Asm +(define (compile-define d g) + (match d + [(Defn f e) + (seq ; (%%% (symbol->string f)) + (Data) + (Label (symbol->label f)) + (Dq 0) + (Text) + (compile-e e '() g #f) + (Mov (Offset (symbol->label f) 0) rax))])) + +;; [Listof Lam] GEnv -> Asm +(define (compile-lambda-defines ls g) + (match ls + ['() (seq)] + [(cons l ls) + (seq (compile-lambda-define l g) + (compile-lambda-defines ls g))])) + +;; Lambda GEnv -> Asm +(define (compile-lambda-define l g) + (let ((fvs (fv- l g))) + (match l + [(Lam f xs e) + (let ((env (append (reverse fvs) (reverse xs) (list #f)))) + (seq (Label (symbol->label f)) + (Cmp r15 (length xs)) + (Jne 'raise_error_align) + (Mov rax (Offset rsp (* 8 (length xs)))) + (Xor rax type-proc) + (copy-env-to-stack fvs 8) + (compile-e e env g #t) + (Add rsp (* 8 (length env))) ; pop env + (Ret)))] + [(LamRest f xs x e) + (let ((env (append (reverse fvs) (cons x (reverse xs)) (list #f)))) + (seq (Label (symbol->label f)) + (Cmp r15 (length xs)) + (Jl 'raise_error_align) + + (Sub r15 (length xs)) + (Mov rax val-empty) + (let ((loop (gensym)) + (done (gensym))) + (seq (Label loop) + (Cmp r15 0) + (Je done) + (Mov (Offset rbx 0) rax) + (Pop rax) + (Mov (Offset rbx 8) rax) + (Mov rax rbx) + (Or rax type-cons) + (Add rbx 16) + (Sub r15 1) + (Jmp loop) + (Label done))) + (Push rax) + + (Mov rax (Offset rsp (* 8 (add1 (length xs))))) + (Xor rax type-proc) + (copy-env-to-stack fvs 8) + (compile-e e env g #t) + (Add rsp (* 8 (length env))) ; pop env + (Ret)))] + [(LamCase f cs) + (seq ; (%%% "lamcase code") + (Label (symbol->label f)) + (compile-fun-case-select cs) + (Jmp 'raise_error_align) + 
(compile-fun-case-clauses cs g))]))) + +(define (compile-fun-case-clauses cs g) + (append-map (lambda (c) (compile-lambda-define c g)) cs)) + +(define (compile-fun-case-select cs) + (append-map compile-fun-case-selector cs)) + +(define (compile-fun-case-selector c) + (match c + [(Lam f xs e) + (seq (Cmp r15 (length xs)) + (Je (symbol->label f)))] + [(LamRest f xs x e) + (seq (Mov r9 (sub1 (length xs))) + (Cmp r9 r15) + (Jl (symbol->label f)))])) diff --git a/langs/outlaw/compile-expr.rkt b/langs/outlaw/compile-expr.rkt new file mode 100644 index 00000000..1e0b909f --- /dev/null +++ b/langs/outlaw/compile-expr.rkt @@ -0,0 +1,407 @@ +#lang racket +(provide (all-defined-out)) +(require "stdlib.rkt" + "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-ops.rkt" + "compile-datum.rkt" + "a86/ast.rkt" + "registers.rkt") + +;; Expr CEnv GEnv Bool -> Asm +(define (compile-e e c g t?) + (match e + [(Quote d) (compile-datum d)] + [(Eof) (seq (Mov rax (imm->bits eof)))] + [(Var x) (compile-variable x c g)] + [(Prim p es) (compile-prim p es c g)] + [(If e1 e2 e3) (compile-if e1 e2 e3 c g t?)] + [(Begin es) (compile-begin es c g t?)] + [(Let xs es e) (compile-let xs es e c g t?)] + [(App e es) (compile-app e es c g t?)] + [(Apply e es el) (compile-apply e es el c g t?)] + [(Lam _ _ _) (compile-lam e c g)] + [(LamRest _ _ _ _) (compile-lam e c g)] + [(LamCase _ _) (compile-lam e c g)] + [(Match e ps es) (compile-match e ps es c g t?)])) + +;; Id CEnv GEnv -> Asm +(define (compile-variable x c g) + (match (lookup x c) + [#f (if (memq x g) + (seq (Mov rax (Offset (symbol->label x) 0))) + (error "unbound variable" x))] + [i (seq (Mov rax (Offset rsp i)))])) + +;; Op (Listof Expr) CEnv GEnv -> Asm +(define (compile-prim p es c g) + (seq (compile-es* es c g) + (match p + ['make-struct (compile-make-struct (length es))] + [_ (compile-op p)]))) + +;; Expr Expr Expr CEnv GEnv Bool -> Asm +(define (compile-if e1 e2 e3 c g t?) 
+ (let ((l1 (gensym 'if)) + (l2 (gensym 'if))) + (seq (compile-e e1 c g #f) + (Cmp rax val-false) + (Je l1) + (compile-e e2 c g t?) + (Jmp l2) + (Label l1) + (compile-e e3 c g t?) + (Label l2)))) + +;; [Listof Expr] CEnv GEnv Bool -> Asm +(define (compile-begin es c g t?) + (match es + ['() '()] + [(cons e '()) (compile-e e c g t?)] + [(cons e es) + (seq (compile-e e c g #f) + (compile-begin es c g t?))])) + +;; [Listof Id] [Listof Expr] Expr CEnv GEnv Bool -> Asm +(define (compile-let xs es e c g t?) + (seq (compile-es es c g) + (compile-e e (append (reverse xs) c) g t?) + (Add rsp (* 8 (length xs))))) + +;; Id [Listof Expr] CEnv GEnv Bool -> Asm +(define (compile-app f es c g t?) + (compile-app-nontail f es c g) + #; + (if t? + (compile-app-tail f es c) + (compile-app-nontail f es c))) + +;; Expr [Listof Expr] CEnv GEnv -> Asm +(define (compile-app-tail e es c g) + (seq (compile-es (cons e es) c g) + (move-args (add1 (length es)) (length c)) + (Add rsp (* 8 (length c))) + (Mov rax (Offset rsp (* 8 (length es)))) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) + (Jmp rax))) + +;; Integer Integer -> Asm +(define (move-args i off) + (cond [(zero? off) (seq)] + [(zero? i) (seq)] + [else + (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) + (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) + (move-args (sub1 i) off))])) + +;; Expr [Listof Expr] CEnv GEnv -> Asm +;; The return address is placed above the arguments, so callee pops +;; arguments and return address is next frame +(define (compile-app-nontail e es c g) + (let ((r (gensym 'ret)) + (i (* 8 (length es)))) + (seq (Lea rax r) + (Push rax) + (compile-es (cons e es) (cons #f c) g) + (Mov rax (Offset rsp i)) + (assert-proc rax) + (Xor rax type-proc) + (Mov r15 (length es)) + (Mov rax (Offset rax 0)) ; fetch the code label + (Jmp rax) + (Label r)))) + +;; Expr [Listof Expr] Expr CEnv GEnv Boolean -> Asm +(define (compile-apply e es el c g t?) 
+ ;; FIXME: should have tail recursive version too + (let ((r (gensym 'ret))) + (seq (Lea rax r) + (Push rax) + (compile-es (cons e es) (cons #f c) g) + (compile-e el (append (make-list (add1 (length es)) #f) (cons #f c)) g #f) + + (Mov r10 (Offset rsp (* 8 (length es)))) + + (Mov r15 (length es)) + (let ((loop (gensym)) + (done (gensym))) + (seq (Label loop) + (Cmp rax val-empty) + (Je done) + (assert-cons rax) + (Add r15 1) + (Xor rax type-cons) + (Mov r9 (Offset rax 8)) + (Push r9) + (Mov rax (Offset rax 0)) + (Jmp loop) + (Label done))) + + + (assert-proc r10) + (Xor r10 type-proc) + (Mov r10 (Offset r10 0)) + + (Jmp r10) + (Label r)))) + +;; Lambda CEnv GEnv -> Asm +(define (compile-lam l c g) + (let ((fvs (fv- l g))) + (seq (Lea rax (symbol->label (lambda-name l))) + (Mov (Offset rbx 0) rax) + (free-vars-to-heap fvs c 8) + (Mov rax rbx) ; return value + (Or rax type-proc) + (Add rbx (* 8 (add1 (length fvs))))))) + +;; Lambda -> Id +(define (lambda-name l) + (match l + [(Lam f _ _) f] + [(LamRest f _ _ _) f] + [(LamCase f _) f])) + +;; [Listof Id] CEnv Int -> Asm +;; Copy the values of given free variables into the heap at given offset +(define (free-vars-to-heap fvs c off) + (match fvs + ['() (seq)] + [(cons x fvs) + (match (lookup x c) + [#f (error "unbound variable" x)] + [i + (seq (Mov r8 (Offset rsp i)) + (Mov (Offset rbx off) r8) + (free-vars-to-heap fvs c (+ off 8)))])])) + +;; [Listof Id] Int -> Asm +;; Copy the closure environment at given offset to stack +(define (copy-env-to-stack fvs off) + (match fvs + ['() (seq)] + [(cons _ fvs) + (seq (Mov r9 (Offset rax off)) + (Push r9) + (copy-env-to-stack fvs (+ 8 off)))])) + +;; [Listof Expr] CEnv GEnv -> Asm +(define (compile-es es c g) + (match es + ['() '()] + [(cons e es) + (seq (compile-e e c g #f) + (Push rax) + (compile-es es (cons #f c) g))])) + +;; [Listof Expr] CEnv GEnv -> Asm +;; Like compile-es, but leave last subexpression in rax (if exists) +(define (compile-es* es c g) + (match es + ['() 
'()] + [(cons e '()) + (compile-e e c g #f)] + [(cons e es) + (seq (compile-e e c g #f) + (Push rax) + (compile-es* es (cons #f c) g))])) + +;; Expr [Listof Pat] [Listof Expr] CEnv GEnv Bool -> Asm +(define (compile-match e ps es c g t?) + (let ((done (gensym))) + (seq (compile-e e c g #f) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) g done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv GEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c g done t?) + (match (cons ps es) + [(cons '() '()) (seq)] + [(cons (cons p ps) (cons e es)) + (seq (compile-match-clause p e c g done t?) + (compile-match-clauses ps es c g done t?))])) + +;; Pat Expr CEnv GEnv Symbol Bool -> Asm +(define (compile-match-clause p e c g done t?) + (let ((next (gensym))) + (match (compile-pattern p c g '() next) + [(list i f cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) g t?) 
+ (Add rsp (* 8 (length cm))) + (Jmp done) + f + (Label next))]))) + +;; Pat CEnv GEnv CEnv Symbol -> (list Asm Asm CEnv) +(define (compile-pattern p c g cm next) + (match p + [(PWild) + (list (seq) (seq) cm)] + [(PVar x) + (list (seq (Push rax)) + (seq) + (cons x cm))] + [(PStr s) + (let ((fail (gensym))) + (list (seq (Lea rdi (symbol->data-label (string->symbol s))) + (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-str) + (Jne fail) + (Xor rax type-str) + (Mov rsi rax) + (pad-stack) + (Call 'symb_cmp) + (unpad-stack) + (Cmp rax 0) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PSymb s) + (let ((fail (gensym))) + (list (seq (Lea r9 (Plus (symbol->data-label s) type-symb)) + (Cmp rax r9) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PLit l) + (let ((fail (gensym))) + (list (seq (Cmp rax (imm->bits l)) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 c g (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 c g cm1 next) + [(list i2 f2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 f2) + cm2)])])] + [(PBox p) + (match (compile-pattern p c g cm next) + [(list i1 f1 cm1) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Jne fail) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + (seq f1 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 c g (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 c g cm1 next) + [(list i2 f2 cm2) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Jne fail) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax 
(Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 + f2 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm2))])])] + [(PStruct n ps) + (match (compile-struct-patterns ps c g (cons #f cm) next 1 (add1 (length cm))) + [(list i f cm1) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-struct) + (Jne fail) + (Xor rax type-struct) + (Mov r8 (Offset rax 0)) + (Lea r9 (Plus (symbol->data-label n) type-symb)) + (Cmp r8 r9) + (Jne fail) + (Push rax) + i) + (seq f + (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm1))])] + + [(PPred e) + (let ((fail (gensym 'fail))) + (list + (let ((r (gensym 'ret))) + (seq (Lea r15 r) + (Push r15) ; rp + (Push rax) ; arg (saved for the moment) + (compile-e e (list* #f #f (append cm c)) g #f) + (Pop r15) ;; HERE + (Push rax) + (Push r15) + + (assert-proc rax) + (Xor rax type-proc) + (Mov r15 1) + (Mov rax (Offset rax 0)) ; fetch code label + (Jmp rax) + (Label r) + (Cmp rax val-false) + (Je fail))) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))])) + + + + +;; [Listof Pat] CEnv Symbol Nat Nat -> (list Asm Asm CEnv) +(define (compile-struct-patterns ps c g cm next i cm0-len) + (match ps + ['() (list '() '() cm)] + [(cons p ps) + (match (compile-pattern p c g cm next) + [(list i1 f1 cm1) + (match (compile-struct-patterns ps c g cm1 next (add1 i) cm0-len) + [(list is fs cmn) + (list + (seq (Mov rax (Offset rax (* 8 i))) + i1 + (Mov rax (Offset rsp (* 8 (- (length cm1) cm0-len)))) + is) + (seq f1 fs) + cmn)])])])) diff --git a/langs/outlaw/compile-library.rkt b/langs/outlaw/compile-library.rkt new file mode 100644 index 00000000..f630bc51 --- /dev/null +++ b/langs/outlaw/compile-library.rkt @@ -0,0 +1,11 @@ +#lang racket +(require "parse.rkt" "compile.rkt" "read-all.rkt" "a86/printer.rkt") +(provide main) + +;; Compile contents of stdin +;; emit asm code on stdout +(define (main) + (begin + (read-line) ; ignore 
#lang racket line + (current-shared? #t) + (asm-display (compile-library (parse-library (read-all)))))) diff --git a/langs/outlaw/compile-literals.rkt b/langs/outlaw/compile-literals.rkt new file mode 100644 index 00000000..d5e219c2 --- /dev/null +++ b/langs/outlaw/compile-literals.rkt @@ -0,0 +1,124 @@ +#lang racket +(provide compile-literals init-symbol-table literals) +(require "ast.rkt" + "utils.rkt" + "a86/ast.rkt" + "registers.rkt") + +;; Prog -> Asm +(define (compile-literals p) + (append-map compile-literal (literals p))) + +;; Symbol -> Asm +(define (compile-literal s) + (let ((str (symbol->string s))) + (seq (Label (symbol->data-label s)) + (Dq (string-length str)) + (compile-string-chars (string->list str)) + (if (odd? (string-length str)) + (seq (Dd 0)) + (seq))))) + +;; Prog -> Asm +;; Call intern_symbol on every symbol in the program +(define (init-symbol-table p) + (match (symbols p) + ['() (seq)] + [ss (seq (Sub 'rsp 8) + (append-map init-symbol ss) + (Add 'rsp 8))])) + +;; Symbol -> Asm +(define (init-symbol s) + (seq (Lea rdi (symbol->data-label s)) + (Call 'intern_symbol))) + +;; Prog -> [Listof Symbol] +(define (literals p) + (remove-duplicates (map to-symbol (literals* p)) eq?)) + +;; Prog -> [Listof Symbol] +(define (symbols p) + (remove-duplicates (filter symbol? (literals* p)) eq?)) + +;; (U String Symbol) -> Symbol +(define (to-symbol s) + (if (string? 
s) + (string->symbol s) + s)) + +;; Prog -> [Listof (U Symbol String)] +(define (literals* p) + (match p + [(Prog ds) + (append-map literals-d ds)])) + +;; Defn -> [Listof (U Symbol String)] +(define (literals-d d) + (match d + [(Defn f l) + (literals-e l)])) + +;; Expr -> [Listof (U Symbol String)] +(define (literals-e e) + (match e + [(Quote d) (literals-datum d)] + [(Prim p es) + (append-map literals-e es)] + [(If e1 e2 e3) + (append (literals-e e1) (literals-e e2) (literals-e e3))] + [(Begin es) + (append-map literals-e es)] + [(Let xs es e) + (append (append-map literals-e es) (literals-e e))] + [(App e1 es) + (append (literals-e e1) (append-map literals-e es))] + [(Lam f xs e) + (literals-e e)] + [(LamRest f xs x e1) + (literals-e e1)] + [(LamCase f cs) + (append-map literals-e cs)] + [(Match e ps es) + (append (literals-e e) (append-map literals-match-clause ps es))] + [(Apply e es el) + (append (literals-e e) (append-map literals-e es) (literals-e el))] + [_ '()])) + +;; Pat Expr -> [Listof Symbol] +(define (literals-match-clause p e) + (append (literals-pat p) (literals-e e))) + +;; Pat -> [Listof (U Symbol String)] +(define (literals-pat p) + (match p + [(PSymb s) (list s)] + [(PStr s) (list s)] + [(PBox p) (literals-pat p)] + [(PCons p1 p2) (append (literals-pat p1) (literals-pat p2))] + [(PAnd p1 p2) (append (literals-pat p1) (literals-pat p2))] + [(PPred e) (literals-e e)] + [(PStruct t ps) (append-map literals-pat ps)] + [_ '()])) + +;; Datum -> [Listof (U Symbol String)] +(define (literals-datum d) + (cond + [(string? d) (list d)] + [(symbol? d) (list d)] + [(cons? d) + (append (literals-datum (car d)) + (literals-datum (cdr d)))] + [(box? d) + (literals-datum (unbox d))] + [(vector? 
d) + (append-map literals-datum (vector->list d))] + [else '()])) + +;; [Listof Char] -> Asm +(define (compile-string-chars cs) + (match cs + ['() (seq)] + [(cons c cs) + (seq (Dd (char->integer c)) + (compile-string-chars cs))])) diff --git a/langs/outlaw/compile-ops.rkt b/langs/outlaw/compile-ops.rkt new file mode 100644 index 00000000..b72f3b08 --- /dev/null +++ b/langs/outlaw/compile-ops.rkt @@ -0,0 +1,581 @@ +#lang racket +(provide (all-defined-out)) +(require "stdlib.rkt" "ast.rkt" "registers.rkt" "types.rkt" "utils.rkt" "a86/ast.rkt") + +;; Op -> Asm +(define (compile-op p) + (match p + ;; Op0 + ['void (seq (Mov rax val-void))] + ['read-byte (seq (pad-stack) + (Call 'read_byte) + (unpad-stack))] + ['current-input-port ; hack, doesn't actually exist + (seq (Mov rax val-void))] + ['system-type + (seq (pad-stack) + (Call 'system_type) + (Sal rax int-shift) + (unpad-stack))] + + ;; Op1 + ['add1 + (seq (assert-integer rax) + (Add rax (imm->bits 1)))] + ['sub1 + (seq (assert-integer rax) + (Sub rax (imm->bits 1)))] + ['zero? + (seq (assert-integer rax) + (eq-imm 0))] + ['char? + (type-pred mask-char type-char)] + ['char->integer + (seq (assert-char rax) + (Sar rax char-shift) + (Sal rax int-shift))] + ['integer->char + (seq (assert-codepoint rax) + (Sar rax int-shift) + (Sal rax char-shift) + (Xor rax type-char))] + ['eof-object? (eq-imm eof)] + ['write-byte + (seq (assert-byte rax) + (pad-stack) + (Mov rdi rax) + (Call 'write_byte) + (unpad-stack) + (Mov rax val-void))] + ['box + (seq (Mov (Offset rbx 0) rax) + (Mov rax rbx) + (Or rax type-box) + (Add rbx 8))] + ['unbox + (seq (assert-box rax) + (Xor rax type-box) + (Mov rax (Offset rax 0)))] + ['car + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 8)))] + ['cdr + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 0)))] + ['empty? (eq-imm '())] + ['box? + (type-pred ptr-mask type-box)] + ['cons? + (type-pred ptr-mask type-cons)] + ['vector? 
+ (type-pred ptr-mask type-vect)] + ['string? + (type-pred ptr-mask type-str)] + ['symbol? + (type-pred ptr-mask type-symb)] + ['vector-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-vector rax) + (Xor rax type-vect) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))] + ['string-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-string rax) + (Xor rax type-str) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))] + ['string->symbol + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + (pad-stack) + (Call 'intern_symbol) + (unpad-stack) + (Or rax type-symb))] + ['symbol->string + (seq (assert-symbol rax) + (Xor rax type-symb) + char-array-copy + (Or rax type-str))] + ['string->uninterned-symbol + (seq (assert-string rax) + (Xor rax type-str) + char-array-copy + (Or rax type-symb))] + ['open-input-file + (seq (assert-string rax) + (Mov rdi rax) + (pad-stack) + (Call 'open_input_file) + (unpad-stack))] + ['read-byte-port + (seq (Mov rdi rax) ; assert port + (pad-stack) + (Call 'read_byte_port) + (unpad-stack))] + ['error + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + (pad-stack) + (Call 'raise_error))] + ['integer? + (type-pred mask-int type-int)] + ['procedure? + (type-pred ptr-mask type-proc)] + ['eq-hash-code + (seq (Sal rax int-shift))] + ['char-alphabetic? + (seq (assert-char rax) + (Sar rax char-shift) + (Mov rdi rax) + (pad-stack) + (Call 'is_char_alphabetic) + (unpad-stack))] + ['char-whitespace? 
+ (seq (assert-char rax) + (Sar rax char-shift) + (Mov rdi rax) + (pad-stack) + (Call 'is_char_whitespace) + (unpad-stack))] + ['write-char + (seq (assert-char rax) + (Mov rdi rax) + (pad-stack) + (Call 'print_codepoint_out) + (unpad-stack))] + + ;; Op2 + ['+ + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Add rax r8))] + ['- + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Sub r8 rax) + (Mov rax r8))] + ['< + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax val-true) + (let ((true (gensym))) + (seq (Jl true) + (Mov rax val-false) + (Label true))))] + ['= + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax val-true) + (let ((true (gensym))) + (seq (Je true) + (Mov rax val-false) + (Label true))))] + ['cons + (seq (Mov (Offset rbx 0) rax) + (Pop rax) + (Mov (Offset rbx 8) rax) + (Mov rax rbx) + (Or rax type-cons) + (Add rbx 16))] + ['eq? + (seq (Pop r8) + (eq r8 rax))] + ['make-vector + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (Cmp r8 0) ; special case empty vector + (Je empty) + + (Mov r9 rbx) + (Or r9 type-vect) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Label loop) + (Mov (Offset rbx 0) rax) + (Add rbx 8) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-vect) + (Label done)))] + + ['vector-ref + (seq (Pop r8) + (assert-vector r8) + (assert-integer rax) + (Cmp r8 type-vect) + (Je 'raise_error_align) ; special case for empty vector + (Cmp rax 0) + (Jl 'raise_error_align) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'raise_error_align) + (Sal rax 3) + (Add r8 rax) + (Mov rax (Offset r8 8)))] + + ['make-string + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (assert-char rax) + (Cmp r8 0) 
; special case empty string + (Je empty) + + (Mov r9 rbx) + (Or r9 type-str) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Sar rax char-shift) + + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd + + (Label loop) + (Mov (Offset rbx 0) eax) + (Add rbx 4) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-str) + (Label done)))] + + ['string-ref + (seq (Pop r8) + (assert-string r8) + (assert-integer rax) + (Cmp r8 type-str) + (Je 'raise_error_align) ; special case for empty string + (Cmp rax 0) + (Jl 'raise_error_align) + (Xor r8 type-str) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'raise_error_align) + (Sal rax 2) + (Add r8 rax) + (Mov 'eax (Offset r8 8)) + (Sal rax char-shift) + (Or rax type-char))] + + ['string-append + (seq (Pop r8) + (assert-string r8) + (assert-string rax) + (Xor r8 type-str) + (Xor rax type-str) + (Mov 'rdi r8) + (Mov 'rsi rax) + (Mov rdx rbx) + (pad-stack) + (Call 'string_append) + (unpad-stack) + (Mov r8 rax) + (Cmp r8 0) + (let ((empty (gensym)) + (done (gensym))) + (seq (Je empty) + (Sal r8 2) + (Mov rax rbx) + (Or rax type-str) + (Add rbx r8) + (Jmp done) + (Label empty) + (Mov rax type-str) + (Label done))))] + + ['struct? + (let ((f (gensym)) + (t (gensym))) + (seq (Pop r8) + ; (assert-symbol r8) ; don't need to do this we generated the code + (Mov r9 rax) + (And r9 ptr-mask) + (Cmp r9 type-struct) + (Jne f) + (Xor rax type-struct) + (Mov rax (Offset rax 0)) + (Cmp r8 rax) + (Mov rax (imm->bits #t)) + (Jne f) + (Jmp t) + (Label f) + (Mov rax (imm->bits #f)) + (Label t)))] + ['set-box! 
+ (seq (Pop r8) + (assert-box r8) + (Xor r8 type-box) + (Mov (Offset r8 0) rax) + (Mov rax val-void))] + ['quotient + (seq (Pop r8) + (assert-integer r8) + (Mov r10 rax) + (assert-integer r10) + + (Mov rdx 0) + (Mov rax r8) + (Sar rax int-shift) + (Sar r10 int-shift) + (Div r10) + (Sal rax int-shift))] + ['remainder + (seq (Pop r8) + (assert-integer r8) + (Mov r10 rax) + (assert-integer r10) + + (Mov rdx 0) + (Mov rax r8) + (Sar rax int-shift) + (Sar r10 int-shift) + (Div r10) + (Mov rax rdx) + (Sal rax int-shift))] + ['bitwise-and + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (And rax r8))] + ['bitwise-ior + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Or rax r8))] + ['bitwise-xor + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Xor rax r8) + (Or rax type-int))] + ['arithmetic-shift + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Sar rax int-shift) + (Mov 'rcx rax) + (Sal r8 'cl) + (Mov rax r8))] + + ['peek-byte + (seq (Pop r8) + (assert-integer rax) + (Sar rax int-shift) + ; 'rdi argument is an ignored port value + ;; HERE + (Mov rsi rax) ; offset + (pad-stack) + (Call 'peek_byte) + (unpad-stack))] + + ;; Op3 + ['vector-set! 
+ (seq (Pop r10) + (Pop r8) + (assert-vector r8) + (assert-integer r10) + (Cmp r10 0) + (Jl 'raise_error_align) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar r10 int-shift) ; r10 = index + (Sub r9 1) + (Cmp r9 r10) + (Jl 'raise_error_align) + (Sal r10 3) + (Add r8 r10) + (Mov (Offset r8 8) rax) + (Mov rax val-void))] + + ['peek-byte-port + (seq (Pop r8) ; assert port + (Mov rdi r8) + (assert-integer rax) + (Mov rsi rax) + (pad-stack) + (Call 'peek_byte_port) + (unpad-stack))] + + ['struct-ref ; symbol, int, struct + (seq (Pop r8) + (Pop 'r11) + (assert-struct rax) + ;(assert-integer r8) + (Xor rax type-struct) + (Mov r10 (Offset rax 0)) + (Cmp 'r11 r10) + (Jne 'raise_error_align) + (Sar r8 int-shift) + (Add r8 1) + (Sal r8 3) + (Add rax r8) + (Mov rax (Offset rax 0)))])) + +;; Nat -> Asm +;; Emit instructions for creating a structure of length n +;; using values on top of stack +(define (compile-make-struct n) + (seq (compile-make-struct/a n 1) + (Mov rax rbx) + (Or rax type-struct) + (Add rbx (* 8 n)))) + +;; Nat Nat -> Asm +;; Pop elements off stack, writing them to heap +(define (compile-make-struct/a n i) + (if (= n i) + (seq (Mov (Offset rbx (* 8 (- n i))) rax)) + (seq (Mov (Offset rbx (* 8 (- n i))) rax) + (Pop rax) + (compile-make-struct/a n (add1 i))))) + +;; Asm +;; Copy sized array of characters pointed to by rax +(define char-array-copy + (seq (Mov rdi rbx) ; dst + (Mov rsi rax) ; src + (Mov rdx (Offset rax 0)) ; len + (Add rdx 1) ; #words = 1 + (len+1)/2 + (Sar rdx 1) + (Add rdx 1) + (Sal rdx 3) ; #bytes = 8*#words + (Mov r12 rdx) ; save rdx before destroyed + (pad-stack) + (Call 'memcpy) + (unpad-stack) + ; rbx should be preserved by memcpy + ;(Mov rbx rax) ; dst is returned, install as heap pointer + (Add rbx r12))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (assert-type mask type) + (λ (arg) + (seq (Mov r9 arg) + (And r9 mask) + (Cmp r9 type) + (Jne 'raise_error_align)))) + +(define (type-pred mask type) + (let 
((l (gensym))) + (seq (And rax mask) + (Cmp rax type) + (Mov rax (imm->bits #t)) + (Je l) + (Mov rax (imm->bits #f)) + (Label l)))) + +(define assert-integer + (assert-type mask-int type-int)) +(define assert-char + (assert-type mask-char type-char)) +(define assert-box + (assert-type ptr-mask type-box)) +(define assert-cons + (assert-type ptr-mask type-cons)) +(define assert-vector + (assert-type ptr-mask type-vect)) +(define assert-string + (assert-type ptr-mask type-str)) +(define assert-symbol + (assert-type ptr-mask type-symb)) +(define assert-proc + (assert-type ptr-mask type-proc)) +(define assert-struct + (assert-type ptr-mask type-struct)) + +(define (assert-codepoint r) + (let ((ok (gensym))) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align) + (Cmp r (imm->bits 1114111)) + (Jg 'raise_error_align) + (Cmp r (imm->bits 55295)) + (Jl ok) + (Cmp r (imm->bits 57344)) + (Jg ok) + (Jmp 'raise_error_align) + (Label ok)))) + +(define (assert-byte r) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align) + (Cmp r (imm->bits 255)) + (Jg 'raise_error_align))) + +(define (assert-natural r) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align))) + +;; Value -> Asm +(define (eq-imm imm) + (let ((l1 (gensym))) + (seq (Cmp rax (imm->bits imm)) + (Mov rax val-true) + (Je l1) + (Mov rax val-false) + (Label l1)))) + +(define (eq ir1 ir2) + (let ((l1 (gensym))) + (seq (Cmp ir1 ir2) + (Mov rax val-true) + (Je l1) + (Mov rax val-false) + (Label l1)))) diff --git a/langs/outlaw/compile-stdin.rkt b/langs/outlaw/compile-stdin.rkt new file mode 100644 index 00000000..2cbad671 --- /dev/null +++ b/langs/outlaw/compile-stdin.rkt @@ -0,0 +1,12 @@ +#lang racket +(require "stdlib.rkt" "parse.rkt" "compile.rkt" "read-all.rkt" "a86/printer.rkt") +(provide main) + +;; -> Void +;; Compile contents of stdin +;; emit asm code on stdout +(define (main) + (begin + (read-line) ; ignore #lang racket line + (current-shared? 
#t) + (asm-display (compile (parse (read-all)))))) diff --git a/langs/outlaw/compile.rkt b/langs/outlaw/compile.rkt new file mode 100644 index 00000000..d0171d8d --- /dev/null +++ b/langs/outlaw/compile.rkt @@ -0,0 +1,160 @@ +#lang racket +(provide (all-defined-out)) +(require "stdlib.rkt" + "ast.rkt" + "a86/ast.rkt" + "registers.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-define.rkt" + "compile-expr.rkt" + "compile-literals.rkt") + +;; type CEnv = [Listof Id] + +(define (compile p) + (match p + [(Prog ds) + (let ((gs (append stdlib-ids (define-ids ds)))) + (seq (externs) + (map (lambda (i) (Extern (symbol->label i))) stdlib-ids) + (Global 'entry) + (Label 'entry) + + (Push rbx) ; save non-volatile registers + (Push r12) + (Push r15) + + (Mov rbx rdi) ; recv heap pointer + (init-symbol-table p) + (init-lib) + + (compile-defines ds gs) + (compile-variable (last-define-id ds) '() gs) + + (Pop r15) ; restore non-volatile registers + (Pop r12) + (Pop rbx) + + (Ret) + (compile-lambda-defines (lambdas p) gs) + (Global 'raise_error_align) + (Label 'raise_error_align) + (pad-stack) + (Mov rdi 0) ; null arg + (Call 'raise_error) + + ;; one way to make `cons' a function instead of a primitive + ;;cons-function + + (Data) + (compile-literals p)))])) + +(define (last-define-id ds) + (match ds + [(cons (Defn x _) '()) x] + [(cons d ds) (last-define-id ds)])) + +(define (init-lib) + (let ((r (gensym))) ; call init_lib + (seq (Extern 'init_lib) + (Lea rax r) + (Push rax) + (Jmp 'init_lib) + (Label r)))) + +(define stdlib-ids + '(list list* make-list list? foldr map filter length append append* + memq member append-map vector->list + reverse + number->string gensym read read-char peek-char + > <= >= + void? + list->string string->list + char<=? char=? + remove-duplicates remq* remove* remove + andmap ormap vector list->vector boolean? + substring odd? 
+ system-type ;; hard-coded + not findf + read-line + * ; limited + exact->inexact / expt string->keyword ; unimplemented + ;; Op0 + read-byte peek-byte void + ;; Op1 + add1 sub1 zero? char? write-byte eof-object? + integer->char char->integer + box unbox box? empty? cons? car cdr + vector? vector-length string? string-length + symbol->string string->symbol symbol? + string->uninterned-symbol + open-input-file + write-char error integer? exact-integer? procedure? + eq-hash-code char-alphabetic? char-whitespace? displayln write-string + ;; Op2 + + - < = cons eq? make-vector vector-ref + make-string string-ref string-append + quotient remainder set-box! + bitwise-and bitwise-ior bitwise-xor arithmetic-shift + ;; Op3 + vector-set!)) + +(define (externs) + (map Extern + '(peek_byte + read_byte + write_byte + raise_error + intern_symbol + symb_cmp + string_append + memcpy + open_input_file + read_byte_port + peek_byte_port + is_char_alphabetic + is_char_whitespace + print_codepoint_out + system_type))) + +(define cons-function + (let ((code (gensym 'cons_code)) + (clos (gensym 'cons_closure))) + (seq (Data) + (Label (symbol->label 'cons)) + (Dq (Plus (symbol->label clos) type-proc)) + (Label (symbol->label clos)) + (Dq (symbol->label code)) + (Text) + (Label (symbol->label code)) + (Pop rax) + (Mov (Offset rbx 0) rax) + (Pop rax) + (Mov (Offset rbx 8) rax) + (Add rsp 8) ; pop function + (Mov rax rbx) + (Or rax type-cons) + (Add rbx 16) + (Ret)))) + + +;; Lib -> Asm +(define (compile-library l) + (match l + [(Lib ids ds) + (let ((g (define-ids ds))) + (seq (externs) + (map (lambda (i) (Global (symbol->label i))) ids) + (Extern 'raise_error_align) + + (Global 'init_lib) + (Label 'init_lib) + (compile-defines ds g) + (Ret) + + (compile-lambda-defines (lambdas-ds ds) g) + (Data) + (compile-literals (Prog ds))))])) diff --git a/langs/outlaw/env.rkt b/langs/outlaw/env.rkt new file mode 100644 index 00000000..c43be9c3 --- /dev/null +++ b/langs/outlaw/env.rkt @@ -0,0 +1,15 @@ 
+#lang racket
+(provide lookup ext)
+
+;; Env Variable -> Answer   (the value bound to x in env, or 'err if unbound)
+(define (lookup env x)
+  (match env
+    ['() 'err]
+    [(cons (list y i) env)
+     (match (symbol=? x y)
+       [#t i]
+       [#f (lookup env x)])]))
+
+;; Env Variable Value -> Env   (r extended with a binding of x to i)
+(define (ext r x i)
+  (cons (list x i) r))
\ No newline at end of file
diff --git a/langs/outlaw/error.c b/langs/outlaw/error.c
new file mode 100644
index 00000000..7ccf37a5
--- /dev/null
+++ b/langs/outlaw/error.c
@@ -0,0 +1,13 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "values.h"
+#include "runtime.h"
+
+void print_str(val_str_t*);
+
+/* Print the string value msg followed by a newline, then exit with status 1. */
+void error(val_t msg) {
+  print_str(val_unwrap_str(msg));
+  putchar('\n');
+  exit(1);
+}
diff --git a/langs/outlaw/fv.rkt b/langs/outlaw/fv.rkt
new file mode 100644
index 00000000..9abf5c11
--- /dev/null
+++ b/langs/outlaw/fv.rkt
@@ -0,0 +1,52 @@
+#lang racket
+(require "ast.rkt")
+(provide fv fv-)
+
+;; Expr -> [Listof Id]
+;; List all of the free variables in e (duplicates removed with eq?)
+(define (fv e)
+  (remove-duplicates (fv* e) eq?))
+
+;; Expr [Listof Id] -> [Listof Id]   free variables of e that are not in xs
+(define (fv- e xs)
+  (remq* xs (fv e)))
+
+(define (fv* e) ; Expr -> [Listof Id], result may contain duplicates
+  (match e
+    [(Var x) (list x)]
+    [(Prim p es) (append-map fv* es)]
+    [(If e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))]
+    [(Begin es) (append-map fv* es)]
+    [(Let xs es e) (append (append-map fv* es) (remq* xs (fv* e)))]
+    [(App e1 es) (append (fv* e1) (append-map fv* es))]
+    [(Lam f xs e) (remq* xs (fv* e))]
+    [(LamRest f xs x e) (remq* (cons x xs) (fv* e))]
+    [(LamCase f cs) (append-map fv* cs)]
+    [(Apply e es el) (append (fv* e) (append-map fv* es) (fv* el))]
+    [(Match e ps es) (append (fv* e) (append-map fv-clause* ps es))]
+    [_ '()]))
+
+;; Pat Expr -> [Listof Id]   free vars of p's predicates and of body e, minus the vars p binds
+(define (fv-clause* p e)
+  (remq* (bv-pat* p) (append (fv-pat* p) (fv* e))))
+
+;; Pat -> [Listof Id]   variables bound by pattern p
+(define (bv-pat* p)
+  (match p
+    [(PVar x) (list x)]
+    [(PCons p1 p2) (append (bv-pat* p1) (bv-pat* p2))]
+    [(PAnd p1 p2) (append (bv-pat* p1) (bv-pat* p2))]
+    [(PBox p) (bv-pat* p)]
+    [(PStruct n ps) (append-map bv-pat* ps)]
+    [_ '()]))
+
+;; Pat -> [Listof Id]   free variables of the predicate expressions inside p
+(define (fv-pat* p)
+  (match p
+    [(PBox p) (fv-pat* p)]
+    [(PCons p1 p2) (append (fv-pat* p1) (fv-pat* p2))]
+    [(PAnd p1 p2) (append (fv-pat* p1) (fv-pat* p2))]
+    [(PStruct n ps) (append-map fv-pat* ps)]
+    [(PPred e) (fv* e)]
+    [_ '()]))
+
diff --git a/langs/outlaw/interp-prims.rkt b/langs/outlaw/interp-prims.rkt
new file mode 100644
index 00000000..a4fb2f80
--- /dev/null
+++ b/langs/outlaw/interp-prims.rkt
@@ -0,0 +1,85 @@
+#lang racket
+(require "ast.rkt")
+(provide interp-prim)
+
+;; type Struct = (StructVal Symbol (Vectorof Value))
+(struct StructVal (name vals))
+
+;; Op [Listof Value] -> Answer   ('err when no clause accepts p applied to vs)
+(define (interp-prim p vs)
+  (match (cons p vs)
+    ;; Op0
+    [(list 'void) (void)]
+    [(list 'read-byte) (read-byte)]
+    [(list 'peek-byte) (peek-byte)]
+    ;; Op1
+    [(list 'add1 (? integer? v)) (add1 v)]
+    [(list 'sub1 (? integer? v)) (sub1 v)]
+    [(list 'zero? (? integer? v)) (zero? v)]
+    [(list 'char? v) (char? v)]
+    [(list 'char->integer (? char? v)) (char->integer v)]
+    [(list 'integer->char (? codepoint? v)) (integer->char v)]
+    [(list 'eof-object? v) (eof-object? v)]
+    [(list 'write-byte (? byte? v)) (write-byte v)]
+    [(list 'box v) (box v)]
+    [(list 'unbox (? box? v)) (unbox v)]
+    [(list 'car (? pair? v)) (car v)]
+    [(list 'cdr (? pair? v)) (cdr v)]
+    [(list 'empty? v) (empty? v)]
+    [(list 'cons? v) (cons? v)]
+    [(list 'box? v) (box? v)]
+    [(list 'vector? v) (vector? v)]
+    [(list 'vector-length (? vector? v)) (vector-length v)]
+    [(list 'string? v) (string? v)]
+    [(list 'string-length (? string? v)) (string-length v)]
+    [(list 'symbol? v) (symbol? v)]
+    [(list 'symbol->string (? symbol? v)) (symbol->string v)]
+    [(list 'string->symbol (? string? v)) (string->symbol v)]
+    [(list 'string->uninterned-symbol (? string? v))
+     (string->uninterned-symbol v)]
+    ;; Op2
+    [(list '+ (? integer? v1) (? integer? v2)) (+ v1 v2)]
+    [(list '- (? integer? v1) (? integer? v2)) (- v1 v2)]
+    [(list '< (? integer? v1) (? integer? v2)) (< v1 v2)]
+    [(list '= (? integer? v1) (? integer? v2)) (= v1 v2)]
+    [(list 'cons v1 v2) (cons v1 v2)]
+    [(list 'eq? v1 v2) (eq? v1 v2)]
+    [(list 'make-vector (? integer? v1) v2)
+     (if (<= 0 v1)
+         (make-vector v1 v2)
+         'err)]
+    [(list 'vector-ref (? vector? v1) (? integer? v2))
+     (if (<= 0 v2 (sub1 (vector-length v1)))
+         (vector-ref v1 v2)
+         'err)]
+    [(list 'make-string (? integer? v1) (? char? v2))
+     (if (<= 0 v1)
+         (make-string v1 v2)
+         'err)]
+    [(list 'string-ref (? string? v1) (? integer? v2))
+     (if (<= 0 v2 (sub1 (string-length v1)))
+         (string-ref v1 v2)
+         'err)]
+    [(list 'struct? s v)
+     (match v
+       [(StructVal n _) (eq? s n)]
+       [_ #f])]
+    ;; Op3
+    [(list 'vector-set! (? vector? v1) (? integer? v2) v3)
+     (if (<= 0 v2 (sub1 (vector-length v1)))
+         (vector-set! v1 v2 v3)
+         'err)]
+    [(list 'struct-ref s i (StructVal n vs))
+     (if (and (eq? s n) (<= 0 i (sub1 (vector-length vs))))
+         (vector-ref vs i)
+         'err)]
+    ;; OpN
+    [(cons 'make-struct (cons (? symbol? n) vs))
+     (StructVal n (list->vector vs))]
+    [_ 'err]))
+
+;; Any -> Boolean   (is v an integer in one of the two ranges accepted below?)
+(define (codepoint? v)
+  (and (integer? v)
+       (or (<= 0 v 55295)
+           (<= 57344 v 1114111))))
diff --git a/langs/outlaw/io.c b/langs/outlaw/io.c
new file mode 100644
index 00000000..b437f0b7
--- /dev/null
+++ b/langs/outlaw/io.c
@@ -0,0 +1,167 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include "types.h"
+#include "values.h"
+#include "runtime.h"
+
+#define port_buffer_bytes 8
+
+void utf8_encode_string(val_str_t *, char *);
+int utf8_encode_char(val_char_t, char *);
+
+/* Read one byte from `in`; yields the eof value at end of input. */
+val_t read_byte(void)
+{
+  /* getc returns an int; a char cannot tell byte 0xFF apart from EOF */
+  int c = getc(in);
+  return (c == EOF) ? val_wrap_eof() : val_wrap_int((unsigned char)c);
+}
+
+/* Yield the byte `offset` positions ahead on `in` without consuming any
+   input, or the eof value if input ends first.  fake_port is ignored.
+   NOTE(review): up to offset+1 bytes are pushed back, but ISO C only
+   guarantees one byte of ungetc pushback -- confirm the C library allows it. */
+val_t peek_byte(void* fake_port, int offset)
+{
+  int cs[3];
+  if ((offset < 0) || (offset > 3)) { exit(-1); }
+  int i;
+  int c;
+  for (i = 0; i < offset; i++) {
+    cs[i] = getc(in);
+  }
+  c = getc(in);
+  if (c != EOF) { ungetc(c, in); }
+  for (i = 0; i < offset; i++) {
+    /* restore in reverse read order; EOF is a marker, not a byte */
+    if (cs[offset-i-1] != EOF) { ungetc(cs[offset-i-1], in); }
+  }
+  return (c == EOF) ? val_wrap_eof() : val_wrap_int((unsigned char)c);
+}
+
+/* Write the integer value c to `out` as a single byte. */
+val_t write_byte(val_t c)
+{
+  putc((char) val_unwrap_int(c), out);
+  return val_wrap_void();
+}
+
+/* Write the character value c to `out` as encoded by utf8_encode_char. */
+val_t print_codepoint_out(val_t c)
+{
+  char buffer[5] = {0};
+  utf8_encode_char(val_unwrap_char(c), buffer);
+  fprintf(out, "%s", buffer);
+  return val_wrap_void();
+}
+
+/* Open the file named by the string value `in` for binary reading and
+   yield it as a port value.  Failures are reported via error_handler. */
+val_t open_input_file(val_t in) {
+  FILE *f;
+  char *buf;
+  val_str_t* fn = val_unwrap_str(in);
+  buf = calloc((fn->len*4)+1, 1);
+  if (!buf)
+    error_handler(NULL);
+  utf8_encode_string(fn, buf);
+
+  f = fopen(buf, "rb");
+  free(buf); /* the encoded name is not needed again, even on failure */
+  if (!f)
+    error_handler(NULL);
+
+  val_symb_t* s;
+  s = calloc(6+2, sizeof(val_char_t));
+  val_port_t *p;
+  p = calloc(1, sizeof(struct val_port_t));
+  if (!s || !p) {
+    free(s);
+    free(p);
+    fclose(f);
+    error_handler(NULL);
+  }
+
+  s->len = 4;
+  memcpy(s->codepoints, (val_char_t[]){'p', 'o', 'r', 't'},
+         4 * sizeof(val_char_t));
+
+  p->symbol = val_wrap_symb(s);
+  p->fp = f;
+
+  return val_wrap_port(p);
+}
+
+/* Make sure the port buffer holds an unread byte, refilling it from the
+   file once it is exhausted.  Yields 0 when no further byte could be read. */
+static int
+populate_buffer(val_port_t *p)
+{
+  if (p->offset < p->len)
+    return 1;
+
+  p->len = fread(p->buf, 1, port_buffer_bytes, p->fp);
+  p->offset = 0;
+
+  return p->len > 0;
+}
+
+/* Consume and yield the next byte of the port, or the eof value. */
+val_t read_byte_port(val_t port)
+{
+  int has_bytes;
+  char c;
+  val_port_t *p = val_unwrap_port(port);
+
+  if (p->closed)
+    error_handler(NULL);
+
+  has_bytes = populate_buffer(p);
+  if (!has_bytes)
+    return val_wrap_eof();
+
+  c = p->buf[p->offset];
+  p->offset++;
+
+  return val_wrap_int((unsigned char)c);
+}
+
+/* Yield, without consuming it, the byte `skip` positions past the port's
+   read position, or the eof value if the file ends first.  Only skips
+   below port_buffer_bytes can be served; others go to error_handler. */
+val_t peek_byte_port(val_t port, val_t skip)
+{
+  int has_bytes;
+  char c;
+  val_port_t *p = val_unwrap_port(port);
+
+  int64_t sk = val_unwrap_int(skip);
+
+  if (p->closed)
+    error_handler(NULL);
+
+  if (sk < 0 || sk >= port_buffer_bytes)
+    error_handler(NULL);
+
+  has_bytes = populate_buffer(p);
+  if (!has_bytes)
+    return val_wrap_eof();
+
+  if ((int64_t)p->offset + sk >= (int64_t)p->len) {
+    /* the wanted byte is not buffered yet: slide the unread bytes to the
+       front of the buffer and top it up from the file */
+    size_t unread = p->len - p->offset;
+    memmove(p->buf, p->buf + p->offset, unread);
+    unread += fread(p->buf + unread, 1, port_buffer_bytes - unread, p->fp);
+    p->offset = 0;
+    p->len = unread;
+    if (sk >= (int64_t)unread)
+      return val_wrap_eof();
+  }
+
+  c = p->buf[p->offset+sk];
+
+  return val_wrap_int((unsigned char)c);
+}
diff --git a/langs/outlaw/lambdas.rkt b/langs/outlaw/lambdas.rkt
new file mode 100644
index 00000000..218cf570
--- /dev/null
+++ b/langs/outlaw/lambdas.rkt
@@ -0,0 +1,58 @@
+#lang racket
+(require "ast.rkt")
+(provide lambdas lambdas-ds)
+
+;; Prog -> [Listof
Lam] +;; List all of the lambda expressions in p +(define (lambdas p) + (match p + [(Prog ds) + (lambdas-ds ds)])) + +;; Defns -> [Listof Lam] +;; List all of the lambda expressions in ds +(define (lambdas-ds ds) + (match ds + ['() '()] + [(cons (Defn f l) ds) + (append (lambdas-e l) + (lambdas-ds ds))])) + +;; Expr -> [Listof Lam] +;; List all of the lambda expressions in e +(define (lambdas-e e) + (match e + [(Prim p es) (append-map lambdas-e es)] + [(If e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(Begin es) (append-map lambdas-e es)] + [(Let xs es e) (append (append-map lambdas-e es) (lambdas-e e))] + [(App e1 es) (append (lambdas-e e1) (append-map lambdas-e es))] + [(Lam f xs e1) (cons e (lambdas-e e1))] + [(LamRest f xs x e1) (cons e (lambdas-e e1))] + [(LamCase f cs) (cons e (lambdas-cs cs))] + [(Apply e es el) (append (lambdas-e e) (append-map lambdas-e es) (lambdas-e el))] + [(Match e ps es) (append (lambdas-e e) + (append-map lambdas-pat ps) + (append-map lambdas-e es))] + [_ '()])) + +;; [Listof LamCaseClause] -> [Listof Lam] +(define (lambdas-cs cs) + (match cs + ['() '()] + [(cons (Lam f xs e) cs) + (append (lambdas-e e) + (lambdas-cs cs))] + [(cons (LamRest f xs x e) cs) + (append (lambdas-e e) + (lambdas-cs cs))])) + +;; Pat -> [Listof Lam] +(define (lambdas-pat p) + (match p + [(PBox p) (lambdas-pat p)] + [(PCons p1 p2) (append (lambdas-pat p1) (lambdas-pat p2))] + [(PAnd p1 p2) (append (lambdas-pat p1) (lambdas-pat p2))] + [(PStruct n ps) (append-map lambdas-pat ps)] + [(PPred e) (lambdas-e e)] + [_ '()])) diff --git a/langs/outlaw/main.c b/langs/outlaw/main.c new file mode 100644 index 00000000..39225a9d --- /dev/null +++ b/langs/outlaw/main.c @@ -0,0 +1,44 @@ +#include +#include +#include "values.h" +#include "print.h" +#include "runtime.h" + +FILE* in; +FILE* out; +void (*error_handler)(val_str_t* msg); +val_t *heap; + +void error_exit(val_str_t* msg) +{ + if (msg) { + print_str(msg); + } else { + printf("err\n"); + } + 
exit(1); +} + +void raise_error(val_str_t* msg) +{ + return error_handler(msg); +} + +int main(int argc, char** argv) +{ + in = stdin; + out = stdout; + error_handler = &error_exit; + heap = malloc(8 * heap_size); + + val_t result; + + result = entry(heap); + + print_result(result); + if (val_typeof(result) != T_VOID) + putchar('\n'); + + free(heap); + return 0; +} diff --git a/langs/outlaw/os.c b/langs/outlaw/os.c new file mode 100644 index 00000000..7aeac578 --- /dev/null +++ b/langs/outlaw/os.c @@ -0,0 +1,8 @@ +// return 1 for macosx, 0 otherwise (assumed to be unix) +int system_type() { + #if __APPLE__ + return 1; + #else + return 0; + #endif +} diff --git a/langs/outlaw/parse.rkt b/langs/outlaw/parse.rkt new file mode 100644 index 00000000..ef1ffc2d --- /dev/null +++ b/langs/outlaw/parse.rkt @@ -0,0 +1,364 @@ +#lang racket +(provide parse parse-define parse-e parse-library) +(require "stdlib.rkt" "ast.rkt") + +;; [Listof S-Expr] -> Prog +(define (parse s) + (match s + ['() (Prog '())] + [(cons (and (cons (? def-keyword?) _) d) '()) + (Prog (append (parse-define d) + (list (Defn (gensym) (parse-e '(void))))))] + [(cons (and (cons (? def-keyword?) _) d) s) + (match (parse s) + [(Prog ds) + (Prog (append (parse-define d) ds))])] + [(cons (cons 'provide _) s) ; ignore provides for now + (parse s)] + [(cons (cons 'require _) s) ; ignore requires for now + (parse s)] + [(cons (cons 'module+ _) s) ; ignore submodules for now + (parse s)] + ;; Doesn't quite work and will make parse depend on read + #; + [(cons (cons 'require fs) s) + (match (parse s) + [(Prog ds) + (Prog (append (load-files loaded fs) ds))])] + [(cons e s) + (match (parse s) + [(Prog ds) + (Prog (cons (Defn (gensym) (parse-e e)) ds))])] + [_ (error "program parse error" s)])) + +(define (def-keyword? x) + (or (eq? x 'define) + (eq? 
x 'struct))) + +;; [Listof S-Expr] -> Lib +(define (parse-library s) + (match s + [(cons (cons 'provide ids) + (cons (cons 'require _) ds)) + (match (parse ds) + [(Prog ds) + (Lib ids ds)])])) + +;; [Listof S-Expr] -> [Listof Defn] +(define (parse-ds s) + (match s + ['() '()] + [(cons d ds) + (append (parse-define d) + (parse-ds ds))])) + +;; S-Expr -> [Listof Defn] +(define (parse-define s) + (match s + [(list 'define (cons f xs) e) + (match (parse-param-list xs e) + [(Lam l xs e) + (list (Defn f (Lam l xs e)))] + [(LamRest l xs x e) + (list (Defn f (LamRest l xs x e)))])] + [(list 'define f (cons 'case-lambda cs)) + (list (Defn f (LamCase (gensym 'lamcase) + (parse-case-lambda-clauses cs))))] + [(list 'define (? symbol? x) e) + (match (parse-e e) + [e (list (Defn x e))])] + [(cons 'struct _) + (parse-struct s)] + [_ (error "Parse defn error" s)])) + + ;; S-Expr -> [Listof Defn] +(define (parse-struct s) + (match s + [(list 'struct (? symbol? n) flds) + (if (andmap symbol? flds) + (list* (make-struct-defn-construct n flds) + (make-struct-defn-predicate n) + (make-struct-defn-accessors n (reverse flds))) + (error "parse struct definition error"))] + [_ (error "parse struct definition error")])) + +;; Id [Listof Id] -> [Listof Defn] +(define (make-struct-defn-construct n flds) + (Defn n + (Lam (gensym 'lam) + flds + (Prim 'make-struct (cons (Quote n) (map Var flds)))))) + +;; Id -> [Listof Defn] +(define (make-struct-defn-predicate n) + (Defn (symbol-append n '?) + (Lam (gensym 'lam) + (list 'x) + (Prim 'struct? (list (Quote n) (Var 'x)))))) + +;; Id [Listof Id] -> [Listof Defn] +(define (make-struct-defn-accessors n flds) + (match flds + ['() '()] + [(cons f flds) + (cons (Defn (symbol-append n '- f) + (Lam (gensym 'lam) + (list 'x) + (Prim 'struct-ref + (list (Quote n) + (Quote (length flds)) + (Var 'x))))) + (make-struct-defn-accessors n flds))])) + +;; Symbol ... -> Symbol +(define (symbol-append . 
ss) + (string->symbol + (apply string-append (map symbol->string ss)))) + +;; S-Expr -> Expr +(define (parse-e s) + (match s + [(? self-quoting?) (Quote (parse-datum s))] + [(list 'quote d) (Quote (parse-datum d))] + ['eof (Eof)] + [(? symbol?) (Var s)] + [(list (? (op% op0) p0)) (Prim (drop-% p0) '())] + [(list (? (op% op1) p1) e) (Prim (drop-% p1) (list (parse-e e)))] + [(list (? (op% op2) p2) e1 e2) (Prim (drop-% p2) (list (parse-e e1) (parse-e e2)))] + [(list (? (op% op3) p3) e1 e2 e3) + (Prim (drop-% p3) (list (parse-e e1) (parse-e e2) (parse-e e3)))] + [(cons 'begin es) + (Begin (parse-es es))] + [(list 'if e1 e2 e3) + (If (parse-e e1) (parse-e e2) (parse-e e3))] + [(cons 'let s) (parse-let s)] + [(cons 'match s) (parse-match s)] + [(list 'λ xs e) + (parse-param-list xs e)] + [(list 'lambda xs e) + (parse-param-list xs e)] + [(cons 'case-lambda cs) + (LamCase (gensym 'lamcase) + (parse-case-lambda-clauses cs))] + [(cons 'apply (cons e es)) + (parse-apply (parse-e e) es)] + [(list 'cond (list 'else e)) (parse-e e)] + [(cons 'cond (cons (list e1 e2) r)) + (If (parse-e e1) + (parse-e e2) + (parse-e (cons 'cond r)))] + [(cons 'or '()) + (Quote #f)] + [(cons 'or (cons e es)) + (let ((x (gensym 'or))) + (Let (list x) (list (parse-e e)) + (If (Var x) (Var x) (parse-e (cons 'or es)))))] + [(cons 'and '()) + (Quote #t)] + [(cons 'and (cons e '())) + (parse-e e)] + [(cons 'and (cons e es)) + (If (parse-e e) + (parse-e (cons 'and es)) + (Quote #f))] + [(cons e es) + (App (parse-e e) (map parse-e es))] + [_ (error "Parse error" s)])) + +(define (parse-es es) + (match es + ['() '()] + [(cons e es) + (cons (parse-e e) (parse-es es))] + [_ (error "parse es")])) + +;; S-Expr -> Expr +(define (parse-let s) + (match s + ['() (error "parse error (let)")] + [(cons s1 s2) + (parse-let-bindings s1 s2 '() '())] + [_ (error "parse error let" s)])) + +;; S-Expr S-Expr [Listof Id] [Listof Expr] -> Expr +(define (parse-let-bindings s1 s2 xs es) + (match s1 + ['() (parse-let-body s2 
(reverse xs) (reverse es))] + [(cons (list (? symbol? x) e) s1) + (parse-let-bindings s1 s2 (cons x xs) (cons (parse-e e) es))])) + +;; S-Expr [Listof Id] [Listof Expr] -> Expr +(define (parse-let-body s xs es) + (match s + ['() (error "parse error let-body")] + [(cons e '()) + (Let xs es (parse-e e))] + [_ + (Let xs es (Begin (parse-es s)))])) + +;; Expr S-Expr -> Expr +(define (parse-apply e es) + (match es + [(list el) (Apply e '() (parse-e el))] + [(cons e0 es) + (match (parse-apply e es) + [(Apply e es el) + (Apply e (cons (parse-e e0) es) el)])] + [_ (error "parse apply error")])) + + +;; S-Expr -> Expr +(define (parse-match s) + (match s + ['() (error "parse error match")] + [(cons e s) + (parse-match-clauses s (parse-e e) '() '())])) + +;; S-Expr Expr [Listof Pat] [Listof Expr] -> Expr +(define (parse-match-clauses s e ps es) + (match s + ['() (Match e (reverse ps) (reverse es))] + [(cons c s) + (parse-match-clause c s e ps es)] + [_ (error "parse error match clause")])) + +(define (parse-match-clause c s e ps es) + (match c + [(list p e1) + (parse-match-clauses s e (cons (parse-pat p) ps) (cons (parse-e e1) es))] + [(list* p es1) + (parse-match-clauses s e (cons (parse-pat p) ps) (cons (Begin (parse-es es1)) es))] + [_ + (error "parse error clause")])) + +(define (parse-pat p) + (match p + [(? boolean?) (PLit p)] + [(? exact-integer?) (PLit p)] + [(? char?) (PLit p)] + ['_ (PWild)] + [(? symbol?) (PVar p)] + [(? string?) (PStr p)] + [(list 'quote (? symbol? s)) + (PSymb s)] + [(list 'quote (list)) + (PLit '())] + [(list 'box p) + (PBox (parse-pat p))] + [(list 'cons p1 p2) + (PCons (parse-pat p1) (parse-pat p2))] + [(list 'and) (PWild)] + [(list 'and p) (parse-pat p)] + [(cons 'and (cons p ps)) + (PAnd (parse-pat p) (parse-pat (cons 'and ps)))] + [(cons 'list '()) + (PLit '())] + [(cons 'list (cons p1 ps)) + (PCons (parse-pat p1) + (parse-pat (cons 'list ps)))] + [(list '? e) + (PPred (parse-e e))] + [(cons '? (cons e ps)) + (PAnd (parse-pat (list '? 
e)) + (parse-pat (cons 'and ps)))] + [(cons (? symbol? n) ps) + (PStruct n (map parse-pat ps))])) + +;; S-Expr -> [Listof LamCaseClause] +(define (parse-case-lambda-clauses cs) + (match cs + ['() '()] + [(cons c cs) + (cons (parse-case-lambda-clause c) + (parse-case-lambda-clauses cs))] + [_ + (error "parse case-lambda error")])) + +;; S-Expr -> LamCaseClause +(define (parse-case-lambda-clause c) + (match c + [(list xs e) + (parse-param-list xs e)])) + +;; S-Expr S-Expr -> Lam or LamRest +(define (parse-param-list xs e) + (match xs + ['() (Lam (gensym 'lam) '() (parse-e e))] + [(cons x xs) + (match (parse-param-list xs e) + [(Lam f xs e) (Lam f (cons x xs) e)] + [(LamRest f xs y e) (LamRest f (cons x xs) y e)])] + [(? symbol? xs) + (LamRest (gensym 'lamrest) '() xs (parse-e e))] + [_ + (error "parse parameter list error")])) + +;; Datum -> Datum +(define (parse-datum d) + (match d + [(box d) + (box (parse-datum d))] + [(cons d1 d2) + (cons (parse-datum d1) (parse-datum d2))] + ['() '()] + [(? symbol? s) s] + [(? exact-integer? i) i] + [(? boolean? b) b] + [(? string? s) s] + [(? char? c) c] + [(? vector? v) + (apply vector (map parse-datum (vector->list v)))] + [_ (error "parse datum error")])) + +(define (self-quoting? x) + (or (exact-integer? x) + (boolean? x) + (char? x) + (string? x) + (box? x) + (vector? x))) + +(define op0 + '(read-byte void read-char peek-char + current-input-port ; hack, doesn't actually exist + system-type + )) + +(define op1 + '(add1 sub1 zero? char? write-byte eof-object? + integer->char char->integer + box unbox empty? cons? box? car cdr + vector? vector-length string? string-length + symbol->string string->symbol symbol? + number->string string->uninterned-symbol + open-input-file + read-byte-port + write-char + error integer? + eq-hash-code + char-alphabetic? char-whitespace? + procedure?)) +(define op2 + '(+ - < = cons eq? make-vector vector-ref make-string string-ref + string-append set-box! 
quotient remainder + bitwise-and bitwise-ior bitwise-xor arithmetic-shift + peek-byte)) +(define op3 + '(vector-set!)) + +(define (op? ops) + (λ (x) + (and (symbol? x) + (memq x ops)))) + + +(define (op% ops) + (λ (x) + (and (symbol? x) + (eq? #\% (string-ref (symbol->string x) 0)) + (let ((x* (drop-% x))) + (and (memq x* ops) + x*))))) + +(define (drop-% x) + (string->symbol (substring (symbol->string x) 1))) diff --git a/langs/outlaw/print.c b/langs/outlaw/print.c new file mode 100644 index 00000000..462c5514 --- /dev/null +++ b/langs/outlaw/print.c @@ -0,0 +1,884 @@ +#include +#include +#include +#include "values.h" + +void print_char(val_char_t); +void print_codepoint(val_char_t); +void print_cons(val_cons_t *); +void print_vect(val_vect_t*); +void print_str(val_str_t*); +void print_symb(val_symb_t*); +void print_struct(val_struct_t *); +void print_str_char(val_char_t); +void print_result_interior(val_t); +int utf8_encode_char(val_char_t, char *); +void utf8_encode_string(val_str_t *, char *); + +val_t is_char_alphabetic(val_char_t c) { + return val_wrap_bool(uc_is_property_alphabetic(c)); +} + +val_t is_char_whitespace(val_char_t c) { + return val_wrap_bool(uc_is_property_white_space(c)); +} + +void print_result(val_t x) +{ + switch (val_typeof(x)) { + case T_INT: + printf("%" PRId64, val_unwrap_int(x)); + break; + case T_BOOL: + printf(val_unwrap_bool(x) ? 
"#t" : "#f"); + break; + case T_CHAR: + print_char(val_unwrap_char(x)); + break; + case T_EOF: + printf("#"); + break; + case T_VOID: + break; + case T_EMPTY: + case T_BOX: + case T_CONS: + case T_VECT: + printf("'"); + print_result_interior(x); + break; + case T_STR: + putchar('"'); + print_str(val_unwrap_str(x)); + putchar('"'); + break; + case T_SYMB: + printf("'"); + print_result_interior(x); + break; + case T_PROC: + printf("#"); + break; + case T_STRUCT: + print_struct(val_unwrap_struct(x)); + break; + case T_INVALID: + printf("internal error"); + } +} + +void print_struct(val_struct_t *s) { + printf("#<"); + print_result_interior(s->name); + printf(">"); +} + +void print_symb(val_symb_t *s) +{ + print_str((val_str_t*) s); +} + +void print_result_interior(val_t x) +{ + switch (val_typeof(x)) { + case T_EMPTY: + printf("()"); + break; + case T_BOX: + printf("#&"); + print_result_interior(val_unwrap_box(x)->val); + break; + case T_CONS: + printf("("); + print_cons(val_unwrap_cons(x)); + printf(")"); + break; + case T_SYMB: + print_symb(val_unwrap_symb(x)); + break; + case T_VECT: + print_vect(val_unwrap_vect(x)); + break; + default: + print_result(x); + } +} + +void print_vect(val_vect_t *v) +{ + uint64_t i; + + if (!v) { printf("#()"); return; } + + printf("#("); + for (i = 0; i < v->len; ++i) { + print_result_interior(v->elems[i]); + + if (i < v->len - 1) + putchar(' '); + } + printf(")"); +} + +void print_cons(val_cons_t *cons) +{ + print_result_interior(cons->fst); + + switch (val_typeof(cons->snd)) { + case T_EMPTY: + // nothing + break; + case T_CONS: + printf(" "); + print_cons(val_unwrap_cons(cons->snd)); + break; + default: + printf(" . 
"); + print_result_interior(cons->snd); + break; + } +} + +void print_str(val_str_t* s) +{ + if (!s) return; + uint64_t i; + for (i = 0; i < s->len; ++i) + print_str_char(s->codepoints[i]); +} + +void print_str_char_u(val_char_t c) +{ + printf("\\u%04X", c); +} + +void print_str_char_U(val_char_t c) +{ + printf("\\U%08X", c); +} + +void print_str_char(val_char_t c) +{ + switch (c) { + case 0 ... 6: + print_str_char_u(c); + break; + case 7: + printf("\\a"); + break; + case 8: + printf("\\b"); + break; + case 9: + printf("\\t"); + break; + case 10: + printf("\\n"); + break; + case 11: + printf("\\v"); + break; + case 12: + printf("\\f"); + break; + case 13: + printf("\\r"); + break; + case 14 ... 26: + print_str_char_u(c); + break; + case 27: + printf("\\e"); + break; + case 28 ... 31: + print_str_char_u(c); + break; + case 34: + printf("\\\""); + break; + case 39: + printf("'"); + break; + case 92: + printf("\\\\"); + break; + case 127 ... 159: + case 173 ... 173: + case 888 ... 889: + case 896 ... 899: + case 907 ... 907: + case 909 ... 909: + case 930 ... 930: + case 1328 ... 1328: + case 1367 ... 1368: + case 1376 ... 1376: + case 1416 ... 1416: + case 1419 ... 1420: + case 1424 ... 1424: + case 1480 ... 1487: + case 1515 ... 1519: + case 1525 ... 1541: + case 1564 ... 1565: + case 1757 ... 1757: + case 1806 ... 1807: + case 1867 ... 1868: + case 1970 ... 1983: + case 2043 ... 2047: + case 2094 ... 2095: + case 2111 ... 2111: + case 2140 ... 2141: + case 2143 ... 2207: + case 2227 ... 2275: + case 2436 ... 2436: + case 2445 ... 2446: + case 2449 ... 2450: + case 2473 ... 2473: + case 2481 ... 2481: + case 2483 ... 2485: + case 2490 ... 2491: + case 2501 ... 2502: + case 2505 ... 2506: + case 2511 ... 2518: + case 2520 ... 2523: + case 2526 ... 2526: + case 2532 ... 2533: + case 2556 ... 2560: + case 2564 ... 2564: + case 2571 ... 2574: + case 2577 ... 2578: + case 2601 ... 2601: + case 2609 ... 2609: + case 2612 ... 2612: + case 2615 ... 2615: + case 2618 ... 
2619: + case 2621 ... 2621: + case 2627 ... 2630: + case 2633 ... 2634: + case 2638 ... 2640: + case 2642 ... 2648: + case 2653 ... 2653: + case 2655 ... 2661: + case 2678 ... 2688: + case 2692 ... 2692: + case 2702 ... 2702: + case 2706 ... 2706: + case 2729 ... 2729: + case 2737 ... 2737: + case 2740 ... 2740: + case 2746 ... 2747: + case 2758 ... 2758: + case 2762 ... 2762: + case 2766 ... 2767: + case 2769 ... 2783: + case 2788 ... 2789: + case 2802 ... 2816: + case 2820 ... 2820: + case 2829 ... 2830: + case 2833 ... 2834: + case 2857 ... 2857: + case 2865 ... 2865: + case 2868 ... 2868: + case 2874 ... 2875: + case 2885 ... 2886: + case 2889 ... 2890: + case 2894 ... 2901: + case 2904 ... 2907: + case 2910 ... 2910: + case 2916 ... 2917: + case 2936 ... 2945: + case 2948 ... 2948: + case 2955 ... 2957: + case 2961 ... 2961: + case 2966 ... 2968: + case 2971 ... 2971: + case 2973 ... 2973: + case 2976 ... 2978: + case 2981 ... 2983: + case 2987 ... 2989: + case 3002 ... 3005: + case 3011 ... 3013: + case 3017 ... 3017: + case 3022 ... 3023: + case 3025 ... 3030: + case 3032 ... 3045: + case 3067 ... 3071: + case 3076 ... 3076: + case 3085 ... 3085: + case 3089 ... 3089: + case 3113 ... 3113: + case 3130 ... 3132: + case 3141 ... 3141: + case 3145 ... 3145: + case 3150 ... 3156: + case 3159 ... 3159: + case 3162 ... 3167: + case 3172 ... 3173: + case 3184 ... 3191: + case 3200 ... 3200: + case 3204 ... 3204: + case 3213 ... 3213: + case 3217 ... 3217: + case 3241 ... 3241: + case 3252 ... 3252: + case 3258 ... 3259: + case 3269 ... 3269: + case 3273 ... 3273: + case 3278 ... 3284: + case 3287 ... 3293: + case 3295 ... 3295: + case 3300 ... 3301: + case 3312 ... 3312: + case 3315 ... 3328: + case 3332 ... 3332: + case 3341 ... 3341: + case 3345 ... 3345: + case 3387 ... 3388: + case 3397 ... 3397: + case 3401 ... 3401: + case 3407 ... 3414: + case 3416 ... 3423: + case 3428 ... 3429: + case 3446 ... 3448: + case 3456 ... 3457: + case 3460 ... 
3460: + case 3479 ... 3481: + case 3506 ... 3506: + case 3516 ... 3516: + case 3518 ... 3519: + case 3527 ... 3529: + case 3531 ... 3534: + case 3541 ... 3541: + case 3543 ... 3543: + case 3552 ... 3557: + case 3568 ... 3569: + case 3573 ... 3584: + case 3643 ... 3646: + case 3676 ... 3712: + case 3715 ... 3715: + case 3717 ... 3718: + case 3721 ... 3721: + case 3723 ... 3724: + case 3726 ... 3731: + case 3736 ... 3736: + case 3744 ... 3744: + case 3748 ... 3748: + case 3750 ... 3750: + case 3752 ... 3753: + case 3756 ... 3756: + case 3770 ... 3770: + case 3774 ... 3775: + case 3781 ... 3781: + case 3783 ... 3783: + case 3790 ... 3791: + case 3802 ... 3803: + case 3808 ... 3839: + case 3912 ... 3912: + case 3949 ... 3952: + case 3992 ... 3992: + case 4029 ... 4029: + case 4045 ... 4045: + case 4059 ... 4095: + case 4294 ... 4294: + case 4296 ... 4300: + case 4302 ... 4303: + case 4681 ... 4681: + case 4686 ... 4687: + case 4695 ... 4695: + case 4697 ... 4697: + case 4702 ... 4703: + case 4745 ... 4745: + case 4750 ... 4751: + case 4785 ... 4785: + case 4790 ... 4791: + case 4799 ... 4799: + case 4801 ... 4801: + case 4806 ... 4807: + case 4823 ... 4823: + case 4881 ... 4881: + case 4886 ... 4887: + case 4955 ... 4956: + case 4989 ... 4991: + case 5018 ... 5023: + case 5109 ... 5119: + case 5789 ... 5791: + case 5881 ... 5887: + case 5901 ... 5901: + case 5909 ... 5919: + case 5943 ... 5951: + case 5972 ... 5983: + case 5997 ... 5997: + case 6001 ... 6001: + case 6004 ... 6015: + case 6110 ... 6111: + case 6122 ... 6127: + case 6138 ... 6143: + case 6158 ... 6159: + case 6170 ... 6175: + case 6264 ... 6271: + case 6315 ... 6319: + case 6390 ... 6399: + case 6431 ... 6431: + case 6444 ... 6447: + case 6460 ... 6463: + case 6465 ... 6467: + case 6510 ... 6511: + case 6517 ... 6527: + case 6572 ... 6575: + case 6602 ... 6607: + case 6619 ... 6621: + case 6684 ... 6685: + case 6751 ... 6751: + case 6781 ... 6782: + case 6794 ... 6799: + case 6810 ... 
6815: + case 6830 ... 6831: + case 6847 ... 6911: + case 6988 ... 6991: + case 7037 ... 7039: + case 7156 ... 7163: + case 7224 ... 7226: + case 7242 ... 7244: + case 7296 ... 7359: + case 7368 ... 7375: + case 7415 ... 7415: + case 7418 ... 7423: + case 7670 ... 7675: + case 7958 ... 7959: + case 7966 ... 7967: + case 8006 ... 8007: + case 8014 ... 8015: + case 8024 ... 8024: + case 8026 ... 8026: + case 8028 ... 8028: + case 8030 ... 8030: + case 8062 ... 8063: + case 8117 ... 8117: + case 8133 ... 8133: + case 8148 ... 8149: + case 8156 ... 8156: + case 8176 ... 8177: + case 8181 ... 8181: + case 8191 ... 8191: + case 8203 ... 8207: + case 8232 ... 8238: + case 8288 ... 8303: + case 8306 ... 8307: + case 8335 ... 8335: + case 8349 ... 8351: + case 8382 ... 8399: + case 8433 ... 8447: + case 8586 ... 8591: + case 9211 ... 9215: + case 9255 ... 9279: + case 9291 ... 9311: + case 11124 ... 11125: + case 11158 ... 11159: + case 11194 ... 11196: + case 11209 ... 11209: + case 11218 ... 11263: + case 11311 ... 11311: + case 11359 ... 11359: + case 11508 ... 11512: + case 11558 ... 11558: + case 11560 ... 11564: + case 11566 ... 11567: + case 11624 ... 11630: + case 11633 ... 11646: + case 11671 ... 11679: + case 11687 ... 11687: + case 11695 ... 11695: + case 11703 ... 11703: + case 11711 ... 11711: + case 11719 ... 11719: + case 11727 ... 11727: + case 11735 ... 11735: + case 11743 ... 11743: + case 11843 ... 11903: + case 11930 ... 11930: + case 12020 ... 12031: + case 12246 ... 12271: + case 12284 ... 12287: + case 12352 ... 12352: + case 12439 ... 12440: + case 12544 ... 12548: + case 12590 ... 12592: + case 12687 ... 12687: + case 12731 ... 12735: + case 12772 ... 12783: + case 12831 ... 12831: + case 13055 ... 13055: + case 19894 ... 19903: + case 40909 ... 40959: + case 42125 ... 42127: + case 42183 ... 42191: + case 42540 ... 42559: + case 42654 ... 42654: + case 42744 ... 42751: + case 42895 ... 42895: + case 42926 ... 42927: + case 42930 ... 
42998: + case 43052 ... 43055: + case 43066 ... 43071: + case 43128 ... 43135: + case 43205 ... 43213: + case 43226 ... 43231: + case 43260 ... 43263: + case 43348 ... 43358: + case 43389 ... 43391: + case 43470 ... 43470: + case 43482 ... 43485: + case 43519 ... 43519: + case 43575 ... 43583: + case 43598 ... 43599: + case 43610 ... 43611: + case 43715 ... 43738: + case 43767 ... 43776: + case 43783 ... 43784: + case 43791 ... 43792: + case 43799 ... 43807: + case 43815 ... 43815: + case 43823 ... 43823: + case 43872 ... 43875: + case 43878 ... 43967: + case 44014 ... 44015: + case 44026 ... 44031: + case 55204 ... 55215: + case 55239 ... 55242: + case 55292 ... 55295: + case 57344 ... 63743: + case 64110 ... 64111: + case 64218 ... 64255: + case 64263 ... 64274: + case 64280 ... 64284: + case 64311 ... 64311: + case 64317 ... 64317: + case 64319 ... 64319: + case 64322 ... 64322: + case 64325 ... 64325: + case 64450 ... 64466: + case 64832 ... 64847: + case 64912 ... 64913: + case 64968 ... 65007: + case 65022 ... 65023: + case 65050 ... 65055: + case 65070 ... 65071: + case 65107 ... 65107: + case 65127 ... 65127: + case 65132 ... 65135: + case 65141 ... 65141: + case 65277 ... 65280: + case 65471 ... 65473: + case 65480 ... 65481: + case 65488 ... 65489: + case 65496 ... 65497: + case 65501 ... 65503: + case 65511 ... 65511: + case 65519 ... 65531: + case 65534 ... 65535: + print_str_char_u(c); + break; + case 65548 ... 65548: + case 65575 ... 65575: + case 65595 ... 65595: + case 65598 ... 65598: + case 65614 ... 65615: + case 65630 ... 65663: + case 65787 ... 65791: + case 65795 ... 65798: + case 65844 ... 65846: + case 65933 ... 65935: + case 65948 ... 65951: + case 65953 ... 65999: + case 66046 ... 66175: + case 66205 ... 66207: + case 66257 ... 66271: + case 66300 ... 66303: + case 66340 ... 66351: + case 66379 ... 66383: + case 66427 ... 66431: + case 66462 ... 66462: + case 66500 ... 66503: + case 66518 ... 66559: + case 66718 ... 66719: + case 66730 ... 
66815: + case 66856 ... 66863: + case 66916 ... 66926: + case 66928 ... 67071: + case 67383 ... 67391: + case 67414 ... 67423: + case 67432 ... 67583: + case 67590 ... 67591: + case 67593 ... 67593: + case 67638 ... 67638: + case 67641 ... 67643: + case 67645 ... 67646: + case 67670 ... 67670: + case 67743 ... 67750: + case 67760 ... 67839: + case 67868 ... 67870: + case 67898 ... 67902: + case 67904 ... 67967: + case 68024 ... 68029: + case 68032 ... 68095: + case 68100 ... 68100: + case 68103 ... 68107: + case 68116 ... 68116: + case 68120 ... 68120: + case 68148 ... 68151: + case 68155 ... 68158: + case 68168 ... 68175: + case 68185 ... 68191: + case 68256 ... 68287: + case 68327 ... 68330: + case 68343 ... 68351: + case 68406 ... 68408: + case 68438 ... 68439: + case 68467 ... 68471: + case 68498 ... 68504: + case 68509 ... 68520: + case 68528 ... 68607: + case 68681 ... 69215: + case 69247 ... 69631: + case 69710 ... 69713: + case 69744 ... 69758: + case 69821 ... 69821: + case 69826 ... 69839: + case 69865 ... 69871: + case 69882 ... 69887: + case 69941 ... 69941: + case 69956 ... 69967: + case 70007 ... 70015: + case 70089 ... 70092: + case 70094 ... 70095: + case 70107 ... 70112: + case 70133 ... 70143: + case 70162 ... 70162: + case 70206 ... 70319: + case 70379 ... 70383: + case 70394 ... 70400: + case 70404 ... 70404: + case 70413 ... 70414: + case 70417 ... 70418: + case 70441 ... 70441: + case 70449 ... 70449: + case 70452 ... 70452: + case 70458 ... 70459: + case 70469 ... 70470: + case 70473 ... 70474: + case 70478 ... 70486: + case 70488 ... 70492: + case 70500 ... 70501: + case 70509 ... 70511: + case 70517 ... 70783: + case 70856 ... 70863: + case 70874 ... 71039: + case 71094 ... 71095: + case 71114 ... 71167: + case 71237 ... 71247: + case 71258 ... 71295: + case 71352 ... 71359: + case 71370 ... 71839: + case 71923 ... 71934: + case 71936 ... 72383: + case 72441 ... 73727: + case 74649 ... 74751: + case 74863 ... 74863: + case 74869 ... 
77823: + case 78895 ... 92159: + case 92729 ... 92735: + case 92767 ... 92767: + case 92778 ... 92781: + case 92784 ... 92879: + case 92910 ... 92911: + case 92918 ... 92927: + case 92998 ... 93007: + case 93018 ... 93018: + case 93026 ... 93026: + case 93048 ... 93052: + case 93072 ... 93951: + case 94021 ... 94031: + case 94079 ... 94094: + case 94112 ... 110591: + case 110594 ... 113663: + case 113771 ... 113775: + case 113789 ... 113791: + case 113801 ... 113807: + case 113818 ... 113819: + case 113824 ... 118783: + case 119030 ... 119039: + case 119079 ... 119080: + case 119155 ... 119162: + case 119262 ... 119295: + case 119366 ... 119551: + case 119639 ... 119647: + case 119666 ... 119807: + case 119893 ... 119893: + case 119965 ... 119965: + case 119968 ... 119969: + case 119971 ... 119972: + case 119975 ... 119976: + case 119981 ... 119981: + case 119994 ... 119994: + case 119996 ... 119996: + case 120004 ... 120004: + case 120070 ... 120070: + case 120075 ... 120076: + case 120085 ... 120085: + case 120093 ... 120093: + case 120122 ... 120122: + case 120127 ... 120127: + case 120133 ... 120133: + case 120135 ... 120137: + case 120145 ... 120145: + case 120486 ... 120487: + case 120780 ... 120781: + case 120832 ... 124927: + case 125125 ... 125126: + case 125143 ... 126463: + case 126468 ... 126468: + case 126496 ... 126496: + case 126499 ... 126499: + case 126501 ... 126502: + case 126504 ... 126504: + case 126515 ... 126515: + case 126520 ... 126520: + case 126522 ... 126522: + case 126524 ... 126529: + case 126531 ... 126534: + case 126536 ... 126536: + case 126538 ... 126538: + case 126540 ... 126540: + case 126544 ... 126544: + case 126547 ... 126547: + case 126549 ... 126550: + case 126552 ... 126552: + case 126554 ... 126554: + case 126556 ... 126556: + case 126558 ... 126558: + case 126560 ... 126560: + case 126563 ... 126563: + case 126565 ... 126566: + case 126571 ... 126571: + case 126579 ... 126579: + case 126584 ... 126584: + case 126589 ... 
126589: + case 126591 ... 126591: + case 126602 ... 126602: + case 126620 ... 126624: + case 126628 ... 126628: + case 126634 ... 126634: + case 126652 ... 126703: + case 126706 ... 126975: + case 127020 ... 127023: + case 127124 ... 127135: + case 127151 ... 127152: + case 127168 ... 127168: + case 127184 ... 127184: + case 127222 ... 127231: + case 127245 ... 127247: + case 127279 ... 127279: + case 127340 ... 127343: + case 127387 ... 127461: + case 127491 ... 127503: + case 127547 ... 127551: + case 127561 ... 127567: + case 127570 ... 127743: + case 127789 ... 127791: + case 127870 ... 127871: + case 127951 ... 127955: + case 127992 ... 127999: + case 128255 ... 128255: + case 128331 ... 128335: + case 128378 ... 128378: + case 128420 ... 128420: + case 128579 ... 128580: + case 128720 ... 128735: + case 128749 ... 128751: + case 128756 ... 128767: + case 128884 ... 128895: + case 128981 ... 129023: + case 129036 ... 129039: + case 129096 ... 129103: + case 129114 ... 129119: + case 129160 ... 129167: + case 129198 ... 131071: + case 173783 ... 173823: + case 177973 ... 177983: + case 178206 ... 194559: + case 195102 ... 917759: + case 918000 ... 
1114110: + print_str_char_U(c); + break; + default: + print_codepoint(c); + break; + } +} + +void print_char(val_char_t c) +{ + printf("#\\"); + switch (c) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(c); + } +} + +void print_codepoint(val_char_t c) +{ + char buffer[5] = {0}; + utf8_encode_char(c, buffer); + printf("%s", buffer); +} + +int utf8_encode_char(val_char_t c, char *buffer) +{ + // Output to buffer using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (c < 128) { + buffer[0] = (char) c; + return 1; + } else if (c < 2048) { + buffer[0] = (char)(c >> 6) | 192; + buffer[1] = ((char) c & 63) | 128; + return 2; + } else if (c < 65536) { + buffer[0] = (char)(c >> 12) | 224; + buffer[1] = ((char)(c >> 6) & 63) | 128; + buffer[2] = ((char) c & 63) | 128; + return 3; + } else { + buffer[0] = (char)(c >> 18) | 240; + buffer[1] = ((char)(c >> 12) & 63) | 128; + buffer[2] = ((char)(c >> 6) & 63) | 128; + buffer[3] = ((char) c & 63) | 128; + return 4; + } +} + +void utf8_encode_string(val_str_t *s, char *buffer) +{ + uint64_t i; + for (i = 0; i < s->len; i++) { + buffer += utf8_encode_char(s->codepoints[i], buffer); + } + *buffer = '\0'; +} diff --git a/langs/outlaw/print.h b/langs/outlaw/print.h new file mode 100644 index 00000000..fb79415c --- /dev/null +++ b/langs/outlaw/print.h @@ -0,0 +1,9 @@ +#ifndef PRINT_H +#define PRINT_H + +#include "values.h" + +void print_result(val_t); +void print_str(val_str_t*); + +#endif diff --git a/langs/outlaw/read-all.rkt b/langs/outlaw/read-all.rkt new file mode 100644 index 00000000..1bfd1765 --- /dev/null +++ b/langs/outlaw/read-all.rkt @@ -0,0 +1,10 @@ +#lang racket +(provide read-all) 
+(require "stdlib.rkt") +;; -> [Listof S-Expr] +;; read all s-expression until eof +(define (read-all) + (let ((r (read))) + (if (eof-object? r) + '() + (cons r (read-all))))) diff --git a/langs/outlaw/read-port.rkt b/langs/outlaw/read-port.rkt new file mode 100644 index 00000000..99969408 --- /dev/null +++ b/langs/outlaw/read-port.rkt @@ -0,0 +1,1028 @@ +#lang racket +(provide read) + +;; Port -> Any +;; Read an s-expression from given port +(define (read p) + (let ((r ( p))) + (if (err? r) + (error (err-msg r)) + r))) + +(struct err (port msg)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Start + +(module+ test + (define (p s) + ( (open-input-string s))) + + (check-equal? (p "") eof) + (check-equal? (p " ") eof) + (check-equal? (p ";123") eof) + (check-equal? (p "#;123 ") eof) + (check-equal? (p "#;123") eof) + (check-equal? (p "#|123|# ") eof) + (check-equal? (p "#;#|123|#1 ") eof) + (check-equal? (p "#;#;1 2") eof) + (check-equal? (p "123") 123) + (check-equal? (p "#t") #t) + (check-equal? (p "#f") #f) + (check-equal? (p "#T") #t) + (check-equal? (p "#F") #f) + (check-equal? (p "#b0") 0) + (check-equal? (p "#b1") 1) + (check-equal? (p "#b101") #b101) + (check-equal? (p "#B101") #b101) + (check-equal? (p "#o0") 0) + (check-equal? (p "#o1") 1) + (check-equal? (p "#o701") #o701) + (check-equal? (p "#O701") #o701) + (check-equal? (p "#d0") 0) + (check-equal? (p "#d1") 1) + (check-equal? (p "#d901") 901) + (check-equal? (p "#D901") 901) + (check-equal? (p "#x0") 0) + (check-equal? (p "#x1") 1) + (check-equal? (p "#xF01") #xF01) + (check-equal? (p "#XF01") #xF01) + (check-equal? (p ";123\n1") 1) + (check-equal? (p "()") '()) + (check-equal? (p "[]") '()) + (check-equal? (p "{}") '()) + (check-equal? (p "(#t)") '(#t)) + (check-equal? (p "[#t]") '(#t)) + (check-equal? (p "{#t}") '(#t)) + (check-equal? (p "((#t))") '((#t))) + (check-equal? (p "#\\u ") #\u) + (check-equal? (p "#\\p\n") #\p) + (check-equal? (p "(1 . 2)") '(1 . 
2)) + (check-pred err? (p "#|")) + (check-pred err? (p "#;")) + (check-pred err? (p "(}")) + (check-pred err? (p "(]")) + (check-pred err? (p "[)")) + (check-pred err? (p "(x}")) + (check-pred err? (p "(x]")) + (check-pred err? (p "[x)")) + (check-pred err? (p "(x . y}")) + (check-pred err? (p "(x . y]")) + (check-pred err? (p "[x . y)"))) + +(define ( p) + (match (peek-char p) + [(? eof-object?) (read-char p)] + [(? char-whitespace?) (read-char p) ( p)] + [#\; ( p) ( p)] + [#\# (read-char p) + (match (peek-char p) + [#\| (read-char p) + (let ((r ( p))) + (if (err? r) r ( p)))] + [#\; (read-char p) + (let ((r ( p))) + (if (err? r) r ( p)))] + [_ ( p)])] + [_ ( p)])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Elem + +(module+ test + (define (pe s) + ( (open-input-string s))) + + (check-equal? (pe "1") 1) + (check-equal? (pe "x") 'x) + (check-equal? (pe "|x|") 'x) + (check-equal? (pe "'x") ''x) + (check-equal? (pe "`x") '`x) + (check-equal? (pe ",x") ',x) + (check-equal? (pe ",@x") ',@x) + (check-equal? (pe "\"x\"") "x") + (check-equal? (pe "(x)") '(x)) + (check-equal? (pe "('x)") '('x)) + (check-equal? (pe ";f\n1") 1) + (check-equal? (pe "#'x") '#'x) + (check-equal? (pe "#`x") '#`x) + (check-equal? (pe "#,x") '#,x) + (check-equal? (pe "#,@x") '#,@x) + (check-equal? (pe "#true") #t) + (check-equal? (pe "#false") #f) + (check-equal? (pe "#\\a") #\a) + (check-equal? (pe "#:a") '#:a) + + (check-pred err? (pe "'(.")) + (check-pred err? (pe "#")) + (check-pred err? (pe "#z")) + (check-pred err? (pe "#|")) + (check-pred err? (pe "|")) + (check-pred err? (pe "#;"))) + +(define ( p) + (match (read-char p) + [(? eof-object?) (err p "eof")] + [(? char-whitespace?) ( p)] + [#\| ( p)] + [#\" ( '() p)] + [#\# ( p)] + [(? open-paren? 
c) ( c p)] + [#\; ( p) ( p)] + [#\' ( 'quote p)] + [#\` ( 'quasiquote p)] + [#\, (match (peek-char p) + [#\@ (read-char p) ( 'unquote-splicing p)] + [_ ( 'unquote p)])] + [c ( c p)])) + +(define ( q p) + (let ((r ( p))) + (if (err? r) + r + (list q r)))) + +(define ( p) + (match (peek-char p) + [#\| (read-char p) + (let ((r ( p))) + (if (err? r) r ( p)))] + [#\; (read-char p) + (let ((r ( p))) + (if (err? r) r ( p)))] + [_ ( p)])) + +(define ( p) + (match (read-char p) + [(? eof-object?) (err p "bad syntax `#`")] + [#\T (committed-delim '() #t p)] + [#\F (committed-delim '() #f p)] ; could also be #Fl + [#\t (if (delim? p) #t (committed-delim '(#\r #\u #\e) #t p))] + ;; could also be #fl + [#\f (if (delim? p) #f (committed-delim '(#\a #\l #\s #\e) #f p))] + [#\( ( #\( p)] + [#\[ ( #\[ p)] + [#\{ ( #\{ p)] + [#\s (unimplemented "structure")] + [#\\ ( p)] + [#\: ( p)] + [#\& (unimplemented "boxes")] ; FIXME + [#\' ( 'syntax p)] + [#\! (unimplemented "shebang comment")] + [#\` ( 'quasisyntax p)] + [#\, (match (peek-char p) + [#\@ (read-char p) ( 'unsyntax-splicing p)] + [_ ( 'unsyntax p)])] + [#\~ (unimplemented "compiled code")] + [#\i (unimplemented "inexact number")] + [#\I (unimplemented "inexact number")] + [#\e (unimplemented "exact number")] + [#\E (unimplemented "exact number")] + [#\b ( char-digit2? char-digit2s->number p)] + [#\B ( char-digit2? char-digit2s->number p)] + [#\o ( char-digit8? char-digit8s->number p)] + [#\O ( char-digit8? char-digit8s->number p)] + [#\d ( char-digit10? char-digit10s->number p)] + [#\D ( char-digit10? char-digit10s->number p)] + [#\x ( char-digit16? char-digit16s->number p)] + [#\X ( char-digit16? char-digit16s->number p)] + [#\< ( p)] + [#\r (unimplemented "regexp or reader")] + [#\p (unimplemented "pregexp")] + [#\c (unimplemented "case switch")] + [#\C (unimplemented "case switch")] + [#\h (unimplemented "hash")] + [(? char-digit10?) 
(unimplemented "vector or graph")] + [_ (err p "bad syntax")])) + + +(define ( p) + (unimplemented "here string")) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Numbers + +;; have seen '#b', '#o', etc. +;; simplified to just digits +(define ( char-digitn? char-digitsn->number p) + (local [(define (+ p) + (match (read-char p) + [(? char-digitn? c) (* p (list c))] + [_ (err p "error")])) + (define (* p ds) + (if (delim? p) + (char-digitsn->number ds) + (match (read-char p) + [(? char-digitn? c) (* p (cons c ds))] + [_ (err p "error")])))] + (match (read-char p) + [#\# + (match (read-char p) + [#\e (+ p)] + [#\i (unimplemented "inexact")] + [_ (err p "error")])] + [#\+ (read-char p) (+ p)] + [#\- (read-char p) (- (+ p))] + [(? char-digitn? c) (* p (list c))] + [_ (err p "error")]))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Numbers or Symbols + +;; Numbers are simplified significantly: + +;; ::= ['+' | '-] +;; ::= '.' + +;; | * ['.' *] + +;; whenver something else is encounter, parse as a symbol + +(module+ test + (define (pn s) + ( (string-ref s 0) (open-input-string (substring s 1)))) + + (check-equal? (pn "+") '+) + (check-equal? (pn "-") '-) + (check-equal? (pn "5") 5) + (check-equal? (pn "123") 123) + (check-equal? (pn "123 ") 123) + (check-equal? (pn "0123") 123) + (check-equal? (pn "-123") -123) + (check-equal? (pn "+123") 123) + ; removed frac + (check-equal? (pn "5.") 5.0) + (check-equal? (pn ".5") 0.5) + (check-equal? (pn ".5 ") 0.5) + (check-equal? (pn "+.5") 0.5) + (check-equal? (pn "-.5") -0.5) + (check-equal? (pn "+1.5") 1.5) + (check-equal? (pn "-1.5") -1.5) + (check-equal? (pn "+.5") 0.5) + (check-equal? (pn "-.") '-.) + (check-equal? (pn "+.") '+.) + (check-equal? (pn "+.x") '+.x) + (check-equal? (pn "+x") '+x) + (check-equal? (pn "-x") '-x) + ; removed frac + (check-equal? (pn ".x") '.x) + (check-equal? (pn "1..") '1..) + (check-equal? (pn "1.1.") '1.1.) 
+ (check-pred err? (pn "."))) + +(define ( c p) + (match c + [#\+ (if (delim? p) '+ ( #\+ '() p))] + [#\- (if (delim? p) '- ( #\- '() p))] + [#\. (if (delim? p) (err p ".") ( #f '() '() p))] + [(? char-digit10?) ( #f (list c) p)] + [_ ( (list c) p)])) + +(define ( signed? whole p) + (match (peek-char p) + [(? eof-object?) (make-whole signed? whole)] + [(? char-delim?) (make-whole signed? whole)] + [#\. (read-char p) ( signed? whole '() p)] + [(? char-digit10? d) + (read-char p) + ( signed? (cons d whole) p)] + [_ ( (cons (read-char p) + (append whole (if signed? (list signed?) '()))) + p)])) + +(define ( signed? whole frac p) + (match (peek-char p) + [(? eof-object?) (make-frac signed? whole frac)] + [(? char-delim?) (make-frac signed? whole frac)] + [(? char-digit10?) ( signed? whole (cons (read-char p) frac) p)] + [_ ( (cons (read-char p) + (append frac + (list #\.) + whole + (if signed? (list signed?) '()))) + p)])) + +(define (make-frac signed? whole frac) + (match* (whole frac) + [('() '()) (chars->symbol (list #\. signed?))] + [(_ _) + (exact->inexact + (match signed? + [#\- (- (frac->number whole frac))] + [_ (frac->number whole frac)]))])) + + +(define (frac->number whole frac) + (+ (char-digit10s->number whole) + (/ (char-digit10s->number frac) + (expt 10 (length frac))))) + +(define (make-whole signed? ds) + (match signed? + [#\- (- (char-digit10s->number ds))] + [_ (char-digit10s->number ds)])) + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Line comment + +(module+ test + (define (pl s) + (let ((p (open-input-string s)) + (o (open-output-string))) + ( p) + (copy-port p o) + (get-output-string o))) + + (check-equal? (pl "foo") "") + (check-equal? (pl "foo\n") "") + (check-equal? (pl "foo\u20291") "1") + (check-equal? (pl "foo\u20281") "1") + (check-equal? (pl "foo\r1") "1") + (check-equal? (pl "foo\n1") "1")) + +(define ( p) + (let ((c (read-char p))) + (or (eof-object? 
c) + (and (memv c '(#\newline #\return #\u0085 #\u2028 #\u2029)) #t) + ( p)))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Block comment + +(module+ test + (define (pb s) + ( (open-input-string s))) + + (check-equal? (pb "|#") #t) + (check-equal? (pb "aadsfa|#") #t) + (check-equal? (pb "aadsfa|#adsf") #t) + (check-equal? (pb "aad# |#") #t) + (check-equal? (pb "aadsfa|#|#") #t) + (check-equal? (pb "aads#|fa|#adsf|#") #t) + + (check-pred err? (pb "")) + (check-pred err? (pb "#|")) + (check-pred err? (pb "#||#"))) + +(define ( p) + (match (read-char p) + [(? eof-object?) (err p "unbalanced |#")] + [#\# (match (peek-char p) + [#\| (let ((r ( p))) + (if (err? r) r ( p)))] + [_ ( p)])] + [#\| (match (peek-char p) + [#\# (read-char p) #t] + [_ ( p)])] + [_ ( p)])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Vectors + +(module+ test + (define (pv s) + ( #\( (open-input-string s))) + + (check-equal? (pv ")") #()) + (check-equal? (pv "x)") #(x)) + (check-equal? (pv "x y z)") #(x y z)) + (check-equal? (pv "#;() x y z)") #(x y z)) + (check-equal? (pv "x #;() y z)") #(x y z)) + (check-equal? (pv "x y z #;())") #(x y z)) + (check-equal? (pv ";f\nx y z #;())") #(x y z)) + (check-equal? (pv ";f\nx y z #;();\n)") #(x y z)) + (check-pred err? (pv "x . y)")) + (check-pred err? (pv "x y . z)")) + (check-pred err? (pv "#;() x y . z)")) + (check-pred err? (pv "x #;() y . z)")) + (check-pred err? (pv "x y #;() . z)")) + (check-pred err? (pv "x y . #;() z)")) + (check-pred err? (pv "x y . z #;())")) + (check-pred err? (pv "#||# x y . z)")) + (check-pred err? (pv "x #||# y . z)")) + (check-pred err? (pv "x y #||# . z)")) + (check-pred err? (pv "x y . #||# z)")) + (check-pred err? (pv "x y . z #||#)")) + (check-pred err? (pv "x y . z ;f\n)"))) + +(define ( paren p) + (let ((r ( paren p))) + (if (err? r) + r + (if (list? 
r) + (list->vector r) + (err p "dotted list in vector"))))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Lists or pairs + +(module+ test + (define (pd s) + ( #\( (open-input-string s))) + + (check-equal? (pd ")") '()) + (check-equal? (pd "x)") '(x)) + (check-equal? (pd "x y z)") '(x y z)) + (check-equal? (pd "#;() x y z)") '(x y z)) + (check-equal? (pd "x #;() y z)") '(x y z)) + (check-equal? (pd "x y z #;())") '(x y z)) + (check-equal? (pd ";f\nx y z #;())") '(x y z)) + (check-equal? (pd ";f\nx y z #;();\n)") '(x y z)) + (check-equal? (pd "x . y)") '(x . y)) + (check-equal? (pd "x y . z)") '(x y . z)) + (check-equal? (pd "#;() x y . z)") '(x y . z)) + (check-equal? (pd "x #;() y . z)") '(x y . z)) + (check-equal? (pd "x y #;() . z)") '(x y . z)) + (check-equal? (pd "x y . #;() z)") '(x y . z)) + (check-equal? (pd "x y . z #;())") '(x y . z)) + (check-equal? (pd "#||# x y . z)") '(x y . z)) + (check-equal? (pd "x #||# y . z)") '(x y . z)) + (check-equal? (pd "x y #||# . z)") '(x y . z)) + (check-equal? (pd "x y . #||# z)") '(x y . z)) + (check-equal? (pd "x y . z #||#)") '(x y . z)) + (check-equal? (pd "x y . z ;f\n)") '(x y . z)) + (check-equal? (pd "x #t)") '(x #t)) + (check-equal? (pd "x .y)") '(x .y)) + ; removed frac + ;(check-equal? (pd "x .0)") '(x 0.0)) + (check-equal? (pd "x .y)") '(x .y)) + (check-equal? (pd "[] 0)") '(() 0)) + + (check-pred err? (pd "")) + (check-pred err? (pd "#|")) + (check-pred err? (pd "#;")) + (check-pred err? (pd ";")) + (check-pred err? (pd "#z")) + (check-pred err? (pd "1")) + (check-pred err? (pd "1 .")) + (check-pred err? (pd "1 #|")) + (check-pred err? (pd "1 #;")) + (check-pred err? (pd "1 #z")) + (check-pred err? (pd "1 (")) + (check-pred err? (pd "x . y #t")) + (check-pred err? (pd "x . y #|")) + (check-pred err? (pd "x . y #;")) + (check-pred err? (pd "x . y 1"))) + + +(define ( paren p) + (match (peek-char p) + [(? eof-object?) (err p "missing! )")] + [(? char-whitespace?) 
(read-char p) ( paren p)] + [#\; ( p) ( paren p)] + [(? close-paren? c) + (read-char p) + (if (opposite? paren c) '() (err p "mismatched paren"))] + [#\# (read-char p) + (match (peek-char p) + [#\| (read-char p) + (let ((r ( p))) + (if (err? r) r ( paren p)))] + [#\; (read-char p) + (let ((r ( p))) + (if (err? r) r ( paren p)))] + [_ (let ((r ( p))) + (if (err? r) r ( paren (list r) p)))])] + [_ (let ((r ( p))) + (if (err? r) + r + ( paren (list r) p)))])) + +(define ( paren xs p) + (match (peek-char p) + [(? eof-object?) (err p "missing!! )")] + [(? char-whitespace?) (read-char p) ( paren xs p)] + [#\; ( p) ( paren xs p)] + [(? close-paren? c) + (read-char p) + (if (opposite? paren c) (reverse xs) (err p "mismatched paren"))] + [#\# (read-char p) + (match (peek-char p) + [#\| (read-char p) + (let ((r ( p))) + (if (err? r) r ( paren xs p)))] + [#\; (read-char p) + (let ((r ( p))) + (if (err? r) r ( paren xs p)))] + [_ (let ((r ( p))) + (if (err? r) r ( paren (cons r xs) p)))])] + [#\. (read-char p) + (if (delim? p) + ( paren xs p) + ( paren (cons ( #f '() '() p) xs) p))] + [_ (let ((r ( p))) + (if (err? r) + r + ( paren (cons r xs) p)))])) + +(define ( paren xs p) + (let ((r ( p))) + (if (err? r) + r + ( paren (append* (reverse xs) (list r)) p)))) + +(define ( paren xs p) + (match (read-char p) + [(? char-whitespace?) ( paren xs p)] + [#\; ( p) ( paren xs p)] + [#\# (match (peek-char p) + [#\| (read-char p) + (let ((r ( p))) + (if (err? r) r ( paren xs p)))] + [#\; (read-char p) + (let ((r ( p))) + (if (err? r) r ( paren xs p)))] + [_ (err p "unexpected")])] + [(? close-paren? c) + (if (opposite? paren c) xs (err p "mismatched paren"))] + [_ (err p "uneasdfasdxpected")])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Symbols and Keywords + +(module+ test + (define (py s) + ( '() (open-input-string s))) + + (check-equal? (py "") '||) + (check-equal? (py "x") 'x) + (check-equal? (py "xyz") 'xyz) + (check-equal? 
(py "x(") 'x) + (check-equal? (py "|x|") 'x) + (check-equal? (py "| |") '| |) + (check-equal? (py "\\x") 'x) + (check-equal? (py "\\ ") '| |) + (check-equal? (py "|\\|") '|\|) + (check-pred err? (py "|")) + (check-pred err? (py "\\")) + + (define (pk s) + ( (open-input-string s))) + + (check-equal? (pk "") '#:||) + (check-equal? (pk "x") '#:x) + (check-equal? (pk "xyz") '#:xyz) + (check-equal? (pk "x(") '#:x) + (check-equal? (pk "|x|") '#:x) + (check-equal? (pk "| |") '#:| |) + (check-equal? (pk "\\x") '#:x) + (check-equal? (pk "\\ ") '#:| |) + (check-equal? (pk "|\\|") '#:|\|) + (check-pred err? (pk "|")) + (check-pred err? (pk "\\"))) + +(define ( cs p) + (let ((r ( cs p))) + (if (err? r) + r + (chars->symbol r)))) + +(define ( p) + (let ((r ( '() p))) + (if (err? r) + r + (chars->keyword r)))) + +(define ( p) + (let ((r ( '() p))) + (if (err? r) + r + (chars->symbol r)))) + +;; Assume: what we've seen tells us this is a symbol, cs are the chars of the +;; symbol seen so far in reverse order +(define ( cs p) + (if (delim? p) + cs + (match (peek-char p) + [#\\ (read-char p) ( cs p)] + [#\| (read-char p) ( cs p)] + [_ ( (cons (read-char p) cs) p)]))) + +(define ( cs p) + (match (read-char p) + [(? eof-object?) (err p "read: end-of-file following `\\` in symbol")] + [c ( (cons c cs) p)])) + +(define ( cs p) + (match (read-char p) + [(? eof-object?) (err p "read: end-of-file following `|` in symbol")] + [#\| ( cs p)] + [c ( (cons c cs) p)])) + +(define (chars->symbol cs) + (string->symbol (list->string (reverse cs)))) + +(define (chars->keyword cs) + (string->keyword (list->string (reverse cs)))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Characters + +(module+ test + (require rackunit) + (define (pc s) + ( (open-input-string s))) + + (check-equal? (pc "nul") #\nul) + (check-equal? (pc "null") #\nul) + (check-equal? (pc "backspace") #\backspace) + (check-equal? (pc "tab") #\tab) + (check-equal? 
(pc "newline") #\newline) + (check-equal? (pc "linefeed") #\linefeed) + (check-equal? (pc "vtab") #\vtab) + (check-equal? (pc "page") #\page) + (check-equal? (pc "return") #\return) + (check-equal? (pc "space") #\space) + (check-equal? (pc "rubout") #\rubout) + (check-equal? (pc "000") #\000) + (check-equal? (pc "123") #\123) + (check-equal? (pc "u") #\u) + (check-equal? (pc "uABCD") #\uABCD) + (check-equal? (pc "uABC") #\uABC) + (check-equal? (pc "uAB") #\uAB) + (check-equal? (pc "uA") #\uA) + (check-equal? (pc "uabcd") #\uabcd) + (check-equal? (pc "uabcd7") #\uabcd) + (check-equal? (pc "uabc") #\uabc) + (check-equal? (pc "uab") #\uab) + (check-equal? (pc "ua") #\ua) + (check-equal? (pc "uag") #\ua) + (check-equal? (pc "UABCD") #\uABCD) + (check-equal? (pc "UABC") #\uABC) + (check-equal? (pc "UAB") #\uAB) + (check-equal? (pc "UA") #\uA) + (check-equal? (pc "Uabcd") #\uabcd) + (check-equal? (pc "Uabc") #\uabc) + (check-equal? (pc "Uab") #\uab) + (check-equal? (pc "Ua") #\ua) + (check-equal? (pc "UABCDE") #\UABCDE) + (check-equal? (pc "U000DEF") #\U000DEF) + (check-equal? (pc "u") #\u) + (check-equal? (pc "7") #\7) + (check-equal? (pc "78") #\7) + (check-equal? (pc "a7") #\a) + (check-equal? (pc " 8") #\space) + (check-pred err? (pc "")) + (check-pred err? (pc "aa")) + (check-pred err? (pc "newlines")) + (check-pred err? (pc "777")) + (check-pred err? (pc "UABCDEF")) + (check-pred err? (pc "spo")) + (check-pred err? (pc "nub")) + (check-pred err? (pc "nula")) + (check-pred err? (pc "nulla")) + ;; controversial + (check-pred err? (pc "77")) + (check-pred err? (pc "779")) + (check-equal? (pc "U0000000A") #\newline)) + + +;; Assume: have already read '#\' +(define ( p) + (let ((c (read-char p))) + (cond + [(eof-object? c) (err p "error")] + [(eof-object? (peek-char p)) c] + [(char-digit8? c) ( c p)] + [(not-char-alphabetic? 
c) c] + [else + (match c + [#\b (-special-seq #\b #\a '(#\c #\k #\s #\p #\a #\c #\e) #\backspace p)] + [#\l (-special-seq #\l #\i '(#\n #\e #\f #\e #\e #\d) #\linefeed p)] + [#\p (-special-seq #\p #\a '(#\g #\e) #\page p)] + [#\s (-special-seq #\s #\p '(#\a #\c #\e) #\space p)] + [#\t (-special-seq #\t #\a '(#\b) #\tab p)] + [#\v (-special-seq #\v #\t '(#\a #\b) #\vtab p)] + [#\r (-special-seq-alt #\r + #\e '(#\t #\u #\r #\n) #\return + #\u '(#\b #\o #\u #\t) #\rubout p)] + ;; Move this into -nu and rename to -n. + [#\n (let ((next (peek-char p))) + (cond [(char=? next #\e) + (begin (read-char p) + (committed '(#\w #\l #\i #\n #\e) #\newline p))] + [(char=? next #\u) + (begin (read-char p) (nu p))] + [(eof-object? next) #\n] + [(not-char-alphabetic? next) #\n] + [else (error p "error")]))] + + [#\u + (cond [(char-digit16? (peek-char p)) + (+ (list (read-char p)) 3 p)] + [(not-char-alphabetic? (peek-char p)) + #\u] + [else (err p "error")])] + [#\U + (cond [(char-digit16? (peek-char p)) + (+ (list (read-char p)) 7 p)] + [(not-char-alphabetic? (peek-char p)) + #\U] + [else (err p "error")])] + [_ + (if (and (char-alphabetic? c) + (not-char-alphabetic? (peek-char p))) + c + (err p "error"))])]))) + +;; Assume: seen '#\', c0, which may be the start of special sequence for char if c1 comes next +(define (-special-seq c0 c1 seq char p) + (let ((next (peek-char p))) + (cond [(char=? next c1) + (begin (read-char p) + (committed seq char p))] + [(eof-object? next) c0] + [(not-char-alphabetic? next) c0] + [else (error p "error")]))) + +;; Assume: seen '#\', c0, which may be the start of special sequence; +;; for char1 if c1 comes next or for char2 if c2 comes next +(define (-special-seq-alt c0 c1 seq1 char1 c2 seq2 char2 p) + (let ((next (peek-char p))) + (cond [(char=? next c1) + (begin (read-char p) + (committed seq1 char1 p))] + [(char=? next c2) + (begin (read-char p) + (committed seq2 char2 p))] + [(eof-object? next) c0] + [(not-char-alphabetic? 
next) c0] + [else (error p "error")]))) + +;; committed to see #\nul or #\null, error otherwise +(define (nu p) + (match (read-char p) + [#\l (match (peek-char p) + [(? not-char-alphabetic?) #\nul] + [#\l (read-char p) + (match (peek-char p) + [(? not-char-alphabetic?) #\nul] + [_ (err p "error")])] + [_ (err p "error")])] + [_ (err p "error")])) + +(define (+ cs n p) + (if (zero? n) + (char-digit16s->char cs) + (match (peek-char p) + [(? eof-object?) (char-digit16s->char cs)] + [(? char-digit16?) (+ (cons (read-char p) cs) (sub1 n) p)] + [_ (char-digit16s->char cs)]))) + +(define ( c p) + (match (peek-char p) + ;; this is the same behavior Racket has: it commits after two digits + ;; have to use peek-bytes to behave differently + [(? char-digit8?) ( c (read-char p) p)] + [_ c])) + +(define ( c1 c2 p) + (match (read-char p) + [(? eof-object?) (err p "error")] + [(? char-digit8? c3) (octal-char c1 c2 c3)] + [_ (err p "error")])) + +(define (committed chars c p) + (match chars + ['() (if (not-char-alphabetic? (peek-char p)) + c + (err p "error"))] + [(cons c0 cs) + (let ((c1 (read-char p))) + (if (and (char? c1) (char=? 
c1 c0)) + (committed cs c p) + (err p "error")))])) + +(define (char-digit16s->char ds) + (let ((x (char-digit16s->number ds))) + (if (or (<= 0 x 55295) + (<= 57344 x 1114111)) + (integer->char x) + (err 'p "error")))) + +(define (char-digit2s->number ds) + (match ds + ['() 0] + [(cons d ds) + (+ (char-digit->number d) + (* 2 (char-digit2s->number ds)))])) + +(define (char-digit8s->number ds) + (match ds + ['() 0] + [(cons d ds) + (+ (char-digit->number d) + (* 8 (char-digit8s->number ds)))])) + +(define (char-digit10s->number ds) + (match ds + ['() 0] + [(cons d ds) + (+ (char-digit->number d) + (* 10 (char-digit10s->number ds)))])) + +(define (char-digit16s->number ds) + (match ds + ['() 0] + [(cons d ds) + (+ (char-digit16->number d) + (* 16 (char-digit16s->number ds)))])) + +(define (char-digit->number d) + (- (char->integer d) + (char->integer #\0))) + +(define (char-digit2? d) + (and (char? d) + (<= 48 (char->integer d) 49))) + +(define (char-digit8? d) + (and (char? d) + (<= 48 (char->integer d) 55))) + +(define (char-digit10? d) + (and (char? d) + (<= 48 (char->integer d) 57))) + +(define (char-digit16? d) + (and (char? d) + (let ((x (char->integer d))) + (or (<= 48 x 57) + (<= 65 x 70) + (<= 97 x 102))))) + +(define (char-digit8->number c) + (- (char->integer c) 48)) + +(define (char-digit16->number c) + (let ((x (char->integer c))) + (cond [(<= 48 x 57) (- x 48)] + [(<= 65 x 70) (- x 55)] + [(<= 97 x 102) (- x 87)]))) + +(define (octal-char d1 d2 d3) + (let ((x (+ (* 64 (char-digit8->number d1)) + (* 8 (char-digit8->number d2)) + (char-digit8->number d3)))) + (if (<= 0 x 255) + (integer->char x) + (err 'p "ERROR")))) + +(define (not-char-alphabetic? c) + (or (eof-object? c) + (not (char-alphabetic? c)))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Strings + +(module+ test + (require rackunit) + (define (ps s) + ( '() (open-input-string s))) + + (check-equal? (ps "\"") "") + (check-equal? 
(ps "a\"") "a") + (check-equal? (ps "\\a\"") "\a") + (check-equal? (ps "\\b\"") "\b") + (check-equal? (ps "\\t\"") "\t") + (check-equal? (ps "\\n\"") "\n") + (check-equal? (ps "\\v\"") "\v") + (check-equal? (ps "\\f\"") "\f") + (check-equal? (ps "\\r\"") "\r") + (check-equal? (ps "\\e\"") "\e") + (check-equal? (ps "\\\"\"") "\"") + (check-equal? (ps "\\\'\"") "'") + (check-equal? (ps "\\\\\"") "\\") + (check-equal? (ps "\\xa\"") "\xa") + (check-equal? (ps "\\xab\"") "\xab") + (check-equal? (ps "\\xabcd\"") "\xabcd") + (check-equal? (ps "\\xabc\"") "\xabc") + (check-equal? (ps "\\xaq\"") "\xaq") + (check-equal? (ps "\\\nabc\"") "abc") + (check-equal? (ps "\\uab\"") "\xab") + (check-equal? (ps "\\uabcd\"") "\uabcd") + (check-equal? (ps "\\Uabcd\"") "\uabcd") + (check-equal? (ps "\\000\"") "\u000") + (check-equal? (ps "\\0g\"") "\u0g") + (check-equal? (ps "\\x0\"") "\x0") + (check-equal? (ps "\\xA\"") "\xa") + (check-equal? (ps "\\xa\"") "\xa") + (check-equal? (ps "\\UAAAAA\"") "\UAAAAA") + + (check-pred err? (ps "")) + (check-pred err? (ps "\\")) + (check-pred err? (ps "\\x")) + (check-pred err? (ps "\\0")) + (check-pred err? (ps "\\x0")) + (check-pred err? (ps "\\xg\"")) + + (check-pred err? (ps "\\q\"")) + (check-pred err? (ps "a\\q\"")) + (check-pred err? (ps "\\UFFFFFFFF\"")) + #;(check-pred err? (ps "\\Uag"))) + +;; Assume: have already read '"' +(define ( cs p) + (match (read-char p) + [(? eof-object?) (err p "error")] + [#\" (list->string (reverse cs))] + [#\\ ( cs p)] + [c ( (cons c cs) p)])) + +(define ( cs p) + (match (read-char p) + [(? eof-object?) 
(err p "error")] + [#\a ( (cons #\007 cs) p)] + [#\b ( (cons #\010 cs) p)] + [#\t ( (cons #\011 cs) p)] + [#\n ( (cons #\012 cs) p)] + [#\v ( (cons #\013 cs) p)] + [#\f ( (cons #\014 cs) p)] + [#\r ( (cons #\015 cs) p)] + [#\e ( (cons #\033 cs) p)] + [#\" ( (cons #\" cs) p)] + [#\' ( (cons #\' cs) p)] + [#\\ ( (cons #\\ cs) p)] + [#\x (* cs 2 p)] + [#\u (* cs 4 p)] ; FIXME: will need a different function to handle \u...\u... form + [#\U (* cs 8 p)] + [(? char-digit8? d) (+ cs (list d) 2 p)] + [#\newline ( cs p)] + [_ (err p "error")])) + +(define (+ cs ds n p) + (if (zero? n) + ( (cons (char-digit8s->char ds) cs) p) + (match (peek-char p) + [(? eof-object?) (err p "error")] + [(? char-digit8?) (+ cs (cons (read-char p) ds) (sub1 n) p)] + [_ ( (cons (char-digit8s->char ds) cs) p)]))) + +(define (* cs n p) + (match (peek-char p) + [(? eof-object?) (err p "error")] + [(? char-digit16?) (+ cs (list (read-char p)) (sub1 n) p)] + [_ (err p "error")])) + +(define (+ cs ds n p) + (if (zero? n) + (return-+ cs ds p) + (match (peek-char p) + [(? eof-object?) (err p "error")] + [(? char-digit16?) (+ cs (cons (read-char p) ds) (sub1 n) p)] + [_ (return-+ cs ds p)]))) + +(define (return-+ cs ds p) + (let ((r (char-digit16s->char ds))) + (if (err? r) + r + ( (cons r cs) p)))) + +(define (char-digit8s->char ds) + (integer->char (char-digit8s->number ds))) + +(define (delim? p) + (let ((c (peek-char p))) + (or (eof-object? c) + (char-delim? c)))) + +(define (char-delim? x) + (or (char-whitespace? x) + (memq x '(#\( #\) #\[ #\] #\{ #\} #\" #\, #\' #\` #\;)))) + +(define (opposite? p1 p2) + (match p1 + [#\( (char=? p2 #\))] + [#\[ (char=? p2 #\])] + [#\{ (char=? p2 #\})])) + +(define (open-paren? c) + (memq c '(#\( #\[ #\{))) + +(define (close-paren? c) + (memq c '(#\) #\] #\}))) + +;; committed to seeing chars followed by a delimiter, producing x +(define (committed-delim chars x p) + (match chars + ['() (if (delim? 
p) x (err p "unexpected sequence"))] + [(cons c0 cs) + (let ((c1 (read-char p))) + (if (and (char? c1) (char=? c1 c0)) + (committed-delim cs x p) + (err p "unexpected sequence")))])) + +(define (unimplemented x) + (err #f (string-append "unimplemented: " x))) + diff --git a/langs/outlaw/registers.rkt b/langs/outlaw/registers.rkt new file mode 100644 index 00000000..90973e5d --- /dev/null +++ b/langs/outlaw/registers.rkt @@ -0,0 +1,15 @@ +#lang racket +(provide (all-defined-out)) + +(define rax 'rax) ; return +(define eax 'eax) ; 32-bit load/store +(define rbx 'rbx) ; heap +(define rdi 'rdi) ; arg1 +(define rsi 'rsi) ; arg2 +(define rdx 'rdx) ; arg3 +(define r8 'r8) ; scratch +(define r9 'r9) ; scratch +(define r10 'r10) ; scratch +(define r12 'r12) ; save across call to memcpy +(define r15 'r15) ; stack pad (non-volatile) +(define rsp 'rsp) ; stack diff --git a/langs/outlaw/runtime.h b/langs/outlaw/runtime.h new file mode 100644 index 00000000..a68e4654 --- /dev/null +++ b/langs/outlaw/runtime.h @@ -0,0 +1,11 @@ +#ifndef RUNTIME_H +#define RUNTIME_H +int64_t entry(); +extern FILE* in; +extern FILE* out; +extern void (*error_handler)(); + +// in words +#define heap_size 100000000 +extern int64_t *heap; +#endif /* RUNTIME_H */ diff --git a/langs/outlaw/stdlib.rkt b/langs/outlaw/stdlib.rkt new file mode 100644 index 00000000..e8017c5f --- /dev/null +++ b/langs/outlaw/stdlib.rkt @@ -0,0 +1,1282 @@ +#lang racket +(provide list list* make-list list? map foldr filter length append append* + memq member append-map vector->list + number->string gensym read read-char peek-char + > <= >= void? + char<=? char=? + list->string string->list + reverse + remove-duplicates remq* remove* remove + andmap ormap vector list->vector boolean? substring + odd? + system-type + not + findf + read-line + * ; limited + char-alphabetic? char-whitespace? 
+ displayln ; only works for strings + write-string + ; unimplemented + exact->inexact / expt string->keyword + ;; Op0 + read-byte peek-byte void + ;; Op1 + add1 sub1 zero? char? write-byte eof-object? + integer->char char->integer + box unbox empty? cons? box? car cdr + vector? vector-length string? string-length + symbol->string string->symbol symbol? + string->uninterned-symbol open-input-file + write-char error integer? exact-integer? procedure? + eq-hash-code + ;; Op2 + + - < = cons eq? make-vector vector-ref + make-string string-ref string-append + quotient remainder set-box! + bitwise-and bitwise-ior bitwise-xor arithmetic-shift + ;; Op3 + vector-set!) + +(require (prefix-in % racket) + (rename-in racket [read-byte %read-byte-port])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Op0 +(define read-byte + (case-lambda + [() (%read-byte)] + [(p) (%read-byte-port p)])) ;; not a racket function! + +;(define (peek-byte) (%peek-byte)) +(define peek-byte + (case-lambda + [() + (%peek-byte (%current-input-port) 0)] + [(p off) + (%peek-byte p off)])) + +(define (void . xs) (%void)) + +(define (current-input-port) (%current-input-port)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Op1 +(define (add1 n) (%add1 n)) +(define (sub1 n) (%sub1 n)) +(define (zero? n) (%zero? n)) +(define (char? n) (%char? n)) +(define (write-byte b) (%write-byte b)) ; IMPROVE: add port +(define (write-char c) (%write-char c)) +(define (eof-object? x) (%eof-object? x)) +(define (integer->char i) (%integer->char i)) +(define (char->integer c) (%char->integer c)) +(define (box x) (%box x)) +(define (box? x) (%box? x)) +(define (unbox x) (%unbox x)) +(define (empty? x) (%empty? x)) +(define (cons? x) (%cons? x)) +(define (car x) (%car x)) +(define (cdr x) (%cdr x)) +(define (vector? x) (%vector? x)) +(define (vector-length x) (%vector-length x)) +(define (string? x) (%string? 
x)) +(define (string-length x) (%string-length x)) +(define (symbol->string x) (%symbol->string x)) +(define (string->symbol x) (%string->symbol x)) +(define (symbol? x) (%symbol? x)) +(define (string->uninterned-symbol x) (%string->uninterned-symbol x)) +(define (open-input-file x) (%open-input-file x)) +(define (error . x) (%error (car x))) ;; drops other args +(define (integer? x) (%integer? x)) +(define (exact-integer? x) (%integer? x)) ;; we only have exact integers +(define (procedure? x) (%procedure? x)) +(define (eq-hash-code x) (%eq-hash-code x)) + +(define (* x y) + (match x + [0 0] + [1 y] + [2 (arithmetic-shift y 1)] + [4 (arithmetic-shift y 2)] + [8 (arithmetic-shift y 3)] + [10 ; 10a=2^3a+2a + (+ (arithmetic-shift y 1) + (arithmetic-shift y 3))] + [16 (arithmetic-shift y 4)] + [64 (arithmetic-shift y 6)] + [_ (error "unimplemented multiplication")])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Op2 +(define (+ . xs) + (match xs + ['() 0] + [(cons x xs) + (%+ x (apply + xs))])) + +(define - + (case-lambda + [(z) (%- 0 z)] + [(x y) (%- x y)] + [(x y z . zs) + (apply - (%- x y) z zs)])) + +(define < + (case-lambda + [(z) #t] + [(x y . zs) + (if (%< x y) + (apply < y zs) + #f)])) + +(define <= + (case-lambda + [(z) #t] + [(x y . zs) + (if (%< y x) + #f + (apply <= y zs))])) + +(define > + (case-lambda + [(z) #t] + [(x y . zs) + (if (%< y x) + (apply > y zs) + #f)])) + +(define >= + (case-lambda + [(z) #t] + [(x y . zs) + (if (%< x y) + #f + (apply >= y zs))])) + +(define = + (case-lambda + [(z) #t] + [(x y . zs) + (if (%= x y) + (apply = y zs) + #f)])) + +(define (cons x y) (%cons x y)) + +(define (eq? x y) (%eq? 
x y)) + +(define make-vector + (case-lambda + [(size) (make-vector size 0)] + [(size v) (%make-vector size v)])) + +(define (vector-ref v i) (%vector-ref v i)) + +(define make-string + (case-lambda + [(k) (make-string k #\nul)] + [(k c) (%make-string k c)])) + +(define (string-ref s i) + (%string-ref s i)) + +(define string-append + (case-lambda + [(x y) (%string-append x y)] + [(x) (%string-append x "")] + [(x . ys) (%string-append x (apply string-append ys))] + [() ""])) + +(define (quotient x y) (%quotient x y)) +(define (remainder x y) (%remainder x y)) +(define (set-box! x y) (%set-box! x y)) +(define (bitwise-and x y) (%bitwise-and x y)) ;; should be n-ary +(define (bitwise-ior x y) (%bitwise-ior x y)) ;; should be n-ary +(define (bitwise-xor x y) (%bitwise-xor x y)) ;; should be n-ary +(define (arithmetic-shift x y) (%arithmetic-shift x y)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Op3 +(define (vector-set! v i x) + (%vector-set! v i x)) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (length xs) + (match xs + ['() 0] + [(cons _ xs) (add1 (length xs))])) + +(define (reverse xs) + (reverse/a xs '())) + +(define (reverse/a xs ys) + (match xs + ['() ys] + [(cons x xs) + (reverse/a xs (cons x ys))])) + +(define (equal? x y) + (error "equal? is not defined")) + +(define member + (case-lambda + [(v lst) (member v lst equal?)] + [(v lst is-equal?) + (match lst + ['() #f] + [(cons l lst1) + (if (is-equal? 
v l) + lst + (member v lst1 is-equal?))])])) + +(define remove-duplicates + (case-lambda + [(xs) (remove-duplicates xs equal?)] + [(xs eq) + (remove-duplicates/a xs eq '())])) + +(define (remove-duplicates/a xs eq seen) + (match xs + ['() (reverse seen)] + [(cons x xs) + (if (member x seen eq) + (remove-duplicates/a xs eq seen) + (remove-duplicates/a xs eq (cons x seen)))])) + +(define (remq* v-list lst) + (match v-list + ['() lst] + [(cons v v-list) + (remq* v-list (remove* v lst eq?))])) + +(define remove* + (case-lambda + [(x xs) (remove* x xs equal?)] + [(x xs eq) + (match xs + ['() '()] + [(cons y xs) + (if (eq x y) + (remove* x xs eq) + (cons y (remove* x xs eq)))])])) + +(define (remove x xs eq) + (match xs + ['() '()] + [(cons y xs) + (if (eq x y) + xs + (cons y (remove x xs eq)))])) + +(define (andmap f xs) + (match xs + ['() #t] + [(cons x xs) + (and (f x) + (andmap f xs))])) + +(define (ormap f xs) + (match xs + ['() #f] + [(cons x xs) + (or (f x) + (ormap f xs))])) + +(define (list->vector xs) + (list->vector/a (make-vector (length xs) 0) 0 xs)) + +(define (list->vector/a v i xs) + (match xs + ['() v] + [(cons x xs) + (begin + (vector-set! v i x) + (list->vector/a v (add1 i) xs))])) + +(define (vector . xs) + (list->vector xs)) + +(define (boolean? x) + (or (eq? x #t) + (eq? x #f))) + +(define (list->string xs) + (match xs + ['() ""] + [(cons c cs) + (string-append (make-string 1 c) + (list->string cs))])) + +(define substring + (case-lambda + [(str start) (substring str start (string-length str))] + [(str start end) + (substring/a str start end '())])) + +(define (substring/a str start end cs) + (if (= start end) + (list->string cs) + (substring/a str start (sub1 end) + (cons (string-ref str (sub1 end)) cs)))) + +(define (odd? 
x) + (= (remainder x 2) 1)) + +(define (system-type) + ;; the primitive system type returns 1 for mac, 0 otherwise; + ;; the fall through case is for when %system-type is implemented in Racket + (match (%system-type) + ;; the use of string->symbol here is to avoid subtle issues about symbol interning + ;; in separately compiled libraries + [1 (string->symbol "macosx")] + [0 (string->symbol "unix")] + [x x])) + +(define (not x) + (if x #f #t)) + +(define (findf proc xs) + (match xs + ['() #f] + [(cons x xs) + (if (proc x) + x + (findf proc xs))])) + +(define (char<=? c . cs) + (char-compare <= (char->integer c) cs)) + +(define (char=? c . cs) + (char-compare = (char->integer c) cs)) + +(define (char-compare cmp d cs) + (match cs + ['() #t] + [(cons c cs) + (let ((d1 (char->integer c))) + (if (cmp d d1) + (char-compare cmp d1 cs) + #f))])) + + +(define (string->list s) + (string->list/a s (string-length s) '())) + +(define (string->list/a s n xs) + (if (zero? n) + xs + (string->list/a s (sub1 n) + (cons (string-ref s (sub1 n)) xs)))) + +(define (void? x) + (eq? x (void))) + +(define (list . xs) xs) + +(define (list* x . xs) + (dot-last x xs)) + +(define (dot-last x xs) + (match xs + ['() x] + [(cons y xs) + (cons x (dot-last y xs))])) + +(define (make-list n x) + (if (zero? n) + '() + (cons x (make-list (sub1 n) x)))) + +(define (list? xs) + (match xs + ['() #t] + [(cons x xs) + (list? xs)] + [_ #f])) + +;; should really take any number of xss +(define (foldr f b xs) + (match xs + ['() b] + [(cons x xs) + (f x (foldr f b xs))])) + +(define (filter p xs) + (match xs + ['() '()] + [(cons x xs) + (if (p x) + (cons x (filter p xs)) + (filter p xs))])) + +(define map + (case-lambda + [(f xs) (map1 f xs)] + [(f . xss) (mapn f xss)])) + +(define (mapn f xss) + (if (empty? 
(car xss)) + '() + (cons (apply f (map1 (lambda (x) (car x)) xss)) + (mapn f (map1 (lambda (x) (cdr x)) xss))))) + +(define (map1 f xs) + (match xs + ['() '()] + [(cons x xs) + (cons (f x) (map1 f xs))])) + + +(define (append . xss) + (match xss + ['() '()] + [(cons x '()) x] + [(cons '() xss) + (apply append xss)] + [(cons (cons x xs) xss) + (cons x + (apply append xs xss))])) + +(define (append* xs xss) ; only binary case + (apply append xs xss)) + +(define (memq v lst) + (member v lst eq?)) + +(define append-map + (case-lambda + [(f xs) (append-map1 f xs)] + [(f . xss) (append-mapn f xss)])) + +(define (append-map1 f xs) + (match xs + ['() '()] + [(cons x xs) + (append (f x) (append-map1 f xs))])) + +(define (append-mapn f xss) + (if (empty? (car xss)) + '() + (append (apply f (map1 (lambda (x) (car x)) xss)) + (append-mapn f (map1 (lambda (x) (cdr x)) xss))))) + +(define (vector->list v) + (vector->list/a v (vector-length v) '())) + +(define (vector->list/a v i a) + (if (zero? i) + a + (vector->list/a v + (sub1 i) + (cons (vector-ref v (sub1 i)) a)))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define gensym-counter (box 0)) + +(define gensym + (case-lambda + [() (gensym "g")] + [(s) + (let ((i (unbox gensym-counter))) + (begin (set-box! gensym-counter (add1 i)) + (string->uninterned-symbol + (string-append (if (string? 
s) + s + (symbol->string s)) + (number->string i)))))])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Integer -> String +;; Only works for integers + +(define number->string + (case-lambda + [(n) (number->string n 10)] + [(n radix) + (if (< n 0) + (string-append "-" (nat->string (- n) "" radix)) + (nat->string n "" radix))])) + +(define (nat->string n m radix) + (if (< n radix) + (string-append (digit->string n radix) m) + (nat->string (quotient n radix) + (string-append (digit->string (remainder n radix) radix) m) + radix))) + +(define (digit->string n radix) + (if (= radix 16) + (hex-digit->string n) + (make-string 1 (integer->char (+ (char->integer #\0) n))))) + +(define (hex-digit->string n) + (match n + [10 "a"] + [11 "b"] + [12 "c"] + [13 "d"] + [14 "e"] + [15 "f"] + [_ (digit->string n 10)])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (read-char) + (let ((b (read-byte))) + (if (eof-object? b) + b + (integer->char + (if (< b 128) + b + (if (>= b 240) + (+ (arithmetic-shift (bitwise-and b #b111) 18) + (arithmetic-shift (bitwise-and (read-byte) #b111111) 12) + (arithmetic-shift (bitwise-and (read-byte) #b111111) 6) + (bitwise-and (read-byte) #b111111)) + (if (>= b 224) + (+ (arithmetic-shift (bitwise-and b #b1111) 12) + (arithmetic-shift (bitwise-and (read-byte) #b111111) 6) + (bitwise-and (read-byte) #b111111)) + (if (>= b 192) + (+ (arithmetic-shift (bitwise-and b #b11111) 6) + (bitwise-and (read-byte) #b111111)) + (error "bad bytes"))))))))) + +(define (peek-char) + (let ((b (peek-byte))) + (if (eof-object? 
b) + b + (integer->char + (if (< b 128) + b + (if (>= b 240) + (+ (arithmetic-shift (bitwise-and b #b111) 18) + (arithmetic-shift (bitwise-and (peek-byte (%current-input-port) 1) #b111111) 12) + (arithmetic-shift (bitwise-and (peek-byte (%current-input-port) 2) #b111111) 6) + (bitwise-and (peek-byte (%current-input-port) 3) #b111111)) + (if (>= b 224) + (+ (arithmetic-shift (bitwise-and b #b1111) 12) + (arithmetic-shift (bitwise-and (peek-byte (%current-input-port) 1) #b111111) 6) + (bitwise-and (peek-byte (%current-input-port) 2) #b111111)) + (if (>= b 192) + (+ (arithmetic-shift (bitwise-and b #b11111) 6) + (bitwise-and (peek-byte (%current-input-port) 1) #b111111)) + (error "bad bytes"))))))))) + +(define (read-line) + (read-line/a '())) + +(define (read-line/a cs) + (let ((c (read-char))) + (if (or (eof-object? c) (eq? c #\newline)) + (list->string (reverse cs)) + (read-line/a (cons c cs))))) + +(define (char-alphabetic? x) (%char-alphabetic? x)) +(define (char-whitespace? x) (%char-whitespace? x)) + +(define (displayln s) + (if (string? s) + (begin (write-string s) + (write-char #\newline)) + (error "unimplemented displayln for non-strings"))) + +(define (write-string s) + (begin (map write-char (string->list s)) + (string-length s))) + +(define (exact->inexact x) + (error "exact->inexact not implemented")) + +(define (/ x y) + (error "/ not implemented")) + +(define (expt n m) + (error "expt not implemented")) + +(define (string->keyword s) + (error "string->keyword not implemented")) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; read.rkt + +;; -> Any +;; Read an s-expression from given port +(define (read) + (let ((r ())) + (if (err? r) + (error (err-msg r)) + r))) + +(struct err (msg)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Start + +(define () + (match (peek-char) + [(? eof-object?) (read-char)] + [(? 
char-whitespace?) (begin (read-char) ())] + [#\; (begin () ())] + [#\# (begin (read-char) + (match (peek-char) + [#\| + (begin (read-char) + (let ((r ())) + (if (err? r) r ())))] + [#\; (read-char) + (let ((r ())) + (if (err? r) r ()))] + [_ ()]))] + [_ ()])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Elem + +(define () + (match (read-char) + [(? eof-object?) (err "eof")] + [(? char-whitespace?) ()] + [#\| ()] + [#\" ( '())] + [#\# ()] + [(? open-paren? c) ( c)] + [#\; () ()] + [#\' ( (string->symbol "quote"))] + [#\` ( (string->symbol "quasiquote"))] + [#\, (match (peek-char) + [#\@ (read-char) ( (string->symbol "unquote-splicing"))] + [_ ( (string->symbol "unquote"))])] + [c ( c)])) + +(define ( q) + (let ((r ())) + (if (err? r) + r + (list q r)))) + +(define () + (match (peek-char) + [#\| (read-char) + (let ((r ())) + (if (err? r) r ()))] + [#\; (read-char) + (let ((r ())) + (if (err? r) r ()))] + [_ ()])) + +(define () + (match (read-char) + [(? eof-object?) (err "bad syntax `#`")] + [#\T (committed-delim '() #t)] + [#\F (committed-delim '() #f)] ; could also be #Fl + [#\t (if (delim?) #t (committed-delim '(#\r #\u #\e) #t))] + ;; could also be #fl + [#\f (if (delim?) #f (committed-delim '(#\a #\l #\s #\e) #f))] + [#\( ( #\()] + [#\[ ( #\[)] + [#\{ ( #\{)] + [#\s (unimplemented "structure")] + [#\\ ()] + [#\: ()] + [#\& (unimplemented "boxes")] ; FIXME + [#\' ( (string->symbol "syntax"))] + [#\! (unimplemented "shebang comment")] + [#\` ( (string->symbol "quasisyntax"))] + [#\, (match (peek-char) + [#\@ (read-char) ( (string->symbol "unsyntax-splicing"))] + [_ ( (string->symbol "unsyntax"))])] + [#\~ (unimplemented "compiled code")] + [#\i (unimplemented "inexact number")] + [#\I (unimplemented "inexact number")] + [#\e (unimplemented "exact number")] + [#\E (unimplemented "exact number")] + [#\b ( char-digit2? char-digit2s->number)] + [#\B ( char-digit2? char-digit2s->number)] + [#\o ( char-digit8? 
char-digit8s->number)] + [#\O ( char-digit8? char-digit8s->number)] + [#\d ( char-digit10? char-digit10s->number)] + [#\D ( char-digit10? char-digit10s->number)] + [#\x ( char-digit16? char-digit16s->number)] + [#\X ( char-digit16? char-digit16s->number)] + [#\< ()] + [#\r (unimplemented "regexp or reader")] + [#\p (unimplemented "pregexp")] + [#\c (unimplemented "case switch")] + [#\C (unimplemented "case switch")] + [#\h (unimplemented "hash")] + [(? char-digit10?) (unimplemented "vector or graph")] + [_ (err "bad syntax")])) + + +(define () + (unimplemented "here string")) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Numbers + +;; have seen '#b', '#o', etc. +;; simplified to just digits +(define ( char-digitn? char-digitsn->number) + (match (read-char) + [#\# + (match (read-char) + [#\e (+ char-digitn?)] + [#\i (unimplemented "inexact")] + [_ (err "error")])] + [#\+ (read-char) (+)] + [#\- (read-char) (- (+ char-digitn?))] + [(? char-digitn? c) (* (list c) char-digitn? char-digitsn->number)] + [_ (err "error")])) + +(define (+ char-digitn?) + (match (read-char) + [(? char-digitn? c) (* (list c))] + [_ (err "error")])) + +(define (* ds char-digitn? char-digitsn->number) + (if (delim?) + (char-digitsn->number ds) + (match (read-char) + [(? char-digitn? c) (* (cons c ds) char-digitn? char-digitsn->number)] + [_ (err "error")]))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Numbers or Symbols + +;; Numbers are simplified significantly: + +;; ::= ['+' | '-] +;; ::= '.' + +;; | * ['.' *] + +;; whenver something else is encounter, parse as a symbol + +(define ( c) + (match c + [#\+ (if (delim?) (string->symbol "+") ( #\+ '()))] + [#\- (if (delim?) (string->symbol "-") ( #\- '()))] + [#\. (if (delim?) (err ".") ( #f '() '()))] + [(? char-digit10?) ( #f (list c))] + [_ ( (list c))])) + +(define ( signed? whole) + (match (peek-char) + [(? eof-object?) (make-whole signed? whole)] + [(? 
char-delim?) (make-whole signed? whole)] + [#\. (read-char) ( signed? whole '())] + [(? char-digit10? d) + (read-char) + ( signed? (cons d whole))] + [_ ( (cons (read-char) + (append whole (if signed? (list signed?) '()))) + )])) + +(define ( signed? whole frac) + (match (peek-char) + [(? eof-object?) (make-frac signed? whole frac)] + [(? char-delim?) (make-frac signed? whole frac)] + [(? char-digit10?) ( signed? whole (cons (read-char) frac))] + [_ ( (cons (read-char) + (append frac + (list #\.) + whole + (if signed? (list signed?) '()))) + )])) + +(define (make-frac signed? whole frac) + (match (cons whole frac) + [(cons '() '()) (chars->symbol (list #\. signed?))] + [(cons _ _) + (exact->inexact + (match signed? + [#\- (- (frac->number whole frac))] + [_ (frac->number whole frac)]))])) + + +(define (frac->number whole frac) + (+ (char-digit10s->number whole) + (/ (char-digit10s->number frac) + (expt 10 (length frac))))) + +(define (make-whole signed? ds) + (match signed? + [#\- (- (char-digit10s->number ds))] + [_ (char-digit10s->number ds)])) + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Line comment + +(define () + (let ((c (read-char))) + (or (eof-object? c) + (and (memq c '(#\newline #\return #\u0085 #\u2028 #\u2029)) #t) + ()))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Block comment + +(define () + (match (read-char) + [(? eof-object?) (err (string-append "unbalanced |" "#"))] + [#\# (match (peek-char) + [#\| (let ((r ())) + (if (err? r) r ()))] + [_ ()])] + [#\| (match (peek-char) + [#\# (read-char) #t] + [_ ()])] + [_ ()])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Vectors + +(define ( paren) + (let ((r ( paren))) + (if (err? r) + r + (if (list? 
r) + (list->vector r) + (err "dotted list in vector"))))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Lists or pairs + +(define ( paren) + (match (peek-char) + [(? eof-object?) (err "missing! )")] + [(? char-whitespace?) (read-char) ( paren)] + [#\; () ( paren)] + [(? close-paren? c) + (read-char) + (if (opposite? paren c) '() (err "mismatched paren"))] + [#\# (read-char) + (match (peek-char) + [#\| (read-char) + (let ((r ())) + (if (err? r) r ( paren)))] + [#\; (read-char) + (let ((r ())) + (if (err? r) r ( paren)))] + [_ (let ((r ())) + (if (err? r) r ( paren (list r))))])] + [_ (let ((r ())) + (if (err? r) + r + ( paren (list r))))])) + +(define ( paren xs) + (match (peek-char) + [(? eof-object?) (err "missing!! )")] + [(? char-whitespace?) (read-char) ( paren xs)] + [#\; () ( paren xs)] + [(? close-paren? c) + (read-char) + (if (opposite? paren c) (reverse xs) (err "mismatched paren"))] + [#\# (read-char) + (match (peek-char) + [#\| (read-char) + (let ((r ())) + (if (err? r) r ( paren xs)))] + [#\; (read-char) + (let ((r ())) + (if (err? r) r ( paren xs)))] + [_ (let ((r ())) + (if (err? r) r ( paren (cons r xs))))])] + [#\. (read-char) + (if (delim?) + ( paren xs) + ( paren (cons ( #f '() '()) xs)))] + [_ (let ((r ())) + (if (err? r) + r + ( paren (cons r xs))))])) + +(define ( paren xs) + (let ((r ())) + (if (err? r) + r + ( paren (append* (reverse xs) (list r)))))) + +(define ( paren xs) + (match (read-char) + [(? char-whitespace?) ( paren xs)] + [#\; () ( paren xs)] + [#\# (match (peek-char) + [#\| (read-char) + (let ((r ())) + (if (err? r) r ( paren xs)))] + [#\; (read-char) + (let ((r ())) + (if (err? r) r ( paren xs)))] + [_ (err "unexpected")])] + [(? close-paren? c) + (if (opposite? paren c) xs (err "mismatched paren"))] + [_ (err "uneasdfasdxpected")])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Symbols and Keywords + +(define ( cs) + (let ((r ( cs))) + (if (err? 
r) + r + (chars->symbol r)))) + +(define () + (let ((r ( '()))) + (if (err? r) + r + (chars->keyword r)))) + +(define () + (let ((r ( '()))) + (if (err? r) + r + (chars->symbol r)))) + +;; Assume: what we've seen tells us this is a symbol, cs are the chars of the +;; symbol seen so far in reverse order +(define ( cs) + (if (delim?) + cs + (match (peek-char) + [#\\ (read-char) ( cs)] + [#\| (read-char) ( cs)] + [_ ( (cons (read-char) cs))]))) + +(define ( cs) + (match (read-char) + [(? eof-object?) (err "read: end-of-file following `\\` in symbol")] + [c ( (cons c cs))])) + +(define ( cs) + (match (read-char) + [(? eof-object?) (err "read: end-of-file following `|` in symbol")] + [#\| ( cs)] + [c ( (cons c cs))])) + +(define (chars->symbol cs) + (string->symbol (list->string (reverse cs)))) + +(define (chars->keyword cs) + (string->keyword (list->string (reverse cs)))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Characters + +;; Assume: have already read '#\' +(define () + (let ((c (read-char))) + (cond + [(eof-object? c) (err "error")] + [(eof-object? (peek-char)) c] + [(char-digit8? c) ( c)] + [(not-char-alphabetic? c) c] + [else + (match c + [#\b (-special-seq #\b #\a '(#\c #\k #\s #\p #\a #\c #\e) #\backspace)] + [#\l (-special-seq #\l #\i '(#\n #\e #\f #\e #\e #\d) #\linefeed)] + [#\p (-special-seq #\p #\a '(#\g #\e) #\page)] + [#\s (-special-seq #\s #\p '(#\a #\c #\e) #\space)] + [#\t (-special-seq #\t #\a '(#\b) #\tab)] + [#\v (-special-seq #\v #\t '(#\a #\b) #\vtab)] + [#\r (-special-seq-alt #\r + #\e '(#\t #\u #\r #\n) #\return + #\u '(#\b #\o #\u #\t) #\rubout)] + ;; Move this into -nu and rename to -n. + [#\n (let ((next (peek-char))) + (cond [(char=? next #\e) + (begin (read-char) + (committed '(#\w #\l #\i #\n #\e) #\newline))] + [(char=? next #\u) + (begin (read-char) (nu))] + [(eof-object? next) #\n] + [(not-char-alphabetic? next) #\n] + [else (err "error")]))] + + [#\u + (cond [(char-digit16? 
(peek-char)) + (+ (list (read-char)) 3)] + [(not-char-alphabetic? (peek-char)) + #\u] + [else (err "error")])] + [#\U + (cond [(char-digit16? (peek-char)) + (+ (list (read-char)) 7)] + [(not-char-alphabetic? (peek-char)) + #\U] + [else (err "error")])] + [_ + (if (and (char-alphabetic? c) + (not-char-alphabetic? (peek-char))) + c + (err "error"))])]))) + +;; Assume: seen '#\', c0, which may be the start of special sequence for char if c1 comes next +(define (-special-seq c0 c1 seq char) + (let ((next (peek-char))) + (cond [(char=? next c1) + (begin (read-char) + (committed seq char))] + [(eof-object? next) c0] + [(not-char-alphabetic? next) c0] + [else (err "error")]))) + +;; Assume: seen '#\', c0, which may be the start of special sequence; +;; for char1 if c1 comes next or for char2 if c2 comes next +(define (-special-seq-alt c0 c1 seq1 char1 c2 seq2 char2) + (let ((next (peek-char))) + (cond [(char=? next c1) + (begin (read-char) + (committed seq1 char1))] + [(char=? next c2) + (begin (read-char) + (committed seq2 char2))] + [(eof-object? next) c0] + [(not-char-alphabetic? next) c0] + [else (err "error")]))) + +;; committed to see #\nul or #\null, error otherwise +(define (nu) + (match (read-char) + [#\l (match (peek-char) + [(? not-char-alphabetic?) #\nul] + [#\l (read-char) + (match (peek-char) + [(? not-char-alphabetic?) #\nul] + [_ (err "error")])] + [_ (err "error")])] + [_ (err "error")])) + +(define (+ cs n) + (if (zero? n) + (char-digit16s->char cs) + (match (peek-char) + [(? eof-object?) (char-digit16s->char cs)] + [(? char-digit16?) (+ (cons (read-char) cs) (sub1 n))] + [_ (char-digit16s->char cs)]))) + +(define ( c) + (match (peek-char) + ;; this is the same behavior Racket has: it commits after two digits + ;; have to use peek-bytes to behave differently + [(? char-digit8?) ( c (read-char))] + [_ c])) + +(define ( c1 c2) + (match (read-char) + [(? eof-object?) (err "error")] + [(? char-digit8? 
c3) (octal-char c1 c2 c3)] + [_ (err "error")])) + +(define (committed chars c) + (match chars + ['() (if (not-char-alphabetic? (peek-char)) + c + (err "error"))] + [(cons c0 cs) + (let ((c1 (read-char))) + (if (and (char? c1) (char=? c1 c0)) + (committed cs c) + (err "error")))])) + +(define (char-digit16s->char ds) + (let ((x (char-digit16s->number ds))) + (if (or (<= 0 x 55295) + (<= 57344 x 1114111)) + (integer->char x) + (err "error")))) + +(define (char-digit2s->number ds) + (match ds + ['() 0] + [(cons d ds) + (+ (char-digit->number d) + (* 2 (char-digit2s->number ds)))])) + +(define (char-digit8s->number ds) + (match ds + ['() 0] + [(cons d ds) + (+ (char-digit->number d) + (* 8 (char-digit8s->number ds)))])) + +(define (char-digit10s->number ds) + (match ds + ['() 0] + [(cons d ds) + (+ (char-digit->number d) + (* 10 (char-digit10s->number ds)))])) + +(define (char-digit16s->number ds) + (match ds + ['() 0] + [(cons d ds) + (+ (char-digit16->number d) + (* 16 (char-digit16s->number ds)))])) + +(define (char-digit->number d) + (- (char->integer d) + (char->integer #\0))) + +(define (char-digit2? d) + (and (char? d) + (<= 48 (char->integer d) 49))) + +(define (char-digit8? d) + (and (char? d) + (<= 48 (char->integer d) 55))) + +(define (char-digit10? d) + (and (char? d) + (<= 48 (char->integer d) 57))) + +(define (char-digit16? d) + (and (char? d) + (let ((x (char->integer d))) + (or (<= 48 x 57) + (<= 65 x 70) + (<= 97 x 102))))) + +(define (char-digit8->number c) + (- (char->integer c) 48)) + +(define (char-digit16->number c) + (let ((x (char->integer c))) + (cond [(<= 48 x 57) (- x 48)] + [(<= 65 x 70) (- x 55)] + [(<= 97 x 102) (- x 87)] + [else (error "bad char-digit16")]))) + +(define (octal-char d1 d2 d3) + (let ((x (+ (* 64 (char-digit8->number d1)) + (* 8 (char-digit8->number d2)) + (char-digit8->number d3)))) + (if (<= 0 x 255) + (integer->char x) + (err "ERROR")))) + +(define (not-char-alphabetic? c) + (or (eof-object? c) + (not (char-alphabetic? 
c)))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Strings + +;; Assume: have already read '"' +(define ( cs) + (match (read-char) + [(? eof-object?) (err "error")] + [#\" (list->string (reverse cs))] + [#\\ ( cs)] + [c ( (cons c cs))])) + +(define ( cs) + (match (read-char) + [(? eof-object?) (err "error")] + [#\a ( (cons #\007 cs))] + [#\b ( (cons #\010 cs))] + [#\t ( (cons #\011 cs))] + [#\n ( (cons #\012 cs))] + [#\v ( (cons #\013 cs))] + [#\f ( (cons #\014 cs))] + [#\r ( (cons #\015 cs))] + [#\e ( (cons #\033 cs))] + [#\" ( (cons #\" cs))] + [#\' ( (cons #\' cs))] + [#\\ ( (cons #\\ cs))] + [#\x (* cs 2)] + [#\u (* cs 4)] ; FIXME: will need a different function to handle \u...\u... form + [#\U (* cs 8)] + [(? char-digit8? d) (+ cs (list d) 2)] + [#\newline ( cs)] + [_ (err "error")])) + +(define (+ cs ds n) + (if (zero? n) + ( (cons (char-digit8s->char ds) cs)) + (match (peek-char) + [(? eof-object?) (err "error")] + [(? char-digit8?) (+ cs (cons (read-char) ds) (sub1 n))] + [_ ( (cons (char-digit8s->char ds) cs))]))) + +(define (* cs n) + (match (peek-char) + [(? eof-object?) (err "error")] + [(? char-digit16?) (+ cs (list (read-char)) (sub1 n))] + [_ (err "error")])) + +(define (+ cs ds n) + (if (zero? n) + (return-+ cs ds) + (match (peek-char) + [(? eof-object?) (err "error")] + [(? char-digit16?) (+ cs (cons (read-char) ds) (sub1 n))] + [_ (return-+ cs ds)]))) + +(define (return-+ cs ds) + (let ((r (char-digit16s->char ds))) + (if (err? r) + r + ( (cons r cs))))) + +(define (char-digit8s->char ds) + (integer->char (char-digit8s->number ds))) + +(define (delim?) + (let ((c (peek-char))) + (or (eof-object? c) + (char-delim? c)))) + +(define (char-delim? x) + (or (char-whitespace? x) + (memq x '(#\( #\) #\[ #\] #\{ #\} #\" #\, #\' #\` #\;)))) + +(define (opposite? p1 p2) + (match p1 + [#\( (char=? p2 #\))] + [#\[ (char=? p2 #\])] + [#\{ (char=? p2 #\})])) + +(define (open-paren? 
c) + (memq c '(#\( #\[ #\{))) + +(define (close-paren? c) + (memq c '(#\) #\] #\}))) + +;; committed to seeing chars followed by a delimiter, producing x +(define (committed-delim chars x) + (match chars + ['() (if (delim?) x (err "unexpected sequence"))] + [(cons c0 cs) + (let ((c1 (read-char))) + (if (and (char? c1) (char=? c1 c0)) + (committed-delim cs x) + (err "unexpected sequence")))])) + +(define (unimplemented x) + (err (string-append "unimplemented: " x))) diff --git a/langs/outlaw/string.c b/langs/outlaw/string.c new file mode 100644 index 00000000..5fc9f90f --- /dev/null +++ b/langs/outlaw/string.c @@ -0,0 +1,20 @@ +#include "values.h" + +int string_append(const val_str_t* s1, const val_str_t* s2, val_str_t* dest) +{ + if (!s1 && !s2) { return 0; } + int i1 = (s1 ? s1->len : 0); + int i2 = (s2 ? s2->len : 0); + int len = i1+i2; + dest->len = len; + int i; + if (s1) { + for (i = 0; i < s1->len; i++) + dest->codepoints[i] = s1->codepoints[i]; + } + if (s2) { + for (i = 0; i < s2->len; i++) + dest->codepoints[i1 + i] = s2->codepoints[i]; + } + return 2+len+((len % 2) == 0 ? 
0 : 1); +} diff --git a/langs/outlaw/symbol.c b/langs/outlaw/symbol.c new file mode 100644 index 00000000..bcff4f3f --- /dev/null +++ b/langs/outlaw/symbol.c @@ -0,0 +1,55 @@ +#include +#include +#include "values.h" + +int symb_cmp(const val_symb_t *, const val_symb_t *); + +// binary tree node +struct Node { + val_symb_t* elem; + struct Node* left; + struct Node* right; +}; + +static struct Node *symbol_tbl = NULL; + +val_symb_t *intern_symbol(val_symb_t* symb) +{ + struct Node **curr = &symbol_tbl; + + while (*curr) { + struct Node *t = *curr; + int r = symb_cmp(symb, t->elem); + if (r == 0) { + // found it, so return saved pointer + return t->elem; + } else if (r < 0) { + curr = &t->left; + } else { + curr = &t->right; + } + } + + // wasn't found, so insert it and return pointer + *curr = calloc(1, sizeof(struct Node)); + (*curr)->elem = symb; + return (*curr)->elem; +} + +int symb_cmp(const val_symb_t *s1, const val_symb_t *s2) +{ + if (s1 == s2) return 0; + + int64_t len1 = s1->len; + int64_t len2 = s2->len; + + int64_t len = len1 < len2 ? len1 : len2; + int i; + + for (i = 0; i < len; i++) { + if (s1->codepoints[i] != s2->codepoints[i]) + return s1->codepoints[i] - s2->codepoints[i]; + } + + return len1 - len2; +} diff --git a/langs/outlaw/test/build-runtime.rkt b/langs/outlaw/test/build-runtime.rkt new file mode 100644 index 00000000..4a1f1bf4 --- /dev/null +++ b/langs/outlaw/test/build-runtime.rkt @@ -0,0 +1,8 @@ +#lang racket +(require "../a86/interp.rkt") + +;; link with runtime for IO operations +(unless (file-exists? "../runtime.o") + (system "make -C .. 
runtime.o")) +(current-objs + (list (path->string (normalize-path "../runtime.o")))) diff --git a/langs/jig/test/compile.rkt b/langs/outlaw/test/compile.rkt similarity index 94% rename from langs/jig/test/compile.rkt rename to langs/outlaw/test/compile.rkt index 81defae6..0d8f86a0 100644 --- a/langs/jig/test/compile.rkt +++ b/langs/outlaw/test/compile.rkt @@ -3,7 +3,7 @@ "../parse.rkt" "../compile.rkt" "../unload-bits-asm.rkt" - a86/interp) + "../a86/interp.rkt") ;; link with runtime for IO operations (unless (file-exists? "../runtime.o") diff --git a/langs/outlaw/test/read.rkt b/langs/outlaw/test/read.rkt new file mode 100644 index 00000000..f08aa6df --- /dev/null +++ b/langs/outlaw/test/read.rkt @@ -0,0 +1,62 @@ +#lang racket +(require rackunit + (rename-in "../stdlib.rkt" [read read-stdin])) + +(define (p s) + (with-input-from-string s + (lambda () + (read-stdin)))) + +(check-equal? (p "") eof) +(check-equal? (p " ") eof) +(check-equal? (p ";123") eof) +(check-equal? (p "#;123 ") eof) +(check-equal? (p "#;123") eof) +(check-equal? (p "#|123|# ") eof) +(check-equal? (p "#;#|123|#1 ") eof) +(check-equal? (p "#;#;1 2") eof) +(check-equal? (p "123") 123) +(check-equal? (p "#t") #t) +(check-equal? (p "#f") #f) +(check-equal? (p "#T") #t) +(check-equal? (p "#F") #f) +(check-equal? (p "#b0") 0) +(check-equal? (p "#b1") 1) +(check-equal? (p "#b101") #b101) +(check-equal? (p "#B101") #b101) +(check-equal? (p "#o0") 0) +(check-equal? (p "#o1") 1) +(check-equal? (p "#o701") #o701) +(check-equal? (p "#O701") #o701) +(check-equal? (p "#d0") 0) +(check-equal? (p "#d1") 1) +(check-equal? (p "#d901") 901) +(check-equal? (p "#D901") 901) +(check-equal? (p "#x0") 0) +(check-equal? (p "#x1") 1) +(check-equal? (p "#xF01") #xF01) +(check-equal? (p "#XF01") #xF01) +(check-equal? (p ";123\n1") 1) +(check-equal? (p "()") '()) +(check-equal? (p "[]") '()) +(check-equal? (p "{}") '()) +(check-equal? (p "(#t)") '(#t)) +(check-equal? (p "[#t]") '(#t)) +(check-equal? 
(p "{#t}") '(#t)) +(check-equal? (p "((#t))") '((#t))) +(check-equal? (p "#\\u ") #\u) +(check-equal? (p "#\\p\n") #\p) +(check-equal? (p "(1 . 2)") '(1 . 2)) +#| +(check-pred err? (p (string-append "#" "|"))) +(check-pred err? (p "#;")) +(check-pred err? (p "(}")) +(check-pred err? (p "(]")) +(check-pred err? (p "[)")) +(check-pred err? (p "(x}")) +(check-pred err? (p "(x]")) +(check-pred err? (p "[x)")) +(check-pred err? (p "(x . y}")) +(check-pred err? (p "(x . y]")) +(check-pred err? (p "[x . y)"))) +|# diff --git a/langs/outlaw/test/self-host.rkt b/langs/outlaw/test/self-host.rkt new file mode 100644 index 00000000..1be03dd2 --- /dev/null +++ b/langs/outlaw/test/self-host.rkt @@ -0,0 +1,3 @@ +#lang racket +(require rackunit) +(check-true (system "make -C .. self-host-test")) \ No newline at end of file diff --git a/langs/outlaw/test/test-runner.rkt b/langs/outlaw/test/test-runner.rkt new file mode 100644 index 00000000..64f0d419 --- /dev/null +++ b/langs/outlaw/test/test-runner.rkt @@ -0,0 +1,781 @@ +#lang racket +(provide test-runner test-runner-io) +(require rackunit) + +(define (test-runner run) + ;; Abscond examples + (check-equal? (run 7) 7) + (check-equal? (run -8) -8) + + ;; Blackmail examples + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7) + + ;; Con examples + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 1) 1 2) + 7)) + 7) + + ;; Dupe examples + (check-equal? (run #t) #t) + (check-equal? (run #f) #f) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) + (check-equal? (run '(if #t 3 4)) 3) + (check-equal? (run '(if #f 3 4)) 4) + (check-equal? (run '(if 0 3 4)) 3) + (check-equal? (run '(zero? 4)) #f) + (check-equal? (run '(zero? 
0)) #t) + + ;; Dodger examples + (check-equal? (run #\a) #\a) + (check-equal? (run #\b) #\b) + (check-equal? (run '(char? #\a)) #t) + (check-equal? (run '(char? #t)) #f) + (check-equal? (run '(char? 8)) #f) + (check-equal? (run '(char->integer #\a)) (char->integer #\a)) + (check-equal? (run '(integer->char 955)) #\λ) + + ;; Extort examples + (check-equal? (run '(add1 #f)) 'err) + (check-equal? (run '(sub1 #f)) 'err) + (check-equal? (run '(zero? #f)) 'err) + (check-equal? (run '(char->integer #f)) 'err) + (check-equal? (run '(integer->char #f)) 'err) + (check-equal? (run '(integer->char -1)) 'err) + (check-equal? (run '(write-byte #f)) 'err) + (check-equal? (run '(write-byte -1)) 'err) + (check-equal? (run '(write-byte 256)) 'err) + + ;; Fraud examples + (check-equal? (run '(let ((x 7)) x)) 7) + (check-equal? (run '(let ((x 7)) 2)) 2) + (check-equal? (run '(let ((x 7)) (add1 x))) 8) + (check-equal? (run '(let ((x (add1 7))) x)) 8) + (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) + (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) + (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) + + (check-equal? (run '(let ((x 0)) + (if (zero? x) 7 8))) + 7) + (check-equal? (run '(let ((x 1)) + (add1 (if (zero? x) 7 8)))) + 9) + (check-equal? (run '(+ 3 4)) 7) + (check-equal? (run '(- 3 4)) -1) + (check-equal? (run '(+ (+ 2 1) 4)) 7) + (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) + (check-equal? (run '(let ((x (+ 1 2))) + (let ((z (- 4 x))) + (+ (+ x x) z)))) + 7) + (check-equal? (run '(= 5 5)) #t) + (check-equal? (run '(= 4 5)) #f) + (check-equal? (run '(= (add1 4) 5)) #t) + (check-equal? (run '(< 5 5)) #f) + (check-equal? (run '(< 4 5)) #t) + (check-equal? (run '(< (add1 4) 5)) #f) + + ;; Hustle examples + (check-equal? (run ''()) '()) + (check-equal? (run '(box 1)) (box 1)) + (check-equal? (run '(box -1)) (box -1)) + (check-equal? (run '(cons 1 2)) (cons 1 2)) + (check-equal? (run '(unbox (box 1))) 1) + (check-equal? 
(run '(car (cons 1 2))) 1) + (check-equal? (run '(cdr (cons 1 2))) 2) + (check-equal? (run '(cons 1 '())) (list 1)) + (check-equal? (run '(let ((x (cons 1 2))) + (begin (cdr x) + (car x)))) + 1) + (check-equal? (run '(let ((x (cons 1 2))) + (let ((y (box 3))) + (unbox y)))) + 3) + (check-equal? (run '(eq? 1 1)) #t) + (check-equal? (run '(eq? 1 2)) #f) + (check-equal? (run '(eq? (cons 1 2) (cons 1 2))) #f) + (check-equal? (run '(let ((x (cons 1 2))) (eq? x x))) #t) + + ;; Hoax examples + (check-equal? (run '(make-vector 0 0)) #()) + (check-equal? (run '(make-vector 1 0)) #(0)) + (check-equal? (run '(make-vector 3 0)) #(0 0 0)) + (check-equal? (run '(make-vector 3 5)) #(5 5 5)) + (check-equal? (run '(vector? (make-vector 0 0))) #t) + (check-equal? (run '(vector? (cons 0 0))) #f) + (check-equal? (run '(vector-ref (make-vector 0 #f) 0)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 2)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 3)) 'err) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 0 4) + x))) + #(4 5 5)) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 1 4) + x))) + #(5 4 5)) + (check-equal? (run '(vector-length (make-vector 3 #f))) 3) + (check-equal? (run '(vector-length (make-vector 0 #f))) 0) + (check-equal? (run '"") "") + (check-equal? (run '"fred") "fred") + (check-equal? (run '"wilma") "wilma") + (check-equal? (run '(make-string 0 #\f)) "") + (check-equal? (run '(make-string 3 #\f)) "fff") + (check-equal? (run '(make-string 3 #\g)) "ggg") + (check-equal? (run '(string-length "")) 0) + (check-equal? (run '(string-length "fred")) 4) + (check-equal? (run '(string-ref "" 0)) 'err) + (check-equal? (run '(string-ref (make-string 0 #\a) 0)) 'err) + (check-equal? (run '(string-ref "fred" 0)) #\f) + (check-equal? 
(run '(string-ref "fred" 1)) #\r) + (check-equal? (run '(string-ref "fred" 2)) #\e) + (check-equal? (run '(string-ref "fred" 4)) 'err) + (check-equal? (run '(string? "fred")) #t) + (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") + + ;; Iniquity tests + (check-equal? (run + '(define (f x) x) + '(f 5)) + 5) + (check-equal? (run + '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) + '(tri 9)) + 45) + + (check-equal? (run + '(define (f x) x) + '(define (g x) (f x)) + '(g 5)) + 5) + (check-equal? (run + '(define (my-even? x) + (if (zero? x) + #t + (my-odd? (sub1 x)))) + '(define (my-odd? x) + (if (zero? x) + #f + (my-even? (sub1 x)))) + '(my-even? 101)) + #f) + (check-equal? (run + '(define (map-add1 xs) + (if (empty? xs) + '() + (cons (add1 (car xs)) + (map-add1 (cdr xs))))) + '(map-add1 (cons 1 (cons 2 (cons 3 '()))))) + '(2 3 4)) + (check-equal? (run + '(define (f x) + 10) + '(f 1)) + 10) + (check-equal? (run + '(define (f x) + 10) + '(let ((x 2)) (f 1))) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(f 1 2)) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(let ((z 2)) (f 1 2))) + 10) + + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? 
(run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) + + ;; Loot examples + (check-true (procedure? (run '(λ (x) x)))) + (check-equal? (run '((λ (x) x) 5)) + 5) + + (check-equal? (run '(let ((f (λ (x) x))) (f 5))) + 5) + (check-equal? (run '(let ((f (λ (x y) x))) (f 5 7))) + 5) + (check-equal? (run '(let ((f (λ (x y) y))) (f 5 7))) + 7) + (check-equal? (run '((let ((x 1)) + (let ((y 2)) + (lambda (z) (cons x (cons y (cons z '())))))) + 3)) + '(1 2 3)) + (check-equal? (run '(define (adder n) + (λ (x) (+ x n))) + '((adder 5) 10)) + 15) + (check-equal? (run '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))))) + 36)) + 666) + (check-equal? (run '(define (tri n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))) + '(tri 36)) + 666) + (check-equal? (run '(define (tri n) + (match n + [0 0] + [m (+ m (tri (sub1 m)))])) + '(tri 36)) + 666) + (check-equal? (run '((match 8 [8 (lambda (x) x)]) 12)) + 12) + + ;; Mug examples + (check-equal? (run '(symbol? 'foo)) #t) + (check-equal? (run '(symbol? (string->symbol "foo"))) #t) + (check-equal? (run '(eq? 'foo 'foo)) #t) + (check-equal? (run '(eq? (string->symbol "foo") + (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'foo (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) + #t) + (check-equal? (run '(symbol? 'g0)) #t) + (check-equal? (run '(symbol? "g0")) #f) + (check-equal? (run '(symbol? (string->symbol "g0"))) #t) + (check-equal? (run '(symbol? (string->uninterned-symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->uninterned-symbol "g0"))) #f) + (check-equal? (run '(eq? 
(string->uninterned-symbol "g0") (string->uninterned-symbol "g0"))) + #f) + (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f) + (check-equal? (run '(string? (symbol->string 'foo))) #t) + (check-equal? (run '(eq? (symbol->string 'foo) "foo")) #f) + (check-equal? (run ''foo) 'foo) + (check-equal? (run '(eq? (match #t [_ "foo"]) "bar")) #f) + (check-equal? (run '(eq? (match #t [_ 'foo]) 'bar)) #f) + (check-equal? (run '(match 'foo ['bar #t] [_ #f])) #f) + (check-equal? (run '(match 'foo ['foo #t] [_ #f])) #t) + (check-equal? (run '(match "foo" ["foo" #t] [_ #f])) #t) + (check-equal? (run '(match "foo" ["bar" #t] [_ #f])) #f) + (check-equal? (run '(match (cons '+ (cons 1 (cons 2 '()))) + [(cons '+ (cons x (cons y '()))) + (+ x y)])) + 3) + + ;; Mountebank examples + (check-equal? (run '#()) + #()) + (check-equal? (run ''#()) + #()) + (check-equal? (run ''#t) + #t) + (check-equal? (run ''7) + 7) + (check-equal? (run ''(1 2 3)) + '(1 2 3)) + (check-equal? (run ''(1 . 2)) + '(1 . 2)) + (check-equal? (run ''(("1") (#() #(1 #(2))) (#&(1)) (#f) (4) (5))) + '(("1") (#() #(1 #(2))) (#&(1)) (#f) (4) (5))) + (check-equal? (run '(define (f) (cons 1 2)) + '(eq? (f) (f))) + #f) + (check-equal? (run '(define (f) '(1 . 2)) + '(eq? (f) (f))) + #t) + (check-equal? (run '(let ((x '(foo . foo))) + (eq? (car x) (cdr x)))) + #t) + (check-equal? + (run '(define (eval e r) + (match e + [(list 'zero? e) + (zero? (eval e r))] + [(list 'sub1 e) + (sub1 (eval e r))] + [(list '+ e1 e2) + (+ (eval e1 r) (eval e2 r))] + [(list 'if e1 e2 e3) + (if (eval e1 r) + (eval e2 r) + (eval e3 r))] + [(list 'λ (list x) e) + (lambda (v) (eval e (cons (cons x v) r)))] + [(list e1 e2) + ((eval e1 r) (eval e2 r))] + [_ + (if (symbol? e) + (lookup r e) + e)])) + '(define (lookup r x) + (match r + [(cons (cons y v) r) + (if (eq? x y) + v + (lookup r x))])) + '(eval '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? 
n) + 0 + (+ n (tri (sub1 n))))))) + 36) + '())) + 666) + + ;; Neerdowell examples + (check-equal? (run '(struct foo ()) + '(foo? (foo))) + #t) + (check-equal? (run '(struct foo (x)) + '(foo? (foo 1))) + #t) + (check-equal? (run '(struct foo ()) + '(struct bar ()) + '(foo? (bar))) + #f) + (check-equal? (run '(struct foo ()) + '(struct bar ()) + '(bar? (bar))) + #t) + (check-equal? (run '(struct foo ()) + '(struct bar ()) + '(bar? #())) + #f) + (check-equal? (run '(struct foo (x)) + '(foo-x (foo 3))) + 3) + (check-equal? (run '(struct foo (x)) + '(let ((x (foo 3))) + (foo-x x))) + 3) + (check-equal? (run '(struct foo (x)) + '(let ((x (foo 3))) + (foo-x x))) + 3) + (check-equal? (run '(struct foo (x)) + '(let ((x (foo (foo 3)))) + (foo? (foo-x x)))) + #t) + (check-equal? (run '(struct foo (x y z)) + '(let ((x (foo 1 2 3))) + (cons (foo-x x) + (cons (foo-y x) + (cons (foo-z x) + '()))))) + '(1 2 3)) + (check-equal? (run '(struct foo ()) + '(eq? (foo) (foo))) + #f) + (check-equal? (run '(struct foo (x)) + '(foo-x #t)) + 'err) + (check-equal? (run '(struct foo (x)) + '(struct bar (y)) + '(match (bar 5) + [(foo x) #f] + [(bar x) x])) + 5) + (check-equal? (run '(struct nil ()) + '(struct pair (x y)) + '(define (len x) + (match x + [(nil) 0] + [(pair _ x) (add1 (len x))])) + '(len (pair 1 (pair 2 (pair 3 (nil)))))) + 3) + (check-equal? (run '(match (cons (cons 1 2) '()) + [(cons (cons x y) '()) y])) + 2) + (check-equal? (run '(struct foo (p q)) + '(match (cons (foo 1 2) '()) + [(cons (foo x y) _) y])) + 2) + (check-equal? (run '(struct foo (p q)) + '(match (cons (foo 1 2) '()) + [(cons (foo x 3) _) x] + [_ 9])) + 9) + (check-equal? (run '(struct foo (x q)) + '(define (get z) + (match z + ['() #f] + [(cons (foo x q) y) x])) + '(get (cons (foo 7 2) '()))) + 7) + (check-equal? 
(run '(struct posn (x y)) + '(define (posn-xs ps) + (match ps + ['() '()] + [(cons (posn x y) ps) + (cons x (posn-xs ps))])) + '(posn-xs (cons (posn 3 4) (cons (posn 5 6) (cons (posn 7 8) '()))))) + '(3 5 7)) + (check-equal? (run '(struct Foo (x y z)) + '(match (Foo 1 2 3) + [(Foo x y z) z])) + 3) + (check-equal? (run '(struct Boo (x)) + '(match 8 + [(Boo 'y) 0] + [_ 1])) + 1) + + ;; Outlaw examples + (check-equal? (run '(+)) 0) + (check-equal? (run '(+ 1 2 3)) 6) + (check-equal? (run '(< 1)) #t) + (check-equal? (run '(< 1 2 3)) #t) + (check-equal? (run '(< 1 3 3)) #f) + (check-equal? (run '(> 1)) #t) + (check-equal? (run '(> 3 2 1)) #t) + (check-equal? (run '(> 3 3 1)) #f) + (check-equal? (run '(<= 1)) #t) + (check-equal? (run '(<= 1 2 3)) #t) + (check-equal? (run '(<= 1 3 3)) #t) + (check-equal? (run '(<= 1 4 3)) #f) + (check-equal? (run '(>= 1)) #t) + (check-equal? (run '(>= 3 2 1)) #t) + (check-equal? (run '(>= 3 3 1)) #t) + (check-equal? (run '(>= 3 4 1)) #f) + (check-equal? (run '(list)) '()) + (check-equal? (run '(list 1 2 3)) '(1 2 3)) + (check-equal? (run '(map add1 (list 1 2 3))) '(2 3 4)) + (check-equal? (run '(map + (list 1 2 3) (list 4 5 6))) '(5 7 9)) + (check-equal? (run '(append)) '()) + (check-equal? (run '(append '(1 2 3))) '(1 2 3)) + (check-equal? (run '(append '(1 2 3) '())) '(1 2 3)) + (check-equal? (run '(append '() '(1 2 3))) '(1 2 3)) + (check-equal? (run '(append '(1 2 3) '(4 5 6))) '(1 2 3 4 5 6)) + (check-equal? (run '(memq 'x '())) #f) + (check-equal? (run '(memq 'x '(p x y))) '(x y)) + (check-equal? (run '(member 'x '() eq?)) #f) + (check-equal? (run '(member 'x '(p x y) eq?)) '(x y)) + (check-equal? (run '(append-map list '(1 2 3))) '(1 2 3)) + (check-equal? (run '(vector->list #())) '()) + (check-equal? (run '(vector->list #(1 2 3))) '(1 2 3)) + (check-equal? (run '(number->string 0)) "0") + (check-equal? (run '(number->string 10)) "10") + (check-equal? (run '(number->string 123)) "123") + (check-equal? 
(run '(number->string 0 10)) "0") + (check-equal? (run '(number->string 10 10)) "10") + (check-equal? (run '(number->string 123 10)) "123") + (check-equal? (run '(number->string 0 2)) "0") + (check-equal? (run '(number->string 1 2)) "1") + (check-equal? (run '(number->string 3 2)) "11") + (check-equal? (run '(number->string 8 2)) "1000") + (check-equal? (run '(number->string 0 8)) "0") + (check-equal? (run '(number->string 1 8)) "1") + (check-equal? (run '(number->string 3 8)) "3") + (check-equal? (run '(number->string 8 8)) "10") + (check-equal? (run '(number->string 0 16)) "0") + (check-equal? (run '(number->string 1 16)) "1") + (check-equal? (run '(number->string 3 16)) "3") + (check-equal? (run '(number->string 8 16)) "8") + (check-equal? (run '(number->string 10 16)) "a") + (check-equal? (run '(number->string 15 16)) "f") + (check-equal? (run '(number->string 16 16)) "10") + (check-pred symbol? (run '(gensym))) + (check-equal? (run '(eq? (gensym) (gensym))) #f) + (check-equal? (run '(let ((x (gensym))) (eq? x x))) #t) + (check-pred symbol? (run '(gensym 'fred))) + (check-equal? (run '(eq? (gensym 'fred) (gensym 'fred))) #f) + (check-equal? (run '(let ((x (gensym 'fred))) (eq? x x))) #t) + (check-pred symbol? (run '(gensym "fred"))) + (check-equal? (run '(eq? (gensym "fred") (gensym "fred"))) #f) + (check-equal? (run '(let ((x (gensym "fred"))) (eq? x x))) #t) + (check-equal? (run '(void? (void))) #t) + (check-equal? (run '(void? void)) #f) + (check-equal? (run '(eq? (void) (void))) #t) + (check-equal? (run '(bitwise-and #b111 #b000)) #b000) + (check-equal? (run '(bitwise-and #b111 #b111)) #b111) + (check-equal? (run '(bitwise-and #b101 #b100)) #b100) + (check-equal? (run '(bitwise-and #b001 #b100)) #b000) + (check-equal? (run '(bitwise-ior #b111 #b000)) #b111) + (check-equal? (run '(bitwise-ior #b111 #b111)) #b111) + (check-equal? (run '(bitwise-ior #b101 #b100)) #b101) + (check-equal? (run '(bitwise-ior #b001 #b100)) #b101) + (check-equal? 
(run '(bitwise-xor #b111 #b000)) #b111) + (check-equal? (run '(bitwise-xor #b111 #b111)) #b000) + (check-equal? (run '(bitwise-xor #b101 #b100)) #b001) + (check-equal? (run '(bitwise-xor #b001 #b100)) #b101) + (check-equal? (run '(arithmetic-shift 1 0)) 1) + (check-equal? (run '(arithmetic-shift 1 1)) 2) + (check-equal? (run '(arithmetic-shift 1 2)) 4) + (check-equal? (run '(arithmetic-shift 1 3)) 8) + (check-equal? (run '(arithmetic-shift 3 2)) 12) + (check-equal? (run '(or)) #f) + (check-equal? (run '(or #t)) #t) + (check-equal? (run '(or 7)) 7) + (check-equal? (run '(or 7 #t)) 7) + (check-equal? (run '(or #f #f #f)) #f) + (check-equal? (run '(or #f 7 9)) 7) + (check-equal? (run '(list->string '())) "") + (check-equal? (run '(list->string '(#\a #\b #\c))) "abc") + (check-equal? (run '(char<=? #\a)) #t) + (check-equal? (run '(char<=? #\a #\b)) #t) + (check-equal? (run '(char<=? #\a #\b #\c)) #t) + (check-equal? (run '(char<=? #\a #\b #\b)) #t) + (check-equal? (run '(char<=? #\a #\b #\a)) #f) + (check-equal? (run '(= (eq-hash-code 'x) (eq-hash-code 'x))) #t) + (check-equal? (run '(= (eq-hash-code 'x) (eq-hash-code 'y))) #f) + (check-equal? (run '(foldr + #f '())) #f) + (check-equal? (run '(foldr + 0 '(1 2 3))) 6) + (check-equal? (run '(list? '())) #t) + (check-equal? (run '(list? '(1 2 3))) #t) + (check-equal? (run '(list? (cons 1 2))) #f) + (check-equal? (run '(list? #t)) #f) + (check-equal? (run '(reverse '())) '()) + (check-equal? (run '(reverse '(1 2 3))) '(3 2 1)) + (check-equal? (run '(remove-duplicates '() eq?)) '()) + (check-equal? (run '(remove-duplicates '(1 2 3) eq?)) '(1 2 3)) + (check-equal? (run '(remove-duplicates '(1 2 3 2 1 3) eq?)) '(1 2 3)) + (check-equal? (run '(remove 'x '() eq?)) '()) + (check-equal? (run '(remove 'x '(x y z) eq?)) '(y z)) + (check-equal? (run '(remove 'x '(p q x r) eq?)) '(p q r)) + (check-equal? (run '(remove 'x '(p q x r x) eq?)) '(p q r x)) + (check-equal? (run '(remove* 'x '() eq?)) '()) + (check-equal? 
(run '(remove* 'x '(x y z) eq?)) '(y z)) + (check-equal? (run '(remove* 'x '(p q x r) eq?)) '(p q r)) + (check-equal? (run '(remove* 'x '(p q x r x) eq?)) '(p q r)) + (check-equal? (run '(remq* '(x y) '())) '()) + (check-equal? (run '(remq* '(x y) '(x y z))) '(z)) + (check-equal? (run '(remq* '(x y) '(p q x r x))) '(p q r)) + (check-equal? (run '(make-list 0 #\a)) '()) + (check-equal? (run '(make-list 3 #\a)) '(#\a #\a #\a)) + (check-equal? (run '(match 8 + [(? integer?) 1] + [_ 2])) + 1) + (check-equal? (run '(match 8 + [(? string?) 1] + [_ 2])) + 2) + (check-equal? (run '(match (cons 8 "8") + [(cons (? integer?) (? string?)) 1] + [_ 2])) + 1) + (check-equal? (run '(match 8 + [(? (lambda (x) (eq? x 8))) 1] + [_ 2])) + 1) + (check-equal? (run '(match 8 + [(? integer? x) x] + [_ 2])) + 8) + (check-equal? (run '(match (box #\a) + [(box (and x (? integer?))) 1] + [(box (and x (? char?))) x])) + #\a) + + (check-equal? (run '(vector)) #()) + (check-equal? (run '(vector 1 2 3)) #(1 2 3)) + (check-equal? (run '(list->vector '())) #()) + (check-equal? (run '(list->vector '(1 2 3))) #(1 2 3)) + (check-equal? (run '(boolean? #t)) #t) + (check-equal? (run '(boolean? #f)) #t) + (check-equal? (run '(boolean? 8)) #f) + (check-equal? (run '(substring "hello" 0)) "hello") + (check-equal? (run '(substring "hello" 1)) "ello") + (check-equal? (run '(substring "hello" 1 4)) "ell") + (check-equal? (run '(odd? 7)) #t) + (check-equal? (run '(odd? 8)) #f) + (check-equal? (run '(filter odd? '())) '()) + (check-equal? (run '(filter odd? '(1 2 3 4))) '(1 3)) + (check-equal? (run '(findf odd? '())) #f) + (check-equal? (run '(findf odd? '(2 4 3 7))) 3) + (check-equal? (run '(char-alphabetic? #\a)) #t) + (check-equal? (run '(char-alphabetic? #\space)) #f) + (check-equal? (run '(char-whitespace? #\a)) #f) + (check-equal? (run '(char-whitespace? #\space)) #t) + (check-equal? (run '(begin 1)) 1) + (check-equal? (run '(begin 1 2)) 2) + (check-equal? (run '(begin 1 2 3)) 3) + (check-equal? 
(run '(let () 1 2)) 2) + (check-equal? (run '(let ((x 1)) x x)) 1) + (check-equal? (run '(let ((x 1)) x x x)) 1) + (check-equal? (run '(match 1 [1 2 3])) 3) + (check-equal? (run '(system-type)) (system-type)) + (check-equal? (run '(struct Foo (x)) + '(struct Bar (y)) + '(match (Bar 1) + [(Foo x) #f] + [(Bar x) x])) + 1) + (check-equal? (run '(procedure? add1)) #t) + (check-equal? (run '(procedure? (lambda (x) x))) #t) + (check-equal? (run '(procedure? 8)) #f) + (check-equal? (run '(struct posn (x y)) + '(procedure? (posn 3 4))) + #f) + (check-equal? (run '(apply string-append (list "x"))) + "x") + + (check-equal? (run '(* 0 8)) 0) + (check-equal? (run '(* 1 8)) 8) + (check-equal? (run '(* 2 9)) 18) + (check-equal? (run '(* 2 -3)) -6) + (check-equal? (run '(* 4 3)) 12) + (check-equal? (run '(* 8 3)) 24) + (check-equal? (run '(* 16 2)) 32) + (check-equal? (run '(* 10 5)) 50) + (check-equal? (run '(* 64 2)) 128) + (check-equal? (run '(let ((pred (lambda (x) #t))) + (match 0 + [(and (? pred) _) #t] + [_ #f]))) + #t)) + + +(define (test-runner-io run) + ;; Evildoer examples + (check-equal? (run "" 7) (cons 7 "")) + (check-equal? (run "" '(write-byte 97)) (cons (void) "a")) + (check-equal? (run "a" '(read-byte)) (cons 97 "")) + (check-equal? (run "b" '(begin (write-byte 97) (read-byte))) + (cons 98 "a")) + (check-equal? (run "" '(read-byte)) (cons eof "")) + (check-equal? (run "" '(eof-object? (read-byte))) (cons #t "")) + (check-equal? (run "a" '(eof-object? (read-byte))) (cons #f "")) + (check-equal? (run "" '(begin (write-byte 97) (write-byte 98))) + (cons (void) "ab")) + (check-equal? (run "ab" '(peek-byte)) (cons 97 "")) + (check-equal? (run "ab" '(begin (peek-byte) (read-byte))) (cons 97 "")) + ;; Extort examples + (check-equal? (run "" '(write-byte #t)) (cons 'err "")) + + ;; Fraud examples + (check-equal? (run "" '(let ((x 97)) (write-byte x))) (cons (void) "a")) + (check-equal? 
(run "" + '(let ((x 97)) + (begin (write-byte x) + x))) + (cons 97 "a")) + (check-equal? (run "b" '(let ((x 97)) (begin (read-byte) x))) + (cons 97 "")) + (check-equal? (run "b" '(let ((x 97)) (begin (peek-byte) x))) + (cons 97 "")) + + ;; Hustle examples + (check-equal? (run "" + '(let ((x 1)) + (begin (write-byte 97) + 1))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x 1)) + (let ((y 2)) + (begin (write-byte 97) + 1)))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x (cons 1 2))) + (begin (write-byte 97) + (car x)))) + (cons 1 "a")) + ;; Iniquity examples + (check-equal? (run "" + '(define (print-alphabet i) + (if (zero? i) + (void) + (begin (write-byte (- 123 i)) + (print-alphabet (sub1 i))))) + '(print-alphabet 26)) + (cons (void) "abcdefghijklmnopqrstuvwxyz")) + + ;; Outlaw examples + (check-equal? (run "" '(read-char)) + (cons eof "")) + (check-equal? (run "a" '(read-char)) + (cons #\a "")) + (check-equal? (run "ab" '(read-char)) + (cons #\a "")) + (check-equal? (run "ab" '(cons (read-char) (read-char))) + (cons '(#\a . #\b) "")) + (check-equal? (run "a" '(peek-byte (%current-input-port) 0)) + (cons 97 "")) + (check-equal? (run "ab" '(cons (peek-byte (%current-input-port) 1) (read-byte))) + (cons (cons 98 97) "")) + (check-equal? (run "abc" '(cons (peek-byte (%current-input-port) 2) + (cons (read-byte) (read-byte)))) + (cons (cons 99 (cons 97 98)) "")) + (check-equal? (run "a" '(peek-char)) + (cons #\a "")) + (check-equal? (run "ab" '(cons (peek-char) (peek-char))) + (cons '(#\a . #\a) "")) + (check-equal? (run "λ" '(peek-char)) + (cons #\λ "")) + (check-equal? (run "" '(write-char #\a)) + (cons (void) "a")) + (check-equal? (run "" '(write-char #\newline)) + (cons (void) "\n")) + (check-equal? (run "" '(write-string "hello world")) + (cons 11 "hello world")) + (check-equal? 
(run "" '(displayln "hello world")) + (cons (void) "hello world\n")) + ) diff --git a/langs/outlaw/types.h b/langs/outlaw/types.h new file mode 100644 index 00000000..ec7db8b2 --- /dev/null +++ b/langs/outlaw/types.h @@ -0,0 +1,43 @@ +#ifndef TYPES_H +#define TYPES_H + +/* + Bit layout of values + + Values are either: + - Immediates: end in #b000 + - Pointers + + Immediates are either + - Integers: end in #b0 000 + - Characters: end in #b01 000 + - True: #b11 000 + - False: #b1 11 000 + - Eof: #b10 11 000 + - Void: #b11 11 000 + - Empty: #b100 11 000 +*/ +#define imm_shift 3 +#define ptr_type_mask ((1 << imm_shift) - 1) +#define box_type_tag 1 +#define cons_type_tag 2 +#define vect_type_tag 3 +#define str_type_tag 4 +#define proc_type_tag 5 +#define symb_type_tag 6 +#define struct_type_tag 7 +#define int_shift (1 + imm_shift) +#define int_type_mask ((1 << int_shift) - 1) +#define int_type_tag (0 << (int_shift - 1)) +#define nonint_type_tag (1 << (int_shift - 1)) +#define char_shift (int_shift + 1) +#define char_type_mask ((1 << char_shift) - 1) +#define char_type_tag ((0 << (char_shift - 1)) | nonint_type_tag) +#define nonchar_type_tag ((1 << (char_shift - 1)) | nonint_type_tag) +#define val_true ((0 << char_shift) | nonchar_type_tag) +#define val_false ((1 << char_shift) | nonchar_type_tag) +#define val_eof ((2 << char_shift) | nonchar_type_tag) +#define val_void ((3 << char_shift) | nonchar_type_tag) +#define val_empty ((4 << char_shift) | nonchar_type_tag) + +#endif diff --git a/langs/jig-playground/types.rkt b/langs/outlaw/types.rkt similarity index 81% rename from langs/jig-playground/types.rkt rename to langs/outlaw/types.rkt index 806fd02e..c1c76eb5 100644 --- a/langs/jig-playground/types.rkt +++ b/langs/outlaw/types.rkt @@ -8,6 +8,9 @@ (define type-cons #b010) (define type-vect #b011) (define type-str #b100) +(define type-proc #b101) +(define type-symb #b110) +(define type-struct #b111) (define int-shift (+ 1 imm-shift)) (define char-shift (+ 2 imm-shift)) 
(define type-int #b0000) @@ -41,7 +44,8 @@ [(eq? v #t) val-true] [(eq? v #f) val-false] [(void? v) val-void] - [(empty? v) val-empty])) + [(empty? v) val-empty] + [else (error "not an immediate")])) (define (imm-bits? v) @@ -64,3 +68,12 @@ (define (str-bits? v) (zero? (bitwise-xor (bitwise-and v imm-mask) type-str))) + +(define (proc-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-proc))) + +(define (symb-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-symb))) + +(define (struct-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-struct))) diff --git a/langs/hoodwink/unload-bits-asm.rkt b/langs/outlaw/unload-bits-asm.rkt similarity index 69% rename from langs/hoodwink/unload-bits-asm.rkt rename to langs/outlaw/unload-bits-asm.rkt index be9b50c8..3274b657 100644 --- a/langs/hoodwink/unload-bits-asm.rkt +++ b/langs/outlaw/unload-bits-asm.rkt @@ -3,6 +3,8 @@ (require "types.rkt" ffi/unsafe) +(struct struct-val () #:transparent) + ;; Answer* -> Answer (define (unload/free a) (match a @@ -30,14 +32,26 @@ (string) (build-string (heap-ref i) (lambda (j) - (char-ref (+ i 8) j))))])) + (char-ref (+ i 8) j))))] + [(? symb-bits? i) + (string->symbol + (if (zero? (untag i)) + (string) + (build-string (heap-ref i) + (lambda (j) + (char-ref (+ i 8) j)))))] + [(? proc-bits? i) + (lambda _ + (error "This function is not callable."))] + [(? struct-bits? 
i) + (struct-val)])) (define (untag i) (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) (integer-length ptr-mask))) (define (heap-ref i) - (ptr-ref (cast (untag i) _int64 _pointer) _uint64)) + (ptr-ref (cast (untag i) _int64 _pointer) _int64)) (define (char-ref i j) (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j))) diff --git a/langs/outlaw/utils.rkt b/langs/outlaw/utils.rkt new file mode 100644 index 00000000..3be134e2 --- /dev/null +++ b/langs/outlaw/utils.rkt @@ -0,0 +1,50 @@ +#lang racket +(provide symbol->label symbol->data-label lookup pad-stack unpad-stack) +(require "a86/ast.rkt" "registers.rkt") + +;; Symbol -> Label +;; Produce a symbol that is a valid Nasm label +(define (symbol->label s) + (to-label "label_" s)) + +(define (symbol->data-label s) + (to-label "data_" s)) + +;; Char -> String +(define (char-encode c) + (if (or (char<=? #\a c #\z) + (char<=? #\A c #\Z) + (char<=? #\0 c #\9) + (memq c '(#\_ #;#\$ #\# #\@ #\~ #\. #\?))) + (make-string 1 c) + (string-append "$" (number->string (char->integer c) 16)))) + +(define (to-label prefix s) + (string->symbol + (string-append prefix + (apply string-append + (map char-encode + (string->list (symbol->string s))))))) + +;; Id CEnv -> [Maybe Integer] +(define (lookup x cenv) + (match cenv + ['() #f] + [(cons y rest) + (match (eq? 
x y) + [#t 0] + [#f (match (lookup x rest) + [#f #f] + [i (+ 8 i)])])])) + +;; -> Asm +;; Dynamically pad the stack to be aligned for a call +(define (pad-stack) + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; -> Asm +;; Undo the stack alignment after a call +(define (unpad-stack) + (seq (Add rsp r15))) diff --git a/langs/outlaw/values.c b/langs/outlaw/values.c new file mode 100644 index 00000000..f7826ad2 --- /dev/null +++ b/langs/outlaw/values.c @@ -0,0 +1,141 @@ +#include "types.h" +#include "values.h" + +type_t val_typeof(val_t x) +{ + switch (x & ptr_type_mask) { + case box_type_tag: + return T_BOX; + case cons_type_tag: + return T_CONS; + case vect_type_tag: + return T_VECT; + case str_type_tag: + return T_STR; + case symb_type_tag: + return T_SYMB; + case proc_type_tag: + return T_PROC; + case struct_type_tag: + return T_STRUCT; + } + + if ((int_type_mask & x) == int_type_tag) + return T_INT; + if ((char_type_mask & x) == char_type_tag) + return T_CHAR; + + switch (x) { + case val_true: + case val_false: + return T_BOOL; + case val_eof: + return T_EOF; + case val_void: + return T_VOID; + case val_empty: + return T_EMPTY; + } + + return T_INVALID; +} + +int64_t val_unwrap_int(val_t x) +{ + return x >> int_shift; +} +val_t val_wrap_int(int64_t i) +{ + return (i << int_shift) | int_type_tag; +} + +int val_unwrap_bool(val_t x) +{ + return x == val_true; +} +val_t val_wrap_bool(int b) +{ + return b ? 
val_true : val_false; +} + +val_char_t val_unwrap_char(val_t x) +{ + return (val_char_t)(x >> char_shift); +} +val_t val_wrap_char(val_char_t c) +{ + return (((val_t)c) << char_shift) | char_type_tag; +} + +val_t val_wrap_eof(void) +{ + return val_eof; +} + +val_t val_wrap_void(void) +{ + return val_void; +} + +val_box_t* val_unwrap_box(val_t x) +{ + return (val_box_t *)(x ^ box_type_tag); +} +val_t val_wrap_box(val_box_t* b) +{ + return ((val_t)b) | box_type_tag; +} + +val_cons_t* val_unwrap_cons(val_t x) +{ + return (val_cons_t *)(x ^ cons_type_tag); +} +val_t val_wrap_cons(val_cons_t *c) +{ + return ((val_t)c) | cons_type_tag; +} + +val_vect_t* val_unwrap_vect(val_t x) +{ + return (val_vect_t *)(x ^ vect_type_tag); +} +val_t val_wrap_vect(val_vect_t *v) +{ + return ((val_t)v) | vect_type_tag; +} + +val_str_t* val_unwrap_str(val_t x) +{ + return (val_str_t *)(x ^ str_type_tag); +} +val_t val_wrap_str(val_str_t *v) +{ + return ((val_t)v) | str_type_tag; +} + +val_symb_t* val_unwrap_symb(val_t x) +{ + return (val_symb_t *)(x ^ symb_type_tag); +} +val_t val_wrap_symb(val_symb_t *v) +{ + return ((val_t)v) | symb_type_tag; +} + +val_struct_t* val_unwrap_struct(val_t x) +{ + return (val_struct_t *)(x ^ struct_type_tag); +} +val_t val_wrap_struct(val_struct_t* v) +{ + return ((val_t)v) | struct_type_tag; +} + +val_port_t* val_unwrap_port(val_t x) +{ + return (val_port_t *)(x ^ struct_type_tag); +} +val_t val_wrap_port(val_port_t* v) +{ + return ((val_t)v) | struct_type_tag; +} diff --git a/langs/outlaw/values.h b/langs/outlaw/values.h new file mode 100644 index 00000000..215dc943 --- /dev/null +++ b/langs/outlaw/values.h @@ -0,0 +1,104 @@ +#ifndef VALUES_H +#define VALUES_H + +#include +#include + +/* any abstract value */ +typedef int64_t val_t; + +typedef enum type_t { + T_INVALID = -1, + /* immediates */ + T_INT, + T_BOOL, + T_CHAR, + T_EOF, + T_VOID, + T_EMPTY, + /* pointers */ + T_BOX, + T_CONS, + T_VECT, + T_STR, + T_SYMB, + T_PROC, + T_STRUCT, +} type_t; + 
+typedef uint32_t val_char_t; +typedef struct val_box_t { + val_t val; +} val_box_t; +typedef struct val_cons_t { + val_t snd; + val_t fst; +} val_cons_t; +typedef struct val_vect_t { + uint64_t len; + val_t elems[]; +} val_vect_t; +typedef struct val_str_t { + uint64_t len; + val_char_t codepoints[]; +} val_str_t; +typedef struct val_symb_t { + uint64_t len; + val_char_t codepoints[]; +} val_symb_t; +typedef struct val_struct_t { + val_t name; + val_t* vals; +} val_struct_t; +typedef struct val_port_t { + val_t symbol; + FILE *fp; + uint8_t len; + uint8_t offset; + int8_t closed; + char buf[]; +} val_port_t; + +/* return the type of x */ +type_t val_typeof(val_t x); + +/** + * Wrap/unwrap values + * + * The behavior of unwrap functions are undefined on type mismatch. + */ +int64_t val_unwrap_int(val_t x); +val_t val_wrap_int(int64_t i); + +int val_unwrap_bool(val_t x); +val_t val_wrap_bool(int b); + +val_char_t val_unwrap_char(val_t x); +val_t val_wrap_char(val_char_t b); + +val_t val_wrap_eof(); + +val_t val_wrap_void(); + +val_box_t* val_unwrap_box(val_t x); +val_t val_wrap_box(val_box_t* b); + +val_cons_t* val_unwrap_cons(val_t x); +val_t val_wrap_cons(val_cons_t* c); + +val_vect_t* val_unwrap_vect(val_t x); +val_t val_wrap_vect(val_vect_t* c); + +val_str_t* val_unwrap_str(val_t x); +val_t val_wrap_str(val_str_t* c); + +val_symb_t* val_unwrap_symb(val_t x); +val_t val_wrap_symb(val_symb_t* c); + +val_struct_t* val_unwrap_struct(val_t x); +val_t val_wrap_struct(val_struct_t* c); + +val_port_t* val_unwrap_port(val_t x); +val_t val_wrap_port(val_port_t* c); + +#endif diff --git a/langs/shakedown/Makefile b/langs/shakedown/Makefile deleted file mode 100644 index 0fc515c3..00000000 --- a/langs/shakedown/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -UNAME := $(shell uname) -.PHONY: test - -ifeq ($(UNAME), Darwin) - format=macho64 -else ifeq ($(UNAME), Linux) - format=elf64 -else - format=win64 -endif - -%.run: %.o main.o char.o clib.o - gcc main.o char.o clib.o $< -o $@ - 
-main.o: main.c types.h - gcc -c main.c -o main.o - -char.o: char.c types.h - gcc -c char.c -o char.o - -clib.o: clib.c types.h - gcc -c clib.c -o clib.o - -%.o: %.s - nasm -f $(format) -o $@ $< - -%.s: %.shk - racket -t compile-file.rkt -m $< > $@ - -clean: - rm *.o *.s *.run - -test: 42.run - @test "$(shell ./42.run)" = "42" diff --git a/langs/shakedown/asm/interp.rkt b/langs/shakedown/asm/interp.rkt deleted file mode 100644 index 8e05688f..00000000 --- a/langs/shakedown/asm/interp.rkt +++ /dev/null @@ -1,23 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "printer.rkt" racket/runtime-path) -(define-runtime-path dir "..") - -;; Asm -> Integer -;; Interpret (by assemblying, linking, and exec'ing) x86-64 code -;; Assume: starts with entry point run-time expects -(define (asm-interp a) - (let* ((t.s (make-temporary-file "nasm~a.s")) - (t.run (path-replace-extension t.s #".run"))) - (with-output-to-file t.s - #:exists 'truncate - (λ () - (asm-display a))) - (system (format "(cd ~a && make -s ~a) 2>&1 >/dev/null" dir t.run)) - (delete-file t.s) - (with-input-from-string - (with-output-to-string - (λ () - (system (path->string t.run)) - (delete-file t.run))) - read))) diff --git a/langs/shakedown/asm/printer.rkt b/langs/shakedown/asm/printer.rkt deleted file mode 100644 index ba32f33c..00000000 --- a/langs/shakedown/asm/printer.rkt +++ /dev/null @@ -1,83 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -;; Asm -> String -(define (asm->string a) - (foldr (λ (i s) (string-append (instr->string i) s)) "" a)) - -;; Instruction -> String -(define (instr->string i) - (match i - [`(,(? opcode2? 
o) ,a1 ,a2) - (string-append "\t" - (symbol->string o) " " - (arg->string a1) ", " - (arg->string a2) "\n")] - [`(jmp ,l) - (string-append "\tjmp " (arg->string l) "\n")] - [`(je ,l) - (string-append "\tje " (label->string l) "\n")] - [`(jle ,l) - (string-append "\tjle " (label->string l) "\n")] - [`(jl ,l) - (string-append "\tjl " (label->string l) "\n")] - [`(jg ,l) - (string-append "\tjg " (label->string l) "\n")] - [`(jge ,l) - (string-append "\tjge " (label->string l) "\n")] - [`(jne ,l) - (string-append "\tjne " (label->string l) "\n")] - [`ret "\tret\n"] - [`(neg ,a1) - (string-append "\tneg " (arg->string a1) "\n")] - [`(call ,l) - (string-append "\tcall " (arg->string l) "\n")] - [`(push ,r) - (string-append "\tpush " (reg->string r) "\n")] - [`(extern ,f) - (string-append "\textern " (label->string f) "\n")] - [`(section text) "\tsection .text\n"] - [l (string-append (label->string l) ":\n")])) - -(define (opcode2? x) - (memq x '(mov add sub cmp and cmovl xor or sal sar lea))) - -;; Arg -> String -(define (arg->string a) - (match a - [(? reg?) (reg->string a)] - [`(offset ,r) - (string-append "[" (arg->string r) "]")] - [`(offset ,r ,i) - (string-append "[" (arg->string r) " + " (number->string (* i 8)) "]")] - [(? integer?) (number->string a)] - [(? symbol?) (label->string a)])) - -(define all-regs '(rax rbx rcx rdx rsp rdi rip rbp rsi r8 r9 r10 r11 r12 r13 r14 r15)) - -;; Any -> Boolean -(define (reg? x) - (and (symbol? x) - (memq x all-regs))) - -;; Reg -> String -(define (reg->string r) - (symbol->string r)) - -;; Label -> String -;; prefix with _ for Mac -(define label->string - (match (system-type 'os) - ['macosx - (λ (s) (string-append "_" (symbol->string s)))] - [_ symbol->string])) - -;; Asm -> Void -(define (asm-display a) - ;; entry point will be first label - (let ((g (findf symbol? 
a))) - (display - (string-append "\tglobal " (label->string g) "\n" - "\tdefault rel\n" - "\textern " (label->string 'error) "\n" - (asm->string a))))) diff --git a/langs/shakedown/ast.rkt b/langs/shakedown/ast.rkt deleted file mode 100644 index 6410a0d7..00000000 --- a/langs/shakedown/ast.rkt +++ /dev/null @@ -1,183 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -;; type Prog = [FunDef] Expr - -;; type FunDef = Variable [Variable] Expr - -;; type Expr = -;; | Integer -;; | Boolean -;; | Character -;; | Variable -;; | Prim1 Expr -;; | Prim2 Expr Expr -;; | Lam Name [Variable] Expr <--- New for Loot -;; | App Expr [Expr] <--- Changed for Loot -;; | If Expr Expr Expr -;; | Let (Binding list) Expr -;; | LetRec (Binding list) Expr <--- New for Loot (See the lecture notes!) -;; | Nil - -;; Note: Fun and Call, from Knock, are gone! -;; They have been made redundant by the combination -;; of Lam (which is new) and App (which has been modified) - -;; type Prim1 = 'add1 | 'sub1 | 'zero? | box | unbox | car | cdr -;; type Prim2 = '+ | '- | cons - -;; type Binding = Variable Expr - -;; type Variable = Symbol (except 'add1 'sub1 'if, etc.) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;; The represenation of top-level programs -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(struct prog (ds e) #:transparent) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;; The represenation of a function definition -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; A FunDef has a symbol for the function's name, -;; a list of symbols representing the names of the function's -;; arguments, and one expression that forms the body of the function. 
-(struct fundef (name args body) #:transparent) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;; The Expr data structure -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; An Expr can be viewed as having 'kinds' of nodes. -;; -;; * The nodes that represnt an expression themselves -;; -;; * The nodes that are part of an expression, but no an expression themselves - -;; The below are the former: - -(struct int-e (i) #:transparent) -(struct bool-e (b) #:transparent) -(struct char-e (c) #:transparent) -(struct var-e (v) #:transparent) -(struct prim-e (p es) #:transparent) -(struct lam-e (vs es) #:transparent) -(struct lam-t (n vs es) #:transparent) -(struct app-e (f es) #:transparent) -(struct ccall-e (f es) #:transparent) ; <- new for Shakedown -(struct if-e (e t f) #:transparent) -(struct let-e (bs b) #:transparent) -(struct letr-e (bs b) #:transparent) -(struct nil-e () #:transparent) - -;; The next is the latter: - -;; A binding holds a symbol representing the bound variable and -;; Expr that represents the value that will be bound to that variable -(struct binding (v e) #:transparent) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;; AST nodes for closures (used for pedagogical purposes) -;;;;;; (see interp-defun.rkt) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(struct closure (fs e env) #:transparent) -(struct rec-closure (lam fenv) #:transparent) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;; AST utility functions (predicates) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define unops '(add1 sub1 zero? box unbox empty? car cdr)) -(define biops '(+ - cons)) - -;; Any -> Boolean -(define (prim? x) - (and (symbol? x) - (memq x (append unops biops)))) - -;; Any -> Boolean -(define (biop? x) - (and (symbol? 
x) - (memq x biops))) - -;; Any -> Boolean -(define (unop? x) - (and (symbol? x) - (memq x unops))) - -(define (value? v) - (or (int-e? v) - (bool-e? v))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;; AST utility functions (getters) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; It will sometimes be useful to get the list of all the variables that are -;; introduced by a `let` -;; [Binding] -> [Symbol] -(define (get-vars bs) - (match bs - ['() '()] - [(cons (binding v _) bs) (cons v (get-vars bs))])) - -;; Get all of the _definitions_ from a list of bindings -;; [Binding] -> [Expr] -(define (get-defs bs) - (match bs - ['() '()] - [(cons (binding _ def) bs) (cons def (get-defs bs))])) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;; AST utility functions (maps) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (bindings-map-def f bs) - (match bs - ['() '()] - [(cons (binding n def) bs) - (cons (binding n (f def)) (bindings-map-def f bs))])) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;; AST utility functions (printers) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; We have switched to using `#:transparent` above, so this should only be -;; necessary if you're desperate when debugging :'( - -;; Given a Program, construct an sexpr that has the same shape -(define (prog-debug p) - (match p - [(prog ds e) `(prog ,(map fundef-debug ds) ,(ast-debug e))])) - -;; Given a FunDef, construct an sexpr that has the same shape -(define (fundef-debug def) - (match def - [(fundef name args body) `(fundef ,name ,args ,(ast-debug body))])) - -;; Given an AST, construct an sexpr that has the same shape -(define (ast-debug a) - (match a - [(int-e i) `(int-e ,i)] - [(bool-e b) `(bool-e ,b)] - [(char-e c) `(char-e ,c)] - [(var-e v) 
`(var-e ,v)] - [(nil-e) ''()] - [(prim-e p es) `(prim-e ,p ,@(map ast-debug es))] - [(lam-t n vs e)`(lam-t ,n ,vs ,(ast-debug e))] - [(lam-e vs e) `(lam-e ,vs ,(ast-debug e))] - [(app-e f es) `(app-e ,(ast-debug f) ,@(map ast-debug es))] - [(if-e e t f) `(if-e ,(ast-debug e) - ,(ast-debug t) - ,(ast-debug f))] - [(let-e bs b) `(let-e ,(binding-debug bs) ,(ast-debug b))] - [(letr-e bs b) `(letr-e ,(binding-debug bs) ,(ast-debug b))])) - -(define (binding-debug bnds) - (match bnds - ['() '()] - [(cons (binding v e) bnds) `((,v ,(ast-debug e)) ,@(binding-debug bnds))])) diff --git a/langs/shakedown/clib.c b/langs/shakedown/clib.c deleted file mode 100644 index e4890547..00000000 --- a/langs/shakedown/clib.c +++ /dev/null @@ -1,15 +0,0 @@ -#include -#include -#include -#include "types.h" - -int64_t c_fun() { - puts("Hello, from C!"); - return (42 << imm_shift); -} - -int64_t c_fun1(int64_t x) { - printf("You gave me x = %" PRId64 "\n", x); - int64_t res = x * x; - return (res << imm_shift); -} diff --git a/langs/shakedown/compile-file.rkt b/langs/shakedown/compile-file.rkt deleted file mode 100644 index b993b89d..00000000 --- a/langs/shakedown/compile-file.rkt +++ /dev/null @@ -1,18 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "compile.rkt" "syntax.rkt" "asm/printer.rkt") - -;; String -> Void -;; Compile contents of given file name, -;; emit asm code on stdout -(define (main fn) - (with-input-from-file fn - (λ () - (let ((p (read-program))) - ; assumed OK for now - ;(unless (and (prog? p) (closed? 
p)) - ; (error "syntax error")) - (asm-display (compile (sexpr->prog p))))))) - -(define (read-program) - (read)) diff --git a/langs/shakedown/compile.rkt b/langs/shakedown/compile.rkt deleted file mode 100644 index 5d0fed59..00000000 --- a/langs/shakedown/compile.rkt +++ /dev/null @@ -1,582 +0,0 @@ -#lang racket -(require "syntax.rkt" "ast.rkt") -(provide (all-defined-out)) - -;; An immediate is anything ending in #b000 -;; All other tags in mask #b111 are pointers - -(define result-shift 3) -(define result-type-mask (sub1 (arithmetic-shift 1 result-shift))) -(define type-imm #b000) -(define type-box #b001) -(define type-pair #b010) -(define type-string #b011) -(define type-proc #b100) - -(define imm-shift (+ 2 result-shift)) -(define imm-type-mask (sub1 (arithmetic-shift 1 imm-shift))) -(define imm-type-int (arithmetic-shift #b00 result-shift)) -(define imm-type-bool (arithmetic-shift #b01 result-shift)) -(define imm-type-char (arithmetic-shift #b10 result-shift)) -(define imm-type-empty (arithmetic-shift #b11 result-shift)) -(define imm-val-false imm-type-bool) -(define imm-val-true - (bitwise-ior (arithmetic-shift 1 (add1 imm-shift)) imm-type-bool)) - -;; Allocate in 64-bit (8-byte) increments, so pointers -;; end in #b000 and we tag with #b001 for boxes, etc. - -;; type CEnv = (Listof (Maybe Variable)) -;; type Imm = Integer | Boolean | Char | ''() - -;; type LExpr = -;; .... 
-;; | `(λ ,Formals ,Label ,Expr) - -;; type Label = (quote Symbol) - -;; Prog -> Asm -(define (compile p) - ; Remove all of the explicit function definitions - (match (desugar-prog p) - [(prog _ e) - (compile-entry (label-λ e))])) - - -;; Expr -> Asm -(define (compile-entry e) - `(,@(make-externs (ffi-calls e)) - (section text) - entry - ,@(compile-tail-e e '()) - ret - ,@(compile-λ-definitions (λs e)) - err - (push rbp) - (call error) - ret)) - -;; (Listof Symbol) -> Asm -(define (make-externs fs) - (map (lambda (s) `(extern ,s)) fs)) - -;; (Listof Lambda) -> Asm -(define (compile-λ-definitions ls) - (apply append (map compile-λ-definition ls))) - -;; Lambda -> Asm -(define (compile-λ-definition l) - (match l - [(lam-t f xs e0) - (let ((c0 (compile-tail-e e0 (reverse (append xs (fvs l)))))) - `(,f - ,@c0 - ret))] - [(lam-e _ _) (error "Lambdas need to be labeled before compiling")])) - -;; LExpr CEnv -> Asm -;; Compile an expression in tail position -(define (compile-tail-e e c) - (match e - [(var-e v) (compile-variable v c)] - [(? imm? i) (compile-imm i)] - [(prim-e (? prim? p) es) (compile-prim p es c)] - [(if-e p t f) (compile-tail-if p t f c)] - [(let-e (list b) body) (compile-tail-let b body c)] - [(letr-e bs body) (compile-tail-letrec (get-vars bs) (get-defs bs) body c)] - [(app-e f es) (compile-tail-call f es c)] - [(lam-t l xs e0) (compile-λ xs l (fvs e) c)])) - - - -;; LExpr CEnv -> Asm -;; Compile an expression in non-tail position -(define (compile-e e c) - (match e - [(var-e v) (compile-variable v c)] - [(? imm? i) (compile-imm i)] - [(prim-e (? prim? 
p) es) (compile-prim p es c)] - [(if-e p t f) (compile-if p t f c)] - [(let-e (list b) body) (compile-let b body c)] - [(letr-e bs body) (compile-letrec (get-vars bs) (get-defs bs) body c)] - [(ccall-e f es) (compile-ccall f es c)] - [(app-e f es) (compile-call f es c)] - [(lam-t l xs e0) (compile-λ xs l (fvs e) c)])) - -;; Our current set of primitive operations require no function calls, -;; so there's no difference between tail and non-tail call positions -(define (compile-prim p es c) - (match (cons p es) - [`(box ,e0) (compile-box e0 c)] - [`(unbox ,e0) (compile-unbox e0 c)] - [`(cons ,e0 ,e1) (compile-cons e0 e1 c)] - [`(car ,e0) (compile-car e0 c)] - [`(cdr ,e0) (compile-cdr e0 c)] - [`(add1 ,e0) (compile-add1 e0 c)] - [`(sub1 ,e0) (compile-sub1 e0 c)] - [`(zero? ,e0) (compile-zero? e0 c)] - [`(empty? ,e0) (compile-empty? e0 c)] - [`(+ ,e0 ,e1) (compile-+ e0 e1 c)] - [_ (error - (format "prim applied to wrong number of args: ~a ~a" p es))])) - -;; Label (listof Expr) -> Asm -(define (compile-ccall f es c) - (let* ((c0 (store-caller-save caller-saves c)) - (c* (car c0)) - (c1 (compile-es-ffi es c* 0)) - (c2 (cdr (load-caller-save caller-saves c))) - (stack-size (* 8 (length c*)))) - - ; We don't actually have to do all caller-save (that's a lot!) - ; Just the ones that our compiler emits - `(,@(cdr c0) - - ,@c1 - (mov r15 rsp) ; Using the fact that r15 is callee save - - ; change rsp to reflect the top of the stack - (sub rsp ,stack-size) - - ; align rsp to safest 16-byte aligned spot - (and rsp -16) - - ; Actually call the function - (call ,f) - - ; Restore our stack - (mov rsp r15) - - ; Put the caller-saved values back - ,@c2))) - -;; The registers that we can use to pass arguments to C functions -;; (in the right order) -;; -(define arg-regs '(rdi rsi rdx rcx r8 r9)) -(define callee-saves '(rbp rbx r12 r13 r14 r15)) -(define caller-saves '(rcx rdx rdi rsi r8 r9 r10 r11)) - -; Make sure we store every caller-save register that we care about on the stack. 
-; This is basiclaly a foldMR, but I need to learn more Racket -(define (store-caller-save rs c) - (match rs - ['() (cons c '())] - [(cons r rs) - (match (store-caller-save rs c) - [(cons d asm) - (cons (cons #f d) - (append asm `((mov (offset rsp ,(- (add1 (length d)))) ,r))))])])) - -; Same as above but inverse -(define (load-caller-save rs c) - (match rs - ['() (cons c '())] - [(cons r rs) - (match (load-caller-save rs c) - [(cons d asm) - (cons (cons #f d) - (append asm `((mov ,r (offset rsp ,(- (add1 (length d))))))))])])) - - -;; JMCT: I keep 'programming in Haskell in Racket' and I need to stop that... -;; the above is my monadic habits biting me - -;; (Listof LExpr) CEnv -> Asm -(define (compile-es-ffi es c i) - (match es - ['() '()] - [(cons e es) - (let ((c0 (compile-e e c)) - (cs (compile-es-ffi es c (add1 i)))) - `(,@c0 - (sar rax ,imm-shift) - (mov ,(list-ref arg-regs i) rax) ; Put the result in the appropriate register - ,@cs))])) - - -;; (Listof Variable) Label (Listof Variable) CEnv -> Asm -(define (compile-λ xs f ys c) - ; Save label address - `((lea rax (offset ,f 0)) - (mov (offset rdi 0) rax) - - ; Save the environment - (mov r8 ,(length ys)) - (mov (offset rdi 1) r8) - (mov r9 rdi) - (add r9 16) - ,@(copy-env-to-heap ys c 0) - - ; Return a pointer to the closure - (mov rax rdi) - (or rax ,type-proc) - (add rdi ,(* 8 (+ 2 (length ys)))))) - -;; (Listof Variable) CEnv Natural -> Asm -;; Pointer to beginning of environment in r9 -(define (copy-env-to-heap fvs c i) - (match fvs - ['() '()] - [(cons x fvs) - `((mov r8 (offset rsp ,(- (add1 (lookup x c))))) - (mov (offset r9 ,i) r8) - ,@(copy-env-to-heap fvs c (add1 i)))])) - -;; Natural Natural -> Asm -;; Move i arguments upward on stack by offset off -(define (move-args i off) - (match i - [0 '()] - [_ `(,@(move-args (sub1 i) off) - (mov rbx (offset rsp ,(- off i))) - (mov (offset rsp ,(- i)) rbx))])) - -;; LExpr (Listof LExpr) CEnv -> Asm -(define (compile-call e0 es c) - (let ((cs (compile-es es 
(cons #f c))) - (c0 (compile-e e0 c)) - (i (- (add1 (length c)))) - (stack-size (* 8 (length c)))) - `(,@c0 - (mov (offset rsp ,i) rax) - ,@cs - (mov rax (offset rsp ,i)) - ,@assert-proc - (xor rax ,type-proc) - (sub rsp ,stack-size) - - (mov rcx rsp) ; start of stack in rcx - (add rcx ,(- (* 8 (+ 2 (length es))))) - ,@(copy-closure-env-to-stack) - - (call (offset rax 0)) - (add rsp ,stack-size)))) - -;; LExpr (Listof LExpr) CEnv -> Asm -(define (compile-tail-call e0 es c) - (let ((cs (compile-es es (cons #f c))) - (c0 (compile-e e0 c)) - (i (- (add1 (length c))))) - `(,@c0 - (mov (offset rsp ,i) rax) - ,@cs - (mov rax (offset rsp ,i)) - ,@(move-args (length es) i) - ,@assert-proc - (xor rax ,type-proc) - - (mov rcx rsp) ; start of stack in rcx - (add rcx ,(- (* 8 (+ 1 (length es))))) - ,@(copy-closure-env-to-stack) - - ;,@(copy-closure-env-to-stack (length es)) - (jmp (offset rax 0))))) - - -;; -> Asm -;; Copy closure's (in rax) env to stack in rcx -(define (copy-closure-env-to-stack) - (let ((copy-loop (gensym 'copy_closure)) - (copy-done (gensym 'copy_done))) - `((mov r8 (offset rax 1)) ; length - (mov r9 rax) - (add r9 16) ; start of env - ,copy-loop - (cmp r8 0) - (je ,copy-done) - (mov rbx (offset r9 0)) - (mov (offset rcx 0) rbx) ; Move val onto stack - (sub r8 1) - (add r9 8) - (sub rcx 8) - (jmp ,copy-loop) - ,copy-done))) - -;; (Listof Variable) (Listof Lambda) Expr CEnv -> Asm -(define (compile-letrec fs ls e c) - (let ((c0 (compile-letrec-λs ls c)) - (c1 (compile-letrec-init fs ls (append (reverse fs) c))) - (c2 (compile-e e (append (reverse fs) c)))) - `(,@c0 - ,@c1 - ,@c2))) - -;; (Listof Variable) (Listof Lambda) Expr CEnv -> Asm -(define (compile-tail-letrec fs ls e c) - (let ((c0 (compile-letrec-λs ls c)) - (c1 (compile-letrec-init fs ls (append (reverse fs) c))) - (c2 (compile-tail-e e (append (reverse fs) c)))) - `(,@c0 - ,@c1 - ,@c2))) - -;; (Listof Lambda) CEnv -> Asm -;; Create a bunch of uninitialized closures and push them on the stack 
-(define (compile-letrec-λs ls c) - (match ls - ['() '()] - [(cons l ls) - (match l - [(lam-t lab as body) - (let ((cs (compile-letrec-λs ls (cons #f c))) - (ys (fvs l))) - `((lea rax (offset ,lab 0)) - (mov (offset rdi 0) rax) - (mov rax ,(length ys)) - (mov (offset rdi 1) rax) - (mov rax rdi) - (or rax ,type-proc) - (add rdi ,(* 8 (+ 2 (length ys)))) - (mov (offset rsp ,(- (add1 (length c)))) rax) - ,@cs))])])) - -;; (Listof Variable) (Listof Lambda) CEnv -> Asm -(define (compile-letrec-init fs ls c) - (match fs - ['() '()] - [(cons f fs) - (let ((ys (fvs (first ls))) - (cs (compile-letrec-init fs (rest ls) c))) - `((mov r9 (offset rsp ,(- (add1 (lookup f c))))) - (xor r9 ,type-proc) - (add r9 16) ; move past label and length - ,@(copy-env-to-heap ys c 0) - ,@cs))])) - -;; (Listof LExpr) CEnv -> Asm -(define (compile-es es c) - (match es - ['() '()] - [(cons e es) - (let ((c0 (compile-e e c)) - (cs (compile-es es (cons #f c)))) - `(,@c0 - (mov (offset rsp ,(- (add1 (length c)))) rax) - ,@cs))])) - -;; Imm -> Asm -(define (compile-imm i) - `((mov rax ,(imm->bits i)))) - -;; Imm -> Integer -(define (imm->bits i) - (match i - [(int-e i) (arithmetic-shift i imm-shift)] - [(char-e c) (+ (arithmetic-shift (char->integer c) imm-shift) imm-type-char)] - [(bool-e b) (if b imm-val-true imm-val-false)] - [(nil-e) imm-type-empty])) - - -;; Variable CEnv -> Asm -(define (compile-variable x c) - (let ((i (lookup x c))) - `((mov rax (offset rsp ,(- (add1 i))))))) - -;; LExpr CEnv -> Asm -(define (compile-box e0 c) - (let ((c0 (compile-e e0 c))) - `(,@c0 - (mov (offset rdi 0) rax) - (mov rax rdi) - (or rax ,type-box) - (add rdi 8)))) ; allocate 8 bytes - -;; LExpr CEnv -> Asm -(define (compile-unbox e0 c) - (let ((c0 (compile-e e0 c))) - `(,@c0 - ,@assert-box - (xor rax ,type-box) - (mov rax (offset rax 0))))) - -;; LExpr LExpr CEnv -> Asm -(define (compile-cons e0 e1 c) - (let ((c0 (compile-e e0 c)) - (c1 (compile-e e1 (cons #f c)))) - `(,@c0 - (mov (offset rsp ,(- (add1 
(length c)))) rax) - ,@c1 - (mov (offset rdi 0) rax) - (mov rax (offset rsp ,(- (add1 (length c))))) - (mov (offset rdi 1) rax) - (mov rax rdi) - (or rax ,type-pair) - (add rdi 16)))) - -;; LExpr CEnv -> Asm -(define (compile-car e0 c) - (let ((c0 (compile-e e0 c))) - `(,@c0 - ,@assert-pair - (xor rax ,type-pair) - (mov rax (offset rax 1))))) - -;; LExpr CEnv -> Asm -(define (compile-cdr e0 c) - (let ((c0 (compile-e e0 c))) - `(,@c0 - ,@assert-pair - (xor rax ,type-pair) - (mov rax (offset rax 0))))) - -;; LExpr CEnv -> Asm -(define (compile-empty? e0 c) - (let ((c0 (compile-e e0 c)) - (l0 (gensym))) - `(,@c0 - (and rax ,imm-type-mask) - (cmp rax ,imm-type-empty) - (mov rax ,imm-val-false) - (jne ,l0) - (mov rax ,imm-val-true) - ,l0))) - -;; LExpr CEnv -> Asm -(define (compile-add1 e0 c) - (let ((c0 (compile-e e0 c))) - `(,@c0 - ,@assert-integer - (add rax ,(arithmetic-shift 1 imm-shift))))) - -;; LExpr CEnv -> Asm -(define (compile-sub1 e0 c) - (let ((c0 (compile-e e0 c))) - `(,@c0 - ,@assert-integer - (sub rax ,(arithmetic-shift 1 imm-shift))))) - -;; LExpr CEnv -> Asm -(define (compile-zero? 
e0 c) - (let ((c0 (compile-e e0 c)) - (l0 (gensym)) - (l1 (gensym))) - `(,@c0 - ,@assert-integer - (cmp rax 0) - (mov rax ,imm-val-false) - (jne ,l0) - (mov rax ,imm-val-true) - ,l0))) - -;; LExpr LExpr LExpr CEnv -> Asm -(define (compile-if e0 e1 e2 c) - (let ((c0 (compile-e e0 c)) - (c1 (compile-e e1 c)) - (c2 (compile-e e2 c)) - (l0 (gensym)) - (l1 (gensym))) - `(,@c0 - (cmp rax ,imm-val-false) - (je ,l0) - ,@c1 - (jmp ,l1) - ,l0 - ,@c2 - ,l1))) - -;; LExpr LExpr LExpr CEnv -> Asm -(define (compile-tail-if e0 e1 e2 c) - (let ((c0 (compile-e e0 c)) - (c1 (compile-tail-e e1 c)) - (c2 (compile-tail-e e2 c)) - (l0 (gensym)) - (l1 (gensym))) - `(,@c0 - (cmp rax ,imm-val-false) - (je ,l0) - ,@c1 - (jmp ,l1) - ,l0 - ,@c2 - ,l1))) - -;; Variable LExpr LExpr CEnv -> Asm -(define (compile-tail-let b body c) - (match b - [(binding x def) - (let ((c0 (compile-e def c)) - (c1 (compile-tail-e body (cons x c)))) - `(,@c0 - (mov (offset rsp ,(- (add1 (length c)))) rax) - ,@c1))])) - -;; Variable LExpr LExpr CEnv -> Asm -(define (compile-let b body c) - (match b - [(binding x def) - (let ((c0 (compile-e def c)) - (c1 (compile-e body (cons x c)))) - `(,@c0 - (mov (offset rsp ,(- (add1 (length c)))) rax) - ,@c1))])) - -;; LExpr LExpr CEnv -> Asm -(define (compile-+ e0 e1 c) - (let ((c1 (compile-e e1 c)) - (c0 (compile-e e0 (cons #f c)))) - `(,@c1 - ,@assert-integer - (mov (offset rsp ,(- (add1 (length c)))) rax) - ,@c0 - ,@assert-integer - (add rax (offset rsp ,(- (add1 (length c)))))))) - - -(define (type-pred->mask p) - (match p - [(or 'box? 'cons? 'string? 'procedure?) result-type-mask] - [_ imm-type-mask])) - -(define (type-pred->tag p) - (match p - ['box? type-box] - ['cons? type-pair] - ['string? type-string] - ['procedure? type-proc] - ['integer? imm-type-int] - ['empty? imm-type-empty] - ['char? imm-type-char] - ['boolean? 
imm-type-bool])) - -;; Variable CEnv -> Natural -(define (lookup x cenv) - (match cenv - ['() (error "undefined variable:" x)] - [(cons y cenv) - (match (eq? x y) - [#t (length cenv)] - [#f (lookup x cenv)])])) - -(define (assert-type p) - `((mov rbx rax) - (and rbx ,(type-pred->mask p)) - (cmp rbx ,(type-pred->tag p)) - (jne err))) - -(define assert-integer (assert-type 'integer?)) -(define assert-box (assert-type 'box?)) -(define assert-pair (assert-type 'cons?)) -(define assert-string (assert-type 'string?)) -(define assert-char (assert-type 'char?)) -(define assert-proc (assert-type 'procedure?)) - -;; Asm -(define assert-natural - `(,@assert-integer - (cmp rax -1) - (jle err))) - -;; Asm -(define assert-integer-codepoint - `((mov rbx rax) - (and rbx ,imm-type-mask) - (cmp rbx 0) - (jne err) - (cmp rax ,(arithmetic-shift -1 imm-shift)) - (jle err) - (cmp rax ,(arithmetic-shift #x10FFFF imm-shift)) - (mov rbx rax) - (sar rbx ,(+ 11 imm-shift)) - (cmp rbx #b11011) - (je err))) diff --git a/langs/shakedown/example.shk b/langs/shakedown/example.shk deleted file mode 100644 index 563e3d70..00000000 --- a/langs/shakedown/example.shk +++ /dev/null @@ -1 +0,0 @@ -(let ((y (ccall c_fun1 42))) ((lambda (x) y) 11)) diff --git a/langs/shakedown/interp.rkt b/langs/shakedown/interp.rkt deleted file mode 100644 index adb22d1c..00000000 --- a/langs/shakedown/interp.rkt +++ /dev/null @@ -1,122 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "syntax.rkt") - -;; type Expr = -;; ... -;; | `(λ ,(Listof Variable) ,Expr) - -;; type Value = -;; ... -;; | Function - -;; type Function = -;; | (Values ... -> Answer) - -(define (interp e) - (interp-env (desugar e) '())) - -;; Expr REnv -> Answer -(define (interp-env e r) - (match e - [''() '()] - [(? syntactic-value? v) v] - [(list (? prim? p) es ...) - (match (interp-env* es r) - [(list vs ...) 
(interp-prim p vs)] - [_ 'err])] - [`(if ,e0 ,e1 ,e2) - (match (interp-env e0 r) - ['err 'err] - [v - (if v - (interp-env e1 r) - (interp-env e2 r))])] - [(? symbol? x) - (lookup r x)] - [`(let ((,x ,e0)) ,e1) - (match (interp-env e0 r) - ['err 'err] - [v - (interp-env e1 (ext r x v))])] - [`(letrec ,bs ,e) - (letrec ((r* (λ () - (append - (zip (map first bs) - ;; η-expansion to delay evaluating r* - ;; relies on RHSs being functions - (map (λ (l) (λ vs (apply (interp-env l (r*)) vs))) - (map second bs))) - r)))) - (interp-env e (r*)))] - [`(λ (,xs ...) ,e) - (λ vs - (if (= (length vs) (length xs)) - (interp-env e (append (zip xs vs) r)) - 'err))] - [`(,e . ,es) - (match (interp-env* (cons e es) r) - [(list f vs ...) - (if (procedure? f) - (apply f vs) - 'err)] - [_ 'err])])) - -;; (Listof Expr) REnv -> (Listof Value) | 'err -(define (interp-env* es r) - (match es - ['() '()] - [(cons e es) - (match (interp-env e r) - ['err 'err] - [v (cons v (interp-env* es r))])])) - -;; Any -> Boolean -(define (prim? x) - (and (symbol? x) - (memq x '(add1 sub1 + - zero? - box unbox empty? cons car cdr)))) - -;; Any -> Boolean -(define (syntactic-value? x) - (or (integer? x) - (boolean? x) - (null? x))) - -;; Prim (Listof Value) -> Answer -(define (interp-prim p vs) - (match (cons p vs) - [(list 'add1 (? integer? i0)) (add1 i0)] - [(list 'sub1 (? integer? i0)) (sub1 i0)] - [(list 'zero? (? integer? i0)) (zero? i0)] - [(list 'box v0) (box v0)] - [(list 'unbox (? box? v0)) (unbox v0)] - [(list 'empty? v0) (empty? v0)] - [(list 'cons v0 v1) (cons v0 v1)] - [(list 'car (cons v0 v1)) v0] - [(list 'cdr (cons v0 v1)) v1] - [(list '+ (? integer? i0) (? integer? i1)) - (+ i0 i1)] - [(list '- (? integer? i0) (? integer? i1)) - (- i0 i1)] - [_ 'err])) - -;; Env Variable -> Answer -(define (lookup env x) - (match env - ['() 'err] - [(cons (list y i) env) - (match (symbol=? 
x y) - [#t i] - [#f (lookup env x)])])) - -;; Env Variable Value -> Value -(define (ext r x i) - (cons (list x i) r)) - -(define (zip xs ys) - (match* (xs ys) - [('() '()) '()] - [((cons x xs) (cons y ys)) - (cons (list x y) - (zip xs ys))])) diff --git a/langs/shakedown/main.c b/langs/shakedown/main.c deleted file mode 100644 index 0c9ffc76..00000000 --- a/langs/shakedown/main.c +++ /dev/null @@ -1,131 +0,0 @@ -#include -#include -#include -#include "types.h" - -// in bytes -#define heap_size 1000000 - -int64_t entry(void *); -void print_result(int64_t); -void print_pair(int64_t); -void print_immediate(int64_t); -void print_char(int64_t); -void print_string(int64_t); -void print_string_char(int64_t); -void print_codepoint(int64_t); - -int main(int argc, char** argv) { - void * heap = malloc(heap_size); - int64_t result = entry(heap); - print_result(result); - printf("\n"); - return 0; -} - -void error() { - printf("err"); - exit(1); -} - -void internal_error() { - printf("internal-error"); - exit(1); -} - -void print_result(int64_t v) { - switch (result_type_mask & v) { - case type_imm: - print_immediate(v); - break; - case type_box: - printf("#&"); - print_result (*((int64_t *)(v ^ type_box))); - break; - case type_pair: - printf("("); - print_pair(v); - printf(")"); - break; - case type_string: - printf("\""); - print_string(v); - printf("\""); - break; - case type_proc: - printf("procedure"); - break; - default: - internal_error(); - } -} - -void print_immediate(int64_t v) { - switch (imm_type_mask & v) { - case imm_type_int: - printf("%" PRId64, v >> imm_shift); - break; - case imm_type_bool: - printf("#%c", v >> imm_shift ? 
't' : 'f'); - break; - case imm_type_empty: - printf("()"); - break; - case imm_type_char: - print_char(v); - default: - break; - internal_error(); - } -} - -void print_pair(int64_t v) { - int64_t car = *((int64_t *)((v + 8) ^ type_pair)); - int64_t cdr = *((int64_t *)((v + 0) ^ type_pair)); - print_result(car); - if ((imm_type_mask & cdr) == imm_type_empty) { - // nothing - } else if ((result_type_mask & cdr) == type_pair) { - printf(" "); - print_pair(cdr); - } else { - printf(" . "); - print_result(cdr); - } -} - -void print_char (int64_t v) { - int64_t codepoint = v >> imm_shift; - printf("#\\"); - switch (codepoint) { - case 0: - printf("nul"); break; - case 8: - printf("backspace"); break; - case 9: - printf("tab"); break; - case 10: - printf("newline"); break; - case 11: - printf("vtab"); break; - case 12: - printf("page"); break; - case 13: - printf("return"); break; - case 32: - printf("space"); break; - case 127: - printf("rubout"); break; - default: - print_codepoint(v); - } -} - -void print_string(int64_t v) { - int64_t* str = (int64_t *)(v ^ type_string); - int64_t len = (str[0] >> imm_shift); - - int i; - for (i = 0; i < len; i++) - print_string_char(str[i+1]); -} diff --git a/langs/shakedown/syntax.rkt b/langs/shakedown/syntax.rkt deleted file mode 100644 index 91536339..00000000 --- a/langs/shakedown/syntax.rkt +++ /dev/null @@ -1,137 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -(require "ast.rkt") - -; In order to desugar a program into a single let-rec, we take all of the -; top-level definitions and convert them into bindings for a top-level -; let-rec -(define (desugar-prog p) - (match p - [(prog ds e) (let ((bs (map desugar-def ds))) - (prog '() (letr-e bs e)))])) - -(define (desugar-def d) - (match d - [(fundef n args body) - (binding n (lam-e args body))])) - -;; Expr+ -> Expr -; The only case that is interesting is the `letr-e` case, where bindings -; get turned into lambdas -(define (desugar e+) - (match e+ - [(? imm? 
i) e+] - [(var-e v) e+] - [(prim-e p es) (prim-e p (map desugar es))] - [(if-e e0 e1 e2) (if-e (desugar e0) (desugar e1) (desugar e2))] - [(let-e bs body) (let-e (bindings-map-def desugar bs) (desugar body))] - [(letr-e bs body) (letr-e (bindings-map-def desugar bs) (desugar body))] - [(lam-e xs e0) (lam-e xs (desugar e0))] - [(ccall-e f es) (ccall-e f (map desugar es))] - [(app-e f es) (app-e (desugar f) (map desugar es))])) - -;; Any -> Boolean -(define (imm? x) - (or (int-e? x) - (bool-e? x) - (char-e? x) - (nil-e? x))) - -;; Expr -> LExpr -(define (label-λ e) - (match e - [(? imm? i) e] - [(var-e v) e] - [(prim-e p es) (prim-e p (map label-λ es))] - [(if-e e0 e1 e2) (if-e (label-λ e0) (label-λ e1) (label-λ e2))] - [(let-e bs body) (let-e (bindings-map-def label-λ bs) (label-λ body))] - [(letr-e bs body) (letr-e (bindings-map-def label-λ bs) (label-λ body))] - [(lam-e xs e0) (lam-t (gensym) xs (label-λ e0))] - [(ccall-e f es) (ccall-e f (map label-λ es))] - [(app-e f es) (app-e (label-λ f) (map label-λ es))])) - -;; LExpr -> (Listof LExpr) -;; Extract all the lambda expressions -(define (λs e) - (match e - [(? imm? i) '()] - [(var-e v) '()] - [(prim-e p es) (apply append (map λs es))] - [(if-e e0 e1 e2) (append (λs e0) (λs e1) (λs e2))] - [(let-e (list (binding v def)) body) - (append (λs def) (λs body))] - [(letr-e bs body) (append (apply append (map λs (get-defs bs))) (λs body))] - [(lam-e xs e0) (cons e (λs e0))] - [(lam-t _ xs e0) (cons e (λs e0))] - [(ccall-e f es) (apply append (map λs es))] - [(app-e f es) (append (λs f) (apply append (map λs es)))])) - -;; LExpr -> (Listof Variable) -(define (fvs e) - (define (fvs e) - (match e - [(? imm? 
i) '()] - [(var-e v) (list v)] - [(prim-e p es) (apply append (map fvs es))] - [(if-e e0 e1 e2) (append (fvs e0) (fvs e1) (fvs e2))] - [(let-e bs body) (append (apply append (map fvs (get-defs bs))) - (remq* (get-vars bs) (fvs body)))] - [(letr-e bs body) (remq* (get-vars bs) (append (apply append (map fvs (get-defs bs))) (fvs body)))] - [(lam-t _ xs e0) (remq* xs (fvs e0))] - [(lam-e xs e0) (remq* xs (fvs e0))] - [(ccall-e f es) (apply append (map fvs es))] - [(app-e f es) (append (fvs f) (apply append (map fvs es)))])) - (remove-duplicates (fvs e))) - -;; LExpr -> (Listof Symbol) -;; Extract all the calls to C Functions -(define (ffi-calls e) - (match e - [(? imm? i) '()] - [(var-e v) '()] - [(prim-e p es) (apply append (map ffi-calls es))] - [(if-e e0 e1 e2) (append (ffi-calls e0) (ffi-calls e1) (ffi-calls e2))] - [(let-e (list (binding v def)) body) - (append (ffi-calls def) (ffi-calls body))] - [(letr-e bs body) (append (apply append (map ffi-calls (get-defs bs))) (ffi-calls body))] - [(lam-e xs e0) (ffi-calls e0)] - [(lam-t _ xs e0) (ffi-calls e0)] - [(ccall-e f es) (cons f (apply append (map ffi-calls es)))] - [(app-e f es) (append (ffi-calls f) (apply append (map ffi-calls es)))])) - -; SExpr -> Prog -(define (sexpr->prog s) - (match s - [(list 'begin defs ... e) (prog (map sexpr->fundef defs) (sexpr->expr e))] - [e (prog '() (sexpr->expr e))])) - -; SExpr -> FunDef -(define (sexpr->fundef def) - (match def - [`(define (,f . ,as) ,body) (fundef f as (sexpr->expr body))])) - -; SExpr -> Expr -; Parse the s-expr into our Expr AST -; This should be a one-to-one mapping for now. -(define (sexpr->expr s) - (match s - [(? symbol? v) (var-e v)] - [(? integer? s) (int-e s)] - [(? boolean? b) (bool-e b)] - [(? char? 
c) (char-e c)] - [''() (nil-e)] - [`(if ,p ,t ,f) (if-e (sexpr->expr p) (sexpr->expr t) (sexpr->expr f))] - [`(let ((,bnd ,def)) ,body) - (let-e (list (binding bnd (sexpr->expr def))) (sexpr->expr body))] - [`(letrec ,bs ,body) - (letr-e (map (lambda (b) (binding (first b) (sexpr->expr (second b)))) bs) (sexpr->expr body))] - [`(,(? unop? p) ,e) - (prim-e p (list (sexpr->expr e)))] - [`(,(? biop? p) ,e1 ,e2) - (prim-e p (list (sexpr->expr e1) (sexpr->expr e2)))] - [`(λ ,xs ,e0) (lam-e xs (sexpr->expr e0))] - [`(lambda ,a ,e) (lam-e a (sexpr->expr e))] - [`(ccall ,f . ,es) (ccall-e f (map sexpr->expr es))] - [`(,f . ,as) (app-e (sexpr->expr f) (map sexpr->expr as))] - [_ (error "operation not supported")])) diff --git a/langs/shakedown/test/compile.rkt b/langs/shakedown/test/compile.rkt deleted file mode 100644 index bbf1c24c..00000000 --- a/langs/shakedown/test/compile.rkt +++ /dev/null @@ -1,164 +0,0 @@ -#lang racket -(require "../compile.rkt" - "../syntax.rkt" - "../asm/interp.rkt" - rackunit - redex/reduction-semantics) - -(define (run e) - (asm-interp (compile (sexpr->prog e)))) - -(check-equal? (run 7) 7) -(check-equal? (run -8) -8) -(check-equal? (run '(add1 (add1 7))) 9) -(check-equal? (run '(add1 (sub1 7))) 7) - -;; Examples from the notes -(check-equal? (run '(let ((x 7)) x)) 7) -(check-equal? (run '(let ((x 7)) 2)) 2) -(check-equal? (run '(let ((x 7)) (add1 x))) 8) -(check-equal? (run '(let ((x (add1 7))) x)) 8) -(check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) -(check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) -(check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) - -; (check-equal? (run 'x) 'err) ;; Not a valid program -(check-equal? (run '(add1 #f)) 'err) -(check-equal? (run '(+ 1 2)) 3) -(check-equal? (run '(zero? 0)) #t) -(check-equal? (run '(zero? 1)) #f) - - -;; Hustle tests -(check-equal? (run '(box 8)) (box 8)) -(check-equal? (run '(unbox (box 8))) 8) -(check-equal? (run '(unbox 8)) 'err) - -;; Iniquity tests -(check-equal? 
(run - '(begin (define (f x) x) - (f 5))) - 5) -(check-equal? (run - '(begin (define (tri x) - (if (zero? x) - 0 - (+ x (tri (sub1 x))))) - (tri 9))) - 45) -(check-equal? (run - '(begin (define (even? x) - (if (zero? x) - #t - (odd? (sub1 x)))) - (define (odd? x) - (if (zero? x) - #f - (even? (sub1 x)))) - (even? 101))) - #f) -(check-equal? (run - '(begin (define (map-add1 xs) - (if (empty? xs) - '() - (cons (add1 (car xs)) - (map-add1 (cdr xs))))) - (map-add1 (cons 1 (cons 2 (cons 3 '())))))) - '(2 3 4)) -(check-equal? (run '(begin (define (f x) x) - f)) - 'procedure) -(check-equal? (run '(begin (define (f x) x) - (f 5))) - 5) - -;; Loot tests -(check-equal? (run '((λ (x) x) 7)) 7) -(check-equal? (run '(((λ (x) (λ (y) x)) 7) 8)) 7) -(check-equal? (run '((λ (f) (f 0)) (λ (x) (add1 x)))) 1) -(check-equal? (run '((λ (f) (f (f 0))) (λ (x) (add1 x)))) 2) -(check-equal? (run '((let ((y 8)) (car (cons (λ (x) y) '()))) 2)) 8) -(check-equal? (run '(let ((y 8)) ((car (cons (λ (x) y) '())) 2))) 8) -(check-equal? - (run - '(((λ (t) - ((λ (f) (t (λ (z) ((f f) z)))) - (λ (f) (t (λ (z) ((f f) z)))))) - (λ (tri) - (λ (n) - (if (zero? n) - 1 - (+ n (tri (sub1 n))))))) - 10)) - 56) - -(check-equal? - (run - '(begin (define (map f ls) - (if (empty? ls) - '() - (cons (f (car ls)) (map f (cdr ls))))) - - (map (λ (f) (f 0)) - (cons (λ (x) (add1 x)) - (cons (λ (x) (sub1 x)) - '()))))) - '(1 -1)) - -(check-equal? - (run - '(begin (define (map f ls) - (letrec ((mapper (λ (ls) - (if (empty? ls) - '() - (cons (f (car ls)) (mapper (cdr ls))))))) - (mapper ls))) - (map (λ (f) (f 0)) - (cons (λ (x) (add1 x)) - (cons (λ (x) (sub1 x)) - '()))))) - '(1 -1)) - -;(check-equal? -; (run -; '(begin (define (map f ls) -; (begin (define (mapper ls) -; (if (empty? ls) -; '() -; (cons (f (car ls)) (mapper (cdr ls))))) -; (mapper ls))) -; (map (λ (f) (f 0)) -; (cons (λ (x) (add1 x)) -; (cons (λ (x) (sub1 x)) -; '()))))) -; '(1 -1)) - -(check-equal? (run - '(let ((id (λ (x) x))) - (letrec ((even? 
- (λ (x) - (if (zero? x) - #t - (id (odd? (sub1 x)))))) - (odd? - (λ (x) - (if (zero? x) - #f - (id (even? (sub1 x))))))) - (even? 101)))) - #f) - -(check-equal? (run - '(let ((id (λ (x) x))) - (id (letrec ((even? - (λ (x) - (if (zero? x) - #t - (odd? (sub1 x))))) - (odd? - (λ (x) - (if (zero? x) - #f - (even? (sub1 x)))))) - (even? 101))))) - #f) diff --git a/langs/shakedown/test/interp.rkt b/langs/shakedown/test/interp.rkt deleted file mode 100644 index afa3523a..00000000 --- a/langs/shakedown/test/interp.rkt +++ /dev/null @@ -1,157 +0,0 @@ -#lang racket -(require "../interp.rkt" - ;(only-in "../semantics.rkt" H 𝑯 convert) - rackunit - redex/reduction-semantics) - -(define (test-suite run) - (check-equal? (run 7) 7) - (check-equal? (run -8) -8) - (check-equal? (run '(add1 (add1 7))) 9) - (check-equal? (run '(add1 (sub1 7))) 7) - - ;; Examples from the notes - (check-equal? (run '(let ((x 7)) x)) 7) - (check-equal? (run '(let ((x 7)) 2)) 2) - (check-equal? (run '(let ((x 7)) (add1 x))) 8) - (check-equal? (run '(let ((x (add1 7))) x)) 8) - (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) - (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) - (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) - - - (check-equal? (run 'x) 'err) - (check-equal? (run '(add1 #f)) 'err) - (check-equal? (run '(+ 1 2)) 3) - (check-equal? (run '(zero? 0)) #t) - (check-equal? (run '(zero? 1)) #f) - - - ;; Hustle tests - (check-equal? (run '(box 8)) (box 8)) - (check-equal? (run '(unbox (box 8))) 8) - (check-equal? (run '(unbox 8)) 'err) - - ;; Iniquity tests - (check-equal? (run - '(begin (define (f x) x) - (f 5))) - 5) - - (check-equal? (run - '(begin (define (tri x) - (if (zero? x) - 0 - (+ x (tri (sub1 x))))) - (tri 9))) - 45) - - (check-equal? (run - '(begin (define (even? x) - (if (zero? x) - #t - (odd? (sub1 x)))) - (define (odd? x) - (if (zero? x) - #f - (even? (sub1 x)))) - (even? 101))) - #f) - - (check-equal? (run - '(begin (define (map-add1 xs) - (if (empty? 
xs) - '() - (cons (add1 (car xs)) - (map-add1 (cdr xs))))) - (map-add1 (cons 1 (cons 2 (cons 3 '())))))) - '(2 3 4)) - - - ;; Loot examples - - (check-equal? (run '((λ (x) x) 5)) 5) - (check-equal? (run '((λ (x y) x) 5 7)) 5) - (check-equal? (run '((λ (x y) y) 5 7)) 7) - (check-equal? (run '(((λ (x) (λ (y) y)) 5) 7)) 7) - (check-equal? (run '(((λ (x) (λ (y) x)) 5) 7)) 5) - (check-equal? (run '(((λ (t) - ((λ (f) (t (λ (z) ((f f) z)))) - (λ (f) (t (λ (z) ((f f) z)))))) - (λ (tri) - (λ (n) - (if (zero? n) - 1 - (+ n (tri (sub1 n))))))) - 10)) - 56) - - - (check-equal? (run - '(begin (define (map-add1 xs) - (if (empty? xs) - '() - (cons (add1 (car xs)) - (map-add1 (cdr xs))))) - (map-add1 (cons 1 (cons 2 (cons 3 '())))))) - '(2 3 4)) - (check-equal? (run '(begin (define (f x) x) - f)) - 'procedure) - (check-equal? (run '(begin (define (f x) x) - (f 5))) - 5) - - (check-equal? (run '((λ (f) (f 0)) (λ (x) (add1 x)))) 1) - (check-equal? (run '((λ (f) (f (f 0))) (λ (x) (add1 x)))) 2) - (check-equal? (run '((let ((y 8)) (car (cons (λ (x) y) '()))) 2)) 8) - (check-equal? (run '(let ((y 8)) ((car (cons (λ (x) y) '())) 2))) 8) - - (check-equal? - (run - '(begin (define (map f ls) - (if (empty? ls) - '() - (cons (f (car ls)) (map f (cdr ls))))) - - (map (λ (f) (f 0)) - (cons (λ (x) (add1 x)) - (cons (λ (x) (sub1 x)) - '()))))) - '(1 -1)) - - (check-equal? - (run - '(begin (define (map f ls) - (letrec ((mapper (λ (ls) - (if (empty? ls) - '() - (cons (f (car ls)) (mapper (cdr ls))))))) - (mapper ls))) - (map (λ (f) (f 0)) - (cons (λ (x) (add1 x)) - (cons (λ (x) (sub1 x)) - '()))))) - '(1 -1)) - - (check-equal? - (run - '(begin (define (map f ls) - (begin (define (mapper ls) - (if (empty? 
ls) - '() - (cons (f (car ls)) (mapper (cdr ls))))) - (mapper ls))) - (map (λ (f) (f 0)) - (cons (λ (x) (add1 x)) - (cons (λ (x) (sub1 x)) - '()))))) - '(1 -1))) - -; TODO: Not sure if I actually want to write an interpreter for this as it's -; mostly about the System V calling convention -;(test-suite -; (λ (e) -; (match (interp e) -; [(? procedure?) 'procedure] -; [v v]))) diff --git a/langs/shakedown/types.h b/langs/shakedown/types.h deleted file mode 100644 index c494821d..00000000 --- a/langs/shakedown/types.h +++ /dev/null @@ -1,14 +0,0 @@ -#define result_shift 3 -#define result_type_mask ((1 << result_shift) - 1) -#define type_imm 0 -#define type_box 1 -#define type_pair 2 -#define type_string 3 -#define type_proc 4 - -#define imm_shift (2 + result_shift) -#define imm_type_mask ((1 << imm_shift) - 1) -#define imm_type_int (0 << result_shift) -#define imm_type_bool (1 << result_shift) -#define imm_type_char (2 << result_shift) -#define imm_type_empty (3 << result_shift) diff --git a/langs/test-programs/get-progs.rkt b/langs/test-programs/get-progs.rkt index ae978904..03cd1c8e 100644 --- a/langs/test-programs/get-progs.rkt +++ b/langs/test-programs/get-progs.rkt @@ -49,10 +49,10 @@ (if (file-exists? p.in) (check-equal? (run/io p.run p.in) (racket/io p.rkt p.in) - p.rkt) + (path->string p.rkt)) (check-equal? (run p.run) (racket p.rkt) - p.rkt))) + (path->string p.rkt)))) (define (system/s cmd) (with-output-to-string (thunk (system cmd)))) diff --git a/langs/test-programs/hustle/chars.rkt b/langs/test-programs/hustle/chars.rkt new file mode 100644 index 00000000..d65f3308 --- /dev/null +++ b/langs/test-programs/hustle/chars.rkt @@ -0,0 +1,2 @@ +#lang racket +(cons #\λ (cons #\a '())) diff --git a/www/Makefile b/www/Makefile index 218f0a32..0d14b871 100644 --- a/www/Makefile +++ b/www/Makefile @@ -1,13 +1,28 @@ course = main -.PHONY : all clean test +.PHONY : all clean test scribble zips push all: clean $(course) test: clean raco test . 
-$(course): +$(course): scribble zips + +zips: + mkdir -p $(course)/code/ + cd ../langs ; \ + tar -c `git ls-files intro a86 abscond blackmail con dupe dodger evildoer extort fraud hustle hoax iniquity iniquity-gc iniquity-plus jig knock loot mug mountebank neerdowell outlaw fp project.pdf` \ + a86/main.c a86/gcd.c a86/tri.s abscond/42.s | \ + (cd ../www/main/code ; tar -x ; \ + for f in abscond blackmail con dupe dodger evildoer extort fraud hustle hoax iniquity iniquity-gc iniquity-plus jig knock loot mug mountebank neerdowell outlaw ; do \ + zip $${f}.zip -r $${f}/ ; \ + done ) + cd slides ; \ + tar -c `git ls-files ocaml-to-racket.pdf abscond.pdf con.pdf dupe.pdf extort.pdf grift.pdf grift2.pdf hustle.pdf` | \ + (cd ../main/code ; tar -x) ; cd ../.. + +scribble: raco scribble --htmls \ ++style css/extra.css \ ++style css/fancyverb.css \ @@ -15,14 +30,9 @@ $(course): ++xref-in setup/xref load-collections-xref \ --redirect-main http://docs.racket-lang.org/ \ $(course).scrbl - mkdir -p $(course)/code/ - cd ../langs ; \ - tar -c `git ls-files intro a86 abscond blackmail con dupe extort fraud hustle iniquity jig .knock loot mug fp project.pdf` \ - a86/main.c a86/gcd.c a86/tri.s abscond/42.s | \ - (cd ../www/main/code ; tar -x) ; cd ../.. - cd slides ; \ - tar -c `git ls-files ocaml-to-racket.pdf abscond.pdf con.pdf dupe.pdf extort.pdf grift.pdf grift2.pdf hustle.pdf` | \ - (cd ../main/code ; tar -x) ; cd ../.. 
+ +push: + rsync -rvzp main/ dvanhorn@junkfood.cs.umd.edu:/fs/www/class/spring2024/cmsc430/ clean: rm -rf $(course) diff --git a/www/assignments.scrbl b/www/assignments.scrbl index ab5f20ec..4ad05e98 100644 --- a/www/assignments.scrbl +++ b/www/assignments.scrbl @@ -4,9 +4,9 @@ @local-table-of-contents[#:style 'immediate-only] @include-section{assignments/1.scrbl} -@include-section{assignments/2.scrbl} -@include-section{assignments/3.scrbl} -@include-section{assignments/4.scrbl} +@;include-section{assignments/2.scrbl} +@;include-section{assignments/3.scrbl} +@;include-section{assignments/4.scrbl} @;include-section{assignments/5.scrbl} @;include-section{assignments/6.scrbl} @;;include-section{assignments/7.scrbl} diff --git a/www/assignments/1.scrbl b/www/assignments/1.scrbl index 365316f5..5d50cfdb 100644 --- a/www/assignments/1.scrbl +++ b/www/assignments/1.scrbl @@ -1,9 +1,10 @@ #lang scribble/manual -@(require "../defns.rkt") +@(require "../defns.rkt" + (only-in scribble/html/html div)) @title[#:tag "Assignment 1" #:style 'unnumbered]{Assignment 1: Learning about Programming Languages} -@bold{Due: Tuesday, September 7, 11:59PM} +@bold{Due: Wednesday, January 31, 11:59PM} Find two programming languages that are new to you, and answer the following questions: @@ -33,7 +34,11 @@ Racket, Ruby, Scheme, SML, Visual Basic. @section[#:style 'unnumbered]{What to turn in} -Submit this assignment via @link[@gradescope]{Gradescope}. +Submit a PDF or plain text file containing your write-up via +@link[@gradescope]{Gradescope}. + +@;{Until this assignment is officially assigned (see the due date), submissions +will not be accepted.} @;{We will be using GitHub Classroom. 
This means you will work with git repositories and turning in your work consists of pushing the diff --git a/www/assignments/2.scrbl b/www/assignments/2.scrbl index 0e3b044b..87ffc0b1 100644 --- a/www/assignments/2.scrbl +++ b/www/assignments/2.scrbl @@ -1,10 +1,14 @@ #lang scribble/manual @title[#:tag "Assignment 2" #:style 'unnumbered]{Assignment 2: Racket Primer} -@bold{Due: Tuesday, September 14, 11:59PM} +@bold{Due: Wednesday, September 13, 11:59PM} The goal of this assignment is to gain practice programming in Racket. +@bold{This is a collaborative assignment.} You may work with anyone +you'd like on this assignment, but each person must submit their +@tt{main.rkt} file on Gradescope. + You are given a @tt{main.rkt} file (on ELMS under "Files"), that contains a number of sections. In each section there are several function ``stubs,'' i.e. incomplete function definitions with type @@ -48,4 +52,4 @@ Submit your filled-in @tt{main.rkt} file on Gradescope. Your submission will be graded for correctness. Passing the unit tests included in the file is necessary but @bold{not sufficient} to receive a perfect score. You are strongly encouraged to add your own -tests to ensure the correctness of your solutions. \ No newline at end of file +tests to ensure the correctness of your solutions. 
diff --git a/www/assignments/3.scrbl b/www/assignments/3.scrbl index 51d38044..1368efdf 100644 --- a/www/assignments/3.scrbl +++ b/www/assignments/3.scrbl @@ -1,11 +1,12 @@ #lang scribble/manual @title[#:tag "Assignment 3" #:style 'unnumbered]{Assignment 3: Primitives, Conditionals, and Dispatch} -@(require (for-label (except-in racket ...))) +@(require (for-label a86 (except-in racket ...))) @(require "../../langs/con-plus/semantics.rkt") @(require redex/pict) -@bold{Due: Thu, Sept 30, 11:59PM} +@bold{Due: @elem[#:style "strike"]{Friday, September 29, 11:59PM} + Monday, October 2, 11:59PM} The goal of this assignment is to extend the parser, interpreter, and compiler with some simple unary numeric and boolean operations and two @@ -13,9 +14,12 @@ forms of control flow expressions: @racket[cond]-expressions and @racket[case]-expressions. -You are given a zip file on ELMS with a starter compiler based on the -Dupe language we studied in class. You are tasked with extending the -language in a number of ways: +You are given a file @tt{dupe-plus.zip} on ELMS with a starter +compiler based on the @secref{Dupe} language we studied in class. + + +You are tasked with extending the language in a number of +ways: @itemlist[ @item{adding new primitive operations,} @@ -89,8 +93,8 @@ true in general for Racket. The parser should reject any The meaning of a @racket[cond] expression is computed by evaluating -each expression @racket[_e-pi] in order until the first one that is -true is found, in which case, the corresponding expression +each expression @racket[_e-pi] in order until the first one that +does not evaluate to @racket[#f] is found, in which case, the corresponding expression @racket[_e-ai] is evaluated and its value is the value of the @racket[cond] expression. If no such @racket[_e-pi] exists, the expression @racket[_e-an]'s value is the value of the @racket[cond]. @@ -225,9 +229,9 @@ There are two new kinds of expression constructors: @racket[Cond] and @racket[Case]. 
A @racket[Cond] AST node contains a list of cond-clauses and expression, which the expression of the @racket[else] clause. Each cond-clause is represented by a @racket[Clause] -structure containing two expressions: the right-hand-side of the -clause which is used to determine whether the left-hand-side is -evaluated, and the left-hand-side expression. +structure containing two expressions: the left-hand-side of the +clause which is used to determine whether the right-hand-side is +evaluated, and the right-hand-side expression. The @racket[Case] AST node contains three things: an expression that is the subject of the dispatch (i.e. the expression that is evaluated @@ -288,8 +292,6 @@ write additional test cases. @section[#:tag-prefix "a3-" #:style 'unnumbered]{Submitting} -You should submit on Gradescope. You should submit a zip file with -exactly the same structure that the stub contains (a dupe-plus -folder). We will only use the @tt{parse.rkt}, @tt{ast.rkt}, -@tt{compile.rkt}, @tt{interp.rkt}, and @tt{interp-prim.rkt} files for -grading, so make sure all your work is contained there! +Submit a zip file containing your work to Gradescope. Use @tt{make +submit.zip} from within the @tt{dupe-plus} directory to create a zip +file with the proper structure. diff --git a/www/assignments/4.scrbl b/www/assignments/4.scrbl index 331d8c64..a5d5fbfd 100644 --- a/www/assignments/4.scrbl +++ b/www/assignments/4.scrbl @@ -7,19 +7,19 @@ @(require "../notes/ev.rkt") -@bold{Due: Tuesday, Oct 19th at 11:59PM EST} +@bold{Due: Wednesday, November 1, 11:59PM EST} The goal of this assignment is to extend a compiler with binding forms and primitives that can take any number of arguments. -You are given a zip file on ELMS with a starter compiler similar to -the @seclink["Fraud"]{Fraud} language we studied in class. You are -tasked with: +You are given a @tt{fraud-plus.zip} file on ELMS with a starter +compiler similar to the @seclink["Fraud"]{Fraud} language we studied +in class. 
You are tasked with: @itemlist[ @item{incorporating the language features you added in -@seclink["Assignment 3"]{Assignment 3},} +@seclink["Assignment 3"]{Assignment 3}, scaled up to Fraud,} @item{extending the addition primitive to handle an arbitrary number of arguments,} @@ -182,12 +182,13 @@ write additional test cases. @section[#:tag-prefix "a4-" #:style 'unnumbered]{Submitting} -You should submit on Gradescope. You should submit a zip file that has -exactly the same structure that the stub contains. We will only use -the @tt{compile.rkt}, @tt{interp.rkt}, and @tt{interp-prim.rkt} files -for grading, so make sure all your work is contained there! Note the -lack of @tt{ast.rkt}, @tt{parse.rkt}, etc. - part of assignment 3 was -learning to design your own structures, part of assignment 4 is -learning to work within the constraints of an existing design! - +Submit a zip file containing your work to Gradescope. Use @tt{make +submit.zip} from within the @tt{fraud-plus} directory to create a zip +file with the proper structure. +We will only use the @tt{compile.rkt}, @tt{interp.rkt}, and +@tt{interp-prim.rkt} files for grading, so make sure all your work is +contained there! Note the lack of @tt{ast.rkt}, @tt{parse.rkt}, etc. - +part of assignment 3 was learning to design your own structures, part +of assignment 4 is learning to work within the constraints of an +existing design! 
diff --git a/www/assignments/5.scrbl b/www/assignments/5.scrbl index 80cf56f7..5f22307c 100644 --- a/www/assignments/5.scrbl +++ b/www/assignments/5.scrbl @@ -1,135 +1,492 @@ #lang scribble/manual -@title[#:tag "Assignment 5" #:style 'unnumbered]{Assignment 5: A Heap of Characters} +@title[#:tag "Assignment 5" #:style 'unnumbered]{Assignment 5: Arity Checking, Rest Arguments, Case Functions, and Apply} @(require (for-label (except-in racket ...))) -@(require redex/pict) +@(require "../notes/ev.rkt" + "../notes/utils.rkt") -@(require "../notes/ev.rkt") +@(ev `(current-directory ,(path->string (build-path notes "iniquity-plus")))) +@(for-each (λ (f) (ev `(require (file ,f)))) + '("ast.rkt" "parse.rkt" "interp.rkt")) -@bold{Due: Thursday, April 7th, 11:59PM EDT} -@(define repo "https://github.com/cmsc430/assign05") +@bold{Due: Tuesday, June 27, 11:59PM EST} -The goal of this assignment is to extend a compiler with data types -that require memory allocation and dereferencing. A secondary goal is to hone your test-writing skills. +The goal of this assignment is to extend a compiler with arity +checking for function calls, to add new kinds of function parameter +features, and to add the @racket[apply] form for applying a function +to a list of arguments. -Assignment repository: -@centered{@link[repo repo]} - -You are given a repository with a starter compiler similar to the -@seclink["Hustle"]{Hustle} language we studied in class. You are tasked -with: +You are given a file @tt{iniquity-plus.zip} on ELMS with a starter +compiler similar to the @seclink["Iniquity"]{Iniquity} language we +studied in class. 
You are tasked with: @itemlist[ -@item{extending the language to include a string data type,} +@item{implementing run-time arity checking for function calls,} + +@item{extending function definitions to include ``rest argument'' +parameters for writing variable-arity functions,} + +@item{extending function definitions to include +@racket[case-lambda]-style multiple-arity functions,} -@item{extend the (very few) tests provided to be as comprehensive as possible.} +@item{extending the arity checking features to handle these new forms +of function definitions, and} +@item{implementing the @racket[apply] mechanism for applying a function +to the elements of a list as arguments.} ] +Unlike previous assignments, you do not need to bring forward your +past features to this language; there is no need to implement +@racket[cond], @racket[case], etc. + +Be sure to read the entire problem description before starting. There +are a number of @secref[#:tag-prefixes '("a5-")]{Suggestions} on how to +approach the assignment near the end. + + +@section[#:tag-prefix "a5-" #:style 'unnumbered #:tag "arity"]{Checking arity} -@section[#:tag-prefix "a5-" #:style 'unnumbered]{Strung out} +In @seclink["Iniquity"]{Iniquity}, we implemented a language with +function definitions and calls. We noted that bad things can happen +when a function is called with the incorrect number of arguments. +While it's possible to statically check this property of Iniquity +programs, it's not possible in more expressive languages and arity +checking must be done at run-time. You are tasked with implementing +such a run-time arity checking mechanism. -In Dodger, we implemented a character data type for -representing single letters. In this assignment, you will implement a -String data type for representing arbitrarily long sequences of -characters. +Here is the basic idea. 
You need to add a run-time checking mechanism +that will cause the following program to signal an error: +@#reader scribble/comment-reader +(racketblock +(define (f x y) (+ x y)) +(f 1) +) -Strings are disjoint from all other data types and are essentially a -fixed-size array of characters. Literal strings are written by -enclosing the characters within the string in double quotes (@tt{"}). -Strings can include double quotes by using the escape sequence -@tt{\"}. +The function call knows how many arguments are given and the function +definition knows how many arguments are expected. The generated code +should check that these two quantities match when the function is called. -You must add the following operations to Hustle+: +A simple way to do this is to pick a designated register that will be +used for communicating arity information. The caller should set the +register to the number of arguments before jumping to the function. +The function should check this number against the expected number and +signal an error when they don't match. + + +You should modify @racket[compile-app] and @racket[compile-define] to +implement this part of the assignment. + +@section[#:tag-prefix "a5-" #:style 'unnumbered #:tag "rest"]{Rest +arguments} + +Many languages including JavaScript, C, and Racket provide facilities +for defining functions that take a ``rest argument'' which allows the +function to be called with more arguments than expected and these +additional arguments will be bound to a single value that collects all +of these arguments. In Iniquity, as in Racket, the obvious way of +collecting these arguments into a single value is to use a list. + +Here are some examples: @itemlist[ -@item{@code[#:lang "racket"]{string? ; Any -> Boolean}, which determines if its argument is a string.} -@item{@code[#:lang "racket"]{string-ref ; String Natural -> Char}, which -extracts the character at the given index (using 0-based counting).
An error is signalled if -the index is out of bounds for the given string or if the first argument is not a string.} -@item{@code[#:lang "racket"]{string-length ; String -> Natural}, which computes the length of the string, or yields an error if its argument is not a string.} -@item{@code[#:lang "racket"]{make-string ; Natural Char -> String}, which constructs a string of the given -length, filled with the given character If the first argument is not a non-negative integer, of if the second argument is not a character, an error should be returned.} + +@item{@racket[(define (f . xs) ...)]: this function takes @emph{any} number +of arguments and binds @racket[xs] to a list containing all of them,} + +@item{@racket[(define (f x . xs) ...)]: this function takes @emph{at +least} one argument and binds @racket[x] to the first argument and +@racket[xs] to a list containing the rest. It's an error to call this function +with zero arguments.} + +@item{@racket[(define (f x y z . xs) ...)]: this function takes +@emph{at least} three arguments and binds @racket[x], @racket[y], and +@racket[z] to the first three arguments and @racket[xs] to a list +containing the rest. It's an error to call this function with 0, 1, +or 2 arguments.} +] + +Here are some examples in Racket to get a sense of the behavior: + +@ex[ +(define (f . xs) (list xs)) +(f) +(f 1) +(f 1 2) +(f 1 2 3) +(f 1 2 3 4) +(define (f x . xs) (list x xs)) +(eval:error (f)) +(f 1) +(f 1 2) +(f 1 2 3) +(f 1 2 3 4) +(define (f x y z . xs) (list x y z xs)) +(eval:error (f)) +(eval:error (f 1)) +(eval:error (f 1 2)) +(f 1 2 3) +(f 1 2 4) +] + +The code generated for a function call should not change---other than +what you did for @secref[#:tag-prefixes '("a5-") "arity"]: it should +pass all of the arguments on the stack along with information about +the number of arguments. 
+ +The compilation of function definitions that use a rest argument +should generate code that checks that the given number of arguments is +acceptable and should generate code to pop all ``extra'' arguments off +the stack and construct a list which is then bound to the rest +parameter. + +It is worth remembering that arguments are pushed on the stack in such +a way that the last argument is the element most recently pushed on +the stack. This has the benefit of making it easy to pop off the +extra arguments and to construct a list with the elements in the +proper order. + +HINT: the function definition knows the number of ``required'' +arguments, i.e. the minimum number of arguments the function can be +called with---call this @math{m}---and the caller communicates how +many actual arguments have been supplied---call this @math{n}. The +compiler needs to generate a loop that pops @math{n-m} times, +constructing a list with the popped elements, and then finally pushes +this list in order to bind it to the rest parameter. + +@section[#:tag-prefix "a5-" #:style 'unnumbered #:tag "case-lambda"]{Arity dispatch} + +Some languages such as Java, Haskell, and Racket make it possible to +overload a single function name with multiple definitions where the +dispatch between these different definitions is performed based on the +number (or kind) of arguments given at a function call. + +In Racket, this is accomplished with the @racket[case-lambda] form for +constructing multiple-arity functions. + +Here is an example: + +@ex[ +(define f + (case-lambda + [(x) "got one!"] + [(p q) "got two!"])) + +(f #t) +(f #t #f) +(eval:error (f #t #f 0)) ] -We have already added a @tt{String} struct in @tt{ast.rkt} and -provided the parsing code in @tt{parse.rkt}. +This function can accept @emph{either} one or two arguments. If given +one argument, it evaluates the right-hand-side of the first clause +with @racket[x] bound to that argument.
If given two arguments, it +evaluates the right-hand-side of the second clause with @racket[p] and +@racket[q] bound to the arguments. If given any other number of +arguments, it signals an error. + +A @racket[case-lambda] form can have any number of clauses (including +zero!) and the first clause for which the number of arguments is +acceptable is taken when the function is called. + +Note that @racket[case-lambda] can be combined with rest arguments too. +A clause that accepts any number of arguments is written by simply +listing a parameter name (no parentheses). A clause that accepts some +non-zero minimum number of parameters is written with a dotted +parameter list. + +For example: + +@ex[ +(define f + (case-lambda + [(x y z . r) (length r)] + [(x) "just one!"])) + +(f 1 2 3 4 5 6) +(f #t) +(eval:error (f)) +(eval:error (f 1 2))] + +This function takes three or more arguments @emph{or} one argument. Any +other number of arguments (i.e. zero or two) results in an error. + +@ex[ +(define f + (case-lambda + [(x y z) "three!"] + [xs (length xs)])) + +(f) +(f 1 2) +(f 1 2 3) +(f 1 2 3 4 5 6) +] -More importantly, the run-time system has been updated to account for -a string type. It assumes a representation where the length of the -string is stored in memory, followed by the characters of the string, -in order. You can change the representation if you'd like, -but you will have to update the run-time system to properly print -strings and discuss it with the instructors beforehand. Not recommended! -Otherwise, no changes to the run-time system should be necessary. +This function takes any number of arguments, but when given three, it +produces @racket["three!"]; in all other cases it produces the number +of arguments. + +@section[#:tag-prefix "a5-" #:style 'unnumbered #:tag "apply"]{Apply} + +Apply is the yin to the yang of rest arguments (or maybe the other way +around). 
Whereas a rest argument lets a function take arbitrarily +more arguments and packages them up as a list, @racket[apply] will +apply a function to a list as though the elements of the list were +given as arguments. + +@ex[ +(define (f x y) (+ x y)) +(apply f (list 1 2)) +(define (flatten ls) + (apply append ls)) +(flatten (list (list 1 2) (list 3 4 5) (list 6))) +(define (sum ls) + (apply + ls)) +(sum (list 5 6 7 8)) +] -If you want to understand the details of how strings are implemented in -the run-time system. See the function @tt{print_string()} -in @tt{main.c}. +Here you can see @racket[apply] taking two things: a function and a +single argument which is a list. It is calling the function with the +elements of the list as the arguments. -In order to get all the points for this section of the assignment you will -need to modify the following files: +It turns out, @racket[apply] can also take other arguments in addition +to the list and pass them along to the function. -@itemlist[ +@ex[ +(define (f x y) (+ x y)) +(apply f 1 (list 2)) +(apply list 1 2 3 4 (list 5 6 7)) +] -@item{@tt{interp.rkt}} -@item{@tt{interp-prim.rkt}} -@item{@tt{compile.rkt}} +Note that if the function expects a certain number of arguments and the list has +a different number of elements, it results in an arity error: +@ex[ +(define (f x y) (+ x y)) +(eval:error (apply f (list 1 2 3))) ] -@section[#:tag-prefix "a5-" #:style 'unnumbered]{Add tests!} - -One thing that has been not stressed enough this semester is the need -to add tests. There are 6 files under the @tt{test/} directory in the -repo. Three of them contain regression tests to ensure that you don't -break functionality when you add the new constructs. These are -@tt{test-runner.rkt}, @tt{interp.rkt} and @tt{compile.rkt}. The -@tt{test-runner.rkt} file provides two functions @tt{test-runner} and -@tt{test-runner-io} which execute a sequence of calls to an input -@tt{run} function, and check that they yield the expected result.
This -@tt{run} function is instantiated in @tt{interp.rkt} with a call to -the interpreter, and in @tt{compile.rkt} with a call to the compiler. - -With this setup, when you do @tt{raco test test/interp.rkt} or -@tt{raco test test/compile.rkt} you should be seeing "76 tests passed". - -There are also three similar files with the "-string" suffix in the -test directory in the repo (@tt{interp-string.rkt}, -@tt{compile-string.rkt}, and @tt{test-runner-string.rkt}. You'll -notice that there are exactly two public tests there - one using io -and one not. The second part of the assingment is to extend these tests -to thoroughly test the behavior of the four primitives you added. - -You should ONLY add tests to @tt{test-runner-string.rkt} and they should -all be of the same form as the ones provided: either @tt{(check-equal? (run XXX) Y)} -for the @tt{test-runner} function, or @tt{(check-equal? (run XXX SSS) (cons YYY ZZZ))} -if your tests use IO. - -Your test suite will be autograded based on its ability to reveal bugs -in a series of implementations we have provided, each with 1 or more -bugs injected. That is, we will run @tt{raco test} using YOUR -@tt{test-runner-string} file against OUR (intentionally faulty) -implementations. Of course, your test suite should not be failing -any tests for a correct implementation. +A new form of expression has been added to the @tt{Expr} AST type: -@section[#:tag-prefix "a5-" #:style 'unnumbered]{Submitting} +@#reader scribble/comment-reader +(racketblock +;; type Expr = ... +;; | (Apply Id [Listof Expr] Expr) +) + +The parser has been updated to handle concrete syntax of the form: -You should submit on Gradescope. You should submit a zip file that has -exactly the same structure that the stub contains. We will only use -the @tt{compile.rkt}, @tt{interp.rkt}, @tt{interp-prim.rkt}, and -@tt{test/test-runner-string.rkt} files for grading, so make sure all -your work is contained there! 
+@#reader scribble/comment-reader +(racketblock +(apply _f _e0 ... _en) +) + +@ex[ +(parse-e '(apply f x y zs)) +] + +Note that the AST for an @racket[apply] expression has the function +name, an arbitrarily long list of arguments, plus a distinguished last +argument that should produce a list. (It is an error if this expression +produces anything other than a list.) + +While it's allowable to have only the function and the list argument, +it's a syntax error to leave off a list argument altogether: + +@ex[ +(parse-e '(apply f xs)) +(eval:error (parse-e '(apply f))) +] + +The interpreter also handles @racket[apply] expressions: + +@ex[ +(interp (parse '[ (define (f x y) (cons y x)) + (apply f (cons 1 (cons 2 '()))) ])) +] + +Together with rest arguments, @racket[apply] makes it possible to +write many functions you may like to use: + +@#reader scribble/comment-reader +(ex +(interp + (parse + '[;; an append that works on any number of lists + (define (append . xss) + (if (empty? xss) + '() + (if (empty? (car xss)) + (apply append (cdr xss)) + (cons (car (car xss)) + (apply append (cdr (car xss)) (cdr xss)))))) + ;; the list function! + (define (list . xs) xs) + + (append (list 1 2 3) (list 4) (list 5 6 7))]))) + +In @tt{compile.rkt}, the @racket[compile-e] has an added case for +@racket[Apply] AST nodes and calls @racket[compile-apply], which is +stubbed out for you. You will need to implement @racket[apply] there. + +Here is the idea for @racket[apply]: it is doing something similar to +a function call, so it needs to make a label for the return point and +push that on the stack. It then needs to execute all of the given +arguments, pushing them on the stack (again just like a regular +function call). Then it needs to execute the distinguished list +argument and generate code that will traverse the list at run-time, +pushing elements on to the stack until reaching the end of the list. 
+At this point, all of the arguments, both those given explicitly and +those in the list are on the stack. Jump to the function. + + +@section[#:tag-prefix "a5-" #:style 'unnumbered]{Representing the +syntax of function definitions} + +The @seclink["Iniquity"]{Iniquity} language has a single function +definition form: @racket[(define (_f _x ...) _e)] which is represented +with the following AST type: + +@#reader scribble/comment-reader +(racketblock +;; type Defn = (Defn Id (Listof Id) Expr) +(struct Defn (f xs e) #:prefab) +) + +Because there are three different forms of function definition in +Iniquity+, we use the following AST representation: + +@#reader scribble/comment-reader +(racketblock +;; type Defn = (Defn Id Fun) +(struct Defn (f fun) #:prefab) + +;; type Fun = (FunPlain [Listof Id] Expr) +;; | (FunRest [Listof Id] Id Expr) +;; | (FunCase [Listof FunCaseClause]) +;; type FunCaseClause = (FunPlain [Listof Id] Expr) +;; | (FunRest [Listof Id] Id Expr) +(struct FunPlain (xs e) #:prefab) +(struct FunRest (xs x e) #:prefab) +(struct FunCase (cs) #:prefab) +) + +What used to be represented as @racket[(Defn _f _xs _e)] is now +represented as @racket[(Defn _f (FunPlain _xs _e))]. + + +The parser already works for these new forms of function definitions. +Here are some examples of how function definitions are parsed, but you +are encouraged to try out more to get a better sense: + +@ex[ +(parse-define '(define (f x) x)) +(parse-define '(define (f . xs) xs)) +(parse-define '(define (f x y z . q) q)) +(parse-define + '(define f + (case-lambda + [(x y) 2] + [(z) 1] + [(a b c . d) "3+"] + [q "other"]))) +] + +@section[#:tag-prefix "a5-" #:style 'unnumbered]{Starter code} + +The compiler code given to you is just an implementation of Iniquity, +but updated to parse the new forms of function definitions and +re-organized slightly to match the new AST representation. 
+ +The interpreter code given to you works on the full Iniquity+ +language, so you do not need to update @racket[interp.rkt] and can use +the interpreter to guide your implementation of the compiler. + +@ex[ +(interp + (parse '[(define (f x) x) + (f 1)])) +(interp + (parse '[(define (f . x) x) + (f 1)])) +(interp + (parse '[(define (f . x) x) + (f)])) +(interp + (parse '[(define (f . x) x) + (f 1 2 3 4 5)])) +(interp + (parse '[(define f + (case-lambda + [(x y) 2] + [(z) 1] + [(a b c . d) "3+"] + [q "other"])) + (cons (f 7) + (cons (f 3 4) + (cons (f) + (cons (f 7 8 9 10 11) + '()))))])) +] + + +Thus, you should only need to modify @racket[compile.rkt]. + +A small number of test cases are given as usual. + + +@section[#:tag-prefix "a5-" #:style 'unnumbered]{Suggestions} + +This is a tricky assignment. The amount of code you have to write is +pretty small, however you may spend a long time slogging through the +assignment if your approach is to hack first, think later. + +Here are some suggestions for how to approach the assignment. Make +sure you get each of the pieces working before moving on. @itemlist[ -@item{Not breaking past behavior is worth 20 points.} -@item{The 4 string primitives are worth 60 points (30 interpreter, 30 compiler).} -@item{Finding bugs with your tests is worth 20 points.} +@item{Start with @secref[#:tag-prefixes '("a5-") "arity"]; this should +be pretty easy. Make sure it works for plain function definitions.} + +@item{Move on to @secref[#:tag-prefixes '("a5-") "rest"]. You could +start by emitting code that checks that the arguments are acceptable, +popping the appropriate number of arguments off (and ignoring the +elements), then pushing the empty list. This will work like a rest arg +in that it should accept any number of arguments beyond the required +minimum, but the rest argument will always be bound to empty. Once +working, try to modify the code to build a list as it pops arguments. 
+Test that it works.} + +@item{Next you could either tackle @racket[apply] or +@racket[case-lambda].} + +@item{For @secref[#:tag-prefixes '("a5-") +"case-lambda"], remember that you have a compiler for plain and rest +argument functions at this point. That should come in handy. Think +of @racket[case-lambda] as generating a set of function definitions +(with generated names), and then the main work of @racket[case-lambda] +is determining which of the generated functions to call, given the +specific number of arguments passed in by the caller. When you find +the function that fits, jump to it. You might start by only handling +plain function clauses in @racket[case-lambda] before moving on to +handling rest argument functions, too.} + +@item{For @secref[#:tag-prefixes '("a5-") "apply"], at first don't +worry about arity checking and consider the case where there are no +explicit arguments given, i.e. focus on @racket[(apply _f _e)]. Once +you have that working, consider the more general case of +@racket[(apply _f _e0 ... _e)]. Then figure out how to add in the +arity checking part. Finally, make sure you're detecting error cases +such as when @racket[_e] is not a proper list.} + ] -Just zipping the empty template we provide is worth ~30 points! -Upload often to make sure you don't run into autograder issues! +@section[#:tag-prefix "a5-" #:style 'unnumbered]{Submitting} + +Submit a zip file containing your work to Gradescope. Use @tt{make +submit.zip} from within the @tt{iniquity-plus} directory to create a zip +file with the proper structure.
diff --git a/www/assignments/6.scrbl b/www/assignments/6.scrbl index 3090d3c3..ab2293b4 100644 --- a/www/assignments/6.scrbl +++ b/www/assignments/6.scrbl @@ -1,87 +1,88 @@ #lang scribble/manual -@title[#:tag "Assignment 6" #:style 'unnumbered]{Assignment 6: Arities!} +@title[#:tag "Assignment 6" #:style 'unnumbered]{Assignment 6: Squid Game} @(require (for-label (except-in racket ...))) -@(require redex/pict) - -@(require "../notes/ev.rkt") - -@bold{Due: Thursday, April 29th, 11:59PM EST} - -@(define repo "https://github.com/cmsc430/assign06") - -The goal of this assignment is (1) to implement arity checking in a -language with functions, and (2) to implement the @racket[procedure-arity] -operation for accessing the arity of a function. - -Assignment repository: -@centered{@link[repo repo]} - -You are given a repository with a starter compiler similar to the -@seclink["Loot"]{Loot} language we studied in class. The only change -has been the addition of parsing code for the unary -@racket[procedure-arity] primitive. - -@section[#:tag-prefix "a6-" #:style 'unnumbered]{Arity-check yourself, before you wreck yourself} - -When we started looking at functions and function applications, we -wrote an interpreter that did arity checking, i.e. just before making -a function call, it confirmed that the function definition had as many -parameters as the call had arguments. - -The compiler, however, does no such checking. This means that -arguments will silently get dropped when too many are supplied and -(much worse!) parameters will be bound to junk values when too few are -supplied; the latter has the very unfortunate effect of possibly -leaking local variable's values to expressions out of the scope of -those variables. (This has important security ramifications.) - -The challenge here is that the arity needs to be checked at run-time, -since we have first class functions. But at run-time, we don't have -access to the syntax of the function definition or the call. 
So in -order to check the arity of a call, we must emit code to do the -checking and to compute the relevant information for carrying out the -check. - -The main high-level idea is that: when compiling a function -definition, the arity of the function is clear from the number of -parameters of the definition; when compiling a call, the number of -arguments is also obvious. Therefore, what's needed is a way for the -the function and the call to communicate and check their corresponding -arity information. - -We recommend storing the arity of the function as an additional piece -of information in the closure during its compilation. Then, during a -call you can access that arity and check it before making the call. -Bonus: it makes implementing @racket[procedure-arity] really -straightforward: you just have to access that number. - -Just like we've been saying all semester, there are multiple other -ways of going about this, feel free to design and implement a solution -that works correctly - and consider the trade-offs! For example, -another approach would be to treat the arity of the function as if it -were the first argument of the function. A function of @math{n} -arguments would then be compiled as a function of @math{n+1} -arguments. A call with @math{m} arguments would be compiled as a call -with @math{m+1} arguments, where the value of the first argument is -@math{m}. The emitted code for a function should then check that the -value of the first argument is equal to @math{n} and signal an error -when it is not. But how would you implement @racket[procedure-arity] -in this case? (This is not a rhetorical question, if you have a -realistic solution to this, send us an e-mail!) - -Your job is to modify @racket[compile.rkt] and to implement this arity -checking protocol and the @racket[procedure-arity] primitive. It might -help to implement the primitive before compiling the calls themselves, -to partially test your implementation. 
Unlike previous assignments, -there are no explicitly marked TODOs (with the exception of -@racket[procedure-arity]). You have to make sure you modify all places -where closures are created/accessed to ensure that your changes work -correctly! - -As always, remember to test your code using both the testcases -provided and by adding your own! +@(require "../notes/ev.rkt" + "../notes/utils.rkt") + +@bold{Due: Monday, July 3, 11:59PM EST} + +The goal of this assignment is to hone your testing skills. + +@section[#:tag-prefix "a6-" #:style 'unnumbered #:tag "game"]{The Game} + +The autograder for this assignment includes a collection of compilers +that implement @secref["Assignment 5"] and a reference interpreter. + +You must submit a list of programs that will be run on each compiler. +If a compiler produces a result that is inconsistent with the +reference interpreter, it is eliminated. Your goal is to construct +a set of test programs that eliminate the largest number of compilers. +The player that eliminates the largest number of compilers, wins. + +Note that the notion of correctness we're using is the same one we've +been using all semester: if the interpreter crashes when evaluating a +program, that program has unspecified behavior and therefore the +compiler cannot be incorrect for that program. On the other hand if +the interpreter produces an answer (either a value or the error +result), then the compiler is obligated to produce the same answer. + +When you submit, choose a name to display on the leaderboard. It does +not need to be your real name, but please keep it appropriate for this +setting. + +After submitting, click "Leaderboard" to see the latest standings. + +There are 59 compilers included. Your score will be 15 + 2.5 times +the number of compilers you are able to eliminate, with a maximum +score of 100. + +We reserve the right to update the reference interpreter and will +announce any changes on Discord. 
+ +The following updates have been made since the release: + +@itemlist[ + +@item{The interpreter checks for integer overflow and crashes when +this happens, thereby making overflow behavior unspecified for the compilers.} + +@item{The interpreter crashes when interpreting unbound variables, + making unbound variable behavior unspecified.} + +] + +Submissions should be written using the following format: + +@codeblock|{ +#lang info +(define programs + (list + '[ (add1 1) ] + '[ (write-byte 97) ] + '[ (define (f x) (+ x x)) (f 5) ])) +}| + +If you'd like to include a program that reads data from the standard input +port, you can add entries which are two-element lists, where the first +element is a string that is used as the contents of the input port +and the second element is the program, for example: + +@codeblock|{ +#lang info +(define programs + (list + '[ (add1 1) ] + '[ (write-byte 97) ] + '[ "abc" [ (read-byte) ]] + '[ (define (f x) (+ x x)) (f 5) ])) +}| + + +You may add as many programs as you'd like to the file. + @section[#:tag-prefix "a6-" #:style 'unnumbered]{Submitting} -Submit just the @tt{compile.rkt} file on Gradescope. \ No newline at end of file +You should submit on Gradescope. You should submit a single file named
diff --git a/www/css/extra.css b/www/css/extra.css index f8ed5287..8d772ee9 100644 --- a/www/css/extra.css +++ b/www/css/extra.css @@ -23,6 +23,20 @@ visibility:hidden; float: right; } +.stale-warning { + color: red; + background-color: #fbc9c7; + border: 1px solid red; + padding: 10px; +} + +.warning { + color: red; + background-color: #fbc9c7; + border: 1px solid red; + padding: 10px; +} + .tocset td a.tocviewselflink { color: black; text-decoration: underline; @@ -47,7 +61,7 @@ visibility:hidden; background-color: hsl(293, 33%, 90%); } -a, .toclink, .toptoclink, .tocviewlink, .tocviewselflink, .tocviewtoggle, .plainlink, +a, .toclink, .toptoclink, .tocviewlink, .tocviewselflink, .tocviewtoggle, .plainlink, .techinside, .techoutside:hover, .techinside:hover { color: hsl(296, 100%, 22%); } diff --git a/www/defns.rkt b/www/defns.rkt index 2af549e6..29f8eb7a 100644 --- a/www/defns.rkt +++ b/www/defns.rkt @@ -2,32 +2,61 @@ (provide (all-defined-out)) (require scribble/core scribble/html-properties scribble/manual) -(define prof (link "https://www.cs.umd.edu/~dvanhorn/" "David Van Horn")) -(define prof-email "dvanhorn@cs.umd.edu") - -(define semester "fall") -(define year "2021") +;(define prof1 (link "https://jmct.cc" "José Manuel Calderón Trilla")) +;(define prof1-pronouns "he/him") +;(define prof1-email "jmct@cs.umd.edu") +;(define prof1-initials "JMCT") + +(define prof1 (link "https://www.cs.umd.edu/~dvanhorn/" "David Van Horn")) +(define prof1-pronouns "he/him") +(define prof1-email "dvanhorn@cs.umd.edu") +(define prof1-initials "DVH") + +(define semester "spring") +(define year "2024") (define courseno "CMSC 430") +(define lecture-dates "" #;"May 30 -- July 7, 2023") + (define IRB "IRB") (define AVW "AVW") +(define KEY "KEY") -(define m1-date "Friday, October 15th") -(define m2-date "Friday, November 12th") -(define final-date "Saturday, December 18th") -(define elms-url "TBD") - -(define racket-version "8.1") +(define m1-date "TBD") +(define m2-date "TBD") 
+(define midterm-hours "24") +(define final-date "TBD") +(define elms-url "https://umd.instructure.com/courses/1359023") -(define staff - (list (list (link "http://jmct.cc/" "José Manuel Calderón Trilla") "jmct@umd.edu" "-") - (list "William Chung" "wchung1@terpmail.umd.edu" "Th 3:30-5:30 Online") - (list "Justin Frank" "jpfrank@umd.edu" "W 12:00-2:00 AVW 4160") - (list "Vyas Gupta" "vgupta13@terpmail.umd.edu" "F 1:30-3:30 AVW 4160"))) - -(define lecture-schedule "Tuesday & Thursday, 2:00pm - 3:15pm, CSI 2117") -(define discord "https://discord.gg/tyumZUEFSk") +(define racket-version "8.11") -(define gradescope "https://www.gradescope.com/courses/303043") +(define staff + (list (list "William Wegand" "wwegand@terpmail.umd.edu") + (list "Pierce Darragh" "pdarragh@umd.edu") + (list "Henry Blanchette" "blancheh@umd.edu") + (list "Deena Postol" "dpostol@umd.edu") + (list "Kazi Tasnim Zinat" "kzintas@umd.edu") + #;(list "Fuxiao Liu" "fl3es@umd.edu") + #;(list "Vivian Chen" "vchen8@terpmail.umd.edu") + #;(list "Ian Morrill" "imorrill@terpmail.umd.edu") + #;(list "Matthew Schneider" "mgl@umd.edu") + #;(list "Rhea Jajodia" "rjajodia@terpmail.umd.edu") + #;(list "Syed Zaidi" "szaidi@umd.edu") + #;(list "William Wegand" "wfweg@verizon.net") + #;(list "Wilson Smith" "smith@umd.edu") + #;(list "Yuhwan Lee" "ylee9251@terpmail.umd.edu") + )) + + +;(define lecture-schedule1 "MW, 2:00-3:15pm") +(define lecture-schedule1 "MW, 3:30-4:45pm") + +(define classroom1 "HJP 0226") + +;(define discord "TBD") +(define piazza "https://piazza.com/class/lrs6masma6h2o1/") +(define gradescope "https://www.gradescope.com/") ; FIXME + +(define feedback "https://docs.google.com/forms/d/e/1FAIpQLSc80xQELhHb_Ef-tn0DkpH2b6pYadQiT3aYSEJFNqEqBjzdGg/viewform?usp=sf_link") \ No newline at end of file diff --git a/www/js/accessibility.js b/www/js/accessibility.js index b6b5d9da..079e6897 100644 --- a/www/js/accessibility.js +++ b/www/js/accessibility.js @@ -1,5 +1,8 @@ // Adds the web accessibility link to 
the bottom of every page +// change to true to mark pages as stale +var is_stale = false; + function AddAccessibilityToMainDiv() { var main_div = document.getElementsByClassName("main")[0]; @@ -7,5 +10,13 @@ function AddAccessibilityToMainDiv() h.setAttribute("class", "accessibility-link"); h.innerHTML = "Web Accessibility"; main_div.insertBefore(h, main_div.lastChild); + + if (is_stale) { + var g = document.createElement('div'); + g.setAttribute("class", "stale-warning"); + g.innerHTML = "This is NOT the current webpage! See " + + "Class web pages for current listing."; + main_div.insertBefore(g, main_div.firstChild); + }; } AddOnLoad(AddAccessibilityToMainDiv); diff --git a/www/main.scrbl b/www/main.scrbl index cb8df9d7..50eb11f0 100644 --- a/www/main.scrbl +++ b/www/main.scrbl @@ -21,9 +21,10 @@ @emph{@string-titlecase[semester], @year} -@emph{Lectures: @lecture-schedule} +@emph{Lecture}: @lecture-schedule1, @classroom1 + +@emph{Professor}: @prof1 -@emph{Professor: @prof} CMSC 430 is an introduction to compilers. Its major goal is to arm students with the ability to design, implement, and extend a @@ -31,14 +32,39 @@ programming language. Throughout the course, students will design and implement several related languages. -@tabular[#:style 'boxed +@tabular[#:style 'boxed #:row-properties '(bottom-border ()) - (list* (list @bold{Staff} 'cont 'cont) - (list @bold{Name} @elem{@bold{E-mail}} @elem{@bold{Hours}}) - (list prof prof-email "By appt.") + (list* (list @bold{Staff} 'cont) + (list @bold{Name} @elem{@bold{E-mail}}) + (list prof1 prof1-email) + #;(list prof2 prof2-email) staff)] -@bold{Communications:} Email, @link[@discord]{Discord} +@bold{Office hours:} AVW 4140 + +Schedule, TBD. 
+ +@;{ +@tabular[#:style 'boxed + #:row-properties '(bottom-border ()) + (list (list @bold{Time} @bold{Monday} @bold{Tuesday} @bold{Wednesday} @bold{Thursday} @bold{Friday}) + (list "9 AM" "William" 'cont "William" 'cont 'cont) + (list "10 AM" "William" 'cont "Ian, William" 'cont 'cont) + (list "11 AM" 'cont "Pierce" "Ian" 'cont 'cont) + (list "12 PM" "Dalton, Fuxiao" "Pierce" "Dalton" 'cont 'cont) + (list "1 PM" "Dalton, Fuxiao, Ian" 'cont "Matthew, Wilson" "Fuxiao" "Wilson") + (list "" "Wilson" 'cont 'cont 'cont 'cont) + (list "2 PM" "Ian, Wilson" "Pierce" "Matthew" "Fuxiao" 'cont) + (list "" 'cont 'cont 'cont "Pierce" 'cont) + (list "3 PM" "Matthew, Yuhwan" "Pierce" "Matthew, Yuhwan" "Pierce" "Vivian") + (list "4 PM" "Yuhwan" 'cont "Yuhwan, Dalton" 'cont 'cont) + (list "5 PM" 'cont "Vivian" 'cont "Vivian" 'cont) + (list "6 PM" 'cont 'cont 'cont "Vivian" 'cont))] +} + + + +@bold{Communications:} @link[@elms-url]{ELMS}, @link[@piazza]{Piazza} @bold{Assumptions:} This course assumes you know the material in CMSC 330 and CMSC 216. In particular, you need to know how to program in a functional @@ -50,6 +76,9 @@ this material. change. Any substantive change will be accompanied with an announcement to the class via ELMS. +@bold{Feedback:} We welcome anonymous feedback on the course and its +staff using this @link[feedback]{form}. + @include-section{syllabus.scrbl} @include-section{texts.scrbl} @include-section{schedule.scrbl} diff --git a/www/midterms.scrbl b/www/midterms.scrbl index b5a44f12..e6d8d33a 100644 --- a/www/midterms.scrbl +++ b/www/midterms.scrbl @@ -3,7 +3,7 @@ @title[#:style '(toc unnumbered)]{Midterms} There will be two midterm examinations, which will be @bold{take-home} -exams. Exams will be distributed at least 72 hours before the due +exams. Exams will be distributed at least 48 hours before the due date of the midterm. 
@itemlist[ diff --git a/www/midterms/1.scrbl b/www/midterms/1.scrbl index d9e45d96..e72491d3 100644 --- a/www/midterms/1.scrbl +++ b/www/midterms/1.scrbl @@ -5,19 +5,20 @@ @title{Midterm 1} -@bold{Due: @m1-date 3:15PM} +@bold{Due: @m1-date 11:59PM} -@(define repo "https://github.com/cmsc430/Midterm1-prog") - -Midterm 1 will be released at least 72 hours prior to its due date. +Midterm 1 will be released at least @midterm-hours hours prior to +its due date. +@;{ @section{Instructions} The midterm will be released as a zip file @tt{m1.zip} on ELMS. -There are SIX parts to this midterm. Each part has its own directory -with a README and supplementary files. Read the README in each part -for instructions on how to complete that part of the midterm. +There are several parts to this midterm. Each part has its own +directory with a README and supplementary files. Read the README in +each part for instructions on how to complete that part of the +midterm. @section{Communications} @@ -25,10 +26,10 @@ If you have questions about the exam, send a DM to ModMail on Discord. This will go to the entire course staff. Answers to common clarifying questions will be posted to the -@tt{#midterm-1} channel on Discord. +@tt{#midterm1} channel on Discord. If you have trouble reaching the course staff via Discord, email -@tt|{dvanhorn@cs.umd.edu}|. +@tt{@prof-email}. You may not communicate with anyone outside of the course staff about the midterm. @@ -36,6 +37,14 @@ the midterm. @section{Submissions} You should submit your work as a single zip file of this directory on -Gradescope. Unlike past assignments, Gradescope will not provide -feedback on the correctness of your solutions so you are encouraged to -check your own work. +Gradescope. Unlike past assignments, Gradescope will only do a basic +test for well-formedness of your submission. It will make sure the +directory layout is correct and that all the functions that will be +tested are available. 
It will catch syntax errors in your code, but +it does not run any correctness tests. + +If you fail these tests, we will not be able to grade your submission. +Passing these tests only means your submission is well-formed. Your +actual grade will be computed after the deadline. + +You are encouraged to check your own work.} \ No newline at end of file diff --git a/www/midterms/2.scrbl b/www/midterms/2.scrbl index 82fb75ef..6d3f5788 100644 --- a/www/midterms/2.scrbl +++ b/www/midterms/2.scrbl @@ -9,289 +9,36 @@ @bold{Due: @m2-date 11:59PM} -@(define repo "https://github.com/cmsc430/Midterm2-prog") - -Midterm 2 will be released at least 72 hours prior to its due date. +Midterm 2 will be released at least @midterm-hours hours prior to +its due date. @;{ +@section{Instructions} -Midterm repository: -@centered{@link[repo repo]} - -The exam consists of two parts: a written portion and a programmatic -portion. Both will be handled through gradescope. You will see two -gradescope assignments marked accordingly. - -During the exam period, you may only ask private questions to -the staff (via email, discord, etc.) if you need clarification. -You may not communicate or collaborate with any one else about the -content of this exam. - -Questions that are deemed applicable to the entire class will be shared, along -with their responses, with the rest of the class. - -The repository contains two things. -@itemlist[ -@item{A folder @tt{BoxIncr} which contains the base code to build upon for Question 4.} -@item{A folder @tt{CallByName} which contains the base code to build upon for Question 5.} -] - -Your submission must be submitted by 11:59 EDT on Tuesday, April -13th. For the programmatic fragment, you should submit a zip file -containing two files: the @tt{BoxIncr/compile.rkt} for -Question 4, and @tt{CallByName/interp.rkt} for Question 5. 
- -@section[#:tag-prefix "m2"]{Short answer} - -@bold{Question 1} - -[10 points] - -@itemlist[ - -@item{On the random generation lecture on Tuesday (a recording can be -found on ELMS!), we wrote random generators for testing the @tt{Con} -and @tt{Dupe} languages. When going from @tt{Con} to @tt{Dupe}, -we modified the generators to take an additional argument representing -the type of the expression we want to generate. Why?} - -@item{Why did we need to introduce the @tt{Lea} assembly -instruction when implementing @tt{Knock}?} -] - -@bold{Question 2} - -[10 points] - -Is it possible for @tt{fib1} to run out of memory? Justify your -answer. - -@#reader scribble/comment-reader -(racketblock -(define (fib1 i) - (match i - [0 1] - [1 1] - [_ (+ (fib1 (- i 1)) (fib1 (- i 2)))])) -) - - -How about @tt{fib2}? Again, justify your answer. - -@#reader scribble/comment-reader -(racketblock -(define (fib2-aux i fib-1 fib-2) - (if (zero? i) fib-1 - (fib2-aux (sub1 i) (+ fib-1 fib-2) fib-1))) - -(define (fib2 i) - (match i - [0 1] - [1 1] - [_ (fib2-aux (sub1 i) 1 1)])) -) - -Hint: consider Jig. - -@bold{Question 3} - -[10 points] - -For each of the following expressions, which subexpressions are in tail -position? Assume that the top-level expression is in tail position. - -@itemlist[ - -@item{@verbatim{(sub1 e0)}} - -@item{@verbatim{(begin e0 e1)}} - -@item{@verbatim{(let ((x a)) e)}} - -@item{@verbatim{(if b (box e1) e2)}} - -@item{@verbatim{(match e [p1 e1] [_ e2])}} - -] - -@section[#:tag-prefix "m2"]{Code generation} - -@bold{Question 4} - -[25 points] - -In the repo (@link[repo repo]), you will find a directory named -"BoxIncr". That contains the @tt{Hustle} language from the lectures, -partially extended with two additional primitives: @racket[incr-box!] -and an @racket[decr-box!]. - -An @racket[(incr-box! _e)] expression evaluates @racket[_e]. The -result of @racket[_e] should be a boxed integer (otherwise an error is -signalled). 
The box is updated (mutated) to increment its value by 1. -Similarly, @racket[(decr-box! _e)] should decrement the boxed integer -by 1. The result of the operation should be @racket[void]. - -Here's an example that returns 42 (note: @racket[let] is used for -sequencing here): - -@#reader scribble/comment-reader -(racketblock -(let ((b (box 41))) - (let ((v (incr-box! b))) - (unbox b))) -) - -The ast, parser, and interpreter have already been updated for you to -implement this functionality. Your job is to implement the compiler. - -@section[#:tag-prefix "m2"]{Call by Name} - -@bold{Question 5} - -[45 points] - -In the @link[repo repo], you will find a stripped down version of the -@secref["Iniquity"] language: just the interpreter. Iniquity -introduces the notion of function definitions and function calls. The -way we evaluate function calls (as in racket) is known as -"call-by-value": the arguments to a function call are evaluated -before the body of the function is evaluated. - -An alternative evaluation strategy is "call-by-name". In call-by-name, -the arguments to a function call are substituted in the function body, -left to be evaluated as they appear: if an argument is unused, it will -never be evaluated; if an argument is used multiple times, it will be -evaluated multiple times. - -Consider the following example: - -@#reader scribble/comment-reader -(racketblock -(begin - (define (f x) 42) - (f (read-byte))) -) - -In standard call-by-value, this will read a byte from standard input, -and proceed to call @tt{f} with that read value, ignore it, and return -42. In call-by-name, this program will not read any value from the -standard input - the @tt{read-byte} will never be evaluated, as the body -of @tt{f} does not make use of @tt{x}. 
- -On the other hand, consider the following program: - -@#reader scribble/comment-reader -(racketblock -(begin - (define (f x) (+ x x)) - (f (read-byte))) -) - -Again, in standard call-by-value, this will read a byte from standard -input, and proceed to call @tt{f} with that read value, doubling it. -In call-by-name, this program will result to two different calls to -@tt{read-byte}, whose results will then be added together: when -calling @tt{(f (read-byte))}, the argument will be substituted into -the body of @tt{f}, yielding @tt{(+ (read-byte) (read-byte))}. - -An interesting point is how call-by-value interacts with -let-bindings. For this assignment, we will keep let-bindings strict, -just like in the current interpreter, that is the argument to the let -will be evaluated before the body is executed. - -To fully understand the interactions between let and function calls, consider the following examples: - -@#reader scribble/comment-reader -(racketblock -(begin - (define (f x y) (+ (+ x x) (+ y y))) - (let ((z (read-byte))) (f z (read-byte)))) -) - -This program has the effect of first evaluating @tt{read-byte} (for -example, say 42), and then substituting its value for @tt{z} in the -body of the let. Then it will evaluate the call @tt{(f z -(read-byte))}. This will have the effect of substituting the value of -@tt{z} (which has already been evaluated), and the expression -@tt{(read-byte)} for @tt{x} and @tt{y} in the body of @tt{f}. In turn, -that means that there will be two new calls to @tt{read-byte}, for -each occurence of @tt{y} in the body of f. - -@#reader scribble/comment-reader -(racketblock -(begin - (define (f x) (+ x y)) - (let ((y 42)) (f y))) -) - -This program should yield an error - we're still using static scoping! - -@#reader scribble/comment-reader -(racketblock -(begin - (define (f x) (let ((y 42)) x)) - (let ((y 17)) (f y))) -) - -This program should yield 17, in both evaluation methods. 
- -@#reader scribble/comment-reader -(racketblock -(begin - (define (f x) (let ((x 42)) x)) - (f 17)) -) - -This example should yield 42, the let in the body of f shadows the -argument. - -@#reader scribble/comment-reader -(racketblock -(begin - (define (loop x) (loop x)) - (define (f x) 42) - (f (loop 0))) -) - -In call-by-name, this program terminates as the argument -@tt{(loop 0)} is never evaluated! - -@#reader scribble/comment-reader -(racketblock -(begin - (define (f x) 42) - (f (+ #t #f))) -) - -In call-by-name, this program also terminates: the error-producing -argument @tt{(+ #t #f)} is again never evaluated! +The midterm will be released as a zip file @tt{m2.zip} on ELMS. -Your task is to modify the interpreter to implement this style of -evaluation. The easiest way to do that is to re-implement -let-bindings and function calls using substitution instead of the -environment to pass arguments. While an implementation using -environments is possible, it can be very tricky to get right; you have -been warned! +There are several parts to this midterm. Each part has its own directory +with a README and supplementary files. Read the README in each part +for instructions on how to complete that part of the midterm. +@section{Communications} -@bold{Question Call-by-need: Extra Credit} +If you have questions about the exam, send a DM to ModMail on Discord. +This will go to the entire course staff. -[20 points] +Answers to common clarifying questions will be posted to the +@tt{#midterm-2} channel on Discord. -An alternative to call-by-name is call-by-need. In call-by-need, the -arguments to a function are not evaluated at call time (just like in -call-by-name), but the evaluation of the argument is memoized - it's -going to only be evaluated the first time it's needed and future -occurences will reuse that evaluation's result. +If you have trouble reaching the course staff via Discord, email +@tt|{dvanhorn@cs.umd.edu}|. 
-You're on your own on this one and there will be no autograder, so -reach out to set out a time to talk about your solution if you try to -tackle this! +You may not communicate with anyone outside of the course staff about +the midterm. -@subsection{Submission and Grading} +@section{Submissions} -We will only use two files for grading: @tt{BoxIncr/compile.rkt} and -@tt{CallByName/interp.rkt}. You should be able to submit a zip from -inside the cloned repo to Gradescope, but we will only be using these -two files for grading, so restrict your work in those. +You should submit your work as a single zip file of this directory on +Gradescope. Unlike past assignments, Gradescope will not provide +feedback on the correctness of your solutions so you are encouraged to +check your own work. } \ No newline at end of file diff --git a/www/notes.scrbl b/www/notes.scrbl index 7df98bac..63d97142 100644 --- a/www/notes.scrbl +++ b/www/notes.scrbl @@ -30,4 +30,8 @@ suggestions for improving the material, @bold{please}, @include-section{notes/jig.scrbl} @include-section{notes/knock.scrbl} @include-section{notes/loot.scrbl} -@include-section{notes/shakedown.scrbl} +@include-section{notes/mug.scrbl} +@include-section{notes/mountebank.scrbl} +@include-section{notes/neerdowell.scrbl} +@include-section{notes/outlaw.scrbl} +@;include-section{notes/shakedown.scrbl} diff --git a/www/notes/a86.scrbl b/www/notes/a86.scrbl index 26c577fb..1e4ee76f 100644 --- a/www/notes/a86.scrbl +++ b/www/notes/a86.scrbl @@ -2,9 +2,9 @@ @(require (for-label (except-in racket compile) a86)) - + @(require scribble/examples - redex/reduction-semantics + redex/reduction-semantics redex/pict (only-in pict scale) (only-in racket system) @@ -67,7 +67,7 @@ int gcd(int n1, int n2) { } HERE ) - + (parameterize ([current-directory (build-path notes "a86")]) (save-file "tri.s" (asm-string (tri 36))) (save-file "main.c" main.c) @@ -161,7 +161,7 @@ even at a low-level. 
Without getting too bogged down in the details, here how the code works. Instructions execute one after another. There are a number of registers which can be used to hold values. -This code makes use of the @tt{rax} and @tt{rdi} register +This code makes use of the @tt{rax} and @tt{rbx} register (and some other registers are implicitly used and altered by the @tt{call}, @tt{push}, @tt{pop} and @tt{ret} instructions). The lines like @tt{entry:}, @tt{tri:}, and @@ -178,7 +178,7 @@ Suppose we start executing at @tt{entry}. rbx} to zero. Executing this instruction sets a flag in the CPU, which affects subsequent ``conditional'' instructions. In this program, the next instruction is a conditional jump.} - + @item{@tt{je done} either jumps to the instruction following label @tt{done} or proceeds to the next instruction, based on the state of the comparison flag. The @@ -192,7 +192,7 @@ Suppose we start executing at @tt{entry}. (register @tt{rsp}).} @item{@tt{sub rbx, 1} decrements @tt{rbx} by 1.} - + @item{@tt{call tri} performs something like a function call; it uses memory as a stack to save the current location in the code (which is where control should return to after @@ -222,7 +222,7 @@ Suppose we start executing at @tt{entry}. ``output'') is 0.} @item{@tt{ret} does a ``return,'' either to a prior call to - @tt{tri} or the caller of @tt{entry}.} + @tt{tri} or the caller of @tt{entry}.} ] Despite the lower-level mechanisms, this code computes in a @@ -401,22 +401,20 @@ number. Easy-peasy: It's also easy to go from our data representation to its interpretation as an x86 program. -First, we convert the data to a string. There is a function -provided for converting from a86 to a string representation -of x86 code in nasm notation, called @racket[asm-string]. -You can use @racket[display] to print this to the current -output port (or to a file): +There is a function provided for printing an a86 program as an x86 +program using nasm notation, called @racket[asm-display]. 
Calling +this function prints to the current output port, but it's also +possible to write the output to a file or convert it to a string. -@margin-note{The @racket[asm-string] function knows what OS you are +@margin-note{The @racket[asm-display] function knows what OS you are using and adjusts the label naming convention to use underscores or not, so that you don't have to worry about it.} @ex[ -(display (asm-string (tri 36))) +(asm-display (tri 36)) ] -Notice how this generates exactly what you saw in @tt{ - tri.s}. +Notice how this generates exactly what you saw in @tt{tri.s}. From here, we can assemble, link, and execute. @@ -537,7 +535,7 @@ save the result of @racket['f]: (ex (eg (seq (Call 'f) (Mov 'rbx 'rax) - (Call 'g) + (Call 'g) (Add 'rax 'rbx))) ) @@ -636,13 +634,13 @@ address to jump to, we could've also written it as: (ex (eg (seq (Sub 'rsp 8) ; allocate a frame on the stack ; load address of 'fret label into top of stack - (Lea (Offset 'rsp 0) 'fret) + (Lea (Offset 'rsp 0) 'fret) (Jmp 'f) ; jump to 'f (Label 'fret) ; <-- return point for "call" to 'f (Push 'rax) ; save result (like before) (Sub 'rsp 8) ; allocate a frame on the stack ; load address of 'gret label into top of stack - (Lea (Offset 'rsp 0) 'gret) + (Lea (Offset 'rsp 0) 'gret) (Jmp 'g) ; jump to 'g (Label 'gret) ; <-- return point for "call" to 'g (Pop 'rbx) ; pop saved result from calling 'f @@ -680,15 +678,21 @@ and @racketmodname[a86/interp], described below This section describes the instruction set of a86. -There are 16 registers: @racket['rax], @racket['rbx], -@racket['rcx], @racket['rdx], @racket['rbp], @racket['rsp], -@racket['rsi], @racket['rdi], @racket['r8], @racket['r9], -@racket['r10], @racket['r11], @racket['r12], @racket['r13], -@racket['r14], and @racket['r15]. These registers are -64-bits wide. There is no analog to the x86 register -suffixes for accessing low-order bits. 
Each register plays -the same role as in x86, so for example @racket['rsp] holds -the current location of the stack. +There are 16 registers: @racket['rax], @racket['rbx], @racket['rcx], +@racket['rdx], @racket['rbp], @racket['rsp], @racket['rsi], +@racket['rdi], @racket['r8], @racket['r9], @racket['r10], +@racket['r11], @racket['r12], @racket['r13], @racket['r14], and +@racket['r15]. These registers are 64-bits wide. There is also +@racket['eax] which accesses the lower 32-bits of @racket['rax]. +This is useful in case you need to read or write 32-bits of memory. + +The registers @racket['rbx], @racket['rsp], @racket['rbp], and +@racket['r12] through @racket['r15] are ``callee-saved'' registers, +meaning they are preserved across function calls (and must be saved +and restored by any callee code). + +Each register plays the same role as in x86, so for example +@racket['rsp] holds the current location of the stack. @defproc[(register? [x any/c]) boolean?]{ A predicate for registers. @@ -696,6 +700,21 @@ the current location of the stack. @defproc[(label? [x any/c]) boolean?]{ A predicate for label @emph{names}, i.e. symbols which are not register names. + + Labels must also follow the NASM restrictions on label names: "Valid + characters in labels are letters, numbers, @tt{_}, @tt{$}, @tt{#}, @tt{@"@"}, @tt{~}, @tt{.}, and + @tt{?}. The only characters which may be used as the first character of an + identifier are letters, @tt{.} (with special meaning), @tt{_} + and @tt{?}." + + @ex[ + (label? 'foo) + (label? "foo") + (label? 'rax) + (label? 'foo-bar) + (label? 'foo.bar) + ] + } @defproc[(instruction? [x any/c]) boolean?]{ @@ -706,6 +725,28 @@ the current location of the stack. A predicate for offsets. } +@defproc[(64-bit-integer? [x any/c]) boolean?]{ + A predicate for determining if a value is an integer that fits in 64-bits. + + @ex[ + (64-bit-integer? 0) + (64-bit-integer? (sub1 (expt 2 64))) + (64-bit-integer? (expt 2 64)) + (64-bit-integer? 
(- (expt 2 63))) + (64-bit-integer? (sub1 (- (expt 2 63))))] +} + +@defproc[(32-bit-integer? [x any/c]) boolean?]{ + A predicate for determining if a value is an integer that fits in 32-bits. + + @ex[ + (32-bit-integer? 0) + (32-bit-integer? (sub1 (expt 2 32))) + (32-bit-integer? (expt 2 32)) + (32-bit-integer? (- (expt 2 32))) + (32-bit-integer? (sub1 (- (expt 2 32))))] +} + @defproc[(seq [x (or/c instruction? (listof instruction?))] ...) (listof instruction?)]{ A convenience function for splicing togeter instructions and lists of instructions. @@ -728,7 +769,7 @@ the current location of the stack. @itemlist[ - @item{Programs have at least one label; the first label is used as the entry point.} + @item{Programs have at least one label which is declared @racket[Global]; the first label is used as the entry point.} @item{All label declarations are unique.} @item{All label targets are declared.} @item{... other properties may be added in the future.} @@ -741,18 +782,31 @@ the current location of the stack. outermost level of a function that produces a86 code and not nested. - @ex[ - (prog (Label 'foo)) - (prog (list (Label 'foo))) + @ex[ + (prog (Global 'foo) (Label 'foo)) + (eval:error (prog (Label 'foo))) + (eval:error (prog (list (Label 'foo)))) (eval:error (prog (Mov 'rax 32))) (eval:error (prog (Label 'foo) (Label 'foo))) (eval:error (prog (Jmp 'foo))) - (prog (Label 'foo) + (prog (Global 'foo) + (Label 'foo) (Jmp 'foo)) ] } +@defproc[(symbol->label [s symbol?]) label?]{ + + Returns a modified form of a symbol that follows NASM label conventions. + + @ex[ + (let ([l (symbol->label 'my-great-label)]) + (seq (Label l) + (Jmp l))) + ] +} + @deftogether[(@defstruct*[% ([s string?])] @defstruct*[%% ([s string?])] @defstruct*[%%% ([s string?])])]{ @@ -765,9 +819,9 @@ the current location of the stack.
@#reader scribble/comment-reader (ex - (display - (asm-string - (prog (%%% "Start of foo") + (asm-display + (prog (Global 'foo) + (%%% "Start of foo") (Label 'foo) ; Racket comments won't appear (%% "Inputs one argument in rdi") @@ -775,9 +829,9 @@ the current location of the stack. (Add 'rax 'rax) (% "double it") (Sub 'rax 1) (% "subtract one") (%% "we're done!") - (Ret))))) + (Ret)))) } - + @defstruct*[Offset ([r register?] [i exact-integer?])]{ Creates an memory offset from a register. Offsets are used @@ -790,17 +844,31 @@ the current location of the stack. ] } +@defstruct*[Text ()]{ + + Declares the start of a text section, which includes instructions to + be executed. + +} + +@defstruct*[Data ()]{ + + Declares the start of a data section, which includes data and constants. + +} @defstruct*[Label ([x label?])]{ Creates a label from the given symbol. Each label in a program must be unique. Register names cannot be used - as label names. + as label names and names must follow the NASM restrictions + on valid label names (see @racket[label?] for details). @ex[ (Label 'fred) (eval:error (Label "fred")) (eval:error (Label 'rax)) + (eval:error (Label 'fred-wilma)) ] } @@ -808,9 +876,16 @@ the current location of the stack. @defstruct*[Extern ([x label?])]{ Declares an external label. - + +} + +@defstruct*[Global ([x label?])]{ + + Declares a label as global, i.e. linkable with other object files. + } + @defstruct*[Call ([x (or/c label? register?)])]{ A call instruction. @@ -844,8 +919,8 @@ the current location of the stack. } -@defstruct*[Mov ([dst (or/c register? offset?)] [src (or/c register? offset? exact-integer?)])]{ - +@defstruct*[Mov ([dst (or/c register? offset?)] [src (or/c register? offset? 64-bit-integer?)])]{ + A move instruction. Moves @racket[src] to @racket[dst]. Either @racket[dst] or @racket[src] may be offsets, but not both. @@ -854,7 +929,7 @@ the current location of the stack. 
(asm-interp (prog (Global 'entry) - (Label 'entry) + (Label 'entry) (Mov 'rbx 42) (Mov 'rax 'rbx) (Ret))) @@ -863,23 +938,23 @@ the current location of the stack. } -@defstruct*[Add ([dst register?] [src (or/c register? offset? exact-integer?)])]{ +@defstruct*[Add ([dst register?] [src (or/c register? offset? 32-bit-integer?)])]{ An addition instruction. Adds @racket[src] to @racket[dst] and writes the result to @racket[dst]. - + @ex[ (asm-interp (prog (Global 'entry) - (Label 'entry) + (Label 'entry) (Mov 'rax 32) (Add 'rax 10) (Ret))) ] } -@defstruct*[Sub ([dst register?] [src (or/c register? offset? exact-integer?)])]{ +@defstruct*[Sub ([dst register?] [src (or/c register? offset? 32-bit-integer?)])]{ A subtraction instruction. Subtracts @racket[src] frrom @racket[dst] and writes the result to @racket[dst]. @@ -888,14 +963,14 @@ the current location of the stack. (asm-interp (prog (Global 'entry) - (Label 'entry) + (Label 'entry) (Mov 'rax 32) (Sub 'rax 10) (Ret))) ] } -@defstruct*[Cmp ([a1 (or/c register? offset?)] [a2 (or/c register? offset? exact-integer?)])]{ +@defstruct*[Cmp ([a1 (or/c register? offset?)] [a2 (or/c register? offset? 32-bit-integer?)])]{ Compare @racket[a1] to @racket[a2]. Doing a comparison sets the status flags that affect the conditional instructions like @racket[Je], @racket[Jl], etc. @@ -908,14 +983,14 @@ the current location of the stack. (Cmp 'rax 2) (Jg 'l1) (Mov 'rax 0) - (Label 'l1) + (Label 'l1) (Ret))) - ] + ] } @defstruct*[Jmp ([x (or/c label? register?)])]{ Jump to label @racket[x]. - + @ex[ (asm-interp (prog @@ -932,15 +1007,15 @@ the current location of the stack. (Global 'entry) (Label 'entry) (Mov 'rax 42) - (Pop 'rbx) + (Pop 'rbx) (Jmp 'rbx))) ] - + } @defstruct*[Je ([x (or/c label? register?)])]{ Jump to label @racket[x] if the conditional flag is set to ``equal.'' - + @ex[ (asm-interp (prog @@ -950,14 +1025,14 @@ the current location of the stack. 
(Cmp 'rax 2) (Je 'l1) (Mov 'rax 0) - (Label 'l1) + (Label 'l1) (Ret))) ] } @defstruct*[Jne ([x (or/c label? register?)])]{ Jump to label @racket[x] if the conditional flag is set to ``not equal.'' - + @ex[ (asm-interp (prog @@ -967,14 +1042,14 @@ the current location of the stack. (Cmp 'rax 2) (Jne 'l1) (Mov 'rax 0) - (Label 'l1) + (Label 'l1) (Ret))) ] } @defstruct*[Jl ([x (or/c label? register?)])]{ Jump to label @racket[x] if the conditional flag is set to ``less than.'' - + @ex[ (asm-interp (prog @@ -984,14 +1059,31 @@ the current location of the stack. (Cmp 'rax 2) (Jl 'l1) (Mov 'rax 0) - (Label 'l1) + (Label 'l1) + (Ret))) + ] +} + +@defstruct*[Jle ([x (or/c label? register?)])]{ + Jump to label @racket[x] if the conditional flag is set to ``less than or equal.'' + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 42) + (Cmp 'rax 42) + (Jle 'l1) + (Mov 'rax 0) + (Label 'l1) (Ret))) ] } @defstruct*[Jg ([x (or/c label? register?)])]{ Jump to label @racket[x] if the conditional flag is set to ``greater than.'' - + @ex[ (asm-interp (prog @@ -1001,12 +1093,359 @@ the current location of the stack. (Cmp 'rax 2) (Jg 'l1) (Mov 'rax 0) - (Label 'l1) + (Label 'l1) + (Ret))) + ] +} + +@defstruct*[Jge ([x (or/c label? register?)])]{ + Jump to label @racket[x] if the conditional flag is set to ``greater than or equal.'' + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 42) + (Cmp 'rax 42) + (Jge 'l1) + (Mov 'rax 0) + (Label 'l1) + (Ret))) + ] +} + +@defstruct*[Jo ([x (or/c label? register?)])]{ + Jump to @racket[x] if the overflow flag is set. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax (sub1 (expt 2 63))) + (Add 'rax 1) + (Jo 'l1) + (Mov 'rax 0) + (Label 'l1) + (Ret))) + ] +} + +@defstruct*[Jno ([x (or/c label? register?)])]{ + Jump to @racket[x] if the overflow flag is @emph{not} set.
+ + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax (sub1 (expt 2 63))) + (Add 'rax 1) + (Jno 'l1) + (Mov 'rax 0) + (Label 'l1) + (Ret))) + ] +} + +@defstruct*[Jc ([x (or/c label? register?)])]{ + Jump to @racket[x] if the carry flag is set. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax -1) + (Add 'rax 1) + (Jc 'l1) + (Mov 'rax 0) + (Label 'l1) + (Ret))) + ] +} + +@defstruct*[Jnc ([x (or/c label? register?)])]{ + Jump to @racket[x] if the carry flag is @emph{not} set. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax -1) + (Add 'rax 1) + (Jnc 'l1) + (Mov 'rax 0) + (Label 'l1) + (Ret))) + ] +} + +@defstruct*[Cmove ([dst register?] [src (or/c register? offset?)])]{ + Move from @racket[src] to @racket[dst] if the comparison flag is set to equal. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 0) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmove 'rax 'r9) + (Ret))) + + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 2) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmove 'rax 'r9) + (Ret))) + ] +} + +@defstruct*[Cmovne ([dst register?] [src (or/c register? offset?)])]{ + Move from @racket[src] to @racket[dst] if the comparison flag is set to not equal. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 0) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmovne 'rax 'r9) + (Ret))) + + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 2) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmovne 'rax 'r9) + (Ret))) + ] +} + +@defstruct*[Cmovl ([dst register?] [src (or/c register? offset?)])]{ + Move from @racket[src] to @racket[dst] if the comparison flag is set to less than. 
+ + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 0) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmovl 'rax 'r9) + (Ret))) + + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax -1) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmovl 'rax 'r9) (Ret))) ] } -@defstruct*[And ([dst (or/c register? offset?)] [src (or/c register? offset? exact-integer?)])]{ +@defstruct*[Cmovle ([dst register?] [src (or/c register? offset?)])]{ + Move from @racket[src] to @racket[dst] if the comparison flag is set to less than or equal. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 0) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmovle 'rax 'r9) + (Ret))) + + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 2) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmovle 'rax 'r9) + (Ret))) + ] +} + +@defstruct*[Cmovg ([dst register?] [src (or/c register? offset?)])]{ + Move from @racket[src] to @racket[dst] if the comparison flag is set to greater than. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 0) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmovg 'rax 'r9) + (Ret))) + + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 2) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmovg 'rax 'r9) + (Ret))) + ] +} + +@defstruct*[Cmovge ([dst register?] [src (or/c register? offset?)])]{ + Move from @racket[src] to @racket[dst] if the comparison flag is set to greater than or equal. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax -1) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmovge 'rax 'r9) + (Ret))) + + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 2) + (Cmp 'rax 0) + (Mov 'r9 1) + (Cmovge 'rax 'r9) + (Ret))) + ] +} + +@defstruct*[Cmovo ([dst register?] [src (or/c register? offset?)])]{ + Move from @racket[src] to @racket[dst] if the overflow flag is set.
+ + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax (- (expt 2 63) 1)) + (Add 'rax 1) + (Mov 'r9 1) + (Cmovo 'rax 'r9) + (Ret))) + + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax (- (expt 2 63) 2)) + (Add 'rax 1) + (Mov 'r9 1) + (Cmovo 'rax 'r9) + (Ret))) + ] +} + +@defstruct*[Cmovno ([dst register?] [src (or/c register? offset?)])]{ + Move from @racket[src] to @racket[dst] if the overflow flag is not set. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax (- (expt 2 63) 1)) + (Add 'rax 1) + (Mov 'r9 1) + (Cmovno 'rax 'r9) + (Ret))) + + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax (- (expt 2 63) 2)) + (Add 'rax 1) + (Mov 'r9 1) + (Cmovno 'rax 'r9) + (Ret))) + ] +} + +@defstruct*[Cmovc ([dst register?] [src (or/c register? offset?)])]{ + Move from @racket[src] to @racket[dst] if the carry flag is set. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax (- (expt 2 64) 1)) + (Add 'rax 1) + (Mov 'r9 1) + (Cmovc 'rax 'r9) + (Ret))) + + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax (- (expt 2 64) 2)) + (Add 'rax 1) + (Mov 'r9 1) + (Cmovc 'rax 'r9) + (Ret))) + ] +} + +@defstruct*[Cmovnc ([dst register?] [src (or/c register? offset?)])]{ + Move from @racket[src] to @racket[dst] if the carry flag is not set. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax (- (expt 2 64) 1)) + (Add 'rax 1) + (Mov 'r9 1) + (Cmovnc 'rax 'r9) + (Ret))) + + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax (- (expt 2 64) 2)) + (Add 'rax 1) + (Mov 'r9 1) + (Cmovnc 'rax 'r9) + (Ret))) + ] +} + + +@defstruct*[And ([dst (or/c register? offset?)] [src (or/c register? offset? 32-bit-integer?)])]{ + Compute logical ``and'' of @racket[dst] and @racket[src] and put result in @racket[dst]. @#reader scribble/comment-reader @@ -1021,7 +1460,7 @@ the current location of the stack. ) } -@defstruct*[Or ([dst (or/c register? 
offset?)] [src (or/c register? offset? exact-integer?)])]{ +@defstruct*[Or ([dst (or/c register? offset?)] [src (or/c register? offset? 32-bit-integer?)])]{ Compute logical ``or'' of @racket[dst] and @racket[src] and put result in @racket[dst]. @#reader scribble/comment-reader @@ -1036,7 +1475,7 @@ the current location of the stack. ) } -@defstruct*[Xor ([dst (or/c register? offset?)] [src (or/c register? offset? exact-integer?)])]{ +@defstruct*[Xor ([dst (or/c register? offset?)] [src (or/c register? offset? 32-bit-integer?)])]{ Compute logical ``exclusive or'' of @racket[dst] and @racket[src] and put result in @racket[dst]. @#reader scribble/comment-reader @@ -1091,17 +1530,17 @@ the current location of the stack. ) } -@defstruct*[Push ([a1 (or/c exact-integer? register?)])]{ +@defstruct*[Push ([a1 (or/c 32-bit-integer? register?)])]{ Decrements the stack pointer and then stores the source operand on the top of the stack. - + @ex[ (asm-interp (prog (Global 'entry) (Label 'entry) - (Mov 'rax 42) + (Mov 'rax 42) (Push 'rax) (Mov 'rax 0) (Pop 'rax) @@ -1111,13 +1550,13 @@ the current location of the stack. @defstruct*[Pop ([a1 register?])]{ Loads the value from the top of the stack to the destination operand and then increments the stack pointer. - + @ex[ (asm-interp (prog (Global 'entry) (Label 'entry) - (Mov 'rax 42) + (Mov 'rax 42) (Push 'rax) (Mov 'rax 0) (Pop 'rax) @@ -1125,9 +1564,23 @@ the current location of the stack. ] } +@defstruct*[Not ([a1 register?])]{ +Perform bitwise not operation (each 1 is set to 0, and each 0 is set to 1) on the destination operand. + + @ex[ + (asm-interp + (prog + (Global 'entry) + (Label 'entry) + (Mov 'rax 0) + (Not 'rax) + (Ret))) + ] +} + @defstruct*[Lea ([dst (or/c register? offset?)] [x label?])]{ Loads the address of the given label into @racket[dst]. - + @ex[ (asm-interp (prog @@ -1142,30 +1595,50 @@ the current location of the stack. 
] } +@defstruct*[Db ([d integer?])]{ + Pseudo-instruction for declaring 8-bits of initialized static memory. +} + +@defstruct*[Dw ([d integer?])]{ + Pseudo-instruction for declaring 16-bits of initialized static memory. +} + +@defstruct*[Dd ([d integer?])]{ + Pseudo-instruction for declaring 32-bits of initialized static memory. +} + +@defstruct*[Dq ([d integer?])]{ + Pseudo-instruction for declaring 64-bits of initialized static memory. +} + @section{From a86 to x86} @defmodule[a86/printer] +@defproc[(asm-display [is (listof instruction?)]) void?]{ + + Prints an a86 program to the current output port in nasm syntax. + + @ex[ + (asm-display (prog (Global 'entry) + (Label 'entry) + (Mov 'rax 42) + (Ret))) + ] + +} + @defproc[(asm-string [is (listof instruction?)]) string?]{ - Converts an a86 program to a string in nasm syntax. This is - useful in concert with Racket functions for IO in order to - write programs using concrete syntax that can be passed to - @tt{nasm}. + Converts an a86 program to a string in nasm syntax. @ex[ (asm-string (prog (Global 'entry) (Label 'entry) (Mov 'rax 42) (Ret))) - - (display - (asm-string (prog (Global 'entry) - (Label 'entry) - (Mov 'rax 42) - (Ret)))) ] - + } @section{An Interpreter for a86} @@ -1223,7 +1696,7 @@ The simplest form of interpreting an a86 program is to use (Mov 'rax 0) (Jmp 'rax)))) ] - + } It is often the case that we want our assembly programs to @@ -1270,7 +1743,7 @@ code: (Sub 'rsp 8) (Call 'gcd) (Add 'rsp 8) - (Ret))))] + (Ret))))] This will be particularly relevant for writing a compiler where emitted code will make use of functionality defined in @@ -1296,8 +1769,5 @@ linking error saying a symbol is undefined: Like @racket[asm-interp], but uses @racket[in] for input and produce the result along with any output as a string. 
- -} - - +} diff --git a/www/notes/abscond.scrbl b/www/notes/abscond.scrbl index 4c9d3430..c635faec 100644 --- a/www/notes/abscond.scrbl +++ b/www/notes/abscond.scrbl @@ -31,11 +31,13 @@ #'(void)))])) @;{ Have to compile 42.s (at expand time) before listing it } -@(shell-expand "racket -t compile-file.rkt -m 42.rkt > 42.s") +@(shell-expand "cat 42.rkt | racket -t compile-stdin.rkt -m > 42.s") @title[#:tag "Abscond"]{Abscond: a language of numbers} +@src-code["abscond"] + @emph{Let's Make a Programming Language!} @table-of-contents[] @@ -148,10 +150,10 @@ parse the concrete expression as an s-expression. While not terribly useful for a language as overly simplistic as Abscond, we use an AST datatype for representing expressions and another syntactic categories. For each category, we will have an appropriate constructor. In the case of Abscond -all expressions are integers, so we have a single constructor, @racket[Int]. +all expressions are integers, so we have a single constructor, @racket[Lit]. @(define-language A-concrete - (e ::= (Int i)) + (e ::= (Lit i)) (i ::= integer)) @centered{@render-language[A-concrete]} @@ -169,7 +171,7 @@ it is, otherwise it signals an error: @section{Meaning of Abscond programs} The meaning of an Abscond program is simply the number itself. So -@racket[(Int 42)] evaluates to @racket[42]. +@racket[(Lit 42)] evaluates to @racket[42]. 
We can write an ``interpreter'' that consumes an expression and produces it's meaning: @@ -178,14 +180,14 @@ produces it's meaning: @#reader scribble/comment-reader (examples #:eval ev -(interp (Int 42)) -(interp (Int -8)) +(interp (Lit 42)) +(interp (Lit -8)) ) We can add a command line wrapper program for interpreting Abscond -programs saved in files: +programs from stdin: -@codeblock-include["abscond/interp-file.rkt"] +@codeblock-include["abscond/interp-stdin.rkt"] The details here aren't important (and you won't be asked to write this kind of code), but this program @racket[read]s the contents of a @@ -194,7 +196,7 @@ well-formed Abscond program, then it runs the intepreter and displays the result. For example, interpreting the program @tt{42.rkt} shown above: -@shellbox["racket -t interp-file.rkt -m 42.rkt"] +@shellbox["cat 42.rkt | racket -t interp-stdin.rkt -m"] Even though the semantics is obvious, we can provide a formal definition of Abscond using @bold{operational semantics}. @@ -211,15 +213,15 @@ language, just a single inference rule suffices: #:mode (𝑨 I O) #:contract (𝑨 e i) [---------- - (𝑨 (Int i) i)]) + (𝑨 (Lit i) i)]) @(centered (render-judgment-form 𝑨)) Here, we are defining a binary relation, called @render-term[A 𝑨], and saying every integer literal expression is paired with the integer itself in the -relation. So @math{((Int 2),2)} is in @render-term[A 𝑨], -@math{((Int 5),5)} is in @render-term[A 𝑨], and so on. +relation. So @math{((Lit 2),2)} is in @render-term[A 𝑨], +@math{((Lit 5),5)} is in @render-term[A 𝑨], and so on. The inference rules define the binary relation by defining the @emph{evidence} for being in the relation. The rule makes use of @@ -417,12 +419,12 @@ Writing the @racket[compile] function is easy: @#reader scribble/comment-reader (examples #:eval ev -(compile (Int 42)) -(compile (Int 38)) +(compile (Lit 42)) +(compile (Lit 38)) ) To convert back to the concrete NASM syntax, we use -@racket[asm-string]. 
+@racket[asm-display]. @margin-note{Note: the printer takes care of the macOS vs Linux label convention by detecting the underlying system and printing @@ -430,17 +432,17 @@ appropriately.} @#reader scribble/comment-reader (examples #:eval ev -(displayln (asm-string (compile (Int 42))))) +(asm-display (compile (Lit 42)))) Putting it all together, we can write a command line compiler much like the command line interpreter before, except now we emit assembly code: -@codeblock-include["abscond/compile-file.rkt"] +@codeblock-include["abscond/compile-stdin.rkt"] Example: -@shellbox["racket -t compile-file.rkt -m 42.rkt"] +@shellbox["cat 42.rkt | racket -t compile-stdin.rkt -m"] Using a Makefile, we can capture the whole compilation dependencies as: @@ -473,7 +475,7 @@ adds up to much more efficient programs. Just to demonstrate, here's a single data point measuring the difference between interpreting and compiling Abscond programs: -@shellbox["time -p racket -t interp-file.rkt -m 42.rkt"] +@shellbox["cat 42.rkt | time -p racket -t interp-stdin.rkt -m"] Compiling: @@ -531,17 +533,17 @@ compilation within Racket: @examples[#:eval ev -(asm-interp (compile (Int 42))) -(asm-interp (compile (Int 37))) -(asm-interp (compile (Int -8))) +(asm-interp (compile (Lit 42))) +(asm-interp (compile (Lit 37))) +(asm-interp (compile (Lit -8))) ] This of course agrees with what we will get from the interpreter: @examples[#:eval ev -(interp (Int 42)) -(interp (Int 37)) -(interp (Int -8)) +(interp (Lit 42)) +(interp (Lit 37)) +(interp (Lit -8)) ] We can turn this in a @bold{property-based test}, i.e. a function that @@ -552,9 +554,9 @@ correctness claim: (check-eqv? 
(interp e) (asm-interp (compile e)))) -(check-compiler (Int 42)) -(check-compiler (Int 37)) -(check-compiler (Int -8)) +(check-compiler (Lit 42)) +(check-compiler (Lit 37)) +(check-compiler (Lit -8)) ] This is a powerful testing technique when combined with random @@ -563,11 +565,11 @@ Abscond programs, we can randomly generate @emph{any} Abscond program and check that it holds. @examples[#:eval ev -(check-compiler (Int (random 100))) +(check-compiler (Lit (random 100))) ; test 10 random programs (for ([i (in-range 10)]) - (check-compiler (Int (random 10000)))) + (check-compiler (Lit (random 10000)))) ] The last expression is taking 10 samples from the space of Abscond diff --git a/www/notes/blackmail.scrbl b/www/notes/blackmail.scrbl index 2939003e..a9c0ccfa 100644 --- a/www/notes/blackmail.scrbl +++ b/www/notes/blackmail.scrbl @@ -29,10 +29,12 @@ #'(void)))])) @;{ Have to compile 42.s (at expand time) before listing it } -@(shell-expand "racket -t compile-file.rkt -m add1-add1-40.rkt > add1-add1-40.s") +@(shell-expand "cat add1-add1-40.rkt | racket -t compile-stdin.rkt -m > add1-add1-40.s") @title[#:tag "Blackmail"]{Blackmail: incrementing and decrementing} +@src-code["blackmail"] + @emph{Let's Do It Again!} @table-of-contents[] @@ -75,10 +77,10 @@ The grammar of abstract Backmail expressions is: @centered{@render-language[B]} -So, @racket[(Int 0)], @racket[(Int 120)], and -@racket[(Int -42)] are Blackmail AST expressions, but so are -@racket[(Prim1 'add1 (Int 0))], @racket[(Sub1 (Int 120))], -@racket[(Prim1 'add1 (Prim1 'add1 (Prim1 'add1 (Int -42))))]. +So, @racket[(Lit 0)], @racket[(Lit 120)], and +@racket[(Lit -42)] are Blackmail AST expressions, but so are +@racket[(Prim1 'add1 (Lit 0))], @racket[(Sub1 (Lit 120))], +@racket[(Prim1 'add1 (Prim1 'add1 (Prim1 'add1 (Lit -42))))]. A datatype for representing expressions can be defined as: @@ -127,7 +129,7 @@ contrast to the first rule, which applies unconditionally. 
We can understand these rules as saying the following: @itemlist[ -@item{For all integers @math{i}, @math{((Int i),i)} is in @render-term[B 𝑩].} +@item{For all integers @math{i}, @math{((Lit i),i)} is in @render-term[B 𝑩].} @item{For expressions @math{e_0} and all integers @math{i_0} and @math{i_1}, if @math{(e_0,i_0)} is in @render-term[B 𝑩] and @math{i_1 @@ -155,11 +157,11 @@ interpreter, one for each form of expression: @codeblock-include["blackmail/interp.rkt"] @examples[#:eval ev -(interp (Int 42)) -(interp (Int -7)) -(interp (Prim1 'add1 (Int 42))) -(interp (Prim1 'sub1 (Int 8))) -(interp (Prim1 'add1 (Prim1 'add1 (Prim1 'add1 (Int 8))))) +(interp (Lit 42)) +(interp (Lit -7)) +(interp (Prim1 'add1 (Lit 42))) +(interp (Prim1 'sub1 (Lit 8))) +(interp (Prim1 'add1 (Prim1 'add1 (Prim1 'add1 (Lit 8))))) ] Here's how to connect the dots between the semantics and interpreter: @@ -170,7 +172,7 @@ expression, which determines which rule of the semantics applies. @itemlist[ -@item{if @math{e} is an integer @math{(Int i)}, then we're done: this is the +@item{if @math{e} is an integer @math{(Lit i)}, then we're done: this is the right-hand-side of the pair @math{(e,i)} in @render-term[B 𝑩].} @item{if @math{e} is an expression @RACKET[(Prim1 'add1 (UNSYNTAX @@ -218,13 +220,12 @@ To compile Blackmail, we make use of two more a86 instructions, @racket[Add] and @racket[Sub]: @ex[ -(displayln - (asm-string - (list (Label 'entry) - (Mov 'rax 40) - (Add 'rax 1) - (Add 'rax 1) - (Ret)))) +(asm-display + (list (Label 'entry) + (Mov 'rax 40) + (Add 'rax 1) + (Add 'rax 1) + (Ret))) ] The compiler consists of two functions: the first, which is given a @@ -240,16 +241,16 @@ recursion, much like the interpreter. 
We can now try out a few examples: @ex[ -(compile (Prim1 'add1 (Prim1 'add1 (Int 40)))) -(compile (Prim1 'sub1 (Int 8))) -(compile (Prim1 'add1 (Prim1 'add1 (Prim1 'sub1 (Prim1 'add1 (Int -8)))))) +(compile (Prim1 'add1 (Prim1 'add1 (Lit 40)))) +(compile (Prim1 'sub1 (Lit 8))) +(compile (Prim1 'add1 (Prim1 'add1 (Prim1 'sub1 (Prim1 'add1 (Lit -8)))))) ] And give a command line wrapper for parsing, checking, and compiling -files in @link["code/blackmail/compile-file.rkt"]{@tt{compile-file.rkt}}, +in @link["code/blackmail/compile-stdin.rkt"]{@tt{compile-stdin.rkt}}, we can compile files as follows: -@shellbox["racket -t compile-file.rkt -m add1-add1-40.rkt"] +@shellbox["cat add1-add1-40.rkt | racket -t compile-stdin.rkt -m"] And using the same @link["code/blackmail/Makefile"]{@tt{Makefile}} setup as in Abscond, we capture the whole compilation process with a @@ -263,9 +264,9 @@ the same @racket[asm-interp] function to encapsulate running assembly code: @ex[ -(asm-interp (compile (Prim1 'add1 (Prim1 'add1 (Int 40))))) -(asm-interp (compile (Prim1 'sub1 (Int 8)))) -(asm-interp (compile (Prim1 'add1 (Prim1 'add1 (Prim1 'add1 (Prim1 'add1 (Int -8))))))) +(asm-interp (compile (Prim1 'add1 (Prim1 'add1 (Lit 40))))) +(asm-interp (compile (Prim1 'sub1 (Lit 8)))) +(asm-interp (compile (Prim1 'add1 (Prim1 'add1 (Prim1 'add1 (Prim1 'add1 (Lit -8))))))) ] @section{Correctness and random testing} @@ -331,10 +332,10 @@ x86 does. 
Let's see: @ex[ (define max-int (sub1 (expt 2 63))) (define min-int (- (expt 2 63))) -(asm-interp (compile (Int max-int))) -(asm-interp (compile (Prim1 'add1 (Int max-int)))) -(asm-interp (compile (Int min-int))) -(asm-interp (compile (Prim1 'sub1 (Int min-int))))] +(asm-interp (compile (Lit max-int))) +(asm-interp (compile (Prim1 'add1 (Lit max-int)))) +(asm-interp (compile (Lit min-int))) +(asm-interp (compile (Prim1 'sub1 (Lit min-int))))] Now there's a fact you didn't learn in grade school: in the first example, adding 1 to a number made it smaller; in the @@ -343,18 +344,18 @@ second, subtracting 1 made it bigger! This problem doesn't exist in the interpreter: @ex[ -(interp (Int max-int)) -(interp (Prim1 'add1 (Int max-int))) -(interp (Int min-int)) -(interp (Prim1 'sub1 (Int min-int))) +(interp (Lit max-int)) +(interp (Prim1 'add1 (Lit max-int))) +(interp (Lit min-int)) +(interp (Prim1 'sub1 (Lit min-int))) ] So we have found a counter-example to the claim of compiler correctness: @ex[ -(check-compiler (Prim1 'add1 (Int max-int))) -(check-compiler (Prim1 'sub1 (Int min-int))) +(check-compiler (Prim1 'add1 (Lit max-int))) +(check-compiler (Prim1 'sub1 (Lit min-int))) ] What can we do? 
This is the basic problem of a program not @@ -414,7 +415,7 @@ these pieces in the two compilers we've written: @item{@bold{Generated} into assembly x86 @itemlist[@item{we use @racket[compile] to generate assembly (in AST form), - and use @racket[asm-string] to obtain printable concrete X86-64 code}]} + and use @racket[asm-display] to print concrete X86-64 code}]} @item{@bold{Linked} against a run-time (usually written in C) diff --git a/www/notes/con.scrbl b/www/notes/con.scrbl index d157b5fc..bad4700b 100644 --- a/www/notes/con.scrbl +++ b/www/notes/con.scrbl @@ -23,6 +23,8 @@ @title[#:tag "Con"]{Con: branching with conditionals} +@src-code["con"] + @emph{When you come to a fork in the road, take it.} @table-of-contents[] @@ -215,7 +217,9 @@ branch, then (unconditionally) jump over the then branch code. To accomplish this, we will need two new labels: one for the then branch code and one for the end of the then branch code. The @racket[gensym] function can be used to generate symbols that have not -appeared before. +appeared before. @margin-note{Q: Why should we generate label names +here? What would go wrong if we simply used labels like @racket['l0] and +@racket['l1]?} In total, the code for this example would look like: @@ -265,7 +269,7 @@ The complete compiler code is: Mirroring the change we made to the interpreter, we separate out a module for compiling primitives: -@codeblock-include["con/compile-prim.rkt"] +@codeblock-include["con/compile-ops.rkt"] Let's take a look at a few examples: @ex[ diff --git a/www/notes/dodger.scrbl b/www/notes/dodger.scrbl index 05fb8009..bb11c8ea 100644 --- a/www/notes/dodger.scrbl +++ b/www/notes/dodger.scrbl @@ -20,6 +20,8 @@ @title[#:tag "Dodger"]{Dodger: addressing a lack of character} +@src-code["dodger"] + @emph{There are 11 types of values...} @table-of-contents[] @@ -112,19 +114,19 @@ The meaning of characters and their operations are just lifted from Racket. 
We can try out some examples: @ex[ -(interp (Char #\a)) -(interp (Char #\b)) -(interp (Prim1 'char? (Char #\a))) -(interp (Prim1 'char? (Bool #t))) -(interp (Prim1 'char->integer (Char #\a))) -(interp (Prim1 'integer->char (Prim1 'char->integer (Char #\a)))) +(interp (Lit #\a)) +(interp (Lit #\b)) +(interp (Prim1 'char? (Lit #\a))) +(interp (Prim1 'char? (Lit #t))) +(interp (Prim1 'char->integer (Lit #\a))) +(interp (Prim1 'integer->char (Prim1 'char->integer (Lit #\a)))) ] Just as in Dupe, type errors result in the interpreter crashing: @ex[ -(eval:error (interp (Prim1 'char->integer (Bool #f)))) +(eval:error (interp (Prim1 'char->integer (Lit #f)))) ] Also, not every integer corresponds to a character, so when @@ -132,7 +134,7 @@ Also, not every integer corresponds to a character, so when (more on this in a minute): @ex[ -(eval:error (interp (Prim1 'integer->char (Int -1)))) +(eval:error (interp (Prim1 'integer->char (Lit -1)))) ] @section{Ex uno plures iterum: Out of One, Many... Again} diff --git a/www/notes/dupe.scrbl b/www/notes/dupe.scrbl index c88889c7..47a3cf09 100644 --- a/www/notes/dupe.scrbl +++ b/www/notes/dupe.scrbl @@ -18,11 +18,13 @@ @(ev '(require rackunit a86)) @(for-each (λ (f) (ev `(require (file ,(path->string (build-path notes "dupe" f)))))) - '("interp.rkt" "compile.rkt" "ast.rkt" "parse.rkt" "random.rkt" "types.rkt")) + '("interp.rkt" "interp-prim.rkt" "compile.rkt" "ast.rkt" "parse.rkt" "random.rkt" "types.rkt")) @title[#:tag "Dupe"]{Dupe: a duplicity of types} +@src-code["dupe"] + @emph{There are 10 types of values...} @table-of-contents[] @@ -65,7 +67,7 @@ The s-expression parser is defined as follows: @section{Meaning of Dupe programs} -To consider he meaning of Dupe programs, we must revisit the meaning +To consider the meaning of Dupe programs, we must revisit the meaning of conditions. Previously we branched on whether a subexpression evaluated to 0. We will now consider whether the subexpression evaluates to @racket[#f]. 
@@ -168,14 +170,14 @@ rule essentially defers the work to a new metafunction, (render-metafunction 𝑫-𝒑𝒓𝒊𝒎 #:contract? #t))) Returning to the issue of type mismatches, what does the -semantics say about @racket[(Prim1 'add1 (Bool #f))]? +semantics say about @racket[(Prim1 'add1 (Lit #f))]? What it says is: nothing. These programs are simply not in the semantic relation for this language. There's only one rule for giving meaning to an @racket[(Prim1 'add1 _e0)] expression and it's premise is that @racket[_e] means some @emph{integer} @racket[_i0]. But -@math{(@racket[(Bool #f)], i) ∉ 𝑫} for any @math{i}. So there's no value -@math{v} such that @math{(@racket[(Prim1 'add1 (Bool #f))], v) ∈ 𝑫}. This +@math{(@racket[(Lit #f)], i) ∉ 𝑫} for any @math{i}. So there's no value +@math{v} such that @math{(@racket[(Prim1 'add1 (Lit #f))], v) ∈ 𝑫}. This expression is @bold{undefined} according to the semantics. @@ -192,13 +194,13 @@ We can confirm the interpreter computes the right result for the examples given earlier: @ex[ -(interp (Bool #t)) -(interp (Bool #f)) -(interp (If (Bool #f) (Int 1) (Int 2))) -(interp (If (Bool #t) (Int 1) (Int 2))) -(interp (If (Int 0) (Int 1) (Int 2))) -(interp (If (Int 7) (Int 1) (Int 2))) -(interp (If (Prim1 'zero? (Int 7)) (Int 1) (Int 2))) +(interp (Lit #t)) +(interp (Lit #f)) +(interp (If (Lit #f) (Lit 1) (Lit 2))) +(interp (If (Lit #t) (Lit 1) (Lit 2))) +(interp (If (Lit 0) (Lit 1) (Lit 2))) +(interp (If (Lit 7) (Lit 1) (Lit 2))) +(interp (If (Prim1 'zero? 
(Lit 7)) (Lit 1) (Lit 2))) ] Correctness follows the same pattern as before, although it is worth @@ -220,7 +222,7 @@ which results in the @racket[interp] program crashing and Racket signalling an error: @ex[ -(eval:error (interp (Prim1 'add1 (Bool #f)))) +(eval:error (interp (Prim1 'add1 (Lit #f)))) ] This isn't a concern for correctness, because the interpreter is free @@ -276,17 +278,9 @@ integers and booleans, so we could use one bit to indicate whether a value is a boolean or an integer. The remaining 63 bits can be used to represent the value itself, either true, false, or some integer. -@(define (binary i [len 0]) - (typeset-code #:block? #f #:indent 0 - (string-append "#b" - (~a (number->string i 2) - #:left-pad-string "0" - #:align 'right - #:min-width len)))) - Let's use the least significant bit to indicate the type and let's use @binary[type-int] for integer and -@binary[type-bool] for boolean. These are arbitrary choices +@binary[(value->bits #t)] for boolean. These are arbitrary choices (more or less). The number @racket[1] would be represented as @@ -296,10 +290,10 @@ The number @racket[1] would be represented as number is no longer the number itself: the Dupe value @racket[1] is represented by the number @racket[2] (@binary[2]). The Dupe value @racket[#t] -is represented by the number @racket[#,val-true] -(@binary[val-true 2]); the Dupe value @racket[#f] -is represented by the number @racket[#,val-false] -(@binary[val-false 2]). +is represented by the number @racket[#,(value->bits #t)] +(@binary[(value->bits #t) 2]); the Dupe value @racket[#f] +is represented by the number @racket[#,(value->bits #f)] +(@binary[(value->bits #f) 2]). One nice thing about our choice of encoding: @racket[0] is represented as @racket[0] (@binary[0 2]). 
@@ -316,8 +310,7 @@ encoding: @codeblock-include["dupe/types.rkt"] @#reader scribble/comment-reader -(ex - +(ex (bits->value #b000) (bits->value #b001) (bits->value #b010) @@ -326,7 +319,6 @@ encoding: (eval:error (bits->value #b101)) (bits->value #b110) (eval:error (bits->value #b111)) - ) Notice that not all bits represent a value; name any odd number that's @@ -398,7 +390,8 @@ succeeds): (with-handlers ([exn:fail? (λ (x) 'ok)]) (interp e) (check-equal? (interp-bits e) - (value->bits (interp e))))) + (value->bits (interp e)) + (format "~a" e)))) (define es (for/list ([i 100]) @@ -413,7 +406,8 @@ The one wrinkle is we really only need the spec to hold when is undefined, we use an exception handler to avoid testing when @racket[_e] is undefined. -Now let us inline the defintion of @racket[interp]: +Now let us inline the definition of @racket[interp], i.e. let's replace +the use of @racket[interp] with its definition: @#reader scribble/comment-reader (ex #:no-prompt @@ -421,18 +415,13 @@ Now let us inline the defintion of @racket[interp]: (define (interp-bits e) (value->bits (match e - [(Int i) i] - [(Bool b) b] - [(Prim1 'add1 e0) - (add1 (interp e0))] - [(Prim1 'sub1 e0) - (sub1 (interp e0))] - [(Prim1 'zero? e0) - (zero? (interp e0))] - [(If e0 e1 e2) - (if (interp e0) - (interp e1) - (interp e2))]))) + [(Lit l) l] + [(Prim1 p e) + (interp-prim1 p (interp e))] + [(If e1 e2 e3) + (if (interp e1) + (interp e2) + (interp e3))]))) ) It's still correct: @@ -451,28 +440,163 @@ So we get: ;; Expr -> Bits (define (interp-bits e) (match e - [(Int i) (value->bits i)] - [(Bool b) (value->bits b)] - [(Prim1 'add1 e0) - (value->bits (add1 (interp e0)))] - [(Prim1 'sub1 e0) - (value->bits (sub1 (interp e0)))] - [(Prim1 'zero? e0) - (value->bits (zero? 
(interp e0)))] - [(If e0 e1 e2) (value->bits - (if (interp e0) - (interp e1) - (interp e2)))])) -) + [(Lit l) (value->bits l)] + [(Prim1 p e) + (value->bits + (interp-prim1 p (interp e)))] + [(If e1 e2 e3) + (value->bits + (if (interp e1) + (interp e2) + (interp e3)))]))) Still correct: @ex[ (for-each interp-bits-correct es)] +Now consider the first case, where we are calling @racket[value->bits] +on @racket[i], which we know is an integer whenever this clause of the +@racket[match] is taken. From looking at the definition of +@racket[value->bits], we know @racket[(value->bits i)] is +@racket[(arithmetic-shift i int-shift)] when @racket[i] is an integer. +So we can replace the RHS of the first case with +@racket[(arithmetic-shift i int-shift)]. + +We can do similar reasoning on the second case with +@racket[(value->bits b)] where @racket[b] is a boolean. From the +definition of @racket[value->bits], we can replace the RHS with +@racket[(match b [#t (value->bits #t)] [#f (value->bits #f)])], which can be written +more succinctly as @racket[(if b (value->bits #t) (value->bits #f))]. + +In the third case, let's suppose there is an analog of +@racket[interp-prim1] called @racket[interp-prim1-bits] that operates +on, and produces, bits. As a start, we can assume a definition that +is just the specification of this function (we'll derive a better +version later): + +@#reader scribble/comment-reader +(ex #:no-prompt +;; Op Bits -> Bits +(define (interp-prim1-bits p b) + (value->bits (interp-prim1 p (bits->value b)))) +) + +Now we can replace the RHS of the third case with +@racket[(interp-prim1-bits p (interp-bits e))]. 
+ +Finally, in the fourth case, we can use the following equality: + +@racketblock[ +(_f (if _e0 _e1 _e2)) = (if _e0 (_f _e1) (_f _e2)) +] + +to arrive at: + +@racketblock[ +(if (interp e0) + (value->bits (interp e1)) + (value->bits (interp e2))) +] + +Of course, @racket[(value->bits (interp e1))] is just what +@racket[interp-bits] computes, so this is equivalent to: + +@racketblock[ +(if (interp e0) + (interp-bits e1) + (interp-bits e2)) +] + +Now observe that @racket[(interp e0)] produces @racket[#f] if and only +if @racket[(interp-bits e0)] produces @racket[(value->bits #f)]. We can therefore +eliminate the use of @racket[interp] by replacing this conditional with: + +@racketblock[ +(if (eq? (value->bits #f) (interp-bits e0)) + (interp-bits e2) + (interp-bits e1)) +] + +(Notice the swapping of the then- and else-branch of the conditional.) + +We've now arrived at the following @racket[interp]-free definition of +@racket[interp-bits]: + +@#reader scribble/comment-reader +(ex #:no-prompt +;; Expr -> Bits +(define (interp-bits e) + (match e + [(Lit l) (value->bits l)] + [(Prim1 p e) + (interp-prim1-bits p (interp-bits e))] + [(If e1 e2 e3) + (if (eq? (value->bits #f) (interp-bits e1)) + (interp-bits e3) + (interp-bits e2))]))) + +And it is still correct: +@ex[ +(for-each interp-bits-correct es)] + + +We're almost done. Now let's derive a version of +@racket[interp-prim1-bits] starting from the specification we gave +above. To start, replace the use of @racket[interp-prim1] with its +definition: + +@#reader scribble/comment-reader +(ex #:no-prompt +;; Op Bits -> Bits +(define (interp-prim1-bits op b) + (value->bits + (match op + ['add1 (add1 (bits->value b))] + ['sub1 (sub1 (bits->value b))] + ['zero? (zero? 
(bits->value b))])))) + +Now push @racket[value->bits] inward: + +@#reader scribble/comment-reader +(ex #:no-prompt +;; Op Bits -> Bits +(define (interp-prim1-bits op b) + (match op + ['add1 (value->bits (add1 (bits->value b)))] + ['sub1 (value->bits (sub1 (bits->value b)))] + ['zero? (value->bits (zero? (bits->value b)))]))) + +Now notice the following: + +@itemlist[ + +@item{@racket[(value->bits (add1 (bits->value b)))] ≡ @racket[(+ b (value->bits 1))] ≡ @racket[(+ b (arithmetic-shift 1 int-shift))]} + +@item{@racket[(value->bits (sub1 (bits->value b)))] ≡ @racket[(- b (value->bits 1))] ≡ @racket[(- b (arithmetic-shift 1 int-shift))]} + +@item{@racket[(value->bits (zero? (bits->value b)))] ≡ @racket[(value->bits (zero? b))] ≡ @racket[(if (zero? b) (value->bits #t) (value->bits #f))]} + +] + +So we can define @racket[interp-prim1-bits] as: + +@#reader scribble/comment-reader +(ex #:no-prompt +;; Op Bits -> Bits +(define (interp-prim1-bits op b) + (match op + ['add1 (+ b (arithmetic-shift 1 int-shift))] + ['sub1 (- b (arithmetic-shift 1 int-shift))] + ['zero? (if (zero? b) (value->bits #t) (value->bits #f))]))) + + +@;{ + + In the first two cases, we know that @racket[i] and @racket[b] are integers and booleans, respectively. So we know @racket[(values->bits -i) = (* 2 i)] and @racket[(values->bits b) = (if b #,val-true #,val-false)]. We can +i) = (* 2 i)] and @racket[(values->bits b) = (if b #,(value->bits #t) #,(value->bits #f))]. 
We can rewrite the code as: @;{the #:escape identity thing is a cute solution to the @@ -482,8 +606,7 @@ rewrite the code as: @code:comment{Expr -> Bits} (define (interp-bits e) (match e - [(Int i) (* 2 i)] - [(Bool b) (if b (identity val-true) (identity val-false))] + [(Lit l) (value->bits l)] [(Prim1 'add1 e0) (value->bits (add1 (interp e0)))] [(Prim1 'sub1 e0) @@ -511,8 +634,8 @@ We can rewrite the last case by the following equation: @code:comment{Expr -> Bits} (define (interp-bits e) (match e - [(Int i) (* 2 i)] - [(Bool b) (if b (identity val-true) (identity val-false))] + [(Lit i) (* 2 i)] + [(Lit b) (if b (identity (value->bits #t)) (identity (value->bits #f)))] [(Prim1 'add1 e0) (value->bits (add1 (interp e0)))] [(Prim1 'sub1 e0) @@ -545,8 +668,8 @@ to get: @code:comment{Expr -> Bits} (define (interp-bits e) (match e - [(Int i) (* 2 i)] - [(Bool b) (if b (identity val-true) (identity val-false))] + [(Lit i) (* 2 i)] + [(Lit b) (if b (identity (value->bits #t)) (identity (value->bits #f)))] [(Prim1 'add1 e0) (+ (value->bits (interp e0)) (value->bits 1))] [(Prim1 'sub1 e0) @@ -575,8 +698,8 @@ We can now rewrite by the equation of our specification: @code:comment{Expr -> Bits} (define (interp-bits e) (match e - [(Int i) (* 2 i)] - [(Bool b) (if b (identity val-true) (identity val-false))] + [(Lit i) (* 2 i)] + [(Lit b) (if b (identity (value->bits #t)) (identity (value->bits #f)))] [(Prim1 'add1 e0) (+ (interp-bits e0) (identity (value->bits 1)))] [(Prim1 'sub1 e0) @@ -601,16 +724,16 @@ and inline @racket[value->bits] specialized to a boolean argument: @code:comment{Expr -> Bits} (define (interp-bits e) (match e - [(Int i) (* 2 i)] - [(Bool b) (if b (identity val-true) (identity val-false))] + [(Lit i) (* 2 i)] + [(Lit b) (if b (identity (value->bits #t)) (identity (value->bits #f)))] [(Prim1 'add1 e0) (+ (interp-bits e0) (identity (value->bits 1)))] [(Prim1 'sub1 e0) (- (interp-bits e0) (identity (value->bits 1)))] [(Prim1 'zero? e0) (match (zero? 
(interp-bits e0)) - [#t (identity val-true)] - [#f (identity val-false)])] + [#t (identity (value->bits #t))] + [#f (identity (value->bits #f))])] [(If e0 e1 e2) (if (interp e0) (interp-bits e1) @@ -624,24 +747,27 @@ Still correct: Finally, in the last case, all that matters in @racket[(if (interp e0) ...)] is whether @racket[(interp e0)] returns @racket[#f] or something else. So we can rewrite in terms of whether @racket[(interp-bits e0)] -produces the representation of @racket[#f] (@binary[val-false 2]): +produces the representation of @racket[#f] (@binary[(value->bits #f) 2]): @ex[#:escape identity #:no-prompt @code:comment{Expr -> Bits} (define (interp-bits e) (match e - [(Int i) (* 2 i)] - [(Bool b) (if b (identity val-true) (identity val-false))] + [(Lit l) + (cond + [(integer? l) (* 2 l)] + [(boolean? l) + (if l (identity (value->bits #t)) (identity (value->bits #f)))])] [(Prim1 'add1 e0) (+ (interp-bits e0) (identity (value->bits 1)))] [(Prim1 'sub1 e0) (- (interp-bits e0) (identity (value->bits 1)))] [(Prim1 'zero? e0) (match (zero? (interp-bits e0)) - [#t (identity val-true)] - [#f (identity val-false)])] + [#t (identity (value->bits #t))] + [#f (identity (value->bits #f))])] [(If e0 e1 e2) - (if (= (interp-bits e0) (identity val-false)) + (if (= (interp-bits e0) (identity (value->bits #f))) (interp-bits e2) (interp-bits e1))])) ] @@ -658,6 +784,8 @@ _bs #,(value->bits 1))], i.e. adding @binary[(value->bits 1) 2]. When @racket[_ represents a boolean, then @racket[(value->bits (add1 (bits->value _bs)))] would crash, while @racket[(+ _bs (value->bits 1))] doesn't, but this is an undefined program, so changing the behavior is fine. 
+} + Looking back: starting from the spec, we've arrived at a definition of @racket[interp-bits] that is completely self-contained: it doesn't use @@ -673,8 +801,8 @@ interpreter in a final conversion: (define (interp.v2 e) (bits->value (interp-bits e))) -(interp.v2 (Bool #t)) -(interp.v2 (Bool #f)) +(interp.v2 (Lit #t)) +(interp.v2 (Lit #f)) (interp.v2 (parse '(if #f 1 2))) (interp.v2 (parse '(if #t 1 2))) (interp.v2 (parse '(if 0 1 2))) @@ -705,9 +833,9 @@ Let's consider some simple examples: before, but needs to use the new representation, i.e. the compiler should produce @racket[(Mov 'rax 84)], which is @racket[(* 42 2)].} -@item{@racket[#f]: this should produce @racket[(Mov 'rax #,val-false)].} +@item{@racket[#f]: this should produce @racket[(Mov 'rax #,(value->bits #f))].} -@item{@racket[#t]: this should produce @racket[(Mov 'rax #,val-true)].} +@item{@racket[#t]: this should produce @racket[(Mov 'rax #,(value->bits #t))].} @item{@racket[(add1 _e)]: this should produce the instructions for @racket[_e] followed by an instruction to add @racket[#,(value->bits 1)], which is @@ -716,24 +844,34 @@ just how @racket[interp-bits] interprets an @racket[add1].} @item{@racket[(sub1 _e)]: should work like @racket[(add1 _e)] but subtracting @racket[#,(value->bits 1)].} - @item{@racket[(zero? _e)]: this should produce the +@item{@racket[(zero? _e)]: this should produce the instructions for @racket[_e] followed by instructions that compare @racket['rax] to 0 and set @racket['rax] to - @racket[#t] (i.e. @binary[val-true 2]) if true and - @racket[#f] (i.e. @binary[val-false 2]) otherwise.} + @racket[#t] (i.e. @binary[(value->bits #t) 2]) if true and + @racket[#f] (i.e. @binary[(value->bits #f) 2]) otherwise. + +This is a bit different from what we saw with Con, which combined +conditional execution with testing for equality to @racket[0]. Here +there is no need to @emph{jump} anywhere based on whether @racket[_e] +produces @racket[0] or not. 
Instead we want to move either the +encoding of @racket[#t] or @racket[#f] into @racket['rax] depending on +what @racket[_e] produces. To accomplish that, we can use a new kind +of instruction, the @bold{conditional move} instruction: @racket[Cmov]. + +} @item{@racket[(if _e0 _e1 _e2)]: this should work much like before, compiling each subexpression, generating some labels and the appropriate comparison and conditional jump. The only difference is we now want to compare the result of executing @racket[_e0] with -@racket[#f] (i.e. @binary[val-false 2]) and jumping to the code for @racket[_e2] when +@racket[#f] (i.e. @binary[(value->bits #f) 2]) and jumping to the code for @racket[_e2] when they are equal.} ] @ex[ -(compile-e (Int 42)) -(compile-e (Bool #t)) -(compile-e (Bool #f)) +(compile-e (Lit 42)) +(compile-e (Lit #t)) +(compile-e (Lit #f)) (compile-e (parse '(zero? 0))) (compile-e (parse '(if #t 1 2))) (compile-e (parse '(if #f 1 2))) @@ -755,8 +893,8 @@ We can try out the compiler with the help of @racket[asm-interp], but you'll notice the results are a bit surprising: @ex[ -(asm-interp (compile (Bool #t))) -(asm-interp (compile (Bool #f))) +(asm-interp (compile (Lit #t))) +(asm-interp (compile (Lit #f))) (asm-interp (compile (parse '(zero? 0)))) (asm-interp (compile (parse '(zero? -7)))) (asm-interp (compile (parse '(if #t 1 2)))) @@ -776,8 +914,8 @@ values: (define (interp-compile e) (bits->value (asm-interp (compile e)))) -(interp-compile (Bool #t)) -(interp-compile (Bool #f)) +(interp-compile (Lit #t)) +(interp-compile (Lit #f)) (interp-compile (parse '(zero? 0))) (interp-compile (parse '(zero? -7))) (interp-compile (parse '(if #t 1 2))) @@ -807,7 +945,7 @@ integer, to recover the number being represented, we need to divide by 2, which can be done efficiently with a right-shift of 1 bit. 
Likewise with a boolean, if we shift right by 1 bit there are two possible results: -@racket[#,val-false] for false and @racket[#,val-true] for +@racket[#,(value->bits #f)] for false and @racket[#,(value->bits #t)] for true. We use the following interface for values in the runtime system: @@ -863,11 +1001,10 @@ our usual appraoch: @ex[ (define (check-correctness e) (check-equal? (interp-compile e) - (interp e) - e)) + (interp e))) (check-correctness (parse '(add1 7))) -(eval:error (check-correctness (parse '(add1 #f)))) +;;(eval:error (check-correctness (parse '(add1 #f)))) ] This isn't a counter-example to correctness because @racket['(add1 @@ -876,15 +1013,16 @@ interpreter and compiler are free to do anything on this input. Since we know Racket will signal an error when the interpreter tries to interpret a meaningless expression, we can write an alternate -@racket[check-correctness] function that catches any exceptions and -produces void, effectively ignoring the test: +@racket[check-correctness] function that first runs the interpreter +with an exception handler installed. Should an error occur, +the test is ignored, otherwise the value produced is compared +to that of the compiler: @ex[ (define (check-correctness e) (with-handlers ([exn:fail? void]) - (check-equal? (interp-compile e) - (interp e) - e))) + (let ((v (interp e))) + (check-equal? v (interp-compile e))))) (check-correctness (parse '(add1 7))) (check-correctness (parse '(add1 #f))) diff --git a/www/notes/evildoer.scrbl b/www/notes/evildoer.scrbl index 456334f9..66cf6df2 100644 --- a/www/notes/evildoer.scrbl +++ b/www/notes/evildoer.scrbl @@ -63,6 +63,8 @@ HERE @title[#:tag "Evildoer"]{Evildoer: change the world a couple nibbles at a time} +@src-code["evildoer"] + @emph{Warning: Side effects may include itching, burning, oozing, weeping. Not intended for heart patients and those with nervous disorders.} @@ -83,10 +85,10 @@ writing!) do not have this property. 
Instead they interact with the outside world and compute results based on the state of the world. -For example, consider the @tt{compile-file.rkt} program, -which reads the contents of a file from disk and compiles -it. The meaning of this program depends on the state of your -computer's hard drive. Similarly, it prints out assembly +For example, consider the @tt{compile-stdin.rkt} program, +which reads the contents of stdin and compiles +it. The meaning of this program depends on the state of input +port. Similarly, it prints out assembly code to the standard output port. So not only does this program depend on the outside world, it changes it too. @@ -426,7 +428,7 @@ Let's save it to a file called @tt{p.s}: @ex[ (with-output-to-file "p.s" (λ () - (displayln (asm-string p))) + (asm-display p)) #:exists 'truncate)] We can assemble it, link it together with the printer, and run it: @@ -501,11 +503,11 @@ Now save each program in its nasm format: @ex[ (with-output-to-file "p.s" (λ () - (displayln (asm-string p))) + (asm-display p)) #:exists 'truncate) (with-output-to-file "life.s" (λ () - (displayln (asm-string life))) + (asm-display life)) #:exists 'truncate)] And assemble: @@ -597,7 +599,7 @@ of its argument in @racket['rdi] before the call: (Ret))) (with-output-to-file "q.s" (λ () - (displayln (asm-string q))) + (asm-display q)) #:exists 'truncate)] We can assemble it into an object file: @@ -641,7 +643,7 @@ pop around the call: (Ret))) (with-output-to-file "q.s" (λ () - (displayln (asm-string q))) + (asm-display q)) #:exists 'truncate)] @shellbox[(string-append "nasm -f " format " q.s -o q.o") diff --git a/www/notes/extort.scrbl b/www/notes/extort.scrbl index c5d28651..0ea0ec4f 100644 --- a/www/notes/extort.scrbl +++ b/www/notes/extort.scrbl @@ -25,6 +25,8 @@ @title[#:tag this-lang]{@|this-lang|: when errors exist} +@src-code[this-lang] + @emph{The greatest mistake is to imagine that we never err.} @table-of-contents[] @@ -122,9 +124,9 @@ We can confirm the 
interpreter computes the right result for the examples given earlier: @ex[ -(interp (Prim1 'add1 (Bool #f))) -(interp (Prim1 'zero? (Bool #t))) -(interp (If (Prim1 'zero? (Bool #f)) (Int 1) (Int 2))) +(interp (Prim1 'add1 (Lit #f))) +(interp (Prim1 'zero? (Lit #t))) +(interp (If (Prim1 'zero? (Lit #f)) (Lit 1) (Lit 2))) ] The statement of correctness stays the same, but now observe that @@ -201,7 +203,7 @@ usual way again: (interp e) e)) -(check-correctness (Prim1 'add1 (Int 7))) -(check-correctness (Prim1 'add1 (Bool #f))) +(check-correctness (Prim1 'add1 (Lit 7))) +(check-correctness (Prim1 'add1 (Lit #f))) ] diff --git a/www/notes/fraud.scrbl b/www/notes/fraud.scrbl index 847652a8..24b39041 100644 --- a/www/notes/fraud.scrbl +++ b/www/notes/fraud.scrbl @@ -23,6 +23,8 @@ @title[#:tag this-lang]{@|this-lang|: local binding, variables, and binary operations} +@src-code[this-lang] + @emph{To be is to be the value of a variable.} @table-of-contents[] @@ -334,7 +336,7 @@ The interpreter closely mirrors the semantics. The top-level @racket[interp-env] that takes an expression and environment and computes the result. It is defined by structural recursion on the expression. Environments are represented as lists of associations -between variables and integers. There are two helper functions for +between variables and values. There are two helper functions for @racket[ext] and @racket[lookup]: @codeblock-include["fraud/interp.rkt"] @@ -462,24 +464,24 @@ variable name either. 
The idea is that we will translate expression (@tt{Expr}) like: @racketblock[ -(Let 'x (Int 7) (Var 'x))] +(Let 'x (Lit 7) (Var 'x))] into intermediate expressions (@tt{IExpr}) like: @racketblock[ -(Let '_ (Int 7) (Var 0)) +(Let '_ (Lit 7) (Var 0)) ] And: @racketblock[ -(Let 'x (Int 7) (Let 'y (Int 9) (Var 'x))) +(Let 'x (Lit 7) (Let 'y (Lit 9) (Var 'x))) ] into: @racketblock[ -(Let '_ (Int 7) (Let '_ (Int 9) (Var 1))) +(Let '_ (Lit 7) (Let '_ (Lit 9) (Var 1))) ] @@ -505,8 +507,8 @@ by raising a (compile-time) error in the case of unbound variables. We can try out some examples to confirm it works as expected. @ex[ - (translate (Let 'x (Int 7) (Var 'x))) - (translate (Let 'x (Int 7) (Let 'y (Int 9) (Var 'x)))) + (translate (Let 'x (Lit 7) (Var 'x))) + (translate (Let 'x (Lit 7) (Let 'y (Lit 9) (Var 'x)))) ] The interpreter for @tt{IExpr}s will still have an @@ -768,37 +770,65 @@ the second example, then it couldn't be in the first. So our previous once-and-done solution to the stack alignment issue will no longer work. Instead, we will have to emit code that aligns the stack at every @racket[Call] and this adjustment will depend upon -the compile-time environment in which the call occurs. - -For example, let's assume we no longer adjust the stack at the entry -of our code. The first example (occuring in the empty compile-time -environment) will need subtract 8 to the stack pointer, call, and then -add 8 to the stack pointer. In the second example, the -@racket[write-byte] call occurs in a compile-time environment of -@racket['(x)]. The single binding being pushed on the stack, in -combination with the original call from the run-time system, results -in an aligned stack, so no adjustment is needed. Had there been two -elements on the stack, an adjustment similar to the first example -would be needed. In other words, if there are an even number of -elements on the stack, we need to adjust. 
- -This means, compared to the previous compiler for primitive -operations, each part of the compiler that may issue @racket[Call] -instructions will need to be informed of the current environment. - -We will use a helper function @racket[(pad-stack _c)] and -@racket[(unpad-stack _c)] that takes a compile-time environment and -produce instructions to align and revert the stack (if needed) before -and after @racket[Call]s. - -Signalling errors is likewise complicated and we handle it by having -two target labels that can be jumped to when an error happens: -@racket['raise_error] and @racket['raise_error_align]. The latter -adds 8 to @racket['rsp] and jumps to @racket['raise_error]. Since we -don't expect the the error handler function to return, we don't need -to worry about adjusting the stack afterward. We use another helper -function @racket[(error-label _c)] that computes the appropriate target -based on the given compile-time environment. +the state of the stack pointer when the call occurs. + +It's possible to compute the needed adjustment to the stack statically +using the compile-time environment, however we opt for a simpler, +one-size-fits-all approach of @emph{dynamically} aligning the stack +immediately before issuing a @racket[Call]. We do this by emitting +code that adjusts the stack pointer based on its current value, +computing a pad: either @racket[0] or @racket[8], which is subtracted +from @racket['rsp]. We then stash this pad value away in a +non-volatile register, which means the called function is not allowed +to modify it---or at least, if they do, they must restore it before +returning. When the call returns, we add the pad value back to +@racket['rsp] to restore the stack pointer to where it was before +being aligned. + +Note that the stack pointer is either divisible by 16 (meaning the last +four bits are @racket[0]) or by 8 but not by 16 (meaning the last four +bits are @binary[8]).
When the stack pointer is divisible by 16, it's +aligned and no adjustment is needed. Otherwise we need to adjust by +subtracting 8. + +Here is the code we can use to pad and unpad the stack. It does an +@racket[And] of the stack address and @binary[8], saving the result +into @racket['r15]. So @racket['r15] is @racket[0] when @racket['rsp] +is aligned and @racket[8] when misaligned. In both cases, subtracting +@racket['r15] from @racket['rsp] ensures @racket['rsp] is aligned. +The @racket[unpad-stack] simply adds @racket['r15] back. + +@#reader scribble/comment-reader +(racketblock +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; Asm +;; Undo the stack alignment after a call +(define unpad-stack + (seq (Add rsp r15))) +) + +Since @racket['r15] is a @emph{non-volatile} register, meaning a +called function must preserve its value when returning, it's safe to +stash away the stack adjustment value in this register. + +We can now call C functions by first padding then unpadding the stack: + +@#reader scribble/comment-reader +(racketblock +(seq pad-stack + (Call 'read_byte) + unpad-stack)) + +Signalling errors is likewise complicated and we handle it by having a +target @racket['raise_error_align] that aligns the stack using +@racket[pad-stack] before calling @racket['raise_error]. It doesn't +bother with @racket[unpad-stack] because there's no coming back. 
Here is the compiler for primitives that incorporates all of these stack-alignment issues, but is otherwise the same as before: diff --git a/www/notes/hoax.scrbl b/www/notes/hoax.scrbl index 8cc621d8..3c54c88e 100644 --- a/www/notes/hoax.scrbl +++ b/www/notes/hoax.scrbl @@ -23,6 +23,7 @@ @title[#:tag this-lang]{@|this-lang|: vectors and strings} +@src-code[this-lang] @emph{Stupidity, outrage, vanity, cruelty, iniquity, bad faith, falsehood - we fail to see the whole array when it is facing in the @@ -317,7 +318,7 @@ This looks a lot like the creation of a vector, however note that we @item{use @racket['eax] to write 32-bits of memory,} @item{advance the offset by 4-bytes (32-bits) on each subsequent character,} @item{write @racket[(char->integer #\a)] instead of @racket[(value->bits #\a)] into memory,} -@item{increment @racket['rbx] by 24, even though we've only written 20 bits.} +@item{increment @racket['rbx] by 24, even though we've only written 20 bytes.} ] Now let’s consider referencing elements of a string. Suppose diff --git a/www/notes/hustle.scrbl b/www/notes/hustle.scrbl index d437539f..9673e740 100644 --- a/www/notes/hustle.scrbl +++ b/www/notes/hustle.scrbl @@ -23,6 +23,7 @@ @title[#:tag this-lang]{@|this-lang|: heaps and lists} +@src-code[this-lang] @emph{A little and a little, collected together, become a great deal; the heap in the barn consists of single grains, and drop and drop @@ -140,10 +141,10 @@ primitives: (render-metafunction 𝑯-𝒑𝒓𝒊𝒎 #:contract? #t)) ] -The interpreter similarly has an update to the @racket[interp-prims] +The interpreter similarly has an update to the @racket[interp-prim] module: -@codeblock-include["hustle/interp-prims.rkt"] +@codeblock-include["hustle/interp-prim.rkt"] Inductively defined data is easy to model in the semantics and interpreter because we can rely on inductively defined data at the @@ -362,7 +363,7 @@ the address of the boxes content. Likewise with pairs. 
We use a register, @racket['rbx], to hold the address of the next free memory location in memory. To allocate memory, we simply increment the content of @racket['rbx] by a multiple of 8. To initialize the -memory, we just write into the memory at that location. To contruct a +memory, we just write into the memory at that location. To construct a pair or box value, we just tag the unused bits of the address. @@ -374,7 +375,7 @@ So for example the following creates a box containing the value 7: @#reader scribble/comment-reader (racketblock -(seq (Mov 'rax (arithmetic-shift 7 imm-shift)) +(seq (Mov 'rax (value->bits 7)) (Mov (Offset 'rbx 0) 'rax) ; write '7' into address held by rbx (Mov 'rax 'rbx) ; copy pointer into return register (Or 'rax type-box) ; tag pointer as a box @@ -391,28 +392,41 @@ dereferencing the memory: (Mov 'rax (Offset 'rax 0))) ; load memory into rax ) -Pairs are similar. Suppose we want to make @racket[(cons 3 4)]: +Pairs are similar, only they are represented as tagged pointers to two +words of memory. Suppose we want to make @racket[(cons 3 4)]: @#reader scribble/comment-reader (racketblock -(seq (Mov 'rax (arithmetic-shift 3 imm-shift)) - (Mov (Offset 'rbx 0) 'rax) ; write '3' into address held by rbx - (Mov 'rax (arithmetic-shift 4 imm-shift)) - (Mov (Offset 'rbx 8) 'rax) ; write '4' into word after address held by rbx +(seq (Mov 'rax (value->bits 4)) + (Mov (Offset 'rbx 0) 'rax) ; write '4' into address held by rbx + (Mov 'rax (value->bits 3)) + (Mov (Offset 'rbx 8) 'rax) ; write '3' into word after address held by rbx (Mov 'rax rbx) ; copy pointer into return register - (Or 'rax type-pair) ; tag pointer as a pair + (Or 'rax type-cons) ; tag pointer as a pair (Add 'rbx 16)) ; advance rbx 2 words ) +This code writes two words of memory and leaves a tagged pointer in +@racket['rax]. It's worth noting that we chose to write the +@racket[cdr] of the pair into the @emph{first} word of memory and the +@racket[car] into the @emph{second}. 
This may seem like a strange +choice, but how we lay out the memory is in some sense an arbitrary +choice, so long as all our pair operations respect this layout. We +could have just as easily done the @racket[car] first and @racket[cdr] +second. We lay out pairs this way because it will make things +slightly more convenient when implementing the @racket[cons] primitive +as we'll see later. + + If @racket['rax] holds a pair value, we can project out the elements by erasing the pair tag, leaving just the address of the pair contents, then dereferencing either the first or second word of memory: @#reader scribble/comment-reader (racketblock -(seq (Xor 'rax type-pair) ; erase the pair tag - (Mov 'rax (Offset 'rax 0)) ; load car into rax - (Mov 'rax (Offset 'rax 8))) ; or... load cdr into rax +(seq (Xor 'rax type-cons) ; erase the pair tag + (Mov 'rax (Offset 'rax 8)) ; load car into rax + (Mov 'rax (Offset 'rax 0))) ; or... load cdr into rax ) From here, writing the compiler for @racket[box], @racket[unbox], @@ -468,6 +482,16 @@ one: (show '(cdr x) '(x)) ] +We can now see why we chose to lay out pairs with the @racket[cdr] +first and @racket[car] second. Since @racket[cons] is a binary +operation, the expression which produces the @racket[car] value will +be evaluated first and pushed on the stack. Then the expression that +produces the @racket[cdr] value will execute with its result sitting +in @racket['rax]. So at this point it's easiest to write out the +@racket[cdr] since it's already sitting in a register. Once we do +that, we can pop the @racket[car] value into @racket['rax] and write +that. Hence our choice for the layout.
+ @section[#:tag "hustle-run-time"]{A Run-Time for @this-lang} First, we extend our runtime system's view of values to include diff --git a/www/notes/iniquity.scrbl b/www/notes/iniquity.scrbl index 22abb3de..938c378a 100644 --- a/www/notes/iniquity.scrbl +++ b/www/notes/iniquity.scrbl @@ -26,6 +26,8 @@ @title[#:tag this-lang]{@|this-lang|: function definitions and calls} +@src-code[this-lang] + @table-of-contents[] @section[#:tag-prefix "iniquity"]{Functions} @@ -63,7 +65,7 @@ incorporating @bold{ functions}, and in particular, @bold{recursive functions}, which will allow us to compute over arbitrarily large data with finite-sized programs. -Let's call it @bold{Iniquity}. +Let's call it @bold{@|this-lang|}. We will extend the syntax by introducing a new syntactic category of @bold{programs}, which consist of a sequence of function definitions @@ -111,10 +113,10 @@ is updated to include function applications. Because of the change from a program being a single expression to a sequence, we have to update the utilities that read program files, -i.e. @tt{interp-file.rkt} and @tt{compile-file.rkt}: +i.e. @tt{interp-stdin.rkt} and @tt{compile-stdin.rkt}: -@codeblock-include["iniquity/interp-file.rkt"] -@codeblock-include["iniquity/compile-file.rkt"] +@codeblock-include["iniquity/interp-stdin.rkt"] +@codeblock-include["iniquity/compile-stdin.rkt"] @@ -122,7 +124,7 @@ i.e. @tt{interp-file.rkt} and @tt{compile-file.rkt}: @section[#:tag-prefix "iniquity"]{An Interpreter for Functions} -Writing an interpreter for Inquity is not too hard. The main idea is +Writing an interpreter for @|this-lang| is not too hard. The main idea is that the interpretation of expression is now parameterized by a set of function definitions from the program. 
It serves as a second kind of environment that gets passed around and is used to resolve function @@ -158,8 +160,8 @@ We can try it out: @ex[ (interp (parse - '[(define (double x) (+ x x)) - (double 5) ])) + '(define (double x) (+ x x)) + '(double 5))) ] We can see it works with recursive functions, too. Here's a recursive @@ -168,12 +170,12 @@ function for computing triangular numbers: @ex[ (interp (parse - '[(define (tri x) - (if (zero? x) - 0 - (+ x (tri (sub1 x))))) + '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) - (tri 9)])) + '(tri 9))) ] We can even define mutually recursive functions such as @racket[even?] @@ -182,20 +184,20 @@ and @racket[odd?]: @ex[ (interp (parse - '[(define (even? x) - (if (zero? x) - #t - (odd? (sub1 x)))) + '(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) - (define (odd? x) - (if (zero? x) - #f - (even? (sub1 x)))) - (even? 101)]))] + '(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + '(even? 101)))] And the utility for interpreting programs in files works as well: -@shellbox["racket -t interp-file.rkt -m example/len.rkt"] +@shellbox["cat example/len.rkt | racket -t interp-stdin.rkt -m"] @section[#:tag-prefix "iniquity"]{Conventions of Calling} @@ -547,56 +549,6 @@ parameters. Touching up the code, we compile function definitions as: (Ret)) ) -There is another issue which we must deal with: the aligning of the -stack. As we saw when added code to call C functions, we are required -to align the stack to 16-bytes (or 2 64-bit words) when making a call. - -Because function definitions may include expressions that make such -calls to C, we need to make sure the stack is aligned before doing so. -And because a user-defined function may be called from a point where -the stack is aligned @emph{or} where the stack is off by 8-bytes---in -fact the same function may be called from @emph{both}---it will be the -responsibility of the caller to align the stack for the function. 
- -@#reader scribble/comment-reader -(racketblock -(let ((r (gensym 'ret))) - (seq (pad-stack c) - (Lea rax r) - (Push rax) - (compile-es es (static-pad (cons #f c))) - (Jmp (symbol->label f)) - (Label r) - (unpad-stack c))) - ) - -The @racket[pad-stack] and @racket[unpad-stack] are functions we used -when implementing calls to C functions. Recall that on entry to the -code our compiler generates, the stack is off by one word (because of -the call to @racket['entry] from the run-time system). If the static -environment's length is even, it means we've pushed an even number of -elements on the stack, and so it remains misaligned by one word. On -the other hand if the static environment is odd, an odd number of -pushes have occurred, so the stack is aligned. Hence, -@racket[pad-stack] decrements @racket['rsp] by @racket[8] when -@racket[c] is of even length and does nothing otherwise; -@racket[unpad-stack] undoes the padding by incrementing when -@racket[c] is even. - -The @racket[static-pad] function does something similar but instead of -modifying the run-time static, it works on the compile-time -environment, adding an extra frame whenever the stack would be padded -for alignment purposes: - -@#reader scribble/comment-reader -(racketblock -;; CEnv -> CEnv -(define (static-pad c) - (if (odd? 
(length c)) - (cons #f c) - c)) -) - Now writing the complete definitions for @racket[compile-define] and @racket[compile-app], we have: @@ -614,13 +566,11 @@ Now writing the complete definitions for @racket[compile-define] and ;; Id [Listof Expr] CEnv -> Asm (define (compile-app f es c) (let ((r (gensym 'ret))) - (seq (pad-stack c) - (Lea rax r) + (seq (Lea rax r) (Push rax) - (compile-es es (static-pad (cons #f c))) + (compile-es es (cons #f c)) (Jmp (symbol->label f)) - (Label r) - (unpad-stack c)))) + (Label r)))) ) @@ -660,7 +610,7 @@ Using this function, we can touch up our code: ) -@section[#:tag-prefix "iniquity"]{A Compiler for Iniquity} +@section[#:tag-prefix "iniquity"]{A Compiler for @|this-lang|} The last piece of the puzzle is the function for emitting code for a complete program: @@ -702,10 +652,9 @@ single list: Here's an example of the code this compiler emits: @ex[ -(displayln - (asm-string - (compile - (parse '[(define (double x) (+ x x)) (double 5)])))) +(asm-display + (compile + (parse '(define (double x) (+ x x)) '(double 5)))) ] And we can confirm running the code produces results consistent with @@ -713,27 +662,27 @@ the interpreter: @ex[ (current-objs '("runtime.o")) -(define (run p) - (asm-interp (compile (parse p)))) - -(run '[(define (double x) (+ x x)) - (double 5)]) - -(run '[(define (tri x) - (if (zero? x) - 0 - (+ x (tri (sub1 x))))) - (tri 9)]) - -(run '[(define (even? x) - (if (zero? x) - #t - (odd? (sub1 x)))) - (define (odd? x) - (if (zero? x) - #f - (even? (sub1 x)))) - (even? 101)]) +(define (run . p) + (bits->value (asm-interp (compile (apply parse p))))) + +(run '(define (double x) (+ x x)) + '(double 5)) + +(run '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) + '(tri 9)) + +(run '(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) + '(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + '(even? 
101)) ] The complete compiler code: diff --git a/www/notes/jig.scrbl b/www/notes/jig.scrbl index 88f9315e..f5be1158 100644 --- a/www/notes/jig.scrbl +++ b/www/notes/jig.scrbl @@ -16,7 +16,11 @@ @(for-each (λ (f) (ev `(require (file ,f)))) '("interp.rkt" "compile.rkt" "ast.rkt" "parse.rkt" "types.rkt")) -@title[#:tag "Jig"]{Jig: jumping to tail calls} +@(define this-lang "Jig") + +@title[#:tag this-lang]{@|this-lang|: jumping to tail calls} + +@src-code[this-lang] @table-of-contents[] @@ -88,15 +92,15 @@ the function still needs to be applied, but the body of the function, The significance of tail position is relevant to the compilation of calls. Consider the compilation of a call as described in -@secref{Iniquity}: arguments are pushed on the call stack, then the -@racket['call] instruction is issued, which pushes the address of the -return point on the stack and jumps to the called position. When the -function returns, the return point is popped off the stack and jumped -back to. +@secref{Iniquity}: a return address is pushed on the stack, +arguments are pushed on the call stack, then the +@racket[Jmp] instruction is issued, which jumps to the called function. +The function computes its result, pops its arguments, then pops the return +address and jumps back to the caller. But if the call is in tail position, what else is there to do? Nothing. So after the call, return transfers back to the caller, who -then just returns itself. +then just pops their arguments and returns to their caller. This leads to unconditional stack space consumption on @emph{every} function call, even function calls that don't need to consume space. @@ -141,7 +145,8 @@ the overall call to @racket[sum]. There's no need for a new return point and there's no need to keep the local binding of @racket[b] since there's no way this program can depend on it after the recursive call. Instead of pushing a new, useless, return point, we should make -the call with whatever the current return point.
This is the idea of +the call with whatever the current return address is, because that's +where control is going to jump to anyway. This is the idea of @tt{proper tail calls}. @bold{An axe to grind:} the notion of proper tail calls is often @@ -184,105 +189,550 @@ re-write the interpreter, but as it is, we're already done. @section[#:tag-prefix "jig"]{A Compiler with Proper Tail Calls} -The compiler requires a bit more work, because of how the @tt{Call} instruction -is implemented in the hardware itself, we always use a little bit of stack -space each time we execute a function call. Therefore, in order to implement -tail-calls correctly, we need to @emph{avoid} the @tt{Call} instruction! - -How do we perform function calls without the @tt{Call} instruction, well we're -going to have to do a little bit of extra work in the compiler. First, let's -remind ourselves of how a `normal' function call works (we'll just look at the -case where we don't have to adjust for alignment): +Consider the following program: @#reader scribble/comment-reader (racketblock -(define (compile-app f es c) +(define (f x) + (if (zero? x) + 42 + (f (sub1 x)))) +(f 100) +) + +It's a silly program, but it will help illuminate what tail calls are +all about and how we can make them work. - ; Generate the code for each argument - ; and push each on the stack - (seq (compile-es es c) +Here's what this code will compile to, roughly: - ; Generate the instruction for calling the function itself - (Call (symbol->label f)) +@(void (ev '(current-objs '()))) - ; `pop` all of the arguments off of the stack - (Add rsp (* 8 (length es))))) +@#reader scribble/comment-reader +(ex +(asm-interp + (seq (Global 'entry) + (Label 'entry) + + ;; calling (f 100), so set up return address, + ;; push argument, then jump + (Lea 'rax 'r1) + (Push 'rax) + (Mov 'rax 100) + (Push 'rax) + (Jmp 'f) + (Label 'r1) + + ;; done with (f 100), return + (Ret) + + ;; (define (f x) ...) 
+ (Label 'f) + (Mov 'rax (Offset 'rsp 0)) + (Cmp 'rax 0) + (Jne 'if_false) + + ;; if-then branch + (Mov 'rax 42) + (Jmp 'done) + + ;; if-else branch + (Label 'if_false) + ;; calling (f (sub1 x)), so set up return address, + ;; push argument, then jump + (Lea 'rax 'r2) + (Push 'rax) + (Mov 'rax (Offset 'rsp 8)) + (Sub 'rax 1) + (Push 'rax) + (Jmp 'f) + (Label 'r2) + + (Label 'done) + (Add 'rsp 8) ; pop x + (Ret))) ) -The first insight regards what the stack will look like once we are -@emph{inside the function we are calling}. Upon entry to the function's code, -@tt{rsp} will point to the return address that the last @tt{Call} instruction -pushed onto the stack, with the arguments to the function at positive offsets -to @tt{rsp}. As long as we ensure that this is the case we don't @emph{have} to -call functions with @tt{Call}. - -The second insight is what we mentioned above, when describing tail calls -themselves: If we're performing a call in the tail position then there is -nothing else to do when we return. So instead of returning here, we can return -to the @emph{previous} call, we can overwrite the current environment on the -stack, since we won't need it (there's nothing else to do, after all). In -jargon: we can @emph{reuse the stack frame}. The only thing we have to -be careful about is whether the current environment is `big enough' to -hold all of the arguments for our function call, since we are going -to reuse it, we'll want to make sure there's enough space. - -For now assume we've performed that check and that there is enough space. -Let's go through the process bit by bit: +Now let's think about how this computes, paying attention to the stack. 
+ +First, the run-time system would call @racket['entry], so there's +going to be an address on the stack telling us where to return to when +the program is done: + +@verbatim|{ + + ---------------------+ +rsp ---> | return to runtime | + +----------------------+ +}| + +Next, the call to @racket[(f 100)] is set up, pushing the address of +@racket['r1] for where the call should return to, and then pushing the +argument, @racket[100]. So before the @racket[Jmp] to @racket['f], +the stack looks like: + +@verbatim|{ + + ---------------------+ + | return to runtime | + +----------------------+ + | return to r1 | + +----------------------+ +rsp ---> | x : 100 | + +----------------------+ +}| + +Control jumps to @racket['f], which asks if @racket[x] is 0 by +referencing the argument on the top of the stack. It is not, so +control jumps to the @racket['if_false] label, which now sets up the +call @racket[(f (sub1 x))] by computing a return address for +@racket['r2], pushing it, subtracting @racket[1] from @racket[x], and +pushing that, then jumping to @racket['f]. + +Now the stack looks like: + +@verbatim|{ + + ---------------------+ + | return to runtime | + +----------------------+ + | return to r1 | + +----------------------+ + | x : 100 | + +----------------------+ + | return to r2 | + +----------------------+ +rsp ---> | x : 99 | + +----------------------+ +}| + +This asks if @racket[x] is 0 by referencing the argument on the top of +the stack (now: @racket[99]). It is not, so control jumps to the +@racket['if_false] label, which now sets up the call @racket[(f +(sub1 x))] by computing a return address for @racket['r2], pushing it, +subtracting @racket[1] from @racket[x], and pushing that, then jumping +to @racket['f]. 
+ +@verbatim|{ + + ---------------------+ + | return to runtime | + +----------------------+ + | return to r1 | + +----------------------+ + | x : 100 | + +----------------------+ + | return to r2 | + +----------------------+ + | x : 99 | + +----------------------+ + | return to r2 | + +----------------------+ +rsp ---> | x : 98 | + +----------------------+ +}| + +You can see where this is going. + +@verbatim|{ + + ---------------------+ + | return to runtime | + +----------------------+ + | return to r1 | + +----------------------+ + | x : 100 | + +----------------------+ + | return to r2 | + +----------------------+ + | x : 99 | + +----------------------+ + | return to r2 | + +----------------------+ + | x : 98 | + +----------------------+ + | . | + | . | + | . | + +----------------------+ + | return to r2 | + +----------------------+ + | x : 1 | + +----------------------+ + | return to r2 | + +----------------------+ +rsp ---> | x : 0 | + +----------------------+ +}| + +At this point, we make a final jump to @racket['f]. Since @racket[x] +is @racket[0], @racket[42] is moved into @racket['rax], control jumps +to @racket['done], at which point we pop the current @racket[x] off +the stack, then return, which pops off the next frame of the stack. +Since that frame says to jump to @racket['r2], that's where control +jumps to. + +But @racket['r2] is the same as @racket['done]! So we pop off the +current @racket[x] (now: @racket[1]) and return, which pops off the +next frame saying jump to @racket['r2]. + +This process continues, popping two frames and jumping back to +@racket['r2] until the stack looks like: + +@verbatim|{ + + ---------------------+ + | return to runtime | + +----------------------+ + | return to r1 | + +----------------------+ +rsp ---> | x : 100 | + +----------------------+ +}| + +And we're back at @racket['r2]. Next we pop the current @racket[x] +(now: 100) and return, which pops the top frame off and jumps to +@racket['r1].
But the code following @racket['r1] is simply a +@racket[Ret] instruction, so we pop another frame (the stack is now +empty) and jump back to the runtime system (with @racket['rax] holding +@racket[42]). + +So to summarize, each call to @racket[f] pushes two words on the +stack: one for where to return and one for the argument to the +function. Then, when the base case is finally reached, we +spin through a loop popping all this information off. + +Let's take another look at the function: @#reader scribble/comment-reader (racketblock -;; Variable (Listof Expr) CEnv -> Asm -;; Compile a call in tail position -(define (compile-tail-call f es c) - (let ((cnt (length es))) - - ; Generate the code for the arguments to the function, - ; pushing them on the stack, this is no different - ; than a normal call - (seq (compile-es es c) - - - ; Now we _move_ the arguments from where they are on the - ; stack to where the _previous_ values in the environment - ; the function `move-args` takes the number of values we - ; have to move, and the number of stack slots that we have to - ; move them. - (move-args cnt (+ cnt (in-frame c))) - - ; Once we've moved the arguments, we no longer need them at the - ; top of the stack. This is a big part of the benefit for - ; tail-calls - (Add rsp (* 8 (+ cnt (in-frame c)))) - - ; Now that `rsp` points to the _previous_ return address, - ; and the arguments are at a positive offset of `rsp`, - ; we no longer need the `call` instruction (in fact, it would - ; be incorrect to use it!), instead we jump to the function - ; directly. - (Jmp (symbol->label f))))) +(define (f x) + (if (zero? x) + 42 + (f (sub1 x)))) +) + +In the call to @racket[(f (sub1 x))], that expression is in a tail +position of the function. Intuitively this means once you've computed +the result of @racket[(f (sub1 x))] there's nothing further to +compute, you now have the answer for @racket[(f x)].
This suggests +that you don't need to keep the current binding for @racket[x] on the +stack; if there's no further work to do, you can't possibly need +@racket[x]. It also suggests there's no need to return to the point +after @racket[(f (sub1 x))]; you could instead just return to the +caller of @racket[(f x)]! + +We can modify the code to embody these ideas: + +@#reader scribble/comment-reader +(ex +(asm-interp + (seq (Global 'entry) + (Label 'entry) + + ;; calling (f 100), so set up return address, + ;; push argument, then jump + (Lea 'rax 'r1) + (Push 'rax) + (Mov 'rax 100) + (Push 'rax) + (Jmp 'f) + (Label 'r1) + + ;; done with (f 100), return + (Ret) + + ;; (define (f x) ...) + (Label 'f) + (Mov 'rax (Offset 'rsp 0)) + (Cmp 'rax 0) + (Jne 'if_false) + + ;; if-then branch + (Mov 'rax 42) + (Jmp 'done) + + ;; if-else branch + (Label 'if_false) + ;; TAIL calling (f (sub1 x)), + ;; so pop off the argument (don't need it anymore) + ;; and don't push a new return address, just leave + ;; our caller's return address on stack + (Mov 'rax (Offset 'rsp 0)) + (Sub 'rax 1) + (Add 'rsp 8) ; pop x + (Push 'rax) ; push arg + (Jmp 'f) + + (Label 'done) + (Add 'rsp 8) ; pop x + (Ret))) +) + +Let's step through the computation again. It starts off the same: the +runtime calls @racket['entry], which sets up the call to @racket[(f +100)], so when control jumps to @racket['f], the stack again looks +like: + +@verbatim|{ + + ---------------------+ + | return to runtime | + +----------------------+ + | return to r1 | + +----------------------+ +rsp ---> | x : 100 | + +----------------------+ +}| + +This checks if @racket[x] is @racket[0], which it is not, so control jumps to +@racket['if_false].
Now the code computes @racket[x-1] and then pops +@racket[x], and pushes @racket[x-1], so when we jump to @racket['f], +the stack looks like: + +@verbatim|{ + + ---------------------+ + | return to runtime | + +----------------------+ + | return to r1 | + +----------------------+ +rsp ---> | x : 99 | + +----------------------+ +}| + +Again we go through the same instructions, popping @racket[x] and +pushing @racket[x-1], then jumping to @racket['f] with stack: + +@verbatim|{ + + ---------------------+ + | return to runtime | + +----------------------+ + | return to r1 | + +----------------------+ +rsp ---> | x : 98 | + +----------------------+ +}| + +This continues, but the stack never grows further, until finally jumping to @racket['f] +with the stack: + +@verbatim|{ + + ---------------------+ + | return to runtime | + +----------------------+ + | return to r1 | + +----------------------+ +rsp ---> | x : 0 | + +----------------------+ +}| + +At which point, @racket[42] is moved in to @racket['rax] and control +jumps to @racket['done], where @racket[x] is popped and then +@racket[Ret] pops the next frame and jumps to @racket['r1], which +issues another @racket[Ret], popping the stack (now empty) and jumping +back to the runtime system. + +So this program makes 100 calls to @racket[f], but it uses a constant +amount of stack space. Indeed, we could've made it @racket[(f 200)] +and it would still use the same three stack frames. The program +is really computing a loop. + +Moreover, we can do one better: notice that the initial call +@racket[(f 100)] is itself in a tail position: whatever its result is, +is the result of the whole program.
We can turn this in to a tail +call: + +@#reader scribble/comment-reader +(ex +(asm-interp + (seq (Global 'entry) + (Label 'entry) + + ;; TAIL calling (f 100), + ;; no args to pop + ;; don't push a new return address, just leave + ;; our caller's return address on stack + (Mov 'rax 100) + (Push 'rax) + (Jmp 'f) + + ;; No need for this since we never come back: + ;; (Ret) + + ;; (define (f x) ...) + (Label 'f) + (Mov 'rax (Offset 'rsp 0)) + (Cmp 'rax 0) + (Jne 'if_false) + + ;; if-then branch + (Mov 'rax 42) + (Jmp 'done) + + ;; if-else branch + (Label 'if_false) + ;; TAIL calling (f (sub1 x)), + ;; so pop off the argument (don't need it anymore) + ;; and don't push a new return address, just leave + ;; our caller's return address on stack + (Mov 'rax (Offset 'rsp 0)) + (Sub 'rax 1) + (Add 'rsp 8) ; pop x + (Push 'rax) ; push arg + (Jmp 'f) + + (Label 'done) + (Add 'rsp 8) ; pop x + (Ret))) ) -@tt{move-args} is defined below: +Now the stack looks like this: + +@verbatim|{ + + ---------------------+ + | return to runtime | + +----------------------+ +rsp ---> | x : 100 | + +----------------------+ +}| + +decrementing until @racket[x] reaches @racket[0] at which point +@racket[42] is put in @racket['rax] and control jumps back to the +runtime system. + +In general, when a function call @racket[(_f _e0 ...)] is in tail +position, there are going to be some number of things currently pushed +on the stack, which are described by the current environment +@racket[_c]. To carry out a tail call, we need to pop all of those +things described by @racket[_c], then push the values of @racket[_e0 +...] which are the arguments for @racket[_f], then jump to +@racket[_f]. + +There is a problem here, which is that we need to evaluate the +subexpressions @racket[_e0 ...] and doing so may depend on things in +the current environment, e.g. they may reference bound variables. 
+ +So we have to wait to pop the things described by @racket[_c] until +@emph{after} evaluating @racket[_e0 ...], but evaluating @racket[_e0 +...] will need to save the values somewhere... and that somewhere is +the stack. + +Let's say we have an expression that looks like this: + +@#reader scribble/comment-reader +(racketblock +(let ((x 1)) + (let ((y 2)) + (f (+ x y) 5)))) + +The call to @racket[f] is in tail position and it will be compiled in +a compile-time environment of @racket['(y x)]. The compiler will need +to compile @racket[(+ x y)] in that same environment, but then emit code +to save the result on the stack while the next argument is evaluated. + +That means by the time the arguments are evaluated and the call is +ready to be made, the stack will look like: + +@verbatim|{ + + ---------------------+ + | return address | + +----------------------+ + | x : 1 | + +----------------------+ + | y : 2 | + +----------------------+ + | 3 | + +----------------------+ +rsp ---> | 5 | + +----------------------+ +}| + +At which point we need to remove the @racket[x] and @racket[y] part, +but then also have the arguments @racket[3] and @racket[5] sitting just +below the return address, i.e. we want: + +@verbatim|{ + + ---------------------+ + | return address | + +----------------------+ + | 3 | + +----------------------+ +rsp ---> | 5 | + +----------------------+ +}| + +To accomplish this, we rely on the following helper function for generating +code that moves arguments on the stack: @#reader scribble/comment-reader (racketblock ;; Integer Integer -> Asm -;; Move i arguments upward on stack by offset off -(define (move-args i cnt) - (match i - [0 (seq)] - [_ (seq - ; mov first arg to temp reg - (Mov r9 (Offset rsp (* 8 (sub1 i)))) - ; mov value to correct place on the old frame - (Mov (Offset rsp (* 8 (+ i cnt))) r9) - ; Now do the next one - (move-args (sub1 i) cnt))])) +(define (move-args i off) + (cond [(zero? off) (seq)] + [(zero? 
i) (seq)] + [else + (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) + (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) + (move-args (sub1 i) off))])) ) -The entire compiler will be illuminated for seeing how we keep track of which -expressions are in a tail-call position and whether we have enough space to -re-use the stack frame. +It moves @racket[i] elements on the stack up @racket[off]-positions. +So if you have @racket[(length _c)] items in the environment and +@racket[_n] arguments, then @racket[(move-args _n (length _c))] will +move the arguments to the right spot, just below the return address. + +Once the arguments are moved to the proper spot on the stack, we can +pop off the local environment and jump. So the complete code for +compiling a tail call is: + +@#reader scribble/comment-reader +(racketblock +;; Id [Listof Expr] CEnv -> Asm +(define (compile-app-tail f es c) + (seq (compile-es es c) + (move-args (length es) (length c)) + (Add rsp (* 8 (length c))) + (Jmp (symbol->label f)))) +) + +What's left is determining @emph{when} to use this strategy for a +function application and when to use the prior version which pushes a +return pointer. + +The way we do this is to add a parameter to the expression compiler, +so the new signature is: + +@#reader scribble/comment-reader +(racketblock +;; Expr CEnv Bool -> Asm +(define (compile-e e c t?) + ...) +) + +Calling @racket[(compile-e _e _c #t)] signals that the expression +@racket[_e] should be compiled assuming it is in tail position, while +@racket[(compile-e _e _c #f)] signals it is not in tail position. + +If @racket[_e] is an application, then the compiler selects between +@racket[compile-app-nontail] and @racket[compile-app-tail] based on +@racket[t?]. + +If @racket[_e] is any other kind of expression that has +sub-expressions, then the compiler function for that form also adds a +@racket[t?] parameter and sets @racket[t?] to @racket[#f] for any that +are not tail positions and passes on the @racket[t?]
given for those +in tail position. For example, here is how @racket[begin] is +compiled: + +@#reader scribble/comment-reader +(racketblock +;; Expr Expr CEnv Bool -> Asm +(define (compile-begin e1 e2 c t?) + (seq (compile-e e1 c #f) + (compile-e e2 c t?))) +) + +There are two important places where @racket[t?] is seeded to @racket[#t]: + +@itemlist[ +@item{The top-level expression is in tail position.} +@item{The body of every function is in tail position.} +] + + +The complete compiler: @codeblock-include["jig/compile.rkt"] diff --git a/www/notes/juvie.scrbl b/www/notes/juvie.scrbl index aa644e92..3870ffb5 100644 --- a/www/notes/juvie.scrbl +++ b/www/notes/juvie.scrbl @@ -13,7 +13,11 @@ @;(for-each (λ (f) (ev `(require (file ,(path->string (build-path notes "iniquity" f)))))) @; '("interp.rkt" "ast.rkt" "parse.rkt" "compile.rkt" "asm/interp.rkt" "asm/printer.rkt")) -@title[#:tag "Juvie"]{Juvie: cleaning up after your mess} +@(define this-lang "Juvie") + +@title[#:tag this-lang]{@|this-lang|: cleaning up after your mess} + +@src-code[this-lang] @emph{Many a man fails to become a thinker for the sole reason that his memory is too good.} diff --git a/www/notes/knock.scrbl b/www/notes/knock.scrbl index 55f5e93a..3fac688a 100644 --- a/www/notes/knock.scrbl +++ b/www/notes/knock.scrbl @@ -6,181 +6,691 @@ scribble/examples "utils.rkt" "ev.rkt" + "../fancyverb.rkt" "../utils.rkt") @(define codeblock-include (make-codeblock-include #'h)) +@(define (shellbox . 
s) + (parameterize ([current-directory (build-path notes "knock")]) + (filebox (emph "shell") + (fancyverbatim "fish" (apply shell s))))) + + @(ev '(require rackunit a86)) @(ev `(current-directory ,(path->string (build-path notes "knock")))) @(void (ev '(with-output-to-string (thunk (system "make runtime.o"))))) +@(ev '(current-objs '("runtime.o"))) @(for-each (λ (f) (ev `(require (file ,f)))) '("interp.rkt" "compile.rkt" "ast.rkt" "parse.rkt" "types.rkt")) -@title[#:tag "Knock"]{Knock: first-class function (pointers)} +@(define this-lang "Knock") + +@title[#:tag this-lang]{@|this-lang|: pattern matching} + +@src-code[this-lang] @table-of-contents[] -@section[#:tag-prefix "knock"]{First-class function (pointers)} +@section[#:tag-prefix "knock"]{Matching} + +One feature we've taken advantage of extensively in the writing of our +compilers is Racket's @racket[match] facility for pattern +matching. + +Let's add a similar feature to our own language. + +We'll call it @bold{@this-lang}! + +In @|this-lang|, we will support a limited form of pattern matching of +the form: + +@racketblock[ +(match _e + [_p0 _e0] + ...) +] + +A pattern matching expression is used to perform case-analysis, +deconstruction, and binding on the value produced by @racket[_e]. A +match consists of any number of clauses, where each clause consists of +a pattern @racket[_pi] and expression @racket[_ei]. Each @racket[_pi] +is a @emph{pattern}, which can include literal booleans, characters, +integers, and the empty list, or can be a pattern variable, a +wildcard, a @racket[cons]-pattern, or an @racket[and]-pattern. Clauses +are matched in the order in which they appear and if a pattern matches +the value of @racket[_e], then the corresponding expression is +evaluated in an environment that binds any pattern variables to the +matching parts of @racket[_e]'s value. If no patterns match +@racket[_e]'s value, an error is signalled.
+ +The syntax is extended as follows: -With Iniquity and Jig, we have introduced functions and function -calls, but functions are second-class language mechanisms: functions -are not values. They cannot be computed. They cannot be stored in a -list or box. They cannot be passed as arguments are returned as -results of other functions. +@codeblock-include["knock/ast.rkt"] -This is too bad since so many program designs depend on the idea of -computation-as-a-value at the heart of functional and object-oriented -programming. -Let's now remedy this problem by making functions first-class values -in our language. We'll call it @bold{Knock}. +@section[#:tag-prefix "knock"]{Match by Example} +Since we've been using pattern matching throughout the course, it +probably is pretty natural at this point, but let's quickly walk +through some examples to try and disentangle the different aspects of +@racket[match]. -We add to the syntax two new forms, one for reference functions and -one for calling a function where the function position is an arbitrary -expression (that should evaluate to a function): +Perhaps the simplest form of a @racket[match]-expression uses a +pattern that just consists of a variable, e.g. -@verbatim|{ -;; type Expr = -;; | .... -;; | (Fun Id) -;; | (Call Expr (Listof Expr)) -}| +@racketblock[ +(match _e + [x _e0])] -These new syntactic forms are temporary forms that don't correspond -anything in Racket but make it a bit easier to present how first-class -functions work. The @racket[(fun _f)] form is a reference to a -function @racket[_f], which is defined in the program. The -@racket[(call _e0 _es ...)] form is a function call where the function -position, @racket[_e0] is an arbitrary expression that should produce -a function value. +This expression is equivalent to @racket[(let ((x _e)) _e0)] because a +pattern variable matches any value and binds that name in the scope of +its right-hand expression. 
We can see from this example that +@racket[match] is doing variable binding. -We will end up eliminating them in future versions of the compiler; -they are simply a crutch for now. +Relatedly, a ``wildcard'' pattern can be used to match anything +@emph{without} binding the value to a name: -@section[#:tag-prefix "knock"]{A Compiler with Function pointers} +@racketblock[ +(match _e + [_ _e0])] -The main idea in making functions into values is we will need to have -a representation of functions. We will use a representation similar -to boxes: functions will be heap allocated data structures. What will -be stored in the heap? The address of the label of the function. +This expression is equivalent to @racket[(begin _e _e0)]. -A function reference, @racket[(fun _f)], will allocate a 64-bit -segment of the heap, store the location of the function's label, -i.e. a pointer to the instructions for the function, and tag the -pointer as a ``procedure value,'' which is new, disjoint kind of -value. +Another simple form of pattern is to use a literal such as an integer, +character, etc. which matches when the value is the same as the +literal. This form of pattern doesn't bind any names, but is used to +discriminate between different cases of what the value may be. For +example: + +@racketblock[ +(match _e + [#f _e1] + [_ _e2])] + +This expression is equivalent to @racket[(if _e _e2 _e1)]. Here we can +see that @racket[match] is doing conditional evaluation, selecting +@racket[_e1] if @racket[_e] produces @racket[#f], and selecting +@racket[_e2] otherwise. 
+ +A more complicated pattern involves a constructor-style pattern like +@racket[cons]: + +@racketblock[ +(match _e + [(cons x y) _e1] + [_ _e2]) +] + +Here, the @racket[cons] pattern is both discriminating between +@racket[cons] and non-@racket[cons] values, matching only when +@racket[_e] is a pair, but also binding the names @racket[x] and +@racket[y] to the components of the pair when the value is in fact a +pair; these names are bound in the scope of @racket[_e1]. In this +way, the pattern is used to @emph{destructure} compound values such as +pair. + +The @racket[x] and @racket[y] in this example are actually just +instances of patterns themselves, and patterns can be nested +arbitrarily deep. So for example, if we wanted only to match +a pair containing @racket[1] and @racket[2], we could write: + +@racketblock[ +(match _e + [(cons 1 2) _e1] + [_ _e2])] + +The @racket[and]-pattern is used to match the conjunction of two +patterns, so @racket[(and _p1 _p2)] matches whenever @racket[_p1] and +@racket[_p2] both match and binds all of the names in @racket[_p1] and +@racket[_p2]. For example, + +@racketblock[ +(match _e + [(and (cons 1 x) (cons y 2)) _e1] + [_ _e2]) +] + +The first clause matches when @racket[_e] evaluates to @racket[(cons 1 +2)] and binds the name @racket[x] to @racket[2] and @racket[y] to +@racket[1] in the scope of @racket[_e1]. 
+ + +Here are some complete examples and how they are parsed: + +@ex[ +(parse-e '(match z [x x])) +(parse-e '(match z [_ #t])) +(parse-e '(match z [1 #t])) +(parse-e '(match z [1 #t] [2 #f])) +(parse-e '(match z [(cons x y) #t])) +(parse-e '(match z [(cons 1 2) #t])) +(parse-e '(match z [(and (cons x 2) (cons 1 y)) #t])) +(parse-define + '(define (length xs) + (match xs + ['() 0] + [(cons x xs) + (add1 (length xs))]))) +] + +@section[#:tag-prefix "knock"]{An Interpreter for Pattern Matching} + +At the heart of the interpreter for @this-lang is the function: @#reader scribble/comment-reader (racketblock -;; Id -> Asm -(define (compile-fun f) - ; Load the address of the label into rax - (seq (Lea rax (symbol->label f)) - ; Copy the value onto the heap - (Mov (Offset rbx 0) rax) - ; Copy the heap address into rax - (Mov rax rbx) - ; Tag the value as a proc - (Or rax type-proc) - ; Bump the heap pointer - (Add rbx 8))) +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) ...) ) -A function call, @racket[(call _e0 _es ...)] will evaluate on the -subexpressions. The @racket[_e0] expression should produce a -function, i.e. tagged pointer. We can erase the tag to compute the -address in the heap. Dereferencing that location, gets us the label -address, which can then jump to. +This function takes a single pattern and value, along with an +environment, and determines whether the pattern matches the value, and +if so, produces an environment that binds the variables in the pattern to the +sub-parts of the value that match. If the pattern doesn't match, +@racket[#f] is produced. + +So for example, if the pattern is simply a variable @racket[x], the +function produces @racket[r] extended to bind @racket[x] to +@racket[v]. If the pattern is a wildcard, it produces @racket[r], +indicating a match, but with no new bindings. Likewise, if the +pattern is a literal, it produces @racket[r] when the value is the +same as the literal.
The more interesting cases are of @racket[cons]- +and @racket[and]-patterns which recursively match the sub-patterns. + + +It's important to see that this function's return type is +communicating multiple things at the same time. If the pattern +doesn't match, it produces @racket[#f]. If it produces an +environment, it means the pattern matched @emph{and} the environment +communicates the binding of the pattern variables to values. + +Let's consider some examples: +@ex[ +(interp-match-pat (Var '_) 99 '()) +] + +Here the pattern matches, but binds no variables so the result is the +same environment as given. + +@ex[ +(interp-match-pat (Var 'x) 99 '()) +] + +Here the pattern matches and binds @racket[x] to @racket[99], which is +reflected in the output environment. + +@ex[ +(interp-match-pat (Lit 99) 99 '()) +] + +Here the pattern matches but binds nothing. + +@ex[ +(interp-match-pat (Lit 100) 99 '()) +] + +Here the pattern doesn't match. + + +@ex[ +(interp-match-pat (Conj (Lit 99) (Var 'x)) 99 '()) +] + +Here the pattern matches and binds @racket[x] to @racket[99]. -Similar to `compile-app` from Iniquity, we have to be concerned about 16-byte -alignment for `rsp`. However, the wrinkle is that we also have the function -pointer on the stack, so we have to do the calculation with an `extended` env: -`env`: +@ex[ +(interp-match-pat (Conj (Lit 100) (Var 'x)) 99 '()) +] + +Here the pattern doesn't match. + +@ex[ +(interp-match-pat (Cons (Var 'x) (Var 'y)) 99 '()) +] + +Here the pattern doesn't match. + +@ex[ +(interp-match-pat (Cons (Var 'x) (Var 'y)) (cons 99 100) '()) +] + +Here the pattern matches and binds @racket[x] to @racket[99] and +@racket[y] to @racket[100]. 
+ +As you can see, the patterns can be nested arbitrarily deep but the +environment produced will bind each variable to the appropriate +sub-part of the given value: + +@ex[ +(interp-match-pat (Cons (Cons (Var 'x) (Var 'y)) + (Cons (Var 'p) (Var 'q))) + (cons (cons 99 100) + (cons #t #f)) + '()) +] + +The complete code for @racket[interp-match-pat] is: @#reader scribble/comment-reader (racketblock -(define (compile-fun-call e0 es c) - (let ((d (length es)) - (env (cons #f c))) - ; We have to computer the function pointer either way. - (seq (compile-e e0 c) - (assert-proc rax) - (Push rax) - - ; Then we worry about alignment - (if (even? (+ d (length env))) - - ; We will be 16-byte aligned - (seq (compile-es es env) - (Mov rax (Offset rsp (* 8 d))) - (Xor rax type-proc) - (Call (Offset rax 0)) - (Add rsp (* 8 (add1 d)))) - - ; We won't be 16-byte aligned, and need to adjust `rsp` - (seq (Sub rsp 8) - (compile-es es env) - (Mov rax (Offset rsp (* 8 (add1 d)))) - (Xor rax type-proc) - (Call (Offset rax 0)) - ; pop arguments, padding, and function pointer - (Add rsp (* 8 (+ 2 d)))))))) +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(Var '_) r] + [(Var x) (ext r x v)] + [(Lit l) (and (eqv? l v) r)] + [(Box p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(Cons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(Conj p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) +) + +With @racket[interp-match-pat], we can then build up the function for +interpreting a @racket[match] expression: + +@#reader scribble/comment-reader +(racketblock +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) ...) 
) -A tail call version of the above can be defined as: +This function traverses the patterns in order until finding one that +matches (using @racket[interp-match-pat]) and then evaluates the +corresponding right-hand expression in the environment that +@racket[interp-match-pat] produced. If it runs out of clauses without +finding a match, it produces an error. + +It's fairly straightforward: @#reader scribble/comment-reader (racketblock -;; Variable (Listof Expr) CEnv -> Asm -;; Compile a call in tail position -(define (compile-tail-fun-call f es c) - (let ((cnt (length es))) - (seq (compile-e f c) - (assert-proc rax) - (Push rax) - (compile-es es (cons #f c)) - (move-args cnt (+ cnt (add1 (in-frame c)))) - (Mov rax (Offset rsp (* 8 cnt))) - (Xor rax type-proc) - (Add rsp (* 8 (+ cnt (add1 (in-frame c))))) - (Jmp (Offset rax 0))))) +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) +) + +The complete interpreter: + +@codeblock-include["knock/interp.rkt"] + + +We can now see it in action: + +@ex[ +(define (run e) + (interp-env (parse-e e) '() '())) + +(run '(match 1 [1 #t] [_ #f])) +(run '(match 2 [1 #t] [_ #f])) +(run '(match 2 [x x] [_ #f])) +(run '(match (cons 1 2) [(cons x y) x] [_ #f])) +(run '(match (box 1) [(box x) x] [_ #f])) +(run '(match (box 1) [(box 2) #t] [_ #f])) +] + +And we can use pattern matching to define functions in a style similar +to what we've been using all semester: + +@ex[ +(interp + (parse + '(define (length xs) + (match xs + ['() 0] + [(cons x xs) + (add1 (length xs))])) + '(length (cons 7 (cons 8 (cons 9 '())))))) +] + + +@section[#:tag-prefix "knock"]{A Compiler for Pattern Matching} + +The compilation of pattern matching expressions is significantly more +complicated compared to interpretation.
+ +Most of the complication is due to the fact that the computation of +the binding structure in the interpreter must be split and mirrored +across compile-time and run-time in the compiler. Each +right-hand-side of a clause must be compiled in a static environment +that is dependent on the variables occurring in the left-hand-side +pattern. At run-time, these variables will be bound by pushing parts +of the matched value on the stack. + +To make matters worse, the stack will also be needed to save +intermediate results for later processing. For example, in matching a +@racket[cons]-pattern, we must push the @racket[cdr] of the pair on +the stack while pattern-matching the @racket[car]. +The function @racket[compile-pattern] has the following signature: + +@#reader scribble/comment-reader +(racketblock +;; Pat CEnv Symbol -> (list Asm Asm CEnv) +(define (compile-pattern p cm next) ...) ) -The complete compiler: +It consumes a single pattern, which it is compiling, a static +environment that describes the bindings that have occurred so far, and a +label name which denotes where to jump to in order to try matching the +next pattern. -@codeblock-include["knock/compile.rkt"] +It produces three things: + +@itemlist[ + +@item{a sequence of instructions which determine whether the value in +@racket['rax] matches the pattern @racket[p] and bind any variables that +may occur in @racket[p],} -We can verify that the compiler works for programs that use functions -like before: +@item{a sequence of instructions which handle what to do if @racket[p] +doesn't match such as restoring the stack to its state before the +match started and jumping to @racket[next], and} + +@item{a static environment that describes the bindings of the pattern +in case it matches.} + +] + + +Let's look at some examples. First, consider the wildcard pattern: @ex[ -(current-objs '("runtime.o")) -(asm-interp - (compile (parse '(begin (define (f x) - (if (zero? 
x) - 0 - (add1 (call (fun f) (sub1 x))))) - (call (fun f) 10))))) +(compile-pattern (Var '_) '() 'next) ] -But it also works when functions are put in lists: +When the pattern is a wildcard, it produces an empty sequence of +instructions for the ``determine if the pattern matches'' part. This +is because the pattern @emph{always} matches. There's nothing to do. +Similarly, it produces an empty sequence of instructions for the +``what to do if it doesn't match'' part because that's impossible; this +pattern always matches. Finally, it produces the environment it was +given because it doesn't bind anything. + +Now pattern variables: @ex[ -(current-objs '("runtime.o")) -(asm-interp - (compile (parse '(begin (define (f x) x) - (call (car (cons (fun f) '())) 7))))) +(compile-pattern (Var 'x) '() 'next) ] -And functions that produce functions: +A pattern variable always matches and binds the value to @racket[x], +so in the ``determine and bind'' part it simply pushes @racket['rax] +on to the stack to bind the value. + +It has empty sequences of instructions for the ``failing'' part +because it always matches just like a wildcard. Finally the static +environment part adds @racket[x] to the environment because this +pattern binds @racket[x] when it matches. + +Pattern literals: @ex[ -(current-objs '("runtime.o")) -(asm-interp - (compile (parse '(begin (define (f x) (fun h)) - (define (h y) y) - (call (call (fun f) 5) 9))))) +(compile-pattern (Lit 0) '() 'next) ] + +In the ``determine and bind'' part, we compare the value in +@racket['rax] to the literal. If they are not equal, the pattern +doesn't match so control jumps a generated label that is defined in +the ``fail'' part. The instructions in the ``fail'' part pop off all +of the current bindings in the pattern (in this example there are +none) and then jumps to @racket[next]. + +The environment stays the same because a literal doesn't bind anything. 
+ +Supposing we had changed the example to: + +@ex[ +(compile-pattern (Lit 0) '(x y z) 'next) +] + +This is essentially saying ``compile the pattern @racket[(Lit 0)] +assuming it occurs in the context of a surrounding pattern that binds +@racket[x], @racket[y], and @racket[z] before getting to this point.'' +If it fails, it needs to pop all three bindings off the stack, hence +the ``fail'' code adds @racket[24] to @racket['rsp] before jumping to +@racket['next]. + +Now we get to the inductive patterns, which will be more interesting. +Let's start with the @racket[box]-pattern. + +@ex[ +(compile-pattern (Box (Var '_)) '() 'next) +] + +This ``determine and bind'' part moves the value to a temporary +register and masks the final three bits then compares the result to +the type tag for boxes. If they are not equal, the value in +@racket['rax] is not a box, so it jumps to the generated label for the +``fail'' part, which pops all bound pattern variables before jumping +to @racket['next]. If the value is a box, it is untagged and the +value inside the box is fetched to @racket['rax] for the subsequent +pattern to match against, in this case the wildcard. Nothing is bound +so there are no changes in the output environment. + +Let's change the wildcard to a literal: + +@ex[ +(compile-pattern (Box (Lit 0)) '() 'next) +] + +This works just like before but now in the ``determine and bind'' +instructions, it compares the unboxed value to @racket[0]. + +Notice that the code here is modifying @racket['rax]. As it descends +into the box and tries to match the inner pattern, it moves the value +inside the box into @racket['rax]. This is important because it +maintains the invariant that the pattern is being matched against the +value in @racket['rax], but it also means that in compound patterns, +we may have to do more work to ensure the right value is in +@racket['rax]. + +Let's consider a @racket[cons]-pattern. 
A @racket[cons]-pattern is +similar to a @racket[box] pattern in that the first thing it needs to +do is determine if the value is a pointer tagged with the appropriate +type, in this case the @racket[cons] tag. Then it needs to move a +value into @racket['rax] and check if a subpattern matches. In +particular, it needs to move the @racket[car] value into @racket['rax] +and check if the first subpattern matches. + +Assuming it does match, what happens next? We need to move the +@racket[cdr] value in to @racket['rax] and check it matches the second +subpattern. But where can we get the @racket[cdr]? The moment we +overwrite @racket['rax] with the @racket[car], we've lost a handle on +the pair and thus access to the @racket[cdr]. + +The solution is to use the same mechanism we've always used to save +values: push it on the stack and fetch it later. With this in mind, +consider the following example for matching @racket[(cons 0 0)]: + +@ex[ +(compile-pattern (Cons (Lit 0) (Lit 0)) '() 'next) +] + +This starts off like the @racket[box] pattern checking the tag bits of +the value. But then, before moving the @racket[car] into +@racket['rax], it pushes the @racket[cdr] on the stack. It then +installs the @racket[car] and checks if it matches @racket[0]. If it +does, it then installs the @racket[cdr] off the stack and into +@racket['rax] to check if it too is @racket[0]. Note that if either +subpatterns fail to match, they both jump to code that pops a single +element off the stack, which is the stashed away @racket[cdr] value +that was pushed. + +Also note that the static environment produced is @racket['(#f)] to +account for the @racket[cdr] value that was pushed. 
+ +The @racket[and]-pattern is a bit like @racket[cons] in that it has to +push a value on the stack in order to restore it after matching the +first subpattern: + +@ex[ +(compile-pattern (Conj (Lit 0) (Lit 0)) '() 'next) +] + +The @racket[compile-pattern] function is used by +@racket[compile-match-clause] which takes care of compiling a single +@racket[match] clause. It is given a pattern and a +right-hand-side expression to execute should the pattern match, an +environment that describes the current bindings, a label to jump to +when the code is done, i.e. the correct result is in @racket['rax], +and finally a boolean indicating if this @racket[match] expression is +in tail position. + +@#reader scribble/comment-reader +(racketblock +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) ...) +) + +This function stitches together the parts returned by +@racket[compile-pattern] to implement a clause. This function assumes +the value to be matched is the top element of the stack, so the first +thing it does is fetch the value and install it in @racket['rax]. It +then executes the ``determine if the pattern matches and bind'' code +followed by the right hand side expression, then pops all the +pattern-bound values off the stack and jumps to @racket[done]. After +this it emits the code for what to do if the pattern doesn't match +(thus jumping to @racket[done] will jump past this code). + +Consider a match clause like @racket[[_ #t]]: + +@ex[ +(compile-match-clause (Var '_) (Lit #t) '() 'done #f) +] + +Here we can see the value being matched is fetched from the top of the +stack. Since this pattern always matches, it next executes the +right-hand-side by moving the bit-representation of @racket[#t] into +@racket['rax]. It pops everything matching the pattern pushed on the +stack (in this case nothing), then jumps to @racket[done]. 
The final +label, which is never reached, is where control should jump to in +order to try matching the next clause. + +Let's look at a literal; consider a clause @racket[[0 #t]]: + +@ex[ +(compile-match-clause (Lit 0) (Lit #t) '() 'done #f) +] + +As always, it starts by fetching the top of the stack and putting the +value in @racket['rax]. It then does the ``determine if matches and +bind'' instructions followed by the right-hand-side. If the value in +@racket['rax] is not @racket[0] it will jump to code that handles the +failure to match by popping off anything pushed to the stack (in this +case nothing) and then jumping to the next clause (in this case, +that's the next label, but this isn't the case in general). If the +value in @racket['rax] is @racket[0], @racket[#t] is moved into +@racket['rax], the stack is popped, and control jumps to +@racket[done]. + +Let's see what a clause involving a pattern variable looks like, +e.g. @racket[[x x]]. Here we're going to reference the variable bound +in the pattern in the right-hand-side: + +@ex[ +(compile-match-clause (Var 'x) (Var 'x) '() 'done #f) +] + +The value being matched is fetched from the stack. It's immediately +pushed (again) to the stack because the variable pattern always +matches and binds. We then execute the right hand side, which is just +a reference to @racket[x], hence it fetches the top element of the +stack, then pops this off and jumps to @racket[done]. + +OK, now let's try something like @racket[[(box x) x]]: + +@ex[ +(compile-match-clause (Box (Var 'x)) (Var 'x) '() 'done #f) +] + +The value being matched is fetched from the stack. It's checked for +whether it is a box, jumping away when it isn't. Otherwise it unboxes +the value and pushes it on the stack to bind to @racket[x], then +executes the RHS, which fetches @racket[x] into @racket['rax], pops, +and jumps to @racket[done]. 
+ +Here is the complete code for @racket[compile-match-clause]: + +@#reader scribble/comment-reader +(racketblock +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i f cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) + (Add rsp (* 8 (length cm))) + (Jmp done) + f + (Label next))]))) +) + +Generating code for a sequence of @racket[match] clauses is as simple +as generating the code for each clause in sequence: + +@#reader scribble/comment-reader +(racketblock +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) +) + +Finally, we have a function for compiling a complete @racket[match] +expression: + +@#reader scribble/comment-reader +(racketblock +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c #f) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched +) + +We can check that the compiler works for a complete example: + +@ex[ +(define (run . 
p) + (bits->value (asm-interp (compile (apply parse p))))) + +(run + '(define (length xs) + (match xs + ['() 0] + [(cons x xs) (add1 (length xs))])) + '(length (cons 7 (cons 8 (cons 9 '()))))) +] + + + +With these pieces in place, here's the complete compiler: + +@codeblock-include["knock/compile.rkt"] diff --git a/www/notes/loot.scrbl b/www/notes/loot.scrbl index d3bf4432..f2ecf010 100644 --- a/www/notes/loot.scrbl +++ b/www/notes/loot.scrbl @@ -13,46 +13,171 @@ @(ev '(require rackunit a86)) @(ev `(current-directory ,(path->string (build-path notes "loot")))) @(void (ev '(with-output-to-string (thunk (system "make runtime.o"))))) +@(void (ev '(current-objs '("runtime.o")))) @(for-each (λ (f) (ev `(require (file ,f)))) '("interp.rkt" "compile.rkt" "ast.rkt" "parse.rkt" "types.rkt")) -@title[#:tag "Loot"]{Loot: lambda the ultimate} +@(define this-lang "Loot") + +@title[#:tag this-lang]{@|this-lang|: lambda the ultimate} + +@src-code[this-lang] @table-of-contents[] @section[#:tag-prefix "loot"]{Functions in their most general form} -We've been building up the pieces of functions, first with -second-class functions, then with tail-calls, then with first-class -function pointers. +We've added function calls and function definitions, but what we don't +have and really should is function @bold{values}. + +Programming with functions as values is a powerful idiom that is at +the heart of both functional programming and object-oriented +programming, which both center around the idea that computation itself +can be packaged up in a suspended form as a value and later run. Now we're ready to deal with functions in their most general form: @racket[λ]-expressions. -We add @racket[λ]-expressions to the syntax and remove the -@racket[(fun ,Variable)] and @racket[(call ,Expr ,@(Listof Expr))] -forms. We no longer need a separate syntactic form for referencing -the name of a function, we can just use variable binding. 
Likewise, -we use the same syntax as Racket for function application: +Let's call it @bold{Loot}. -@verbatim|{ -;; type Expr = -;; | .... -;; | Lam Name (Listof Variable) Expr -;; | App Expr (Listof Expr) -}| +We add @racket[λ]-expressions to the syntax of expressions: + +@racketblock[ +(λ (_x0 ...) _e0) +] + +Here @racket[_x0 ...] are the formal parameters of the function and +@racket[_e0] is the body. + +The syntax is evocative of function definitions: + +@racketblock[ +(define (_f _x0 ...) _e0) +] + +However, you'll notice: + +@itemlist[ + +@item{There is no function name in the @racket[λ]-expression; it is an +@bold{anonymous} function.} + +@item{The new form is an expression---it can appear anywhere as a +subexpression in a program, whereas definitions were restricted to be +at the top-level.} + +] + +There also is a syntactic relaxation on the grammar of application +expressions (a.k.a. function calls). Previously, a function call +consisted of a function name and some number of arguments: + +@racketblock[ +(_f _e0 ...) +] + +But since functions will now be considered values, we can generalize +what's allowed in the function position of the syntax for calls to be +an arbitrary expression. That expression is expected to produce a +function value (and this expectation gives rise to a new kind of +run-time error when violated: applying a non-function to arguments), +which can be called with the values of the arguments. + +Hence the syntax is extended to: + +@racketblock[ +(_e _e0 ...) +] + +In particular, the function expression can be a @racket[λ]-expression, +e.g.: + +@racketblock[ +((λ (x) (+ x x)) 10) +] + +But also it may be an expression which produces a function, but isn't +itself a @racket[λ]-expression: + + +@racketblock[ +(define (adder n) + (λ (x) + (+ x n))) +((adder 5) 10) +] + +Here, @racket[(adder 5)] is in the function position of @racket[((adder +5) 10)]. 
That subexpression is itself a function call expression, +calling @racket[adder] with the argument @racket[5]. The result of +that subexpression is a function that, when applied, adds @racket[5] +to its argument. + +In terms of the AST, here's how we model the extended syntax: + +@filebox-include-fake[codeblock "loot/ast.rkt"]{ +#lang racket +;; type Expr = ... +;; | (App Expr (Listof Expr)) +;; | (Lam (Listof Id) Expr) +} + +So for example, the expression @racket[((adder 5) 10)] would be parsed +as: + +@racketblock[ +(App (App (Var 'adder) (Int 5)) (Int 10)) +] + +and @racket[(λ (x) (+ x n))] would be parsed as: + +@racketblock[ +(Lam (list 'x) (Prim2 '+ (Var 'x) (Var 'n))) +] + +We will actually use a slight tweak of this AST when it comes to +representing the syntax of @racket[λ]-expressions. Although functions +are anonymous, it will nonetheless be useful to syntactically +distinguish one @racket[λ]-expression @emph{occurrence} from an +otherwise identical occurrence. + +Consider for example: + +@racketblock[ +(let ((g1 (let ((x 100)) (λ (y) (+ x y)))) + (g2 (let ((x 9)) (λ (y) (+ x y))))) + ...) +] -Two things to note: for now you can ignore the @tt{Name} parameter, -and @tt{Formals} can be defined as a list of variables: +This program has two occurrences of the expression @racket[(λ (y) (+ x +y))]. Even though these expressions are identical and both evaluate +to functions, they @emph{do not} evaluate to the same function! One +is the ``add 100'' function and the other is the ``add 9'' function. + +It will be useful to distinguish these two occurrences so we can talk +about @emph{this} or @emph{that} @racket[λ]-expression. + +The way we accomplish this is we will assume the AST representation of +each distinct occurrence of a @racket[λ]-expression has its own +unique name (represented with a symbol). We choose to have the parser +take care of labelling @racket[λ]-expressions by inserting a +@racket[gensym]'d symbol. 
So, we touch-up the @racket[Lam] AST type +definition as follows: + +@#reader scribble/comment-reader +(racketblock +;; type Expr = ... +;; | (Lam Id (Listof Id) Expr) +) -@verbatim|{ -;; type Formals = (Listof Variable) -}| +and these two occurrences would be distinguished by having distinct +symbols for the label of the expression: -But it's possible to extend the @racket[λ]-notation to include the -ability to define variable-arity functions, as you will see in -@secref["Assignment 6"]. +@ex[ +(Lam (gensym) (list 'y) (Prim2 '+ (Var 'x) (Var 'y))) +(Lam (gensym) (list 'y) (Prim2 '+ (Var 'x) (Var 'y))) +] @section[#:tag-prefix "loot"]{Long Live Lambda!} @@ -61,11 +186,12 @@ forms are @racket[λ]s and applications: @#reader scribble/comment-reader (racketblock -;; Expr REnv -> Answer -(define (interp-env e r) - ;;... +;; Expr REnv Defns -> Answer +(define (interp-env e r ds) + (match e + ;; ... [(Lam _ xs e) '...] - [(App e es) '...]) + [(App e es) '...])) ) These two parts of the interpreter must fit together: @racket[λ] is @@ -109,15 +235,20 @@ in what we know so far: @#reader scribble/comment-reader (racketblock -;; Expr REnv -> Answer -(define (interp-env e r) - ;;... +;; Expr REnv Defns -> Answer +(define (interp-env e r ds) + (match e + ;; ... [(Lam _ xs e) (λ ??? '...)] [(App e es) - (let ((f (interp-eval e r)) - (vs (interp-eval* es r))) - (apply f vs))]) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (apply f vs)])])])) ) @@ -129,18 +260,22 @@ number of arguments: @#reader scribble/comment-reader (racketblock -;; Expr REnv -> Answer -(define (interp-env e r) - ;;... +;; Expr REnv Defns -> Answer +(define (interp-env e r ds) + (match e + ;; ... 
[(Lam _ xs e) (λ vs '...)] [(App e es) - (let ((f (interp-eval e r)) - (vs (interp-eval* es r))) - (apply f vs))]) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (apply f vs)])])])) ) - Second, what should happen when a function is applied? It should produce the answer produced by the body of the @racket[λ] expression in an environment that associates @racket[xs] with @racket[vs]. @@ -148,15 +283,20 @@ Translating that to code, we get: @#reader scribble/comment-reader (racketblock -;; Expr REnv -> Answer -(define (interp-env e r) - ;;... +;; Expr REnv Defns -> Answer +(define (interp-env e r ds) + (match e + ;; ... [(Lam _ xs e) - (λ vs (interp-env e (zip xs vs)))] + (λ vs (interp-env e (zip xs vs) ds))] [(App e es) - (let ((f (interp-eval e r)) - (vs (interp-eval* es r))) - (apply f vs))]) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (apply f vs)])])])) ) And now we have simultaneously arrived at our representation of function values: @@ -193,67 +333,116 @@ in the (Racket) function: @#reader scribble/comment-reader (racketblock -;; Expr REnv -> Answer -(define (interp-env e r) - ;;... +;; Expr REnv Defns -> Answer +(define (interp-env e r ds) + (match e + ;; ... [(Lam _ xs e) - (λ (vs) (interp-env e (append (zip xs vs) r)))] + (λ vs (interp-env e (append (zip xs vs) r) ds))] [(App e es) - (let ((f (interp-eval e r)) - (vs (interp-eval* es r))) - (apply f vs))]) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (apply f vs)])])])) ) The last remaining issue is we should do some type and arity-checking: @#reader scribble/comment-reader (racketblock -;; Expr REnv -> Answer -(define (interp-env e r) - ;;... +;; Expr REnv Defns -> Answer +(define (interp-env e r ds) + (match e + ;; ... 
[(Lam _ xs e) - (λ (vs) - (if (= (length xs) (length vs)) - (interp-env e (append (zip xs vs) r)) - 'err))] + (λ vs + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err))] [(App e es) - (let ((f (interp-eval e r)) - (vs (interp-eval* es r))) - (if (procedure? f) - (apply f vs) - 'err))]) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (if (procedure? f) + (apply f vs) + 'err)])])])) +) + +We have a final issue to deal with. What should we do about +references to functions defined at the top-level of the program? In +other words, how do we make function application work when the function was +defined with @racket[define]? + +One possible answer is to re-use our new power of +@racket[lambda]-expressions by considering @racket[define]-bound names +as just regular old variables, but changing the way that variables are +interpreted so that when evaluating a variable that is not bound in +the local environment, we consult the program definitions and +construct the function value at that moment. + +There will turn out to be a better, more uniform approach, but this +will work for now and is simple. + +So for now we interpret variables as follows: + +@#reader scribble/comment-reader +(racketblock +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) ) +You'll notice that the function is constructed by interpreting a +@racket[lambda]-expression corresponding to the function definition +and that this happens in an empty environment; that's because function +definitions can only occur at the top-level and therefore the only +variables they can reference are other @racket[define]-bound +functions, given in @racket[ds]. 
+ The complete interpreter is: @codeblock-include["loot/interp.rkt"] - We now have the full power of @racket[λ] expressions in our language. We can write recursive functions, using only anonymous functions, via the Y-combinator: + + @ex[ -(interp (parse - '(λ (t) - ((λ (f) (t (λ (z) ((f f) z)))) - (λ (f) (t (λ (z) ((f f) z)))))))) +(define (run . p) (interp (parse p))) + +(run + '(λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z))))))) ] For example, computing the triangular function applied to 10: @ex[ -(interp (parse - '(((λ (t) - ((λ (f) (t (λ (z) ((f f) z)))) - (λ (f) (t (λ (z) ((f f) z)))))) - (λ (tri) - (λ (n) - (if (zero? n) - 1 - (+ n (tri (sub1 n))))))) - 10))) +(run + '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))))) + 36)) ] One of the niceties of using Racket functions to represent Loot @@ -262,30 +451,34 @@ Loot functions: @ex[ (define Y - (interp (parse - '(λ (t) - ((λ (f) (t (λ (z) ((f f) z)))) - (λ (f) (t (λ (z) ((f f) z))))))))) + (run + '(λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))))) (define tri - (interp (parse '(λ (tri) - (λ (n) - (if (zero? n) - 1 - (+ n (tri (sub1 n))))))))) + (run + '(λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n)))))))) ] And then use them from within Racket: @ex[ -((Y tri) 10) +((Y tri) 36) ] + We can also ``import'' Racket functions in to Loot: + @ex[ -(interp-env (parse '(expt 2 10)) - `((expt ,expt))) +(interp-env (parse-e '(expt 2 10)) + (list (list 'expt expt)) + '()) ] @@ -338,29 +531,36 @@ To: (racketblock ;; type Value = ;; | .... -;; | Closure Formals Expr Env +;; | (Closure [Listof Id] Expr Env) ) When a @racket[λ] is evaluated, a closure is created. 
When a function is applied, we deconstruct the closure and execute the code that used to be in the (Racket) function: + @#reader scribble/comment-reader (racketblock -;; Expr REnv -> Answer -(define (interp-env e r) +;; Expr REnv Defns -> Answer +(define (interp-env e r ds) + (match e ;;... [(Lam _ xs e) (Closure xs e r)] [(App e es) - (let ((f (interp-eval e r)) - (vs (interp-eval* es r))) - (match f - [(Closure xs e r) - (if (= (length vs) (length xs)) - (interp-env e (append (zip xs vs) r)) - 'err)] - [_ 'err]))]) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (match f + [(Closure xs e r) + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err)] + [_ 'err])])])])) ) We can give it a try: @@ -369,50 +569,64 @@ We can give it a try: @(ev `(require (file ,(path->string (build-path notes "loot" "interp-defun.rkt"))))) @ex[ -(interp (parse '(λ (x) x))) -(interp (parse '((λ (x) (λ (y) x)) 8))) +(define (run . p) (interp (parse p))) + +(run '(λ (x) x)) +(run '((λ (x) (λ (y) x)) 8)) ] Notice in the second example how the closure contains the body of the -function and the environment mapping the free variable @racket['x] to +function and the environment mapping the free variable: @racket['x] to 8. We can also confirm our larger example works: @ex[ -(interp (parse +(run '(((λ (t) ((λ (f) (t (λ (z) ((f f) z)))) (λ (f) (t (λ (z) ((f f) z)))))) (λ (tri) (λ (n) (if (zero? n) - 1 + 0 (+ n (tri (sub1 n))))))) - 10))) + 36)) ] While can't apply the interpretation of functions in Racket like we did previously, we can @racket[apply-function] the interpretation of functions: -@ex[ +@#reader scribble/comment-reader +(ex (define Y - (interp (parse + (run '(λ (t) ((λ (f) (t (λ (z) ((f f) z)))) - (λ (f) (t (λ (z) ((f f) z))))))))) + (λ (f) (t (λ (z) ((f f) z)))))))) (define tri - (interp (parse - '(λ (tri) - (λ (n) - (if (zero? 
n) - 1 - (+ n (tri (sub1 n))))))))) - -(apply-function (apply-function Y tri) 10) -] + (run + '(λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n)))))))) + +;; Value Value ... -> Answer +(define (apply-function f . vs) + (match f + [(Closure xs e r) + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) '()) + 'err)] + [_ 'err])) + +(apply-function (apply-function Y tri) 36) +) + The process we used to eliminate function values from the interpreter is an instance of a general-purpose whole-program transformation @@ -459,7 +673,8 @@ Let's give it a try: (accepts `(star (char #\a)) "aaaab") (accepts `(star (plus (char #\a) (char #\b))) "aaaab") ] - + + But what if needed to program this regular expression matching without the use of function values? We can arrive at such code systematically by applying defunctionalization. @@ -478,6 +693,7 @@ And we get the same results: ] + @section[#:tag-prefix "loot"]{Compiling Loot} Compiling a @racket[λ]-expression will involve generating two @@ -512,568 +728,511 @@ bound outside of the @racket[λ]-expression.} To deal with the first issue, we first make a pass over the program inserting computed names for each @racket[λ]-expression. -This is the reason for the @tt{Name} field in the @racket[Lam] constructor. +This is the reason for the generated name field in the @racket[Lam] constructor. @#reader scribble/comment-reader (racketblock ;; type Expr = ;; .... -;; | Lam Name [Variable] Expr -) - -Now @racket[λ]-expressions have the form like @racket[(Lam 'fred '(x) (+ x x))]. -The symbol @racket['fred] here is used to give a name to the -@racket[λ]-expression. 
- -The first step of the compiler will be to label every -@racket[λ]-expression using the following function: - -@#reader scribble/comment-reader -(racketblock -;; Expr -> Expr -(define (label-λ e) - (match e - [(Prog ds e) (Prog (map label-λ ds) (label-λ e))] - [(Defn f xs e) (Defn f xs (label-λ e))] - [(Prim1 p e) (Prim1 p (label-λ e))] - [(Prim2 p e1 e2) (Prim2 p (label-λ e1) (label-λ e2))] - [(If e1 e2 e3) (If (label-λ e1) (label-λ e2) (label-λ e3))] - [(Begin e1 e2) (Begin (label-λ e1) (label-λ e2))] - [(Let x e1 e2) (Let x (label-λ e1) (label-λ e2))] - [(LetRec bs e1) (LetRec (map (lambda (xs) (map label-λ xs)) bs) (label-λ e1))] - [(Lam '() xs e) (Lam (gensym 'lam) xs (label-λ e))] - [(Lam n xs e) (Lam (gensym n) xs (label-λ e))] - [(App f es) (App (label-λ f) (map label-λ es))] - [_ e])) +;; | (Lam Id [Listof Id] Expr) ) -Here it is at work: +These labels are inserted by the parser. Here it is at work: @ex[ -(label-λ (parse +(parse-e '(λ (t) ((λ (f) (t (λ (z) ((f f) z)))) - (λ (f) (t (λ (z) ((f f) z)))))))) + (λ (f) (t (λ (z) ((f f) z))))))) ] Now turning to the second issue--@racket[λ]-expression may reference variables bound outside of the expression---let's consider how to compile something like @racket[(λ (x) z)]? -There are many possible solutions, but perhaps the simplest is to -compile this as a function that takes @emph{two} arguments, -i.e. compile it as if it were: @racket[(λ (x z) z)]. The idea is that -a @racket[λ]-expression defines a function of both explicit arguments -(the parameters) and implicit arguments (the free variables of the -@racket[λ]-expression). +There are many possible solutions, but here is one. Every function +can be passed an implicit first argument which will point to a section +of memory that contains all of the values for the free variables. + +In other words, the code for functions will accept an additional +argument that plays the role of the environment for this particular +instance of the function. 
+ +The first thing the function does once called is copies these values +from memory to the stack and then executes the body of the function in +an environment that binds both the free variables and the formal +parameters. This will have to work in concert with closure creation and function calls. When the @racket[λ]-expression is evaluated, a closure will be -created storing the value of @racket[z]. When the function is -applied, the caller will need to retrieve that value and place it as -the second argument on stack before calling the function's code. +created storing the value of @racket[z] in memory. When the function +is applied, the caller will need to retrieve that value and place it +as the first argument on stack before calling the function's code. To implement this, we will need to compute the free variables, which we do with the following function: +@codeblock-include["loot/fv.rkt"] + +We can now write the function that compiles a labelled +@racket[λ]-expression into a function in assembly: + @#reader scribble/comment-reader (racketblock -;; Expr -> (Listof Variable) -(define (fvs e) - (define (fvs e) - (match e - [(Prim1 p e) (fvs e)] - [(Prim2 p e1 e2) (append (fvs e1) (fvs e2))] - [(If e1 e2 e3) (append (fvs e1) (fvs e2) (fvs e3))] - [(Begin e1 e2) (append (fvs e1) (fvs e2))] - [(Let x e1 e2) (append (fvs e1) (remq* (list x) (fvs e2)))] - [(LetRec bs e1) (let ((bound (map car bs)) - (def-fvs (append-map fvs-bind bs))) - (remq* bound (append def-fvs (fvs e1))))] - [(Lam n xs e1) (remq* xs (fvs e1))] - [(Var x) (list x)] - [(App f es) (append (fvs f) (append-map fvs es))] - [_ '()])) - (remove-duplicates (fvs e))) +;; Lam -> Asm +(define (compile-lambda-define l) + (let ((fvs (fv l))) + (match l + [(Lam f xs e) + (let ((env (append (reverse fvs) (reverse xs) (list #f)))) + (seq (Label (symbol->label f)) + (Mov rax (Offset rsp (* 8 (length xs)))) + (Xor rax type-proc) + (copy-env-to-stack fvs 8) + (compile-e e env #t) + (Add rsp (* 8 (length env))) ; 
pop env + (Ret)))]))) ) -We can now write the function that compiles a labelled -@racket[λ]-expression into a function in assembly: +Notice how similar it is to our previous function definition compiler: + +@#reader scribble/comment-reader +(racketblock +;; Defn -> Asm +(define (compile-define d) + (match d + [(Defn f xs e) + (seq (Label (symbol->label f)) + (compile-e e (reverse xs) #t) + (Add rsp (* 8 (length xs))) ; pop args + (Ret))])) +) + +The key difference here is that we are expecting the caller to leave +the closure at the top of the stack. When called, the function +fetches the closure and copies its environment to the stack, hence the +body of the function has a static environment which includes the free +variables followed by the parameters followed by the closure. + +The copying of the values from the closure environment to the stack is +achieved by this helper function: @#reader scribble/comment-reader (racketblock -;; Lambda -> Asm -(define (compile-λ-definition l) - (match l - [(Lam '() xs e) (error "Lambdas must be labelled before code-gen")] - [(Lam f xs e) - (let* ((free (remq* xs (fvs e))) - ; leave space for RIP - (env (parity (cons #f (cons #f (reverse (append xs free))))))) - (seq - (Label (symbol->label f)) - ; we need the #args on the frame, not the length of the entire - ; env (which may have padding) - ; Ignore tail calls for now - (compile-e e env) - (Ret)))])) +;; [Listof Id] Int -> Asm +;; Copy the closure environment at given offset to stack +(define (copy-env-to-stack fvs off) + (match fvs + ['() (seq)] + [(cons _ fvs) + (seq (Mov r9 (Offset rax off)) + (Push r9) + (copy-env-to-stack fvs (+ 8 off)))])) ) +When the body of the function completes, all of these elements are +popped off the stack and the function returns. 
+ Here's what's emitted for a @racket[λ]-expression with a free variable: @ex[ -(compile-λ-definition (Lam 'f '(x) (Var 'z))) +(compile-lambda-define (Lam 'f '(x) (Var 'z))) ] -Notice that it's identical to a @racket[λ]-expression with an added -parameter and no free variables: -@ex[ -(compile-λ-definition (Lam 'f '(x z) (Var 'z))) -] The compiler will need to generate one such function for each @racket[λ]-expression in the program. So we use a helper function for -extracting all the @racket[λ]-expressions and another for compiling -each of them: +extracting all the @racket[λ]-expressions: + +@codeblock-include["loot/lambdas.rkt"] + +And another for compiling each of them: @#reader scribble/comment-reader (racketblock -;; LExpr -> (Listof LExpr) -;; Extract all the lambda expressions -(define (λs e) - (match e - [(Prog ds e) (append (append-map λs ds) (λs e))] - [(Defn f xs e) (λs e)] - [(Prim1 p e) (λs e)] - [(Prim2 p e1 e2) (append (λs e1) (λs e2))] - [(If e1 e2 e3) (append (λs e1) (λs e2) (λs e3))] - [(Begin e1 e2) (append (λs e1) (λs e2))] - [(Let x e1 e2) (append (λs e1) (λs e2))] - [(LetRec bs e1) (append (append-map lambda-defs bs) (λs e1))] - [(Lam n xs e1) (cons e (λs e1))] - [(App f es) (append (λs f) (append-map λs es))] - [_ '()])) - -;; [Lam] -> Asm -(define (compile-λ-definitions ds) - (seq - (match ds - ['() (seq)] - [(cons d ds) - (seq (compile-λ-definition d) - (compile-λ-definitions ds))]))) +;; [Listof Lam] -> Asm +(define (compile-lambda-defines ls) + (match ls + ['() (seq)] + [(cons l ls) + (seq (compile-lambda-define l) + (compile-lambda-defines ls))])) ) -The top-level @racket[compile] function now labels inserts labels and -compiles all the @racket[λ]-expressions to functions: +The top-level @racket[compile] function now extracts and compiles all +the @racket[λ]-expressions to functions: @#reader scribble/comment-reader (racketblock ;; Prog -> Asm (define (compile p) - (match (label-λ (desugar p)) - [(Prog '() e) - (prog (Extern 'peek_byte) 
- (Extern 'read_byte) - (Extern 'write_byte) - (Extern 'raise_error) + (match p + [(Prog ds e) + (prog (externs) + (Global 'entry) (Label 'entry) - (Mov rbx rdi) - (compile-e e '(#f)) - (Mov rdx rbx) + (Mov rbx rdi) ; recv heap pointer + (compile-e e '() #t) (Ret) - (compile-λ-definitions (λs e)))])) + (compile-lambda-defines (lambdas e)) + (Label 'raise_error_align) + pad-stack + (Call 'raise_error))])) ) What remains is the issue of compiling @racket[λ]-expressions to code -to create a closure. +to create a closure and using closures to provide the appropriate +environment when called. + + +@section[#:tag "closure" #:tag-prefix "loot"]{Save the Environment: Create a Closure!} + + +The basic challenge we are faced with is designing a representation of +functions as values. Like other kinds of values, functions will be +disjoint kind of value, meaning bits representing a function will need +to be tagged distinctly from other kinds of values. Functions will +need to represent all of the run-time information in the +@racket[Closure] structure used in the interpreter. Looking back, a +@racket[Closure] contains the formal parameters of the +@racket[lambda]-expression, the body, and the environment in place at +the time the @racket[lambda]-expression was evaluated. -@section[#:tag-prefix "loot"]{Save the Environment: Create a Closure!} +The parameters and body expression are relevant +@racket[compile-lambda-define]. What's relevant for the closure is +the label of @racket[lambda]-expression and the environment. For the +compiler, the environment can be represented by the sequence of values +it contains at run-time. -We've already seen how to create a reference to a function pointer, -enabling functions to be first-class values that can be passed around, -returned from other functions, stored in data structures, etc. The -basic idea was to allocate a location in memory and save the address -of a function label there. 
+So, the way we will represent a closure is by a tagged pointer to a +sequence in memory that contains the label of the closure's code and a +sequence of values that were bound to the free variables when the +@racket[lambda]-expression was evaluated. -A closure is just this, plus the environment that needs to be restored -with the function is called. So representing a closure is fairly -straightforward: we will allocate a location in memory and save the -function label, plus each value that is needed from the environment. -In order to keep track of how many values there are, we'll also store -the length of the environment. +When a @racket[lambda]-expression is evaluated, we allocate a closure +on the heap, write the @racket[lambda]'s label, followed by the values +of the free variables. The result of evaluating the expression is the +tagged pointer to the memory just written. Here's the function for emitting closure construction code: @#reader scribble/comment-reader (racketblock -;; (Listof Variable) Label (Listof Variable) CEnv -> Asm -(define (compile-λ xs f ys c) - (seq - ; Save label address - (Lea rax (symbol->label f)) - (Mov (Offset rbx 0) rax) - - ; Save the environment - (%% "Begin saving the env") - (Mov r8 (length ys)) - - (Mov (Offset rbx 8) r8) - (Mov r9 rbx) - (Add r9 16) - (copy-env-to-heap ys c 0) - (%% "end saving the env") - - ; Return a pointer to the closure - (Mov rax rbx) - (Or rax type-proc) - (Add rbx (* 8 (+ 2 (length ys)))))) +;; Id [Listof Id] Expr CEnv -> Asm +(define (compile-lam f xs e c) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx 0) rax) + (free-vars-to-heap fvs c 8) + (Mov rax rbx) ; return value + (Or rax type-proc) + (Add rbx (* 8 (add1 (length fvs))))))) ) -Compared the previous code we say for function pointer references, the -only difference is the code to store the length and value of the free -variables of the @racket[λ]-expression. 
Also: the amount of memory -allocated is no longer just a single cell, but depends on the number -of free variables being closed over. - -The @racket[copy-env-to-heap] function generates instructions for -dereferencing variables and copying them to the appropriate memory -location where the closure is stored: +It relies on a helper function for emitting instructions to copy the +value of free variables, i.e. variables bound in the current +environment but outside of the @racket[lambda]-expression. It fetches +these values just like a variable reference would: it computes the +variables lexical address and fetches it from the stack, then writes +it to the heap. @#reader scribble/comment-reader (racketblock -;; (Listof Variable) CEnv Natural -> Asm -;; Pointer to beginning of environment in r9 -(define (copy-env-to-heap fvs c i) +;; [Listof Id] CEnv Int -> Asm +;; Copy the values of given free variables into the heap at given offset +(define (free-vars-to-heap fvs c off) (match fvs ['() (seq)] [(cons x fvs) - (seq - ; Move the stack item in question to a temp register - (Mov r8 (Offset rsp (lookup x c))) - - ; Put the iterm in the heap - (Mov (Offset r9 i) r8) - - ; Do it again for the rest of the items, incrementing how - ; far away from r9 the next item should be - (copy-env-to-heap fvs c (+ 8 i)))])) + (seq (Mov r8 (Offset rsp (lookup x c))) + (Mov (Offset rbx off) r8) + (free-vars-to-heap fvs c (+ off 8)))])) ) That's all there is to closure construction! @section[#:tag-prefix "loot"]{Calling Functions} -The last final peice of the puzzle is making function calls and -closures work together. Remember that a @racket[λ]-expression is -compiled into a function that expects two sets of arguments on the -stack: the first are the explicit arguments that given at the call -site; the other arguments are the implicit arguments corresponding to -free variables the @racket[λ]-expression being called. 
The value of -these arguments are given by the environment saved in the closure of -the @racket[λ]-expressions. +The last piece of the puzzle is making function calls and closures +work together. Remember that a @racket[λ]-expression is compiled into +a function that expects a closure @emph{plus} its arguments on the +stack. So the code generated for a function call needs to manage running each -subexpression, the first of which should evaluate to a function (a -pointer to a closure). The arguments are saved on the stack, and then -the values stored in the environment part of the closure need to be -copied from the heap to the stack: +subexpression, the first of which should evaluate to a function (i.e. +a pointer to a label and environment in memory) and then fetching the +function's label and jumping to it. + +Here is the code for the non-tail-calls: @#reader scribble/comment-reader (racketblock -;; Expr (Listof Expr) CEnv -> Asm -(define (compile-call f es c) - (let* ((cnt (length es)) - (aligned (even? 
(+ cnt (length c)))) - (i (if aligned 1 2)) - (c+ (if aligned - c - (cons #f c))) - (c++ (cons #f c+))) - (seq - - (%% "Begin compile-call") - ; Adjust the stack for alignment, if necessary - (if aligned - (seq) - (Sub rsp 8)) - - ; Generate the code for the thing being called - ; and push the result on the stack - (compile-e f c+) - (%% "Push function on stack") - (Push rax) - - ; Generate the code for the arguments - ; all results will be put on the stack (compile-es does this) - (compile-es es c++) - - ; Get the function being called off the stack - ; Ensure it's a proc and remove the tag - ; Remember it points to the _closure_ - (%% "Get function off stack") - (Mov rax (Offset rsp (* 8 cnt))) - (assert-proc rax) - (Xor rax type-proc) - - (%% "Get closure env") - (copy-closure-env-to-stack) - (%% "finish closure env") - - ; get the size of the env and save it on the stack - (Mov rcx (Offset rax 8)) - (Push rcx) - - ; Actually call the function - (Call (Offset rax 0)) - - ; Get the size of the env off the stack - (Pop rcx) - (Sal rcx 3) - - ; pop args - ; First the number of arguments + alignment + the closure - ; then captured values - (Add rsp (* 8 (+ i cnt))) - (Add rsp rcx)))) +;; Expr [Listof Expr] CEnv -> Asm +;; The return address is placed above the arguments, so callee pops +;; arguments and return address is next frame +(define (compile-app-nontail e es c) + (let ((r (gensym 'ret)) + (i (* 8 (length es)))) + (seq (Lea rax r) + (Push rax) + (compile-es (cons e es) (cons #f c)) + (Mov rax (Offset rsp i)) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) ; fetch the code label + (Jmp rax) + (Label r)))) ) -The main aspect involving lambdas is @racket[copy-closure-env-to-stack]. 
-Unlike the closure construction code, in which we statically know what -and how many variables to save in a closure, we must dynamically -loop over the environment to move values to the stack: +Compared to the previous version of this code, it additionally +executes the code for @racket[_e]. After all the subexpression are +evaluated, it fetches the value of @racket[_e] off the stack, checks +that it is a function, then fetches the label for the function's code +and jumps to it. Notice how the stack naturally has the function as +the top-most element. This is used by the code for the function to +fetch the values stored in the closure. + +The code for tail calls is similar, but adapted to avoid pushing a +return frame and to pop the local environment before jumping: @#reader scribble/comment-reader (racketblock -;; -> Asm -;; Copy closure's (in rax) env to stack in rcx -(define (copy-closure-env-to-stack) - (let ((copy-loop (symbol->label (gensym 'copy_closure))) - (copy-done (symbol->label (gensym 'copy_done)))) - (seq - - (Mov r8 (Offset rax 8)) ; length - (Mov r9 rax) - (Add r9 16) ; start of env - (Label copy-loop) - (Cmp r8 0) - (Je copy-done) - (Mov rcx (Offset r9 0)) - (Push rcx) ; Move val onto stack - (Sub r8 1) - (Add r9 8) - (Jmp copy-loop) - (Label copy-done)))) +;; Expr [Listof Expr] CEnv -> Asm +(define (compile-app-tail e es c) + (seq (compile-es (cons e es) c) + (move-args (add1 (length es)) (length c)) + (Add rsp (* 8 (length c))) + (Mov rax (Offset rsp (* 8 (length es)))) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) + (Jmp rax))) ) -Let's try it out: -@ex[ -(asm-interp (compile (parse '((let ((x 8)) (λ (y) x)) 2)))) -(asm-interp (compile (parse '(((λ (x) (λ (y) x)) 8) 2)))) -(asm-interp (compile (parse '((λ (f) (f (f 0))) (λ (x) (add1 x)))))) -] +We've now implemented all there is to first-class functions. It's +possible to write recursive functions using the Y-combinator, although +that's no so convenient. 
Next we can tackle the issue of recursive or +even sets of mutually recursive functions by dealing with top-level +function definitions. + @section[#:tag-prefix "loot"]{Recursive Functions} Writing recursive programs with the Y-combinator is a bit -inconvenient. Let us now add a recursive function binding construct: -@racket[letrec]. - -A @racket[letrec]-expression has a shape like a -@racket[let]-expression, but variables are bound in both the body -@emph{and} the right-hand-side of the @racket[letrec]. To keep -matters simple, we will assume the right-hand-sides of a -@racket[letrec] are all @racket[λ]-expressions. (Racket eases this -restriction, but it significantly complicates compilation.) - -So for example, writing the @racket[even?] and @racket[odd?] functions -using @racket[letrec] looks like: - -@ex[ -(letrec ((even? - (λ (x) - (if (zero? x) - #t - (odd? (sub1 x))))) - (odd? - (λ (x) - (if (zero? x) - #f - (even? (sub1 x)))))) - (even? 10)) -] +inconvenient. +We previously had the ability to write recursive or even mutually +recursive function definitions by defining them at the top-level with +@racket[define], although that was before functions were considered +first-class values. -To compile a @racket[letrec]-expression, we can compile the -@racket[λ]-expression as functions just as before. Notice that the -recursive (or mutually recursive) occurrence will be considered a free -variable within the @racket[λ]-expression, so just like any other free -variable, the closure creation should capture the value of this -binding. +What changes now? -We need to extend the syntax functions for computing free variables, -extracting @racket[λ]-expressions, and so on. All of this is -straightforward. +Well, one view is that @racket[(define (f x) (add1 x))] is really just +defining a function and giving it a name. In other words, it's really +just saying @racket[(define f (lambda (x) (add1 x)))]. 
We already +know how to compile @racket[lambda]-expressions and we already know +how to bind variable names to values, so it would seem this is not so +difficult to accommodate. -The key complication to compiling a @racket[letrec]-expression is that -the name of a function should be bound---to itself---within the body -of the function. The key insight into achieving this is to first -allocate closures, but to delay the actual population of the closures' -environments. +A program consisting of a series of function definitions followed by +an expression can first compile all the function definitions, then +create a series of closures, push them on the stack, then execute the +main expression in an environment that includes the names of the +defined functions. -The way that compiling a @racket[letrec]-expression works is roughly: +That will work just fine for an example like @racket[(define (f x) (add1 +x)) (f 5)]. -@itemlist[ +Where it breaks down is in a program like this: -@item{allocate a closure for each of the right-hand-side -@racket[λ]-expressions, but do not copy the (relevant parts of the) -environment in to closures (yet),} +@#reader scribble/comment-reader +(racketblock +(define (f n) + (if (zero? n) + 1 + (+ n (f (sub1 n))))) -@item{push each of these closures on to the stack (effectively binding -the left-hand-sides to the unitialized closures),} +(f 10) +) -@item{now that the names are bound, we can populate the closures, and -references to any of the @racket[letrec]-bound variables will be -captured correctly,} +Why? Because the (implicit) @racket[lambda]-expression here has a +free variable @racket[f]. In the closure representation, what should +the value of this variable be? It should be the function @racket[f] +itself. In other words, it should be a tagged pointer to the closure, +meaning that the closure representation of a recursive function is a +cyclic data structure! 
-@item{then compile the body in an environment that includes all of the -@racket[letrec]-bound variables.} +But how can we create such a structure? In creating the closure +representation of the function @racket[f] we would need to write the +pointer to the value we are constructing @emph{as we construct it}. -] - -The @racket[compile-letrec] function takes a list of variables to -bind, the right-hand-side @racket[λ]-expressions, body, and -compile-time environment. It relies on three helper functions to -handle the tasks listed above: +To make matters worse, consider a set of mutually recursive functions +like this: @#reader scribble/comment-reader (racketblock -;; (Listof Variable) (Listof Lambda) Expr CEnv -> Asm -(define (compile-letrec fs ls e c) - (seq - (compile-letrec-λs ls c) - (compile-letrec-init fs ls (append (reverse fs) c)) - (compile-e e (append (reverse fs) c)) - (Add rsp (* 8 (length fs))))) +(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) +(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + +(even? 101) ) -The first two tasks are taken care of by @racket[compile-letrec-λs], -which allocates unitialized closures and pushes each on the stack. +Both @racket[even?] and @racket[odd?] contain a free variable: for +@racket[even?] it's @racket[odd?] and for @racket[odd?] it's +@racket[even?]. Hence the closure representation of @racket[even?] +should be two words long; the first word will be the address of the +label that contains @racket[even?]'s code and the second word will be +the tagged pointer to the @racket[odd?] closure. Likewise, the +closure representation of @racket[odd?] will be two words long, +containing the address of the label for @racket[odd?] followed by the +tagged pointer to the @racket[even?] closure. + +How can we possibly construct these two closures that must each point +to the other? + +The solution here is to recognize that the closures can be constructed +in a staged way. 
We can lay out the memory for each closure but delay +writing the value of the free variables. This is possible because all +we need to know in order to allocate the memory for a closure is the +number of free variables that occur in the syntax of the +@racket[lambda]-expression. Once we have addresses for each closure +we are constructing, we can @emph{then} go back and initialize each +closure writing the value of its free variables. Doing this staged +initialization is safe because we know that none of these functions +can be called before the initialization is complete. (Try to convince +yourself of this by considering the grammar of programs.) + +Using that idea, we can compile the functions defined at the top-level +in a slightly different way from @racket[lambda]-expressions. We will +first allocate memory for all of the closures and push tagged pointers +for each of them on the stack, effectively binding the defined +function names to their (uninitialized) closures. We then copy free +variable values to memory, initializing the closures. Doing it in +this way allows functions to refer back to themselves or other +top-level function definitions. 
+ +First, the easy stuff: the code of a top-level function definition is +compiled just like a @racket[lambda]-expression: @#reader scribble/comment-reader (racketblock -;; (Listof Lambda) CEnv -> Asm -;; Create a bunch of uninitialized closures and push them on the stack -(define (compile-letrec-λs ls c) - (match ls - ['() (seq)] - [(cons l ls) - (match l - [(Lam lab as body) - (let ((ys (fvs l))) - (seq - (Lea rax (Offset (symbol->label lab) 0)) - (Mov (Offset rbx 0) rax) - (Mov rax (length ys)) - (Mov (Offset rbx 8) rax) - (Mov rax rbx) - (Or rax type-proc) - (Add rbx (* 8 (+ 2 (length ys)))) - (Push rax) - (compile-letrec-λs ls (cons #f c))))])])) +;; Defn -> Asm +(define (compile-define d) + (match d + [(Defn f xs e) + (compile-lambda-define (Lam f xs e))])) ) -The @racket[compile-letrec-init] goes through each function and -initializes its closure now that all of the function pointers are -available. Finally the body is compiled in an extended environment. +We extend this to lists of function definitions in the obvious way: @#reader scribble/comment-reader (racketblock -;; (Listof Variable) (Listof Lambda) CEnv -> Asm -(define (compile-letrec-init fs ls c) - (match fs +;; [Listof Defn] -> Asm +(define (compile-defines ds) + (match ds ['() (seq)] - [(cons f fs) - (let ((ys (fvs (first ls)))) - (seq - (Mov r9 (Offset rsp (lookup f c))) - (Xor r9 type-proc) - (Add r9 16) ; move past label and length - (copy-env-to-heap ys c 0) - (compile-letrec-init fs (rest ls) c)))])) + [(cons d ds) + (seq (compile-define d) + (compile-defines ds))])) ) -We can give a spin: +And in compiling a program @racket[(Prog ds e)] we make sure to emit +@racket[(compile-defines ds)]. -@ex[ -(asm-interp (compile (parse - '(letrec ((even? - (λ (x) - (if (zero? x) - #t - (odd? (sub1 x))))) - (odd? - (λ (x) - (if (zero? x) - #f - (even? (sub1 x)))))) - (even? 10))))) - -(asm-interp - (compile (parse - '(letrec ((map (λ (f ls) - (letrec ((mapper (λ (ls) - (if (empty? 
ls) - '() - (cons (f (car ls)) (mapper (cdr ls))))))) - (mapper ls))))) - (map (λ (f) (f 0)) - (cons (λ (x) (add1 x)) - (cons (λ (x) (sub1 x)) - '()))))))) -] +Now we have to turn to creating all of the closures for @racket[ds]. +To accomplish this, we write a function +@racket[(compile-defines-values ds)] that will create a closure for +each function definition and push it on the stack. +The top-level expression @racket[e] will no longer be compiled in the +empty environment, but instead in an environment that includes all of +the names defined as functions. So to compile @racket[(Prog ds e)] we +@racket[(compile-e e (reverse (define-ids ds)) #t)], where +@racket[define-ids] is a simple function for fetching the list of +function names defined by @racket[ds] (the list of names is reversed +because the functions are pushed on in the order they appear, hence +the last function is the most recently pushed). +Here is the definition of @racket[compile-defines-values]: -@section[#:tag-prefix "loot"]{Syntactic sugar for function definitions} +@#reader scribble/comment-reader +(racketblock +;; Defns -> Asm +;; Compile the closures for ds and push them on the stack +(define (compile-defines-values ds) + (seq (alloc-defines ds 0) + (init-defines ds (reverse (define-ids ds)) 8) + (add-rbx-defines ds 0))) +) -The @racket[letrec] form is a generlization of the -@racket[(begin (define (_f _x ...) _e) ... _e0)] form we started with -when we first started looking at adding functions to the language. To -fully subsume the language of @seclink["Iniquity"]{Iniquity}, we can -add this form back in to the language as syntactic sugar for -@racket[letrec], i.e. we can eliminate this form from programs by -rewriting them. +It does the staged allocation and initialization of the closures as +described earlier. Once the closures are allocated and initialized, +it bumps @racket['rbx] by the total size of all the allocated closures. 
-Let @tt{Expr+} refer to programs containing @racket[(begin (define (_f -_x ...) _e) ... _e0)]. The @racket[desugar] function writes -@tt{Expr+}s into @tt{Expr}s. +The @racket[alloc-defines] function allocates, but leaves +uninitialized, each of the closures and pushes them on the stack: @#reader scribble/comment-reader (racketblock -(define (desugar e+) - (match e+ - [(Prog '() e) (Prog '() (desugar e))] - [(Prog ds e) (let ((defs (map desugar ds))) - (Prog '() (LetRec defs e)))] - [(Defn f xs e) (list f (Lam f xs e))] - [(Prim1 p e) (Prim1 p (desugar e))] - [(Prim2 p e1 e2) (Prim2 p (desugar e1) (desugar e2))] - [(If e1 e2 e3) (If (desugar e1) (desugar e2) (desugar e3))] - [(Begin e1 e2) (Begin (desugar e1) (desugar e2))] - [(Let x e1 e2) (Let x (desugar e1) (desugar e2))] - [(LetRec bs e1) (LetRec (map (lambda (xs) (map desugar xs)) bs) (desugar e1))] - [(Lam n xs e) (Lam (gensym 'lam) xs (desugar e))] - [(App f es) (App (desugar f) (map desugar es))] - [_ e+])) +;; Defns Int -> Asm +;; Allocate closures for ds at given offset, but don't write environment yet +(define (alloc-defines ds off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx off) rax) + (Mov rax rbx) + (Add rax off) + (Or rax type-proc) + (Push rax) + (alloc-defines ds (+ off (* 8 (add1 (length fvs)))))))])) ) -The compiler now just desugars before labeling and compiling expressions. 
+The @racket[init-defines] function intializes each of the closures +using @racket[free-vars-to-heap]: -And here's the complete compiler, including tail calls, @racket[letrec], etc.: +@#reader scribble/comment-reader +(racketblock +;; Defns CEnv Int -> Asm +;; Initialize the environment for each closure for ds at given offset +(define (init-defines ds c off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (free-vars-to-heap fvs c off) + (init-defines ds c (+ off (* 8 (add1 (length fvs)))))))])) +) -@codeblock-include["loot/compile.rkt"] +Finally, the @racket[add-rbx-defines] function computes the total size +of all the closures and adjusts @racket['rbx] appropriately: +@#reader scribble/comment-reader +(racketblock +;; Defns Int -> Asm +;; Compute adjustment to rbx for allocation of all ds +(define (add-rbx-defines ds n) + (match ds + ['() (seq (Add rbx (* n 8)))] + [(cons (Defn f xs e) ds) + (add-rbx-defines ds (+ n (add1 (length (fv (Lam f xs e))))))])) +) + + +@section[#:tag-prefix "loot"]{A Complete Compiler} + +Putting all the pieces together, we have the complete compile for Loot: + +@codeblock-include["loot/compile.rkt"] diff --git a/www/notes/modules.scrbl b/www/notes/modules.scrbl new file mode 100644 index 00000000..45e65f13 --- /dev/null +++ b/www/notes/modules.scrbl @@ -0,0 +1,253 @@ +#lang scribble/manual + +@(require (for-label (except-in racket compile ...) a86)) +@(require redex/pict + racket/runtime-path + scribble/examples + "utils.rkt" + "ev.rkt" + "../fancyverb.rkt" + "../utils.rkt") + +@(define codeblock-include (make-codeblock-include #'h)) + +@(define (shellbox . 
s) + (parameterize ([current-directory (build-path notes "jig")]) ;; FIXME + (filebox (emph "shell") + (fancyverbatim "fish" (apply shell s))))) + + +@(ev '(require rackunit a86)) +@(ev `(current-directory ,(path->string (build-path notes "jig")))) ;; FIXME +@(void (ev '(with-output-to-string (thunk (system "make runtime.o"))))) +@(for-each (λ (f) (ev `(require (file ,f)))) + '("interp.rkt" "compile.rkt" "ast.rkt" "parse.rkt" "types.rkt" "unload-bits-asm.rkt")) + +@(define this-lang "Knock") + +@title[#:tag this-lang]{@|this-lang|: Libraries and Modules} + +@table-of-contents[] + +@section[#:tag-prefix "knock"]{Modules, Everywhere!} + +With @secref{Iniquity}, we introduced function definitions and +function calls and with @secref{Jig} we even made sure that calls +behave properly and that a call in tail position operates like a +``goto with arguments.'' + +At this point, we have a pretty rich language: there are some nice +built-in datatypes (although lacking the ability to add user-defined +data types is a real shortcoming), we have a few useful primitives, +and we can write and use functions. + +One limitation of our language so far is that programs are assumed to +be monolithic; we're given a single file containing a bunch of +function definitions and an expression to evaluate. This of course +isn't realistic. Real programs are composed of different parts and +these parts can be re-used. Our compiler is a good example: we've +defined several modules for dealing with data type definitions, +parsing, compiling primitives, etc. + +Let us now endow our language with the same capability. + +We'll call it @bold{@this-lang}! + +@section[#:tag-prefix "knock"]{What is a Module?} + +A module is something we've been writing throughout this course. +Roughly it's a unit of code that may make use of other modules via +@racket[require], define some things, and provide a subset of those +things via @racket[provide]. 
For example, the following is a module: + +@typeset-code{ +#lang racket +(provide rev) + +;; [Listof X] -> [Listof X] +(define (rev xs) + (rev/a xs '())) + +;; [Listof X] [Listof X] -> [Listof X] +(define (rev/a xs ys) + (if (empty? xs) + ys + (rev/a (cdr xs) (cons (car xs) ys)))) +} + +It doesn't require any other modules, but it provides +@racket[rev]. It defines @racket[rev] in terms of a helper +function @racket[rev/a], which is not provided, and therefore won't be +visible to clients of the module. + +Another module can use this module: + +@typeset-code{ +#lang racket +(provide main) +(require "rev.rkt") + +(define (main) + (rev (cons 1 (cons 2 (cons 3 '()))))) +} + +This module requires the module defined in @tt{rev.rkt}, which makes +the @racket[rev] function available within the module, and defines +a @racket[main] function that uses @racket[rev]. + +We might think of this as the ``main'' entry point of the program, +and can run it in Racket with the following command: + +racket -t p.rkt -m + +which says to require the module and call its @racket[main] function. + + + + +@section[#:tag-prefix "knock"]{A Standard Library} + + + +But one thing that's missing is a large set of useful functions. Sure +you can roll your own @racket[length], @racket[reverse], and +@racket[append] functions for operating on lists, but these kinds of +things usually come standard with a programming language. + +One option is to add more primitives. We could add a case to +@racket[compile-op1] to handle @racket['length] and then implement the +length operation on lists in assembly. That sounds tedious, and +writing assembly is easy to get wrong, hard to debug, and just +generally a little unpleasant. We're building a high-level language, +after all, so that we can write programs in a way that is easier to +understand and interact with. + +We can just write @racket[length] in Racket. We can debug and test it +in Racket. 
We might come up with something like: + +@#reader scribble/comment-reader +(racketblock +;; [Listof X] -> Natural +(define (length xs) + (if (empty? xs) + 0 + (add1 (length (cdr xs))))) +) + +Or, now knowing more about tail recursion, we might write an iterative +version: + +@#reader scribble/comment-reader +(racketblock +;; [Listof X] -> Natural +(define (length xs) + (length/a xs 0)) + +;; [Listof X] Natural -> Natural +(define (length/a xs n) + (if (empty? xs) + n + (length/a (cdr xs) (add1 n)))) +) + +But here's the thing: we've sort of reached a critical tipping point +with our compiler. Rather than write the assembly code for +@racket[length] by hand, we can write the code in Racket @emph{and +compile it!} + + +@#reader scribble/comment-reader +(ex +(compile-define + (parse-define + ;; [Listof X] -> Natural + '(define (length xs) + (length/a xs 0)))) + +(compile-define + (parse-define + ;; [Listof X] Natural -> Natural + '(define (length/a xs n) + (if (empty? xs) + n + (length/a (cdr xs) (add1 n)))))) +) + +In fact, we can declare @racket[length]'s label as a @racket[Global], +declare some other labels that will be linked in later, and print +these instructions to a file, like so: + +@#reader scribble/comment-reader +(ex +(with-output-to-file "length.s" + #:exists 'truncate + (λ () + (displayln + (asm-string + (seq (Global (symbol->label 'length)) + (Extern 'raise_error_align) + (externs) + (compile-define + (parse-define + ;; [Listof X] -> Natural + '(define (length xs) + (length/a xs 0)))) + (compile-define + (parse-define + ;; [Listof X] Natural -> Natural + '(define (length/a xs n) + (if (empty? 
xs) + n + (length/a (cdr xs) (add1 n)))))))))))) + +We can confirm the file now contains the instructions in NASM syntax: + +@shellbox["cat length.s"] + +Now we can assemble the instructions into an object file and link it +together with the runtime system to obtain a new runtime system that +includes @racket[length]: + +@shellbox[(format "nasm -g -f ~a -o length.o length.s" + (if (eq? (system-type) 'macosx) + "macho64" + "elf64")) + "ld -r runtime.o length.o -o runtime-plus-length.o"] + +Now we can compile programs that use @racket[length] without further +work from our compiler by simply linking them against this new runtime: + + +@#reader scribble/comment-reader +(ex +(current-objs '("runtime-plus-length.o")) +(unload/free + (asm-interp + (seq (Extern (symbol->label 'length)) + (Global 'raise_error_align) + (compile + (parse + '[(length (cons 1 (cons 2 (cons 3 '()))))])))))) + +This suggests a path forward for extending the language with new +functionality: + +@itemlist[ + +@item{implement functions within the language the compiler can handle,} + +@item{compile the function definition, and link in to the runtime +system so that other programs can now make use of the new functions.} + +] + +As you can see from the examples above, most of the work of adding +functionality in this way concerns declaring the appropriate labels +@racket[Global] or @racket[Extern] as appropriate and then building +and linking the parts of the runtime system. + +Before addressing these issues, let's instead turn to another, closely +related issue: supporting the compilation of multi-module programs. + +@section[#:tag-prefix "knock"]{Modules} + diff --git a/www/notes/mountebank.scrbl b/www/notes/mountebank.scrbl new file mode 100644 index 00000000..98105505 --- /dev/null +++ b/www/notes/mountebank.scrbl @@ -0,0 +1,359 @@ +#lang scribble/manual + +@(require (for-label (except-in racket compile ...) 
a86)) +@(require redex/pict + racket/runtime-path + scribble/examples + "utils.rkt" + "ev.rkt" + "../fancyverb.rkt" + "../utils.rkt") + +@(define codeblock-include (make-codeblock-include #'h)) + +@(ev '(require rackunit a86)) +@(ev `(current-directory ,(path->string (build-path notes "mountebank")))) +@(void (ev '(with-output-to-string (thunk (system "make runtime.o"))))) +@(void (ev '(current-objs '("runtime.o")))) +@(for-each (λ (f) (ev `(require (file ,f)))) + '("interp.rkt" "compile.rkt" "compile-expr.rkt" "compile-literals.rkt" "compile-datum.rkt" "utils.rkt" "ast.rkt" "parse.rkt" "types.rkt")) + +@(define this-lang "Mountebank") + +@title[#:tag this-lang]{@|this-lang|: quote and compound static data} + +@src-code[this-lang] + +@table-of-contents[] + +@section[#:tag-prefix "mountebank"]{Quote} + +One of the distinguishing features of the Lisp family of languages is +the @racket[quote] form, abbreviated @tt{'}, which is a notation for +writing down literal s-expressions. + +Recall that an S-Expression is: + +@#reader scribble/comment-reader +(racketblock +;; type S-Expr = +;; | Boolean +;; | Number +;; | Character +;; | String +;; | Symbol +;; | Empty +;; | (Boxof S-Expr) +;; | (Pairof S-Expr S-Expr) +;; | (Vectorof S-Expr) +) + +Using quotes, we can write down a literal s-expression such as: + +@ex[ +'#t +'#f +'9 +'#\f +'f +'() +'#&7 +'(1 . 2) +'#(1 2 3) +'(a b ((c) #(d))) +] + +The grammar of things that can be written down inside of a +@racket[quote] are: + +@#reader scribble/comment-reader +(racketblock +;; Datum d ::= #t +;; | #f +;; | n where n is a Number literal +;; | c where c is a Character literal +;; | s where s is a String literal +;; | s where s is a Symbol literal +;; | () +;; | #&d +;; | (d . d) +;; | #(d ...) +) + +At a first level of understanding, it's possible to understand +@racket[quote] by rewriting to ``push'' the quote in as far as +possible. + +Some things are ``self-quoting,'' e.g. 
booleans, characters, strings, +numbers, boxes, and vectors; thus @racket[#t] and @racket['#t] are the +same. When we have a @racket[quote] around a self-quoting datum, we +can delete it. + +Other datums like symbols and the empty list cannot push the +@racket[quote] in further, so we have @racket['()] and @racket['fred] +as literals for the empty list and the symbol @racket[fred], +respectively. + +Pairs, boxes, and vectors are @bold{compound datums}. We can +understand @racket[#&_d] as @racket[(box '_d)] and @racket['(_d1 +. _d2)] as @racket[(cons '_d1 '_d2)] and @racket[#(_d ...)] as +@racket[(vector '_d ...)]. + +We've been using the @racket[quote]-notation from the beginning of the +course so it should be familiar by now. + +One of the key things about @racket[quote] is that we can go from the +concrete syntax of an expression as a piece of code, e.g. @racket[(if +(zero? x) 0 (+ x (tri (sub1 x))))], to @emph{a representation of that +expression} as a piece of data by prepending a single character; +@tt{'}, e.g. @racket['(if (zero? x) 0 (+ x (tri (sub1 x))))]. + +We've relied on this in the front-end of our compiler and interpreter +to parse programs by first calling @racket[read], which reads a single +datum: + +@ex[ +(with-input-from-string + "(if (zero? x) 0 (+ x (tri (sub1 x))))" + read) +] + +Let us now add fully support for @racket[quote] to our language. +Let's call it @bold{Mountebank}. + +We will change the AST definition for Mountebank to add a +@racket[Quote] constructor, which contains a datum. Since +@racket[(Str _s)] and @racket[(Quote _s)] where @racket[_s] is a +string are redundant, we remove all of the literal constructors. + +Here is the new AST definition: + +@filebox-include[codeblock "mountebank/ast.rkt"] + +The parser is updated to parse things like booleans, numbers, etc. 
as +@racket[Quote] nodes now and also to support the ability to write +arbitrary datum value under a quote: + +@filebox-include[codeblock "mountebank/parse.rkt"] + + +@section[#:tag-prefix "mountebank"]{Quotes are constants} + +One thing that the ``pushing quote'' in understanding of +@racket[quote] misses is that a @racket[quote] expression produces a +constant, unlike the use of operations to construct an equivalent +value. + +Using @racket[eq?] we can observe the difference. Recall that +@racket['(1 . 2)] produces a value equivalent to @racket[(cons 1 2)]; +however @racket['(1 . 2)] is a constant, whereas @racket[(cons 1 2)] +dynamically allocates memory to represent the pair. + +We can see difference here: + +@ex[ +(define (f) '(1 . 2)) +(define (g) (cons 1 2)) +(eq? (f) (f)) +(eq? (g) (g)) +] + +Note, this does not mean that all @racket[quote]s are interned +(although some members of the Lisp and Scheme family do this): + +@ex[ +(define (f) '(1 . 2)) +(define (g) '(1 . 2)) +(eq? (f) (g)) +] + +On the other hand, it's important to note that strings and symbols +that appear in @racket[quote]d datums are interned as usual: + +@ex[ +(define (f) '("first" . second)) +(define (g) '("first" . second)) +(eq? (car (f)) (car (g))) +(eq? (cdr (f)) (cdr (g))) +(eq? (f) (g)) +] + + + +@section[#:tag-prefix "mountebank"]{Interpreting quote} + +Interpreting a quoted datum is trivial---it evaluates to the datum +itself: + +@filebox-include[codeblock "mountebank/interp.rkt"] + +The proper treatment of datums as constants is inherited from Racket, +so our interpreter does the right thing on these examples: + +@ex[ +(define (run . p) + (interp (parse p))) + +(run '(define (f) (cons 1 2)) + '(eq? (f) (f))) + +(run '(define (f) '(1 . 2)) + '(eq? (f) (f))) +] + +@section[#:tag-prefix "mountebank"]{Compiling quote} + +Compiling @racket[quote] is not difficult. We've seen all the +necessary pieces already. 
The key things to observe are: + +@itemlist[ + +@item{a compound quoted datum should be statically allocated,} + +@item{strings and symbols that appear in datums should be interned.} + +] + +The latter is achieved by extending the @racket[literals] function +from Mug to traverse the datum in a @racket[quote] to extract any +string or symbol occurrences. + +@filebox-include[codeblock "mountebank/compile-literals.rkt"] + +The static allocation of compound datums is achieved using the same +static memory allocation mechanism we saw when allocating the string +data of strings and symbols. + +Here's how datums are compiled: + +@itemlist[ + +@item{strings are compiled to the tagged address of their string data,} + +@item{symbols are compiled to the tagged address of their string data,} + +@item{atoms are compiled to their bit representation, and} + +@item{compound datums are compiled to a static chunk of memory to +contain its data and a tagged address of that memory.} + +] + +Let's see some examples: + +@ex[ + +(compile-datum 0) + +(compile-datum #f) + +(compile-datum 'fred) + +(compile-datum "fred") + +(compile-datum '(1 . 2)) + +] + +In the last example, you'll notice we get a @racket[(Data)] section +that includes 2 words of memory; the first contains the bit +representation of @racket[2], i.e. the @racket[cdr] of the pair, and +the second contains the bit representation of @racket[1], i.e. the +@racket[car] of the pair. After the @racket[(Data)] section, we +switch back to @racket[(Text)] mode with an instruction to load the +address of the statically allocated pair, appropriately tagged. + +Datums can be built up arbitrarily large, so in order to compile compound +datums, we need to recursively traverse their structure to emit the +static data section of their construction. 
Here's a larger example: + +@ex[ +(compile-datum '((3) fred #(x y z) (("wilma")))) +] + +Notice that every compound datum has its own label and when they are +contained within other compound datums, we get references, +appropriately tagged, to those labels. + +Here is a simple example of a nested datum: a box containing a box +containing zero. + +@ex[ +(compile-datum '#&#&0) +] + +The data section starts with a label and word for the outer box. The +word contains a tagged reference to the inner box, which is defined +immediately below as a label and word. That word contains @racket[0]. +In the text section there is a single instruction to load the tagged +address of the outer box into @racket['rax]. + +Here is the complete code for @racket[compile-datum]: + +@filebox-include[codeblock "mountebank/compile-datum.rkt"] + +Now we've successfully implemented @racket[quote] and can confirm our +examples behave as expected: + +@ex[ +(current-objs '("runtime.o")) +(define (run . p) + (bits->value (asm-interp (compile (parse p))))) + +(run '#t) +(run ''#t) +(run ''(1 . 2)) +(run ''(1 fred #("wilma"))) +(run '(define (f) '(1 . 2)) + '(eq? (f) (f))) +(run '(define (f) '("fred" . wilma)) + '(define (g) '("fred" . wilma)) + '(eq? (car (f)) (car (g)))) +(run '(define (f) '("fred" . wilma)) + '(define (g) '("fred" . wilma)) + '(eq? (cdr (f)) (cdr (g)))) +] + +@section[#:tag-prefix "mountebank"]{Getting Meta} + +It's worth taking stock of the kind of programs we can now write. +Since @racket[quote] lets us write down data that looks an awful lot +like programs, we can start to write programs that operate over this +kind of data in a way that may seem familiar. + +For example, here's a program that interprets a little language that +has elements of the ones we've been building: + +@filebox-include[codeblock "mountebank/simple-interp.rkt"] + + +Now of course this is a Racket program, which we can run. 
Running it +will run the interpreter we defined on the input program, computing +the 36th triangular number: + +@(define (shellbox . s) + (parameterize ([current-directory (build-path notes "mountebank")]) + (filebox (emph "shell") + (fancyverbatim "fish" (apply shell s))))) + +@shellbox[ +"racket simple-interp.rkt" +] + +But of course, this is also a Mountebank program! So we can interpret +it with our Mountebank interpreter: + +@shellbox[ +"racket -t interp-file.rkt -m simple-interp.rkt" +] + +And since it's a Mountebank program, we can also compile it and then +run the resulting executable: + +@shellbox[ +"make simple-interp.run" +"./simple-interp.run" +] + +We are moving ever closer to the point where our compiler can compile +the source code of itself. \ No newline at end of file diff --git a/www/notes/mug.scrbl b/www/notes/mug.scrbl index c94b4ed5..d1b2036c 100644 --- a/www/notes/mug.scrbl +++ b/www/notes/mug.scrbl @@ -1,1196 +1,968 @@ #lang scribble/manual -@(require (for-label (except-in racket ...))) +@(require (for-label (except-in racket compile ...) 
a86)) @(require redex/pict racket/runtime-path scribble/examples "utils.rkt" "ev.rkt" - "../utils.rkt") + "../fancyverb.rkt" + "../utils.rkt") @(define codeblock-include (make-codeblock-include #'h)) -@(for-each (λ (f) (ev `(require (file ,(path->string (build-path notes "mug" f)))))) - '("interp.rkt" "interp-env.rkt" #;"compile.rkt" "syntax.rkt" "pat.rkt" #;"asm/interp.rkt" #;"asm/printer.rkt")) +@(ev '(require rackunit a86)) +@(ev `(current-directory ,(path->string (build-path notes "mug")))) +@(void (ev '(with-output-to-string (thunk (system "make runtime.o"))))) +@(void (ev '(current-objs '("runtime.o")))) +@(for-each (λ (f) (ev `(require (file ,f)))) + '("interp.rkt" "compile.rkt" "compile-expr.rkt" "compile-literals.rkt" "utils.rkt" "ast.rkt" "parse.rkt" "types.rkt")) -@title[#:tag "Mug"]{Mug: matching, throwing, quoting} +@(define this-lang "Mug") -@table-of-contents[] - -@section[#:tag-prefix "mug"]{Scaling up with syntax} - -We have developed a small, but representative functional programming -language. But there's still a long way to go from our Loot language -to the kind of constructs we expect in a modern, expressive -programming language. In particular, there's a fairly large gap -between Loot and the subset of Racket we've explored so far in this -class. +@title[#:tag this-lang]{@|this-lang|: symbols and interned string literals} -For example, our programs have made extensive use of pattern matching, -quotation, quasi-quotation, and lots of built-in functions. In this -section, we'll examine how to scale Loot up to a language that's nicer -to program in. As we'll see, much of this can be accomplished -@emph{without extending the compiler}. Rather we can explain these -language features by @bold{elaboration} of fancier language syntax -into the existing core forms. +@src-code[this-lang] -In this chapter, we'll explore several ideas at the level of an -interpreter, but the techniques should work just as well for the compiler. 
- -@section[#:tag-prefix "mug"]{The Loot+ interpreter} +@table-of-contents[] -Let us start with an interprter for the Loot language, plus all of the -extensions considered in the various assignments up through -@seclink["Assignment 7"]{Assignment 7}. +@section[#:tag-prefix "mug"]{String Literals} +As it currently stands in our language, @bold{string literals} are +dynamically allocated when they are evaluated. -@codeblock-include["mug/interp-env.rkt"] +This means, for example, that if we had a program like this: -@section[#:tag-prefix "mug"]{A bit more sugar} +@#reader scribble/comment-reader +(racketblock +(define (f) "fred") +(cons (f) (cons (f) (cons (f) '()))) +) +This will allocate three distinct copies of the string +@racket["fred"], one for each call to @racket[f]. This is unfortunate +since really just a single allocation of @racket["fred"] that is +referenced three times could've worked just as well and allocated less +memory. -As we saw in @seclink["Loot"]{Loot}, we can consider syntaxtic -extensions of language that elaborate into the core @tt{Expr} form of -a language. We saw this with the @racket[define]-form that we rewrote -into @racket[letrec]. We can consider further extensions such as -@racket[and], @racket[or], and even @racket[cond]. +A common approach programming language implementations take is to take +every string literal that appears in a program and all allocate it +@bold{once} and replace occurrences of those literals with references +to memory allocated for it. -Here are functions for transforming each of these forms into simpler -forms: +This means, for example, that multiple occurrences of the same string +literal evaluate to the same pointer: -@#reader scribble/comment-reader -(ex -(define (cond->if c) - (match c - [`(cond (else ,e)) e] - [`(cond (,c ,e) . ,r) - `(if ,c ,e (cond ,@r))])) - -(define (and->if c) - (match c - [`(and) #t] - [`(and ,e) e] - [`(and ,e . 
,r) - `(if ,e (and ,@r) #f)])) - -(define (or->if c) - (match c - [`(or) #f] - [`(or ,e) e] - [`(or ,e . ,r) - (let ((x (gensym))) - `(let ((,x ,e)) - (if ,x ,x (or ,@r))))])) -) +@ex[ +(eq? "x" "x") +] -Note that these functions do not necessarily eliminate @emph{all} -@racket[cond], @racket[and], or @racket[or] forms, but rather -eliminate @emph{one} occurrence, potentially creating a new occurrence -within a subexpression: +Note that this doesn't mean that every string of the same characters +is represented by a unique pointer. We can dynamically construct +strings that will not be equal to a string literal of the same +characters: @ex[ -(cond->if '(cond [(even? x) 8] [else 9])) -(cond->if '(cond [else 9])) -(and->if '(and)) -(and->if '(and 8)) -(and->if '(and 8 9)) -(or->if '(or)) -(or->if '(or 8)) -(or->if '(or 8 9)) +(eq? "x" (string #\x)) ] -The idea is that another function will drive the repeated use of these -functions until all these extended forms are eliminated. - -You may wonder why the @racket[or] elaboration is complicated by the -@racket[let]-binding. Consider a potential simpler approach: +Let's consider how strings were previously compiled. Here's an assembly program +that returns @racket["Hello!"]: -@#reader scribble/comment-reader -(ex -(define (or->if-simple c) - (match c - [`(or) #f] - [`(or ,e) e] - [`(or ,e . ,r) - `(if ,e ,e (or ,@r))])) -) +@ex[ +(require loot/compile) +(seq (Label 'entry) + (Mov 'rbx 'rdi) + (compile-string "Hello!") + (Ret)) +] -But compare the elaboration of the following exmample: +We can run it just to make sure: @ex[ -(or->if-simple '(or (some-expensive-function) #t)) -(or->if '(or (some-expensive-function) #t)) +(bits->value + (asm-interp + (seq (Global 'entry) + (Label 'entry) + (Mov 'rbx 'rdi) + (compile-string "Hello!") + (Ret)))) ] -The second program is much more efficient. 
Moreover, if -@racket[some-expensive-function] had side-effects, the first program -would duplicate them, thereby changing the program's intended -behavior. +Notice that this program dynamically allocates the string by executing +instructions that write to memory pointed to by @racket['rbx] and +incrementing @racket['rbx]. -We can incorporate these new functions into the desugar function, -which will transform extended programs into ``core'' expressions: +But fundamentally, we shouldn't need to do anything dynamically if we +know statically that the string being return is @racket["Hello!"]. We +could @emph{statically} allocate the memory for the string at +compile-time and return a pointer to this data. -@#reader scribble/comment-reader -(ex -;; Expr+ -> Expr -(define (desugar e+) - (match e+ - [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) - `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) - ,(desugar e))] - [(? symbol? x) x] - [(? imm? i) i] - [`',(? symbol? s) `',s] - [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] - [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] - [`(letrec ,bs ,e0) - `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) - ,(desugar e0))] - [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] - [`(cond . ,_) (desugar (cond->if e+))] - [`(and . ,_) (desugar (and->if e+))] - [`(or . ,_) (desugar (or->if e+))] - [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) -) +@section[#:tag-prefix "mug"]{Static Memory} + +How can we statically allocate memory? The idea is to use memory in +the program itself to store the data needed to represent the string +literal. It turns out that in an a86 program you can have a section +for the program text and another section with binary data. To switch +between the program text and program data, we use the @racket[(Text)] +and @racket[(Data)] directive. 
Once in @racket[(Data)] mode we can +write down data that will be placed in the program. -Note how a @racket[cond], @racket[and], or @racket[or] form are -transformed and then @racket[desugar]ed again. This will take care of -eliminating any derived forms introduced by the transformation, which -is useful so that derived forms can be defined in terms of other -derived forms, including itself! +For example, here is a data section: @ex[ -(desugar '(cond [(even? x) 8] [else 9])) -(desugar '(cond [else 9])) -(desugar '(and)) -(desugar '(and 8)) -(desugar '(and 8 9)) -(desugar '(or)) -(desugar '(or 8)) -(desugar '(or 8 9)) +(seq (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))) + ] +These pseudo-instructions will add to the data segment of our program +32-bytes of data. The first 8-bytes consist of the number 6. The +next 4-bytes consist of the number @racket[72], i.e. the codepoint for +@racket[#\H]. The next 4-bytes consist of the codepoint for +@racket[#\e] and so on. The names of these pseudo-instructions +designate how much memory is used: @racket[Dq] means 8-bytes +(64-bits), while @racket[Dd] means 4-bytes (32-bits). -Derived forms that can be elaborated away by rewriting into more -primitive forms are sometimes called @bold{syntactic sugar} since they -are not fundamental but ``sweeten'' the experience of writing programs -with useful shorthands. We call the elaboration function @racket[desugar] -to indicate that it is eliminating the syntactic sugar. +The label @racket['hi] is given to name this data's location. We've +previously seen how to load the address of a label using the +@racket[Lea] instruction in order to compute a place in the code to +jump to. Similarly, if we load the address of @racket['hi], we have a +pointer to the data at that location in the program. 
-@section[#:tag-prefix "mug"]{Exceptional behavior} +So to write a similar program that returns @racket["Hello!"] but +@emph{statically} allocates the memory for the string, we could do the +following: -To see an example of taking the idea of program transformation as a -method for implementing language features, let's consider the case of -exceptions and exception handlers, a common feature of modern -high-level languages. +@ex[ +(bits->value + (asm-interp + (seq (Global 'entry) + (Label 'entry) + (Lea 'rax 'hi) + (Or 'rax type-str) + (Ret) + (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))))) +] -Consider the following program for computing the product of all the -elements in a binary tree: +A couple things to note: +@itemlist[ -@#reader scribble/comment-reader -(ex -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (match bt - ['leaf 1] - [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))])) - -(prod 'leaf) -(prod '(node 8 leaf leaf)) -(prod '(node 8 (node 2 leaf leaf) (node 4 leaf leaf))) -) +@item{nothing is allocated in the heap memory set up by the run-time; +indeed this program doesn't use the @racket['rbx] register at all.} -Now consider the work done in an example such as this: +@item{Executing this program takes fewer steps than the previous +version; when the @racket['entry] label is called, it executes an +@racket[Lea] and @racket[Or] instruction and returns.} -@ex[ -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) ] -From a quick scan of the elements, we know the answer is 0 without -doing any arithmetic. But the @racket[prod] function will do a bunch -of multiplication to actually figure this out. 
+This is pretty big improvement over the previous approach since the +number of instructions to execute were proportional to the size of the +string being compiled. Now we simply load the address of the static +data in a small, constant number of instructions. -To see, let's use a helper function to replace @racket[*] that prints -every it multiplies two numbers: +In fact, we can do one better. The @racket[Or] instruction is there +in order to tag the pointer to @racket['hi] as a string. There's +really no reason to do this at run-time; we should be able to add the +tag statically so that just a single load instruction suffices. The +goal is to add the tag to the address of @racket['hi] at compile time, +but the location of the label is actually not fully known until link +time. Our assembler has a way of resolving this by allowing us to +write @emph{expressions} involving labels and constants that will be +computed at link time. -@#reader scribble/comment-reader -(ex -;; Number Number -> Number -(define (mult x y) - (printf "mult: ~a x ~a\n" x y) - (* x y)) - -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (match bt - ['leaf 1] - [`(node ,v ,l ,r) (mult v (mult (prod l) (prod r)))])) - -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) -) +Here is a version of the same program that avoids the @racket[Or] +instruction, instead computing that type tagging at link time: -This could potentially be bad if the tree were quite large. - -How can we do better? 
One option is to detect if the value at a node -is zero and simply avoid recurring on the left and right subtrees at -that point: +@ex[ +(bits->value + (asm-interp + (seq (Global 'entry) + (Label 'entry) + (Lea 'rax (Plus 'hi type-str)) + (Ret) + (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))))) +] -@#reader scribble/comment-reader -(ex -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (match bt - ['leaf 1] - [`(node ,v ,l ,r) - (if (zero? v) - 0 - (mult v (mult (prod l) (prod r))))])) -) -Does this help our answer? Only slightly: +So one idea is to use static data to represent string literals. This +reduces the run-time memory that is allocated and makes is more +efficient to evaluate string literals. We could replace the old +@racket[compile-string] function with the following: @ex[ -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +(define (compile-string s) + (let ((l (gensym 'string))) + (seq (Data) + (Label l) + (Dq (string-length s)) + (map Dd (map char->integer (string->list s))) + (Text) + (Lea 'rax (Plus l type-str))))) + +(compile-string "Hello!") + +(bits->value + (asm-interp + (seq (Global 'entry) + (Label 'entry) + (compile-string "Hello!") + (Ret)))) ] -Why? +Now, while this does allocate string literals statically, using memory +within to the program to store the string, it doesn't alone solve the +problem with string literals being represented uniquely. -The problem is that you may encounter the zero element deep within a -tree. At that point you not only want to avoid doing the -multiplication of subtrees, but also of the elements surrounding the -zero. But we seemingly don't have control over the context -surrounding the node with a zero in it, just the subtrees. What can -we do? 
+@section[#:tag-prefix "mug"]{Static Interning} -One option, if the language provides it, is to @bold{raise an -exception}, signalling that a zero element has been found. An outer -function can @bold{catch} that exception and produce zero. Such a -program will avoid doing any multiplication in case there's a zero in -the tree. +We've seen static memory, but we still need to make sure every string +literal is allocated just once. -Racket comes with an exception mechanism that uses @racket[raise] to -signal an exception, which is propagated to the nearest enclosing -exception handler. If there is no such handler, an uncaught exception -error occurs. +Here is the basic idea: -@ex[ +@itemlist[ -(eval:error (raise 5)) -(eval:error (mult (raise 5) 2)) -(eval:error (mult (raise (mult 5 3)) 2)) +@item{Collect all of the string literals in the program.} -] +@item{For each distinct string literal, compile it to static data as +described above, labelling the data location.} -The general form of an exception handler uses the -@racket[with-handlers] form that includes a series of predicates and -handler expressions. We'll consider a simpler form called -@racket[catch] that unconditionally catches any exception throw and -handles it with a function that takes the raised value as an argument. 
-It can be expressed in terms of the more sophisticated -@racket[with-handlers] form: +@item{For each string literal expression, compile it to a reference to +the appropropiate label for that string.} -@ex[ -(define-syntax-rule (catch e f) - (with-handlers ([(λ (x) #t) f]) e)) - -(catch (raise 5) (λ (x) x)) -(catch (mult (raise 5) 2) (λ (x) x)) -(catch (mult (raise (mult 5 3)) 2) (λ (x) x)) -(catch (mult (mult 5 3) 2) (λ (x) x)) -(catch (mult (mult 5 3) 2) (λ (x) (mult x x))) -(catch (mult (raise (mult 5 3)) 2) (λ (x) (mult x x))) ] -Now we can solve our problem: +For example, let's say we want to compile this program: @#reader scribble/comment-reader -(ex -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (catch (prod/r bt) (λ (x) 0))) - -;; BT -> Number -;; Throws: 0 -(define (prod/r bt) - (match bt - ['leaf 1] - [`(node ,v ,l ,r) - (if (zero? v) - (raise 0) - (mult v (mult (prod/r l) (prod/r r))))])) - -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +(racketblock +(begin "Hello!" + "Hello!") ) -(This code is a bit problematic for reasons that are beside the point -of this section, but... the problem is this will catch any exception, -including things like system signals, out of memory exceptions, etc. -A better solution would have the handler check that the exception -value was 0 and re-raise it if not. That way it doesn't ``mask'' any -other exceptions.) - -This code works great for our purposes, but what if the language -didn't provide an exception handling mechanism? Could we achieve the -same effect without relying on exceptions? - -One solution is to re-write the program in what's called -@bold{continuation passing style} (CPS). 
Continuation passing style -makes explicit what is implicit in the recursive calls to -@racket[prod] in our original program, which is that after recursively -computing the product of the subtree, we have to do more work such as -another recursive call and multiplication. By making this work -explicit, we gain control over it and have the option to do things -like throw away this work. - -Here is the basic idea. We will write a version of @racket[prod] that -takes an additional argument which represents ``the work to be done -after this function call completes.'' It will take a single argument, -a number, which is the result of this function call, and it will -produce some final result for the computation (in this case, a number). - -In general, we want @racket[(k (prod bt))] ≡ @racket[(prod/k bt k)] -for all functions @racket[k] and binary trees @racket[bt]. - -Starting from the spec, we have: +We'd like it to compile to something like this: @#reader scribble/comment-reader -(ex -;; BT (Number -> Number) -> Number -(define (prod/k bt k) - (k (prod bt))) +(racketblock +(seq (Mov 'rax (Add 'hi type-str)) + (Mov 'rax (Add 'hi type-str)) + (Ret) + (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))) ) -We can unroll the definition of @racket[prod]: +Notice how the two occurrences of @racket["Hello!"] turn into the +instruction @racket[(Mov 'rax (Add 'hi type-str))]. The labelled +location @racket['hi] contains the data for the string and it is +statically allocated just once. -@#reader scribble/comment-reader -(ex -(define (prod/k bt k) - (match bt - ['leaf (k 1)] - [`(node ,v ,l ,r) - (k (mult v (mult (prod l) (prod r))))])) -) +In order to do this, we need to maintain an association between unique +string literals and the labels our compiler will choose to label their +static data. 
-Now we'd like to replace the calls to @racket[prod] with calls to -@racket[prod/k], which we can do by recognizing the work to be done -around the call to @racket[prod] and placing it in the -@bold{continuation} argument to @racket[prod/k]. Let's do the first call: +We @emph{could} do this by making a pass over the program to compute +this association. Initially it would be empty and every time a string +literal was encountered, we'd check to see if it's already in the +association. If it is, there's nothing to be done. If it isn't, we'd +generate a new label and add it to the association. + +This association would have to be added as a parameter to each of our +@racket[compile-e] functions and string literals would consult the +association to emit the @racket[(Mov 'rax (Add _label type-str))] +instruction. + +We'd also take every label and string pair in the association and +compile the string data to static data labelled with the associated +label. + +However, here's a fun ``trick'' we can employ to avoid having to +explicitly represent this association between strings and their +labels. + +Strings can be converted to symbols, and symbols can be used as +labels. Symbols that consist of the same characters are guaranteed to +be pointer-equal to each other, so by converting a string to a symbol, +we can take advantage of our implementation language's (Racket's) +facility for interning to help us implement interning in our compiler.
+ +So our revised approach will produce code like this for our +example program: @#reader scribble/comment-reader -(ex -(define (prod/k bt k) - (match bt - ['leaf (k 1)] - [`(node ,v ,l ,r) - (prod/k l (λ (pl) - (k (mult v (mult pl (prod r))))))])) +(racketblock +(seq (Mov 'rax (Add (symbol->label (string->symbol "Hello!")) type-str)) + (Mov 'rax (Add (symbol->label (string->symbol "Hello!")) type-str)) + (Ret) + (Data) + (Label (symbol->label (string->symbol "Hello!"))) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))) ) -Doing this again, we get: +So now an occurrence of a string literal @racket[_str] can be compiled +as @racket[(Lea 'rax (Plus (symbol->label (string->symbol _str)) type-str))]; no +association needs to be maintained explicitly. @#reader scribble/comment-reader -(ex -(define (prod/k bt k) - (match bt - ['leaf (k 1)] - [`(node ,v ,l ,r) - (prod/k l (λ (pl) - (prod/k r (λ (pr) - (k (mult v (mult pl pr)))))))])) +(racketblock +;; String -> Asm +(define (compile-string s) + (seq (Lea 'rax (Plus (symbol->label (string->symbol s)) type-str)))) ) -Now we have a definition of @racket[prod/k] that is independent of -@racket[prod] that satisfies the spec we started with.
+@(ev '(define (compile-string s) + (seq (Lea 'rax (Plus (symbol->label (string->symbol s)) type-str))))) -A couple of things to note: +So here's how an occurrence of @racket["Hello!"] is compiled: -@itemlist[ -@item{Every call to @racket[prod/k] is a tail-call,} -@item{The context of the recursive calls are given explicitly as continuation arguments.} +@ex[ +(compile-string "Hello!") ] -We can recreate the original function by giving the appropriate initial continuation: +We still need to compile the set of string literals that appear in the +program into statically allocated data, so for this we will write a +function: @#reader scribble/comment-reader -(ex -;; BT -> Number -(define (prod bt) - (prod/k bt (λ (x) x))) +(racketblock +;; Prog -> [Listof Symbol] +(define (literals p) ...) ) -Now, this code doesn't do anything smart on zero elements; it does -exactly the same multiplications our first program does: +This will produce the set of strings that appear literally in the +program text. Each string will be converted to its symbol +representation. The string representation is easy to recover by using +@racket[symbol->string]. + +This function is straightforward, if a bit tedious, to write. It +traverses the AST. Recursive results are collected with +@racket[append]; when a string node @racket[(Str _s)] is encountered, +it produces @racket[(list (string->symbol _s))]. After all of the +strings have been collected, a final call to +@racket[remove-duplicates] ensures a list of unique symbols is +returned. @ex[ -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +(literals (parse '["Hello!"])) +(literals (parse '[(begin "Hello!" "Hello!")])) +(literals (parse '[(begin "Hello!" "Fren")])) +(literals (parse '[(define (f x) "Hello!") + (cons (f "Fren") (cons (f "Hello!") '()))])) ] -However, with a small tweak, we can get the behavior of the exception-handling code.
+Using @racket[literals], we can write a function that compiles all of +the string literals into static data as follows: -Consider this definition: +@(ev '(require mug/compile-literals)) @#reader scribble/comment-reader (ex -;; BT (Number -> Number) -> Number -(define (prod/k bt k) - (match bt - ['leaf (k 1)] - [`(node ,v ,l ,r) - (if (zero? v) - 0 - (prod/k l (λ (pl) - (prod/k r (λ (pr) - (k (mult v (mult pl pr))))))))])) - -;; BT -> Number -(define (prod bt) - (prod/k bt (λ (x) x))) +;; Prog -> Asm +(define (compile-literals p) + (append-map compile-literal (literals p))) + +;; [Listof Char] -> Asm +(define (compile-string-chars cs) + (match cs + ['() (seq)] + [(cons c cs) + (seq (Dd (char->integer c)) + (compile-string-chars cs))])) + +;; Symbol -> Asm +(define (compile-literal s) + (let ((str (symbol->string s))) + (seq (Label (symbol->label s)) + (Dq (string-length str)) + (compile-string-chars (string->list str)) + (if (odd? (string-length str)) + (seq (Dd 0)) + (seq))))) + +(seq (compile-string "Hello!") + (compile-string "Hello!") + (compile-literal 'Hello!)) ) -Notice that this program, when the value in a node is zero, -immediately returns @racket[0]. It does not do any of the work -represented by @racket[k]. It does something akin to raising an -exception: it blows off all the work of the surround context and -returns a value to the ``handler'' (in this case, @racket[prod]). - -Returning to our example, we can see that no multiplications occur: - -@ex[ -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) -] - -We've now achieved our original goal without the use of exception -handlers. We achieved this by rewriting our program to make explicit -the work that remains to do, giving us the ability to avoid doing it -when necessary. This is a slighly simplified version of the general -exception handling transformation, which we will look at next, since -there's only a single handler and all it does it produce 0. 
But, the -by-hand transformation we did provides a useful blueprint for how can -generally transform programs that use exception handling into ones -that don't. - +We've seemingly reached our goal. However, there is a fairly nasty +little bug with our approach. Can you spot it? -@section[#:tag-prefix "mug"]{Exceptional transformation} +Here's a hint: we are generating labels based on the content of string +literals. What else do we generate labels based off of and is it +possible to create a conflict? -Let's consider a very small subset of expressions, extended with -@racket[raise] and @racket[catch], and see how we can transform away -those added mechanisms: +The answer is yes. Consider this program: @#reader scribble/comment-reader (racketblock -;; An Expr is one of: -;; - Integer -;; - Variable -;; - `(if ,Expr ,Expr ,Expr) -;; - `(,Prim1 ,Expr) -;; - `(,Prim2 ,Expr ,Expr) -;; - `(raise ,Expr) -;; - `(catch ,Expr (λ (,Variable) ,Expr)) +(define (Hello! x) "Hello!") +42 ) -Here is the basic idea of the transformation, we transform every -expression into a function of two arguments. The two arguments -represent the two ways an expression may produce results: either by -returning normally or by raising an exception. +It contains both a function called @racket[Hello!] and a string +literal @racket["Hello!"]. Unfortunately, the label used both for the +function and the string data will be @racket[(symbol->label 'Hello!)]. +If the compiler emits two definitions of this label, the assembler +will complain and fail to assemble the program. +The solution is simple: when generating labels for data, we will use a +different symbol-to-label function, let's call it +@racket[symbol->data-label], that is guaranteed to produce disjoint +labels from @racket[symbol->label], which we will continue to use for +code labels. -So for example, if the original expression were @racket[1], we'd want -the transformed program to be -@racketblock[ -'(λ (retn raze) (retn 1)) -] - -Why?
Because @racket[1] just produces @racket[1]; it can't possibly -raise an exception. So given the two ways of producing a value, we -choose the @racket[ret] way and ``return'' by apply @racket[retn] to -the value we want to return: @racket[1]. - +Using this function in all the places we used @racket[symbol->label] +will resolve the issue and our problematic program will now have two +different labels defined in it: -Suppose the original expression is @racket[(raise 1)]. Then we want -to produce: -@racketblock[ -'(λ (retn raze) (raze 1)) +@ex[ +(symbol->label 'Hello!) +(symbol->data-label 'Hello!) ] -This is choosing to not return a value, but rather ``raise'' an -exception by calling the @racket[raze] function. -This is a lot like the by-hand transformation we did, except we now -have two continuations: one to represent work to do after -returning (normally) and one for work to do after raising an -exception. +So now we have accomplished our goal: string literals are statically +allocated and different occurrences of the same string literal are +considered @racket[eq?] to each other: -At the top-level, to run an expression we simply plug in appropriate -definitions for @racket[retn] and @racket[raze]. The @racket[retn] -function should just produce the result, i.e. it should be @racket[(λ -(x) x)], while @racket[raze] should signal an uncaught exception. -Since our language has such a simple model of errors, we'll just cause -an error to occur, i.e. @racket[(λ (x) (add1 #f))]. Let's try our -examples. +@(ev '(require mug/compile)) @ex[ -(interp-env '((λ (retn raze) (retn 1)) (λ (x) x) (λ (x) (add1 #f))) '()) -(interp-env '((λ (retn raze) (raze 1)) (λ (x) x) (λ (x) (add1 #f))) '()) +(seq (compile-string "Hello!") + (compile-string "Hello!") + (compile-literal 'Hello!)) ] -What about something like @racket[(add1 _e)]? - -Well if @racket[_e] returns normally, then the whole thing should -produce one more than that value.
If @racket[_e] raises an exception, -then @racket[(add1 _e)] should raise that exception. - -Suppose @racket[_t] where the transformed version of @racket[_e], -which means it is a function of two parameters: what to do if -@racket[_e] returns and what to do if @racket[_e] raises. - -Then the transformation of @racket[(add1 _e)] is -@racketblock[ -(λ (retn raze) - (_t (λ (x) (retn (add1 x))) (λ (x) (raze x))))] - -This can be simplified slightly by observing that @racket[(λ (x) (raze -x))] is equal to @racket[raze]: -@racketblock[ -(λ (retn raze) - (_t (λ (x) (retn (add1 x))) raze))] - -How about something like @racket[(catch _e0 (λ (_x) _e1))]? If -@racket[_e0] produces a value normally, then the whole expression -produces that value normally. However if @racket[_e0] raises an -expression then the whole expression produces whatever @racket[_e1] -with @racket[x] bound to the raised value produces. Let @racket[_t0] -and @racket[_t1] be the transformed versions of @racket[_e0] and -@racket[_e1]. Then transformation of the whole expressions should be - -@racketblock[ -(λ (retn raze) - (_t0 retn (λ (_x) (_t1 retn raze)))) -] +We can try it out to confirm some examples. -One thing to notice here is that we are running @racket[_t0] with a @racket[raze] function -that, if called, will run @racket[_t1] normally. -Guided by the examples, let's define the transformation (note: we have -to take care of avoiding unintended variable capture): +@ex[ +(define (run . p) + (bits->value (asm-interp (compile (parse p))))) -@#reader scribble/comment-reader -(ex -;; Expr -> Expr -(define (exn-transform e) - (match e - [(? integer? i) `(λ (retn raze) (retn ,i))] - [(? symbol? 
x) - (let ((retn (gensym 'retn)) - (raze (gensym 'raze))) - `(λ (,retn ,raze) (,retn ,x)))] - [`(if ,e0 ,e1 ,e2) - (let ((t0 (exn-transform e0)) - (t1 (exn-transform e1)) - (t2 (exn-transform e2)) - (retn (gensym 'retn)) - (raze (gensym 'raze))) - `(λ (,retn ,raze) - (,t0 - (λ (x) - ((if x ,t1 ,t2) ,retn ,raze)) - ,raze)))] - [`(,(? prim? p) ,e0) - (let ((t0 (exn-transform e0)) - (retn (gensym 'retn)) - (raze (gensym 'raze))) - `(λ (,retn ,raze) - (,t0 (λ (v) (,retn (,p v))) - ,raze)))] - [`(,(? prim? p) ,e0 ,e1) - (let ((t0 (exn-transform e0)) - (t1 (exn-transform e1)) - (retn (gensym 'retn)) - (raze (gensym 'raze)) - (v0 (gensym 'v0))) - `(λ (,retn ,raze) - (,t0 (λ (,v0) - (,t1 (λ (v1) (,retn (,p v0 v1))) - ,raze)) - ,raze)))] - [`(raise ,e) - (let ((t (exn-transform e)) - (retn (gensym 'retn)) - (raze (gensym 'raze))) - `(λ (,retn ,raze) - (,t ,raze ,raze)))] - [`(catch ,e0 (λ (,x) ,e1)) - (let ((t0 (exn-transform e0)) - (t1 (exn-transform e1)) - (retn (gensym 'retn)) - (raze (gensym 'raze))) - - `(λ (,retn ,raze) - (,t0 ,retn - (λ (,x) - (,t1 ,retn ,raze)))))])) -) +(run "Hello!") -Here's what the transformation looks like on examples: +(run '(begin "Hello!" "Hello!")) -@ex[ -(exn-transform '1) -(exn-transform '(raise 1)) -(exn-transform '(catch (raise 1) (λ (x) x))) -(exn-transform '(catch (raise 1) (λ (x) (add1 x)))) -(exn-transform '(catch (add1 (raise 1)) (λ (x) 1))) -(exn-transform '(catch (add1 (raise 1)) (λ (x) (raise x)))) +(run '(eq? "Hello!" "Hello!")) +(run '(eq? "Hello!" "Fren")) + +(run '(define (Hello! x) "Hello!") + '(eq? (Hello! 42) "Hello!")) ] -Now let's give it a spin: +It's still worth noting that only string literals are interned. 
+Dynamically created strings are not pointer-equal to structurally +equal string literals: @ex[ -;; Expr -> Value -(define (run e) - (interp-env `(,(exn-transform e) (λ (x) x) (λ (x) (add1 #f))) '())) - -(run '1) -(run '(raise 1)) -(run '(catch (raise 1) (λ (x) x))) -(run '(catch (raise 1) (λ (x) (add1 x)))) -(run '(catch (add1 (raise 1)) (λ (x) 1))) -(run '(catch (add1 (raise 1)) (λ (x) (raise x)))) -(run '(if (raise 0) 1 2)) -(run '(if (zero? 0) (raise 1) 2)) +(run '(eq? "fff" (make-string 3 #\f))) ] -@section[#:tag-prefix "mug"]{Quotation} +This is why we refer to this kind of interning as ``static'' interning. -We have seen how to interpret limited uses of @racket[quote], such as -in @racket[(quote ())] and @racket[(quote x)], i.e. the empty list and symbols. +Let us now turn to a new, but familiar, data type that supports a +stronger sense of interning: the symbol. -But we've also been using @emph{using} @racket[quote] more generally -where we can write down an arbitrary constant s-expression within a -@racket[quote]: +@section[#:tag-prefix "mug"]{Symbols} -@ex[ -'#t -'5 -'(1 2 3) -'(add1 x) -'(car '(1 2 3)) -'(((1) 2) 3) -'(1 . 2) -'("asdf" fred ((one))) -] +One basic data type that we've used frequently in the writing of our +compiler, but which is not currently accounted for in our language is +that of @bold{symbols}. -We can understand the more general @racket[quote] form as a shorthand -for expressions that construct an equivalent list to the one denoted -by the s-expression. +At first cut, a symbol is a lot like a string: the name of a symbol +consists of some textual data. We can represent a symbol much like we +represent a string: using a tagged pointer to a sized array of +characters that comprise the name of the symbol. -For example, -@itemlist[ +In fact, we made extensive use of this in our implementation of static +interning for string literals. This section will now uncover +@emph{how} symbols do their (dynamic) interning.
-@item{@racket['1] is shorthand for @racket[1],} +From a syntax point of view, we add a new AST constructor for symbols +and names of the new operations: -@item{@racket['(1 . 2)] is shorthand for @racket[(cons '1 '2)], which -is shorthand for @racket[(cons 1 2)],} +@filebox-include-fake[codeblock "mug/ast.rkt"]{ +;; type Expr = ... +;; | (Symb Symbol) +;; type Op1 = ... +;; | 'symbol? | 'symbol->string +;; | 'string->symbol | 'string->uninterned-symbol +(struct Symb (s) #:prefab) +} -@item{@racket['(1 2 3)] is shorthand for @racket[(cons '1 '(2 3))], -which is shorthand for @racket[(cons 1 (cons '2 '(3)))], which is -shorthand for @racket[(cons 1 (cons 2 (cons '3 '())))], which is -shorthand for @racket[(cons 1 (cons 2 (cons 3 '())))],} +The parser is updated to construct such AST nodes when it encounters a +symbol: -@item{@racket['()] is as simple as possible (the empty list),} +@ex[ +(parse-e ''foo) +] -@item{@racket['x] is as simple as possible (a symbol), and} +We can create a new pointer type tag: -@item{@racket[5] is as simple as possible.} +@filebox-include-fake[codeblock "mug/types.rkt"]{ +(define type-symb #b110) +} + +The run-time system has to be updated to handle symbol results and the +printer is updated to properly print symbols, but all of this follows +the blueprint of strings. It's simply a different tag and a slightly +different printer, which uses an initial @tt{'} delimiter instead of +an initial @tt{"} and subsequent @tt{"} delimiter. + +But one of the key differences between strings and symbols is that +symbols that have the same name are considered the same, i.e. they +should be represented by the same @emph{pointer}. + +This means that two symbols of the same name should be @racket[eq?] to +each other: + +@ex[ +(eq?
'x 'x) ] -Guided by these examples, we can write a function that transforms the -s-expression inside of a @racket[quote] into an equivalent expression -that only uses @racket[quote] for constructing symbols and the empty -list: +Having seen how string literals are handled, you can see that symbol +literals are like string literals and we can take a similar approach +to transform a program into one that statically allocates all of the +symbols that appear in the program and replace their occurrences with +references. +Again, we just follow the blueprint of strings. + +The key additions are a function for compiling symbol occurrences: @#reader scribble/comment-reader -(ex -;; S-Expr -> Expr -;; Produce an expression that evaluates to given s-expression, without -;; use of quote (except for symbols and empty list) -(define (quote->expr d) - (match d - [(? boolean?) d] - [(? integer?) d] - [(? string?) d] - [(? char?) d] - [(? symbol?) (list 'quote d)] - [(cons x y) (list 'cons (quote->expr x) (quote->expr y))] - ['() ''()])) - - -(quote->expr 'x) -(quote->expr 5) -(quote->expr "Fred") -(quote->expr '(1 . 2)) -(quote->expr '(1 2 3)) -(quote->expr '(car '(1 2 3))) -(quote->expr '(((1) 2) 3)) -(quote->expr '(1 . 2)) -(quote->expr '("asdf" fred ((one)))) +(racketblock +;; Symbol -> Asm +(define (compile-symbol s) + (seq (Lea 'rax (Plus (symbol->data-label s) type-symb)))) ) -We can now incorporate this into @racket[desugar] to eliminate uses of -compound-data @racket[quote]: +Which works as follows: -@#reader scribble/comment-reader -(ex -;; Expr+ -> Expr -(define (desugar e+) - (match e+ - [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) - `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) - ,(desugar e))] - [(? symbol? x) x] - [(? imm? i) i] - [`',(? symbol? s) `',s] - [`',d (quote->expr d)] - [`(,(? prim? p) . 
,es) `(,p ,@(map desugar es))] - [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] - [`(letrec ,bs ,e0) - `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) - ,(desugar e0))] - [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] - [`(cond . ,_) (desugar (cond->if e+))] - [`(and . ,_) (desugar (and->if e+))] - [`(or . ,_) (desugar (or->if e+))] - [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) -) +@ex[ +(compile-symbol 'Hello!) +] -And now we can @racket[desugar] programs such as these: +And the @racket[literals] function should now include a case for +@racket[(Symb _sym)] to return @racket[(list _sym)]. @ex[ -(desugar '(map f '(1 2 3))) -(desugar '(map f '(and 1 2))) -(desugar '(if x '(1 . 2) 3)) +(literals (parse '['Hello!])) ] -And our prior interpterter will work just fine on these programs: +You might worry that programs that have similar strings and symbols +may cause problems, since @racket[literals] on the following program +only returns a single literal: @ex[ -(interp-env (desugar '(map f '(1 2 3))) `((map ,map) (f ,add1))) -(interp-env (desugar '(map f '(and 1 2))) `((map ,map) (f ,identity))) -(interp-env (desugar '(if x '(1 . 2) 3)) `((x #t))) +(literals (parse '[(begin "Hello!" 'Hello!)])) ] -And: +But actually this is just fine. What happens is that only a single +chunk of memory is allocated to hold the character data @tt{H}, +@tt{e}, @tt{l}, @tt{l}, @tt{o}, @tt{!}, but the @emph{symbol} +@racket['Hello!] is represented as a pointer to this data tagged as a +symbol, while the string @racket["Hello!"] is represented as the same +pointer, but tagged as a string. So this program compiles to: @ex[ -(interp-env (desugar ''(((1) 2) 3)) '()) -(interp-env (desugar ''(1 . 2)) '()) -(interp-env (desugar ''("asdf" fred ((one)))) '()) +(seq (compile-string "Hello!") + (compile-symbol 'Hello!)
+ (compile-literal 'Hello!)) ] -@section[#:tag-prefix "mug"]{Pattern matching} +We have now added a symbol data type and have implemented static +interning just as we did for strings. -One of the most ubiquitous language features we've used, but not -implemented, is pattern matching with the @racket[match] form. +However this strategy alone won't fully solve the problem of symbol +identity because it is possible to dynamically create symbols and even +then it should be the case that symbols with the same name are ``the +same.'' This is in contrast to how strings work: -Pattern matching too can be seen as syntactic sugar since it's easy to -imagine how you could rewrite uses of @racket[match] into equivalent -expressions that didn't involve @racket[match]. +@ex[ +(eq? 'x (string->symbol (string #\x))) +] -For example, consider the program: +Here we are creating a symbol dynamically, using the string +@racket["x"] to specify the name of the symbol. Comparing it to a +@racket['x] that appears statically should still produce @racket[#t]. -@#reader scribble/comment-reader -(racketblock -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (match bt - ['leaf 1] - [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))])) -) +This was in fact a critical property we relied upon in implementing +static string interning. -An alternative, equivalent, formulation that doesn't use -@racket[match] is the following: +This latter example shows that we need to @emph{dynamically} ensure +symbols of the same name evaluate to unique pointers. -@#reader scribble/comment-reader -(racketblock -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (cond - [(eq? 'leaf bt) 1] - [(and (list? bt) - (= 4 (length bt)) - (eq?
'node (first bt))) - (let ((v (second bt)) - (l (third bt)) - (r (fourth bt))) - (* v (* (prod l) (prod r))))] - ; corresponds to a match failure - [else (add1 #f)])) -) +@section[#:tag-prefix "mug"]{Dynamic Interning} -This code is less nice to read and write, but essentially does the -same thing the pattern-matching code does. +Static interning requires identical static occurrences of data to have +a unique representation. Dynamic interning requires identical data, +regardless of when it's created, to have a unique representation. +Symbols are like strings that support dynamic interning. -In this example, each @racket[match]-clause becomes a -@racket[cond]-clause. The question-part of each @racket[cond]-clause -is an expression that determines whether the corresponding -pattern-part of the @racket[match-clause] matches. The answer-part of -each @racket[cond]-clause corresponds to the expression-part of the -@racket[match]-clause, with an add @racket[let]-form that destructures -the scrutinee and binds the pattern variables of the pattern-part. +This is going to require more support from our run-time system. +Essentially, the run-time system needs to keep track of all of the +symbols that have appeared so far during the running of the program. +When a new symbol is dynamically created, e.g. through +@racket[string->symbol], the run-time will check whether this symbol +has been seen before (based on the characters of its name). If it has +been seen before, the run-time can give us the pointer for the +previous use of the symbol, thus preserving the pointer-equality +between this symbol and any other occurrences. -Let's consider the following extension to the grammar of @tt{Expr+} to -include a simplified version of the pattern matchin form we've been -using: +On the other hand if the run-time has not seen this symbol, it can +allocate memory for it, return the pointer, and remember in the future +that this symbol has been seen.
-@#reader scribble/comment-reader -(racketblock -;; type Expr+ = -;; .... -;; | Match - -;; type Match = (match ,Expr+ ,(list Pat Expr+) ...) - -;; type Pat = -;; | #t -;; | #f -;; | Integer -;; | String -;; | Variable -;; | `_ -;; | `'() -;; | `(quote ,Symbol) -;; | `(cons ,Pat ,Pat) -;; | `(list ,Pat ...) -;; | `(? ,Expr ,Pat ...) - ) - -A @racket[match] form consists of an expression to match against, -sometimes callsed the @bold{scrutinee}, followed by some number of -pattern-matching clauses; each one consists of a pattern and -expression to evaluate should the pattern match the scrutinee's value. - -Here a pattern can either be a literal boolean, integer, string, empty -list, or symbol, or a pattern variable, which matches anything and -binds the value to the variable, a ``wildcard'' which matches anything -and binds nothing, a @racket[cons] pattern which matches pairs of -things that match the subpatterns, @racket[list] pattern which matches -lists of a fixed-size where elements matches the subpatterns, or a -@racket[?] pattern which matches if the results of evaluated the first -subexpression applied to scrutinee produces true and all of the -subpatterns match. - -This doesn't include the @racket[quasiquote]-patterns we used above, -but still this is a useful subset of pattern matching and allows us to -write programs such as: +To accomplish this, we will implement a @bold{symbol table}. It +associates symbol names, i.e. the characters of a symbol, with +pointers to symbols. When a program wishes to create a symbol, it +confers with the table to either fetch an existing pointer for the +symbol or create a new one, updating the table. + +To implement this table, we'll use a binary search tree of symbols, +represented in C. We have a globally defined pointer, +@tt{symbol_tbl}, which is initially empty (@tt{NULL}). The work of +dynamically interning a symbol will be done by the @tt{intern_symbol} +function.
It searches the BST, using @tt{symb_cmp} to compare symbols +for alphabetic ordering. If an entry is found, it returns the +previously seen symbol, otherwise it adds the symbol to the table and +returns it. + +@filebox-include[fancy-c "mug/symbol.c"] + +The idea will be that every time a symbol is constructed, we call +@tt{intern_symbol} to intern it. + +So in addition to collecting all of the literals and compiling each to +static data, we will need to collect all of the symbols and emit a +call to @tt{intern_symbol} at the start of the program. + +To accomplish this, we'll design a function: @#reader scribble/comment-reader (racketblock -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (match bt - ['leaf 1] - [(list 'node v l r) (* v (* (prod l) (prod r)))])) +;; Prog -> Asm +;; Initialize the symbol table with all the symbols that occur statically +(define (init-symbol-table p) ...) ) -As alluded to above, each pattern plays two roles: it used to -determine whether the scrutinee matches the pattern, and it used to -bind variable names (in the scope of the RHS expression) to sub-parts -of the scrutinee when it does match. +Here's what it will produce for some example programs: -We can write two helper functions to accomplish each of these tasks: -@itemlist[ +@ex[ +(init-symbol-table (parse '['Hello!])) +(init-symbol-table (parse '[(begin 'Hello! 'Hello!)])) +(init-symbol-table (parse '["Hello!"])) +(init-symbol-table (parse '[(define (Hello! x) 'Hello!) + (Hello! 
'Fren)])) +] -@item{rewrite patterns into Boolean valued expressions that answer -whether the pattern matches the scrutinee,} +For each unique symbol in the program, it emits two instructions: -@item{rewrite pattern and RHS in to expressions in which the pattern -variables of pattern are bound to the appropriately deconstructed -parts of the scrutinee.} +@itemlist[ -] +@item{move the address of the symbol's data into @racket['rdi], the +register used for the first argument in the System V ABI,} +@item{call @tt{intern_symbol}.} +] -Assume: the scrutinee is a variable. (It's easy to establish this assumption in general.) +We know that initially the table is empty, so each of these calls will +insert the given symbol into the table, ensuring that if any subsequent +symbol is interned that has the same character data, calling +@tt{intern_symbol} will produce the original pointer to static data +for that symbol. -We need two functions: +Now we can implement the two operations @racket[string->symbol] and +@racket[symbol->string]. Here's what we do for +@racket[string->symbol]: @#reader scribble/comment-reader (racketblock -;; Pat Variable -> Expr -;; Produces an expression determining if p matches v -(define (pat-match p v) ...) - -;; Pat Variable Expr -> Expr -;; Produce an expression that deconstructs v and binds pattern variables -;; of p in scope of e. -;; ASSUME: v matches p -(define (pat-bind p v e) ...) +;; Op1 -> Asm +(define (compile-op1 p) + (match p + ; ... + ['string->symbol + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + pad-stack + (Call 'intern_symbol) + unpad-stack + (Or rax type-symb))])) ) -Let's turn to @racket[pat-match] first. +This first does some type-tag checking to make sure the argument is a +string, then it untags the pointer and moves it to the @racket['rdi] +register in order to call @racket[intern_symbol]. The address of the +interned symbol is returned in @racket['rax], which is then tagged as +being a symbol.
-Suppose the pattern is a literal @racket[#t]. When does @racket[v] -match it? When @racket[v] is @racket[eq?] to @racket[#t]. +We can now confirm that dynamically created symbols are still +pointer-equal to symbols that statically appear in the program: -So an expression that produces true when this pattern matches is @racket[(eq? #t v)]. - -Handling @racket[#f], integers, characters, symbols, and the empty list is similar. - -What about variables? Suppose the pattern is @racket[x]. When does -@racket[v] match it? Always. So @racket[#t] is an expression that -produces true with this pattern matches. - -Wildcards are the same. - -What about when the pattern is a @racket[cons]-pattern? Suppose the -pattern is @racket[(cons _p1 _p2)] for some patterns @racket[_p1] and -@racket[_p2]. When does @racket[v] match @racket[(cons _p1 _p2)]? -When @racket[v] is a pair and @racket[(car v)] matches @racket[_p1] -and @racket[(cdr v)] matches @racket[_p2]. - -A @racket[list] pattern is similar, except that the scrunity must be a -list with as many elements as there are patterns, and the elements -must match the corresponding subpattern. +@ex[ +(run '(eq? 'fff (string->symbol (make-string 3 #\f)))) +] -What about a @racket[?] pattern? When does @racket[v] match it? -Suppose the pattern is @racket[(? even?)]. When does @racket[v] match -it? When @racket[(even? v)] is true. If the pattern had a non-empty -list of sub-patterns they would all need to match @racket[v], too. +Even creating two symbols dynamically will result in the same pointer +so long as they are spelled the same: +@ex[ +(run '(eq? (string->symbol (make-string 3 #\a)) + (string->symbol (make-string 3 #\a)))) +] -We can now formulate a defintion of @racket[pat-match]: +Going the other direction from symbols to strings is easy: we copy the +string data and tag the pointer as a string. 
Note that we could get +away with simply retagging the pointer and not actually copying the +string, but we make a copy to mimic Racket's behavior and to be safe +should we add string mutation operations. @#reader scribble/comment-reader (racketblock -;; Pat Variable -> Expr -;; Produces an expression determining if p matches v -(define (pat-match p v) +;; Op1 -> Asm +(define (compile-op1 p) (match p - [#t `(eq? #t ,v)] - [#f `(eq? #f ,v)] - [(? integer? i) `(eq? ,i ,v)] - [(? string? s) - `(and (string? ,v) - (string=? ,s ,v))] - [(list 'quote '()) `(eq? '() ,v)] - [(? symbol?) #t] - [(list 'quote (? symbol? s)) `(eq? ,v ',s)] - [(list 'cons p1 p2) - (let ((v1 (gensym)) - (v2 (gensym))) - `(and (cons? ,v) - (let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - (and ,(pat-match p1 v1) - ,(pat-match p2 v2)))))] - [(cons 'list ps) - `(and (list? ,v) - (= (length ,v) ,(length ps)) - ,(pat-match-list ps v))] - [(cons '? (cons e ps)) - `(and (,e ,v) - ,(pats-match ps v))])) + ; ... + ['symbol->string + (seq (assert-symbol rax) + (Xor rax type-symb) + char-array-copy + (Or rax type-str))])) + +;; Asm +;; Copy sized array of characters pointed to by rax +(define char-array-copy + (seq (Mov rdi rbx) ; dst + (Mov rsi rax) ; src + (Mov rdx (Offset rax 0)) ; len + (Add rdx 1) ; #words = 1 + (len+1)/2 + (Sar rdx 1) + (Add rdx 1) + (Sal rdx 3) ; #bytes = 8*#words + pad-stack + (Call 'memcpy) + unpad-stack + (Mov rax rbx) + (Add rbx rdx))) ) -The @racket[list]-pattern case relies on a helper function -@racket[pat-match-list] and the @racket[?]-pattern case relies on -@racket[pats-match], both defined below: +The @racket[char-array-copy] sequence of instructions sets up a call +to C's @tt{memcpy} function giving the address of the string data as +the source, the current heap pointer as the destination, and the +number of bytes which will be copied. After the call returns, the +heap pointer is incremented by that number of copied bytes.
+We can see that this works: -@#reader scribble/comment-reader -(racketblock -;; (Listof Pat) Variable -> Expr -;; Produces an expression determining if every ps matches x -(define (pats-match ps v) - (match ps - ['() #t] - [(cons p ps) - `(and ,(pat-match p v) - ,(pats-match ps v))])) - -;; (Listof Pat) Variable -> Expr -;; Produces an expression determining if each ps matches each element of list v -(define (pat-match-list ps v) - (match ps - ['() #t] - [(cons p ps) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - (and ,(pat-match p v1) - ,(pat-match-list ps v2))))])) -) +@ex[ +(run '(symbol->string 'foo)) +] -Here are some examples: +To observe the copying behavior, notice: @ex[ +(run '(eq? (symbol->string 'foo) (symbol->string 'foo))) +] -(pat-match 'leaf 'bt) -(pat-match '(list 'node v l r) 'bt) -(pat-match '(list 'node (? even? v) l r) 'bt) +@section[#:tag-prefix "mug"]{Uninterned Symbols} + +Sometimes it is useful to create a symbol that is distinct from all +other symbols. We've relied on the ability to create a symbol with +this property whenever we used the @racket[gensym] operation. What +@racket[gensym] produces is an @bold{uninterned} symbol. Even if you +constructed a symbol with the same letters, it would be a different +pointer from the symbol created by a call to @racket[gensym]. + +To add this ability, we will add a precursor to @racket[gensym] +called @racket[string->uninterned-symbol]. It consumes a string and +produces a symbol with the same letters, but distinct from all other +symbols, even those that are spelled the same. + +@ex[ +(eq? 'Hello! (string->uninterned-symbol "Hello!")) ] -These aren't very readable programs that emerge, however, we check -that they're doing the right thing. Note that the elaboration -requires a few functions to be available, such as @racket[list?], -and @racket[length].
We make these available in an initial -environment: +Calling @racket[string->uninterned-symbol] twice with the same string +will produce two different symbols: @ex[ -(define env0 - `((length ,length) - (list? ,list?))) -(interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 'leaf 'bt))) env0) -(interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 8 'bt))) env0) -(interp-env (desugar - `(let ((bt '(node 1 leaf leaf))) - ,(pat-match '(list 'node v l r) 'bt))) - env0) -(interp-env (desugar - `(let ((bt '(node 1 leaf leaf))) - ,(pat-match '(list 'node (? zero?) l r) 'bt))) - env0) -(interp-env (desugar - `(let ((bt '(node 0 leaf leaf))) - ,(pat-match '(list 'node (? zero?) l r) 'bt))) - env0) +(eq? (string->uninterned-symbol "Hello!") + (string->uninterned-symbol "Hello!")) ] -Now moving on to @racket[pat-bind], it follows a similar structure, -but we always assume the given pattern matches the scrutinee. The -addition @tt{Expr} argument represents the right-hand-side expression -of the @racket[match]-clause. The idea is that the pattern variables -of @racket[p] are bound to sub-parts of @racket[v] in @racket[e]. +Implementing @racket[string->uninterned-symbol] is fairly simple: we +allocate a new symbol, thereby ensuring it is unique, and then simply +avoid calling @tt{intern_symbol}: @#reader scribble/comment-reader (racketblock -;; Pat Variable Expr -> Expr -;; Produce an expression that deconstructs v and binds pattern variables -;; of p in scope of e. -;; ASSUME: v matches p -(define (pat-bind p v e) +;; Op1 -> Asm +(define (compile-op1 p) (match p - [#t e] - [#f e] - [(? integer?) e] - [(? string?) e] - [(list 'quote '()) e] - ['_ e] - [(? symbol? x) `(let ((,x ,v)) ,e)] - [(list 'quote (? symbol?)) e] - [(list 'cons p1 p2) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - ,(pat-bind p1 v1 - (pat-bind p2 v2 e))))] - [(cons 'list ps) - (pat-bind-list ps v e)] - [(cons '? (cons _ ps)) - (pats-bind ps v e)])) + ; ...
+ ['string->uninterned-symbol + (seq (assert-string rax) + (Xor rax type-str) + char-array-copy + (Or rax type-symb))])) ) -Here are some examples: +We can confirm this works as expected: @ex[ +(run '(string->uninterned-symbol "foo")) +(run '(eq? 'foo (string->uninterned-symbol "foo"))) +(run '(eq? (string->uninterned-symbol "foo") + (string->uninterned-symbol "foo"))) +] -(pat-bind 'leaf 'bt 'bt) -(pat-bind '(list 'node v l r) 'bt 'v) -(pat-bind '(list 'node (? even? v) l r) 'bt 'v) +With that, we have completed the implementation of symbols and strings +with the proper interning behavior. -] -These are tough to read, but we can confirm what they compute: +@section[#:tag-prefix "mug"]{Matching symbols and strings} -@ex[ -(interp-env (desugar - `(let ((bt '(node 0 leaf leaf))) - ,(pat-bind '(list 'node (? zero? z) l r) 'bt 'z))) - '()) -] +Since we have @racket[match] in our language, we should probably add +the ability to match against strings and symbols. -Putting the pieces together, we can now write a @racket[match->cond] function -that rewrites a @racket[match]-expression into a @racket[cond]-expression: +We can extend the AST definition for patterns: -@#reader scribble/comment-reader -(racketblock -;; Match -> Expr -;; Rewrite match expression into an equivalent cond expression -(define (match->cond m) - (match m - [(cons 'match (cons e mcs)) - (let ((x (gensym))) - `(let ((,x ,e)) - (cond ,@(map (λ (mc) - (match mc - [(list p e) - (list (pat-match p x) (pat-bind p x e))])) - mcs) - ;; fall through to error - [else (add1 #f)])))])) -) +@filebox-include-fake[codeblock "mug/ast.rkt"]{ +;; type Pat = ... +;; | (PSymb Symbol) +;; | (PStr String) +(struct PSymb (s) #:prefab) +(struct PStr (s) #:prefab) +} + +Extending the interpreter is straightforward: + +@filebox-include-fake[codeblock "mug/interp.rkt"]{ +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + ; ... + [(PSymb s) (and (eq? s v) r)] + [(PStr s) (and (string? v) (string=? 
s v) r)])) +} + +Extending the compiler is more involved, but essentially boils down to +doing exactly what the interpreter is doing above: + +@filebox-include-fake[codeblock "mug/compile-expr.rkt"]{ +;; Pat CEnv Symbol -> (list Asm Asm CEnv) +(define (compile-pattern p cm next) + (match p + ; ... + [(PStr s) + (let ((fail (gensym))) + (list (seq (Lea rdi (symbol->data-label (string->symbol s))) + (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-str) + (Jne fail) + (Xor rax type-str) + (Mov rsi rax) + pad-stack + (Call 'symb_cmp) + unpad-stack + (Cmp rax 0) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PSymb s) + (let ((fail (gensym))) + (list (seq (Lea r9 (Plus (symbol->data-label s) type-symb)) + (Cmp rax r9) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))])) +} + +The implementation of string matching uses the @tt{symb_cmp} function +from the run-time system, checking whether it returns @racket[0] to +indicate the strings are the same. (Although the function is +concerned with comparing symbols, symbols and strings are represented +the same, so it works just as well to compare strings.) + +We can confirm some examples: @ex[ -(match->cond '(match '(node 2 leaf leaf) - ['leaf 0] - [(list 'node v l r) v])) +(run '(match 'foo + ['foo 1] + ["foo" 2])) +(run '(match "foo" + ['foo 1] + ["foo" 2])) +(run '(match (cons '+ (cons 1 (cons 2 '()))) + [(cons '+ (cons x (cons y '()))) + (+ x y)])) ] -Finally, we can incorporate @racket[match->cond] into @racket[desugar]: -@#reader scribble/comment-reader -(ex #:no-prompt -;; Expr+ -> Expr -(define (desugar e+) - (match e+ - [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) - `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) - ,(desugar e))] - [(? symbol? x) x] - [(? imm? i) i] - [`',(? symbol? s) `',s] - [`',d (quote->expr d)] - [`(,(? prim? p) . 
,es) `(,p ,@(map desugar es))] - [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] - [`(letrec ,bs ,e0) - `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) - ,(desugar e0))] - [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] - [`(cond . ,_) (desugar (cond->if e+))] - [`(and . ,_) (desugar (and->if e+))] - [`(or . ,_) (desugar (or->if e+))] - [`(match . ,_) (desugar (match->cond e+))] ; new - [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) -) +@section[#:tag-prefix "mug"]{Compiling Symbols and Strings} -Now we can interpret programs such as this: +We can now put the pieces together for the complete compiler. -@ex[ +@(define (code-link fn) + (link (string-append "code/" fn) (tt fn))) -(interp-env - (desugar - '(begin (define (prod bt) - (match bt - ['leaf 1] - [(list 'node v l r) - (* v (* (prod l) (prod r)))])) - - (prod '(node 3 (node 4 leaf leaf) leaf)))) - `((* ,*) (list? ,list?) (length ,length))) +We do a bit of housekeeping and move the code for compiling +expressions to its own module: @code-link{mug/compile-expr.rkt}. -] - - + +The top-level compiler is now: + +@filebox-include[codeblock "mug/compile.rkt"] + +The work of compiling literals and emitting calls to initialize the +symbol table is contained in its own module: + +@filebox-include[codeblock "mug/compile-literals.rkt"] diff --git a/www/notes/neerdowell.scrbl b/www/notes/neerdowell.scrbl new file mode 100644 index 00000000..275fe60e --- /dev/null +++ b/www/notes/neerdowell.scrbl @@ -0,0 +1,419 @@ +#lang scribble/manual + +@(require (for-label (except-in racket compile ... struct?) 
a86)) +@(require redex/pict + racket/runtime-path + scribble/examples + "utils.rkt" + "ev.rkt" + "../fancyverb.rkt" + "../utils.rkt") + +@(define codeblock-include (make-codeblock-include #'h)) + +@(ev '(require rackunit a86)) +@(ev `(current-directory ,(path->string (build-path notes "neerdowell")))) +@(void (ev '(with-output-to-string (thunk (system "make runtime.o"))))) +@(void (ev '(current-objs '("runtime.o")))) +@(for-each (λ (f) (ev `(require (file ,f)))) + '("interp.rkt" "compile.rkt" "compile-expr.rkt" "compile-literals.rkt" "compile-datum.rkt" "utils.rkt" "ast.rkt" "parse.rkt" "types.rkt")) + +@(define this-lang "Neerdowell") + +@title[#:tag this-lang]{@|this-lang|: structures} + +@src-code[this-lang] + +@emph{Structures don't march in the streets.} + +@table-of-contents[] + +@section[#:tag-prefix "neerdowell"]{Defining structs} + +So far we've built up a number of important datatypes in our language: +integers, booleans, characters, pairs, lists, boxes, vectors, strings, +symbols, and functions, to be precise. But despite all of this, +programmers may still want more. In fact, we probably can't +anticipate all of the possible datatypes they might want, so let's +instead provide a mechanism for defining new kinds of datatypes. +We'll add the same @racket[struct] mechanism for user-defined +structure types that we've been using all semester. + +We'll call it @bold{Neerdowell}. + + +@section[#:tag-prefix "neerdowell"]{Syntax} + +We will add the following concrete syntax to Neerdowell programs: + +@#reader scribble/comment-reader +(racketblock +(struct _struct-type (_field0 ...)) +) + +Structure type definitions will only be allowed at the top-level along +with other definitions, so a Neerdowell program might look like: + +@#reader scribble/comment-reader +(racketblock +(struct posn (x y)) +(posn 1 2) +) + +Each structure definition creates four kinds of things: + +@itemlist[ + +@item{A constructor, e.g. @racket[posn].} + +@item{A predicate, e.g. 
@racket[posn?].} + +@item{A set of accessor functions, e.g. @racket[posn-x] and @racket[posn-y].} + +@item{A pattern constructor, e.g. @racket[posn].} +] + +The constructor is a function that takes as many arguments as fields +in the structure type definition and creates an instance of that +structure type with the given arguments populating the fields. + +The predicate is a unary function that takes any value and returns +@racket[#t] if it is an instance of that structure type. + +The accessor functions are each unary functions that take an instance +of the structure type and retrieve the value of the corresponding +field. + +We're going to take a slightly different approach to parsing and +interpreting/compiling structure definitions. Rather than +implementing the structure operations directly, we will instead +consider a set of generic structure operations that can be used to +implement the constructor, predicate, and accessor operations. + +A structure definition will be translated into a set of source-level +function definitions that use these generic operations to implement +the structure-specific operations. That translation will be +carried out by the parser. + +Here are the generic operations: + +@#reader scribble/comment-reader +(racketblock +;; make-struct : Symbol Value ... -> StructVal +;; struct? : Symbol Value -> Boolean +;; struct-ref : Symbol Int StructVal -> Value +) + +Here the @tt{Symbol} argument represents the name of the structure +type. So for example, to create an instance of the @racket[posn] +structure type, you'd call @racket[(make-struct 'posn 1 2)]. To check +if @racket[_x] is an instance of a @racket[posn] structure, you'd call +@racket[(struct? 'posn _x)]. To access the first field of an instance +of a @racket[posn] structure type @racket[_x], you'd call @racket[(struct-ref +'posn 0 _x)].
+ +With these operations in mind, you can see @racket[struct] as a kind +of shorthand: + +@#reader scribble/comment-reader +(racketblock + +(struct posn (x y)) + +;; means: + +(define (posn x y) + (make-struct 'posn x y)) + +(define (posn? x) + (struct? 'posn x)) + +(define (posn-x x) + (struct-ref 'posn 0 x)) + +(define (posn-y x) + (struct-ref 'posn 1 x)) +) + +To accomplish this, we add the following function to the parser: + +@#reader scribble/comment-reader +(racketblock +;; parse-struct : S-Expr -> [Listof Defn] +) + +Here's an example: + +@ex[ +(parse-struct '(struct posn (x y))) +] + +And here's an example of an expression that makes use of some of the +operations: + +@ex[ +(parse-e '(posn-x (posn 3 4))) +] + +The generic struct primitives are @emph{only} used by the code +generated by the struct definitions, a property which is achieved by +the parser by not treating @racket['make-struct], @racket['struct?], +and @racket['struct-ref] as keywords. If you write a program that +uses these names, they will be treated as variables, not primitives: + +@ex[ +(parse-e '(struct? 'posn x)) +] + +The @racket[parse-struct] function is defined as follows: + +@filebox-include-fake[codeblock "neerdowell/parse.rkt"]{ +;; S-Expr -> [Listof Defn] +(define (parse-struct s) + (match s + [(list 'struct (? symbol? n) flds) + (if (andmap symbol? flds) + (list* (make-struct-defn-construct n flds) + (make-struct-defn-predicate n) + (make-struct-defn-accessors n (reverse flds))) + (error "parse struct definition error"))] + [_ (error "parse struct definition error")])) + +;; Id [Listof Id] -> [Listof Defn] +(define (make-struct-defn-construct n flds) + (Defn n flds + (Prim 'make-struct (cons (Quote n) (map Var flds))))) + +;; Id -> [Listof Defn] +(define (make-struct-defn-predicate n) + (Defn (symbol-append n '?) (list 'x) + (Prim 'struct? 
(list (Quote n) (Var 'x))))) + +;; Id [Listof Id] -> [Listof Defn] +(define (make-struct-defn-accessors n flds) + (match flds + ['() '()] + [(cons f flds) + (cons (Defn (symbol-append n '- f) (list 'x) + (Prim 'struct-ref + (list (Quote n) + (Quote (length flds)) + (Var 'x)))) + (make-struct-defn-accessors n flds))])) + +;; Symbol ... -> Symbol +(define (symbol-append . ss) + (string->symbol + (apply string-append (map symbol->string ss)))) +} + + +@section[#:tag-prefix "neerdowell"]{Structure and Interpretation} + +In the interpreter, we choose to represent a structure instance as an +instance of a @racket[StructVal] structure, which will contain the +structure type name (represented as a symbol) and a vector, which +holds the values of the fields: + +@#reader scribble/comment-reader +(racketblock +;; type Value = ... +;; | (StructVal Symbol (Vectorof Val)) +) + +So for example the value produced by @racket[(posn 3 4)] would be +@racket[(StructVal 'posn #(3 4))]. With this representation, +implementing the generic operations is pretty simple. Here's the +complete @racket[interp-prims] function. + +@filebox-include-fake[codeblock "neerdowell/interp-prims.rkt"]{ +;; type Struct = (StructVal Symbol (Vectorof Value)) +(struct StructVal (name vals)) + +;; Op [Listof Value] -> Answer +(define (interp-prim p vs) + (match (cons p vs) + ;; ... + [(list 'struct? s v) + (match v + [(StructVal n _) (eq? s n)] + [_ #f])] + [(list 'struct-ref s i (StructVal n vs)) + (if (and (eq? s n) (<= 0 i (sub1 (vector-length vs)))) + (vector-ref vs i) + 'err)] + ;; OpN + [(cons 'make-struct (cons (? symbol? n) vs)) + (StructVal n (list->vector vs))])) +} + +And we can try it out: + +@ex[ +(define (run . p) + (interp (parse p))) + +(run '(struct posn (x y)) + '(posn? 
(posn 3 4))) + +(run '(struct posn (x y)) + '(posn-x (posn 3 4))) + +(run '(struct posn (x y)) + '(posn-y (posn 3 4))) +] + +@section[#:tag-prefix "neerdowell"]{Compiling Structures} + +Providing support for structures in the compiler largely follows the +same outline of previous additions. In particular, we add yet another +tagged pointer type to represent structure instances. The runtime is +extended to add support for printing structures. + +Structure instances are represented as an array of values. The first +element is a symbol, which names the structure type and the remaining +elements hold the values of the fields for the structure. The length +of the structure is not stored in memory since the width of structure +instances is statically determined, just like closures. + +Compiling the @racket[struct?] and @racket[struct-ref] operations is +fairly straightforward, requiring just a slight bit more than say +@racket[vector?] and @racket[vector-ref]. The slight bit more is +checking that the structure type symbols match between the +(automatically inserted) argument and the structure instance value. +The @racket[struct-ref] operation actually requires slightly less in +that there's no need for bounds checking since it is the compiler +itself that inserted indices and we can trust the code we wrote is +correct. For a similar reason, there's no need to do type tag +checking on the structure type and index arguments, but there still +needs to be checking of the structure instance argument since that +comes from the user. + +One consequence of the source transformation approach to implementing +structures is that, although the structure type symbol and the integer +index have known, fixed types (a symbol and an integer, respectively), +they will still be represented as arbitrary values, i.e. as tagged +pointers or bit-shifted numbers. That means the operations must +compute accordingly and there's a small performance hit because of it.
+(It wouldn't be difficult to specialize the compilation of these +primitives so these arguments are treated distinctly, thus avoiding +the performance hit, but making the compiler itself more complicated.) + +Here is the implementation of @racket[compile-op] for @racket[struct?] +and @racket[struct-ref]: + +@filebox-include-fake[codeblock "neerdowell/compile-ops.rkt"]{ +;; Op -> Asm +(define (compile-op p) + (match p + ;; ... + ['struct? + (let ((f (gensym)) + (t (gensym))) + (seq (Pop r8) + ; don't need to do this we generated the code + ; (assert-symbol r8) + (Mov r9 rax) ; check the tag bits + (And r9 ptr-mask) + (Cmp r9 type-struct) ; compare tag to the structure tag + (Jne f) ; not a structure + (Xor rax type-struct) ; untag the structure pointer + (Mov rax (Offset rax 0)) ; get the structure type symbol + (Cmp r8 rax) ; compare it to the type argument + (Mov rax (value->bits #t)) + (Jne f) ; a structure, but not this kind + (Jmp t) ; a structure of the same kind + (Label f) + (Mov rax (value->bits #f)) + (Label t)))] + + ['struct-ref + (seq (Pop r8) + (Pop r11) + ; don't need to check + ; (assert-symbol r11) + ; (assert-integer r8) + (assert-struct rax) + (Xor rax type-struct) + (Mov r10 (Offset rax 0)) ; get the structure type symbol + (Cmp r11 r10) ; compare it to the type argument + (Jne 'raise_error_align) ; a structure, but not this kind + (Sar r8 int-shift) ; convert to raw int + (Add r8 1) ; +1 to skip symbol element + (Sal r8 3) ; *8 for byte offset + (Add rax r8) + (Mov rax (Offset rax 0)))])) +} + +Handling @racket[make-struct] is slightly more complicated. The +complication comes from the fact that @racket[make-struct] needs to be +a primitive that accepts an arbitrary number of arguments. All other +primitives have a fixed arity so they know if and how many arguments +to pop from the stack, whereas for @racket[make-struct] it depends on +the kind of structure being created.
+ +We assume the @racket[make-struct] primitive is given the appropriate +number of arguments. (This is consistent with many other assumptions +we make about the well-formedness of programs that are not explicitly +checked by the compiler.) Under this assumption, the number of +arguments indicates how many values to pop, so we have a special case +for @racket[make-struct] in @racket[compile-prim] to compute the +length and call a separate function for handling the +@racket[make-struct] operation: + +@filebox-include-fake[codeblock "neerdowell/compile-expr.rkt"]{ +;; Op (Listof Expr) CEnv -> Asm +(define (compile-prim p es c) + (seq (compile-es* es c) + (match p + ['make-struct (compile-make-struct (length es))] + [_ (compile-op p)]))) +} + +The @racket[compile-make-struct] function is defined as follows: + +@filebox-include-fake[codeblock "neerdowell/compile-ops.rkt"]{ +;; Nat -> Asm +;; Emit instructions for creating a structure of length n +;; using values on top of stack +(define (compile-make-struct n) + (seq (compile-make-struct/a n 1) + (Mov rax rbx) + (Or rax type-struct) + (Add rbx (* 8 n)))) + +;; Nat Nat -> Asm +;; Pop elements off stack, writing them to heap +(define (compile-make-struct/a n i) + (if (= n i) + (seq (Mov (Offset rbx (* 8 (- n i))) rax)) + (seq (Mov (Offset rbx (* 8 (- n i))) rax) + (Pop rax) + (compile-make-struct/a n (add1 i))))) +} + +We can now see structures in action: + +@ex[ +(define (run . p) + (bits->value (asm-interp (compile (parse p))))) + +(run '(struct posn (x y)) + '(posn? (posn 3 4))) + +(run '(struct posn (x y)) + '(let ((p (posn 3 4))) + (cons (posn-x p) + (posn-y p)))) + +(run '(struct leaf ()) + '(struct node (v l r)) + '(define (count bt) + (if (leaf?
bt) + 0 + (+ 1 (+ (count (node-l bt)) + (count (node-r bt)))))) + '(count (node 8 + (node 3 (leaf) (leaf)) + (leaf)))) +] \ No newline at end of file diff --git a/www/notes/outlaw.scrbl b/www/notes/outlaw.scrbl new file mode 100644 index 00000000..540afb3d --- /dev/null +++ b/www/notes/outlaw.scrbl @@ -0,0 +1,392 @@ +#lang scribble/manual + +@(require (for-label (except-in racket compile ... struct?) a86)) +@(require redex/pict + racket/runtime-path + scribble/examples + "utils.rkt" + "ev.rkt" + "../fancyverb.rkt" + "../utils.rkt") + +@(define codeblock-include (make-codeblock-include #'h)) + +@(define (shellbox . s) + (parameterize ([current-directory (build-path notes "outlaw")]) + (filebox (emph "shell") + (fancyverbatim "fish" (apply shell s))))) + +@(require (for-syntax "../utils.rkt" racket/base "utils.rkt")) +@(define-syntax (shell-expand stx) + (syntax-case stx () + [(_ s ...) + (parameterize ([current-directory (build-path notes "abscond")]) + (begin (apply shell (syntax->datum #'(s ...))) + #'(void)))])) + +@;{ Have to generate a-whole.rkt before listing it below.} +@(shell-expand "racket -t combine.rkt -m a.rkt > a-whole.rkt") + +@(ev '(require rackunit a86)) +@(ev `(current-directory ,(path->string (build-path notes "outlaw")))) +@(void (ev '(with-output-to-string (thunk (system "make runtime.o"))))) +@(void (ev '(current-objs '("runtime.o")))) +@(for-each (λ (f) (ev `(require (file ,f)))) + '(#;"interp.rkt" "compile.rkt" "compile-expr.rkt" "compile-literals.rkt" "compile-datum.rkt" "utils.rkt" "ast.rkt" "parse.rkt" "types.rkt")) + +@(define this-lang "Outlaw") + +@title[#:tag this-lang]{@|this-lang|: self-hosting} + +@src-code[this-lang] + +@emph{The king is dead, long live the king!} + +@table-of-contents[] + +@section[#:tag-prefix "neerdowell"]{Bootstrapping the compiler} + +Take stock for a moment of the various language features we've built +over the course of these notes and assignments: we've built a +high-level language with built-in data types 
like booleans, integers, +characters, pairs, lists, strings, symbols, vectors, boxes. Users can +define functions, including recursive functions. Functions are +themselves values and can be constructed anonymously with +@racket[lambda]. We added basic I/O facilities. We added the ability +to overload functions based on the number of arguments received using +@racket[case-lambda], the ability to define variable arity functions +using rest arguments, and the ability to call functions with arguments +from a list using @racket[apply]. Users can define their own +structure types and use pattern matching to destructure values. +Memory management is done automatically by the run-time system. + +It's a pretty full-featured language and there are lots of interesting +programs we could write in our language. One of the programs we could +@emph{almost} write is actually the compiler itself. In this section, +let's bridge the gap between the features of Racket our compiler uses +and those that our compiler implements and then explore some of the +consequences. + + +We'll call it @bold{Outlaw}. + +@section[#:tag-prefix "outlaw"]{Features used by the Compiler} + +Let's take a moment to consider all of the language features we +@emph{use} in our compiler source code, but we haven't yet +implemented. Open up the source code for, e.g. @secref{Neerdowell}, +and see what you notice: + +@itemlist[ + +@item{Modules: programs are not monolithic; they are broken into +@bold{modules} in separate files like @tt{compile-stdin.rkt}, +@tt{parse.rkt}, @tt{compile.rkt}, etc.} + +@item{a86: our compiler relies heavily on the @secref{a86} library +that provides all of the constructors for a86 instructions and +functions like @racket[asm-display] for printing a86 instructions +using NASM syntax.} + +@item{Higher-level I/O: at the heart of the front-end of our compiler +is the use of Racket's @racket[read] function, which reads in an +s-expression.
We also use things like @racket[read-line] which reads +in a line of text and returns it as a string.} + +@item{Lots and lots of Racket functions: our compiler makes use of +lots of built-in Racket functions that we haven't implemented. These +are things like @racket[length], @racket[map], @racket[foldr], +@racket[filter], etc. Even some of the functions we have implemented +have more featureful counterparts in Racket which we use. For +example, our @racket[+] primitive takes two arguments, while Racket's +@racket[+] function can take any number of arguments.} + +@item{Primitives as functions: the previous item brings up an +important distinction between our language and Racket. For us, +things like @racket[+] are @bold{primitives}. Primitives are +@emph{not} values. You can't return a primitive from a function. You +can't make a list of primitives. This means even if we had a +@racket[map] function, you couldn't pass @racket[add1] as an argument, +since @racket[add1] is not a value. In Racket, there's really no such +thing as a primitive; things like @racket[add1], @racket[+], +@racket[cons?], etc. are all just functions.} + +] + +If we want our compiler to be written in the language it implements we +have to deal with this gap in some way. For each difference between +what we implement and what we use, we basically only have two ways to +proceed: + +@itemlist[#:style 'ordered + + @item{rewrite our compiler source code to @emph{not} use +that feature, or} + + @item{implement it.} +] + +Let's take some of these in turn. + +@section[#:tag-prefix "outlaw"]{Punting on Modules} + +Our compiler currently works by compiling a whole program, which we +assume is given all at once as input to the compiler. The compiler +source code, on the other hand, is sensibly broken into separate +modules. + +We @emph{could} think about designing a module system for our +language. We'd have to think about how separate compilation of +modules would work.
At a minimum our compiler would have to deal with +resolving module references made through @racket[require]. + +While module systems are a fascinating and worthy topic of study, we +don't really have the time to do them justice and instead we'll opt to +punt on the module system. Instead we can rewrite the compiler source +code as a single monolithic source file. + +That's not a very good software engineering practice and it will be a +bit of pain to maintain the complete @this-lang source file. As a +slight improvement, we can write a little utility program that given a +file containing a module will recursively follow all @racket[require]d +files and print out a single, @racket[require]-free program that +includes all of the modules that comprise the program. + +Let's see an example of the @tt{combine.rkt} utility in action. + +Suppose we have a program that consists of the following files: + +@codeblock-include["outlaw/a.rkt"] +@codeblock-include["outlaw/b.rkt"] +@codeblock-include["outlaw/c.rkt"] + +Then we can combine these files into a single program +as follows: + +@shellbox["racket -t combine.rkt -m a.rkt > a-whole.rkt"] + +@codeblock-include["outlaw/a-whole.rkt"] + + +This gives us a rudimentary way of combining modules into a single +program that can be compiled with our compiler. The idea will be that +we construct a single source file for our compiler by running +@tt{combine.rkt} on @tt{compile-stdin.rkt}. The resulting file will +be self-contained and include everything @tt{compile-stdin.rkt} +depends upon. + +It's worth recognizing that this isn't a realistic alternative to +having a module system. In particular, combining modules in this way +breaks usual abstractions provided by modules. For example, it's +common for modules to define their own helper functions or stateful +data that are not exported (via @racket[provide]) outside the module. 
+This ensures that clients of the module cannot access potentially +sensitive data or operations or mess with invariants maintained by a +module's exports. Our crude combination tool does nothing to enforce +these abstraction barriers. + +That's an OK compromise to make for now. The idea is that +@tt{combine.rkt} doesn't have to work @emph{in general} for combining +programs in a meaning-preserving way. It just needs to work for one +specific program: our compiler. + +@section[#:tag-prefix "outlaw"]{Bare-bones a86} + +Our compiler makes heavy use of the @secref{a86} library that provides +all of the constructors for a86 instructions and functions like +@racket[asm-display] for printing a86 instructions using NASM syntax. +That library is part of the @tt{langs} package. + +The library at its core provides structures for representing a86 +instructions and some operations that work on instructions. While the +library has a bunch of functionality that provides for good, early +error checking when you construct an instruction or a whole a86 +program, we really only need the structures and functions of the +library. + +To make the compiler self-contained we can build our own bare-bones +version of the a86 library and include it in the compiler. + +For example, here's the module that defines an AST for a86 instructions: + +@codeblock-include["outlaw/a86/ast.rkt"] + +And here's the module that implements the needed operations for +writing out instructions in NASM syntax: + +@codeblock-include["outlaw/a86/printer.rkt"] + +OK, so now we've made a86 a self-contained part of the compiler. +The code consists of a large AST definition and some functions that +operate on the a86 AST data type. The printer makes use of some Racket +functions we haven't used before, like @racket[system-type] and +@racket[number->string], and also some other high-level IO functions +like @racket[write-string].
We'll have to deal with these features, +so while we crossed one item off our list (a86), we added a few more, +hopefully smaller problems to solve. + +@section[#:tag-prefix "outlaw"]{Racket functions, more I/O, and primitives} + +We identified three more gaps between our compiler's implementation +language and its implemented language: lots of Racket functions like +@racket[length], @racket[map], etc., more I/O functions that operate +at a higher-level than our @racket[write-byte] and @racket[read-byte] +such as @racket[write-string], @racket[read], @racket[read-line], +etc., and finally the issue that primitives are not values. + +There are many ways we could proceed from here. We could, for +example, spend some time adding new primitives to our compiler +that implement all the missing functionality like @racket[length], +@racket[write-string], and others. + +Let's consider adding a @racket[length] primitive. It's not terribly +difficult. We could add a unary operation called @racket['length], +which would emit the following code: + +@#reader scribble/comment-reader +(racketblock +;; assume list is in rax +(let ((done (gensym 'done)) + (loop (gensym 'loop))) + (seq (Mov r8 0) ; count = 0 + (Label loop) + (Cmp rax (value->bits '())) ; if empty, done + (Je done) + (assert-cons rax) ; otherwise, should be a cons + (Xor rax type-cons) + (Mov rax (Offset rax 0)) ; move cdr into rax + (Add r8 (value->bits 1)) ; increment count + (Jmp loop) ; loop + (Label done) + (Mov rax r8))) ; return count +) + +We can play around and make sure this assembly code is actually +computing the length of the list in @racket['rax]: + +@(void (ev '(current-objs '()))) + +@#reader scribble/comment-reader +(ex +(require neerdowell/parse + neerdowell/compile-datum + neerdowell/compile-ops + neerdowell/types) +(require a86) + +;; Datum -> Natural +;; Computes the length of d in assembly +(define (length/asm d) + (bits->value + (asm-interp + (seq (Global 'entry) + (Label 'entry) + (compile-datum
d) + ; assume list is in rax + (let ((done (gensym 'done)) + (loop (gensym 'loop))) + (seq (Mov r8 0) ; count = 0 + (Label loop) + (Cmp rax (value->bits '())) ; if empty, done + (Je done) + (assert-cons rax) ; otherwise, should be a cons + (Xor rax type-cons) + (Mov rax (Offset rax 0)) ; move cdr into rax + (Add r8 (value->bits 1)) ; increment count + (Jmp loop) ; loop + (Label done) + (Mov rax r8))) ; return count + (Ret) + (Label 'raise_error_align) ; dummy version, returns -1 + (Mov rax -1) + (Ret))))) + +(length/asm '()) +(length/asm '(1 2 3)) +(length/asm '(1 2 3 4 5 6)) +) + +Looks good. + +Alternatively, instead of a primitive, we could add a @racket[length] +@emph{function} by creating a static function value and binding it to +the variable @racket[length]. The code for the function would +essentially be the same as the primitive above: + +@#reader scribble/comment-reader +(racketblock +(seq (Data) + (Label 'length_func) ; the length closure + (Dq 'length_code) ; points to the length code + (Text) + (Label 'length_code) ; code for length + (Cmp r15 1) ; expects 1 arg + (Jne 'raise_error_align) + (Pop rax) + ; ... length code from above + (Add rsp 8) ; pop off function + (Ret)) +) + + +The @racket[compile] function could push the binding for +@racket[length] (and potentially other built-in functions) on the +stack before executing the instructions of the program compiled in an +environment that included @racket['length]. This would effectively +solve the problem for @racket[length]. + +We'd have to do something similar for @racket[map], @racket[foldr], +@racket[memq], and everything else we needed. + + +The @emph{problem} with this approach is that we will be spending a bunch of +time writing lots and lots of assembly code. An activity we had hoped +to avoid by building a high-level programming language! Even worse, +some of the functions we'd like to add, e.g. @racket[map], will be +much more complicated to write in assembly compared to @racket[length].
+ +But here's the thing. Consider a Racket definition of @racket[length]: + +@#reader scribble/comment-reader +(racketblock +(define (length xs) + (match xs + ['() 0] + [(cons _ xs) (add1 (length xs))])) +) + +Note that this definition is within the language we've built. Instead +of writing the assembly code for @racket[length], we could write a +definition in @this-lang and simply compile it to obtain assembly code +that implements a @racket[length] function. + +Many of the functions we need in the compiler can be built up this +way. Instead of spending our time writing and debugging assembly +code, which is difficult to do, we can simply write some Racket code. + +With this, we will introduce a @bold{standard library}. The idea is that +the standard library, like the run-time system, is a bundle of code that +will accompany every executable; it will provide a set of built-in functions +and the compiler will be updated to compile programs in the environment of +everything provided by the standard library. + + +@section[#:tag-prefix "outlaw"]{Building a standard library} + +... + +@section[#:tag-prefix "outlaw"]{Parsing primitives, revisited} + +... + +@section[#:tag-prefix "outlaw"]{A few more primitives} + +... + +@section[#:tag-prefix "outlaw"]{Dealing with I/O} + +... + +@section[#:tag-prefix "outlaw"]{Putting it all together} + +...
diff --git a/www/notes/sugar.scrbl b/www/notes/sugar.scrbl new file mode 100644 index 00000000..c94b4ed5 --- /dev/null +++ b/www/notes/sugar.scrbl @@ -0,0 +1,1196 @@ +#lang scribble/manual + +@(require (for-label (except-in racket ...))) +@(require redex/pict + racket/runtime-path + scribble/examples + "utils.rkt" + "ev.rkt" + "../utils.rkt") + +@(define codeblock-include (make-codeblock-include #'h)) + +@(for-each (λ (f) (ev `(require (file ,(path->string (build-path notes "mug" f)))))) + '("interp.rkt" "interp-env.rkt" #;"compile.rkt" "syntax.rkt" "pat.rkt" #;"asm/interp.rkt" #;"asm/printer.rkt")) + +@title[#:tag "Mug"]{Mug: matching, throwing, quoting} + +@table-of-contents[] + +@section[#:tag-prefix "mug"]{Scaling up with syntax} + +We have developed a small, but representative functional programming +language. But there's still a long way to go from our Loot language +to the kind of constructs we expect in a modern, expressive +programming language. In particular, there's a fairly large gap +between Loot and the subset of Racket we've explored so far in this +class. + +For example, our programs have made extensive use of pattern matching, +quotation, quasi-quotation, and lots of built-in functions. In this +section, we'll examine how to scale Loot up to a language that's nicer +to program in. As we'll see, much of this can be accomplished +@emph{without extending the compiler}. Rather we can explain these +language features by @bold{elaboration} of fancier language syntax +into the existing core forms. + +In this chapter, we'll explore several ideas at the level of an +interpreter, but the techniques should work just as well for the compiler. + +@section[#:tag-prefix "mug"]{The Loot+ interpreter} + +Let us start with an interprter for the Loot language, plus all of the +extensions considered in the various assignments up through +@seclink["Assignment 7"]{Assignment 7}. 
+ + +@codeblock-include["mug/interp-env.rkt"] + +@section[#:tag-prefix "mug"]{A bit more sugar} + + +As we saw in @seclink["Loot"]{Loot}, we can consider syntactic +extensions of a language that elaborate into the core @tt{Expr} form of +a language. We saw this with the @racket[define]-form that we rewrote +into @racket[letrec]. We can consider further extensions such as +@racket[and], @racket[or], and even @racket[cond]. + +Here are functions for transforming each of these forms into simpler +forms: + +@#reader scribble/comment-reader +(ex +(define (cond->if c) + (match c + [`(cond (else ,e)) e] + [`(cond (,c ,e) . ,r) + `(if ,c ,e (cond ,@r))])) + +(define (and->if c) + (match c + [`(and) #t] + [`(and ,e) e] + [`(and ,e . ,r) + `(if ,e (and ,@r) #f)])) + +(define (or->if c) + (match c + [`(or) #f] + [`(or ,e) e] + [`(or ,e . ,r) + (let ((x (gensym))) + `(let ((,x ,e)) + (if ,x ,x (or ,@r))))])) +) + +Note that these functions do not necessarily eliminate @emph{all} +@racket[cond], @racket[and], or @racket[or] forms, but rather +eliminate @emph{one} occurrence, potentially creating a new occurrence +within a subexpression: + +@ex[ +(cond->if '(cond [(even? x) 8] [else 9])) +(cond->if '(cond [else 9])) +(and->if '(and)) +(and->if '(and 8)) +(and->if '(and 8 9)) +(or->if '(or)) +(or->if '(or 8)) +(or->if '(or 8 9)) +] + +The idea is that another function will drive the repeated use of these +functions until all these extended forms are eliminated. + +You may wonder why the @racket[or] elaboration is complicated by the +@racket[let]-binding. Consider a potential simpler approach: + +@#reader scribble/comment-reader +(ex +(define (or->if-simple c) + (match c + [`(or) #f] + [`(or ,e) e] + [`(or ,e . ,r) + `(if ,e ,e (or ,@r))])) +) + +But compare the elaboration of the following example: + +@ex[ +(or->if-simple '(or (some-expensive-function) #t)) +(or->if '(or (some-expensive-function) #t)) +] + +The second program is much more efficient.
Moreover, if +@racket[some-expensive-function] had side-effects, the first program +would duplicate them, thereby changing the program's intended +behavior. + +We can incorporate these new functions into the desugar function, +which will transform extended programs into ``core'' expressions: + +@#reader scribble/comment-reader +(ex +;; Expr+ -> Expr +(define (desugar e+) + (match e+ + [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) + `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) + ,(desugar e))] + [(? symbol? x) x] + [(? imm? i) i] + [`',(? symbol? s) `',s] + [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] + [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] + [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] + [`(letrec ,bs ,e0) + `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) + ,(desugar e0))] + [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] + [`(cond . ,_) (desugar (cond->if e+))] + [`(and . ,_) (desugar (and->if e+))] + [`(or . ,_) (desugar (or->if e+))] + [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) +) + +Note how a @racket[cond], @racket[and], or @racket[or] form are +transformed and then @racket[desugar]ed again. This will take care of +eliminating any derived forms introduced by the transformation, which +is useful so that derived forms can be defined in terms of other +derived forms, including itself! + +@ex[ +(desugar '(cond [(even? x) 8] [else 9])) +(desugar '(cond [else 9])) +(desugar '(and)) +(desugar '(and 8)) +(desugar '(and 8 9)) +(desugar '(or)) +(desugar '(or 8)) +(desugar '(or 8 9)) +] + + +Derived forms that can be elaborated away by rewriting into more +primitive forms are sometimes called @bold{syntactic sugar} since they +are not fundamental but ``sweeten'' the experience of writing programs +with useful shorthands. We call the elaboration function @racket[desugar] +to indicate that it is eliminating the syntactic sugar. 
+ +@section[#:tag-prefix "mug"]{Exceptional behavior} + +To see an example of taking the idea of program transformation as a +method for implementing language features, let's consider the case of +exceptions and exception handlers, a common feature of modern +high-level languages. + +Consider the following program for computing the product of all the +elements in a binary tree: + +@#reader scribble/comment-reader +(ex +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (match bt + ['leaf 1] + [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))])) + +(prod 'leaf) +(prod '(node 8 leaf leaf)) +(prod '(node 8 (node 2 leaf leaf) (node 4 leaf leaf))) +) + +Now consider the work done in an example such as this: + +@ex[ +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +] + +From a quick scan of the elements, we know the answer is 0 without +doing any arithmetic. But the @racket[prod] function will do a bunch +of multiplication to actually figure this out. + +To see this, let's use a helper function to replace @racket[*] that prints +every time it multiplies two numbers: + +@#reader scribble/comment-reader +(ex +;; Number Number -> Number +(define (mult x y) + (printf "mult: ~a x ~a\n" x y) + (* x y)) + +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (match bt + ['leaf 1] + [`(node ,v ,l ,r) (mult v (mult (prod l) (prod r)))])) + +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +) + +This could potentially be bad if the tree were quite large. + +How can we do better? One option is to detect if the value at a node +is zero and simply avoid recurring on the left and right subtrees at +that point: + +@#reader scribble/comment-reader +(ex +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (match bt + ['leaf 1] + [`(node ,v ,l ,r) + (if (zero?
v) + 0 + (mult v (mult (prod l) (prod r))))])) +) + +Does this help our answer? Only slightly: + +@ex[ +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +] + +Why? + +The problem is that you may encounter the zero element deep within a +tree. At that point you not only want to avoid doing the +multiplication of subtrees, but also of the elements surrounding the +zero. But we seemingly don't have control over the context +surrounding the node with a zero in it, just the subtrees. What can +we do? + +One option, if the language provides it, is to @bold{raise an +exception}, signalling that a zero element has been found. An outer +function can @bold{catch} that exception and produce zero. Such a +program will avoid doing any multiplication in case there's a zero in +the tree. + +Racket comes with an exception mechanism that uses @racket[raise] to +signal an exception, which is propagated to the nearest enclosing +exception handler. If there is no such handler, an uncaught exception +error occurs. + +@ex[ + +(eval:error (raise 5)) +(eval:error (mult (raise 5) 2)) +(eval:error (mult (raise (mult 5 3)) 2)) + +] + +The general form of an exception handler uses the +@racket[with-handlers] form that includes a series of predicates and +handler expressions. We'll consider a simpler form called +@racket[catch] that unconditionally catches any exception thrown and +handles it with a function that takes the raised value as an argument.
+It can be expressed in terms of the more sophisticated +@racket[with-handlers] form: + +@ex[ +(define-syntax-rule (catch e f) + (with-handlers ([(λ (x) #t) f]) e)) + +(catch (raise 5) (λ (x) x)) +(catch (mult (raise 5) 2) (λ (x) x)) +(catch (mult (raise (mult 5 3)) 2) (λ (x) x)) +(catch (mult (mult 5 3) 2) (λ (x) x)) +(catch (mult (mult 5 3) 2) (λ (x) (mult x x))) +(catch (mult (raise (mult 5 3)) 2) (λ (x) (mult x x))) +] + +Now we can solve our problem: + +@#reader scribble/comment-reader +(ex +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (catch (prod/r bt) (λ (x) 0))) + +;; BT -> Number +;; Throws: 0 +(define (prod/r bt) + (match bt + ['leaf 1] + [`(node ,v ,l ,r) + (if (zero? v) + (raise 0) + (mult v (mult (prod/r l) (prod/r r))))])) + +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +) + +(This code is a bit problematic for reasons that are beside the point +of this section, but... the problem is this will catch any exception, +including things like system signals, out of memory exceptions, etc. +A better solution would have the handler check that the exception +value was 0 and re-raise it if not. That way it doesn't ``mask'' any +other exceptions.) + +This code works great for our purposes, but what if the language +didn't provide an exception handling mechanism? Could we achieve the +same effect without relying on exceptions? + +One solution is to re-write the program in what's called +@bold{continuation passing style} (CPS). Continuation passing style +makes explicit what is implicit in the recursive calls to +@racket[prod] in our original program, which is that after recursively +computing the product of the subtree, we have to do more work such as +another recursive call and multiplication. By making this work +explicit, we gain control over it and have the option to do things +like throw away this work. + +Here is the basic idea. 
We will write a version of @racket[prod] that +takes an additional argument which represents ``the work to be done +after this function call completes.'' It will take a single argument, +a number, which is the result of this function call, and it will +produce some final result for the computation (in this case, a number). + +In general, we want @racket[(k (prod bt))] ≡ @racket[(prod/k bt k)] +for all functions @racket[k] and binary trees @racket[bt]. + +Starting from the spec, we have: + +@#reader scribble/comment-reader +(ex +;; BT (Number -> Number) -> Number +(define (prod/k bt k) + (k (prod bt))) +) + +We can unroll the definition of @racket[prod]: + +@#reader scribble/comment-reader +(ex +(define (prod/k bt k) + (match bt + ['leaf (k 1)] + [`(node ,v ,l ,r) + (k (mult v (mult (prod l) (prod r))))])) +) + +Now we'd like to replace the calls to @racket[prod] with calls to +@racket[prod/k], which we can do by recognizing the work to be done +around the call to @racket[prod] and placing it in the +@bold{continuation} argument to @racket[prod/k]. Let's do the first call: + +@#reader scribble/comment-reader +(ex +(define (prod/k bt k) + (match bt + ['leaf (k 1)] + [`(node ,v ,l ,r) + (prod/k l (λ (pl) + (k (mult v (mult pl (prod r))))))])) +) + +Doing this again, we get: + +@#reader scribble/comment-reader +(ex +(define (prod/k bt k) + (match bt + ['leaf (k 1)] + [`(node ,v ,l ,r) + (prod/k l (λ (pl) + (prod/k r (λ (pr) + (k (mult v (mult pl pr)))))))])) +) + +Now we have a definition of @racket[prod/k] that is independent of +@racket[prod] that satisfies the spec we started with. 
+ +A couple of things to note: + +@itemlist[ +@item{Every call to @racket[prod/k] is a tail-call,} +@item{The context of the recursive calls is given explicitly as continuation arguments.} +] + +We can recreate the original function by giving the appropriate initial continuation: + +@#reader scribble/comment-reader +(ex +;; BT -> Number +(define (prod bt) + (prod/k bt (λ (x) x))) +) + +Now, this code doesn't do anything smart on zero elements; it does +exactly the same multiplications our first program does: + +@ex[ +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +] + +However, with a small tweak, we can get the behavior of the exception-handling code. + +Consider this definition: + +@#reader scribble/comment-reader +(ex +;; BT (Number -> Number) -> Number +(define (prod/k bt k) + (match bt + ['leaf (k 1)] + [`(node ,v ,l ,r) + (if (zero? v) + 0 + (prod/k l (λ (pl) + (prod/k r (λ (pr) + (k (mult v (mult pl pr))))))))])) + +;; BT -> Number +(define (prod bt) + (prod/k bt (λ (x) x))) +) + +Notice that this program, when the value in a node is zero, +immediately returns @racket[0]. It does not do any of the work +represented by @racket[k]. It does something akin to raising an +exception: it blows off all the work of the surrounding context and +returns a value to the ``handler'' (in this case, @racket[prod]). + +Returning to our example, we can see that no multiplications occur: + +@ex[ +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +] + +We've now achieved our original goal without the use of exception +handlers. We achieved this by rewriting our program to make explicit +the work that remains to do, giving us the ability to avoid doing it +when necessary. This is a slightly simplified version of the general +exception handling transformation, which we will look at next, since +there's only a single handler and all it does is produce 0.
But, the +by-hand transformation we did provides a useful blueprint for how we can +generally transform programs that use exception handling into ones +that don't. + + +@section[#:tag-prefix "mug"]{Exceptional transformation} + +Let's consider a very small subset of expressions, extended with +@racket[raise] and @racket[catch], and see how we can transform away +those added mechanisms: + +@#reader scribble/comment-reader +(racketblock +;; An Expr is one of: +;; - Integer +;; - Variable +;; - `(if ,Expr ,Expr ,Expr) +;; - `(,Prim1 ,Expr) +;; - `(,Prim2 ,Expr ,Expr) +;; - `(raise ,Expr) +;; - `(catch ,Expr (λ (,Variable) ,Expr)) +) + +Here is the basic idea of the transformation: we transform every +expression into a function of two arguments. The two arguments +represent the two ways an expression may produce results: either by +returning normally or by raising an exception. + + +So for example, if the original expression were @racket[1], we'd want +the transformed program to be +@racketblock[ +'(λ (retn raze) (retn 1)) +] + +Why? Because @racket[1] just produces @racket[1]; it can't possibly +raise an exception. So given the two ways of producing a value, we +choose the @racket[retn] way and ``return'' by applying @racket[retn] to +the value we want to return: @racket[1]. + + +Suppose the original expression is @racket[(raise 1)]. Then we want +to produce: +@racketblock[ +'(λ (retn raze) (raze 1)) +] + +This is choosing to not return a value, but rather ``raise'' an +exception by calling the @racket[raze] function. + +This is a lot like the by-hand transformation we did, except we now +have two continuations: one to represent work to do after +returning (normally) and one for work to do after raising an +exception. + +At the top-level, to run an expression we simply plug in appropriate +definitions for @racket[retn] and @racket[raze]. The @racket[retn] +function should just produce the result, i.e.
it should be @racket[(λ +(x) x)], while @racket[raze] should signal an uncaught exception. +Since our language has such a simple model of errors, we'll just cause +an error to occur, i.e. @racket[(λ (x) (add1 #f))]. Let's try our +examples. + +@ex[ +(interp-env '((λ (retn raze) (retn 1)) (λ (x) x) (λ (x) (add1 #f))) '()) +(interp-env '((λ (retn raze) (raze 1)) (λ (x) x) (λ (x) (add1 #f))) '()) +] + +What about something like @racket[(add1 _e)]? + +Well if @racket[_e] returns normally, then the whole thing should +produce one more than that value. If @racket[_e] raises an exception, +then @racket[(add1 _e)] should raise that exception. + +Suppose @racket[_t] were the transformed version of @racket[_e], +which means it is a function of two parameters: what to do if +@racket[_e] returns and what to do if @racket[_e] raises. + +Then the transformation of @racket[(add1 _e)] is +@racketblock[ +(λ (retn raze) + (_t (λ (x) (retn (add1 x))) (λ (x) (raze x))))] + +This can be simplified slightly by observing that @racket[(λ (x) (raze +x))] is equal to @racket[raze]: +@racketblock[ +(λ (retn raze) + (_t (λ (x) (retn (add1 x))) raze))] + +How about something like @racket[(catch _e0 (λ (_x) _e1))]? If +@racket[_e0] produces a value normally, then the whole expression +produces that value normally. However if @racket[_e0] raises an +exception then the whole expression produces whatever @racket[_e1] +with @racket[_x] bound to the raised value produces. Let @racket[_t0] +and @racket[_t1] be the transformed versions of @racket[_e0] and +@racket[_e1]. Then the transformation of the whole expression should be + +@racketblock[ +(λ (retn raze) + (_t0 retn (λ (_x) (_t1 retn raze)))) +] + +One thing to notice here is that we are running @racket[_t0] with a @racket[raze] function +that, if called, will run @racket[_t1] normally.
+ +Guided by the examples, let's define the transformation (note: we have +to take care of avoiding unintended variable capture): + +@#reader scribble/comment-reader +(ex +;; Expr -> Expr +;; Rewrite e into a (λ (retn raze) ...) expression that calls retn with e's +;; value when e returns normally and raze with the raised value otherwise. +;; Binders that enclose transformed subexpressions use gensym'd names so +;; they cannot capture variables of the source program. +(define (exn-transform e) + (match e + [(? integer? i) `(λ (retn raze) (retn ,i))] + [(? symbol? x) + (let ((retn (gensym 'retn)) + (raze (gensym 'raze))) + `(λ (,retn ,raze) (,retn ,x)))] + [`(if ,e0 ,e1 ,e2) + (let ((t0 (exn-transform e0)) + (t1 (exn-transform e1)) + (t2 (exn-transform e2)) + (retn (gensym 'retn)) + (raze (gensym 'raze)) + (c (gensym 'c))) ; fresh: this binder encloses t1 and t2 + `(λ (,retn ,raze) + (,t0 + (λ (,c) + ((if ,c ,t1 ,t2) ,retn ,raze)) + ,raze)))] + [`(,(? prim? p) ,e0) + (let ((t0 (exn-transform e0)) + (retn (gensym 'retn)) + (raze (gensym 'raze))) + `(λ (,retn ,raze) + (,t0 (λ (v) (,retn (,p v))) + ,raze)))] + [`(,(? prim? p) ,e0 ,e1) + (let ((t0 (exn-transform e0)) + (t1 (exn-transform e1)) + (retn (gensym 'retn)) + (raze (gensym 'raze)) + (v0 (gensym 'v0))) ; fresh: this binder encloses t1 + `(λ (,retn ,raze) + (,t0 (λ (,v0) + (,t1 (λ (v1) (,retn (,p ,v0 v1))) + ,raze)) + ,raze)))] + [`(raise ,e) + (let ((t (exn-transform e)) + (retn (gensym 'retn)) + (raze (gensym 'raze))) + `(λ (,retn ,raze) + (,t ,raze ,raze)))] + [`(catch ,e0 (λ (,x) ,e1)) + (let ((t0 (exn-transform e0)) + (t1 (exn-transform e1)) + (retn (gensym 'retn)) + (raze (gensym 'raze))) + + `(λ (,retn ,raze) + (,t0 ,retn + (λ (,x) + (,t1 ,retn ,raze)))))])) +) + +Here's what the transformation looks like on examples: + +@ex[ +(exn-transform '1) +(exn-transform '(raise 1)) +(exn-transform '(catch (raise 1) (λ (x) x))) +(exn-transform '(catch (raise 1) (λ (x) (add1 x)))) +(exn-transform '(catch (add1 (raise 1)) (λ (x) 1))) +(exn-transform '(catch (add1 (raise 1)) (λ (x) (raise x)))) +] + +Now let's give it a spin: + +@ex[ +;; Expr -> Value +(define (run e) + (interp-env `(,(exn-transform e) (λ (x) x) (λ (x) (add1 #f))) '())) + +(run '1) +(run '(raise 1)) +(run '(catch (raise 1) (λ (x) x))) +(run '(catch (raise 1) (λ (x) (add1 x)))) +(run '(catch (add1 (raise 1)) (λ (x) 1)))
+(run '(catch (add1 (raise 1)) (λ (x) (raise x)))) +(run '(if (raise 0) 1 2)) +(run '(if (zero? 0) (raise 1) 2)) +] + +@section[#:tag-prefix "mug"]{Quotation} + +We have seen how to interpret limited uses of @racket[quote], such as +in @racket[(quote ())] and @racket[(quote x)], i.e. the empty list and symbols. + +But we've also been using @emph{using} @racket[quote] more generally +where we can write down an arbitrary constant s-expression within a +@racket[quote]: + +@ex[ +'#t +'5 +'(1 2 3) +'(add1 x) +'(car '(1 2 3)) +'(((1) 2) 3) +'(1 . 2) +'("asdf" fred ((one))) +] + +We can understand the more general @racket[quote] form as a shorthand +for expressions that construct an equivalent list to the one denoted +by the s-expression. + +For example, +@itemlist[ + +@item{@racket['1] is shorthand for @racket[1],} + +@item{@racket['(1 . 2)] is shorthand for @racket[(cons '1 '2)], which +is shorthand for @racket[(cons 1 2)],} + +@item{@racket['(1 2 3)] is shorthand for @racket[(cons '1 '(2 3))], +which is shorthand for @racket[(cons 1 (cons '2 '(3)))], which is +shorthand for @racket[(cons 1 (cons 2 (cons '3 '())))], which is +shorthand for @racket[(cons 1 (cons 2 (cons 3 '())))],} + +@item{@racket['()] is as simple as possible (the empty list),} + +@item{@racket['x] is as simple as possible (a symbol), and} + +@item{@racket[5] is as simple as possible.} +] + +Guided by these examples, we can write a function that transforms the +s-expression inside of a @racket[quote] into an equivalent expression +that only uses @racket[quote] for constructing symbols and the empty +list: + + +@#reader scribble/comment-reader +(ex +;; S-Expr -> Expr +;; Produce an expression that evaluates to given s-expression, without +;; use of quote (except for symbols and empty list) +(define (quote->expr d) + (match d + [(? boolean?) d] + [(? integer?) d] + [(? string?) d] + [(? char?) d] + [(? symbol?) 
(list 'quote d)] + [(cons x y) (list 'cons (quote->expr x) (quote->expr y))] + ['() ''()])) + + +(quote->expr 'x) +(quote->expr 5) +(quote->expr "Fred") +(quote->expr '(1 . 2)) +(quote->expr '(1 2 3)) +(quote->expr '(car '(1 2 3))) +(quote->expr '(((1) 2) 3)) +(quote->expr '(1 . 2)) +(quote->expr '("asdf" fred ((one)))) +) + +We can now incorporate this into @racket[desugar] to eliminate uses of +compound-data @racket[quote]: + +@#reader scribble/comment-reader +(ex +;; Expr+ -> Expr +(define (desugar e+) + (match e+ + [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) + `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) + ,(desugar e))] + [(? symbol? x) x] + [(? imm? i) i] + [`',(? symbol? s) `',s] + [`',d (quote->expr d)] + [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] + [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] + [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] + [`(letrec ,bs ,e0) + `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) + ,(desugar e0))] + [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] + [`(cond . ,_) (desugar (cond->if e+))] + [`(and . ,_) (desugar (and->if e+))] + [`(or . ,_) (desugar (or->if e+))] + [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) +) + +And now we can @racket[desugar] programs such as these: + +@ex[ +(desugar '(map f '(1 2 3))) +(desugar '(map f '(and 1 2))) +(desugar '(if x '(1 . 2) 3)) +] + +And our prior interpterter will work just fine on these programs: + +@ex[ +(interp-env (desugar '(map f '(1 2 3))) `((map ,map) (f ,add1))) +(interp-env (desugar '(map f '(and 1 2))) `((map ,map) (f ,identity))) +(interp-env (desugar '(if x '(1 . 2) 3)) `((x #t))) +] + +And: + +@ex[ +(interp-env (desugar ''(((1) 2) 3)) '()) +(interp-env (desugar ''(1 . 
2)) '()) +(interp-env (desugar ''("asdf" fred ((one)))) '()) +] + +@section[#:tag-prefix "mug"]{Pattern matching} + +One of the most ubiquitous language features we've used, but not +implemented, is pattern matching with the @racket[match] form. + +Pattern matching too can be seen as syntactic sugar since it's easy to +imagine how you could rewrite uses of @racket[match] into equivalent +expressions that didn't involve @racket[match]. + +For example, consider the program: + +@#reader scribble/comment-reader +(racketblock +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (match bt + ['leaf 1] + [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))])) +) + +An alternative, equivalent, formulation that doesn't use +@racket[match] is the following: + +@#reader scribble/comment-reader +(racketblock +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (cond + [(eq? 'leaf bt) 1] + [(and (list? bt) + (= 4 (length bt)) + (eq? 'node (first bt))) + (let ((v (second bt)) + (l (third bt)) + (r (fourth bt))) + (* v (* (prod l) (prod r))))] + ; corresponds to a match failure + [else (add1 #f)])) +) + +This code is less nice to read and write, but essentially does the +same thing the pattern-matching code does. + +In this example, each @racket[match]-clause becomes a +@racket[cond]-clause. The question-part of each @racket[cond]-clause +is an expression that determines whether the corresponding +pattern-part of the @racket[match-clause] matches. The answer-part of +each @racket[cond]-clause corresponds to the expression-part of the +@racket[match]-clause, with an add @racket[let]-form that destructures +the scrutinee and binds the pattern variables of the pattern-part. + + +Let's consider the following extension to the grammar of @tt{Expr+} to +include a simplified version of the pattern matchin form we've been +using: + +@#reader scribble/comment-reader +(racketblock +;; type Expr+ = +;; .... 
+;; | Match + +;; type Match = (match ,Expr+ ,(list Pat Expr+) ...) + +;; type Pat = +;; | #t +;; | #f +;; | Integer +;; | String +;; | Variable +;; | `_ +;; | `'() +;; | `(quote ,Symbol) +;; | `(cons ,Pat ,Pat) +;; | `(list ,Pat ...) +;; | `(? ,Expr ,Pat ...) + ) + +A @racket[match] form consists of an expression to match against, +sometimes called the @bold{scrutinee}, followed by some number of +pattern-matching clauses; each one consists of a pattern and +expression to evaluate should the pattern match the scrutinee's value. + +Here a pattern can either be a literal boolean, integer, string, empty +list, or symbol, or a pattern variable, which matches anything and +binds the value to the variable, a ``wildcard'' which matches anything +and binds nothing, a @racket[cons] pattern which matches pairs of +things that match the subpatterns, a @racket[list] pattern which matches +lists of a fixed-size whose elements match the subpatterns, or a +@racket[?] pattern which matches if the result of applying the first +subexpression to the scrutinee is true and all of the +subpatterns match. + +This doesn't include the @racket[quasiquote]-patterns we used above, +but still this is a useful subset of pattern matching and allows us to +write programs such as: + +@#reader scribble/comment-reader +(racketblock +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (match bt + ['leaf 1] + [(list 'node v l r) (* v (* (prod l) (prod r)))])) +) + +As alluded to above, each pattern plays two roles: it is used to +determine whether the scrutinee matches the pattern, and it is used to +bind variable names (in the scope of the RHS expression) to sub-parts +of the scrutinee when it does match.
+ +We can write two helper functions to accomplish each of these tasks: +@itemlist[ + +@item{rewrite patterns into Boolean valued expressions that answer +whether the pattern matches the scrutinee,} + +@item{rewrite pattern and RHS into expressions in which the pattern +variables of the pattern are bound to the appropriately deconstructed +parts of the scrutinee.} + +] + + +Assume: the scrutinee is a variable. (It's easy to establish this assumption in general.) + +We need two functions: + +@#reader scribble/comment-reader +(racketblock +;; Pat Variable -> Expr +;; Produces an expression determining if p matches v +(define (pat-match p v) ...) + +;; Pat Variable Expr -> Expr +;; Produce an expression that deconstructs v and binds pattern variables +;; of p in scope of e. +;; ASSUME: v matches p +(define (pat-bind p v e) ...) +) + +Let's turn to @racket[pat-match] first. + +Suppose the pattern is a literal @racket[#t]. When does @racket[v] +match it? When @racket[v] is @racket[eq?] to @racket[#t]. + +So an expression that produces true when this pattern matches is @racket[(eq? #t v)]. + +Handling @racket[#f], integers, characters, symbols, and the empty list is similar. + +What about variables? Suppose the pattern is @racket[x]. When does +@racket[v] match it? Always. So @racket[#t] is an expression that +produces true when this pattern matches. + +Wildcards are the same. + +What about when the pattern is a @racket[cons]-pattern? Suppose the +pattern is @racket[(cons _p1 _p2)] for some patterns @racket[_p1] and +@racket[_p2]. When does @racket[v] match @racket[(cons _p1 _p2)]? +When @racket[v] is a pair and @racket[(car v)] matches @racket[_p1] +and @racket[(cdr v)] matches @racket[_p2]. + +A @racket[list] pattern is similar, except that the scrutinee must be a +list with as many elements as there are patterns, and the elements +must match the corresponding subpattern. + +What about a @racket[?] pattern? When does @racket[v] match it? +Suppose the pattern is @racket[(?
even?)]. When does @racket[v] match +it? When @racket[(even? v)] is true. If the pattern had a non-empty +list of sub-patterns they would all need to match @racket[v], too. + + +We can now formulate a defintion of @racket[pat-match]: + +@#reader scribble/comment-reader +(racketblock +;; Pat Variable -> Expr +;; Produces an expression determining if p matches v +(define (pat-match p v) + (match p + [#t `(eq? #t ,v)] + [#f `(eq? #f ,v)] + [(? integer? i) `(eq? ,i ,v)] + [(? string? s) + `(and (string? ,v) + (string=? ,s ,v))] + [(list 'quote '()) `(eq? '() ,v)] + [(? symbol?) #t] + [(list 'quote (? symbol? s)) `(eq? ,v ',s)] + [(list 'cons p1 p2) + (let ((v1 (gensym)) + (v2 (gensym))) + `(and (cons? ,v) + (let ((,v1 (car ,v)) + (,v2 (cdr ,v))) + (and ,(pat-match p1 v1) + ,(pat-match p2 v2)))))] + [(cons 'list ps) + `(and (list? ,v) + (= (length ,v) ,(length ps)) + ,(pat-match-list ps v))] + [(cons '? (cons e ps)) + `(and (,e ,v) + ,(pats-match ps v))])) +) + +The @racket[list]-pattern case relies on a helper function +@racket[pat-match-list] and the @racket[?]-pattern case relies on +@racket[pats-match], both defined below: + + +@#reader scribble/comment-reader +(racketblock +;; (Listof Pat) Variable -> Expr +;; Produces an expression determining if every ps matches x +(define (pats-match ps v) + (match ps + ['() #t] + [(cons p ps) + `(and ,(pat-match p v) + ,(pats-match ps v))])) + +;; (Listof Pat) Variable -> Expr +;; Produces an expression determining if each ps matches each element of list v +(define (pat-match-list ps v) + (match ps + ['() #t] + [(cons p ps) + (let ((v1 (gensym)) + (v2 (gensym))) + `(let ((,v1 (car ,v)) + (,v2 (cdr ,v))) + (and ,(pat-match p v1) + ,(pat-match-list ps v2))))])) +) + +Here are some examples: + +@ex[ + +(pat-match 'leaf 'bt) +(pat-match '(list 'node v l r) 'bt) +(pat-match '(list 'node (? even? v) l r) 'bt) + +] + +These aren't very readable programs that emerge, however, we check +that they're doing the right thing. 
Note that the elaboration +requires a few functions to be available, such as @racket[list?], +and @racket[length]. We make these available in an initial +environment: + +@ex[ +(define env0 + `((length ,length) + (list? ,list?))) +(interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 'leaf 'bt))) env0) +(interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 8 'bt))) env0) +(interp-env (desugar + `(let ((bt '(node 1 leaf leaf))) + ,(pat-match '(list 'node v l r) 'bt))) + env0) +(interp-env (desugar + `(let ((bt '(node 1 leaf leaf))) + ,(pat-match '(list 'node (? zero?) l r) 'bt))) + env0) +(interp-env (desugar + `(let ((bt '(node 0 leaf leaf))) + ,(pat-match '(list 'node (? zero?) l r) 'bt))) + env0) +] + +Now moving on to @racket[pat-bind], it follows a similar structure, +but we always assume the given pattern matches the scrutinee. The +addition @tt{Expr} argument represents the right-hand-side expression +of the @racket[match]-clause. The idea is that the pattern variables +of @racket[p] are bound to sub-parts of @racket[v] in @racket[e]. + +@#reader scribble/comment-reader +(racketblock +;; Pat Variable Expr -> Expr +;; Produce an expression that deconstructs v and binds pattern variables +;; of p in scope of e. +;; ASSUME: v matches p +(define (pat-bind p v e) + (match p + [#t e] + [#f e] + [(? integer?) e] + [(? string?) e] + [(list 'quote '()) e] + ['_ e] + [(? symbol? x) `(let ((,x ,v)) ,e)] + [(list 'quote (? symbol?)) e] + [(list 'cons p1 p2) + (let ((v1 (gensym)) + (v2 (gensym))) + `(let ((,v1 (car ,v)) + (,v2 (cdr ,v))) + ,(pat-bind p1 v1 + (pat-bind p2 v2 e))))] + [(cons 'list ps) + (pat-bind-list ps v e)] + [(cons '? (cons _ ps)) + (pats-bind ps v e)])) +) + +Here are some examples: + +@ex[ + +(pat-bind 'leaf 'bt 'bt) +(pat-bind '(list 'node v l r) 'bt 'v) +(pat-bind '(list 'node (? even? 
v) l r) 'bt 'v) + +] + +These are tough to read, but we can confirm what they compute: + +@ex[ +(interp-env (desugar + `(let ((bt '(node 0 leaf leaf))) + ,(pat-bind '(list 'node (? zero? z) l r) 'bt 'z))) + '()) +] + +Putting the pieces together, we can now write a @racket[match->cond] function +that rewrites a @racket[match]-expression into a @racket[cond]-expression: + +@#reader scribble/comment-reader +(racketblock +;; Match -> Expr +;; Rewrite match expression into an equivalent cond expression +(define (match->cond m) + (match m + [(cons 'match (cons e mcs)) + (let ((x (gensym))) + `(let ((,x ,e)) + (cond ,@(map (λ (mc) + (match mc + [(list p e) + (list (pat-match p x) (pat-bind p x e))])) + mcs) + ;; fall through to error + [else (add1 #f)])))])) +) + +@ex[ +(match->cond '(match '(node 2 leaf leaf) + ['leaf 0] + [(list 'node v l r) v])) +] + +Finally, we can incorporate @racket[match->cond] into @racket[desugar]: + +@#reader scribble/comment-reader +(ex #:no-prompt +;; Expr+ -> Expr +(define (desugar e+) + (match e+ + [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) + `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) + ,(desugar e))] + [(? symbol? x) x] + [(? imm? i) i] + [`',(? symbol? s) `',s] + [`',d (quote->expr d)] + [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] + [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] + [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] + [`(letrec ,bs ,e0) + `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) + ,(desugar e0))] + [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] + [`(cond . ,_) (desugar (cond->if e+))] + [`(and . ,_) (desugar (and->if e+))] + [`(or . ,_) (desugar (or->if e+))] + [`(match . ,_) (desugar (match->cond e+))] ; new + [`(,e . 
,es) `(,(desugar e) ,@(map desugar es))])) +) + +Now we can interpret programs such as this: + +@ex[ + +(interp-env + (desugar + '(begin (define (prod bt) + (match bt + ['leaf 1] + [(list 'node v l r) + (* v (* (prod l) (prod r)))])) + + (prod '(node 3 (node 4 leaf leaf) leaf)))) + `((* ,*) (list? ,list?) (length ,length))) + +] + + diff --git a/www/notes/utils.rkt b/www/notes/utils.rkt index f0818591..de1b198a 100644 --- a/www/notes/utils.rkt +++ b/www/notes/utils.rkt @@ -3,6 +3,7 @@ (require (for-syntax racket/runtime-path racket/base racket/file)) (require scribble/manual racket/runtime-path) (require (for-label (except-in racket compile) a86)) +(require images/icons/file) (begin-for-syntax (define-runtime-path notes "../../langs/")) @@ -27,3 +28,16 @@ (define (save-file f s) (with-output-to-file f (λ () (display s)) #:exists 'replace)) + +(define (binary i [len 0]) + (typeset-code #:block? #f #:indent 0 + (string-append "#b" + (~a (number->string i 2) + #:left-pad-string "0" + #:align 'right + #:min-width len)))) + +(define (src-code lang) + (margin-note (small-save-icon) " " + (link (string-append "code/" (string-downcase lang) ".zip") "Source code") + ".")) diff --git a/www/project.scrbl b/www/project.scrbl index 2063b26b..fce2d36f 100644 --- a/www/project.scrbl +++ b/www/project.scrbl @@ -1,13 +1,529 @@ #lang scribble/manual +@(require (for-label (except-in racket compile ...) a86)) @(require "defns.rkt") +@(require "notes/ev.rkt") +@(require "fancyverb.rkt") + @title[#:style '(unnumbered)]{Project} -There will be a final course project to be completed over the last -several weeks of the course. The project will involve extending the -design and implementation of the programming language and its compiler -that we will develop throughout the semester. +The final assessment for this course consists of an individually +completed project. + +Details to be released later in the semester. + +@;{ + +Final deliverables are due on the last day of class, July 7. 
+ +@elem[#:style "strike"]{There are several projects to choose from, +described below.} + +@emph{Summer update: Typically we allow projects to be chosen from a +number of options, but for the summer semester we will streamline +things a bit by settling on a single option.} + +Compared to assignments, the project is more open-ended. You will +need to select from a project description below and then select which +language you'd like to target with your project. As starter code, you +can use the source code of any of the course languages. How you +implement your project is up to you. It may involve changes to all +aspects of the language implementation: the parser, the compiler, and +the run-time system (however, we do not require an interpreter +implementation). No tests are provided, so we recommend you write your +own and suggest focusing on tests @emph{before} trying to implement +these features. + +@elem[#:style "strike"]{In addition to the source code for your +project, you must write a 2-page document in PDF format, which gives a +summary of your work and describes how your project is implemented.} + +@table-of-contents[] + +@;{ +@section{a86 optimizer} + +Our compiler is designed to be simple and easy to maintain. That +comes at the cost of emitting code that often does needless work. +Write an a86 optimizer, i.e., a program that takes in a list of a86 +instructions and produces an alternative list of instructions that +have the same behavior, but will execute more efficiently. + +This is a fairly open-ended project, which means you can take a simple +approach, or you can do a deep-dive on assembly code optimization and +try to do something very sophisticated. + +For a maximum of 95% of the possible points, your optimizer should +work on any a86 instructions produced by the +@seclink["Iniquity"]{Iniquity} compiler. For 100%, your optimizer +should work on any a86 instructions produced by the +@seclink["Loot"]{Loot} compiler. 
+ +The most important aspect of the optimizer is it must preserve the +meaning of the original source program. If running a program with or +without optimization can produce different results, you will lose +significant points. + +The second important aspect of the optimizer is that it produces more +efficient code (but this should never come at the expense of +correctness---otherwise it's trivial to optimize every program!). You +should design some experiments demonstrating the impact of your +optimizations and measure the performance improvement of your optimizer. + +Here are some ideas for what you can optimize: + +@itemlist[ + +@item{Avoid stack references where possible. + +For example, you might push something and immediately reference it: +@racket[(seq (Push _r1) (Mov _r2 (Offset rsp 0)))], which is +equivalent to @racket[(seq (Push _r1) (Mov _r2 _r1))]. The +register-to-register move will be faster than accessing the memory on +the stack.} + +@item{Avoid stack pushes where possible. + +In the previous example, it may be tempting to delete the +@racket[Push], but that is only valid if that stack element is not +referenced later before being popped. And even if the element is not +referenced, we have to be careful about how the element is popped. + +But if you know where the pop occurs and there's no intervening +references in to the stack or other stack changes, then you can +improve the code further, e.g. @racket[(seq (Push _r1) (Mov _r2 +(Offset rsp 0)) (Add rsp 8))] can become @racket[(seq (Mov _r2 _r1))]. +} + +@item{Statically compute. + +Sometimes the compiler emits code for computing something at run-time +which can instead be computed at compile time. For example, the +compiler might emit @racket[(seq (Mov _r 42) (Add _r 12))], but this +can be simplified to @racket[(seq (Mov _r 54))].} + +] + +There are many, many other kinds of optimizations you might consider. 
+To get a sense of the opportunities for optimization, try compiling +small examples and looking at the assembly code produced. Try +hand-optimizing the code, then try to abstract what you did by hand +and do it programmatically. + +@section{Source optimizer} + +Another complementary approach to making programs compute more +efficiently is to optimize them at the level of source code. Write a +source code optimizer, i.e. a program that takes in a program AST and +produces an alternative AST that has the same behavior, but will +execute more efficiently. + +This is another fairly open-ended project, which means you can take a +simple approach, or you can do a deep-dive on source code optimization +and try to do something very sophisticated. + +For a maximum of 95% of the possible points, your optimizer should +work for the @seclink["Iniquity"]{Iniquity} language. For 100%, your +optimizer should work for the @seclink["Loot"]{Loot} language (or later). + +The most important aspect of the optimizer is it must preserve the +meaning of the original source program. If running a program with or +without optimization can produce different results, you will lose +significant points. + +The second important aspect of the optimizer is that it produces more +efficient code (but this should never come at the expense of +correctness—otherwise it’s trivial to optimize every program!). You +should design some experiments demonstrating the impact of your +optimizations and measure the performance improvement of your +optimizer. + +Here are some ideas for where you can optimize: + +@itemlist[ + +@item{Avoid variable bindings where possible. + +Sometimes a program may bind a variable to a value, but then use the +variable only once, e.g. @racket[(let ((x (add1 7))) (add1 x))]. We +can instead replace the variable occurrence with its definition to +get: @racket[(add1 (add1 7))].
Note that care must be taken to +@emph{not} do this optimization if it changes the order in which +effects may happen. For example, consider + +@racketblock[ +(let ((x (read-byte))) + (begin (read-byte) + (add1 x))) +] + +This is not the same as: + +@racketblock[ +(begin (read-byte) + (add1 (read-byte))) +] + +because the latter adds one to the second byte of the input stream rather than the first.} + +@item{Statically compute. + +Sometimes parts of a program can be computed at compile-time rather +than run-time. For example, @racket[(add1 41)] can be replaced with +@racket[42]. Likewise, expressions like @racket[(if #f _e1 _e2)] can +be replaced by @racket[_e2].} + +@item{Inline function calls. + +Suppose you have: + +@racketblock[ +(define (f x) (add1 x)) +(if (zero? (f 5)) _e1 _e2) +] + +Since the expression @racket[(f 5)] is calling a known function, you +should be able to transform this call into @racket[(let ((x 5)) (add1 +x))]. Using the previously described optimization, you can further +optimize this to @racket[(add1 5)], which in turn can be simplified +to @racket[6]. You can keep going and notice that @racket[(zero? 6)] +is just @racket[#f], so the whole program can be simplified to: + +@racketblock[ +(define (f x) (add1 x)) +_e2 +] +} + +] + +Note that the last example can get considerably more complicated in a +language with first-class functions since it may not be possible to +know statically which function is being called. + +There are many other optimizations you might consider. Think about +the kinds of expressions you might write and how they can be +simplified, then figure out how to do it programmatically. +} + +@section{Multiple return values} + +Racket, Scheme, and even x86 support returning more than one value +from a function call. Implement Racket's @racket[let-values] and +@racket[values] forms to add multiple return values.
+ +You may choose to implement this feature for any language that is +@seclink["Iniquity"]{Iniquity} or later for a maximum 95% of the +possible points. For 100% you'll need to implement the feature for +Loot or later. + +Here are the key features that need to be added: + +@itemlist[ + +@item{@racket[(values _e1 ... _en)] will evaluate @racket[_e1] through +@racket[_en] and then ``return'' all of their values.} + +@item{@racket[(let-values ([(_x1 ... _xn) _e]) _e0)] will evaluate +@racket[_e], which is expected to be an expression that produces +@racket[_n] values, which are bound to @racket[_x1] through +@racket[_xn] in the body expression @racket[_e0].} + +] + + +Here are some examples to help illustrate: + +@ex[ + +(let-values ([(x y) (values 1 2)]) (+ x y)) + +(let-values ([(x) (values 1)]) (add1 x)) + +(let-values ([() (values)]) 7) + +(define (f x) + (values x (+ x 1) (+ x 2))) + +(let-values ([(x y z) (f 5)]) + (cons x (cons y (cons z '())))) + +(add1 (values 5)) + +(let ((x (values 5))) + (add1 x)) + +] + +Any time an expression produces a number of values that doesn't match +what the surrounding context expects, an error should be signaled. + +@ex[ + +(eval:error (add1 (values 1 2))) + +(eval:error (let-values ([(x y) 2]) x)) + +] + +The top-level expression may produce any number of values and the +run-time system should print each of them out, followed by a newline: + +@ex[ +(values 1 2 3) +] + +Note there is some symmetry here between function arity checking where +we make sure the number of arguments matches the number of parameters +of the function being called and the ``result arity'' checking that is +required to implement this feature. This suggests a similar approach +to implementing this feature, namely designating a register to +communicate the arity of the result, which should be checked by the +surrounding context. + +You will also need to design an alternative mechanism for +communicating return values. 
Using a single register (@racket['rax]) +works when every expression produces a single result, but now +expressions may produce an arbitrary number of results and using +registers will no longer suffice. (Although you may want to continue +to use @racket['rax] for the common case of a single result.) The +solution for this problem with function parameters was to use the +stack and a similar approach can work for results too. + + +@subsection{Returning multiple values to the run-time system or @racket[asm-interp]} + +In implementing @racket[values], there are two design decisions you +have to make: + +@itemlist[#:style 'ordered +@item{How are values going to be represented during the execution of a program?} +@item{How are values going to be communicated back to the run-time system and/or asm-interp when the program completes?} +] + +The answers to (1) and (2) don't necessarily have to be the same. + +Note that you can go a long way working on (1) without making any +changes to the run-time system or @tt{unload-bits-asm.rkt} (which is +how the result of @racket[asm-interp] is converted back to a Racket +value). You can basically punt on (2) and work on (1) by writing +tests that use multiple values within a computation, but ultimately +return a single value, e.g. @racket[(let-values ([(x y) (values 1 2)] +(cons x y)))]. + +As for (2), here is a suggestion that you are free to adopt, although +you can implement (2) however you'd like so long as when running an +executable that returns multiple values it prints the results in a way +consistent with how Racket prints and that if using +@racket[asm-interp], your version of @racket[unload/free] produces +multiple values whenever the program does. + +You can return a vector of results at the end of @racket[entry]. 
This +means after the instructions for the program, whatever values are +produced are converted from the internal representation of values +(i.e., your design for (1)) to a vector and the address (untagged) is +put into @tt{rax} to be returned to the run-time system and/or +@racket[asm-interp]. + +Now both the run-time system and @tt{unload-bits-asm.rkt} need to be +updated to deal with this change in representation for the result. + +In @tt{main.c}, the part that gets the result and prints it: + +@fancy-c[ +#<len; ++i) { + print_result(result->elems[i]); + if (val_typeof(result->elems[i]) != T_VOID) + putchar('\n'); + } +HERE +] + +You'll also need to update the signature of @racket[entry] in +@tt{runtime.h} to: + +@fancy-c[" val_vect_t* entry();"] + +You'll also need to make a similar change to @racket[unload/free] in +@tt{unload-bits-asm.rkt}, which plays the role of the run-time system +when writing tests that use @racket[asm-interp]. + +Instead of: + +@#reader scribble/comment-reader +(racketblock +;; Answer* -> Answer +(define (unload/free a) + (match a + ['err 'err] + [(cons h v) (begin0 (unload-value v) + (free h))])) +) + +You'll want: + +@#reader scribble/comment-reader +(racketblock +;; Answer* -> Answer +(define (unload/free a) + (match a + ['err 'err] + [(cons h vs) (begin0 (unload-values vs) + (free h))])) + +(define (unload-values vs) + (let ((vec (unload-value (bitwise-xor vs type-vect)))) + (apply values (vector->list vec)))) +) + + + +Let's say you make these changes to the run-time system and +@racket[unload/free] before you make any changes to the compiler and +now you want to adapt the compiler to work with the new set up (before +trying to do anything with @racket[values]). 
You can add the +following just after the call to @racket[compile-e] for the main expression +of the program and before restoring volatile registers and returning: + +@#reader scribble/comment-reader +(racketblock +;; Create and return unary vector holding the result +(Mov r8 1) +(Mov (Offset rbx 0) r8) ; write size of vector, 1 +(Mov (Offset rbx 8) rax) ; write rax as single element of vector +(Mov rax rbx) ; return the pointer to the vector +) + +In order to return more values, you'd construct a larger vector. + + +@;{ +@section{Exceptions and exception handling} + +Exceptions and exception handling mechanisms are widely used in modern +programming languages. Implement Racket's @racket[raise] and +@racket[with-handlers] forms to add exception handling. + +You may choose to implement this feature for any language that is +@seclink["Iniquity"]{Iniquity} or later for a maximum 95% of the +possible points. For 100% you'll need to implement the feature for +Loot or later. + +Here are the key features that need to be added: + +@itemlist[ + +@item{@racket[(raise _e)] will evaluate @racket[_e] and then ``raise'' +the value, side-stepping the usual flow of control and instead jump +to the most recently installed exception handler.} + +@item{@racket[(with-handlers ([_p1 _f1] ...) _e)] will install a new +exception handler during the evaluation of @racket[_e]. If +@racket[_e] raises an exception that is not caught, the predicates +should be applied to the raised value until finding the first +@racket[_pi] that returns true, at which point the corresponding +function @racket[_fi] is called with the raised value and the result +of that application is the result of the entire @racket[with-handlers] +expression. If @racket[_e] does not raise an error, its value is the +value of the @racket[with-handler] expression.} + +] + +Here are some examples to help illustrate: + +@ex[ + +(with-handlers ([string? (λ (s) (cons "got" s))]) + (raise "a string!")) + +(with-handlers ([string? 
(λ (s) (cons "got" s))] + [number? (λ (n) (+ n n))]) + (raise 10)) + +(with-handlers ([string? (λ (s) (cons "got" s))] + [number? (λ (n) (+ n n))]) + (+ (raise 10) 30)) + +(let ((f (λ (x) (raise 10)))) + (with-handlers ([string? (λ (s) (cons "got" s))] + [number? (λ (n) (+ n n))]) + (+ (f 10) 30))) + +(with-handlers ([string? (λ (s) (cons "got" s))] + [number? (λ (n) (+ n n))]) + 'nothing-bad-happens) + +(with-handlers ([symbol? (λ (s) (cons 'reraised s))]) + (with-handlers ([string? (λ (s) (cons "got" s))] + [number? (λ (n) (+ n n))]) + (raise 'not-handled-by-inner-handler))) + +] + +Notice that when a value is raised, the enclosing context is discarded. +In the third example, the surrounding @racket[(+ [] 30)] part is +ignored and instead the raised value @racket[10] is given to the +exception handler predicates, selecting the appropriate handler. + +Thinking about the implementation, what this means is that a portion +of the stack needs to be discarded, namely the area between the +current top of the stack and the stack that was in place when the +@racket[with-handlers] expression was evaluated. + +This suggests that a @racket[with-handlers] expression should stash +away the current value of @racket['rsp]. When a @racket[raise] +happens, it grabs the stashed away value and installs it as the +current value of @racket['rsp], effectively rolling back the stack to +its state at the point the exception handler was installed. It should +then jump to code that will carry out the applying of the predicates +and right-hand-side functions. + +Since @racket[with-handlers] expressions can be nested, you will need to maintain +an arbitrarily large collection of exception handlers, each of which +has a pointer into the stack and a label for the code to handle the +exception. This collection should operate like a stack: each +@racket[with-handlers] expression adds a new handler to the handler +stack. If the body expression returns normally, the top-most handler +should be removed.
When a raise happens, the top-most handler is +popped and used. + +@;{ +@subsection{Additional requirements} + +To receive full credit, you will to add the above features to Perp and +do the following. + +After you have a working implementation of @racket[raise] and +@racket[with-handlers], add a structure definition to your standard +library: @racket[(struct exn:fail (msg cm))]. Rework the compiler so +that all run-time errors raise an instance of @racket[struct:fail]. +This enables user-programs to handle run-time errors like this: + +@ex[ + +(with-handlers ([exn:fail? (λ (e) 'OK)]) + (add1 #f)) + +] + +(The @racket[cm] field can be ignored; you can always populate it with +@racket[#f] if you'd like. It's there just for consistency with +Racket's @racket[exn:fail].) +} + -Details of the project will be released later in the semester. @;{ For your project you should turn in your extension code, a directory @@ -48,13 +564,13 @@ semester: @item{Typing Loot. We have discussed typing for @tt{Hustle} and its implications in the compiler (deleting a whole lot of assertions), as - well as typing for a simple lambda calculus. This project would aim + well as typing for a simple λ calculus. This project would aim to combine the two threads, implementing a type system on top of Loot. There are interesting design decisions here, so feel free to reach out to talk about them!} @item{Loot Optimizations. Sky's the limit here. You can try - high-level optimizations (e.g. inlining, lambda lifting, dead-code + high-level optimizations (e.g. inlining, λ lifting, dead-code elimination, partial evaluation, etc.) or low-level ones (register allocation, register-based calling conventions etc.). Optimizations can be tricky to get right, so make sure you reuse all the unit @@ -64,111 +580,84 @@ semester: its scope is appropriate for a final project.} ] +} @;{ +@section{Pattern matching} +Racket, OCaml, Rust, Scala, and many other programming languages +support pattern matching. 
Extend +} -@(define repo "https://classroom.github.com/a/t5KO9b5-") - -The goal of this project is to put together everything you've -learned over the semester to complete a full-featured compiler. - -Project repository: -@centered{@link[repo repo]} - -@link["code/project.pdf"]{Slides} from lecture on the project. +@section{Garbage collection} -You are given a working compiler for an extension of the language we -have been developing all semester. +Racket, OCaml, Java, JavaScript, Ruby, and many, many other languages +use garbage collection as the means of deallocating memory. Implement +a garbage collector. -Your overall object is to improve the @emph{run-time} performance of -code generated by your compiler while maintaining correctness. +You may choose to implement this feature for any language that is +@seclink["Loot"]{Loot} or later. -There will be two releases of benchmark programs: +Here are the key features that need to be added: @itemlist[ -@item{Tuesday 12/3} -@item{Tuesday 12/10} -] -The final due date for your project is 10:30 AM on Saturday 12/14. +@item{all language constructs that allocate memory should check that +the current state of the heap can accommodate an allocation before +performing it, and if not, doing a garbage collection and trying +again. If there is still not possible to accommodate the allocation, +an error should be signaled.} -You will have an allowance of 10 minutes to @emph{compile} all benchmark -programs. Exceeding the allowance result in a penalty, but there is -no reward for improving @emph{compile-time} performance so long as you -come in under the 10 minute mark. +@item{@racket[(collect-garbage)] will run a garbage collection and +return void.} -You will have an allowance of 10 minutes to @emph{run} all benchmark -programs. For full-credit, you must improve the overall run-time -performance by 20\%. Run-time will compute as the average of three -runs, done on the GRACE cluster. 
+@item{@racket[(current-memory-use)] will return the number of bytes +currently allocated in the heap. This operation should not run a +garbage collection and should not trace reachable objects in the heap. +Instead it should simply return the total size, in bytes, that is +currently allocated in the heap.} -Full credit solutions will be entered in a compiler tournament to -determine the most performant (and correct) compiler. Tournament -results do not count toward your grade and will involve compiling -programs not included in the benchmark suite. - -Benchmark programs will be batch I/O programs: read some input, -compute something, produce a result and/or write some output. - -I/O primitives include @racket[read-char], @racket[write-char] -(limited to the standard input and output ports). - -The compiler supports a standard library, with source level -definitions provided to you. See the @racket[stdlib] function -in the compiler. - -There will be a garbage collector provided by the second round of -benchmarks which you will need to incorporate in to your compiler. - -@section[#:tag-prefix "fp-" #:style 'unnumbered]{Measuring run-times} - -Let's look at an example of how to measure the run-time performance of -the code your compiler generates. - -First, let's start with fairly computationally intensive program. -Here is a @link["code/fp/sieve.rkt"]{program} that computes the -@emph{n}th prime number using the ancient -@link["https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes"]{Sieve of -Eratosthenes} method. +@item{@racket[(dump-memory-stats)] prints information about the +current stack and heap and returns void. See the @tt{iniquity-gc} +language for an example.} +] -Save it to the directory where your compiler lives and run @tt{make -sieve.run}. This will run the compiler to generate the @tt{sieve.run} -executable. This program expects to read a number from the standard -input port.
-Run: -@centered{@tt{echo -n 100 | ./sieve.run}} -to compute the 100th prime number. +@section{Design your own} -To measure the time it takes, add the time command: +You may also design your own project, however, you will need to submit +a one-page write-up that documents what you plan to do and how you +will evaluate whether it is successful. You must submit this document +and have it approved by the instructor by November 22. +} -@centered{@tt{echo -n 100 | time ./sieve.run}} +@section[#:tag "project"]{Submitting} -This will run the program and show the result @emph{and} timing -information. We will be concerned with improving the real time it -takes to run the program. +Submissions should be made on Gradescope. +Your submission should be a zip file containing the following contents: -@section[#:tag-prefix "fp-" #:style 'unnumbered]{Testing} -@bold{There is separate a repository for tests.} When you push your -code, Travis will automatically run your code against the tests. If -you would like to run the tests locally, clone the following -repository into the directory that contains your compiler and run -@tt{raco test .} to test everything: +@verbatim|{ +info.rkt +<lang>/ +}| -@centered{@tt{https://github.com/cmsc430/fp-test.git}} +where @tt{<lang>} corresponds to the language you have chosen to +implement for your project, e.g. @tt{iniquity}, @tt{loot}, etc. -This repository will evolve as the week goes on, but any time there's -a significant update it will be announced on Piazza. +The @tt{info.rkt} should contain the following information: -@section[#:tag-prefix "fp-" #:style 'unnumbered]{Submitting} +@codeblock|{ +#lang info +(define project 'values) +(define language '<lang>) +}| -Pushing your local repository to github ``submits'' your work. We -will grade the latest submission that occurs before the deadline. -} +The @tt{<lang>} should be @tt{iniquity}, @tt{loot}, +etc. and should be the same as the directory that contains the +implementation.
} \ No newline at end of file diff --git a/www/schedule.scrbl b/www/schedule.scrbl index 7e7153b0..b2defa19 100644 --- a/www/schedule.scrbl +++ b/www/schedule.scrbl @@ -4,109 +4,144 @@ @title[#:style 'unnumbered]{Schedule} -@;(TuTh 2-3:15, Online) +@;(TuTh 9:30-10:45, IRB 0318) @(define (wk d) (nonbreaking (bold d))) @; for unreleased assignments, switch to seclink when ready to release @(define (tbaseclink lnk txt) txt) +@(define (day s) @elem[s]) + + @tabular[#:style 'boxed - #:sep @hspace[1] + #:sep @hspace[1] #:row-properties '(bottom-border) - (list (list @bold{Week} @bold{Due} @bold{Tuesday} @bold{Thursday}) - (list @wk{8/31} - "" - @secref["Intro"] - @elem{@secref["OCaml to Racket"], (@link["https://youtu.be/xKCFkXUcmK4"]{video})} ) - - (list @wk{9/7} - @seclink["Assignment 1"]{A1} - @elem{@secref["OCaml to Racket"], cont., (@link["https://youtu.be/sLOMQ_j7cPE"]{video}) } - @elem{@secref["a86"], (@link["https://www.youtube.com/watch?v=25tV38STdbQ"]{video})} ) - - (list @wk{9/14} - @seclink["Assignment 2"]{A2} - @itemlist[@item{@secref["Abscond"]} - @item{@secref["Blackmail"], (@link["https://www.youtube.com/watch?v=28F-5sCUfzg"]{video})} - @item{@secref["Con"], (@link["https://youtu.be/nh8x0EQsQQY"]{video})}] - @elem{@secref["Dupe"], (@link["https://youtu.be/GdQZ2D1lyZA"]{video})}) - - (list @wk{9/21} - "" - @itemlist[@item{@secref["Dodger"], (@link["https://youtu.be/-SuINAKs7gE"]{video})} - @item{Semantics, Correctness, Testing}] - @elem{@secref["Evildoer"], (@link["https://youtu.be/ouOgFdbT9fk"]{video})}) - - (list @wk{9/28} - @seclink["Assignment 3"]{A3} - @elem{@secref["Extort"], (@link["https://youtu.be/4lU-0i5sl-Q"]{video})} - @elem{@secref["Fraud"], (@link["https://youtu.be/XmezNX4qfWE"]{video})}) - - (list @wk{10/5} - "" - @elem{@secref["Fraud"], cont. 
(@link["https://youtu.be/XmezNX4qfWE"]{video})} - @itemlist[@item{Exam Review} - @item{@secref["Hustle"] (@link["https://youtu.be/SwKc_FeEmHk"]{video})}]) - - (list @wk{10/12} - @bold{@seclink["Midterm_1"]{M1}} - @itemlist[ - @item{@secref["Hustle"], cont. (@link["https://youtu.be/SwKc_FeEmHk"]{video})} - @item{@secref["Hoax"]}] - @elem{No lecture (exam)}) - - (list @wk{10/19} - @seclink["Assignment 4"]{A4} - @itemlist[ - @item{@secref["Hoax"], cont.} - @item{Hoodwink}] - @elem{@secref{Iniquity}}) - - (list @wk{10/26} + (list (list @bold{Week} + @bold{Due} + @bold{Monday} + @bold{Wednesday}) + + (list @wk{1/22} + "" + "No class" + @secref["Intro"]) + + + (list @wk{1/29} + @seclink["Assignment 1"]{A1} + @elem{@secref["OCaml to Racket"]} + @elem{@secref["OCaml to Racket"]}) +#| + (list @wk{9/11} + @seclink["Assignment 2"]{A2} + @elem{@secref["a86"]} + @elem{@secref["Abscond"]}) + + (list @wk{9/18} + "" + @itemlist[@item{@secref["Blackmail"]} + @item{@secref["Con"]}] + @itemlist[@item{@secref["Dupe"]} + @item{@secref{Dodger}}]) + + (list @wk{9/25} + @seclink["Assignment 3"]{A3} + @secref["Evildoer"] + @secref["Extort"]) + + (list @wk{10/2} + "" + @secref["Fraud"] + @elem{@secref["Fraud"] (cont.)}) + + (list @wk{10/9} + "" + @elem{@secref["Fraud"] (cont.)} + @secref["Midterm_1"]) + + (list @wk{10/16} + "" + @elem{@secref["Fraud"] (cont.)} + @elem{@secref["Hustle"]}) + + (list @wk{10/23} + "" + @elem{@secref["Hustle"] (cont.)} + @elem{@secref["Hoax"]}) + + (list @wk{10/30} + @seclink["Assignment 4"]{A4} + @elem{@secref["Iniquity"]} + @elem{@secref["Jig"]}) + + (list @wk{11/6} + "" + @elem{@secref["Knock"]} + @elem{@secref["Knock"] (cont.)}) + + (list @wk{11/13} + "" + "" + "") + + (list @wk{11/20} + "" + @secref["Midterm_2"] + "Thanksgiving break (no lecture)") + + (list @wk{11/27} + "" "" - @elem{@secref["Jig"]} - @elem{@secref["Knock"]}) - - (list @wk{11/2} - @tbaseclink["Assignment 5"]{A5} - @elem{@secref["Loot"]} - @elem{@secref["Loot"], cont.}) - - (list @wk{11/9} 
- @bold{@seclink["Midterm_2"]{M2}} - @elem{(slack.)} - @elem{No lecture (exam)}) - - (list @wk{11/16} - "" - @elem{Mountebank} @;{Match} - @elem{Neerdowell} @;{ ?? } - ) - - (list @wk{11/23} + "") + + (list @wk{12/4} + "" + "" + "") + + (list @wk{12/11} "" - @elem{Self-hosting} - @elem{No class, Thanksgiving.}) - - (list @wk{11/30} - @tbaseclink["Assignment 6"]{A6} - @elem{GC} - @elem{GC} - ) - - (list @wk{12/7} - @bold{@tbaseclink["Settle on Final Project"]{Final Project}} - @elem{Final Project Q&A} - @elem{Slack} - ) - - (list @wk{12/14} - "Final Projects due on exam date" - @elem{No lectures} - 'cont) - - )] - - -@bold{Final project assessment: @final-date .} + "" + "") + |# +)] +@;{ +@tabular[#:style 'boxed +#:sep @hspace[1] +#:row-properties '(bottom-border) +(list (list @bold{Date} @bold{Topic} @bold{Due}) +(list @day{5/30} @secref["Intro"] "") +(list @day{5/31} @secref["OCaml to Racket"] "") +(list @day{6/1} @secref["a86"] "") +(list @day{6/2} @secref["Abscond"] @seclink["Assignment 1"]{A1}) +(list @day{6/5} @itemlist[@item{@secref["Blackmail"]} @item{@secref["Con"]}] @seclink["Assignment 2"]{A2}) +(list @day{6/6} @itemlist[@item{@secref["Dupe"]} @item{@secref{Dodger}}] "") +(list @day{6/7} @secref["Evildoer"] "") +(list @day{6/8} @secref["Extort"] "") +(list @day{6/9} @secref["Fraud"] "") +(list @day{6/12} @secref["Hustle"] @seclink["Assignment 3"]{A3}) +(list @day{6/13} @secref["Hoax"] "") +(list @day{6/14} "Midterm 1" @secref["Midterm_1"]) +(list @day{6/15} @secref["Iniquity"] "") +(list @day{6/16} @elem{@secref["Iniquity"], cont.} "") +(list @day{6/19} @elem{Juneteenth Holiday} "") +(list @day{6/20} @secref["Jig"] @seclink["Assignment 4"]{A4}) +(list @day{6/21} @secref["Knock"] "") +(list @day{6/22} @elem{@secref["Knock"], cont.} "") +(list @day{6/23} @secref["Loot"] "") +(list @day{6/26} @elem{@secref["Loot"], cont.} "") +(list @day{6/27} @elem{GC} @seclink["Assignment 5"]{A5}) +(list @day{6/28} @secref["Mug"] "") +(list @day{6/29} "Midterm 2" 
@secref["Midterm_2"]) +(list @day{6/30} @secref["Mountebank"] "") +(list @day{7/3} @secref["Neerdowell"] @seclink["Assignment 6"]{A6}) +(list @day{7/4} "Independence Day Holiday" "") +(list @day{7/5} @secref["Outlaw"] "") +(list @day{7/6} @elem{@secref["Outlaw"], cont.} "") +(list @day{7/7} "Slack" @secref{Project}) +) +] +} + +@bold{Final project assessment: @|final-date|.} diff --git a/www/slides/lecture-01.pdf b/www/slides/lecture-01.pdf new file mode 100644 index 00000000..4394a955 Binary files /dev/null and b/www/slides/lecture-01.pdf differ diff --git a/www/slides/lecture-01.tex b/www/slides/lecture-01.tex index f4a83148..a1c89cb6 100644 --- a/www/slides/lecture-01.tex +++ b/www/slides/lecture-01.tex @@ -11,7 +11,7 @@ \title{Compilers}%\texorpdfstring{$\mathbb{N}$}} \subtitle{CMSC 430} -\date{January 26\textsuperscript{th}, 2021} +\date{January 25\textsuperscript{th}, 2022} \usetheme{jmct} @@ -59,40 +59,29 @@ \frametitle{Before we start...} \begin{enumerate} \item<2 - 5> Who am I? - \item<3 - 5> Who is on the lecture videos? \item<4 - 5> Who are the TAs? 
\item<5 - 5> Some admin \end{enumerate} } - + \frame{ \frametitle{Who am I?} - \onslide<2>{Leonidas Lampropoulos}\\ - \onslide<2>{PhD in Programming Languages} + \onslide<2>{Jos\'{e} Manuel Calder\'{o}n Trilla}\\ + \onslide<2>{PhD in Compilers} } \frame{ \frametitle{Who am I?} - {{\bf \large{Leo}}nidas Lampropoulos}\\ - {PhD in Programming Languages} + {{\bf \large{Jos\'{e}}} Manuel Calder\'{o}n Trilla}\\ + {PhD in Compilers} } - - - \frame{ - \frametitle{Who is on the lecture videos?} - \onslide<2>{Jos$\acute{e}$ Manuel Calder$\acute{o}$n Trilla}\\ - \onslide<2>{PhD in Compilers}\\ - \onslide<2>{Taught CMSC 430 Last Semester} - } - \frame{ \frametitle{Who are the TAs?} \onslide<2>{\begin{itemize} - \item Deena Postol - \item Tasnim Kabir - \item Vyas Gupta - \item Temur Saidkhodjaev + \item Benjamin Glover Quiring + \item William Chung + \item Drhuv Maniktala \end{itemize} } } diff --git a/www/slides/ocaml-to-racket.rkt b/www/slides/ocaml-to-racket.rkt index 636363dc..5dfcb7ac 100644 --- a/www/slides/ocaml-to-racket.rkt +++ b/www/slides/ocaml-to-racket.rkt @@ -4,18 +4,16 @@ ;; Title (slide - #:title "CMSC 430, Jan 30th 2020" + #:title "CMSC 430, Jan 27th 2022" (with-size 64 (tt "OCaml to Racket"))) ;; Stuff I forgot from last time (slide #:title "Admin take 2" 'next - (item "My name: José") + (item "My email: " (tt "jmct@umd.edu")) 'next - (item "My email (for now): " (tt "jmct@jmct.cc")) - 'next - (item "Website: " (tt "cs.umd.edu/class/spring2020/cmsc430/")) ) + (item "Website: " (tt "cs.umd.edu/class/spring2022/cmsc430/")) ) ;; Remind them that OCaml is 'cool' (slide @@ -63,7 +61,7 @@ 'next (item "PLT Scheme was original aimed as a" (text "pedagogical" '(italic) (current-font-size)) "tool for those learning programming and PLT") 'next - (item "Racket has a notion of 'language levels'") + (item "Racket has a notion of `language levels'") 'next (subitem "This allows features to be enabled/disabled so that they can be learned/understood individually") 'next @@ 
-78,7 +76,7 @@ (para "The code for the first slide looked like this:") (code (slide #:title "OCaml to Racket" - (item "CMSC 430, Jan 30th 2020")))) + (item "CMSC 430, Jan 27th 2022")))) ;; Is it still used (answer to question #3) (slide diff --git a/www/software.scrbl b/www/software.scrbl index b4dcb443..0b8583f7 100644 --- a/www/software.scrbl +++ b/www/software.scrbl @@ -33,29 +33,211 @@ This course will make use of the following software: @itemlist[ - @item{Operating system: an x86-64 ABI conforming OS such as - many variants of Linux and macOS running on an x86-64 CPU. - For @secref{Windows}, see notes below. - Students have access to the campus - @link["http://www.grace.umd.edu/"]{GRACE} cluster, which use - Red Hat Linux on an x86-64 CPU, an appropriate OS for this - class.} + @item{Operating system: an x86-64 ABI conforming OS such as many + variants of Linux and macOS running on an x86-64 CPU. It is + also possible to use macOS on an Apple Silicon CPU with some extra + steps. For @secref{Windows}, see notes below. + + All students have access to the campus + @link["http://www.grace.umd.edu/"]{GRACE} cluster, which use Red Hat + Linux on an x86-64 CPU, an appropriate OS for this class. 
See the + @secref{GRACE} notes below.} @item{Racket: the implementation language and source language of our compilers.} @item{Racket @tt{langs} package: a package containing utilities for this course.} - + @item{NASM: the Netwide Assembler, which we will use to - assembly x86 programs.} + assemble x86 programs.} @item{GCC: the GNU compiler collection or a GCC-compatible - system such as clang.} + system such as clang.} ] -@section{Installing Racket} +Instructions for using each system are below: + +@itemlist[ +@item{@secref{GRACE}} +@item{@secref{Linux}} +@item{@secref{mac}} +@item{@secref{Windows}} +] + +@section[#:tag "GRACE"]{Using GRACE} + +The @link["http://www.grace.umd.edu/"]{GRACE} system gives students +access to an x86-64 Linux system that meets all of the system +requirements for the software in this course. If you have an +incompatible system, or if you'd rather avoid installing and setting +up the software for this course, you can use GRACE. + +Before using GRACE, you should locally install an implementation of +the X.Org X Window System which will enable you to run GUI programs +from GRACE on your computer (or any other computer that uses X11). On +Linux, this is likely set up by default. On Mac, you will need to +install @link["https://www.xquartz.org/"]{XQuartz}. On Windows, you +can use @link["https://mobaxterm.mobatek.net/"]{MobaXterm}. + +To use GRACE, open a terminal on your computer and +type: + +@verbatim|{ ssh -Y <directoryID>@grace.umd.edu}| + +You will be prompted for your UMD Directory ID password. After entering +your password, you will be at the GRACE command line prompt. + +The @tt{-Y} command line option sets up X11 forwarding, which lets you +run GUI applications from GRACE. If you leave this off, programs like +DrRacket will fail to launch when started. + +Racket and @tt{nasm} are already installed, but you will +need to modify your @tt{PATH} environment variable so that you can +execute them from the command-line.
You can do this with the +following commands: + +@verbatim|{ + # CMSC 430 set up + set path = ( /cell_root/software/racket/8.4/sys/bin $path ) + set path = ( /cell_root/software/nasm/2.15.05/sys/bin/ $path )}| + +If you add these lines to the @tt{.path} file in your home directory, then you +won't have to run this command manually every time you login; it will happen +automatically. + +Once set, you should be able to run commands such as @tt{racket}, +@tt{raco}, and @tt{nasm}. Other tools such as @tt{gcc} are already +available. + +Finally, you will need to install @secref{langs-package}. + +@section[#:tag "Linux"]{Using Linux} + +If you have an ARM-based machine, you will need to use +@seclink["GRACE"]{GRACE} or potentially setup an x86 VM. + +For x86-based Linux machines, you will need to +@seclink["install-racket"]{install Racket} and the +@seclink["langs-package"]{langs package}. Finally, install @tt{nasm}. +You can use your favorite package manager; they should all have +@tt{nasm}. + + +@section[#:tag "mac"]{Using macOS} + +If you are using a macOS computer, the setup will be different +depending on whether you have an Intel-based CPU or an Apple Silicon +CPU. If you're unsure which you have, click the Apple icon in the +top-left and select "About This Mac". Under CPU, you will see +a chip name containing either Intel or Apple. + +@subsection[#:tag "intel-mac"]{Using macOS on Intel} + +Intel-based Macs are fairly straightforward to set up. You will need +to @seclink["install-racket"]{install Racket} and the +@seclink["langs-package"]{langs package}. You will also need to +install @tt{nasm}. It's probably easiest to use a package manager +such as @link["https://brew.sh/"]{Homebrew} to install with @tt{brew +install nasm}. + +You will also want to make sure your Racket installation is visible +from your @tt{PATH} environment variable. 
Assuming Racket was +installed in the usual location, you can run: + +@verbatim|{ export PATH=$PATH:"/Applications/Racket v|@|racket-version|/bin"}| + +NOTE: You'll need to know what version of Racket you installed and use that +version's name in the above command. For example, if you install Racket 8.6, +you should be using the path @tt{"/Applications/Racket v8.6/bin"} instead. + +You can add this line to the @tt{.zshrc} file in your home directory so that it +is available every time you start the Terminal. Note that once you make this +change to the @tt{.zshrc}, you'll either need to restart your terminal +application or run @tt{source ~/.zshrc} to update your current @tt{PATH} +settings. + +@subsection[#:tag "apple-silicon-mac"]{Using macOS on Apple Silicon} + +It's also possible to run everything we need on an Apple Silicon Mac +even though it doesn't use an x86 CPU and instead uses an ARM +processor. That's because Apple provides a compatibility layer called Rosetta +that will allow you to run x86 programs on your ARM CPU. + +The set up is basically the same as when @secref{intel-mac}, except +that when you install Racket you need to select the installer for +@bold{Mac OS (Intel 64-bit)} when +@link["https://download.racket-lang.org/"]{downloading Racket}. Do +not use the Apple Silicon 64-bit installer. This will work thanks to +Rosetta. + +Otherwise, follow the steps given above. + +@section[#:tag "Windows"]{Using Windows} + +For Windows users, using WSL for testing is highly recommended. Beyond +the first few assignments, the projects will require generating and +executing assembly code using the nasm package. Students in the past +have had trouble trying to configure this in the Windows environment, +so an easier workaround is simply to enable WSL and run your tests through +some Linux Distribution.
Here is a breakdown of the steps: + +@itemlist[ + #:style 'ordered + @item{Following the instructions at + @link["https://docs.microsoft.com/en-us/windows/wsl/install-win10"]{ + this link}, install a Linux Distro of your choice (e.g., + Ubuntu). The instructions include a suggestion to upgrade to + WSL2; this is not necessary but will improve efficiency in + general.} + + @item{Open your installed Linux distribution of choice and + make any initial configurations necessary (user, pass, + etc.). Run @tt{sudo apt update} and follow with @tt{sudo apt + upgrade}. These two may take some time. } + + @item{Run @tt{sudo apt install racket} and @tt{ + sudo apt install nasm}. These two should cover the necessary + installations for this course.} + + @item{Here is where to determine which IDE you would like to + use. + +@itemlist[ + @item{Using vim (or Emacs as mentioned in the previous section) is simple. + Copy assignment files into WSL. Modify files. } + + @item{Previous students preferred installing VSCode (outside of WSL) from + @link["https://code.visualstudio.com/download"]{this link}. For each + assignment, copy assignment files somewhere on your Linux distro. If you + would like to open @tt{some-file.rkt}, you can open it from the command line by + calling @tt{code some-file.rkt} and, after some automatic set up, VSCode should + load up the file. You can install Racket extensions from the VSCode + Marketplace (a suggestion will also pop up once you open a .rkt file) to + have colorized syntax, bracket matching, autocomplete/IntelliSense, etc. } + + @item{If you are intent on using DrRacket, you will want to set up an X + Window System to run GUI programs from within WSL. A popular option is + @link["https://mobaxterm.mobatek.net/"]{MobaXterm}. Once that is installed, + you can launch DrRacket from within your WSL terminal by running the + command @tt{drracket}. 
+ + You could also install DrRacket @emph{outside} WSL and copy your files back + and forth or use a symbolic link to connect the two. However, DrRacket will + not be able to see the @tt{langs} package you will install later by default, + so you would either need to install it again within DrRacket or else find a + way to connect your Windows-based DrRacket to your WSL-based Racket package + configuration. This seems unnecessarily complicated, though, so we don't + recommend this option. } +]} + +] + +Regardless of the IDE used, you can now run your tests from your Linux +subsystem by entering the project directory and using the raco command. + +@section[#:tag "install-racket"]{Installing Racket} Racket is available for all major operating systems from: @@ -101,7 +283,7 @@ install updates, run: @section{IDE} -Racket comes with it's own IDE: DrRacket, which is the recommended way +Racket comes with its own IDE: DrRacket, which is the recommended way to edit Racket files. We will also be running Racket and its associated tools from the command line. @@ -110,54 +292,7 @@ If you'd like to use Emacs, there's a good using DrRacket for a while before switching to Emacs. Using any other editor is fine, too. -@section[#:tag "Windows"]{Windows 10 Users} - -For Windows 10 users, using WSL for testing is highly recommended. Beyond -the first few assignments, the projects will require generating and -executing assembly code using the nasm package. Students in the past -have had trouble trying to configure this in the Windows environment, -so an easier workaround is simply to enable WSL and run your tests through -some Linux Distribution. Here is a breakdown of the steps: - -@itemlist[ - #:style 'ordered - @item{Following the instructions at - @link["https://docs.microsoft.com/en-us/windows/wsl/install-win10"]{ - this link}, install a Linux Distro of your choice (e.g., - Ubuntu). 
The instructions include a suggestion to upgrade to - WSL2; this is not necessary but will improve efficiency in - general.} - - @item{Open your installed Linux distribution of choice and - make any initial configurations necessary (user, pass, - etc.). Run @tt{sudo apt update} and follow with @tt{sudo apt - upgrade}. These two may take some time. } - - @item{Run @tt{sudo apt install racket} and @tt{ - sudo apt install nasm}. These two should cover the necessary - installations for this course.} - - @item{Here is where to determine which IDE you would like to - use. - -@itemlist[ - @item{Using vim (or Emacs as mentioned in the previous section) is simple. Git clone project repos into WSL. Modify files.} - @item{Previous students preferred installing VSCode (outside of WSL) from @link["https://code.visualstudio.com/download"]{this link}. - For each assignment, git clone somewhere on your Linux distro. For some .rkt file, call 'code some-rkt-file.rkt' and - after some automatic set up, VSCode should load up the file. Install Racket extensions from the VSCode - Marketplace (a suggestion will also pop up once you open a .rkt file) to have colorized syntax, bracket matching, - inteliSense, etc. } - @item{If you are intent on using DrRacket, you would also need to install Racket on your local machine - (outside WSL). For each assignment, git clone into your normal file system and use DrRacket to edit files - accordingly. To access from your Linux subsystem, create a soft symbolic link in your Linux distro to the - project directory (or the parent directory so you do not need to make links with each new project).} -]} - -] - -Regardless of the IDE used, you can now run your tests from your Linux -subsystem by entering the project directory and using the raco command. 
- +@;{ @section{Detailed compatiblity list} The course software has been successfully tested with the @@ -170,7 +305,7 @@ following: @item{Red Hat Enterprise Linux 7.7} @item{macOS 11.0 (Big Sur)} @item{macOS 10.15 (Catalina)}]} - + @item{Racket: @itemlist[@item{Racket 8.1 [cs]} @item{Racket 8.1 [bc]} @@ -214,7 +349,7 @@ A program is a sequence of definitions or expressions. The grammar for the subset of Racket we will use is: @(with-unquote-rewriter - (lambda (lw) + (lambda (lw) (build-lw (list (build-lw "(" (lw-line lw) (lw-line-span lw) (lw-column lw) 1) (build-lw 'unquote (lw-line lw) (lw-line-span lw) (+ 1 (lw-column lw)) 7) (build-lw " " (lw-line lw) (lw-line-span lw) (+ 2 (lw-column lw)) 1) @@ -225,7 +360,7 @@ The grammar for the subset of Racket we will use is: (lw-column lw) (+ 8 (lw-column-span lw)))) - + (render-grammar R0)) @section{Built-In Datatypes} @@ -289,3 +424,4 @@ Here are some examples of writing various functions in our subset of Racket. } +} \ No newline at end of file diff --git a/www/syllabus.scrbl b/www/syllabus.scrbl index 85d0c227..ce33f611 100644 --- a/www/syllabus.scrbl +++ b/www/syllabus.scrbl @@ -4,65 +4,150 @@ @provide[exam-table] +@(define grades:m1 (list @elem{Midterm, @m1-date} "10%")) +@(define grades:f (list @elem{Final Exam, @final-date} "20%")) + +@(define (make-grade-component-table . entries) + @tabular[#:style 'boxed + #:sep @hspace[1] + (list* (list @bold{Component} @bold{Percentage}) entries)]) + + +@(define exam-table + @make-grade-component-table[ + @grades:m1 + @grades:f]) + @title[#:style 'unnumbered]{Syllabus} -@local-table-of-contents[] +@bold{Introduction to Compilers, CMSC 430} + +@bold{Term:} @string-titlecase[semester], @year + +@bold{Professor:} @prof1 (@prof1-pronouns) -@section{Prerequisites and Description} +@bold{Email:} @prof1-email + +@bold{Office Hours:} By appointment. Send email or ELMS message to set +up. 
@bold{Prerequisite:} a grade of C or better in CMSC330; and permission of department; or CMSC graduate student. @bold{Credits:} 3. -@courseno is an introduction to compilers. Its major goal is to arm -students with the ability to design, implement, and extend a -programming language. Throughout the course, students will design and -implement several related high-level programming languages, building -compilers that target the x86 CPU architecture. +@;{@bold{Lecture dates:} @lecture-dates} + +@bold{Lectures:} +@lecture-schedule1, @classroom1 (@prof1-initials) + +@bold{Course Description:} @courseno is an introduction to compilers. +Its major goal is to arm students with the ability to design, +implement, and extend a programming language. Throughout the course, +students will design and implement several related high-level +programming languages, building compilers that target the x86 CPU +architecture. The course assumes familiarity with a functional programming such as OCaml from CMSC 330, and, to a lesser extent, imperative programming in C and Assembly as covered in CMSC 216. -@section{Course Workflow} -The course will be a combination of synchronous in-person lectures, -video lectures, live Q+A sessions, and online course notes. The short -lecture videos will be available online, to view at your own pace. The -scheduled in-person lecture time will supplement and expand upon the -videos and incorporate Q+A time, focusing on the topic of the most -recent video lectures. +@bold{Course Structure:} The course will consist of +in-person lectures, which will be recorded and available on ELMS +immediately after each lecture. There are two midterms, a final +project, which counts as the final assessment for the class, several +assignments, and several quizzes and surveys. Midterms are take-home +exams and are completed online over a @midterm-hours period. -In-person lectures will be recorded and posted shortly after each -class.
+@bold{Contents:} -@section{COVID-19} +@local-table-of-contents[] -In the Fall of 2021, we are still in the midst of the global COVID-19 -pandemic. Although this semester marks a return to in-person -instruction, a number of changes have been made by the university in -order to anticipate and accomodate this situation. Please review the -Provost's FAQ on @link["https://provost.umd.edu/node/4243"]{Fall 2021 -Instruction}. +@section{Policies and Resources for Undergraduate Courses} -@section{Mask Policy} +It is our shared responsibility to know and abide by the University of +Maryland's policies that relate to all courses, which include topics +like: -@bold{Face coverings over the nose and mouth are required while you -are indoors at all times.} There are no exceptions when it comes to -classrooms, laboratories, and campus offices. Students not wearing a -mask will be given a warning and asked to wear one, or will be asked -to leave the room immediately. Students who have additional issues -with the mask expectation after a first warning will be referred to -the Office of Student Conduct for failure to comply with a directive -of University officials. +@itemlist[ +@item{Academic integrity} +@item{Student and instructor conduct} +@item{Accessibility and accommodations} +@item{Attendance and excused absences} +@item{Grades and appeals} +@item{Copyright and intellectual property} +] +Please visit +@link["https://www.ugst.umd.edu/courserelatedpolicies.html"]{https://www.ugst.umd.edu/courserelatedpolicies.html} +for the Office of Undergraduate Studies' full list of campus-wide +policies and follow up with the instructor if you have questions. + +@section{Course Guidelines} + +@bold{Names/Pronouns and Self-Identifications:} The University of +Maryland recognizes the importance of a diverse student body, and we +are committed to fostering inclusive and equitable classroom +environments. 
We invite you, if you wish, to tell us how you want to +be referred to in this class, both in terms of your name and your +pronouns (he/him, she/her, they/them, etc.). Keep in mind that the +pronouns someone uses are not necessarily indicative of their gender +identity. Visit @link["https://trans.umd.edu"]{https://trans.umd.edu} +to learn more. + +Additionally, it is your choice whether to disclose how you identify +in terms of your gender, race, class, sexuality, religion, and +dis/ability, among all aspects of your identity (e.g., should it come +up in classroom conversation about our experiences and perspectives) +and should be self-identified, not presumed or imposed. Course staff +will do their best to address and refer to all students accordingly, +and we ask you to do the same for all of your fellow Terps. + +@bold{Communication with Instructor:} + +Email: If you need to reach out and communicate with @prof1, +please email at @|prof1-email|. Please DO NOT email +questions that are easily found in the syllabus or on ELMS (i.e. When +is this assignment due? How much is it worth? etc.) but please DO +reach out about personal, academic, and intellectual +concerns/questions. + +ELMS: IMPORTANT announcements will be sent via ELMS messaging. You +must make sure that your email & announcement notifications (including +changes in assignments and/or due dates) are enabled in ELMS so you do +not miss any messages. You are responsible for checking your email +and Canvas/ELMS inbox with regular frequency. + +@bold{Communication with Peers:} + +With a diversity of perspectives and experience, we may find ourselves +in disagreement and/or debate with one another. As such, it is +important that we agree to conduct ourselves in a professional manner +and that we work together to foster and preserve a virtual classroom +environment in which we can respectfully discuss and deliberate +controversial questions. 
We encourage you to confidently exercise your +right to free speech—bearing in mind, of course, that you will be +expected to craft and defend arguments that support your +position. Keep in mind that free speech has its limits, and this course +is NOT the space for hate speech, harassment, and derogatory +language. We will make every reasonable attempt to create an +atmosphere in which each student feels comfortable voicing their +argument without fear of being personally attacked, mocked, demeaned, +or devalued. + +Any behavior (including harassment, sexual harassment, and racially +and/or culturally derogatory language) that threatens this atmosphere +will not be tolerated. Please alert the instructor immediately if you +feel threatened, dismissed, or silenced at any point during the +semester and/or if your engagement in discussion has been in some way +hindered by the learning environment. + +@;{HERE} @section{Office Hours} -Office hours will be held on @link[@discord]{this discord -server}. Make sure that your 'nickname' is set to something appropriate for -class. +Office hours will be held online and in-person. Details TBD. @;{Please make sure that you fill out @link["https://docs.google.com/spreadsheets/d/1sDCpekBHIGjVSuGDsabPb74wZ5nHA_sTLvIPOzTUQ4k/edit?usp=sharing"]{ @@ -75,9 +160,9 @@ up repeatedly, the staff can make an announcement that addresses the concern for the entire class. Lastly, it helps the course staff keep an eye on what topics might need more attention.} -The discord server is there for you to organize as a class, ask questions of +@;{The discord server is there for you to organize as a class, ask questions of each other, and to get help from staff. Its main purpose is as a vehicle for -office hours. That said, feel free to use the discord for discussion.
I (@prof-initials) will check periodically, but if you would like to ask a question directly to the course staff, office hours and email remain the prioritized forms of communication. @@ -85,7 +170,7 @@ communication. There is a channel '#course-discussion' that is meant for discussion/questions/help regarding the material of the course, make sure that you keep that channel free from noise so that other students and course staff -can easily see what issues are being brought up. +can easily see what issues are being brought up.} @section{Topics} @@ -105,54 +190,54 @@ of the course: @item{Language design} @item{Advanced topics in compilation}] -@section{Grading} +@section{Grades} + +All assessment scores will be posted on the course +@link[elms-url]{ELMS} page. -Grades will be maintained on @link[elms-url]{ELMS}. +Late work will not be accepted for course credit so please plan to +have it submitted well before the scheduled deadline. -You are responsible for all material discussed in lecture and posted -on the class web page, including announcements, deadlines, policies, -etc. +Any formal grade disputes must be submitted in writing and within one +week of receiving the grade. Final letter grades are assigned based +on the percentage of total assessment points earned. To be fair to +everyone I have to establish clear standards and apply them +consistently, so please understand that being close to a cutoff is not +the same as making the cut (89.99 ≠ 90.00). It would be unethical to +make exceptions for some and not others. Your final course grade will be determined according to the following percentages: -@(define grades:m1 (list @elem{Midterm, @m1-date} "10%")) -@(define grades:f (list @elem{Final Exam, @final-date} "20%")) - -@(define (make-grade-component-table . 
entries) - @tabular[#:style 'boxed - #:sep @hspace[1] - (list* (list @bold{Component} @bold{Percentage}) entries)]) - - -@(define exam-table - @make-grade-component-table[ - @grades:m1 - @grades:f]) - - @make-grade-component-table[ (list "Assignments" "45%") (list @elem{Quizzes & surveys} "15%") (list "Midterms (2)" "25%") (list "Final project" "15%")] +Final letter grades will be assigned based on the following cutoff +table: + +@tabular[#:style 'boxed #:sep @hspace[1] + (list (list "A+" "97%" "C+" "77%" "D+" "67%" " " " ") + (list "A" "94%" "C" "74%" "D" "64%" "F" "<60%") + (list "A-" "90%" "C-" "70%" "D-" "60%" " " " "))] + @section[#:tag "syllabus-videos"]{Videos} -Videos will be a core part of this course. There are two types of -videos: (1) short, pre-recorded video lectures which will be made -available before each class, and (2) screen and audio captures of each -in-person lecture. +Lectures will be recorded and posted to ELMS shortly after every +class. There are also prepared videos available covering the +material. -These videos will be made available as the course progresses. If there -is ever any issue with accessing these videos, let the instructor know -as soon as possible. +These videos will be made available as the course +progresses. If there is ever any issue with accessing these videos, +let the instructor know as soon as possible. @section[#:tag "syllabus-assignments"]{Assignments} -There will be several programming @secref{Assignments}, often with full week +There will be several programming @secref{Assignments}, often with a full week given for completion and submission (e.g. if it assigned on a Tuesday it will be due the following Tuesday at 11:59pm EST unless otherwise noted). @@ -218,7 +303,7 @@ will be provided during office hours. Office hours for the instructional staff will be posted on the course web page. Additional assistance will provided via discussion on -@link[@discord]{Discord}. 
You may use this forum to ask general +@link[@piazza]{Piazza}. You may use this forum to ask general questions of interest to the class as a whole, e.g., administrative issues or problem set clarification questions. The course staff will monitor it on a daily basis, but do not expect immediate answers to @@ -226,15 +311,15 @@ questions. Please do not post publicly any information that would violate the university academic integrity policy (e.g., problem set code). -Discord allows students to send private questions that are only +@;{Discord allows students to send private questions that are only visible to instructors. Please use this feature if you wish to ask -specific questions concerning your assignment solutions. +specific questions concerning your assignment solutions.} Personal e-mail to TAs should be reserved for issues that cannot be handled by the above methods. Important announcements will be made in class or on the class web -page, and via Discord. +page, and via Piazza. @section{Excused Absences} @@ -329,12 +414,6 @@ All arrangements for exam accommodations as a result of disability three business days prior to the exam date; later requests (including retroactive ones) will be refused. -@section{University of Maryland Policies for Undergraduate Students} - -Please read the university's guide on -@link["https://www.ugst.umd.edu/courserelatedpolicies.html"]{Course -Related Policies}, which provides you with resources and information -relevant to your participation in a UMD course. @section{Academic Integrity} @@ -464,3 +543,9 @@ syllabus. Portions of the course materials are based on material developed by Ranjit Jhala and Joe Gibbs Politz. + +We gratefully acknowledge the work of past CMSC 430 TAs William Chung, +Pierce Darragh, Justin Frank, Vyas Gupta, Sankha Narayan Guria, Tasnim +Kabir, John Kastner, Yiyun Liu, Dhruv Maniktala, Christopher Maxey, +Deena Postol, Ivan Quiles-Rodriguez, Benjamin Quiring, Temur +Saidkhodjaev, Matvey Stepanov, Alex Taber. 
diff --git a/www/utils.rkt b/www/utils.rkt index 8457760f..fb5bb611 100644 --- a/www/utils.rkt +++ b/www/utils.rkt @@ -1,6 +1,5 @@ - #lang racket -(provide exercise float-right panopto-vid shell) +(provide exercise float-right panopto-vid shell shell-result) (require scribble/base scribble/core scribble/html-properties redex/pict) @@ -47,9 +46,12 @@ (parameterize ([current-output-port o] [current-error-port e]) (set! r (proc))))))))) - (unless r (error (string-append os es))) + ; (unless r (error (string-append os es))) (string-append os es)) +(define (shell-result c) + (with-output-to-string/err (λ () (system #:set-pwd? #t c)))) + (define (shell . cs) (match cs ['() ""] diff --git a/ziggy/info.rkt b/ziggy/info.rkt new file mode 100644 index 00000000..9866a55e --- /dev/null +++ b/ziggy/info.rkt @@ -0,0 +1,8 @@ +#lang info +(define version "1.0") +(define collection 'use-pkg-name) +(define compile-omit-paths (list "src")) +(define test-omit-paths (list "src/test")) +(define deps (list "base" "rackunit" + "git+https://github.com/dvanhorn/crook.git?path=#main")) + diff --git a/ziggy/src/Makefile b/ziggy/src/Makefile new file mode 100644 index 00000000..f482f0f8 --- /dev/null +++ b/ziggy/src/Makefile @@ -0,0 +1,2 @@ +runtime.o: + touch runtime.o # this is a dummy runtime for Ziggy diff --git a/ziggy/src/ast.rkt b/ziggy/src/ast.rkt new file mode 100644 index 00000000..6de4c2de --- /dev/null +++ b/ziggy/src/ast.rkt @@ -0,0 +1,84 @@ +#lang crook +{:= A B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(provide {:> A} Lit {:> E0} Prim0 {:> B} Prim1 {:> F} Prim2 {:> H1} Prim3 + {:> C D0} IfZero {:> D0} If {:> E0} Eof {:> E0} Begin {:> F} Let + {:> F} Var {:> H0} Empty {:> I} Prog {:> I} Defn {:> I} App + {:> K} Match {:> K} Box {:> K} Cons {:> K} Conj) +;; + +{:> I} ;; type Prog = (Prog (Listof Defn) Expr) +{:> I} (struct Prog (ds e) #:prefab) + +{:> I} ;; type Defn = (Defn Id (Listof Id) Expr) +{:> I} (struct Defn (f xs e) #:prefab) + +{:> A D0} ;; type Expr = (Lit Integer) +{:> D0} ;; 
type Expr = (Lit Datum) +{:> E0} ;; | (Eof) +{:> H0} ;; | (Empty) +{:> E0} ;; | (Prim0 Op0) +{:> B} ;; | (Prim1 Op1 Expr) +{:> F} ;; | (Prim2 Op2 Expr Expr) +{:> H1} ;; | (Prim3 Op3 Expr Expr Expr) +{:> C D0} ;; | (IfZero Expr Expr Expr) +{:> D0} ;; | (If Expr Expr Expr) +{:> D0.A D1} + ;; | (Cond [Listof CondClause] Expr) +{:> D0.A D1} + ;; | (Case Expr [Listof CaseClause] Expr) +{:> F} ;; | (Let Id Expr Expr) +{:> F} ;; | (Var Id) +{:> I} ;; | (App Id (Listof Expr)) +{:> K} ;; | (Match Expr (Listof Pat) (Listof Expr)) + +{:> D0.A D1} +;; type CondClause = (Clause Expr Expr) +{:> D0.A D1} +;; type CaseClause = (Clause [Listof Datum] Expr) + +{:> F} ;; type Id = Symbol +{:> D0} ;; type Datum = Integer +{:> D0} ;; | Boolean +{:> D1} ;; | Character +{:> H1} ;; | String +{:> E0} ;; type Op0 = 'read-byte | 'peek-byte | 'void +{:> B} ;; type Op1 = 'add1 | 'sub1 +{:> D0} ;; | 'zero? +{:> D0.A D1} + ;; | 'abs | '- | 'not +{:> D1} ;; | 'char? | 'integer->char | 'char->integer +{:> E0} ;; | 'write-byte | 'eof-object? +{:> H0} ;; | 'box | 'car | 'cdr | 'unbox +{:> H0} ;; | 'empty? | 'cons? | 'box? +{:> H1} ;; | 'vector? | vector-length +{:> H1} ;; | 'string? | string-length +{:> F} ;; type Op2 = '+ | '- | '< | '= +{:> H0} ;; | eq? | 'cons +{:> H1} ;; | 'make-vector | 'vector-ref +{:> H1} ;; | 'make-string | 'string-ref +{:> H1} ;; type Op3 = 'vector-set! 
+{:> K} ;; type Pat = (Var Id) +{:> K} ;; | (Lit Datum) +{:> K} ;; | (Box Pat) +{:> K} ;; | (Cons Pat Pat) +{:> K} ;; | (Conj Pat Pat) + +{:> E0} (struct Eof () #:prefab) +{:> H0} (struct Empty () #:prefab) +{:> A D0} (struct Lit (i) #:prefab) +{:> D0} (struct Lit (d) #:prefab) +{:> E0} (struct Prim0 (p) #:prefab) +{:> B} (struct Prim1 (p e) #:prefab) +{:> F} (struct Prim2 (p e1 e2) #:prefab) +{:> H1} (struct Prim3 (p e1 e2 e3) #:prefab) +{:> C D0} (struct IfZero (e1 e2 e3) #:prefab) +{:> D0} (struct If (e1 e2 e3) #:prefab) +{:> E0} (struct Begin (e1 e2) #:prefab) +{:> F} (struct Let (x e1 e2) #:prefab) +{:> F} (struct Var (x) #:prefab) +{:> I} (struct App (f es) #:prefab) +{:> K} (struct Match (e ps es) #:prefab) + +{:> K} (struct Box (p) #:prefab) +{:> K} (struct Cons (p1 p2) #:prefab) +{:> K} (struct Conj (p1 p2) #:prefab) diff --git a/ziggy/src/build-runtime.rkt b/ziggy/src/build-runtime.rkt new file mode 100644 index 00000000..33284803 --- /dev/null +++ b/ziggy/src/build-runtime.rkt @@ -0,0 +1,14 @@ +#lang crook +{:= E0 E1 F H0 H1 I J K} +(provide runtime-path) + +(require racket/runtime-path) +(define-runtime-path here ".") + +(unless (system (string-append "make -C '" + (path->string (normalize-path here)) + "' runtime.o")) + (error 'build-runtime "could not build runtime")) + +(define runtime-path + (normalize-path (build-path here "runtime.o"))) diff --git a/ziggy/src/compile-ops.rkt b/ziggy/src/compile-ops.rkt new file mode 100644 index 00000000..5bd6f864 --- /dev/null +++ b/ziggy/src/compile-ops.rkt @@ -0,0 +1,385 @@ +#lang crook +{:= B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(provide {:> E0} compile-op0 compile-op1 {:> F} compile-op2 {:> H1} compile-op3 {:> F} pad-stack) +(require "ast.rkt") +{:> D0} (require "types.rkt") +(require a86/ast) + +(define rax 'rax) +{:> H1} (define eax 'eax) {:> H1} ; 32-bit load/store +{:> H0} (define rbx 'rbx) {:> H0} ; heap +{:> E0} (define rdi 'rdi) {:> E0} ; arg +{:> F} (define r8 'r8) {:> F} ; scratch in op2 +{:> D0} 
(define r9 'r9) {:> E0} ; scratch +{:> H1} (define r10 'r10) {:> H1} ; scratch + +{:> F} (define r15 'r15) {:> F} ; stack pad (non-volatile) +{:> F} (define rsp 'rsp) {:> F} ; stack + +{:> E0} ;; Op0 -> Asm +{:> E0} +(define (compile-op0 p) + (match p + ['void (seq (Mov rax (value->bits (void))))] + ['read-byte (seq {:> F} pad-stack (Call 'read_byte) {:> F} unpad-stack)] + ['peek-byte (seq {:> F} pad-stack (Call 'peek_byte) {:> F} unpad-stack)])) + +;; Op1 -> Asm +(define (compile-op1 p) + (match p + {:> B D0} ['add1 (Add rax 1)] + {:> B D0} ['sub1 (Sub rax 1)] + {:> D0 E1} ['add1 (Add rax (value->bits 1))] + {:> E1} ['add1 + (seq (assert-integer rax) + (Add rax (value->bits 1)))] + {:> D0 E1} ['sub1 (Sub rax (value->bits 1))] + {:> E1} ['sub1 + (seq (assert-integer rax) + (Sub rax (value->bits 1)))] + {:> D0} ['zero? + {:> D0 D1} + (seq (Cmp rax 0) + (Mov rax (value->bits #f)) + (Mov r9 (value->bits #t)) + (Cmove rax r9)) + {:> D1} + (seq {:> E1} (assert-integer rax) + (Cmp rax 0) + if-equal)] + {:> D1} ['char? + (seq (And rax mask-char) + (Cmp rax type-char) + if-equal)] + {:> D1} ['char->integer + (seq {:> E1} (assert-char rax) + (Sar rax char-shift) + (Sal rax int-shift))] + {:> D1} ['integer->char + (seq {:> E1} (assert-codepoint) + (Sar rax int-shift) + (Sal rax char-shift) + (Xor rax type-char))] + {:> E0} ['eof-object? + (seq (Cmp rax (value->bits eof)) + if-equal)] + {:> E0} ['write-byte + (seq {:> E1} assert-byte + {:> F} pad-stack + (Mov rdi rax) + (Call 'write_byte) + {:> F} unpad-stack)] + + {:> H0} ['box + (seq (Mov (Offset rbx 0) rax) ; memory write + (Mov rax rbx) ; put box in rax + (Or rax type-box) ; tag as a box + (Add rbx 8))] + + {:> H0} ['unbox + (seq (assert-box rax) + (Xor rax type-box) + (Mov rax (Offset rax 0)))] + {:> H0} ['car + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 8)))] + {:> H0} ['cdr + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 0)))] + + {:> H0} ['empty? 
(seq (Cmp rax (value->bits '())) if-equal)] + {:> H0} ['cons? (type-pred ptr-mask type-cons)] + {:> H0} ['box? (type-pred ptr-mask type-box)] + {:> H1} ['vector? (type-pred ptr-mask type-vect)] + {:> H1} ['string? (type-pred ptr-mask type-str)] + {:> H1} ['vector-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-vector rax) + (Xor rax type-vect) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))] + {:> H1} + ['string-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-string rax) + (Xor rax type-str) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))])) + + +{:> F} ;; Op2 -> Asm +{:> F} +(define (compile-op2 p) + (match p + ['+ + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Add rax r8))] + ['- + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Sub r8 rax) + (Mov rax r8))] + ['< + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + if-lt)] + ['= + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + if-equal)] + {:> H0} + ['cons + (seq (Mov (Offset rbx 0) rax) + (Pop rax) + (Mov (Offset rbx 8) rax) + (Mov rax rbx) + (Or rax type-cons) + (Add rbx 16))] + {:> H0} + ['eq? 
+ (seq (Pop r8) + (Cmp rax r8) + if-equal)] + {:> H1} + ['make-vector ;; size value + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) ;; r8 = size + (assert-natural r8) + (Cmp r8 0) ; special case empty vector + (Je empty) + + (Mov r9 rbx) + (Or r9 type-vect) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Label loop) + (Mov (Offset rbx 0) rax) + (Add rbx 8) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-vect) + (Label done)))] + {:> H1} + ['vector-ref ; vector index + (seq (Pop r8) + (assert-vector r8) + (assert-integer rax) + (Cmp r8 type-vect) + (Je 'err) ; special case for empty vector + (Cmp rax 0) + (Jl 'err) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'err) + (Sal rax 3) + (Add r8 rax) + (Mov rax (Offset r8 8)))] + {:> H1} + ['make-string + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (assert-char rax) + (Cmp r8 0) ; special case empty string + (Je empty) + + (Mov r9 rbx) + (Or r9 type-str) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Sar rax char-shift) + + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd + + (Label loop) + (Mov (Offset rbx 0) eax) + (Add rbx 4) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-str) + (Label done)))] + {:> H1} + ['string-ref + (seq (Pop r8) + (assert-string r8) + (assert-integer rax) + (Cmp r8 type-str) + (Je 'err) ; special case for empty string + (Cmp rax 0) + (Jl 'err) + (Xor r8 type-str) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'err) + (Sal rax 2) + (Add r8 rax) + (Mov 'eax (Offset r8 8)) + (Sal rax char-shift) + (Or rax type-char))])) + +{:> H1} ;; Op3 -> Asm +{:> H1} +(define (compile-op3 p) + (match p + 
['vector-set! + (seq (Pop r10) + (Pop r8) + (assert-vector r8) + (assert-integer r10) + (Cmp r10 0) + (Jl 'err) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar r10 int-shift) ; r10 = index + (Sub r9 1) + (Cmp r9 r10) + (Jl 'err) + (Sal r10 3) + (Add r8 r10) + (Mov (Offset r8 8) rax) + (Mov rax (value->bits (void))))])) + + +{:> D1} ;; -> Asm +{:> D1} ;; set rax to #t or #f if comparison flag is equal +{:> D1} +(define if-equal + (seq (Mov rax (value->bits #f)) + (Mov r9 (value->bits #t)) + (Cmove rax r9))) + +{:> F} ;; -> Asm +{:> F} ;; set rax to #t or #f if comparison flag is less than +{:> F} +(define if-lt + (seq (Mov rax (value->bits #f)) + (Mov r9 (value->bits #t)) + (Cmovl rax r9))) + +{:> E1} +(define (assert-type mask type) + (λ (arg) + (seq (Mov r9 arg) + (And r9 mask) + (Cmp r9 type) + (Jne 'err)))) + +{:> E1} +(define (type-pred mask type) + (seq (And rax mask) + (Cmp rax type) + if-equal)) + +{:> E1} +(define assert-integer + (assert-type mask-int type-int)) +{:> E1} +(define assert-char + (assert-type mask-char type-char)) +{:> H0} +(define assert-box + (assert-type ptr-mask type-box)) +{:> H0} +(define assert-cons + (assert-type ptr-mask type-cons)) +{:> H1} +(define assert-vector + (assert-type ptr-mask type-vect)) +{:> H1} +(define assert-string + (assert-type ptr-mask type-str)) + +{:> E1} +(define (assert-codepoint) + (let ((ok (gensym))) + (seq (assert-integer rax) + (Cmp rax (value->bits 0)) + (Jl 'err) + (Cmp rax (value->bits 1114111)) + (Jg 'err) + (Cmp rax (value->bits 55295)) + (Jl ok) + (Cmp rax (value->bits 57344)) + (Jg ok) + (Jmp 'err) + (Label ok)))) + +{:> E1} +(define assert-byte + (seq (assert-integer rax) + (Cmp rax (value->bits 0)) + (Jl 'err) + (Cmp rax (value->bits 255)) + (Jg 'err))) + +{:> H1} +(define (assert-natural r) + (seq (assert-integer r) + (Cmp r (value->bits 0)) + (Jl 'err))) + +{:> F} ;; Asm +{:> F} ;; Dynamically pad the stack to be aligned for a call +{:> F} +(define pad-stack + (seq 
(Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +{:> F} ;; Asm +{:> F} ;; Undo the stack alignment after a call +{:> F} +(define unpad-stack + (seq (Add rsp r15))) diff --git a/ziggy/src/compile-stdin.rkt b/ziggy/src/compile-stdin.rkt new file mode 100644 index 00000000..1662cdd6 --- /dev/null +++ b/ziggy/src/compile-stdin.rkt @@ -0,0 +1,14 @@ +#lang crook +{:= A B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(provide main) +(require "parse.rkt") +(require "compile.rkt") +{:> I} (require "read-all.rkt") +(require a86/printer) + +;; -> Void +;; Compile contents of stdin, +;; emit asm code on stdout +(define (main) + (read-line) ; ignore #lang racket line + (asm-display (compile {:> A I} (parse (read)) {:> I} (apply parse (read-all))))) diff --git a/ziggy/src/compile.rkt b/ziggy/src/compile.rkt new file mode 100644 index 00000000..32ddfbe8 --- /dev/null +++ b/ziggy/src/compile.rkt @@ -0,0 +1,406 @@ +#lang crook +{:= A B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(provide (all-defined-out)) +(require "ast.rkt") +{:> B} (require "compile-ops.rkt") +{:> D0} (require "types.rkt") +(require a86/ast) + +(define rax 'rax) +{:> H0} (define rbx 'rbx) {:> H0} ; heap +{:> E0} (define rsp 'rsp) {:> E0} ; stack +{:> H0} (define rdi 'rdi) {:> H0} ; arg +{:> J} (define r8 'r8) {:> J} ; scratch +{:> F} (define r15 'r15) {:> F} ; stack pad (non-volatile) + +{:> A I} ;; Expr -> Asm +{:> A I} +(define (compile e) + (prog (Global 'entry) + {:> E0} (Extern 'peek_byte) + {:> E0} (Extern 'read_byte) + {:> E0} (Extern 'write_byte) + {:> E1} (Extern 'raise_error) + (Label 'entry) + {:> E0 F} (Sub rsp 8) + {:> A F} (compile-e e) + {:> E0 F} (Add rsp 8) + {:> F} (Push r15) {:> F} ; save callee-saved register + {:> H0} (Push rbx) + {:> H0} (Mov rbx rdi) {:> H0} ; recv heap pointer + {:> F} (compile-e e '()) + {:> H0} (Pop rbx) + {:> F} (Pop r15) {:> F} ; restore callee-save register + (Ret) + {:> E1} ;; Error handler + {:> E1} (Label 'err) + {:> F} pad-stack + {:> E1} (Call 'raise_error))) + +{:> I} ;; Prog 
-> Asm +{:> I} +(define (compile p) + (match p + [(Prog ds e) + (prog (Global 'entry) + (Extern 'peek_byte) + (Extern 'read_byte) + (Extern 'write_byte) + (Extern 'raise_error) + (Label 'entry) + (Push rbx) ; save callee-saved register + (Push r15) + (Mov rbx rdi) ; recv heap pointer + (compile-e e '() {:> J} #f) + (Pop r15) ; restore callee-save register + (Pop rbx) + (Ret) + (compile-defines ds) + (Label 'err) + pad-stack + (Call 'raise_error))])) + +{:> I} ;; [Listof Defn] -> Asm +{:> I} +(define (compile-defines ds) + (match ds + ['() (seq)] + [(cons d ds) + (seq (compile-define d) + (compile-defines ds))])) + +{:> I} ;; Defn -> Asm +{:> I} +(define (compile-define d) + (match d + [(Defn f xs e) + (seq (Label (symbol->label f)) + (compile-e e (reverse xs) {:> J} #t) + (Add rsp (* 8 (length xs))) ; pop args + (Ret))])) + +{:> F} ;; type CEnv = (Listof [Maybe Id]) + +{:> A F} ;; Expr -> Asm +{:> F J} ;; Expr CEnv -> Asm +{:> J} ;; Expr CEnv Boolean -> Asm +(define (compile-e e {:> F} c {:> J} t?) + (match e + {:> A D0} + [(Lit i) (seq (Mov rax i))] + {:> D0} + [(Lit d) (compile-value d)] + {:> E0} + [(Eof) (compile-value eof)] + {:> H0} + [(Empty) (compile-value '())] + {:> F} + [(Var x) (compile-variable x c)] + {:> E0} + [(Prim0 p) (compile-prim0 p)] + {:> B} + [(Prim1 p e) (compile-prim1 p e {:> F} c)] + {:> F} + [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] + {:> H1} + [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] + {:> C D0} + [(IfZero e1 e2 e3) + (compile-ifzero e1 e2 e3)] + {:> D0} + [(If e1 e2 e3) + (compile-if e1 e2 e3 {:> F} c {:> J} t?)] + {:> E0} + [(Begin e1 e2) + (compile-begin e1 e2 {:> F} c {:> J} t?)] + {:> F} + [(Let x e1 e2) + (compile-let x e1 e2 c {:> J} t?)] + {:> I} + [(App f es) + (compile-app f es c {:> J} t?)] + {:> K} + [(Match e ps es) (compile-match e ps es c t?)])) + +{:> D0} ;; Value -> Asm +{:> D0} +(define (compile-value v) + {:> D0 H1} + (seq (Mov rax (value->bits v))) + {:> H1} + (cond [(string? 
v) (compile-string v)] + [else (Mov rax (value->bits v))])) + +{:> F} ;; Id CEnv -> Asm +{:> F} +(define (compile-variable x c) + (let ((i (lookup x c))) + (seq (Mov rax (Offset rsp i))))) + +{:> H1} ;; String -> Asm +{:> H1} +(define (compile-string s) + (let ((len (string-length s))) + (if (zero? len) + (seq (Mov rax type-str)) + (seq (Mov rax len) + (Mov (Offset rbx 0) rax) + (compile-string-chars (string->list s) 8) + (Mov rax rbx) + (Or rax type-str) + (Add rbx + (+ 8 (* 4 (if (odd? len) (add1 len) len)))))))) + +{:> H1} ;; [Listof Char] Integer -> Asm +{:> H1} +(define (compile-string-chars cs i) + (match cs + ['() (seq)] + [(cons c cs) + (seq (Mov rax (char->integer c)) + (Mov (Offset rbx i) 'eax) + (compile-string-chars cs (+ 4 i)))])) + +{:> E0} ;; Op0 -> Asm +{:> E0} +(define (compile-prim0 p) + (compile-op0 p)) + +{:> B F} ;; Op1 Expr -> Asm +{:> F} ;; Op1 Expr CEnv -> Asm +{:> B} +(define (compile-prim1 p e {:> F} c) + (seq (compile-e e {:> F} c {:> J} #f) + (compile-op1 p))) + +{:> F} ;; Op2 Expr Expr CEnv -> Asm +{:> F} +(define (compile-prim2 p e1 e2 c) + (seq (compile-e e1 c {:> J} #f) + (Push rax) + (compile-e e2 (cons #f c) {:> J} #f) + (compile-op2 p))) + +{:> H1} ;; Op3 Expr Expr Expr CEnv -> Asm +{:> H1} +(define (compile-prim3 p e1 e2 e3 c) + (seq (compile-e e1 c {:> J} #f) + (Push rax) + (compile-e e2 (cons #f c) {:> J} #f) + (Push rax) + (compile-e e3 (cons #f (cons #f c)) {:> J} #f) + (compile-op3 p))) + + +{:> C D0} ;; Expr Expr Expr -> Asm +{:> C D0} +(define (compile-ifzero e1 e2 e3) + (let ((l1 (gensym 'ifz)) + (l2 (gensym 'ifz))) + (seq (compile-e e1) + (Cmp rax 0) + (Jne l1) + (compile-e e2) + (Jmp l2) + (Label l1) + (compile-e e3) + (Label l2)))) + +{:> D0 F} ;; Expr Expr Expr -> Asm +{:> F J} ;; Expr Expr Expr CEnv -> Asm +{:> J} ;; Expr Expr Expr CEnv Boolean -> Asm +{:> D0} +(define (compile-if e1 e2 e3 {:> F} c {:> J} t?) 
+ (let ((l1 (gensym 'if)) + (l2 (gensym 'if))) + (seq (compile-e e1 {:> F} c {:> J} #f) + (Cmp rax (value->bits #f)) + (Je l1) + (compile-e e2 {:> F} c {:> J} t?) + (Jmp l2) + (Label l1) + (compile-e e3 {:> F} c {:> J} t?) + (Label l2)))) + +{:> E0 F} ;; Expr Expr -> Asm +{:> F J} ;; Expr Expr CEnv -> Asm +{:> J} ;; Expr Expr CEnv Boolean -> Asm +{:> E0} +(define (compile-begin e1 e2 {:> F} c {:> J} t?) + (seq (compile-e e1 {:> F} c {:> J} #f) + (compile-e e2 {:> F} c {:> J} t?))) + +{:> F J} ;; Id Expr Expr CEnv -> Asm +{:> J} ;; Id Expr Expr CEnv Boolean -> Asm +{:> F} +(define (compile-let x e1 e2 c {:> J} t?) + (seq (compile-e e1 c {:> J} #f) + (Push rax) + (compile-e e2 (cons x c) {:> J} t?) + (Add rsp 8))) + +{:> J} ;; Id [Listof Expr] CEnv Boolean -> Asm +{:> J} +(define (compile-app f es c t?) + (if t? + (compile-app-tail f es c) + (compile-app-nontail f es c))) + +{:> J} ;; Id [Listof Expr] CEnv -> Asm +{:> J} +(define (compile-app-tail f es c) + (seq (compile-es es c) + (move-args (length es) (length c)) + (Add rsp (* 8 (length c))) + (Jmp (symbol->label f)))) + +{:> J} ;; Integer Integer -> Asm +{:> J} +(define (move-args i off) + (cond [(zero? off) (seq)] + [(zero? i) (seq)] + [else + (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) + (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) + (move-args (sub1 i) off))])) + +{:> I} ;; Id [Listof Expr] CEnv -> Asm +{:> I} ;; The return address is placed above the arguments, so callee pops +{:> I} ;; arguments and return address is next frame +{:> I J} +(define (compile-app f es c) + (let ((r (gensym 'ret))) + (seq (Lea rax r) + (Push rax) + (compile-es es (cons #f c)) + (Jmp (symbol->label f)) + (Label r)))) + +{:> Z:FIXME} ;; eats previous paren if we do ({:> I J} compile-app {:> J} compile-app-nontail ...) 
+{:> J} +(define (compile-app-nontail f es c) + (let ((r (gensym 'ret))) + (seq (Lea rax r) + (Push rax) + (compile-es es (cons #f c)) + (Jmp (symbol->label f)) + (Label r)))) + +{:> I} ;; [Listof Expr] CEnv -> Asm +{:> I} +(define (compile-es es c) + (match es + ['() '()] + [(cons e es) + (seq (compile-e e c {:> J} #f) + (Push rax) + (compile-es es (cons #f c)))])) + +{:> K} ;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +{:> K} +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c #f) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'err) + (Label done) + (Add rsp 8)))) {:> K} ; pop the saved value being matched + +{:> K} ;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +{:> K} +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +{:> K} ;; Pat Expr CEnv Symbol Bool -> Asm +{:> K} +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) 
+ (Add rsp (* 8 (length cm))) + (Jmp done) + (Label next))]))) + +{:> K} ;; Pat CEnv Symbol -> (list Asm CEnv) +{:> K} +(define (compile-pattern p cm next) + (match p + [(Var '_) + (list (seq) cm)] + [(Var x) + (list (seq (Push rax)) (cons x cm))] + [(Lit l) + (let ((ok (gensym))) + (list (seq (Cmp rax (value->bits l)) + (Je ok) + (Add rsp (* 8 (length cm))) + (Jmp next) + (Label ok)) + cm))] + [(Conj p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + cm2)])])] + [(Box p) + (match (compile-pattern p cm next) + [(list i1 cm1) + (let ((ok (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Je ok) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next) + (Label ok) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + cm1))])] + [(Cons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 cm2) + (let ((ok (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Je ok) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next) + (Label ok) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + cm2))])])])) + +{:> F} ;; Id CEnv -> Integer +{:> F} +(define (lookup x cenv) + (match cenv + ['() (error "undefined variable:" x)] + [(cons y rest) + (match (eq? 
x y) + [#t 0] + [#f (+ 8 (lookup x rest))])])) diff --git a/ziggy/src/interp-io.rkt b/ziggy/src/interp-io.rkt new file mode 100644 index 00000000..7e3340fc --- /dev/null +++ b/ziggy/src/interp-io.rkt @@ -0,0 +1,15 @@ +#lang crook +{:= E0 E1 F H0 H1 I J K} +(provide interp/io) +(require "interp.rkt") + +{:> E0 I} ;; String Expr -> (Cons Value String) +{:> I} ;; String Prog -> (Cons Value String) +{:> E0 I} ;; Interpret e with given string as input, +{:> I} ;; Interpret p with given string as input, +;; return value and collected output as string +(define (interp/io {:> E0 I} e {:> I} p input) + (parameterize ((current-output-port (open-output-string)) + (current-input-port (open-input-string input))) + (cons (interp {:> E0 I} e {:> I} p) + (get-output-string (current-output-port))))) diff --git a/ziggy/src/interp-prim.rkt b/ziggy/src/interp-prim.rkt new file mode 100644 index 00000000..84e85c47 --- /dev/null +++ b/ziggy/src/interp-prim.rkt @@ -0,0 +1,110 @@ +#lang crook +{:= B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(provide {:> E0} interp-prim0 {:> B} interp-prim1 {:> F} interp-prim2 {:> H1} interp-prim3) + +{:> E0} ;; Op0 -> Value +{:> E0} +(define (interp-prim0 op) + (match op + ['read-byte (read-byte)] + ['peek-byte (peek-byte)] + ['void (void)])) + +{:> B D0} ;; Op1 Integer -> Integer +{:> B D0} +(define (interp-prim1 op i) + (match op + ['add1 (add1 i)] + ['sub1 (sub1 i)])) + +{:> D0 E1} ;; Op1 Value -> Value +{:> D0 E1} +(define (interp-prim1 op v) + (match op + ['add1 (add1 v)] + ['sub1 (sub1 v)] + ['zero? (zero? v)] + {:> D1} + ['char? (char? v)] + {:> D1} + ['integer->char (integer->char v)] + {:> D1} + ['char->integer (char->integer v)] + {:> E0} + ['write-byte (write-byte v)] + {:> E0} + ['eof-object? (eof-object? v)])) + +{:> E1} ;; Op1 Value -> Answer +{:> E1} +(define (interp-prim1 op v) + (match (list op v) + [(list 'add1 (? integer?)) (add1 v)] + [(list 'sub1 (? integer?)) (sub1 v)] + [(list 'zero? (? integer?)) (zero? v)] + [(list 'char? v) (char? 
v)] + [(list 'integer->char (? codepoint?)) (integer->char v)] + [(list 'char->integer (? char?)) (char->integer v)] + [(list 'write-byte (? byte?)) (write-byte v)] + [(list 'eof-object? v) (eof-object? v)] + {:> H0} [(list 'box v) (box v)] + {:> H0} [(list 'unbox (? box?)) (unbox v)] + {:> H0} [(list 'car (? pair?)) (car v)] + {:> H0} [(list 'cdr (? pair?)) (cdr v)] + {:> H0} [(list 'empty? v) (empty? v)] + {:> H0} [(list 'cons? v) (cons? v)] + {:> H0} [(list 'box? v) (box? v)] ; op1? in parse.rkt accepts box? from H0 on + {:> H1} [(list 'vector? v) (vector? v)] + {:> H1} [(list 'vector-length (? vector?)) (vector-length v)] + {:> H1} [(list 'string? v) (string? v)] + {:> H1} [(list 'string-length (? string?)) (string-length v)] + [_ 'err])) + +{:> F} ;; Op2 Value Value -> Answer +{:> F} +(define (interp-prim2 op v1 v2) + (match (list op v1 v2) + [(list '+ (? integer?) (? integer?)) (+ v1 v2)] + [(list '- (? integer?) (? integer?)) (- v1 v2)] + [(list '< (? integer?) (? integer?)) (< v1 v2)] + [(list '= (? integer?) (? integer?)) (= v1 v2)] + {:> H0} [(list 'eq? v1 v2) (eq? v1 v2)] + {:> H0} [(list 'cons v1 v2) (cons v1 v2)] + {:> H1} + [(list 'make-vector (? integer?) _) + (if (<= 0 v1) + (make-vector v1 v2) + 'err)] + {:> H1} + [(list 'vector-ref (? vector?) (? integer?)) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-ref v1 v2) + 'err)] + {:> H1} + [(list 'make-string (? integer?) (? char?)) + (if (<= 0 v1) + (make-string v1 v2) + 'err)] + {:> H1} + [(list 'string-ref (? string?) (? integer?)) + (if (<= 0 v2 (sub1 (string-length v1))) + (string-ref v1 v2) + 'err)] + [_ 'err])) + +{:> H1} ;; Op3 Value Value Value -> Answer +{:> H1} +(define (interp-prim3 p v1 v2 v3) + (match (list p v1 v2 v3) + [(list 'vector-set! (? vector?) (? integer?) _) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-set! v1 v2 v3) + 'err)] + [_ 'err])) + +{:> E1} ;; Any -> Boolean +{:> E1} +(define (codepoint? v) + (and (integer? 
v) + (or (<= 0 v 55295) + (<= 57344 v 1114111)))) diff --git a/ziggy/src/interp-stdin.rkt b/ziggy/src/interp-stdin.rkt new file mode 100644 index 00000000..d5eb5dd0 --- /dev/null +++ b/ziggy/src/interp-stdin.rkt @@ -0,0 +1,13 @@ +#lang crook +{:= A B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(provide main) +(require "parse.rkt") +(require "interp.rkt") +{:> I} (require "read-all.rkt") + +;; -> Void +;; Parse and interpret contents of stdin, +;; print result on stdout +(define (main) + (read-line) ; ignore #lang racket line + (println (interp {:> A I} (parse (read)) {:> I} (apply parse (read-all))))) diff --git a/ziggy/src/interp.rkt b/ziggy/src/interp.rkt new file mode 100644 index 00000000..d28e7cef --- /dev/null +++ b/ziggy/src/interp.rkt @@ -0,0 +1,204 @@ +#lang crook +{:= A B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(provide interp) +{:> F} (provide interp-env) +{:> K} (provide interp-match-pat) +(require "ast.rkt") +{:> B} (require "interp-prim.rkt") + +{:> D0} ;; type Value = +{:> D0} ;; | Integer +{:> D0} ;; | Boolean +{:> D1} ;; | Character +{:> E0} ;; | Eof +{:> E0} ;; | Void +{:> H0} ;; | '() +{:> H0} ;; | (cons Value Value) +{:> H0} ;; | (box Value) +{:> H1} ;; | (string Character ...) +{:> H1} ;; | (vector Value ...) + +{:> F} ;; type Env = (Listof (List Id Value)) + +{:> A D0} ;; Expr -> Integer +{:> D0 E1} ;; Expr -> Value +{:> E1 I} ;; Expr -> Answer +{:> I} ;; Prog -> Answer +(define (interp {:> A I} e {:> I} p) + {:> A F} + (match e + {:> A D0} [(Lit i) i] + {:> D0} [(Lit d) d] + {:> E0} [(Eof) eof] + {:> E0} [(Prim0 p) + (interp-prim0 p)] + {:> B E1} [(Prim1 p e) + (interp-prim1 p (interp e))] + {:> E1} [(Prim1 p e) + (match (interp e) + ['err 'err] + [v (interp-prim1 p v)])] + {:> C D0} [(IfZero e1 e2 e3) + (if (zero? 
(interp e1)) + (interp e2) + (interp e3))] + {:> D0 E1} [(If e1 e2 e3) + (if (interp e1) + (interp e2) + (interp e3))] + {:> E1} [(If e1 e2 e3) + (match (interp e1) + ['err 'err] + [v (if v + (interp e2) + (interp e3))])] + {:> E0 E1} [(Begin e1 e2) + (begin (interp e1) + (interp e2))] + {:> E1} [(Begin e1 e2) + (match (interp e1) + ['err 'err] + [v (interp e2)])]) + {:> F I} + (interp-env e '()) + {:> I} + (match p + [(Prog ds e) + (interp-env e '() ds)])) + +{:> F} ;; Expr Env -> Answer +{:> F} +(define (interp-env e r {:> I} ds) + (match e + [(Lit d) d] + [(Eof) eof] + {:> H0} + [(Empty) '()] + [(Var x) (lookup r x)] + [(Prim0 p) (interp-prim0 p)] + [(Prim1 p e) + (match (interp-env e r {:> I} ds) + ['err 'err] + [v (interp-prim1 p v)])] + [(Prim2 p e1 e2) + (match (interp-env e1 r {:> I} ds) + ['err 'err] + [v1 (match (interp-env e2 r {:> I} ds) + ['err 'err] + [v2 (interp-prim2 p v1 v2)])])] + {:> H1} + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r {:> I} ds) + ['err 'err] + [v1 (match (interp-env e2 r {:> I} ds) + ['err 'err] + [v2 (match (interp-env e3 r {:> I} ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] + [(If e0 e1 e2) + (match (interp-env e0 r {:> I} ds) + ['err 'err] + [v + (if v + (interp-env e1 r {:> I} ds) + (interp-env e2 r {:> I} ds))])] + [(Begin e1 e2) + (match (interp-env e1 r {:> I} ds) + ['err 'err] + [v (interp-env e2 r {:> I} ds)])] + [(Let x e1 e2) + (match (interp-env e1 r {:> I} ds) + ['err 'err] + [v (interp-env e2 (ext r x v) {:> I} ds)])] + {:> I} + [(App f es) + (match (interp-env* es r ds) + ['err 'err] + [vs + (match (defns-lookup ds f) + [(Defn f xs e) + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (zip xs vs) ds) + 'err)])])] + + {:> K} + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +{:> I} ;; (Listof Expr) REnv Defns -> (Listof Value) | 'err +{:> I} +(define (interp-env* es r ds) + (match es + ['() '()] + [(cons e es) + (match 
(interp-env e r ds) + ['err 'err] + [v (match (interp-env* es r ds) + ['err 'err] + [vs (cons v vs)])])])) + +{:> K} ;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +{:> K} +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +{:> K} ;; Pat Value Env -> [Maybe Env] +{:> K} +(define (interp-match-pat p v r) + (match p + [(Var '_) r] + [(Var x) (ext r x v)] + [(Lit l) (and (eqv? l v) r)] + [(Box p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(Cons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(Conj p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) + +{:> I} ;; Defns Symbol -> Defn +{:> I} +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) + +{:> I} +(define (zip xs ys) + (match* (xs ys) + [('() '()) '()] + [((cons x xs) (cons y ys)) + (cons (list x y) + (zip xs ys))])) + +{:> F} ;; Env Id -> Value +{:> F} +(define (lookup r x) + (match r + [(cons (list y val) r) + (if (symbol=? 
x y) + val + (lookup r x))])) + +{:> F} ;; Env Id Value -> Env +{:> F} +(define (ext r x v) + (cons (list x v) r)) diff --git a/ziggy/src/main.rkt b/ziggy/src/main.rkt new file mode 100644 index 00000000..f819fc92 --- /dev/null +++ b/ziggy/src/main.rkt @@ -0,0 +1,13 @@ +#lang crook +{:= A B C D0 D1 E0 E1 F H0 H1 I J} +(require "ast.rkt") +(require "parse.rkt") +(require "interp.rkt") +(require "compile.rkt") +(require "run.rkt") +(provide (all-from-out "ast.rkt")) +(provide (all-from-out "parse.rkt")) +(provide (all-from-out "interp.rkt")) +(provide (all-from-out "compile.rkt")) +(provide (all-from-out "run.rkt")) + diff --git a/ziggy/src/parse.rkt b/ziggy/src/parse.rkt new file mode 100644 index 00000000..d337873f --- /dev/null +++ b/ziggy/src/parse.rkt @@ -0,0 +1,145 @@ +#lang crook +{:= A B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(provide parse {:> I} parse-e {:> I} parse-define) +(require "ast.rkt") + +{:> A I} ;; S-Expr -> Expr +{:> A I} +(define (parse s) + (match s + {:> E0} + ['eof (Eof)] + {:> A D0} + [(? exact-integer?) (Lit s)] + {:> D0} + [(? datum?) (Lit s)] + {:> F} + [(? symbol?) (Var s)] + {:> H0} + [(list 'quote (list)) (Empty)] + {:> E0} + [(list (? op0? o)) (Prim0 o)] + {:> B} + [(list (? op1? o) e) (Prim1 o (parse e))] + {:> F} + [(list (? op2? o) e1 e2) (Prim2 o (parse e1) (parse e2))] + {:> H1} + [(list (? op3? o) e1 e2 e3) (Prim3 o (parse e1) (parse e2) (parse e3))] + {:> E0} + [(list 'begin e1 e2) (Begin (parse e1) (parse e2))] + {:> C D0} + [(list 'if (list 'zero? e1) e2 e3) + (IfZero (parse e1) (parse e2) (parse e3))] + {:> D0} + [(list 'if e1 e2 e3) + (If (parse e1) (parse e2) (parse e3))] + {:> F} + [(list 'let (list (list (? symbol? x) e1)) e2) + (Let x (parse e1) (parse e2))] + [_ (error "Parse error")])) + +{:> I} ;; S-Expr ... -> Prog +{:> I} +(define (parse . 
s) + (match s + [(cons (and (cons 'define _) d) s) + (match (apply parse s) + [(Prog ds e) + (Prog (cons (parse-define d) ds) e)])] + [(cons e '()) (Prog '() (parse-e e))] + [_ (error "program parse error")])) + +{:> I} ;; S-Expr -> Defn +{:> I} +(define (parse-define s) + (match s + [(list 'define (list-rest (? symbol? f) xs) e) + (if (andmap symbol? xs) + (Defn f xs (parse-e e)) + (error "parse definition error"))] + [_ (error "Parse defn error" s)])) + +{:> I} ;; S-Expr -> Expr +{:> I} +(define (parse-e s) + (match s + [(? datum?) (Lit s)] + ['eof (Eof)] + [(? symbol?) (Var s)] + [(list 'quote (list)) (Empty)] + [(list (? op0? p0)) (Prim0 p0)] + [(list (? op1? p1) e) (Prim1 p1 (parse-e e))] + [(list (? op2? p2) e1 e2) (Prim2 p2 (parse-e e1) (parse-e e2))] + [(list (? op3? p3) e1 e2 e3) + (Prim3 p3 (parse-e e1) (parse-e e2) (parse-e e3))] + [(list 'begin e1 e2) + (Begin (parse-e e1) (parse-e e2))] + [(list 'if e1 e2 e3) + (If (parse-e e1) (parse-e e2) (parse-e e3))] + [(list 'let (list (list (? symbol? x) e1)) e2) + (Let x (parse-e e1) (parse-e e2))] + {:> K} + [(cons 'match (cons e ms)) + (parse-match (parse-e e) ms)] + [(cons (? symbol? f) es) + (App f (map parse-e es))] + [_ (error "Parse error" s)])) + +{:> K} ;; Expr [Listof S-Expr] -> Expr +{:> K} +(define (parse-match e ms) + (match ms + ['() (Match e '() '())] + [(cons (list p r) ms) + (match (parse-match e ms) + [(Match e ps es) + (Match e + (cons (parse-pat p) ps) + (cons (parse-e r) es))])] + [_ (error "Parse match error" e ms)])) + +{:> K} ;; S-Expr -> Pat +{:> K} +(define (parse-pat p) + (match p + [(? datum?) (Lit p)] + [(? symbol?) (Var p)] + [(list 'quote (list)) (Lit '())] ; the pattern '() is parsed as the literal empty list + [(list 'box p) + (Box (parse-pat p))] + [(list 'cons p1 p2) + (Cons (parse-pat p1) (parse-pat p2))] + [(list 'and p1 p2) + (Conj (parse-pat p1) (parse-pat p2))])) + + +{:> D0} ;; Any -> Boolean +{:> D0} +(define (datum? x) + (or (exact-integer? x) + (boolean? x) + {:> D1} + (char? x) + {:> H1} + (string? 
x))) + +{:> E0} ;; Any -> Boolean +{:> E0} +(define (op0? x) + (memq x '(read-byte peek-byte void))) + +{:> B} +(define (op1? x) + (memq x '(add1 sub1 {:> D0} zero? {:> D1} char? {:> D1} integer->char {:> D1} char->integer + {:> E0} write-byte {:> E0} eof-object? + {:> H0} box {:> H0} unbox {:> H0} empty? {:> H0} cons? {:> H0} box? {:> H0} car {:> H0} cdr + {:> H1} vector? {:> H1} vector-length {:> H1} string? {:> H1} string-length))) + +{:> F} +(define (op2? x) + (memq x '(+ - < = {:> H0} eq? {:> H0} cons + {:> H1} make-vector {:> H1} vector-ref {:> H1} make-string {:> H1} string-ref))) + +{:> H1} +(define (op3? x) + (memq x '(vector-set!))) diff --git a/ziggy/src/read-all.rkt b/ziggy/src/read-all.rkt new file mode 100644 index 00000000..a0a6fe31 --- /dev/null +++ b/ziggy/src/read-all.rkt @@ -0,0 +1,9 @@ +#lang crook +{:= I J K} +(provide read-all) +;; read all s-expressions until eof +(define (read-all) + (let ((r (read))) + (if (eof-object? r) + '() + (cons r (read-all))))) diff --git a/ziggy/src/run-stdin.rkt b/ziggy/src/run-stdin.rkt new file mode 100644 index 00000000..12f97ba9 --- /dev/null +++ b/ziggy/src/run-stdin.rkt @@ -0,0 +1,14 @@ +#lang crook +{:= A B C D0 D1 E0 E1 F H0 H1 J K} +(provide main) +(require "parse.rkt") +(require "compile.rkt") +(require "run.rkt") +{:> J} (require "read-all.rkt") + +;; -> Void +;; Compile contents of stdin and use asm-interp to run. +;; From J on the whole input is read, so programs with definitions parse. +(define (main) + (read-line) ; ignore #lang racket line + (run (compile {:> A J} (parse (read)) {:> J} (apply parse (read-all))))) diff --git a/ziggy/src/run.rkt b/ziggy/src/run.rkt new file mode 100644 index 00000000..88f38e1f --- /dev/null +++ b/ziggy/src/run.rkt @@ -0,0 +1,33 @@ +#lang crook +{:= A B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(require a86/interp) +{:> D0} (require "types.rkt") +{:> E0} (require "build-runtime.rkt") +(provide run {:> E0} run/io) + +{:> A D0} ;; Asm -> Integer +{:> D0 E1} ;; Asm -> Value +{:> E1} ;; Asm -> Answer +(define (run is) + {:> A D0} + (asm-interp is) + {:> D0 E0} + (bits->value (asm-interp is)) + {:> E0} + (parameterize 
((current-objs (list (path->string runtime-path)))) + {:> E0 E1} + (bits->value (asm-interp is)) + {:> E1} + (match (asm-interp is) + ['err 'err] + [b (bits->value b)]))) + +{:> E0} ;; Asm String -> (cons Answer String) +{:> E0} +(define (run/io is in) + (parameterize ((current-objs (list (path->string runtime-path)))) + (match (asm-interp/io is in) + {:> E1} + [(cons 'err out) (cons 'err out)] + [(cons b out) + (cons (bits->value b) out)]))) diff --git a/ziggy/src/test/compile.rkt b/ziggy/src/test/compile.rkt new file mode 100644 index 00000000..4be3ba97 --- /dev/null +++ b/ziggy/src/test/compile.rkt @@ -0,0 +1,16 @@ +#lang crook +{:= A B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(require "../compile.rkt") +(require "../parse.rkt") +(require "../run.rkt") +(require "test-runner.rkt") + +{:> A I} +(test (λ (e) (run (compile (parse e))))) +{:> I} +(test (λ p (run (compile (apply parse p))))) + +{:> E0 I} +(test/io (λ (in e) (run/io (compile (parse e)) in))) +{:> I} +(test/io (λ (in . p) (run/io (compile (apply parse p)) in))) diff --git a/ziggy/src/test/interp.rkt b/ziggy/src/test/interp.rkt new file mode 100644 index 00000000..e5e6d9b5 --- /dev/null +++ b/ziggy/src/test/interp.rkt @@ -0,0 +1,16 @@ +#lang crook +{:= A B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(require "../interp.rkt") +{:> E0} (require "../interp-io.rkt") +(require "../parse.rkt") +(require "test-runner.rkt") + +{:> A I} +(test (λ (e) (interp (parse e)))) +{:> I} +(test (λ p (interp (apply parse p)))) + +{:> E0 I} +(test/io (λ (in e) (interp/io (parse e) in))) +{:> I} +(test/io (λ (in . p) (interp/io (apply parse p) in))) diff --git a/ziggy/src/test/test-runner.rkt b/ziggy/src/test/test-runner.rkt new file mode 100644 index 00000000..a1dadde8 --- /dev/null +++ b/ziggy/src/test/test-runner.rkt @@ -0,0 +1,360 @@ +#lang crook +{:= A B C D0 D0.A D1 E0 E1 F H0 H1 I J K} +(provide test {:> E0} test/io) +(require rackunit) + +(define (test run) + {:> A} + (begin ;; Abscond + (check-equal? (run 7) 7) + (check-equal? 
(run -8) -8)) + + {:> B} + (begin ;; Blackmail + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7)) + + {:> C} + (begin ;; Con + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 1) 1 2) + 7)) + 7)) + + {:> D0} + (begin ;; Dupe + (check-equal? (run #t) #t) + (check-equal? (run #f) #f) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) + (check-equal? (run '(if #t 3 4)) 3) + (check-equal? (run '(if #f 3 4)) 4) + (check-equal? (run '(if 0 3 4)) 3) + (check-equal? (run '(zero? 4)) #f) + (check-equal? (run '(zero? 0)) #t)) + + {:> D0.A D0.A} + (begin ;; Dupe+ + (check-equal? (run '(not #t)) #f) + (check-equal? (run '(not #f)) #t) + (check-equal? (run '(not 7)) #f) + (check-equal? (run '(cond [else #t])) #t) + (check-equal? (run '(cond [(not #t) 2] [else 3])) 3) + (check-equal? (run '(cond [(if #t #t #f) 2] [else 3])) 2) + (check-equal? (run '(cond [(zero? 1) 2] [(if (not (zero? (sub1 2))) #t #f) 4] [else 3])) 4) + (check-equal? (run '(cond [#t 1] [else 2])) 1) + (check-equal? (run '(cond [1 1] [else 2])) 1) + (check-equal? (run '(case 2 [else 1])) 1) + (check-equal? (run '(case 2 [() 3] [else 1])) 1) + (check-equal? (run '(case 2 [(2) 3] [else 1])) 3) + (check-equal? (run '(case 4 [(2) 3] [else 1])) 1) + (check-equal? (run '(case 2 [(7 2) 3] [else 1])) 3) + (check-equal? (run '(case 4 [(7 2) 3] [else 1])) 1) + (check-equal? (run '(case 2 [(7 2 #t) 3] [else 1])) 3) + (check-equal? (run '(case 4 [(7 2 #t) 3] [else 1])) 1) + (check-equal? (run '(case #t [(7 2 #t) 3] [else 1])) 3) + (check-equal? (run '(case #f [(7 2 #t) 3] [else 1])) 1)) + + {:> D1} + (begin ;; Dodger + (check-equal? (run #\a) #\a) + (check-equal? (run #\b) #\b) + (check-equal? (run '(char? 
#\a)) #t) + (check-equal? (run '(char? #t)) #f) + (check-equal? (run '(char? 8)) #f) + (check-equal? (run '(char->integer #\a)) (char->integer #\a)) + (check-equal? (run '(integer->char 955)) #\λ)) + + {:> E0} + (begin ;; Evildoer + (check-equal? (run '(void)) (void)) + (check-equal? (run '(begin 1 2)) 2) + (check-equal? (run '(eof-object? (void))) #f)) + + {:> E1} + (begin ;; Extort + (check-equal? (run '(add1 #f)) 'err) + (check-equal? (run '(sub1 #f)) 'err) + (check-equal? (run '(zero? #f)) 'err) + (check-equal? (run '(char->integer #f)) 'err) + (check-equal? (run '(integer->char #f)) 'err) + (check-equal? (run '(integer->char -1)) 'err) + (check-equal? (run '(write-byte #f)) 'err) + (check-equal? (run '(write-byte -1)) 'err) + (check-equal? (run '(write-byte 256)) 'err) + (check-equal? (run '(begin (integer->char 97) + (integer->char 98))) + #\b)) + + {:> F} + (begin ;; Fraud + (check-equal? (run '(let ((x 7)) x)) 7) + (check-equal? (run '(let ((x 7)) 2)) 2) + (check-equal? (run '(let ((x 7)) (add1 x))) 8) + (check-equal? (run '(let ((x (add1 7))) x)) 8) + (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) + (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) + (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) + + (check-equal? (run '(let ((x 0)) + (if (zero? x) 7 8))) + 7) + (check-equal? (run '(let ((x 1)) + (add1 (if (zero? x) 7 8)))) + 9) + (check-equal? (run '(+ 3 4)) 7) + (check-equal? (run '(- 3 4)) -1) + (check-equal? (run '(+ (+ 2 1) 4)) 7) + (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) + (check-equal? (run '(let ((x (+ 1 2))) + (let ((z (- 4 x))) + (+ (+ x x) z)))) + 7) + + (check-equal? (run '(= 5 5)) #t) + (check-equal? (run '(= 4 5)) #f) + (check-equal? (run '(= (add1 4) 5)) #t) + (check-equal? (run '(< 5 5)) #f) + (check-equal? (run '(< 4 5)) #t) + (check-equal? (run '(< (add1 4) 5)) #f)) + + {:> H0} + (begin ;; Hustle + (check-equal? (run ''()) '()) + (check-equal? (run '(empty? '())) #t) + (check-equal? (run '(empty? 
3)) #f) + (check-equal? (run '(empty? (cons 1 2))) #f) + (check-equal? (run '(box 1)) (box 1)) + (check-equal? (run '(box -1)) (box -1)) + (check-equal? (run '(cons 1 2)) (cons 1 2)) + (check-equal? (run '(unbox (box 1))) 1) + (check-equal? (run '(car (cons 1 2))) 1) + (check-equal? (run '(cdr (cons 1 2))) 2) + (check-equal? (run '(cons 1 '())) (list 1)) + (check-equal? (run '(let ((x (cons 1 2))) + (begin (cdr x) + (car x)))) + 1) + (check-equal? (run '(let ((x (cons 1 2))) + (let ((y (box 3))) + (unbox y)))) + 3) + (check-equal? (run '(eq? 1 1)) #t) + (check-equal? (run '(eq? 1 2)) #f) + (check-equal? (run '(eq? (cons 1 2) (cons 1 2))) #f) + (check-equal? (run '(let ((x (cons 1 2))) (eq? x x))) #t)) + + {:> H1} + (begin ;; Hoax + (check-equal? (run '(make-vector 0 0)) #()) + (check-equal? (run '(make-vector 1 0)) #(0)) + (check-equal? (run '(make-vector 3 0)) #(0 0 0)) + (check-equal? (run '(make-vector 3 5)) #(5 5 5)) + (check-equal? (run '(vector? (make-vector 0 0))) #t) + (check-equal? (run '(vector? (cons 0 0))) #f) + (check-equal? (run '(vector-ref (make-vector 0 #f) 0)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 2)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 3)) 'err) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 0 4) + x))) + #(4 5 5)) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 1 4) + x))) + #(5 4 5)) + (check-equal? (run '(vector-length (make-vector 3 #f))) 3) + (check-equal? (run '(vector-length (make-vector 0 #f))) 0) + (check-equal? (run '"") "") + (check-equal? (run '"fred") "fred") + (check-equal? (run '"wilma") "wilma") + (check-equal? (run '(make-string 0 #\f)) "") + (check-equal? (run '(make-string 3 #\f)) "fff") + (check-equal? (run '(make-string 3 #\g)) "ggg") + (check-equal? 
(run '(string-length "")) 0) + (check-equal? (run '(string-length "fred")) 4) + (check-equal? (run '(string-ref "" 0)) 'err) + (check-equal? (run '(string-ref (make-string 0 #\a) 0)) 'err) + (check-equal? (run '(string-ref "fred" 0)) #\f) + (check-equal? (run '(string-ref "fred" 1)) #\r) + (check-equal? (run '(string-ref "fred" 2)) #\e) + (check-equal? (run '(string-ref "fred" 4)) 'err) + (check-equal? (run '(string? "fred")) #t) + (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff")) + + {:> I} + (begin ;; Iniquity + (check-equal? (run + '(define (f x) x) + '(f 5)) + 5) + (check-equal? (run + '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) + '(tri 9)) + 45) + + (check-equal? (run + '(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) + '(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + '(even? 101)) + #f) + + (check-equal? (run + '(define (map-add1 xs) + (if (empty? xs) + '() + (cons (add1 (car xs)) + (map-add1 (cdr xs))))) + '(map-add1 (cons 1 (cons 2 (cons 3 '()))))) + '(2 3 4)) + (check-equal? (run '(define (f x y) y) + '(f 1 (add1 #f))) + 'err)) + + {:> K} + (begin ;; Knock + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? 
(run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1))) + +{:> E0} +(define (test/io run) + (begin ;; Evildoer + (check-equal? (run "" 7) (cons 7 "")) + (check-equal? (run "" '(write-byte 97)) (cons (void) "a")) + (check-equal? (run "a" '(read-byte)) (cons 97 "")) + (check-equal? (run "b" '(begin (write-byte 97) (read-byte))) + (cons 98 "a")) + (check-equal? (run "" '(read-byte)) (cons eof "")) + (check-equal? (run "" '(eof-object? (read-byte))) (cons #t "")) + (check-equal? (run "a" '(eof-object? (read-byte))) (cons #f "")) + (check-equal? (run "" '(begin (write-byte 97) (write-byte 98))) + (cons (void) "ab")) + + (check-equal? (run "ab" '(peek-byte)) (cons 97 "")) + (check-equal? (run "ab" '(begin (peek-byte) (read-byte))) (cons 97 "")) + (check-equal? (run "†" '(read-byte)) (cons 226 "")) + (check-equal? (run "†" '(peek-byte)) (cons 226 ""))) + + {:> E1} + (begin ;; Extort + (check-equal? (run "" '(write-byte #t)) (cons 'err ""))) + + {:> F} + (begin ;; Fraud + (check-equal? (run "" '(let ((x 97)) (write-byte x))) (cons (void) "a")) + (check-equal? (run "" + '(let ((x 97)) + (begin (write-byte x) + x))) + (cons 97 "a")) + (check-equal? (run "b" '(let ((x 97)) (begin (read-byte) x))) + (cons 97 "")) + (check-equal? (run "b" '(let ((x 97)) (begin (peek-byte) x))) + (cons 97 ""))) + + {:> I} + (begin ;; Iniquity + (check-equal? (run "" + '(define (print-alphabet i) + (if (zero? i) + (void) + (begin (write-byte (- 123 i)) + (print-alphabet (sub1 i))))) + '(print-alphabet 26)) + (cons (void) "abcdefghijklmnopqrstuvwxyz")) + + (check-equal? (run "" + '(define (f x) + (write-byte x)) + '(f 97)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x y) + (write-byte x)) + '(f 97 98)) + (cons (void) "a")) + (check-equal? 
(run "" + '(define (f x) + (let ((y x)) + (write-byte y))) + '(f 97)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x y) + (let ((y x)) + (write-byte y))) + '(f 97 98)) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x) + (write-byte x)) + '(let ((z 97)) + (f z))) + (cons (void) "a")) + (check-equal? (run "" + '(define (f x y) + (write-byte x)) + '(let ((z 97)) + (f z 98))) + (cons (void) "a"))) + + {:> K} + (begin ;; Knock + (check-equal? (run "" + '(match (write-byte 97) + [_ 1])) + (cons 1 "a")))) diff --git a/ziggy/src/types.rkt b/ziggy/src/types.rkt new file mode 100644 index 00000000..35b97c3b --- /dev/null +++ b/ziggy/src/types.rkt @@ -0,0 +1,112 @@ +#lang crook +{:= D0 D0.A D1 E0 E1 F H0 H1 I J K} +(provide (all-defined-out)) +{:> H0} (require ffi/unsafe) + +{:> H0} (define imm-shift 3) +{:> H0} (define imm-mask #b111) +{:> H0} (define ptr-mask #b111) +{:> H0} (define type-box #b001) +{:> H0} (define type-cons #b010) +{:> H1} (define type-vect #b011) +{:> H1} (define type-str #b100) +(define int-shift {:> D0 H0} 1 {:> H0} (+ 1 imm-shift)) +(define mask-int {:> D0 H0} #b1 {:> H0} #b1111) +{:> D1} +(define char-shift {:> D1 H0} 2 {:> H0} (+ 2 imm-shift)) +(define type-int {:> D0 H0} #b0 {:> H0} #b0000) +{:> D1} +(define type-char {:> D0 H0} #b01 {:> H0} #b01000) +{:> D1} +(define mask-char {:> D0 H0} #b11 {:> H0} #b11111) + +(define (bits->value b) + (cond [(= b (value->bits #t)) #t] + [(= b (value->bits #f)) #f] + {:> E0} + [(= b (value->bits eof)) eof] + {:> E0} + [(= b (value->bits (void))) (void)] + {:> H0} + [(= b (value->bits '())) '()] + [(int-bits? b) + (arithmetic-shift b (- int-shift))] + {:> D1} + [(char-bits? b) + (integer->char (arithmetic-shift b (- char-shift)))] + {:> H0} + [(box-bits? b) + (box (bits->value (heap-ref b)))] + {:> H0} + [(cons-bits? b) + (cons (bits->value (heap-ref (+ b 8))) + (bits->value (heap-ref b)))] + {:> H1} + [(vect-bits? b) + (if (zero? 
(untag b)) + (vector) + (build-vector (heap-ref b) + (lambda (j) + (bits->value (heap-ref (+ b (* 8 (add1 j))))))))] + {:> H1} + [(str-bits? b) + (if (zero? (untag b)) + (string) + (build-string (heap-ref b) + (lambda (j) + (char-ref (+ b 8) j))))] + [else (error "invalid bits")])) + +(define (value->bits v) + (cond [(eq? v #t) {:> D0 H0} #b011 {:> H0} #b00011000] + [(eq? v #f) {:> D0 H0} #b111 {:> H0} #b00111000] + [(integer? v) (arithmetic-shift v int-shift)] + {:> E0} [(eof-object? v) {:> E0 H0} #b1011 {:> H0} #b01011000] + {:> E0} [(void? v) {:> E0 H0} #b1111 {:> H0} #b01111000] + {:> H0} [(empty? v) #b10011000] + {:> D1} + [(char? v) + (bitwise-ior type-char + (arithmetic-shift (char->integer v) char-shift))] + {:> H0} + [else (error "not an immediate value" v)])) + +(define (int-bits? v) + (= type-int (bitwise-and v mask-int))) + +{:> D1} +(define (char-bits? v) + (= type-char (bitwise-and v mask-char))) + +{:> H0} +(define (imm-bits? v) + (zero? (bitwise-and v imm-mask))) + +{:> H0} +(define (cons-bits? v) + (= type-cons (bitwise-and v imm-mask))) + +{:> H0} +(define (box-bits? v) + (= type-box (bitwise-and v imm-mask))) + +{:> H1} +(define (vect-bits? v) + (= type-vect (bitwise-and v imm-mask))) + +{:> H1} +(define (str-bits? v) + (= type-str (bitwise-and v imm-mask))) + +{:> H0} +(define (untag i) + (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) + (integer-length ptr-mask))) + +{:> H0} +(define (heap-ref i) + (ptr-ref (cast (untag i) _int64 _pointer) _int64)) + +{:> H1} +(define (char-ref i j) + (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j)))