diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..149159a --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustdocflags = "-C link-args=/STACK:8194304" # Increase stack size 8MB when running doctests \ No newline at end of file diff --git a/.github/workflows/assembler.yml b/.github/workflows/assembler.yml index 0baac7e..97db8d6 100644 --- a/.github/workflows/assembler.yml +++ b/.github/workflows/assembler.yml @@ -13,12 +13,11 @@ jobs: fail-fast: false matrix: crate: [ lc3-assembler ] - os: [ windows-latest, ubuntu-latest, macOS-latest ] + os: [ windows-latest, ubuntu-latest, macos-latest ] rust: - stable - - beta - nightly - - 1.42.0 + - 1.56.1 runs-on: ${{ matrix.os }} steps: diff --git a/.gitignore b/.gitignore index fa46ec1..56e85eb 100755 --- a/.gitignore +++ b/.gitignore @@ -88,3 +88,8 @@ $RECYCLE.BIN/ # Windows shortcuts *.lnk + +# LC-3 Assembly +*.asm +*.mem +*.obj diff --git a/Cargo.lock b/Cargo.lock index 5654497..12c4b0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,40 +1,30 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-[[package]] -name = "aho-corasick" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81ce3d38065e618af2d7b77e10c5ad9a069859b4be3c2250f674af3840d9c8a5" -dependencies = [ - "memchr", -] +version = 3 [[package]] -name = "annotate-snippets" -version = "0.8.0" +name = "ahash" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d78ea013094e5ea606b1c05fe35f1dd7ea1eb1ea259908d040b25bd5ec677ee5" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" dependencies = [ - "yansi-term", + "const-random", ] [[package]] -name = "ansi_term" -version = "0.11.0" +name = "ariadne" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +checksum = "f1cb2a2046bea8ce5e875551f5772024882de0b540c7f93dfc5d6cf1ca8b030c" dependencies = [ - "winapi", + "yansi", ] [[package]] -name = "arbitrary" -version = "0.4.5" +name = "assert_matches" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cb544f1057eaaff4b34f8c4dcf56fc3cd04debd291998405d135017a7c3c0f4" -dependencies = [ - "derive_arbitrary", -] +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" [[package]] name = "atty" @@ -49,268 +39,174 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bitflags" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" - -[[package]] -name = "bstr" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "31accafdb70df7871592c058eca3985b71104e15ac32f64706022c58867da931" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - -[[package]] -name = "bumpalo" -version = "3.4.0" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "byteorder" -version = "1.3.4" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" - -[[package]] -name = "cast" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" -dependencies = [ - "rustc_version", -] +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cfg-if" -version = "0.1.10" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.12" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0fee792e164f78f5fe0c296cc2eb3688a2ca2b70cdff33040922d298203f0c4" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" dependencies = [ + "libc", "num-integer", "num-traits", "time", + "winapi", ] [[package]] -name = "clap" -version = "2.33.1" +name = "chumsky" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129" +checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4" dependencies = [ - "ansi_term", - "atty", - 
"bitflags", - "strsim", - "textwrap", - "unicode-width", - "vec_map", + "ahash", ] [[package]] -name = "criterion" -version = "0.3.3" +name = "clap" +version = "3.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70daa7ceec6cf143990669a04c7df13391d55fb27bd4079d252fca774ba244d8" +checksum = "9f1fe12880bae935d142c8702d500c63a4e8634b6c3c57ad72bf978fc7b6249a" dependencies = [ "atty", - "cast", - "clap", - "criterion-plot", - "csv", - "itertools 0.9.0", - "lazy_static", - "num-traits", - "oorandom", - "plotters", - "rayon", - "regex 1.3.9", - "serde", - "serde_cbor", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", + "bitflags", + "clap_derive", + "clap_lex", + "indexmap", + "once_cell", + "strsim", + "termcolor", + "textwrap", ] [[package]] -name = "criterion-plot" -version = "0.4.3" +name = "clap_derive" +version = "3.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e022feadec601fba1649cfa83586381a4ad31c6bf3a9ab7d408118b05dd9889d" +checksum = "ed6db9e867166a43a53f7199b5e4d1f522a1e5bd626654be263c999ce59df39a" dependencies = [ - "cast", - "itertools 0.9.0", + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "crossbeam-deque" -version = "0.7.3" +name = "clap_lex" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" +checksum = "87eba3c8c7f42ef17f6c659fc7416d0f4758cd3e58861ee63c5fa4a4dde649e4" dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", - "maybe-uninit", + "os_str_bytes", ] [[package]] -name = "crossbeam-epoch" -version = "0.8.2" +name = "const-random" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" +checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4" dependencies = [ - "autocfg", - 
"cfg-if", - "crossbeam-utils", - "lazy_static", - "maybe-uninit", - "memoffset", - "scopeguard", + "const-random-macro", + "proc-macro-hack", ] [[package]] -name = "crossbeam-queue" -version = "0.2.3" +name = "const-random-macro" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "774ba60a54c213d409d5353bda12d49cd68d14e45036a285234c8d6f91f92570" +checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" dependencies = [ - "cfg-if", - "crossbeam-utils", - "maybe-uninit", -] - -[[package]] -name = "crossbeam-utils" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" -dependencies = [ - "autocfg", - "cfg-if", + "getrandom", "lazy_static", + "proc-macro-hack", + "tiny-keccak", ] [[package]] -name = "csv" -version = "1.1.3" +name = "crunchy" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279" -dependencies = [ - "bstr", - "csv-core", - "itoa", - "ryu", - "serde", -] +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] -name = "csv-core" -version = "0.1.10" +name = "either" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] -name = "ctor" -version = "0.1.15" +name = "getrandom" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39858aa5bac06462d4dd4b9164848eb81ffc4aa5c479746393598fd193afa227" +checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" dependencies = [ - "quote", - "syn", -] - -[[package]] -name = "derive_arbitrary" -version = "0.4.5" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b43185d3e7ce7dcd44a23ca761ec026359753ebf480283a571e6463853d2ef" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", ] [[package]] -name = "difference" -version = "2.0.0" +name = "hashbrown" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" +checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" [[package]] -name = "either" -version = "1.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" - -[[package]] -name = "half" -version = "1.6.0" +name = "heck" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d36fab90f82edc3c747f9d438e06cf0a491055896f2a279638bb5beed6c40177" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" [[package]] name = "hermit-abi" -version = "0.1.14" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9586eedd4ce6b3c498bc3b4dd92fc9f11166aa908a914071953768066c67909" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] -name = "itertools" -version = "0.8.2" +name = "indexmap" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" +checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" dependencies = [ - "either", + "autocfg", + "hashbrown", ] [[package]] name = "itertools" -version = "0.9.0" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +checksum = 
"a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" dependencies = [ "either", ] -[[package]] -name = "itoa" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" - -[[package]] -name = "js-sys" -version = "0.3.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4b9172132a62451e56142bff9afc91c8e4a4500aa5b847da36815b63bfda916" -dependencies = [ - "wasm-bindgen", -] - [[package]] name = "lazy_static" version = "1.4.0" @@ -321,26 +217,24 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" name = "lc3-assembler" version = "0.1.0" dependencies = [ - "annotate-snippets", + "ariadne", + "assert_matches", + "chumsky", "clap", - "criterion", - "itertools 0.8.2", - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/core)", - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/prototype)", + "itertools", + "lc3-isa", "lc3-os", "lc3-shims", - "num-traits", - "pretty_assertions", - "regex 0.2.11", + "quote", ] [[package]] name = "lc3-baseline-sim" version = "0.1.0" -source = "git+https://github.com/ut-utp/core#4816ece0f2d47e54d989c7a5bc4da9c9f5415f74" +source = "git+https://github.com/ut-utp/core?branch=master#d3063ebb9c212dd441a75d6ea1a476daa85f5a4d" dependencies = [ - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/core)", - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/core)", + "lc3-isa", + "lc3-macros", "lc3-traits", "static_assertions", ] @@ -348,20 +242,9 @@ dependencies = [ [[package]] name = "lc3-isa" version = "0.1.0" -source = "git+https://github.com/ut-utp/core#4816ece0f2d47e54d989c7a5bc4da9c9f5415f74" -dependencies = [ - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/core)", - "serde", - "static_assertions", -] - -[[package]] -name = "lc3-isa" -version = "0.1.0" -source = "git+https://github.com/ut-utp/prototype#4816ece0f2d47e54d989c7a5bc4da9c9f5415f74" +source = 
"git+https://github.com/ut-utp/core?branch=master#d3063ebb9c212dd441a75d6ea1a476daa85f5a4d" dependencies = [ - "arbitrary", - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/prototype)", + "lc3-macros", "serde", "static_assertions", ] @@ -369,17 +252,7 @@ dependencies = [ [[package]] name = "lc3-macros" version = "0.1.0" -source = "git+https://github.com/ut-utp/core#4816ece0f2d47e54d989c7a5bc4da9c9f5415f74" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "lc3-macros" -version = "0.1.0" -source = "git+https://github.com/ut-utp/prototype#4816ece0f2d47e54d989c7a5bc4da9c9f5415f74" +source = "git+https://github.com/ut-utp/core?branch=master#d3063ebb9c212dd441a75d6ea1a476daa85f5a4d" dependencies = [ "proc-macro2", "quote", @@ -389,12 +262,12 @@ dependencies = [ [[package]] name = "lc3-os" version = "0.1.0" -source = "git+https://github.com/ut-utp/core#4816ece0f2d47e54d989c7a5bc4da9c9f5415f74" +source = "git+https://github.com/ut-utp/core?branch=master#d3063ebb9c212dd441a75d6ea1a476daa85f5a4d" dependencies = [ "lazy_static", "lc3-baseline-sim", - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/core)", - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/core)", + "lc3-isa", + "lc3-macros", "lc3-shims", "lc3-traits", "static_assertions", @@ -403,12 +276,12 @@ dependencies = [ [[package]] name = "lc3-shims" version = "0.1.0" -source = "git+https://github.com/ut-utp/core#4816ece0f2d47e54d989c7a5bc4da9c9f5415f74" +source = "git+https://github.com/ut-utp/core?branch=master#d3063ebb9c212dd441a75d6ea1a476daa85f5a4d" dependencies = [ "byteorder", "chrono", - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/core)", - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/core)", + "lc3-isa", + "lc3-macros", "lc3-traits", "static_assertions", "time", @@ -418,10 +291,10 @@ dependencies = [ [[package]] name = "lc3-traits" version = "0.1.0" -source = "git+https://github.com/ut-utp/core#4816ece0f2d47e54d989c7a5bc4da9c9f5415f74" +source = 
"git+https://github.com/ut-utp/core?branch=master#d3063ebb9c212dd441a75d6ea1a476daa85f5a4d" dependencies = [ - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/core)", - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/core)", + "lc3-isa", + "lc3-macros", "log", "serde", "static_assertions", @@ -429,45 +302,24 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.71" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" [[package]] name = "log" -version = "0.4.8" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] -[[package]] -name = "maybe-uninit" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" - -[[package]] -name = "memchr" -version = "2.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" - -[[package]] -name = "memoffset" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c198b026e1bbf08a937e94c6c60f9ec4a2267f5b0d2eec9c1b21b061ce2be55f" -dependencies = [ - "autocfg", -] - [[package]] name = "num-integer" -version = "0.1.43" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d59457e662d541ba17869cf51cf177c0b5f0cbf476c66bdc90bf1edac4f875b" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ "autocfg", "num-traits", @@ -475,237 +327,93 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.12" 
+version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", ] [[package]] -name = "num_cpus" -version = "1.13.0" +name = "once_cell" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" -dependencies = [ - "hermit-abi", - "libc", -] +checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" [[package]] -name = "oorandom" -version = "11.1.2" +name = "os_str_bytes" +version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a170cebd8021a008ea92e4db85a72f80b35df514ec664b296fdcbb654eac0b2c" +checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa" [[package]] -name = "output_vt100" -version = "0.1.2" +name = "proc-macro-error" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53cdc5b785b7a58c5aad8216b3dfa114df64b0b06ae6e1501cef91df2fbdf8f9" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ - "winapi", + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", ] [[package]] -name = "plotters" -version = "0.2.15" +name = "proc-macro-error-attr" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d1685fbe7beba33de0330629da9d955ac75bd54f33d7b79f9a895590124f6bb" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "js-sys", - "num-traits", - "wasm-bindgen", - "web-sys", + "proc-macro2", + "quote", + "version_check", ] [[package]] -name = "pretty_assertions" -version = "0.6.1" +name = "proc-macro-hack" +version = "0.5.19" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f81e1644e1b54f5a68959a29aa86cde704219254669da328ecfdf6a1f09d427" -dependencies = [ - "ansi_term", - "ctor", - "difference", - "output_vt100", -] +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.18" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa" +checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" dependencies = [ - "unicode-xid", + "unicode-ident", ] [[package]] name = "quote" -version = "1.0.7" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" dependencies = [ "proc-macro2", ] -[[package]] -name = "rayon" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f02856753d04e03e26929f820d0a0a337ebe71f849801eea335d464b349080" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e92e15d89083484e11353891f1af602cc661426deb9564c298b270c726973280" -dependencies = [ - "crossbeam-deque", - "crossbeam-queue", - "crossbeam-utils", - "lazy_static", - "num_cpus", -] - -[[package]] -name = "regex" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.5.6", - "thread_local", - "utf8-ranges", -] - -[[package]] -name = "regex" -version = "1.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" -dependencies = [ - "regex-syntax 0.6.18", -] - -[[package]] -name = "regex-automata" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" -dependencies = [ - "byteorder", -] - -[[package]] -name = "regex-syntax" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7" -dependencies = [ - "ucd-util", -] - -[[package]] -name = "regex-syntax" -version = "0.6.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" - -[[package]] -name = "rustc_version" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -dependencies = [ - "semver", -] - -[[package]] -name = "ryu" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "semver" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver-parser" -version = "0.7.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - [[package]] name = "serde" -version = "1.0.114" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" dependencies = [ "serde_derive", ] -[[package]] -name = "serde_cbor" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" -dependencies = [ - "half", - "serde", -] - [[package]] name = "serde_derive" -version = "1.0.114" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0be94b04690fbaed37cddffc5c134bf537c8e3329d53e982fe04c374978f8e" +checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" dependencies = [ "proc-macro2", "quote", "syn", ] -[[package]] -name = "serde_json" -version = "1.0.56" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3433e879a558dde8b5e8feb2a04899cf34fdde1fafb894687e52105fc1162ac3" -dependencies = [ - "itoa", - "ryu", - "serde", -] - [[package]] name = "static_assertions" version = "1.1.0" @@ -714,46 +422,44 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strsim" -version = "0.8.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.33" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d5d96e8cbb005d6959f119f773bfaebb5684296108fb32600c00cde305b2cd" +checksum = 
"c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-ident", ] [[package]] -name = "textwrap" -version = "0.11.0" +name = "termcolor" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ - "unicode-width", + "winapi-util", ] [[package]] -name = "thread_local" -version = "0.3.6" +name = "textwrap" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" -dependencies = [ - "lazy_static", -] +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" [[package]] name = "time" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" dependencies = [ "libc", + "wasi 0.10.0+wasi-snapshot-preview1", "winapi", ] @@ -767,119 +473,37 @@ dependencies = [ ] [[package]] -name = "tinytemplate" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d3dc76004a03cec1c5932bca4cdc2e39aaa798e3f82363dd94f9adf6098c12f" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "ucd-util" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85f514e095d348c279b1e5cd76795082cf15bd59b93207832abe0b1d8fed236" - -[[package]] -name = "unicode-width" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" - -[[package]] -name = "unicode-xid" -version = "0.2.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" - -[[package]] -name = "utf8-ranges" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" - -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - -[[package]] -name = "walkdir" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" -dependencies = [ - "same-file", - "winapi", - "winapi-util", -] - -[[package]] -name = "wasm-bindgen" -version = "0.2.64" +name = "tiny-keccak" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a634620115e4a229108b71bde263bb4220c483b3f07f5ba514ee8d15064c4c2" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" dependencies = [ - "cfg-if", - "wasm-bindgen-macro", + "crunchy", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.64" +name = "unicode-ident" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e53963b583d18a5aa3aaae4b4c1cb535218246131ba22a71f05b518098571df" -dependencies = [ - "bumpalo", - "lazy_static", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] +checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" [[package]] -name = "wasm-bindgen-macro" -version = "0.2.64" +name = "version_check" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fcfd5ef6eec85623b4c6e844293d4516470d8f19cd72d0d12246017eb9060b8" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] +checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.64" +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9adff9ee0e94b926ca81b57f57f86d5545cdcb1d259e21ec9bdd95b901754c75" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] -name = "wasm-bindgen-shared" -version = "0.2.64" +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7b90ea6c632dd06fd765d44542e234d5e63d9bb917ecd64d79778a13bd79ae" - -[[package]] -name = "web-sys" -version = "0.3.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "863539788676619aac1a23e2df3655e96b32b0e05eb72ca34ba045ad573c625d" -dependencies = [ - "js-sys", - "wasm-bindgen", -] +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "winapi" @@ -913,10 +537,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "yansi-term" -version = "0.1.2" +name = "yansi" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe5c30ade05e61656247b2e334a031dfd0cc466fadef865bdcdea8d537951bf1" -dependencies = [ - "winapi", -] +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" diff --git a/assembler/.gitignore b/assembler/.gitignore index f7a9f85..d200436 100644 --- a/assembler/.gitignore +++ b/assembler/.gitignore @@ -1,4 +1,7 @@ /target **/*.rs.bk Cargo.lock -*.iml \ No newline at end of file +*.iml +*.asm +*.obj +*.mem \ No newline at end of file diff --git a/assembler/Cargo.toml b/assembler/Cargo.toml index 
5fd5cd5..674c221 100644 --- a/assembler/Cargo.toml +++ b/assembler/Cargo.toml @@ -2,7 +2,8 @@ name = "lc3-assembler" version = "0.1.0" authors = ["UT UTP "] -edition = "2018" +edition = "2021" +rust-version = "1.56.1" workspace = ".." @@ -33,15 +34,15 @@ maintenance = { status = "actively-developed" } [dependencies] -regex = "0.2.1" -itertools = "0.8.2" -num-traits = "0.2.11" -annotate-snippets = { version = "0.8.0", features = ["color"] } -clap = "2.33.0" +itertools = "0.10" +clap = { version = "3.1", features = ["derive"] } +chumsky = "0.8" +ariadne = "0.1" +quote = "1" lc3-isa = { version = "0.1.0", default-features = false } lc3-shims = { version = "0.1.0", default-features = false } lc3-os = { version = "0.1.0", default-features = false } [dev-dependencies] -pretty_assertions = "0.6.1" +assert_matches = "1.5" diff --git a/assembler/README.md b/assembler/README.md index 9cc2d11..2ad42ed 100644 --- a/assembler/README.md +++ b/assembler/README.md @@ -1,12 +1,118 @@ ### `lc3-assembler` crate [![](https://github.com/ut-utp/assembler/workflows/assembler/badge.svg)](https://github.com/ut-utp/assembler/actions?query=workflow%3Aassembler) -[![Minimum supported Rust version](https://img.shields.io/badge/rustc-1.42+-red.svg?style=for-the-badge&logo=rust)](#minimum-supported-rust-version-msrv) +[![Minimum supported Rust version](https://img.shields.io/badge/rustc-1.56+-red.svg?style=for-the-badge&logo=rust)](#minimum-supported-rust-version-msrv) A small assembler for a small computer. 
-### Minimum Supported Rust Version (MSRV) +#### Example + +##### Input + +```asm + .ORIG x3000 + LD R0, A0 + LD R1, A1 + ADD R2, R0, #R1 + ST R2, RESULTS +A0 .FILL #1 +A1 .FILL #2 +RESULT .BLKW 0 + .END +``` + +##### Error Output +```text +Error: invalid operand + ╭─[.\example\example.asm:4:25] + │ + 4 │ ADD R2, R0, #R1 + · ─┬─ + · ╰─── here +───╯ +Error: reference to label RESULTS invalid: not previously defined + ╭─[.\example\example.asm:5:21] + │ + 5 │ ST R2, RESULTS + · ───┬─── + · ╰───── here +───╯ +Error: assembly failed +``` + +#### Features +- A command line application for assembling [Little Computer 3 (LC-3)](https://en.wikipedia.org/wiki/Little_Computer_3) assembly programs +- A Rust library for parsing, analyzing, and assembling LC-3 programs + +### Command Line Application (CLI) + +The CLI is an LC-3 assembler that can be installed and used from your command line. + +#### Installation + +Run the following in your command line: + +(TODO) + +#### Usage + +Give the CLI a path to a file containing an LC-3 assembly program: + +(TODO: command) + +If it is valid, the CLI will assemble the program into LC-3 machine code +and store it in a new binary `.mem` file (in this case, `foo.mem`). +You can then use the [UTP TUI](https://github.com/ut-utp/tui/) to load the binary file into an LC-3 emulator and run it. + +If the program is invalid, the CLI will instead print error messages indicating what is wrong (to `stderr`). + +For more options and usage information, run using the `--help` option: -This crate is currently guaranteed to compile on stable Rust 1.42 and newer. We offer no guarantees that this will remain true in future releases but do promise to always support (at minimum) the latest stable Rust version and to document changes to the MSRV in the [changelog](CHANGELOG.md). +(TODO: command) + +### Library + +The library provides Rust functions for assembling LC-3 programs. 
+ +These functions are split into modules, most of which encapsulate typical [compiler phases](https://en.wikipedia.org/wiki/Compiler#Three-stage_compiler_structure). +These are intended for use in order: + +![A diagram indicating the order of data flow through the main modules: lex, parse, analyze, assemble, link, then layer.](https://raw.githubusercontent.com/ut-utp/assembler/master/assembler/docs/images/main_workflow.png) + +Together, the first three modules, `lex`, `parse`, and `analyze`, check that the input is valid LC-3 assembly +and parse it into a data structure (called `parse::File`, or "the syntax tree") which can be more easily assembled. +The last three modules, `assemble`, `link`, and `layer`, generate the machine code for the program and store it +as an LC-3 memory image. + +Each of these modules provides one main public function. You can use them individually, +or use the functions in the top-level module which already combine the steps as shown in the diagram above. + +For examples and more detailed information, see the API documentation for each function and module. + +#### Design + +Our goals when designing the library are, in order of priority: +1. *No "False Negatives"* -- Correctly assemble any valid LC-3 program. +2. *No "False Positives"* -- Reject any input which is not a valid LC-3 program. +3. *Maintainability* -- Provide developer documentation and flexible, debuggable abstractions. +4. *User Experience* -- Provide user documentation and high-quality error messages. +5. *Performance (Speed)* -- Run quickly. +6. *Performance (Memory)* -- Use little memory. + +Goals 1 and 2 ensure that assembly behaves as expected and provides a minimum level of feedback. + +Goal 3 is to make sure any bugs can be fixed for the foreseeable future; maintenance will likely fall to the TAs +of UT Austin's Electrical and Computer Engineering department, who may change each semester. + +Goal 4 is a priority because LC-3 assembly is an educational language. 
+We want to help students identify and correct their assembly errors in a way that reinforces *why* the errors occurred. +With this support, we hope to help students continue more quickly and confidently to debugging semantic errors. + +Goals 5 and 6 aim to make the tool accessible to a wide audience and provide a good experience, +no matter the power of their computers. Of course, assembly is a simple task for almost any PC today, +so these are our lowest priorities. + + +### Minimum Supported Rust Version (MSRV) -(TODO!) +This crate is currently guaranteed to compile on stable Rust 1.56.1 and newer. We offer no guarantees that this will remain true in future releases but do promise to always support (at minimum) the latest stable Rust version and to document changes to the MSRV in the [changelog](CHANGELOG.md). diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 047816f..a19f6e4 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -1,102 +1,129 @@ extern crate lc3_assembler; use std::{env, fs}; +use std::fmt::{Debug, Formatter}; use std::path::{Path, PathBuf}; -use lc3_assembler::lexer::Lexer; -use lc3_assembler::parser::parse; -use lc3_assembler::assembler::assemble; +use std::process::exit; +use ariadne::Source; +use lc3_assembler::parse::{File, parse}; use lc3_shims::memory::FileBackedMemoryShim; -use clap::clap_app; -use lc3_assembler::parser::LeniencyLevel::*; -use lc3_assembler::error::{extract_file_errors, ParseError}; -use annotate_snippets::display_list::{DisplayList, FormatOptions}; -use annotate_snippets::snippet::{Snippet, Annotation, Slice, AnnotationType, SourceAnnotation}; +use clap::{Parser}; +use lc3_isa::util::MemoryDump; +use lc3_shims::memory::error::MemoryShimError; +use lc3_assembler::{assemble, assemble_file, LeniencyLevel, parse_and_analyze, parse_and_analyze_file, SourceId, sources}; const MEM_DUMP_FILE_EXTENSION: &'static str = "mem"; -fn main() { - std::thread::Builder::new() - 
.name("main_greater_stack_size".to_string()) - .stack_size(8*1024*1024) - .spawn(as_).unwrap() - .join().unwrap(); +#[derive(Parser)] +#[clap(author, version, about, + long_about = "Analyzes, and/or assembles an LC-3 assembly file into an executable image + of LC-3 machine code." + )] +struct Args { + /// Input file path + #[clap(required = true, parse(from_os_str), value_name = "INPUT_FILE")] + input: PathBuf, + + /// Enforce all rules of the original LC-3 assembly language + /// + /// By default, the assembler is lenient about restrictions such as label length. + /// This option enforces restrictions specified in Patt and Patel's Introduction to Computing Systems, 3rd edition. + /// + /// These include: + /// - Labels cannot contain underscores + /// - Labels cannot exceed 20 characters in length + /// - Labels must be defined on the same line as an instruction, not separately on a previous line + /// - Qualified number literals cannot be prefixed with `0` (i.e., `0x3000` is not allowed, only `x3000`) + /// - Operands must be separated with commas (`,`), not just whitespace. + /// - Condition codes for BR instructions *must* be listed in the following order: `n`, `z`, then `p`. + // NOTE TO DEVS (THIS SHOULD NOT BE IN THE DOCS): + // When updating this list, remember to update the library's list in the docs for LeniencyLevel. + #[clap(long, short, verbatim_doc_comment)] + strict: bool, + + /// Check the correctness of the program without assembling + #[clap(long, short)] + check: bool, + + /// Link executable image without OS + /// + /// If not specified, the program is overlaid onto an image of the OS from lc3-os at link time. 
+ #[clap(long, short)] + no_os: bool, } -fn as_() { - let matches = clap_app!(assemble_lc3 => - (version: env!("CARGO_PKG_VERSION")) - (author: env!("CARGO_PKG_AUTHORS")) - (about: env!("CARGO_PKG_DESCRIPTION")) - (@arg strict: -s --strict "Enforces all rules of the original LC-3 assembly language when validating the program") - (@arg check: -c --check "Checks the correctness of the program without attempting to assemble it") - (@arg with_os: -o --with_os "Overlays the program onto an image of the OS from lc3-os") - (@arg INPUT: +required ... "Paths to the programs to assemble") - ).get_matches(); - - for path_str in matches.values_of("INPUT").unwrap() { - let path = Path::new(path_str); - assert!(path.is_file()); - - let leniency = if matches.is_present("strict") { Strict } else { Lenient }; - - let string = fs::read_to_string(path).unwrap(); - let src = string.as_str(); - let lexer = Lexer::new(src); - let cst = parse(lexer, leniency); - - let errors = extract_file_errors(cst.clone()); - if errors.len() > 0 { - for error in errors { - let label_string = error.message(); - let label = label_string.as_str(); - let annotations = error.annotations(); - let slices = slices(annotations, src, Some(path_str)); - let snippet = create_snippet(label, slices); - let dl = DisplayList::from(snippet); - println!("{}", dl); - } - break; - } +fn main() -> Result<(), Error> { + let main_thread = + std::thread::Builder::new() + .name("main_greater_stack_size".to_string()) + .stack_size(8*1024*1024) + .spawn(as_)?; + main_thread.join().map_err(|_| Error::Unexpected)? 
+} - if matches.is_present("check") { - println!("{}: No errors found.", path_str); - } else { - let background = if matches.is_present("with_os") { Some(lc3_os::OS_IMAGE.clone()) } else { None }; - let mem = assemble(cst.objects, background); +enum Error { + Io(std::io::Error), + MemoryShim(MemoryShimError), + Assembler, + Unexpected +} - let mut output_path = PathBuf::from(path_str); - output_path.set_extension(MEM_DUMP_FILE_EXTENSION); - let mut file_backed_mem = FileBackedMemoryShim::with_initialized_memory(output_path, mem); - file_backed_mem.flush_all_changes().unwrap(); +impl Debug for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Error::Io(ioe) + | Error::MemoryShim(MemoryShimError::IoError(ioe)) + => write!(f, "{}", ioe), + Error::MemoryShim(_) + | Error::Assembler + | Error::Unexpected => write!(f, "assembly failed") } } } -fn create_snippet<'input>(label: &'input str, slices: Vec>) -> Snippet<'input> { - Snippet { - title: Some(Annotation { - label: Some(label), - id: None, - annotation_type: AnnotationType::Error - }), - footer: vec![], - slices, - opt: FormatOptions { color: true, anonymized_line_numbers: false } +impl From for Error { + fn from(e: std::io::Error) -> Self { + Error::Io(e) + } +} + +impl From for Error { + fn from(e: MemoryShimError) -> Self { + Error::MemoryShim(e) } } -pub fn slices<'input>(annotations: Vec>, source: &'input str, origin: Option<&'input str>) -> Vec> { - let mut slices = Vec::new(); - if !annotations.is_empty() { - slices.push( - Slice { - source, - origin, - line_start: 1, - fold: true, - annotations, +fn as_() -> Result<(), Error> { + let args = Args::parse(); + + let leniency = if args.strict { LeniencyLevel::Strict } else { LeniencyLevel::Lenient }; + + let cache = sources([args.input.clone()])?; + + if args.check { + match parse_and_analyze_file(&args.input, leniency) { + Ok(_) => { + println!("{}: No errors found.", args.input.display()); + Ok(()) } - ); + Err(error) => 
print_errors(error, cache) + } + } else { + match assemble_file(&args.input, leniency, args.no_os) { + Ok(mem) => { + let mut output_path = args.input.clone(); + output_path.set_extension(MEM_DUMP_FILE_EXTENSION); + let mut file_backed_mem = FileBackedMemoryShim::with_initialized_memory(output_path, mem); + file_backed_mem.flush_all_changes()?; + + Ok(()) + } + Err(error) => print_errors(error, cache) + } } - slices +} + +fn print_errors(error: lc3_assembler::error::Error, mut cache: impl ariadne::Cache) -> Result<(), Error> { + eprint!("{}", error.report_to_string(cache)?); + Err(Error::Assembler) +} diff --git a/assembler/docs/analyze.md b/assembler/docs/analyze.md new file mode 100644 index 0000000..1e6f020 --- /dev/null +++ b/assembler/docs/analyze.md @@ -0,0 +1,25 @@ +# `analyze` + +## How to add a new analysis pass + +In `analyze`, different types of errors are detected by different +syntax tree [visitors](https://en.wikipedia.org/wiki/Visitor_pattern). +To analyze the syntax tree for a new type of error: + +1. Implement `Visit` (we'll call the implementor `FooAnalysis`). + 1. Set `Visit::Data` to any data it needs at construct time. + 2. Set `Visit::Output` to any data it outputs besides errors. For example, + `SymbolTableAnalysis` outputs an (estimated) symbol table. + 3. The `visit` function depth-first traverses the syntax tree, + calling the "enter" method as it first reaches each node, then calling the + "exit" method on each node after all of its children have been visited. + Override any of these methods to get the data needed for the error analysis. +2. `Visit` is implemented for small tuples of `Visit`. In `validate`, + add `FooAnalysis` to the tuple type in a call to `visit`, or if it has + data dependencies, add a new call to `visit` and pass the data to `visit`. + - If necessary, extend the call to the `impl_visit_tuple` macro + to increase the maximum length of tuple `Visit` is implemented for.
+ - `Visit::Data` for tuples is just a tuple of the component `Visit`s' `Visit::Data`, + in the same order as the `Visit`s. The same is true for `Visit::Output`. +3. (If a new call to `visit` was added:) Add the error vector output by `visit` to the + return value of `validate`. \ No newline at end of file diff --git a/assembler/docs/example/example_output.txt b/assembler/docs/example/example_output.txt new file mode 100644 index 0000000..9dbbd1a --- /dev/null +++ b/assembler/docs/example/example_output.txt @@ -0,0 +1,15 @@ +Error: invalid operand + ╭─[.\example\example.asm:4:25] + │ + 4 │ ADD R2, R0, #R1 + · ─┬─ + · ╰─── here +───╯ +Error: reference to label RESULTS invalid: not previously defined + ╭─[.\example\example.asm:5:21] + │ + 5 │ ST R2, RESULTS + · ───┬─── + · ╰───── here +───╯ +Error: assembly failed \ No newline at end of file diff --git a/assembler/docs/id_arg.md b/assembler/docs/id_arg.md new file mode 100644 index 0000000..db74d34 --- /dev/null +++ b/assembler/docs/id_arg.md @@ -0,0 +1,5 @@ +`id` should be the [`SourceId`] of the source file containing the source `String`. +This can be obtained using [`id`]: `id(&std::path::PathBuf::from("./path/to/source.asm"))`. +If the input is not from a file, the `id` of any path (even `""`) will suffice. +This argument is only to improve error messages by indicating the source file. + diff --git a/assembler/docs/images/main_workflow.png b/assembler/docs/images/main_workflow.png new file mode 100644 index 0000000..1cdc5d4 Binary files /dev/null and b/assembler/docs/images/main_workflow.png differ diff --git a/assembler/docs/no_os_arg.md b/assembler/docs/no_os_arg.md new file mode 100644 index 0000000..9512cfb --- /dev/null +++ b/assembler/docs/no_os_arg.md @@ -0,0 +1,4 @@ +`no_os` indicates whether to include the UTP OS in the resulting image. +For user programs, this should be `false` so that the OS is included +and the resulting image can be executed by the UTP LC-3 emulator. 
+For assembling other programs, like the OS itself, this may be set to `true`. diff --git a/assembler/docs/tests/add.asm b/assembler/docs/tests/add.asm new file mode 100644 index 0000000..57cb173 --- /dev/null +++ b/assembler/docs/tests/add.asm @@ -0,0 +1,3 @@ +.ORIG x3000 +ADD R0, R0, R0 +.END \ No newline at end of file diff --git a/assembler/docs/tests/bad_operand.asm b/assembler/docs/tests/bad_operand.asm new file mode 100644 index 0000000..604bcde --- /dev/null +++ b/assembler/docs/tests/bad_operand.asm @@ -0,0 +1,3 @@ +.ORIG x3000 +ADD R0, R0, #OOPS +.END \ No newline at end of file diff --git a/assembler/src/analyze.rs b/assembler/src/analyze.rs new file mode 100644 index 0000000..f6c5133 --- /dev/null +++ b/assembler/src/analyze.rs @@ -0,0 +1,984 @@ +//! Functions for identifying errors in the syntax trees produced by [`parse`](crate::parse). +//! +//! This module is primarily for semantic analysis, or identifying semantic errors +//! in a syntax tree. These are errors that don't have to do with incorrect syntax; +//! for example, branching (with `BR`) to a label that isn't defined. It is correct +//! syntax to use `BR` with a label, but the label doesn't refer to any address, +//! so the assembler wouldn't have the necessary information to calculate an offset. +//! In a situation like this where the syntax is correct, but the meaning (semantics) +//! is still invalid or contradictory, it is a semantic error. +//! +//! Secondarily, this module also identifies parse (or *syntax*) errors +//! inserted into the syntax tree during parsing. When performing semantic analysis, +//! the parse errors are typically ignored, in order to identify as many independent errors +//! as possible. +//! When the parse errors affect the meaning, we attempt to work around the missing semantic +//! information by making reasonable assumptions. +//! For example, if a program starts with `.ORIG xOOPS`, some analyses may assume +//! 
that the intent was to place the program at `x3000`, the start of user space, +//! commonly used in examples. +//! +//! In other words, our approach to semantic analysis tries to avoid identifying +//! multiple errors stemming from the same root cause, particularly when the root +//! cause is a parse error. The goal is to be clear where and why a change needs to be +//! made to make the program valid, not to show every problem that an error implies. +//! So when semantic analysis encounters a parse error, it makes whatever assumptions +//! it needs in order to treat the rest of the program as normal. +use std::collections::HashMap; +use std::convert::{TryFrom, TryInto}; +use std::fmt::Debug; +use std::string::String; +use itertools::{concat, Itertools, zip}; +use lc3_isa::{Addr, Word}; +use crate::lex::{LexData, Opcode}; +use crate::parse::{File, Instruction, Operand, ProgramBlock}; +use crate::{get, get_result, LeniencyLevel, SourceId, Spanned, SpanWithSource, util, WithErrData}; +use crate::assemble::calculate_offset; +use crate::error::{Error, InvalidReferenceReason, OperandType, ProgramBlockPlacement, RoughAddr, SingleError, StrictlyInvalidLabelReason}; +use crate::error::OperandType::*; +use crate::error::Error::*; +use crate::error::SingleError::*; + +#[derive(Default)] +struct ParseErrorsAnalysis { + errors: Vec +} + +impl ParseErrorsAnalysis { + fn push_error(&mut self, single_error: SingleError, span: &SpanWithSource) { + self.errors.push(Spanned(span.clone(), single_error)); + } +} + +// TODO: use context to provide useful hints as to *why* the error occurred +impl Visit for ParseErrorsAnalysis { + type Data = (); + + fn new(_data: Self::Data) -> Self { + Default::default() + } + + type Output = (); + fn finish(self) -> (Self::Output, Vec) { + ((), self.errors) + } + + fn enter_program_block_error(&mut self, span: &SpanWithSource) { + self.push_error(BadProgramBlock, span); + } + fn enter_orig_error(&mut self, span: &SpanWithSource) { + 
self.push_error(BadOperands, span); + } + fn enter_instruction_error(&mut self, span: &SpanWithSource, _location: &LocationCounter) { + self.push_error(BadInstruction, span); + } + fn enter_label_error(&mut self, span: &SpanWithSource, _location: &LocationCounter) { + self.push_error(BadLabel, span); + } + fn enter_opcode_error(&mut self, span: &SpanWithSource, _location: &LocationCounter) { + self.push_error(BadOpcode, span); + } + fn enter_operands_error(&mut self, span: &SpanWithSource, _location: &LocationCounter) { + self.push_error(BadOperands, span); + } + fn enter_operand_error(&mut self, span: &SpanWithSource, _location: &LocationCounter) { + self.push_error(BadOperand, span); + } +} + +#[derive(Default)] +struct DuplicateLabelsAnalysis { + errors: Vec, + labels: HashMap>, +} + +impl Visit for DuplicateLabelsAnalysis { + type Data = (); + + fn new(_data: ()) -> Self { + Default::default() + } + + type Output = (); + fn finish(self) -> (Self::Output, Vec) { + ((), self.errors) + } + + fn exit_file(&mut self, _file: &File, _span: &SpanWithSource) { + let DuplicateLabelsAnalysis { errors, labels } = self; + labels.iter() + .filter(|(_, occurrences)| occurrences.len() > 1) + .map(|(label, occurrences)| + Single(occurrences.get(0).unwrap().id.clone(), + DuplicateLabel { + label: label.clone(), + occurrences: occurrences.clone() + }) + ) + .for_each(|e| errors.push(e)); + } + + fn enter_label(&mut self, label: &String, span: &SpanWithSource, _location: &LocationCounter) { + let occurrences = self.labels.entry(label.clone()).or_insert(Vec::new()); + occurrences.push(span.clone()); + } +} + +#[derive(Debug)] +enum InvalidSymbolError { + InvalidOrig { estimated_addr: RoughAddr }, + PriorInvalidInstruction { estimated_addr: RoughAddr }, + Duplicated, + OutOfBounds, +} + +type SymbolTableValue = Result; + +enum AddressesOccupiedError { + BadOpcode, + BadOperand +} + +impl Instruction { + fn get_first_operand(&self) -> Option<&Operand> { + 
get_result(&self.operands).as_ref().ok() + .and_then(|ops| get(ops, 0)) + } + + fn addresses_occupied(&self) -> Result { + match get_result(&self.opcode) { + Err(()) => Err(AddressesOccupiedError::BadOpcode), + Ok(oc) => match oc { + Opcode::Stringz => + self.get_first_operand() + .and_then(|op| op.clone().get_string()) + .ok_or(AddressesOccupiedError::BadOperand) + .map(|s| (s.len() + 1) as Addr), + Opcode::Blkw => + self.get_first_operand() + .and_then(|op| op.clone().get_unqualified_number_value()) + .ok_or(AddressesOccupiedError::BadOperand), + _ => Ok(1) + } + } + } +} + +type SymbolTable = HashMap; + +#[derive(Debug, Default)] +struct SymbolTableAnalysis { + symbol_table: SymbolTable, +} + +const ORIG_ERROR_STARTING_ADDRESS_ESTIMATE: RoughAddr = 0x3000; +const INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE: RoughAddr = 1; + +impl Visit for SymbolTableAnalysis { + type Data = (); + + fn new(_data: ()) -> Self { + Default::default() + } + + type Output = SymbolTable; + fn finish(self) -> (Self::Output, Vec) { + (self.symbol_table, vec![]) + } + + + fn enter_label(&mut self, label: &String, _span: &SpanWithSource, location: &LocationCounter) { + self.symbol_table.entry(label.clone()) + .and_modify(|e| *e = Err(InvalidSymbolError::Duplicated)) + .or_insert( + match location.state { + LocationCounterState::Valid => + location.value.try_into() + .map_err(|_| InvalidSymbolError::OutOfBounds), + LocationCounterState::InvalidOrig => + Err(InvalidSymbolError::InvalidOrig { + estimated_addr: location.value + }), + LocationCounterState::InvalidInstruction => + Err(InvalidSymbolError::PriorInvalidInstruction { + estimated_addr: location.value + }), + } + ); + } +} + + +struct ExpectedLabel { + width: u8, + position: usize +} + +struct LabelOffsetBoundsAnalysis<'a> { + errors: Vec, + symbol_table: &'a SymbolTable, + expected_label: Option +} + +impl<'a> LabelOffsetBoundsAnalysis<'a> { + fn check_offset(&mut self, label: &String, span: &SpanWithSource, width: u8, label_addr: 
RoughAddr, ref_addr: RoughAddr) { + match calculate_offset(ref_addr, label_addr) { + Err(_) => { + // TODO: make more precise. This case shouldn't be possible unless one of the estimated addresses is far out of bounds. + self.errors.push( + Spanned(span.clone(), + InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::OutOfBounds + })); + } + Ok(offset) => { + if util::min_signed_width(offset as i32) > width { + self.errors.push( + Spanned(span.clone(), + InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::TooDistant { + width, + est_ref_pos: ref_addr, + offset, + est_label_pos: label_addr, + } + })); + } + } + } + } +} + +impl<'a> Visit for LabelOffsetBoundsAnalysis<'a> { + type Data = &'a SymbolTable; + + fn new(symbol_table: &'a SymbolTable) -> Self { + Self { + errors: Default::default(), + symbol_table, + expected_label: Default::default(), + } + } + + type Output = (); + fn finish(self) -> (Self::Output, Vec) { + ((), self.errors) + } + + + fn enter_opcode_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) { + self.expected_label = None; + } + + fn enter_opcode(&mut self, opcode: &Opcode, _span: &SpanWithSource, _location: &LocationCounter) { + use Opcode::*; + self.expected_label = + match opcode { + Ld | Ldi | Lea + | St | Sti => Some(ExpectedLabel { width: 9, position: 1 }), + Br(_) => Some(ExpectedLabel { width: 9, position: 0 }), + Jsr => Some(ExpectedLabel { width: 11, position: 0 }), + Fill => Some(ExpectedLabel { width: 16, position: 0 }), + _ => None, + } + } + + fn enter_operands(&mut self, operands: &Vec>, span: &SpanWithSource, location: &LocationCounter) { + if let Some(ExpectedLabel { width, position }) = &self.expected_label { + if let Some((Ok(Operand::Label(label)), op_span_no_source)) = operands.get(*position) { + let op_span = (span.id.clone(), op_span_no_source.clone()).into(); + match self.symbol_table.get(label) { + None => { + self.errors.push( + 
Spanned(op_span, + InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::Undefined + })); + } + Some(stv) => match stv { + Ok(addr) => { + self.check_offset(label, &op_span, *width, *addr as RoughAddr, location.value); + } + Err(ste) => match ste { + InvalidSymbolError::InvalidOrig { estimated_addr } + | InvalidSymbolError::PriorInvalidInstruction { estimated_addr } => { + self.check_offset(label, &op_span, *width, *estimated_addr, location.value); + } + InvalidSymbolError::Duplicated => { + self.errors.push( + Spanned(op_span, + InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::Duplicated + })); + } + InvalidSymbolError::OutOfBounds => { + self.errors.push( + Spanned(op_span, + InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::OutOfBounds + })); + } + } + } + } + } + } + } + +} + + +#[derive(Default)] +struct OperandTypesAnalysis { + errors: Vec, + expected_operands: Option> +} + +impl OperandTypesAnalysis { + fn check_operands(&mut self, operands: &Vec>, span: &SpanWithSource) { + if let Some(expected) = &self.expected_operands { + // TODO: create longest common subsequence diff for more precise errors + let ops_len = operands.len(); + let exp_len = expected.len(); + if ops_len != exp_len { + self.errors.push(Spanned(span.clone(), WrongNumberOfOperands { expected: exp_len, actual: ops_len })) + } else { + for ((op_res, op_span_no_source), exp_ty) in zip(operands, expected) { + let op_span = (span.id.clone(), op_span_no_source.clone()).into(); + if let Ok(op) = op_res { + if !exp_ty.check(op) { + let actual = if let Operand::NumberLiteral(value) = op { + OperandType::of_number_literal(value, Some(exp_ty.accepted_number_signs())) + } else { + OperandType::of(op) + }; + self.errors.push(Spanned(op_span, OperandTypeMismatch { expected: exp_ty.clone(), actual })); + } + } + } + } + } + } +} + +fn orig_expected_operands() -> Vec { + 
vec![OperandType::signed_or_unsigned_number(16)] // TODO: Disallow signed? +} + +impl Visit for OperandTypesAnalysis { + type Data = (); + + fn new(_data: Self::Data) -> Self { + Default::default() + } + + type Output = (); + fn finish(self) -> (Self::Output, Vec) { + ((), self.errors) + } + + + fn enter_orig(&mut self, orig: &Vec>, span: &SpanWithSource, _location: &LocationCounter) { + self.expected_operands = Some(orig_expected_operands()); + self.check_operands(orig, span); + } + + fn enter_opcode_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) { + self.expected_operands = None; + } + + fn enter_opcode(&mut self, opcode: &Opcode, _span: &SpanWithSource, _location: &LocationCounter) { + use Opcode::*; + self.expected_operands = Some( + match opcode { + Add | And => vec![Register, Register, OperandType::reg_or_imm5()], + Br(_) => vec![OperandType::pc_offset(9)], + Jsr => vec![OperandType::pc_offset(11)], + Jmp | Jsrr => vec![Register], + Ld | Ldi | Lea + | St | Sti => vec![Register, OperandType::pc_offset(9)], + Ldr | Str => vec![Register, Register, Number { signed: true, width: 6 }], + Not => vec![Register, Register], + Ret | Rti + | Getc | Out + | Puts | In + | Putsp | Halt => vec![], + Trap => vec![OperandType::signed_or_unsigned_number(8)], + Orig => orig_expected_operands(), + Blkw => vec![UnqualifiedNumber], + Fill => vec![Or(Box::new(Label), + Box::new(OperandType::signed_or_unsigned_number(16)))], + Stringz => vec![String], + } + ); + } + + fn enter_operands(&mut self, operands: &Vec>, span: &SpanWithSource, _location: &LocationCounter) { + self.check_operands(operands, span); + } +} + + +#[derive(Default)] +struct StrictLabelAnalysis { + errors: Vec, +} + +fn validate_strict_label(label: &String) -> Option { + let contains_underscores = label.contains('_'); + let too_long = label.len() > 20; + + match (contains_underscores, too_long) { + (false, false) => None, + (true, false) => 
Some(StrictlyInvalidLabelReason::ContainsUnderscores), + (false, true) => Some(StrictlyInvalidLabelReason::TooLong), + (true, true) => Some(StrictlyInvalidLabelReason::ContainsUnderscoresAndTooLong), + } +} + +impl Visit for StrictLabelAnalysis { + type Data = (); + fn new(_data: Self::Data) -> Self { Default::default() } + type Output = (); + fn finish(self) -> (Self::Output, Vec) { ((), self.errors) } + + fn enter_label(&mut self, label: &String, span: &SpanWithSource, _location: &LocationCounter) { + if let Some(error_reason) = validate_strict_label(label) { + self.errors.push( + Error::Spanned(span.clone(), + StrictlyInvalidLabel { + label: label.clone(), + reason: error_reason + })); + } + } +} + + +struct ObjectPlacementAnalysis { + errors: Vec, + last_start: RoughAddr, + object_index: usize, + object_spans: Vec, +} + +impl Default for ObjectPlacementAnalysis { + fn default() -> Self { + Self { + errors: Default::default(), + last_start: ORIG_ERROR_STARTING_ADDRESS_ESTIMATE, + object_index: 0, + object_spans: Default::default(), + } + } +} + +impl Visit for ObjectPlacementAnalysis { + type Data = (); + + fn new(_data: ()) -> Self { + Default::default() + } + + type Output = (); + fn finish(self) -> (Self::Output, Vec) { + ((), self.errors) + } + + fn exit_file(&mut self, _file: &File, span: &SpanWithSource) { + self.object_spans.sort_unstable_by_key(|span| span.span_in_memory.start); + for (op1, op2) in self.object_spans.iter().tuple_windows() { + if op2.span_in_memory.start < op1.span_in_memory.end { + self.errors.push(Single(span.id.clone(), SingleError::program_blocks_overlap(op1.clone(), op2.clone()))); + } + } + } + + fn exit_program_block(&mut self, _program_block: &ProgramBlock, span: &SpanWithSource, location: &LocationCounter) { + self.object_spans.push( + ProgramBlockPlacement { + position_in_file: self.object_index, + span_in_file: span.clone(), + span_in_memory: self.last_start..location.value + }); + self.object_index += 1; + } + + fn 
exit_orig(&mut self, _orig: &Vec>, _span: &SpanWithSource, location: &LocationCounter) { + self.last_start = location.value; + } +} + + +struct LocationCounter { + value: RoughAddr, + state: LocationCounterState, +} + +impl LocationCounter { + fn new() -> Self { + Self { + value: Default::default(), + state: LocationCounterState::Valid, + } + } + +} + +#[derive(Debug)] +enum LocationCounterState { + Valid, + InvalidOrig, + InvalidInstruction, +} + +impl Default for LocationCounterState { + fn default() -> Self { + LocationCounterState::Valid + } +} + +impl LocationCounterState { + fn if_valid_set(&mut self, state: LocationCounterState) { + if let LocationCounterState::Valid = self { + *self = state; + } + } +} + +fn visit<'a, V, D, O>(data: D, file: &File, span: &SpanWithSource) -> (O, Vec) + where V: Visit +{ + let mut v = V::new(data); + v.enter_file(file, span); + for block in file.blocks.iter() { + visit_program_block(&mut v, file.id.clone(), block); + } + v.exit_file(file, span); + v.finish() +} + +fn visit_program_block(v: &mut impl Visit, id: SourceId, program_block: &WithErrData) { + let (pb_res, span) = program_block; + let span = (id.clone(), span.clone()).into(); + match pb_res { + Err(_) => { v.enter_program_block_error(&span); } + Ok(pb) => { + v.enter_program_block(pb, &span); + + let mut location_counter = LocationCounter::new(); + + let ProgramBlock { orig, instructions } = pb; + visit_orig(v, id.clone(), orig, &mut location_counter); + for instruction in instructions { + visit_instruction(v, id.clone(), instruction, &mut location_counter); + } + + v.exit_program_block(pb, &span, &mut location_counter); + } + } +} + +fn visit_orig(v: &mut impl Visit, id: SourceId, orig: &WithErrData>>, location_counter: &mut LocationCounter) { + let (orig_res, span) = orig; + let span = (id.clone(), span.clone()).into(); + match orig_res { + Err(_) => { + location_counter.value = ORIG_ERROR_STARTING_ADDRESS_ESTIMATE; + 
location_counter.state.if_valid_set(LocationCounterState::InvalidOrig); + v.enter_orig_error(&span); + } + Ok(o) => { + location_counter.value = get(o, 0) + .and_then(|op| Word::try_from(op.clone()).map(|w| w as RoughAddr).ok()) + .unwrap_or_else(| | { + location_counter.state.if_valid_set(LocationCounterState::InvalidOrig); + ORIG_ERROR_STARTING_ADDRESS_ESTIMATE + }); + + v.enter_orig( o, &span, location_counter); + for operand in o { + visit_operand(v, id.clone(), operand, location_counter); + } + + v.exit_orig(o, &span, location_counter); + } + } +} + +fn visit_instruction(v: &mut impl Visit, id: SourceId, instruction: &WithErrData, location_counter: &mut LocationCounter) { + let (inst_res, span) = instruction; + let span = (id.clone(), span.clone()).into(); + match inst_res { + Err(_) => { + v.enter_instruction_error(&span, location_counter); + location_counter.value += INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE; + location_counter.state.if_valid_set(LocationCounterState::InvalidInstruction); + } + Ok(i) => { + v.enter_instruction(i, &span, location_counter); + + let Instruction { label, opcode, operands } = i; + if let Some(l) = label { + visit_label(v, id.clone(), l, location_counter); + } + visit_opcode(v, id.clone(), opcode, location_counter); + visit_operands(v, id.clone(), operands, location_counter); + + v.exit_instruction(i, &span, location_counter); + + location_counter.value += i.addresses_occupied() + .unwrap_or_else(|_| { + location_counter.state.if_valid_set(LocationCounterState::InvalidInstruction); + INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE as Addr + }) as RoughAddr; + } + } +} + +fn visit_label(v: &mut impl Visit, id: SourceId, label: &WithErrData, location_counter: &mut LocationCounter) { + let (label_res, span) = label; + let span = (id, span.clone()).into(); + match label_res { + Err(_) => { v.enter_label_error(&span, location_counter); } + Ok(l) => { v.enter_label( l, &span, location_counter); } + } +} + +fn visit_opcode(v: &mut impl 
Visit, id: SourceId, opcode: &WithErrData, location_counter: &mut LocationCounter) { + let (opcode_res, span) = opcode; + let span = (id, span.clone()).into(); + match opcode_res { + Err(_) => { v.enter_opcode_error(&span, location_counter); } + Ok(oc) => { v.enter_opcode( oc, &span, location_counter); } + } +} + +fn visit_operands(v: &mut impl Visit, id: SourceId, operands: &WithErrData>>, location_counter: &mut LocationCounter) { + let (ops_res, span) = operands; + let span = (id.clone(), span.clone()).into(); + match ops_res { + Err(_) => { v.enter_operands_error(&span, location_counter); } + Ok(o) => { + v.enter_operands( o, &span, location_counter); + for operand in o { + visit_operand(v, id.clone(), operand, location_counter); + } + } + } +} + +fn visit_operand(v: &mut impl Visit, id: SourceId, operand: &WithErrData, location_counter: &mut LocationCounter) { + let (op_res, span) = operand; + let span = (id, span.clone()).into(); + match op_res { + Err(_) => { v.enter_operand_error(&span, location_counter); } + Ok(o) => { v.enter_operand( o, &span, location_counter); } + } +} + +/// A trait for syntax tree visitors, to be used by [`visit`]. +/// +/// This trait is really just a way to separate the logic of different +/// types of analysis. Analysis that can be done in one independent +/// pass over the tree can be encapsulated in its own `Visit` implementation. 
+trait Visit { + type Data; + fn new(data: Self::Data) -> Self; + + type Output; + fn finish(self) -> (Self::Output, Vec); + + fn enter_file(&mut self, _file: &File, _span: &SpanWithSource) {} + fn exit_file(&mut self, _file: &File, _span: &SpanWithSource) {} + + fn enter_program_block_error(&mut self, _span: &SpanWithSource) {} + fn enter_program_block(&mut self, _program_block: &ProgramBlock, _span: &SpanWithSource) {} + fn exit_program_block(&mut self, _program_block: &ProgramBlock, _span: &SpanWithSource, _location: &LocationCounter) {} + + fn enter_orig_error(&mut self, _span: &SpanWithSource) {} + fn enter_orig(&mut self, _orig: &Vec>, _span: &SpanWithSource, _location: &LocationCounter) {} + fn exit_orig(&mut self, _orig: &Vec>, _span: &SpanWithSource, _location: &LocationCounter) {} + + fn enter_instruction_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_instruction(&mut self, _instruction: &Instruction, _span: &SpanWithSource, _location: &LocationCounter) {} + fn exit_instruction(&mut self, _instruction: &Instruction, _span: &SpanWithSource, _location: &LocationCounter) {} + + fn enter_label_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_label(&mut self, _label: &String, _span: &SpanWithSource, _location: &LocationCounter) {} + + fn enter_opcode_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_opcode(&mut self, _opcode: &Opcode, _span: &SpanWithSource, _location: &LocationCounter) {} + + fn enter_operands_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_operands(&mut self, _operands: &Vec>, _span: &SpanWithSource, _location: &LocationCounter) {} + + fn enter_operand_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_operand(&mut self, _operand: &Operand, _span: &SpanWithSource, _location: &LocationCounter) {} +} + +/// Implement [`Visit`] for tuples of [`Visit`]. 
+/// In general, each method is called on the elements of the tuple in sequence, +/// and if there are results, they are combined in a result tuple in the same sequence. +macro_rules! impl_visit_tuple { + () => {}; + ($head:ident $head_data:ident $head_output:ident, $($tail:ident $tail_data:ident $tail_output:ident,)*) => { + impl<$head, $head_data, $head_output, $($tail, $tail_data, $tail_output),*> Visit for ($head, $($tail),*) + where + $head: Visit, + $($tail: Visit),* + { + type Data = ($head_data, $($tail_data),*); + + fn new(($head_data, $($tail_data,)*): Self::Data) -> Self { + ( + $head::new($head_data), + $( + $tail::new($tail_data) + ),* + ) + } + + type Output = ($head_output, $($tail_output),*); + + fn finish(self) -> (Self::Output, Vec) { + let ($head, $($tail),*) = self; + let ($head_output, $head_data) = $head.finish(); + $( + let ($tail_output, $tail_data) = $tail.finish(); + )* + ( + ( + $head_output, + $($tail_output),* + ) + , + concat([ + $head_data, + $($tail_data),* + ]) + ) + } + + fn enter_file(&mut self, file: &File, span: &SpanWithSource) { + let ($head, $($tail,)*) = self; + $head.enter_file(file, span); + $( + $tail.enter_file(file, span); + )* + } + fn exit_file(&mut self, file: &File, span: &SpanWithSource) { + let ($head, $($tail,)*) = self; + $head.exit_file(file, span); + $( + $tail.exit_file(file, span); + )* + } + + fn enter_program_block_error(&mut self, span: &SpanWithSource) { + let ($head, $($tail,)*) = self; + $head.enter_program_block_error(span); + $( + $tail.enter_program_block_error(span); + )* + } + fn enter_program_block(&mut self, program_block: &ProgramBlock, span: &SpanWithSource) { + let ($head, $($tail,)*) = self; + $head.enter_program_block(program_block, span); + $( + $tail.enter_program_block(program_block, span); + )* + } + fn exit_program_block(&mut self, program_block: &ProgramBlock, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + 
$head.exit_program_block(program_block, span, location); + $( + $tail.exit_program_block(program_block, span, location); + )* + } + + fn enter_orig_error(&mut self, span: &SpanWithSource) { + let ($head, $($tail,)*) = self; + $head.enter_orig_error(span); + $( + $tail.enter_orig_error(span); + )* + } + fn enter_orig(&mut self, orig: &Vec>, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_orig(orig, span, location); + $( + $tail.enter_orig(orig, span, location); + )* + } + fn exit_orig(&mut self, orig: &Vec>, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.exit_orig(orig, span, location); + $( + $tail.exit_orig(orig, span, location); + )* + } + + fn enter_instruction_error(&mut self, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_instruction_error(span, location); + $( + $tail.enter_instruction_error(span, location); + )* + } + fn enter_instruction(&mut self, instruction: &Instruction, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_instruction(instruction, span, location); + $( + $tail.enter_instruction(instruction, span, location); + )* + } + fn exit_instruction(&mut self, instruction: &Instruction, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.exit_instruction(instruction, span, location); + $( + $tail.exit_instruction(instruction, span, location); + )* + } + + fn enter_label_error(&mut self, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_label_error(span, location); + $( + $tail.enter_label_error(span, location); + )* + } + fn enter_label(&mut self, label: &String, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_label(label, span, location); + $( + $tail.enter_label(label, span, location); + )* + } + + fn 
enter_opcode_error(&mut self, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_opcode_error(span, location); + $( + $tail.enter_opcode_error(span, location); + )* + } + fn enter_opcode(&mut self, opcode: &Opcode, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_opcode(opcode, span, location); + $( + $tail.enter_opcode(opcode, span, location); + )* + } + + fn enter_operands_error(&mut self, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_operands_error(span, location); + $( + $tail.enter_operands_error(span, location); + )* + } + fn enter_operands(&mut self, operands: &Vec>, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_operands(operands, span, location); + $( + $tail.enter_operands(operands, span, location); + )* + } + + fn enter_operand_error(&mut self, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_operand_error(span, location); + $( + $tail.enter_operand_error(span, location); + )* + } + fn enter_operand(&mut self, operand: &Operand, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_operand(operand, span, location); + $( + $tail.enter_operand(operand, span, location); + )* + } + } + + impl_visit_tuple!($($tail $tail_data $tail_output,)*); + } +} + +impl_visit_tuple!(A DA OA, B DB OB, C DC OC, D DD OD, E DE OE,); + +fn analyze_lex_data(lex_data: &LexData, file_span: &SpanWithSource) -> Vec { + let mut errors = Vec::new(); + if lex_data.no_tokens { + errors.push(Single(file_span.id.clone(), NoTokens)) + } else { + if !lex_data.orig_present { + let start_span = (file_span.id.clone(), file_span.span.start..file_span.span.start).into(); + errors.push(Spanned(start_span, NoOrig)); + } + if !lex_data.end_present { + let end_span = (file_span.id.clone(), 
file_span.span.end..file_span.span.end).into(); + errors.push(Spanned(end_span, NoEnd)); + } + } + errors +} + +/// Identify as many independent errors as possible which are present in the given [`File`](crate::parse::File). +/// +/// An error indicates that assembly will not be successful, and why. +/// If the result is an empty vector, the program is valid and assembly will succeed (save for bugs in analysis or assembly). +/// +/// The [`LexData`](crate::lex::LexData) provides information from lexing that is +/// lost during parsing, but can help produce more useful errors. +/// +/// See [the `error` module](crate::error) for details about the types of errors +/// and how to present them. +/// +/// See the [module-level documentation](crate::analyze) for a discussion of this +/// function's philosophy toward generating errors. In short, it will try to +/// identify a set of independent issues that can be corrected to make +/// the program valid, but not necessarily *all* the issues present. It +/// also makes some assumptions which may not always be correct depending +/// on the intent of the input's programmer. +pub fn validate(lex_data: &LexData, file_spanned: &Spanned, leniency: LeniencyLevel) -> Vec { + let (file, file_span) = file_spanned; + + let file_span_with_source = (file.id.clone(), file_span.clone()).into(); + let errors_from_lex_data = analyze_lex_data(&lex_data, &file_span_with_source); + + // For instructions on how to add a new analysis pass, see `/assembler/docs/analyze.md`. 
+ let ((symbol_table, _, _, _, _), first_pass_errors) = + visit::<( + SymbolTableAnalysis, + ParseErrorsAnalysis, + DuplicateLabelsAnalysis, + OperandTypesAnalysis, + ObjectPlacementAnalysis, + ), _, _>(((), (), (), (), ()), file, &file_span_with_source); + + let (_, second_pass_errors) = + visit::(&symbol_table, file, &file_span_with_source); + + let strict_errors = + if let LeniencyLevel::Strict = leniency { + let (_, errors) = visit::((), file, &file_span_with_source); + errors + } else { + vec![] + }; + + concat([ + errors_from_lex_data, + first_pass_errors, + second_pass_errors, + strict_errors + ]) +} + diff --git a/assembler/src/assemble.rs b/assembler/src/assemble.rs new file mode 100644 index 0000000..834895d --- /dev/null +++ b/assembler/src/assemble.rs @@ -0,0 +1,447 @@ +//! Functions and data structures for assembling the syntax trees produced by [`parse`](crate::parse). +//! +//! This module is for assembling: converting all possible instructions to binary machine code, +//! only leaving those which refer to external labels, which are assembled in +//! the [`link`](crate::link) step. 
+ +use std::collections::HashMap; +use std::convert::{TryFrom, TryInto}; +use std::num::TryFromIntError; +use lc3_isa::{Addr, Reg, SignedWord, Word}; +use crate::lex::{ConditionCodes, LiteralValue, Opcode}; +use crate::parse::Operand; +use crate::{parse, result, try_map, try_result, WithErrData}; + +pub(crate) type SymbolTable = HashMap; + +#[derive(Clone)] +pub(crate) enum Sr2OrImm5 { + Sr2(Reg), + Imm5(SignedWord) +} + +impl TryFrom for Sr2OrImm5 { + type Error = (); + + fn try_from(value: Operand) -> Result { + value.clone().try_into() + .map(Sr2OrImm5::Sr2) + .or_else(|_| + value.try_into() + .map(Sr2OrImm5::Imm5) + .map_err(|_| ())) + } +} + +impl TryFrom for PcOffset { + type Error = (); + + fn try_from(value: Operand) -> Result { + value.clone().try_into() + .map(PcOffset::Number) + .or_else(|_| + value.get_label() + .ok_or(()) + .map(PcOffset::Label)) + } +} + +impl TryFrom for SignedWord { + type Error = (); + + fn try_from(value: Operand) -> Result { + LiteralValue::try_from(value)? + .try_into() + .map_err(|_| ()) + } +} + +impl TryFrom for Word { + type Error = (); + + fn try_from(value: Operand) -> Result { + LiteralValue::try_from(value)? + .try_into() + .map_err(|_| ()) + } +} + +impl TryFrom for u8 { + type Error = (); + + fn try_from(value: Operand) -> Result { + LiteralValue::try_from(value)? 
+ .try_into() + .map_err(|_| ()) + } +} + +impl TryFrom for FillValue { + type Error = (); + + fn try_from(value: Operand) -> Result { + value.clone().try_into() + .map(FillValue::Number) + .or_else(|_| + value.get_label() + .ok_or(()) + .map(FillValue::Label)) + } +} + +#[derive(Clone)] +pub(crate) enum PcOffset { + Number(SignedWord), + Label(String), +} + +#[derive(Clone)] +pub(crate) enum FillValue { + Number(Word), + Label(String), +} + +#[derive(Clone)] +pub(crate) enum Instruction { + Add { dr: Reg, sr1: Reg, sr2_or_imm5: Sr2OrImm5 }, + And { dr: Reg, sr1: Reg, sr2_or_imm5: Sr2OrImm5 }, + Br { cond_codes: ConditionCodes, pc_offset9: PcOffset }, + Jmp { base: Reg }, + Jsr { pc_offset11: PcOffset }, + Jsrr { base: Reg }, + Ld { dr: Reg, pc_offset9: PcOffset }, + Ldi { dr: Reg, pc_offset9: PcOffset }, + Ldr { dr: Reg, base: Reg, offset6: SignedWord }, + Lea { dr: Reg, pc_offset9: PcOffset }, + Not { dr: Reg, sr: Reg }, + Ret, + Rti, + St { sr: Reg, pc_offset9: PcOffset }, + Sti { sr: Reg, pc_offset9: PcOffset }, + Str { sr: Reg, base: Reg, offset6: SignedWord }, + Trap { trap_vec: u8 }, + + Fill { value: FillValue }, + Blkw { size: Addr }, // Addr used here to signify a number of locations, as max value of Addr is number of possible Addrs. + Stringz { string: String }, +} + +impl Instruction { + fn new_trap(trap_vec: u8) -> Self { + Self::Trap { trap_vec } + } + + fn addresses_occupied(&self) -> Addr { + match self { + Instruction::Blkw { size } => *size, + + // +1 is to count the null-terminator + Instruction::Stringz { string } => (string.len() + 1) as Addr, + _ => 1, + } + } +} + +struct FirstPassBlock { + origin: Addr, + instructions: Vec, +} + +/// An assembled, but unlinked program. +/// +/// Every instruction comprising the Object is assembled to binary +/// unless it refers to an external label. +/// +/// Includes a symbol table of labels which other Objects can refer to. 
+pub struct Object { + pub(crate) symbol_table: SymbolTable, + pub(crate) blocks: Vec, +} + +pub(crate) struct ObjectBlock { + pub(crate) origin: Addr, + pub(crate) words: Vec, +} + +#[derive(Clone)] +pub(crate) enum ObjectWord { + Value(Word), + UnlinkedInstruction(Instruction), +} + + +macro_rules! try_map_operands { + ($operands:expr => $variant:ident { $($field:ident),*$(,)* }) + => + { + { + let mut os = $operands.into_iter(); + let i = Instruction::$variant { + $($field: try_map(os.next())?,)* + }; + Ok(i) + } + } +} + +impl TryFrom<(WithErrData, WithErrData>>)> for Instruction { + type Error = (); + + + fn try_from((raw_opcode, raw_operands): (WithErrData, WithErrData>>)) -> Result { + let operands = result(raw_operands)?; + match result(raw_opcode)? { + Opcode::Add => try_map_operands!( operands => Add { dr, sr1, sr2_or_imm5 } ), + Opcode::And => try_map_operands!( operands => And { dr, sr1, sr2_or_imm5 } ), + Opcode::Br(cond_codes) => { + let mut os = operands.into_iter(); + let pc_offset9 = try_map(os.next())?; + Ok(Instruction::Br { cond_codes, pc_offset9 }) + } + Opcode::Jmp => try_map_operands!( operands => Jmp { base }), + Opcode::Jsr => try_map_operands!( operands => Jsr { pc_offset11 }), + Opcode::Jsrr => try_map_operands!( operands => Jsrr { base }), + Opcode::Ld => try_map_operands!( operands => Ld { dr, pc_offset9 }), + Opcode::Ldi => try_map_operands!( operands => Ldi { dr, pc_offset9 }), + Opcode::Ldr => try_map_operands!( operands => Ldr { dr, base, offset6 }), + Opcode::Lea => try_map_operands!( operands => Lea { dr, pc_offset9 }), + Opcode::Not => try_map_operands!( operands => Not { dr, sr }), + Opcode::Ret => Ok(Instruction::Ret), + Opcode::Rti => Ok(Instruction::Rti), + Opcode::St => try_map_operands!( operands => St { sr, pc_offset9 }), + Opcode::Sti => try_map_operands!( operands => Sti { sr, pc_offset9 }), + Opcode::Str => try_map_operands!( operands => Str { sr, base, offset6 }), + Opcode::Trap => try_map_operands!( operands => Trap { 
trap_vec }), + + // TODO: improve error + Opcode::Orig => Err(()), + + Opcode::Fill => try_map_operands!( operands => Fill { value }), + Opcode::Blkw => { + let mut os = operands.into_iter(); + let size = try_result(os.next())?.get_unqualified_number_value().ok_or(())?; + Ok(Instruction::Blkw { size }) + } + Opcode::Stringz => { + let mut os = operands.into_iter(); + let string = try_result(os.next())?.get_string().ok_or(())?; + Ok(Instruction::Stringz { string }) + } + + Opcode::Getc => Ok(Instruction::new_trap(0x20)), + Opcode::Out => Ok(Instruction::new_trap(0x21)), + Opcode::Puts => Ok(Instruction::new_trap(0x22)), + Opcode::In => Ok(Instruction::new_trap(0x23)), + Opcode::Putsp => Ok(Instruction::new_trap(0x24)), + Opcode::Halt => Ok(Instruction::new_trap(0x25)), + } + } +} + +impl TryFrom for ObjectWord { + type Error = (); + + fn try_from(value: parse::Instruction) -> Result { + (value.opcode, value.operands).try_into() + .map(ObjectWord::UnlinkedInstruction) + } +} + +pub(crate) enum AssemblyResult { + SingleObjectWord(ObjectWord), + MultipleObjectWords(Vec), +} + +fn calculate_addr_offset(location_counter: &Addr, label_address: &Addr) -> Result { + calculate_offset(*location_counter as i32, *label_address as i32) +} + +pub(crate) fn calculate_offset(location_counter: i32, label_address: i32) -> Result { + (label_address - (location_counter + 1)).try_into() +} + + +impl From for AssemblyResult { + fn from(i: lc3_isa::Instruction) -> Self { + AssemblyResult::SingleObjectWord(ObjectWord::Value(i.into())) + } +} + + +pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: &Addr, instruction: Instruction) -> Result { + use AssemblyResult::*; + use ObjectWord::*; + + macro_rules! 
assemble_pc_offset { + ($pc_offset:ident => $new_i:ident, $instr:ident { $($field:ident),*$(,)* } ) + => + { + match $pc_offset { + PcOffset::Number(sw) => lc3_isa::Instruction::$new_i($($field,)* sw).into(), + PcOffset::Label(label) => + match symbol_table.get(&label) { + Some(addr) => { + let offset = calculate_addr_offset(location_counter, addr)?; + lc3_isa::Instruction::$new_i($($field,)* offset).into() + } + None => SingleObjectWord(UnlinkedInstruction(Instruction::$instr { $($field,)* $pc_offset: PcOffset::Label(label)})), + } + } + } + } + + let res = match instruction { + Instruction::Add { dr, sr1, sr2_or_imm5 } => + match sr2_or_imm5 { + Sr2OrImm5::Sr2(sr2) => lc3_isa::Instruction::new_add_reg(dr, sr1, sr2), + Sr2OrImm5::Imm5(imm5) => lc3_isa::Instruction::new_add_imm(dr, sr1, imm5), + }.into(), + Instruction::And { dr, sr1, sr2_or_imm5 } => + match sr2_or_imm5 { + Sr2OrImm5::Sr2(sr2) => lc3_isa::Instruction::new_and_reg(dr, sr1, sr2), + Sr2OrImm5::Imm5(imm5) => lc3_isa::Instruction::new_and_imm(dr, sr1, imm5), + }.into(), + Instruction::Br { cond_codes: ConditionCodes { n, z, p }, pc_offset9 } => { + match pc_offset9 { + PcOffset::Number(sw) => lc3_isa::Instruction::new_br(n, z, p, sw).into(), + PcOffset::Label(label) => + match symbol_table.get(&label) { + Some(addr) => { + let offset = calculate_addr_offset(location_counter, addr)?; + lc3_isa::Instruction::new_br(n, z, p, offset).into() + } + None => SingleObjectWord(UnlinkedInstruction(Instruction::Br { cond_codes: ConditionCodes { n, z, p }, pc_offset9: PcOffset::Label(label) })), + } + } + } + Instruction::Jmp { base } => lc3_isa::Instruction::new_jmp(base).into(), + Instruction::Jsr { pc_offset11 } => assemble_pc_offset!(pc_offset11 => new_jsr, Jsr {}), + Instruction::Jsrr { base } => lc3_isa::Instruction::new_jsrr(base).into(), + Instruction::Ld { dr, pc_offset9 } => assemble_pc_offset!(pc_offset9 => new_ld, Ld { dr, }), + Instruction::Ldi { dr, pc_offset9 } => assemble_pc_offset!(pc_offset9 => 
new_ldi, Ldi { dr, }), + Instruction::Ldr { dr, base, offset6 } => lc3_isa::Instruction::new_ldr(dr, base, offset6).into(), + Instruction::Lea { dr, pc_offset9 } => assemble_pc_offset!(pc_offset9 => new_lea, Lea { dr, }), + Instruction::Not { dr, sr } => lc3_isa::Instruction::new_not(dr, sr).into(), + Instruction::Ret => lc3_isa::Instruction::new_ret().into(), + Instruction::Rti => lc3_isa::Instruction::new_rti().into(), + Instruction::St { sr, pc_offset9 } => assemble_pc_offset!(pc_offset9 => new_st, St { sr, }), + Instruction::Sti { sr, pc_offset9 } => assemble_pc_offset!(pc_offset9 => new_sti, Sti { sr, }), + Instruction::Str { sr, base, offset6 } => lc3_isa::Instruction::new_str(sr, base, offset6).into(), + Instruction::Trap { trap_vec } => lc3_isa::Instruction::new_trap(trap_vec).into(), + + Instruction::Fill { value } => { + match value { + FillValue::Number(sw) => SingleObjectWord(Value(sw)), + FillValue::Label(label) => + match symbol_table.get(&label) { + Some(addr) => SingleObjectWord(Value(*addr)), + None => SingleObjectWord(UnlinkedInstruction(Instruction::Fill { value: FillValue::Label(label) })), + } + } + } + + Instruction::Blkw { size } => MultipleObjectWords( + std::iter::repeat(Value(0x00)) + .take(size as usize) + .collect()), + Instruction::Stringz { string } => { + let mut chars = string.chars() + .map(|c| Value(c as Word)) + .collect::>(); + chars.push(Value(0x00)); // null-terminator + MultipleObjectWords(chars) + } + }; + Ok(res) +} + +type ParserInstructions = Vec>; + +fn first_pass(program_block_data: impl IntoIterator) -> Result<(Vec, SymbolTable), ()> { + let mut fp_blocks = Vec::new(); + let mut symbol_table = HashMap::new(); + + for (origin, parser_instructions) in program_block_data { + let mut instructions = Vec::new(); + let mut location_counter = origin; + + for raw_instruction in parser_instructions.into_iter() { + let parser_instruction = result(raw_instruction)?; + if let Some(l) = parser_instruction.label { + 
symbol_table.insert(result(l)?, location_counter); + }; + + let instruction: Instruction = (parser_instruction.opcode, parser_instruction.operands).try_into()?; + let addresses_used = instruction.addresses_occupied(); + instructions.push(instruction); + + location_counter += addresses_used; + } + + fp_blocks.push(FirstPassBlock { origin, instructions }); + } + + Ok((fp_blocks, symbol_table)) +} + +fn second_pass_one_block(symbol_table: &SymbolTable, fp_block: FirstPassBlock) -> Result { + let FirstPassBlock { origin, instructions } = fp_block; + + let mut words = Vec::new(); + let mut location_counter = origin; + + for instruction in instructions.into_iter() { + let addresses_used = instruction.addresses_occupied(); + match assemble_instruction(&symbol_table, &location_counter, instruction)? { + AssemblyResult::SingleObjectWord(wd) => { words.push(wd); } + AssemblyResult::MultipleObjectWords(wds) => { words.extend(wds); } + } + location_counter += addresses_used; + } + + Ok(ObjectBlock { origin, words }) +} + +fn second_pass(symbol_table: SymbolTable, fp_blocks: Vec) -> Result { + let blocks = + fp_blocks.into_iter() + .map(|fp_block| second_pass_one_block(&symbol_table, fp_block)) + .collect::, TryFromIntError>>()?; + + Ok(Object { symbol_table, blocks }) +} + +fn get_orig(orig_operands: WithErrData>>) -> Result { + let orig_operand = result(orig_operands)?.remove(0); + result(orig_operand)?.try_into() +} + +/// Assemble the given syntax tree. +/// +/// All instructions are converted to binary machine code, +/// except those which refer to labels in other files. +/// +/// *May* return `Err` if the program is invalid, +/// but for ease of assembly, **not all errors are checked**, +/// and the `Err` will not contain information on +/// why the error occurred. 
**For full error checking and detailed feedback, +/// you should [`validate`](crate::analyze::validate) the input first.** +/// +/// All labels defined in the file are treated as +/// global to all program blocks in the file. +/// In a sense, all the program blocks are +/// "automatically linked." +pub fn assemble(file: parse::File) -> Result { + let block_data = + file.blocks.into_iter() + .map(|p| { + let parse::ProgramBlock { orig, instructions } = result(p)?; + let origin = get_orig(orig)?; + Ok((origin, instructions)) + }) + .collect::, ()>>()?; + + let (fp_blocks, symbol_table) = first_pass(block_data)?; + + second_pass(symbol_table, fp_blocks).map_err(|_| ()) +} \ No newline at end of file diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs deleted file mode 100644 index e902043..0000000 --- a/assembler/src/assembler.rs +++ /dev/null @@ -1,65 +0,0 @@ -use crate::expanded::{expand_pseudo_ops, build_symbol_table, validate_placement, construct_instructions, CompleteObject, InsnOrValue, InsnOrValueWithSrc}; -use crate::cst; -use lc3_isa::{ADDR_SPACE_SIZE_IN_WORDS, Addr, Word}; - -use lc3_isa::util::MemoryDump; -use lc3_os::USER_PROG_START_ADDR; - -pub struct QueryableObject<'input> { - segments: Vec> -} - -impl<'input> QueryableObject<'input> { - pub fn get_source(&self, address: Addr) -> Option> { - self.segments.iter() - .map(|o| o.get_source(address)) - .find(Option::is_some) - .flatten() - } -} - -pub fn assemble<'input, O>(objects: O, background: Option) -> MemoryDump - where O: IntoIterator> -{ - let complete_objects = assemble_to_queryable_objects(objects); - assemble_queryable_objects(complete_objects, background) -} - - -pub fn assemble_to_queryable_objects<'input, O>(objects: O) -> QueryableObject<'input> - where O: IntoIterator> -{ - let expanded_objects = objects.into_iter().map(expand_pseudo_ops).collect(); - validate_placement(&expanded_objects).unwrap(); - let segments = expanded_objects.into_iter() - .map(|o| { - let symbol_table 
= build_symbol_table(&o).unwrap(); - construct_instructions(o, symbol_table) - }) - .collect(); - QueryableObject { segments } -} - - -pub fn assemble_queryable_objects(queryable_object: QueryableObject, background: Option) -> MemoryDump { - let has_background = background.is_some(); - let mut memory = background.unwrap_or(MemoryDump([0x0000; ADDR_SPACE_SIZE_IN_WORDS])); - let mut orig_set = false; - for complete_object in queryable_object.segments { - let mut i = complete_object.orig as usize; - if has_background && !orig_set { - memory[USER_PROG_START_ADDR as usize] = i as Word; - orig_set = true; - } - for insn_or_value_with_src in complete_object.insns_or_values { - let InsnOrValueWithSrc { insn_or_value, .. } = insn_or_value_with_src; - memory[i] = match insn_or_value { - InsnOrValue::Instruction(insn) => insn.into(), - InsnOrValue::Value(value) => value, - }; - i += 1; - } - } - - memory -} diff --git a/assembler/src/complete.rs b/assembler/src/complete.rs deleted file mode 100644 index 02933c9..0000000 --- a/assembler/src/complete.rs +++ /dev/null @@ -1,20 +0,0 @@ -/// In my hubris, I thought the CST was complete enough. -/// As it turns out, it was nowhere near. -/// -/// This module is an attempt to rectify my error in one behemoth structure. -/// When `complete` is complete, it will replace `cst` in name and the latter -/// will become `ir4_validated_objects`. -/// -/// The main difference here is that `complete` will store as much data as possible -/// relating to the source *and* what it will be assembled to. -/// This will allow querying for the source assembled to a memory location, -/// the addresses corresponding to labels, and whatever is required in the future -/// to provide a nice development environment. -/// -/// `cst` previously stopped where all errors could be represented as part of the tree. -/// `complete` will continue by assembling as much as possible and bringing that data in. 
-pub struct Program { - -} - - diff --git a/assembler/src/cst.rs b/assembler/src/cst.rs deleted file mode 100644 index ff12b3b..0000000 --- a/assembler/src/cst.rs +++ /dev/null @@ -1,518 +0,0 @@ -use lc3_isa::{Addr, SignedWord, check_signed_imm, Word}; -use crate::error::{ParseError, InvalidLabelReason, InvalidRegReason, InvalidImmediateReason}; -use crate::lexer::Token; -use crate::ir2_lines::{Line, OperationTokens, OperandTokens}; -use crate::ir3_unvalidated_objects::{UnvalidatedFile, UnvalidatedObject, UnvalidatedLine, UnvalidatedObjectContent}; -use std::convert::TryInto; -use num_traits::Num; -use std::string::ToString; -use crate::parser::LeniencyLevel; - -#[derive(Clone, Debug)] -pub struct File<'input> { - pub objects: Vec>, - pub ignored: Vec>, -} - -#[derive(Clone, Debug)] -pub struct Object<'input> { - pub origin_src: Operation<'input>, - pub origin: Immediate<'input, Addr>, - pub content: ObjectContent<'input>, -} - -#[derive(Clone, Debug)] -pub struct ObjectContent<'input> { - pub operations: Vec>, - pub empty_lines: Vec>, - pub hanging_labels: Vec>, - pub invalid_lines: Vec>, -} - -pub type Label<'input> = Checked<'input, &'input str>; -pub type Separator<'input> = Token<'input>; - -// Different from lc3_isa::Instruction in that offsets from labels aren't computed. -// Also covers pseudo-ops. 
-#[derive(Clone, Debug)] -pub struct Operation<'input> { - pub label: Option>, - pub operator: Token<'input>, - pub nzp: Result, ParseError>, - pub operands: Operands<'input>, - - pub src_lines: Vec, - pub separators: Vec>, - pub whitespace: Vec>, - pub comments: Vec>, - pub newlines: Vec>, -} - -#[derive(Clone, Debug)] -pub struct Checked<'input, T> { - pub src: Token<'input>, - pub value: Result, -} - -impl<'input, T> Checked<'input, T> { - pub fn unwrap(self) -> T { - self.value.unwrap() - } - - pub fn extract_error_into(self, errors: &mut Vec) { - if let Err(error) = self.value { - errors.push(error); - } - } -} - -pub type Reg<'input> = Checked<'input, lc3_isa::Reg>; -pub type Immediate<'input, T> = Checked<'input, T>; - -#[derive(Clone, Debug)] -pub enum Sr2OrImm5<'input> { - Sr2(Reg<'input>), - Imm5(Immediate<'input, SignedWord>), -} - -#[derive(Clone, Debug)] -pub enum ImmOrLabel<'input> { - Imm(Immediate<'input, SignedWord>), - Label(Label<'input>), -} - -#[derive(Clone, Debug)] -pub enum UnsignedImmOrLabel<'input> { - Imm(Immediate<'input, Word>), - Label(Label<'input>), -} - -#[derive(Clone, Debug)] -pub struct ConditionCodes { - pub n: bool, - pub z: bool, - pub p: bool, -} - -type PCOffset<'input> = Checked<'input, ImmOrLabel<'input>>; - -#[derive(Clone, Debug)] -pub enum Operands<'input> { - Add { dr: Reg<'input>, sr1: Reg<'input>, sr2_or_imm5: Checked<'input, Sr2OrImm5<'input>> }, - And { dr: Reg<'input>, sr1: Reg<'input>, sr2_or_imm5: Checked<'input, Sr2OrImm5<'input>> }, - Br { pc_offset9: PCOffset<'input> }, - Jmp { base: Reg<'input> }, - Jsr { pc_offset11: PCOffset<'input> }, - Jsrr { base: Reg<'input> }, - Ld { dr: Reg<'input>, pc_offset9: PCOffset<'input> }, - Ldi { dr: Reg<'input>, pc_offset9: PCOffset<'input> }, - Ldr { dr: Reg<'input>, base: Reg<'input>, offset6: Immediate<'input, SignedWord> }, - Lea { dr: Reg<'input>, pc_offset9: PCOffset<'input> }, - Not { dr: Reg<'input>, sr: Reg<'input> }, - Ret, - Rti, - St { sr: Reg<'input>, 
pc_offset9: PCOffset<'input> }, - Sti { sr: Reg<'input>, pc_offset9: PCOffset<'input> }, - Str { sr: Reg<'input>, base: Reg<'input>, offset6: Immediate<'input, SignedWord> }, - Trap { trap_vec: Immediate<'input, u8> }, - - Getc, - Out, - Puts, - In, - Putsp, - Halt, - - Orig { origin: Immediate<'input, Addr> }, - Fill { value: Checked<'input, UnsignedImmOrLabel<'input>> }, - Blkw { size_src: Token<'input>, size: Immediate<'input, Addr> }, // Addr used here to signify a number of locations. Max is number of possible Addrs. - Stringz { string: Checked<'input, String> }, - End, -} - -pub struct CstParser { - pub leniency: LeniencyLevel, -} - -impl CstParser { - - pub fn parse_cst<'input>(&self, file: UnvalidatedFile<'input>) -> File<'input> { - let UnvalidatedFile { objects, ignored } = file; - File { - objects: objects.into_iter().map(|o| self.validate_object(o)).collect(), - ignored - } - } - - fn validate_object<'input>(&self, object: UnvalidatedObject<'input>) -> Object<'input> { - let UnvalidatedObject { origin_src, origin, content } = object; - let UnvalidatedObjectContent { operations, empty_lines, hanging_labels, invalid_lines } = content; - Object { - origin_src: self.validate_line(origin_src), - origin: self.validate_numeric_immediate(origin), - content: ObjectContent { - operations: operations.into_iter().map(|o| self.validate_line(o)).collect(), - empty_lines, - hanging_labels, - invalid_lines - } - } - } - - fn validate_line<'input>(&self, line: UnvalidatedLine<'input>) -> Operation<'input> { - let UnvalidatedLine { - label, - operation: OperationTokens { - operator, - operands, - separators, - }, - whitespace, - comments, - newlines, - src_lines, - } = line.clone(); - - Operation { - label: label.map(|l| self.validate_label(l)), - operator, - nzp: self.validate_condition_codes(&operator), - operands: self.validate_operand_tokens(operands), - separators, - whitespace, - comments, - newlines, - src_lines, - } - } - - fn 
validate_operand_tokens<'input>(&self, operands: OperandTokens<'input>) -> Operands<'input> { - match operands { - OperandTokens::Add { dr, sr1, sr2_or_imm5 } => - Operands::Add { - dr: self.validate_reg(dr), - sr1: self.validate_reg(sr1), - sr2_or_imm5: self.validate_sr2_or_imm5(sr2_or_imm5) - }, - OperandTokens::And { dr, sr1, sr2_or_imm5 } => - Operands::And { - dr: self.validate_reg(dr), - sr1: self.validate_reg(sr1), - sr2_or_imm5: self.validate_sr2_or_imm5(sr2_or_imm5) - }, - OperandTokens::Br { label } => { - Operands::Br { pc_offset9: self.validate_imm_or_label(label, 9), } - }, - OperandTokens::Jmp { base } => Operands::Jmp { base: self.validate_reg(base) }, - OperandTokens::Jsr { label } => Operands::Jsr { pc_offset11: self.validate_imm_or_label(label, 11) }, - OperandTokens::Jsrr { base } => Operands::Jsrr { base: self.validate_reg(base) }, - OperandTokens::Ld { dr, label } => Operands::Ld { dr: self.validate_reg(dr), pc_offset9: self.validate_imm_or_label(label, 9) }, - OperandTokens::Ldi { dr, label } => Operands::Ldi { dr: self.validate_reg(dr), pc_offset9: self.validate_imm_or_label(label, 9) }, - OperandTokens::Ldr { dr, base, offset6 } => - Operands::Ldr { - dr: self.validate_reg(dr), - base: self.validate_reg(base), - offset6: self.validate_signed_immediate(offset6, 6), - }, - OperandTokens::Lea { dr, label } => Operands::Lea { dr: self.validate_reg(dr), pc_offset9: self.validate_imm_or_label(label, 9) }, - OperandTokens::Not { dr, sr } => Operands::Not { dr: self.validate_reg(dr), sr: self.validate_reg(sr) }, - OperandTokens::Ret => Operands::Ret, - OperandTokens::Rti => Operands::Rti, - OperandTokens::St { sr, label } => Operands::St { sr: self.validate_reg(sr), pc_offset9: self.validate_imm_or_label(label, 9) }, - OperandTokens::Sti { sr, label } => Operands::Sti { sr: self.validate_reg(sr), pc_offset9: self.validate_imm_or_label(label, 9) }, - OperandTokens::Str { sr, base, offset6 } => - Operands::Str { - sr: self.validate_reg(sr), - base: 
self.validate_reg(base), - offset6: self.validate_signed_immediate(offset6, 6), - }, - OperandTokens::Trap { trap_vec } => Operands::Trap { trap_vec: self.validate_numeric_immediate(trap_vec) }, - - OperandTokens::Getc => Operands::Getc, - OperandTokens::Out => Operands::Out, - OperandTokens::Puts => Operands::Puts, - OperandTokens::In => Operands::In, - OperandTokens::Putsp => Operands::Putsp, - OperandTokens::Halt => Operands::Halt, - - OperandTokens::Orig { origin } => Operands::Orig { origin: self.validate_numeric_immediate(origin) }, - OperandTokens::Fill { value } => Operands::Fill { value: self.validate_unsigned_imm_or_label(value) }, - OperandTokens::Blkw { size } => Operands::Blkw { size_src: size, size: self.validate_blkw_immediate(size) }, - OperandTokens::Stringz { string } => Operands::Stringz { string: self.validate_string(string) }, - OperandTokens::End => Operands::End, - } - } - - fn validate_sr2_or_imm5<'input>(&self, src: Token<'input>) -> Checked<'input, Sr2OrImm5<'input>> { - let reg = self.validate_reg(src); - let imm5 = self.validate_signed_immediate(src, 5); - let value = if let Reg { value: Ok(_), .. } = reg { - Ok(Sr2OrImm5::Sr2(reg)) - } else if let Immediate { value: Ok(_), .. } = imm5 { - Ok(Sr2OrImm5::Imm5(imm5)) - } else { - if let Reg { value: Err(ParseError::InvalidReg { reason: invalid_reg_reason, .. }), .. } = reg { - if let Immediate { value: Err(ParseError::InvalidImmediate { reason: invalid_imm5_reason, .. }), .. } = imm5 { - Err(ParseError::InvalidRegOrImm5 { - range: src.span, - invalid_reg_reason, - invalid_imm5_reason, - }) - } else { - unreachable!() - } - } else { - unreachable!() // TODO: use something cleaner like a match for this - } - }; - Checked { src, value } - } - - fn validate_reg<'input>(&self, src: Token<'input>) -> Reg<'input> { - let value = if let Some("r") | Some("R") = src.src.get(..=0) { - src.src.get(1..) 
- .filter(|s| s.len() == 1) - .and_then(|s| s.parse::().ok()) - .and_then(|i| i.try_into().ok()) - .ok_or(ParseError::InvalidReg { - range: src.span, - reason: InvalidRegReason::Number, - }) - } else { - Err(ParseError::InvalidReg { - range: src.span, - reason: InvalidRegReason::FirstChar, - }) - }; - Reg { src, value } - } - - fn validate_numeric_immediate<'input, T: Num>(&self, src: Token<'input>) -> Immediate<'input, T> { - let Token { src: str, span, .. } = src; - let value = if let Some(str_head) = str.get(..=0) { - let (str_head, offset) = match str.get(0..2) { - Some("0b") | Some("0x") => (str.get(1..2).unwrap(), 2), - Some(_) => (str_head, 1), - // If we don't have two chars, just pass it along? - None => (str_head, 1), - }; - - let radix = match str_head { - "b" => Some(2), - "#" => Some(10), - "x" => Some(16), - _ => None - }; - if let Some(radix) = radix { - if let Some(src_tail) = src.src.get(offset..) { - T::from_str_radix(src_tail, radix) - .map_err(|_| InvalidImmediateReason::Number { actual: src_tail.to_string() }) - } else { - Err(InvalidImmediateReason::NoNumber) - } - } else { - Err(InvalidImmediateReason::RadixChar { actual: str_head.to_string() }) - } - } else { - Err(InvalidImmediateReason::NoChars) - }.map_err(|reason| ParseError::InvalidImmediate { - range: span, - reason - }); - - Immediate { src, value } - } - - fn validate_signed_immediate<'input>(&self, src: Token<'input>, num_bits: u32) -> Immediate<'input, SignedWord> { - let Immediate { src, value } = self.validate_numeric_immediate(src); - let value = match value { - Ok(i) => { - if check_signed_imm(i, num_bits) { - Ok(i) - } else { - Err(ParseError::InvalidImmediate { - range: src.span, - reason: InvalidImmediateReason::OutOfRange { value: i, num_bits } - }) - } - } - error => error // TODO: look for appropriate combinator(s)? 
- }; - Immediate { src, value } - } - - fn validate_imm_or_label<'input>(&self, src: Token<'input>, num_bits: u32) -> Checked<'input, ImmOrLabel<'input>> { - let label = self.validate_label(src); - let imm = self.validate_signed_immediate(src, num_bits); - let value = if let Label { value: Ok(_), .. } = label { - Ok(ImmOrLabel::Label(label)) - } else if let Immediate { value: Ok(_), .. } = imm { - Ok(ImmOrLabel::Imm(imm)) - } else { - if let Label { value: Err(ParseError::InvalidLabel { reasons: invalid_label_reasons, .. }), .. } = label { - if let Immediate { value: Err(ParseError::InvalidImmediate { reason: invalid_immediate_reason, .. }), .. } = imm { - Err(ParseError::InvalidLabelOrImmediate { - range: src.span, - invalid_label_reasons, - invalid_immediate_reason - }) - } else { - unreachable!() - } - } else { - unreachable!() // TODO: use something cleaner like a match for this - } - }; - Checked { src, value } - } - - fn validate_unsigned_imm_or_label<'input>(&self, src: Token<'input>) -> Checked<'input, UnsignedImmOrLabel<'input>> { - let label = self.validate_label(src); - let imm = self.validate_numeric_immediate(src); - let value = if let Immediate { value: Ok(_), .. } = imm { - Ok(UnsignedImmOrLabel::Imm(imm)) - } else if let Label { value: Ok(_), .. } = label { - Ok(UnsignedImmOrLabel::Label(label)) - } else { - if let Label { value: Err(ParseError::InvalidLabel { reasons: invalid_label_reasons, .. }), .. } = label { - if let Immediate { value: Err(ParseError::InvalidImmediate { reason: invalid_immediate_reason, .. }), .. 
} = imm { - Err(ParseError::InvalidLabelOrImmediate { - range: src.span, - invalid_label_reasons, - invalid_immediate_reason - }) - } else { - unreachable!() - } - } else { - unreachable!() // TODO: use something cleaner like a match for this - } - }; - Checked { src, value } - } - - fn validate_label<'input>(&self, src: Token<'input>) -> Label<'input> { - let label = src.src; - - let length = label.len(); - let valid_length = if self.leniency.long_labels_allowed() { - length >= 1 - } else { - (1..=20).contains(&length) - }; - - let mut chars = label.chars(); - let first_char = chars.next(); - let first_char_alphabetic = first_char.filter(|c| c.is_alphabetic()).is_some(); - - let mut other_chars = chars.collect::>(); - other_chars.retain(|&c| !(c.is_alphanumeric() || c == '_')); - let other_chars_alphanumeric = other_chars.len() == 0; - - let mut invalidation_reasons = Vec::new(); - if !valid_length { - invalidation_reasons.push(InvalidLabelReason::Length { actual: length.clone() }); - } - if !first_char_alphabetic { - invalidation_reasons.push(InvalidLabelReason::FirstChar { actual: first_char }); - } - if !other_chars_alphanumeric { - invalidation_reasons.push(InvalidLabelReason::OtherChars { actual: other_chars.into_iter().collect::() }); - } - - let value = if invalidation_reasons.len() == 0 { - Ok(label) - } else { - Err(ParseError::InvalidLabel { - range: src.span, - reasons: invalidation_reasons, - }) - }; - - Label { src, value } - } - - fn validate_condition_codes(&self, src: &Token) -> Result, ParseError> { - let str = src.src; - if str.to_uppercase().starts_with("BR") { - let mut n = false; - let mut z = false; - let mut p = false; - for c in str[2..].to_lowercase().chars() { - match c { - // TODO: prettify with macro or non-iterative solution - 'n' => { - if n { return Err(ParseError::Misc("Duplicate condition code n.".to_string())); } - n = true; - }, - 'z' => { - if z { return Err(ParseError::Misc("Duplicate condition code z.".to_string())); } - z = 
true; - }, - 'p' => { - if p { return Err(ParseError::Misc("Duplicate condition code p.".to_string())); } - p = true; - }, - _ => { return Err(ParseError::Misc("Invalid condition codes.".to_string())) }, - } - } - if !(n || z || p) { - n = true; - z = true; - p = true; - } - Ok(Some(ConditionCodes { n, z, p })) - } else { - Ok(None) - } - } - - fn validate_blkw_immediate<'input>(&self, src: Token<'input>) -> Immediate<'input, Addr> { - Immediate { - src, - value: src.src.parse().map_err(|_| ParseError::Misc("Invalid BLKW immediate.".to_string())) - } - } - - fn validate_string<'input>(&self, src: Token<'input>) -> Checked<'input, String> { - let mut string = src.src.to_string(); - // remove start and end quote - string.pop(); - string.remove(0); - // remove escape characters - string = string - .replace(r#"\""#, r#"""#) - .replace(r#"\\"#, r#"\"#) - .replace(r#"\n"#, "\n"); - let value = Ok(string); - Checked { src, value } - } -} - -#[cfg(test)] -mod immediate_tests { - use super::*; - use pretty_assertions::assert_eq; - - fn single_test(num: &str, actual: N) { - let p = CstParser { leniency: LeniencyLevel::Lenient }; - - let tok = Token { src: num, span: (0, 0), ty: crate::lexer::TokenType::Ambiguous }; - - assert_eq!(actual, p.validate_numeric_immediate(tok).value.unwrap()); - } - - #[test] - fn regular() { - single_test("0x123", 0x123); - single_test("0x0123", 0x0123); - single_test("0b0101", 0b0101); - } - - #[test] - fn patt_style() { - single_test("#100", 100); - single_test("x456", 0x456); - single_test("b0101", 0b0101); - } -} diff --git a/assembler/src/error.rs b/assembler/src/error.rs index b367c6f..2d8272a 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -1,328 +1,530 @@ -use std::fmt::{Display, Formatter, Result}; -use crate::lexer::Span; -use annotate_snippets::snippet::{Snippet, Annotation, Slice, SourceAnnotation, AnnotationType}; - -use ParseError::*; -use itertools::Itertools; -use crate::cst; -use crate::cst::{Object, 
ObjectContent, Operation, Operands}; +//! Error types and associated functions. + +use ariadne::{Label, Report, ReportBuilder, ReportKind}; +use std::cmp::max; use lc3_isa::SignedWord; -use crate::ir2_lines::LineContent::Invalid; -use annotate_snippets::display_list::FormatOptions; +use std::fmt::{Display, Formatter}; +use crate::{SourceId, Span, SpanWithSource, util}; +use crate::lex; +use crate::lex::{LiteralValue}; +use crate::parse::Operand; +use std::ops::Range; + + +/// This crate's primary error type. Can represent multiple errors from the entire assembly process. +#[derive(Debug)] +pub enum Error { + /// A single error and the ID of the source file which caused it. + Single(SourceId, SingleError), + /// A single error and a span indicating the main substring of source code which caused it. + Spanned(SpanWithSource, SingleError), + /// A set of errors. + Multiple(Vec), +} -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum LexError { - Unknown, +pub(crate) fn into_multiple(id: SourceId, es: Vec) -> Error + where (SourceId, E): Into +{ + let errors = + es.into_iter() + .map(|e| (id.clone(), e).into()) + .collect(); + Error::Multiple(errors) } +impl From> for Error +{ + fn from(es: Vec) -> Self { + Error::Multiple(es) + } +} -#[derive(Debug, Clone)] -pub enum ParseError { - InvalidReg { - range: Span, - reason: InvalidRegReason - }, - InvalidLabel { - range: Span, - reasons: Vec, - }, - InvalidImmediate { - range: Span, - reason: InvalidImmediateReason - }, - HangingLabel { - range: Span, - }, - InvalidLine { - range: Option, - }, - InvalidRegOrImm5 { - range: Span, - invalid_reg_reason: InvalidRegReason, - invalid_imm5_reason: InvalidImmediateReason, - }, - InvalidLabelOrImmediate { - range: Span, - invalid_label_reasons: Vec, - invalid_immediate_reason: InvalidImmediateReason, - }, - Misc(String), +impl From<(SourceId, E)> for Error + where E: Into +{ + fn from((id, e): (SourceId, E)) -> Self { + Error::Single(id, e.into()) + } } -#[derive(Debug, Clone)] 
-pub enum InvalidRegReason { - FirstChar, - Number, +impl From for SingleError { + fn from(e: std::io::Error) -> Self { Io(e) } } -impl Display for InvalidRegReason { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - use InvalidRegReason::*; - match self { - FirstChar => { write!(f, "didn't start with R") } - Number => { write!(f, "didn't follow R with only 0-7") } - } +impl From<(SourceId, chumsky::error::Simple)> for Error { + fn from((id, e): (SourceId, chumsky::error::Simple)) -> Self { + let span = SpanWithSource { id, span: e.span() }; + Error::Spanned(span, Lex(e)) } } -#[derive(Debug, Clone)] -pub enum InvalidImmediateReason { - NoChars, - RadixChar { actual: String }, - NoNumber, - Number { actual: String }, - OutOfRange { value: SignedWord, num_bits: u32 }, +impl From<(SourceId, chumsky::error::Simple)> for Error { + fn from((id, e): (SourceId, chumsky::error::Simple)) -> Self { + let span = SpanWithSource { id, span: e.span() }; + Error::Spanned(span, Parse(e)) + } } -impl Display for InvalidImmediateReason { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - use InvalidImmediateReason::*; +impl Error { + /// Produce a set of error reports for this [`Error`], which can then be printed. + /// + /// One report will be produced for each [`SingleError`] in the [`Error`]. + /// Each report for an [`Error::Spanned`] will annotate the substring + /// which caused the error. + /// + /// To print the reports, you will need an appropriate [`ariadne::Cache`]; + /// use [`sources`](crate::sources). 
+ pub fn report(self) -> Vec> { + use Error::*; match self { - NoChars => { write!(f, "didn't have any characters") } - NoNumber => { write!(f, "didn't follow radix sign with number") } - RadixChar { actual } => { write!(f, "didn't use valid radix sign (was: {})", actual) } - Number { actual } => { write!(f, "couldn't parse number (was: {})", actual) } - OutOfRange { value, num_bits } => { write!(f, "value {} can't be represented in {} bits", value, num_bits)} + Single(id, error) => vec![report_single(id, None, error).finish()], + Spanned(span, error) => { + let SpanWithSource { id, span: s } = span.clone(); + vec![ + report_single(id, Some(s), error) + .with_label(Label::new(span).with_message("here")) + .finish() + ] + } + Multiple(errors) => + errors.into_iter() + .flat_map(|e| e.report()) + .collect() } } -} -#[derive(Debug, Clone)] -pub enum InvalidLabelReason { - Length { actual: usize }, - FirstChar { actual: Option }, - OtherChars { actual: String }, -} + /// Produce a `String` containing error messages for this [`Error`]. + /// + /// To create an appropriate `cache`, use [`sources`](crate::sources). + pub fn report_to_string(self, mut cache: impl ariadne::Cache) -> Result { + let mut s = Vec::new(); + for report in self.report() { + report.write(&mut cache, &mut s)?; + } + Ok(String::from_utf8_lossy(&s).to_string()) + } -impl Display for InvalidLabelReason { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - use InvalidLabelReason::*; + /// Return the first [`SingleError`] in this [`Error`], if it contains any, otherwise `None`. + /// + /// Can be used to present only one error in a set, + /// or to get the only error in an [`Error`] + /// that is known to only contain one. 
+ pub fn get_first_single_error(&self) -> Option<&SingleError> { + use Error::*; match self { - Length { actual } => { write!(f, "not between 1-20 chars (was: {})", actual) } - FirstChar { actual } => { write!(f, "first char not alphabetic (was: {:?})", actual) }, - OtherChars { actual } => { write!(f, "other chars not alphanumeric or underscores (bad chars: {})", actual) }, + Single(_, error) => Some(error), + Spanned(_, error) => Some(error), + Multiple(errors) => + match errors.get(0) { + Some(e) => e.get_first_single_error(), + None => None, + }, } } } -impl ParseError { - pub fn message(&self) -> String { +pub(crate) type RoughAddr = i32; + +use SingleError::*; + +/// An independent error without associated location data. +#[derive(Debug)] +pub enum SingleError { + /// A `std::io::Error`. + Io(std::io::Error), + /// An error which occurred during lexing. + /// + /// Lexing attempts to be error-tolerant, + /// successfully producing invalid tokens for invalid input, + /// so this error indicates a bug in [`lex`](crate::lex). + Lex(chumsky::error::Simple), + /// An error which occurred during parsing. + /// + /// Parsing attempts to be error-tolerant, + /// successfully producing a syntax tree even for invalid input, + /// so this error indicates a bug in [`parse`](crate::parse). + Parse(chumsky::error::Simple), + /// An error which occurred during assembly. + /// + /// May indicate that the input was invalid or that + /// there is a bug in [`assemble`](mod@crate::assemble). + Assemble, + /// An error which occurred during linking. + /// + /// May indicate that the inputs were invalid or that + /// there is a bug in [`link`](crate::link). + Link, + /// An error which occurred during layering due to invalid input. + Layer, + + /// More inputs were provided than could be assigned [`SourceId`](crate::SourceId)s. + /// Should never occur in reasonable use cases. + TooManyInputs, + + /// Source assumed to be a program block could not be parsed. 
+ BadProgramBlock, + /// Source assumed to be an instruction could not be parsed. + BadInstruction, + /// Source assumed to be a label could not be parsed. + BadLabel, + /// Source assumed to be an opcode could not be parsed. + BadOpcode, + /// Source assumed to be a list of operands could not be parsed. + BadOperands, + /// Source assumed to be an operand could not be parsed. + BadOperand, + /// An operand list contained the wrong number of operands for an instruction. + WrongNumberOfOperands { + /// The correct number of operands for the instruction, given the opcode. + expected: usize, + /// The number of operands found in the operand list. + actual: usize + }, + /// The wrong type of operand was given for an instruction. + OperandTypeMismatch { + /// The correct type of operand for the instruction. + expected: OperandType, + /// The given operand's type. + actual: OperandType + }, + /// The same label was defined at two or more addresses. + DuplicateLabel { + /// The label. + label: String, + /// The set of occurrences of the label in the source code. + occurrences: Vec, + }, + /// An instruction can't be assembled due to a label reference given as an operand. + InvalidLabelReference { + /// The label. + label: String, + /// The specific reason the instruction can't be assembled. + reason: InvalidReferenceReason + }, + /// A label does not follow the strict LC-3 requirements. + StrictlyInvalidLabel { + /// The label. + label: String, + /// The specific reason the label doesn't meet strict LC-3 requirements. + reason: StrictlyInvalidLabelReason + }, + /// Two program blocks span at least one common memory location. + ProgramBlocksOverlap { placement1: ProgramBlockPlacement, placement2: ProgramBlockPlacement }, + /// The lexer produced no tokens; probably indicates no content in the source file. + NoTokens, + /// The lexer produced no token for `.ORIG`; this will likely result in no valid program blocks being parsed. 
+ NoOrig, + /// The lexer produced no token for `.END`; this will likely result in no valid program blocks being parsed. + NoEnd, +} + +/// A reason that a label doesn't meet strict LC-3 requirements. +#[derive(Debug)] +pub enum StrictlyInvalidLabelReason { + /// The label contains underscores. + ContainsUnderscores, + /// The label is over 20 characters. + TooLong, + /// The label is over 20 characters and contains underscores. + ContainsUnderscoresAndTooLong, +} + +/// A reason that an instruction cannot be assembled due to a label reference operand. +#[derive(Debug)] +pub enum InvalidReferenceReason { + /// The label is not defined in the file. + Undefined, + /// The label is defined at more than one address in the file. + Duplicated, + /// The label is defined at an invalid address. + OutOfBounds, + /// The label is so far from the reference that the required offset would overflow the available bits. + TooDistant { width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, +} + +impl SingleError { + pub(crate) fn program_blocks_overlap(p1: ProgramBlockPlacement, p2: ProgramBlockPlacement) -> Self { + let (placement1, placement2) = + if p1.span_in_memory.start <= p2.span_in_memory.start { + (p1, p2) + } else { + (p2, p1) + }; + ProgramBlocksOverlap { placement1, placement2 } + } + + fn message(&self) -> String { match self { - InvalidLabel { reasons, .. } => { - format!("invalid label, reasons -- {}", reasons.iter().map(InvalidLabelReason::to_string).join(", ")) - }, - InvalidReg { reason, .. 
} => { - format!("invalid register, {}", reason) + BadProgramBlock => String::from("invalid program block"), + BadInstruction => String::from("invalid instruction"), + BadLabel => String::from("invalid label"), + BadOpcode => String::from("invalid opcode"), + BadOperands => String::from("invalid operand list"), + BadOperand => String::from("invalid operand"), + WrongNumberOfOperands { expected, actual } => + format!("wrong number of operands; expected {}, found: {}", expected, actual), + OperandTypeMismatch { expected, actual } => + format!("wrong operand type; expected {}, found: {}", expected, actual), + DuplicateLabel { label, .. } => + format!("same label used for multiple locations: {}", label), + InvalidLabelReference { label, reason } => { + let reason_str = match reason { + InvalidReferenceReason::Undefined => "not previously defined".to_string(), + InvalidReferenceReason::Duplicated => "defined in multiple locations".to_string(), + InvalidReferenceReason::OutOfBounds => "defined at invalid address".to_string(), + InvalidReferenceReason::TooDistant { width, est_ref_pos, est_label_pos, offset } => + format!("label {} at {:#0label_pos_width$X} referenced at {:#0ref_pos_width$X}; too distant, cannot represent offset of {} in available bits: {}", + label, est_label_pos, est_ref_pos, offset, width, + // TODO: Rust '#X' formatter automatically fixes width to multiple of 4... find or implement workaround to control sign-extension; for example, for 9-bit signed offsets, we would want to display 0x2FF, not 0xFEFF. Showing as decimal for now. + label_pos_width = max(4, min_signed_hex_digits_required(*est_ref_pos) as usize), + ref_pos_width = max(4, min_signed_hex_digits_required(*est_label_pos) as usize),) + }; + format!("reference to label {} invalid: {}", label, reason_str) } - Misc(message) => message.clone(), - HangingLabel { .. } => { format!("hanging label") } - InvalidLine { .. } => { format!("invalid line") } - InvalidImmediate { reason, .. 
} => { format!("invalid immediate, {}", reason) } - InvalidRegOrImm5 { invalid_reg_reason, invalid_imm5_reason, .. } => { - format!("invalid register or 5-bit immediate,\n\ - invalid as register because: {}\n\ - invalid as immediate because: {}", - invalid_reg_reason, invalid_imm5_reason) + ProgramBlocksOverlap { placement1, placement2 } => { + format!("program block {} in file occupying [{:#0o1s_width$X}, {:#0o1e_width$X}) overlaps program block {} occupying [{:#0o2s_width$X}, {:#0o2e_width$X})", + placement1.position_in_file, + placement1.span_in_memory.start, + placement1.span_in_memory.end, + placement2.position_in_file, + placement2.span_in_memory.start, + placement2.span_in_memory.end, + o1s_width = max(4, min_signed_hex_digits_required(placement1.span_in_memory.start) as usize), + o1e_width = max(4, min_signed_hex_digits_required(placement1.span_in_memory.end) as usize), + o2s_width = max(4, min_signed_hex_digits_required(placement2.span_in_memory.start) as usize), + o2e_width = max(4, min_signed_hex_digits_required(placement2.span_in_memory.end) as usize), + ) } - InvalidLabelOrImmediate { invalid_label_reasons, invalid_immediate_reason, .. 
} => { - format!("invalid label or immediate,\n\ - invalid as label because: {}\n\ - invalid as immediate because: {}", - invalid_label_reasons.iter().map(InvalidLabelReason::to_string).join(", "), - invalid_immediate_reason) + NoTokens => "no LC-3 assembly in file".to_string(), + NoOrig => "no .ORIG pseudo-op in file".to_string(), + NoEnd => "no .END pseudo-op in file".to_string(), + Io(ioe) => ioe.to_string(), + Lex(le) => le.to_string(), + Parse(pe) => pe.to_string(), + Assemble => "unexpected assembly error".to_string(), + Link => "unexpected link error".to_string(), + Layer => "unexpected layering error".to_string(), + TooManyInputs => "too many input files provided".to_string(), + StrictlyInvalidLabel { label, reason } => { + use StrictlyInvalidLabelReason::*; + let reason_str = + match reason { + ContainsUnderscores => "contains underscores", + TooLong => "over 20 characters long", + ContainsUnderscoresAndTooLong => "contains underscores and over 20 characters long" + }; + format!("label {} invalid: {}", label, reason_str) } } } - - pub fn annotations(&self) -> Vec { - let mut annotations = Vec::new(); - - macro_rules! push_annotation { - ($range:expr, $label:expr) => { - annotations.push( - SourceAnnotation { - range: $range.clone(), - label: $label, - annotation_type: AnnotationType::Error, - } - ); +} + +fn min_signed_hex_digits_required(n: i32) -> u8 { + let bin_digits = util::min_signed_width(n); + let extra = if bin_digits % 4 == 0 { 0 } else { 1 }; + bin_digits / 4 + extra +} + + +fn report_single(id: SourceId, span: Option, error: SingleError) -> ReportBuilder { + let mut r: ReportBuilder = + Report::build(ReportKind::Error, id, span.map(|s| s.start).unwrap_or(0)) + .with_message(error.message()); + match error { + DuplicateLabel { occurrences, .. 
} => { + let mut first_declaration_labeled = false; + for occurrence in occurrences { + let label_message = if !first_declaration_labeled { + first_declaration_labeled = true; + "first used here" + } else { + "also used here" + }; + r = r.with_label(Label::new(occurrence).with_message(label_message)) } } - match self { - InvalidLabel { range, .. } => { push_annotation!(range, "invalid label here"); }, - InvalidReg { range, .. } => { push_annotation!(range, "invalid reg here"); }, - HangingLabel { range } => { push_annotation!(range, "hanging label here"); }, - InvalidLine { range } => { - if let Some(range) = range { - push_annotation!(range, "invalid line here"); - } - } - InvalidImmediate { range, .. } => { push_annotation!(range, "invalid immediate here"); } - InvalidRegOrImm5 { range, .. } => { push_annotation!(range, "invalid register or immediate here"); } - InvalidLabelOrImmediate { range, .. } => { push_annotation!(range, "invalid label or immediate here"); } - Misc(_) => {}, + ProgramBlocksOverlap { placement1, placement2 } => { + let (first, first_pos_text, second, second_pos_text) = + if placement1.position_in_file < placement2.position_in_file { + (placement1, "end", placement2, "start") + } else { + (placement2, "start", placement1, "end") + }; + r = r.with_label(Label::new(first.span_in_file) + .with_message(format!("{} of this object overlaps the other", first_pos_text))) + .with_label(Label::new(second.span_in_file) + .with_message(format!("{} of this object overlaps the other", second_pos_text))); } - annotations + _ => {} } - - + r } -#[derive(Debug, Clone, PartialEq)] -pub struct MemoryError(pub String); -pub fn extract_file_errors(cst: cst::File) -> Vec { - let mut errors = Vec::new(); +/// A type of operand, including number width constraints. +#[derive(Clone, Debug)] +pub enum OperandType { + /// A register reference. + Register, + /// An unqualified number for use with `.BLKW`. 
+ UnqualifiedNumber, + /// A number with a specific sign and width. + Number { signed: bool, width: u8 }, + /// A string of characters. + String, + /// A label reference. + Label, + /// A type of operand that includes multiple other types. + /// An operand of this type can be either of the contained types. + /// + /// Used for operands like PC offsets; the type of a PC offset + /// is a label OR a signed number. + Or(Box, Box) +} - let cst::File { objects, .. } = cst; - if objects.len() == 0 { - errors.push(ParseError::Misc("File contained no objects.".to_string())); - } +impl Display for OperandType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + use OperandType::*; - for object in objects { - errors.extend(extract_object_errors(object)) + match self { + Register => write!(f, "Register"), + UnqualifiedNumber => write!(f, "Unqualified Number"), + Number { signed, width } => write!(f, "Number ({}-bit, {})", width, (if *signed { "signed" } else { "unsigned" })), + String => write!(f, "String"), + Label => write!(f, "Label"), + Or(t1, t2) => write!(f, "{} or {}", t1, t2), + } } - - errors } -fn extract_object_errors(object: Object) -> Vec { - let mut errors = Vec::new(); - - let Object { origin, content, .. } = object; - - origin.extract_error_into(&mut errors); - errors.extend(extract_object_content_errors(content)); +pub(crate) enum AcceptedNumberSigns { + Signed, + Unsigned, + None, + Any +} - errors +impl AcceptedNumberSigns { + pub(crate) fn or(&self, other: &Self) -> Self { + use AcceptedNumberSigns::*; + + match (self, other) { + (Unsigned, Signed) + | (Signed, Unsigned) + | (Any, _) + | (_, Any) => Any, + (Signed, _) + | (_, Signed) => Signed, + (Unsigned, _) + | (_, Unsigned) => Unsigned, + (None, None) => None + } + } } -fn extract_object_content_errors(object_content: ObjectContent) -> Vec { - let mut errors = Vec::new(); +impl OperandType { - let ObjectContent { operations, hanging_labels, invalid_lines, .. 
} = object_content; + pub(crate) fn accepted_number_signs(&self) -> AcceptedNumberSigns { + use AcceptedNumberSigns::*; + use OperandType::*; - for operation in operations { - errors.extend(extract_operation_errors(operation)); + match self { + Number { signed, .. } => if *signed { Signed } else { Unsigned }, + Or(t1, t2) => t1.accepted_number_signs().or(&t2.accepted_number_signs()), + _ => None + } } + pub(crate) fn signed_or_unsigned_number(width: u8) -> Self { + use OperandType::*; - for hanging_label in hanging_labels { - let range = hanging_label.span().unwrap(); - errors.push(ParseError::HangingLabel { range }); + Or(Box::new(Number { signed: false, width }), + Box::new(Number { signed: true, width })) } - for invalid_line in invalid_lines { - let range = invalid_line.span(); - errors.push(ParseError::InvalidLine { range }); + pub(crate) fn reg_or_imm5() -> Self { + use OperandType::*; + + Or(Box::new(Register), Box::new(Number { signed: true, width: 5 })) } - errors -} + pub(crate) fn pc_offset(width: u8) -> Self { + use OperandType::*; + + Or(Box::new(Label), Box::new(Number { signed: true, width })) + } -fn extract_operation_errors(operation: Operation) -> Vec { - let mut errors = Vec::new(); + pub(crate) fn check(&self, operand: &Operand) -> bool { + use OperandType::*; - let Operation { label, operands, nzp, .. 
} = operation; + match self { + Register => matches!(operand, Operand::Register(_)), + UnqualifiedNumber => matches!(operand, Operand::UnqualifiedNumberLiteral(_)), + Number { signed: expected_signed, width: expected_width } => { + if let Number { signed, width } = OperandType::of(operand) { + match (signed, expected_signed) { + (true, false) => { + if let Operand::NumberLiteral(LiteralValue::SignedWord(sw)) = operand { + *sw >= 0 && (width - 1) <= *expected_width + } else { + // TODO: find way to couple OperandType::of and value extraction to avoid this case + unreachable!("Detected operand as signed type but could not extract signed value"); + } + } + (false, true) => width <= (expected_width - 1), + _ => width <= *expected_width + } - if let Some(label) = label { - label.extract_error_into(&mut errors); + } else { + false + } + } + String => matches!(operand, Operand::StringLiteral(_)), + Label => matches!(operand, Operand::Label(_)), + Or(t1, t2) => t1.check(operand) || t2.check(operand), + } } - errors.extend(extract_operands_errors(operands)); - - if let Err(error) = nzp { - errors.push(error); + pub(crate) fn of(operand: &Operand) -> Self { + use OperandType::*; + + match operand { + Operand::Register(_) => Register, + Operand::UnqualifiedNumberLiteral(_) => UnqualifiedNumber, + Operand::NumberLiteral(lv) => OperandType::of_number_literal(lv, None), + Operand::StringLiteral(_) => String, + Operand::Label(_) => Label, + } } - errors + pub(crate) fn of_number_literal(literal_value: &LiteralValue, interpret_as: Option) -> Self { + use AcceptedNumberSigns::*; + use OperandType::*; + + let value = + match literal_value { + LiteralValue::Word(value) => *value as i32, + LiteralValue::SignedWord(value) => *value as i32, + }; + let unsigned_interpretation = Number { signed: false, width: util::min_unsigned_width(value) }; + let signed_interpretation = Number { signed: true, width: util::min_signed_width(value) }; + match interpret_as { + Option::None | Some(None) => 
match literal_value { + LiteralValue::Word(_) => unsigned_interpretation, + LiteralValue::SignedWord(_) => signed_interpretation, + } + Some(Signed) => signed_interpretation, + Some(Unsigned) => unsigned_interpretation, + Some(Any) => Or(Box::new(signed_interpretation), + Box::new(unsigned_interpretation)), + } + } } -fn extract_operands_errors(operands: Operands) -> Vec { - use Operands::*; - - let mut errors = Vec::new(); - match operands { - Add { dr, sr1, sr2_or_imm5 } => { - dr.extract_error_into(&mut errors); - sr1.extract_error_into(&mut errors); - sr2_or_imm5.extract_error_into(&mut errors); - }, - And { dr, sr1, sr2_or_imm5 } => { - dr.extract_error_into(&mut errors); - sr1.extract_error_into(&mut errors); - sr2_or_imm5.extract_error_into(&mut errors); - }, - Br { pc_offset9 } => { - pc_offset9.extract_error_into(&mut errors); - }, - Jmp { base } => { - base.extract_error_into(&mut errors); - }, - Jsr { pc_offset11 } => { - pc_offset11.extract_error_into(&mut errors); - }, - Jsrr { base } => { - base.extract_error_into(&mut errors); - }, - Ld { dr, pc_offset9 } => { - dr.extract_error_into(&mut errors); - pc_offset9.extract_error_into(&mut errors); - }, - Ldi { dr, pc_offset9 } => { - dr.extract_error_into(&mut errors); - pc_offset9.extract_error_into(&mut errors); - }, - Ldr { dr, base, offset6 } => { - dr.extract_error_into(&mut errors); - base.extract_error_into(&mut errors); - offset6.extract_error_into(&mut errors); - }, - Lea { dr, pc_offset9 } => { - dr.extract_error_into(&mut errors); - pc_offset9.extract_error_into(&mut errors); - }, - Not { dr, sr } => { - dr.extract_error_into(&mut errors); - sr.extract_error_into(&mut errors); - }, - St { sr, pc_offset9 } => { - sr.extract_error_into(&mut errors); - pc_offset9.extract_error_into(&mut errors); - } - Sti { sr, pc_offset9 } => { - sr.extract_error_into(&mut errors); - pc_offset9.extract_error_into(&mut errors); - } - Str { sr, base, offset6 } => { - sr.extract_error_into(&mut errors); - 
base.extract_error_into(&mut errors); - offset6.extract_error_into(&mut errors); - } - Trap { trap_vec } => { - trap_vec.extract_error_into(&mut errors); - } - Orig { origin } => { - origin.extract_error_into(&mut errors); - } - Fill { value } => { - value.extract_error_into(&mut errors); - } - Blkw { size, .. } => { - size.extract_error_into(&mut errors); - } - Stringz { .. } => {} - - // Putting these in instead of _ to avoid forgetting to change - Ret - | Rti - | Getc - | Out - | Puts - | In - | Putsp - | Halt - | End => {} - }; - - errors +/// Data indicating the source string indices and memory addresses spanned by a program block. +#[derive(Clone, Debug)] +pub struct ProgramBlockPlacement { + pub position_in_file: usize, + pub span_in_file: SpanWithSource, + pub span_in_memory: Range, } + diff --git a/assembler/src/expanded.rs b/assembler/src/expanded.rs deleted file mode 100644 index f0fea4a..0000000 --- a/assembler/src/expanded.rs +++ /dev/null @@ -1,217 +0,0 @@ -// For expanded pseudo-op structures -use crate::cst; -use crate::cst::{Operands, ImmOrLabel, UnsignedImmOrLabel, Checked}; -use crate::error::MemoryError; -use lc3_isa; -use lc3_isa::{Word, SignedWord}; -use lc3_isa::{Addr, Instruction}; -use std::collections::HashMap; -use std::iter::repeat; -use itertools::Itertools; - -pub type SymbolTable<'input> = HashMap<&'input str, Addr>; -pub type File<'input> = Vec>; - -pub struct Object<'input> { - orig: Addr, - ops_or_values: Vec<(Option>, OpOrValue<'input>)>, -} - -#[derive(Clone)] -pub enum OpOrValue<'input> { - Operation(cst::Operation<'input>), - Value(Word), -} - -pub struct CompleteObject<'input> { - pub orig: Addr, - pub insns_or_values: Vec, - pub symbol_table: SymbolTable<'input>, -} - -impl<'input> CompleteObject<'input> { - pub fn get_source(&self, address: Addr) -> Option> { - if address < self.orig { - return None; - } - let offset = (address - self.orig) as usize; - let insn_or_value = self.insns_or_values.get(offset); - if let 
Some(InsnOrValueWithSrc { src_lines, .. }) = insn_or_value { - Some(src_lines.clone()) - } else { - None - } - } - - pub fn get_label_addr(&self, label: &str) -> Option<&Addr> { - self.symbol_table.get(label) - } -} - -pub struct InsnOrValueWithSrc { - pub src_lines: Vec, - pub insn_or_value: InsnOrValue, -} - -pub enum InsnOrValue { - Instruction(Instruction), - Value(Word), -} - -pub type Label<'input> = &'input str; - -pub fn expand_pseudo_ops(object: cst::Object) -> Object { - let cst::Object { origin, content, .. } = object; - - let orig = origin.unwrap(); - - let mut ops_or_values = Vec::new(); - for operation in content.operations { - let label = operation.label.clone().map(Checked::unwrap); - let mut values = Vec::new(); - match operation.operands { - Operands::Blkw { size, .. } => { - let num_values = size.unwrap() as usize; - let block = repeat((None, OpOrValue::Value(0))).take(num_values); - values.extend(block); - }, - Operands::Stringz { string } => { - for c in string.unwrap().chars() { - values.push((None, OpOrValue::Value(c as Word))); - } - values.push((None, OpOrValue::Value(0))); // null-terminate - }, - Operands::End => { /* ignore */ }, - _ => { - values.push((None, OpOrValue::Operation(operation))); - }, - }; - let first = values.get_mut(0); - if let Some(first_value) = first { // TODO: how to handle other case? 
- first_value.0 = label; - } - ops_or_values.extend(values); - } - - Object { orig, ops_or_values } -} - -pub fn build_symbol_table<'input>(object: &Object<'input>) -> Result, MemoryError> { - let mut symbol_table = HashMap::new(); - let mut current_location = object.orig; - for op_or_value in object.ops_or_values.iter() { - if let Some(label) = op_or_value.0 { - let other_location = symbol_table.insert(label.clone(), current_location); - if let Some(_) = other_location { - return Err(MemoryError("Duplicate label at different location.".to_string())) - } - } - current_location += 1; - }; - Ok(symbol_table) -} - -pub fn validate_placement(objects: &Vec) -> Result<(), MemoryError> { - let starts_and_ends = objects.iter().map(get_start_and_end); - for ((_, prev_end), (next_start, _)) in starts_and_ends.tuple_windows() { - if prev_end > next_start { - return Err(MemoryError("Objects overlap.".to_string())); - } - } - Ok(()) -} - -fn get_start_and_end(object: &Object) -> (Addr, Addr) { - let start = object.orig; - let end = start + object.ops_or_values.len() as Addr; - (start, end) -} - -pub fn construct_instructions<'input>(object: Object, symbol_table: HashMap<&'input str, Addr>) -> CompleteObject<'input> { - let orig = object.orig; - let mut current_location = object.orig; - let mut insns_or_values = Vec::new(); - for op_or_value in object.ops_or_values { - let (insn_or_value, src_lines) = match op_or_value.1 { - OpOrValue::Operation(cst::Operation { operands: Operands::Fill { value }, src_lines, .. 
}) => { - let value = match value.unwrap() { - UnsignedImmOrLabel::Imm(immediate) => immediate.unwrap(), - UnsignedImmOrLabel::Label(label) => { - let label = label.unwrap(); - symbol_table.get(label).unwrap().clone() - }, - }; - (InsnOrValue::Value(value), src_lines) - }, - OpOrValue::Operation(instruction_cst) => { - let nzp = instruction_cst.nzp.unwrap(); - let src_lines = instruction_cst.src_lines; - let insn = match instruction_cst.operands { - Operands::Add { dr, sr1, sr2_or_imm5 } => match sr2_or_imm5.unwrap() { - cst::Sr2OrImm5::Imm5(immediate) => Instruction::new_add_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - cst::Sr2OrImm5::Sr2(src_reg) => Instruction::new_add_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), - }, - Operands::And { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.unwrap() { - cst::Sr2OrImm5::Imm5(immediate) => Instruction::new_and_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - cst::Sr2OrImm5::Sr2(src_reg) => Instruction::new_and_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), - }, - - Operands::Ld { dr, pc_offset9 } => Instruction::new_ld(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Ldi { dr, pc_offset9 } => Instruction::new_ldi(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Ldr { dr, base, offset6 } => Instruction::new_ldr(dr.unwrap(), base.unwrap(), offset6.unwrap()), - Operands::Lea { dr, pc_offset9 } => Instruction::new_lea(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - - Operands::St { sr, pc_offset9 } => Instruction::new_st(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Sti { sr, pc_offset9 } => Instruction::new_sti(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Str { sr, base, offset6 } => Instruction::new_str(sr.unwrap(), base.unwrap(), offset6.unwrap()), - - Operands::Not { dr, sr } => Instruction::new_not(dr.unwrap(), 
sr.unwrap()), - - Operands::Br { pc_offset9, .. } => { - let nzp = nzp.unwrap(); - Instruction::new_br(nzp.n, nzp.z, nzp.p, compute_offset(pc_offset9, current_location, &symbol_table)) - } - - Operands::Jmp { base } => Instruction::new_jmp(base.unwrap()), - Operands::Jsr { pc_offset11 } => Instruction::new_jsr(compute_offset(pc_offset11, current_location, &symbol_table)), - Operands::Jsrr { base } => Instruction::new_jsrr(base.unwrap()), - - Operands::Ret => Instruction::new_ret(), - Operands::Rti => Instruction::new_rti(), - - Operands::Trap { trap_vec } => Instruction::new_trap(trap_vec.unwrap()), - Operands::Getc => Instruction::new_trap(0x20), - Operands::Out => Instruction::new_trap(0x21), - Operands::Puts => Instruction::new_trap(0x22), - Operands::In => Instruction::new_trap(0x23), - Operands::Putsp => Instruction::new_trap(0x24), - Operands::Halt => Instruction::new_trap(0x25), - - _ => unreachable!() // TODO: restructure enum to avoid this - }; - (InsnOrValue::Instruction(insn), src_lines) - } - OpOrValue::Value(value) => (InsnOrValue::Value(value), vec![]) - }; - insns_or_values.push(InsnOrValueWithSrc { - insn_or_value, - src_lines - }); - current_location += 1; - } - - CompleteObject { orig, insns_or_values, symbol_table } -} - -fn compute_offset(pc_offset: cst::Checked, location: Addr, symbol_table: &HashMap<&str, Addr>) -> SignedWord { - match pc_offset.unwrap() { - ImmOrLabel::Label(label) => { - let label = label.unwrap(); - let label_location = symbol_table.get(label).unwrap().clone(); - let label_location = label_location as i64; - let offset_base = (location + 1) as i64; - (label_location - offset_base) as SignedWord - } - ImmOrLabel::Imm(immediate) => immediate.value.unwrap() - } -} \ No newline at end of file diff --git a/assembler/src/ir1_simple_lines.rs b/assembler/src/ir1_simple_lines.rs deleted file mode 100644 index 5be2c68..0000000 --- a/assembler/src/ir1_simple_lines.rs +++ /dev/null @@ -1,99 +0,0 @@ -use crate::lexer::{Token, Lexer, 
TokenType}; -use std::iter::Peekable; -use itertools::Itertools; - -pub type SimpleLines<'input> = Vec>; - -pub struct SimpleLine<'input> { - pub src: String, - pub content: Vec>, - pub comment: Option>, - pub newline: Option>, -} - -pub fn parse_simple_lines(lexer: Lexer) -> SimpleLines { - let mut tokens = lexer.peekable(); - let mut simple_lines = Vec::new(); - while tokens.peek().is_some() { - let simple_line = parse_simple_line(&mut tokens); - simple_lines.push(simple_line); - } - simple_lines -} - -fn parse_simple_line<'input>(tokens: &mut Peekable>) -> SimpleLine<'input> { - let content = tokens.peeking_take_while(|&Token { ty, .. }| - ty != TokenType::Comment && ty != TokenType::Newline) - .collect::>(); - let next = tokens.next(); - let (comment, newline) = match next { - Some(Token { ty, .. }) => match ty { - TokenType::Comment => { - let newline = tokens.next(); - if let Some(Token { ty, .. }) = newline { - assert_eq!(ty, TokenType::Newline); - } - (next, newline) - } - TokenType::Newline => (None, next), - _ => unreachable!("Found more non-comment, non-newline content after skipping to comment or newline."), - } - None => (None, None), - }; - - let mut all_tokens = vec![]; - all_tokens.extend(content.clone()); - if let Some(token) = comment { - all_tokens.push(token); - } - if let Some(token) = newline { - all_tokens.push(token); - } - let src = reconstruct_src(all_tokens); - - SimpleLine { src, content, comment, newline } -} - -fn reconstruct_src<'input>(tokens: impl IntoIterator>) -> String { - let mut vec = tokens.into_iter().collect::>(); - vec.sort_by_key(|token| token.span.0); - vec.dedup(); - vec.into_iter() - .map(|token| token.src) - .join("") -} - -#[cfg(test)] -mod tests { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn no_newline() { - let lexer = Lexer::new("ADD"); - let simple_lines = parse_simple_lines(lexer); - let SimpleLine { src, content, comment, newline } = simple_lines.get(0).unwrap(); - assert_eq!(*src, 
"ADD".to_string()); - assert_eq!(content.len(), 1); - assert!(comment.is_none()); - assert!(newline.is_none()); - } - - #[test] - fn two_lines() { - let lexer = Lexer::new("ADD ; test\n.END"); - let simple_lines = parse_simple_lines(lexer); - let SimpleLine { src, content, comment, newline } = simple_lines.get(0).unwrap(); - assert_eq!(*src, "ADD ; test\n".to_string()); - assert_eq!(content.len(), 2); - assert!(comment.is_some()); - assert!(newline.is_some()); - - let SimpleLine { src, content, comment, newline } = simple_lines.get(1).unwrap(); - assert_eq!(*src, ".END".to_string()); - assert_eq!(content.len(), 1); - assert!(comment.is_none()); - assert!(newline.is_none()); - } -} - diff --git a/assembler/src/ir2_lines.rs b/assembler/src/ir2_lines.rs deleted file mode 100644 index 45cd40c..0000000 --- a/assembler/src/ir2_lines.rs +++ /dev/null @@ -1,447 +0,0 @@ -use crate::lexer::{Token, TokenType, Opcode, Op, NamedTrap, PseudoOp, Span}; -use crate::ir1_simple_lines::{SimpleLines, SimpleLine}; -use std::iter::Peekable; -use crate::error::ParseError; -use itertools::Itertools; - -pub type Lines<'input> = Vec>; - -#[derive(Clone, Debug)] -pub struct Line<'input> { - pub src: String, - pub content: LineContent<'input>, - pub whitespace: Vec>, // Only includes whitespace around operation - pub comment: Option>, - pub newline: Option>, -} - -impl<'input> Line<'input> { - pub fn span(&self) -> Option { - let tokens = self.tokens(); - let start = tokens.iter().map(|token| token.span.0).min(); - let end = tokens.iter().map(|token| token.span.1).max(); - if let (Some(start), Some(end)) = (start, end) { - Some((start, end)) - } else { - None - } - } - - fn tokens(&self) -> Vec<&Token> { - let mut tokens = Vec::new(); - let Line { content, whitespace, comment, newline, .. 
} = self; - tokens.extend(content.tokens()); - tokens.extend(whitespace); - if let Some(comment) = comment { - tokens.push(comment); - } - if let Some(newline) = newline { - tokens.push(newline); - } - tokens - } -} - -pub type Label<'input> = Token<'input>; - -#[derive(Clone, Debug)] -pub enum LineContent<'input> { - Valid(Option>, Option>), - Invalid(Vec>) -} - -impl<'input> LineContent<'input> { - fn tokens(&self) -> Vec<&Token> { - match self { - LineContent::Valid(maybe_label, maybe_operation_tokens) => { - let mut tokens = Vec::new(); - if let Some(label) = maybe_label { - tokens.push(label); - } - if let Some(operation_tokens) = maybe_operation_tokens { - tokens.extend(operation_tokens.tokens()) - } - tokens - } - LineContent::Invalid(tokens) => tokens.iter().collect() - } - } -} - - -#[derive(Clone, Debug)] -pub struct OperationTokens<'input> { - pub operator: Token<'input>, - pub operands: OperandTokens<'input>, - pub separators: Vec>, // To include internal whitespace, but not surrounding -} - -impl<'input> OperationTokens<'input> { - fn tokens(&self) -> Vec<&Token> { - let mut tokens = Vec::new(); - let OperationTokens { operator, operands, separators } = self; - tokens.push(operator); - tokens.extend(operands.tokens()); - tokens.extend(separators); - tokens - } -} - -#[derive(Clone, Debug)] -pub enum OperandTokens<'input> { - Add { dr: Token<'input>, sr1: Token<'input>, sr2_or_imm5: Token<'input> }, - And { dr: Token<'input>, sr1: Token<'input>, sr2_or_imm5: Token<'input> }, - Br { label: Label<'input> }, - Jmp { base: Token<'input> }, - Jsr { label: Label<'input> }, - Jsrr { base: Token<'input> }, - Ld { dr: Token<'input>, label: Label<'input>, }, - Ldi { dr: Token<'input>, label: Label<'input>, }, - Ldr { dr: Token<'input>, base: Token<'input>, offset6: Token<'input> }, - Lea { dr: Token<'input>, label: Label<'input> }, - Not { dr: Token<'input>, sr: Token<'input> }, - Ret, - Rti, - St { sr: Token<'input>, label: Label<'input> }, - Sti { sr: 
Token<'input>, label: Label<'input> }, - Str { sr: Token<'input>, base: Token<'input>, offset6: Token<'input> }, - Trap { trap_vec: Token<'input> }, - - Getc, - Out, - Puts, - In, - Putsp, - Halt, - - Orig { origin: Token<'input> }, - Fill { value: Token<'input> }, - Blkw { size: Token<'input> }, - Stringz { string: Token<'input> }, - End, -} - -impl<'input> OperandTokens<'input> { - fn tokens(&self) -> Vec<&Token> { - use OperandTokens::*; - - let mut tokens = Vec::new(); - match self { - Add { dr, sr1, sr2_or_imm5 } => { - tokens.push(dr); - tokens.push(sr1); - tokens.push(sr2_or_imm5); - }, - And { dr, sr1, sr2_or_imm5 } => { - tokens.push(dr); - tokens.push(sr1); - tokens.push(sr2_or_imm5); - }, - Br { label } => { tokens.push(label); }, - Jmp { base } => { tokens.push(base); }, - Jsr { label } => { tokens.push(label); }, - Jsrr { base } => { tokens.push(base); }, - Ld { dr, label } => { - tokens.push(dr); - tokens.push(label); - }, - Ldi { dr, label } => { - tokens.push(dr); - tokens.push(label); - }, - Ldr { dr, base, offset6 } => { - tokens.push(dr); - tokens.push(base); - tokens.push(offset6); - }, - Lea { dr, label } => { - tokens.push(dr); - tokens.push(label); - }, - Not { dr, sr } => { - tokens.push(dr); - tokens.push(sr); - }, - St { sr, label, } => { - tokens.push(sr); - tokens.push(label); - }, - Sti { sr, label, } => { - tokens.push(sr); - tokens.push(label); - }, - Str { sr, base, offset6, } => { - tokens.push(sr); - tokens.push(base); - tokens.push(offset6); - }, - Trap { trap_vec } => { tokens.push(trap_vec); }, - Orig { origin } => { tokens.push(origin); }, - Fill { value } => { tokens.push(value); }, - Blkw { size } => { tokens.push(size); }, - Stringz { string } => { tokens.push(string); }, - - Ret - | Rti - | Getc - | Out - | Puts - | In - | Putsp - | Halt - | End => {}, - } - tokens - } -} - -pub fn parse_lines(simple_lines: SimpleLines) -> Lines { - simple_lines.into_iter() - .map(parse_line) - .collect() -} - -fn parse_line(simple_line: 
SimpleLine) -> Line { - let SimpleLine { content: old_content, comment, newline, src, } = simple_line; - let backup = old_content.clone(); - - let mut tokens = old_content.into_iter().peekable(); - let mut whitespace = Vec::new(); - skip_and_collect_whitespace(&mut tokens, &mut whitespace); - let label = parse_ambiguous(&mut tokens).ok(); - skip_and_collect_whitespace(&mut tokens, &mut whitespace); - let content = parse_operation_tokens(&mut tokens, &mut whitespace).map_or( - LineContent::Invalid(backup), - |operation_tokens| { LineContent::Valid(label, operation_tokens) } - ); - skip_and_collect_whitespace(&mut tokens, &mut whitespace); - Line { content, whitespace, comment, newline, src, } -} - -fn parse_ambiguous<'input, T>(tokens: &mut Peekable) -> Result, ParseError> - where T: Iterator> -{ - parse_token(tokens, TokenType::Ambiguous) -} - -fn parse_string<'input, T>(tokens: &mut Peekable) -> Result, ParseError> - where T: Iterator> -{ - parse_token(tokens, TokenType::String) -} - -fn parse_token<'input, T>(tokens: &mut Peekable, target_type: TokenType) -> Result, ParseError> - where T: Iterator> -{ - if let Some(&Token { ty, .. }) = tokens.peek() { - if ty == target_type { - return Ok(tokens.next().unwrap()); - } - } - Err(ParseError::Misc("Didn't find ambiguous token next.".to_string())) -} - -// Expands to the necessary steps to parse operands into a given OperandTokens struct variant. -// Ex: fill_operands! { 3; Add { dr, sr1, sr2_or_imm5, }; tokens, separators } -// expands to: -// let whitespace = parse_whitespace(tokens)?; -// separators.extend(whitespace); -// let mut operand_buffer: [Option>; 3] = [None; 3]; -// parse_operands(tokens, &mut separators, &mut operand_buffer)?; -// OperandTokens::Add { -// dr: operand_buffer[0].unwrap(), -// sr1: operand_buffer[1].unwrap(), -// sr2_or_imm5: operand_buffer[2].unwrap(), -// } - -// TODO: put inside parse_operand_tokens to make it so we don't have to pass in references to tokens and separators -macro_rules! 
fill_operands { - (@munch ($op_buf:ident) -> { $name:ident, $(($field:ident, $value:expr))* }) => { - OperandTokens::$name { - $($field: $value),* - } - }; - - (@munch ($i:expr, $op_buf:ident, $id:ident,) -> { $($output:tt)* }) => { - fill_operands! { @munch ($op_buf) -> { $($output)* ($id, $op_buf[$i].unwrap()) } } - }; - - (@munch ($i:expr, $op_buf:ident, $id:ident, $($next:tt)*) -> { $($output:tt)* }) => { - fill_operands! { @munch ($i+1usize, $op_buf, $($next)*) -> { $($output)* ($id, $op_buf[$i].unwrap()) } } - }; - - ($num:expr; $name:ident { $($input:tt)+ }; $tokens:ident, $separators:ident) => { - let whitespace = parse_whitespace($tokens)?; - $separators.extend(whitespace); - let mut operand_buffer: [Option>; $num] = [None; $num]; // TODO: write inner macro to munch and get size of array - parse_operands($tokens, &mut $separators, &mut operand_buffer)?; - fill_operands! { @munch (0usize, operand_buffer, $($input)+) -> { $name, } } - }; -} - - -fn parse_operand_tokens<'input, T>(op: Op, tokens: &mut Peekable, mut separators: &mut Vec>) -> Result, ParseError> - where T: Iterator> -{ - let operands = match op { - Op::Opcode(opcode) => match opcode { - Opcode::Add => { fill_operands! { 3; Add { dr, sr1, sr2_or_imm5, }; tokens, separators } }, - Opcode::And => { fill_operands! { 3; And { dr, sr1, sr2_or_imm5, }; tokens, separators } }, - Opcode::Br => { fill_operands! { 1; Br { label, }; tokens, separators } }, - Opcode::Jmp => { fill_operands! { 1; Jmp { base, }; tokens, separators } }, - Opcode::Jsr => { fill_operands! { 1; Jsr { label, }; tokens, separators } }, - Opcode::Jsrr => { fill_operands! { 1; Jsrr { base, }; tokens, separators } }, - Opcode::Ld => { fill_operands! { 2; Ld { dr, label, }; tokens, separators } }, - Opcode::Ldi => { fill_operands! { 2; Ldi { dr, label, }; tokens, separators } }, - Opcode::Ldr => { fill_operands! { 3; Ldr { dr, base, offset6, }; tokens, separators } }, - Opcode::Lea => { fill_operands! 
{ 2; Lea { dr, label, }; tokens, separators } }, - Opcode::Not => { fill_operands! { 2; Not { dr, sr, }; tokens, separators } }, - Opcode::Ret => OperandTokens::Ret, - Opcode::Rti => OperandTokens::Rti, - Opcode::St => { fill_operands! { 2; St { sr, label, }; tokens, separators } }, - Opcode::Sti => { fill_operands! { 2; Sti { sr, label, }; tokens, separators } }, - Opcode::Str => { fill_operands! { 3; Str { sr, base, offset6, }; tokens, separators } }, - Opcode::Trap => { fill_operands! { 1; Trap { trap_vec, }; tokens, separators } }, - }, - Op::NamedTrap(named_trap) => match named_trap { - NamedTrap::Getc => OperandTokens::Getc, - NamedTrap::Out => OperandTokens::Out, - NamedTrap::Puts => OperandTokens::Puts, - NamedTrap::In => OperandTokens::In, - NamedTrap::Putsp => OperandTokens::Putsp, - NamedTrap::Halt => OperandTokens::Halt, - }, - Op::PseudoOp(pseudo_op) => match pseudo_op { - PseudoOp::Orig => { fill_operands! { 1; Orig { origin, }; tokens, separators } }, - PseudoOp::Fill => { fill_operands! { 1; Fill { value, }; tokens, separators } }, - PseudoOp::Blkw => { fill_operands! { 1; Blkw { size, }; tokens, separators } }, - PseudoOp::Stringz => { - let whitespace = parse_whitespace(tokens)?; - separators.extend(whitespace); - let string = parse_string(tokens)?; - OperandTokens::Stringz { string } - }, - PseudoOp::End => OperandTokens::End, - }, - }; - Ok(operands) -} - -// Return None if no operation but valid line (i.e. only whitespace (optionally)) -// ^^^ assumes whitespace has already been skipped. 
-// Return Err if line doesn't have valid pattern of tokens -fn parse_operation_tokens<'input, T>(mut tokens: &mut Peekable, mut whitespace: &mut Vec>) -> Result>, ParseError> - where T: Iterator> -{ - match tokens.next() { - Some(token) => match token.ty { - TokenType::Op(op) => { - let mut separators = Vec::new(); - let operands = parse_operand_tokens(op, tokens, &mut separators)?; - skip_and_collect_whitespace(&mut tokens, &mut whitespace); - if tokens.peek().is_some() { - Err(ParseError::Misc("Extra tokens at end of line.".to_string())) - } else { - Ok(Some(OperationTokens { operator: token, operands, separators })) - } - } - TokenType::Whitespace => unreachable!("Function was called without first skipping whitespace."), - _ => Err(ParseError::Misc("Unexpected non-operator token at beginning of 'instruction'".to_string())) - } - None => Ok(None), - } -} - -// Returns Ok if operands parsed correctly and fills operands with Some(token) -// Otherwise, returns Err -fn parse_operands<'input, T>(tokens: &mut Peekable, separators: &mut Vec>, operands: &mut [Option>]) -> Result<(), ParseError> - where T: Iterator> -{ - for i in 0..operands.len() { - let operand = parse_ambiguous(tokens)?; - operands[i] = Some(operand); - if i < operands.len() - 1 { - let separator = parse_separator(tokens)?; - separators.extend(separator); - } - } - Ok(()) -} - -fn skip_and_collect_whitespace<'input, T>(tokens: &mut Peekable, whitespace: &mut Vec>) - where T: Iterator> -{ - let leading_whitespace = tokens.peeking_take_while(|&Token { ty, .. }| ty == TokenType::Whitespace); - whitespace.extend(leading_whitespace); -} - -fn parse_whitespace<'input, T>(tokens: &mut Peekable) -> Result>, ParseError> - where T: Iterator> -{ - let whitespace = tokens.peeking_take_while(|&Token { ty, .. 
}| ty == TokenType::Whitespace) - .collect::>(); - if whitespace.is_empty() { - Err(ParseError::Misc("Missing required whitespace.".to_string())) - } else { - Ok(whitespace) - } -} - -fn parse_separator<'input, T>(tokens: &mut Peekable) -> Result>, ParseError> - where T: Iterator> -{ - let separator = tokens.peeking_take_while(|&Token { ty, .. }| ty == TokenType::Whitespace || ty == TokenType::Comma) - .collect::>(); - let num_commas = separator.iter() - .filter(|&Token { ty, .. }| *ty == TokenType::Comma) - .count(); - if num_commas > 1 { - Err(ParseError::Misc("Too many comma separators.".to_string())) - } else if separator.is_empty() { - Err(ParseError::Misc("Missing separator.".to_string())) - } else { - Ok(separator) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::lexer::Lexer; - use crate::ir1_simple_lines::parse_simple_lines; - - #[test] - fn add() { - let lexer = Lexer::new("ADD R0, R0, R0"); - let simple_lines = parse_simple_lines(lexer); - let lines = parse_lines(simple_lines); - let Line { content, .. } = lines.get(0).unwrap(); - let matches = if let LineContent::Valid(None, Some(operation_tokens)) = content { - if let OperationTokens { operands: OperandTokens::Add { .. }, ..} = operation_tokens { - true - } else { false } - } else { false }; - assert!(matches); - } - - #[test] - fn labeled_add() { - let lexer = Lexer::new("LABEL\n\tADD R0, R1, #1"); - let simple_lines = parse_simple_lines(lexer); - let lines = parse_lines(simple_lines); - - let Line { content, .. } = lines.get(0).unwrap(); - let line_0_matches = if let LineContent::Valid(Some(_), None) = content { true } else { false }; - assert!(line_0_matches); - - let Line { content, .. } = lines.get(1).unwrap(); - let line_1_matches = if let LineContent::Valid(None, Some(operation_tokens)) = content { - if let OperationTokens { operands: OperandTokens::Add { .. }, .. 
} = operation_tokens { - true - } else { false } - } else { false }; - assert!(line_1_matches); - } - -} - diff --git a/assembler/src/ir3_unvalidated_objects.rs b/assembler/src/ir3_unvalidated_objects.rs deleted file mode 100644 index d64c3f0..0000000 --- a/assembler/src/ir3_unvalidated_objects.rs +++ /dev/null @@ -1,188 +0,0 @@ -use crate::lexer::Token; -use crate::ir2_lines::{OperationTokens, Label, Line, Lines, LineContent, OperandTokens}; -use std::iter::Peekable; -use std::mem; - -#[derive(Clone)] -pub struct UnvalidatedFile<'input> { - pub objects: Vec>, - pub ignored: Vec>, -} - -#[derive(Clone)] -pub struct UnvalidatedObject<'input> { - pub origin_src: UnvalidatedLine<'input>, - pub origin: Token<'input>, - pub content: UnvalidatedObjectContent<'input>, -} - -#[derive(Clone)] -pub struct UnvalidatedObjectContent<'input> { - pub operations: Vec>, - pub empty_lines: Vec>, - pub hanging_labels: Vec>, - pub invalid_lines: Vec>, -} - -#[derive(Clone)] -pub struct UnvalidatedLine<'input> { - pub src_lines: Vec, - pub label: Option>, - pub operation: OperationTokens<'input>, - pub whitespace: Vec>, - pub comments: Vec>, - pub newlines: Vec>, -} - -pub fn parse_unvalidated_file(lines: Lines) -> UnvalidatedFile { - let mut objects = Vec::new(); - let mut ignored = Vec::new(); - let mut lines = lines.into_iter().peekable(); - loop { - let maybe_line = lines.next(); - match maybe_line { - None => { break; }, - Some(line) => { - let line_backup = line.clone(); - match line { - Line { - content: LineContent::Valid(label, Some(operation)), - whitespace, comment, newline, src - } => { - if let OperationTokens { operands: OperandTokens::Orig { origin }, .. 
} = operation { - let mut comments = Vec::new(); - if let Some(comment) = comment { - comments.push(comment); - } - - let mut newlines = Vec::new(); - if let Some(newline) = newline { - newlines.push(newline); - } - let origin_src = UnvalidatedLine { src_lines: vec![src], label, operation, whitespace, comments, newlines }; - match parse_unvalidated_object_content(&mut lines) { - Ok(content) => { objects.push(UnvalidatedObject { origin_src, origin, content }); }, - Err(ObjectParseError { lines_seen, .. }) => { - ignored.push(line_backup); - ignored.extend(lines_seen); - }, - } - } else { - ignored.push(line_backup); - } - }, - line => { - ignored.push(line); - } - } - - } - } - } - UnvalidatedFile { objects, ignored } -} - -struct ObjectParseError<'input> { - lines_seen: Vec>, -} - -fn parse_unvalidated_object_content<'input, T>(lines: &mut Peekable) -> Result, ObjectParseError<'input>> - where T: Iterator> -{ - let mut operations = Vec::new(); - let mut empty_lines = Vec::new(); - let mut hanging_labels = Vec::new(); - let mut invalid_lines = Vec::new(); - - let mut lines_seen = Vec::new(); - let mut found_end = false; - - let mut hanging_label = None; - let mut src_lines = Vec::new(); - let mut whitespace = Vec::new(); - let mut comments = Vec::new(); - let mut newlines = Vec::new(); - - loop { - let maybe_line = lines.next(); - match maybe_line { - None => { break; } - Some(line) => { - lines_seen.push(line.clone()); - let line_backup = line.clone(); - - let Line { content, whitespace: line_whitespace, comment, newline, src } = line; - - if hanging_label.is_some() { - if let LineContent::Valid(None, _) = &content { - } else { - hanging_labels.push(hanging_label.take().unwrap()); - } - } - - match content { - LineContent::Invalid(_) => { invalid_lines.push(line_backup); } - LineContent::Valid(None, None) => { empty_lines.push(line_backup); }, - LineContent::Valid(Some(_), None) => { hanging_label = Some(line_backup); }, - LineContent::Valid(label, Some(operation)) 
=> { - let label = if hanging_label.is_some() { - assert!(label.is_none()); - let Line { - content: label_content, - whitespace: label_whitespace, - comment: label_comment, - newline: label_newline, - src - } = hanging_label.take().unwrap(); - - whitespace.extend(label_whitespace); - src_lines.push(src); - if let Some(label_comment) = label_comment { comments.push(label_comment); } - if let Some(label_newline) = label_newline { newlines.push(label_newline); } - if let LineContent::Valid(label, None) = label_content { - label - } else { - unreachable!("Hanging label wasn't a line with only a label! Contact the maintainers."); - } - } else { - label - }; - - whitespace.extend(line_whitespace); - src_lines.push(src); - if let Some(comment) = comment { comments.push(comment); } - if let Some(newline) = newline { newlines.push(newline); } - let finished_src_lines = mem::replace(&mut src_lines, Vec::new()); - let finished_whitespace = mem::replace(&mut whitespace, Vec::new()); - let finished_comments = mem::replace(&mut comments, Vec::new()); - let finished_newlines = mem::replace(&mut newlines, Vec::new()); - if let OperationTokens { operands: OperandTokens::End, .. } = operation { - found_end = true; - } - let unvalidated_line = UnvalidatedLine { - label, - operation, - src_lines: finished_src_lines, - whitespace: finished_whitespace, - comments: finished_comments, - newlines: finished_newlines, - }; - operations.push(unvalidated_line); - if found_end { - break; - } - }, - } - } - } - } - - if found_end { - Ok(UnvalidatedObjectContent { operations, empty_lines, hanging_labels, invalid_lines }) - } else { - Err(ObjectParseError { - lines_seen - }) - } -} - diff --git a/assembler/src/layer.rs b/assembler/src/layer.rs new file mode 100644 index 0000000..e0e6675 --- /dev/null +++ b/assembler/src/layer.rs @@ -0,0 +1,50 @@ +//! Functions for combining blocks of LC-3 memory into an executable image. 
+ +use lc3_isa::util::MemoryDump; +use lc3_isa::{ADDR_SPACE_SIZE_IN_WORDS, Word}; +use crate::error::SingleError; +use crate::link::Block; + +fn layer_block(image: &mut [Word; ADDR_SPACE_SIZE_IN_WORDS], block: Block) { + let Block { origin, words } = block; + let mut addr = origin as usize; + for word in words { + image[addr] = word; + addr += 1; + } +} + +/// Combine the given blocks of memory into an executable image by placing them in memory in the given order. +/// +/// Creating the memory image starts with all memory initialized to `0x0000`, +/// or if `layer_onto_os` is `true`, memory is initialized with the +/// [UTP LC-3 OS](https://github.com/ut-utp/core/tree/master/os). The +/// OS must be included for the UTP emulator to successfully load and execute the program. +/// +/// After the image is initialized, each given [`Block`](crate::link::Block) is +/// inserted into memory at its target address, in the given order. +/// No regard is given to addresses that have already been initialized; +/// each block will be layered on top of the image and overwrite its target addresses. +/// If two words in different blocks occupy the same memory location, +/// that location will contain the the second block's word in the end. +pub fn layer(blocks: impl IntoIterator, layer_onto_os: bool) -> Result { + let blocks = blocks.into_iter().collect::>(); + + let mut image = + if layer_onto_os { + let first_block = blocks.get(0).ok_or(SingleError::Layer)?; + + let mut os = lc3_os::OS_IMAGE.clone().0; + os[lc3_os::USER_PROG_START_ADDR as usize] = first_block.origin; + + os + } else { + [0; ADDR_SPACE_SIZE_IN_WORDS] + }; + + for block in blocks { + layer_block(&mut image, block); + } + + Ok(image.into()) +} diff --git a/assembler/src/lex.rs b/assembler/src/lex.rs new file mode 100644 index 0000000..ff52f62 --- /dev/null +++ b/assembler/src/lex.rs @@ -0,0 +1,634 @@ +//! Functions and data structures for lexing LC-3 assembly. +//! +//! 
Lexical analysis, or lexing, is the process of splitting a source string into a sequence of meaningful "tokens." +//! Each token is a small data structure which typically represents one "word" or punctuation mark +//! in the source code. Here's an example: +//! +//! ``` +//! # use lc3_assembler::LeniencyLevel; +//! # use lc3_assembler::lex::*; +//! # use lc3_assembler::lex::Token::*; +//! # use lc3_assembler::lex::Opcode::*; +//! # use lc3_isa::Reg::*; +//! # use lc3_assembler::lex::LiteralValue::*; +//! let source = "ADD R0, R0, #1; increment counter"; +//! let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); +//! assert_eq!(tokens, +//! vec![ +//! (Opcode(Add), 0.. 3), +//! (Register(R0), 4.. 6), +//! (Comma, 6.. 7), +//! (Register(R0), 8..10), +//! (Comma, 10..11), +//! (NumberLiteral(Word(1)), 12..14), +//! (Comment, 14..33), +//! ]); +//! ``` +//! +//! The string is split into seven [`Token`]s. For most of them, +//! each part separated by spaces or punctuation becomes its own token. +//! But really, tokens are based on what parts are significant; notice that the +//! entire comment is represented by one token, and there is no information +//! stored about what the comment said. This is because the content of comments +//! doesn't change the code that needs to be assembled. Maybe more obviously, +//! all of the spaces between the opcode and operands aren't represented in +//! the output tokens at all. They were only important for distinguishing separate tokens. +//! +//! Lexing only splits the string. It doesn't check whether the order of tokens makes sense. +//! For example, the following string is not valid LC-3, but it can be lexed successfully: +//! +//! ``` +//! # use lc3_assembler::LeniencyLevel; +//! # use lc3_assembler::lex::*; +//! # use lc3_assembler::lex::Token::*; +//! # use lc3_assembler::lex::Opcode::*; +//! # use lc3_isa::Reg::*; +//! # use lc3_assembler::lex::LiteralValue::*; +//! let source = "hello, world\n"; +//! 
let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); +//! assert_eq!(tokens, +//! vec![ +//! (Label("HELLO".to_string()), 0.. 5), +//! (Comma, 5.. 6), +//! (Label("WORLD".to_string()), 7..12), +//! (Newline, 12..13), +//! ]); +//! ``` +//! +//! [`lex`] also outputs the locations of the tokens in the source string as index ranges. +//! These are to help construct error messages which refer to specific locations in the source. +//! +//! +use chumsky::prelude::*; +use lc3_isa::{Addr, Reg, SignedWord, Word}; +use std::convert::{TryFrom, TryInto}; +use std::fmt::{Display, Formatter}; +use std::num::TryFromIntError; +use chumsky::Stream; + +use crate::Spanned; +use crate::LeniencyLevel; + +/// A unit representing a string of meaningful text in LC-3 assembly code. +/// +/// Produced by [`lex`]ing. See the [module-level documentation](crate::lex) for examples. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum Token { + /// An opcode, pseudo-op (**except `.END`**), or named TRAP routine. + Opcode(Opcode), + /// A register reference (e.g., `R0`). + Register(Reg), + /// An unqualified positive decimal number. Used as an officially required operand of `.BLKW`. + /// + /// # Examples + /// - `0` + /// - `10` + UnqualifiedNumberLiteral(Word), + /// A number literal, qualified with a base prefix (`#`, `b`, or `x`) and optional negative sign `-`. + /// + /// The qualifiers are used to calculate the numeric value during lexing and are not stored. + /// + /// # Examples + /// - `#-1` + /// - `x3000` + /// - `b0101` + NumberLiteral(LiteralValue), + /// A string literal (e.g., `"Hello, world!"`). + StringLiteral(String), + /// A label or label reference. + /// + /// Most alphanumeric strings which aren't reserved for other valid tokens + /// are valid labels, depending on the [`LeniencyLevel`](crate::LeniencyLevel) + /// used when [`lex`]ing. + Label(String), + + /// The `.END` pseudo-op. 
+ /// + /// Not included as an [`Opcode`] because it denotes + /// the end of a program block. This makes it + /// useful for parsing to distinguish between `.END` + /// and instructions that can occur within a program block. + End, + + /// A newline. + /// + /// Matches line feeds, carriage returns, + /// and other types of vertical whitespace. + Newline, + /// A comma (`,`). + Comma, + /// A comment, including the leading semicolon. + Comment, + + /// Any string of characters which doesn't represent any other type of token. + Invalid, +} + +impl Display for Token { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +/// The numeric value represented by a number literal. +/// +/// Can be any unsigned or 2's-complement signed number with a width up to 16 bits. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum LiteralValue { + Word(Word), + SignedWord(SignedWord), +} + +impl TryFrom for Addr { + type Error = TryFromIntError; + + fn try_from(value: LiteralValue) -> Result { + match value { + LiteralValue::Word(word) => Ok(word), + LiteralValue::SignedWord(signed_word) => signed_word.try_into(), + } + } +} + +impl TryFrom for SignedWord { + type Error = TryFromIntError; + + fn try_from(value: LiteralValue) -> Result { + match value { + LiteralValue::Word(word) => word.try_into(), + LiteralValue::SignedWord(signed_word) => Ok(signed_word), + } + } +} + +impl TryFrom for u8 { + type Error = TryFromIntError; + + fn try_from(value: LiteralValue) -> Result { + match value { + LiteralValue::Word(word) => word.try_into(), + LiteralValue::SignedWord(signed_word) => signed_word.try_into(), + } + } +} + +/// The set of condition codes (`n`, `z`, and/or `p`) on which a `BR` opcode is conditioned. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct ConditionCodes { + pub(crate) n: bool, + pub(crate) z: bool, + pub(crate) p: bool, +} + +/// A specific LC-3 opcode, pseudo-op, or named TRAP routine. 
+/// +/// Does not include [`.END`](Token::End). +/// +/// Represents a *case-insensitive* string in the source code. +/// That is, [`Opcode::Add`] can represent `ADD`, `add`, or `Add`, etc. +/// All are treated as the same `Opcode`. Below, only the all-uppercase +/// option is listed for each `Opcode` variant. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum Opcode { + /// The opcode `ADD`. + Add, + /// The opcode `AND`. + And, + /// The opcode `BR`, conditioned on any combination of condition codes. + /// + /// # Examples + /// - `BR` + /// - `BRn` + /// - `BRzp` + Br(ConditionCodes), + /// The opcode `JMP`. + Jmp, + /// The opcode `JSR`. + Jsr, + /// The opcode `JSRR`. + Jsrr, + /// The opcode `LD`. + Ld, + /// The opcode `LDI`. + Ldi, + /// The opcode `LDR`. + Ldr, + /// The opcode `LEA`. + Lea, + /// The opcode `NOT`. + Not, + /// The opcode `RET`. + Ret, + /// The opcode `RTI`. + Rti, + /// The opcode `ST`. + St, + /// The opcode `STI`. + Sti, + /// The opcode `STR`. + Str, + /// The opcode `TRAP`. + Trap, + + // Pseudo-ops + /// The pseudo-op `.ORIG`. + Orig, + /// The pseudo-op `.FILL`. + Fill, + /// The pseudo-op `.BLKW`. + Blkw, + /// The pseudo-op `.STRINGZ`. + Stringz, + + // Named TRAP routines + /// The named TRAP routine `GETC`. + Getc, + /// The named TRAP routine `OUT`. + Out, + /// The named TRAP routine `PUTS`. + Puts, + /// The named TRAP routine `IN`. + In, + /// The named TRAP routine `PUTSP`. + Putsp, + /// The named TRAP routine `HALT`. 
+ Halt, +} + +#[derive(Debug)] +enum CaseSensitivePassResult { + CaseInsensitiveSource(String), + CaseSensitiveToken(Token), +} + +fn number_literal_with_base(base: u32, prefix: char, leniency: LeniencyLevel) -> impl Parser> { + let strict_literal = + just(prefix) + .ignore_then(just('-').ignored().or_not()) + .then(text::digits(base)) + .try_map(move |(maybe_sign, digits): (Option<()>, String), span| { + let parse_result = if maybe_sign.is_some() { + SignedWord::from_str_radix(&format!("-{}", digits), base) + .map(LiteralValue::SignedWord) + } else { + Word::from_str_radix(&digits, base) + .map(LiteralValue::Word) + }; + parse_result.map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow + }); + let literal: Box>> = + match leniency { + LeniencyLevel::Lenient => Box::new(just("0").or_not().ignore_then(strict_literal)), + LeniencyLevel::Strict => Box::new(strict_literal), + }; + literal +} + +fn one_opcode(pattern: &'static str, output_opcode: Opcode) -> impl Parser> { + just_to(pattern, output_opcode) +} + +fn one_register(pattern: &'static str, output_reg: Reg) -> impl Parser> { + just_to(pattern, output_reg) +} + +fn just_to(pattern: &'static str, output: O) -> impl Parser> { + just(pattern).to(output) +} + +fn string_literal() -> impl Parser> { + // `escape` and `string_literal` are based on JSON parser example + // https://github.com/zesterer/chumsky/blob/d4102128315d9dbbea901a91dc5eaa0fc9a790f7/examples/json.rs#L39 + let escape = just::<_, _, Simple>('\\').ignore_then( + just('\\') + .or(just('"')) + .or(just('b').to('\x08')) + .or(just('f').to('\x0C')) + .or(just('n').to('\n')) + .or(just('r').to('\r')) + .or(just('t').to('\t')) + ); + + just('"') + .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated()) + .then_ignore(just('"')) + .collect::() + .map(Token::StringLiteral) +} + +fn comment() -> impl Parser> { + just(';') + .then(filter(|c| !is_newline(c)).repeated()) + .to(Token::Comment) 
+} + +fn branch_opcode(leniency: LeniencyLevel) -> impl Parser> { + let br = just::("BR"); + let res: Box>> = + match leniency { + LeniencyLevel::Lenient => Box::new( + br + .ignore_then(one_of("NZP").repeated().at_most(3)) + .map::(|cond_code_chars| { + let cond_codes = + if cond_code_chars.is_empty() { + ConditionCodes { n: true, z: true, p: true } + } else { + let n = cond_code_chars.contains(&'N'); + let z = cond_code_chars.contains(&'Z'); + let p = cond_code_chars.contains(&'P'); + ConditionCodes { n, z, p } + }; + Opcode::Br(cond_codes) + }), + ), + LeniencyLevel::Strict => Box::new( + br + .ignore_then(just("N").or_not()) + .then(just("Z").or_not()) + .then(just("P").or_not()) + .map::(|((n, z), p)| { + let cond_codes = + if n.is_none() && z.is_none() && p.is_none() { + ConditionCodes { n: true, z: true, p: true } + } else { + let n = n.is_some(); + let z = z.is_some(); + let p = p.is_some(); + ConditionCodes { n, z, p } + }; + Opcode::Br(cond_codes) + }), + ) + }; + res +} + +fn tokens(leniency: LeniencyLevel) -> impl Parser>, Error=Simple> { + let newline = text::newline() + .to(Token::Newline); + + let comma = just(',') + .to(Token::Comma); + + let non_newline_whitespace = + filter(|c: &char| c.is_whitespace() && !is_newline(c)).repeated(); + + let terminator = + filter(|c: &char| c.is_whitespace() || *c == ',' || *c == ';').ignored() + .or(end().ignored()); + + use Opcode::*; + + // These options are separated by `or` instead of all belonging + // to one tuple passed to `choice` because `choice` only supports + // tuples with up to 26 elements. + // The grouping by 'opcode type' was chosen arbitrarily. 
+ let opcode = choice(( + one_opcode("ADD", Add), + one_opcode("AND", And), + branch_opcode(leniency), + one_opcode("JMP", Jmp), + one_opcode("JSRR", Jsrr), + one_opcode("JSR", Jsr), + one_opcode("LDI", Ldi), + one_opcode("LDR", Ldr), + one_opcode("LD", Ld), + one_opcode("LEA", Lea), + one_opcode("NOT", Not), + one_opcode("RET", Ret), + one_opcode("RTI", Rti), + one_opcode("STI", Sti), + one_opcode("STR", Str), + one_opcode("ST", St), + one_opcode("TRAP", Trap), + )) + .or(choice(( + one_opcode("GETC", Getc), + one_opcode("OUT", Out), + one_opcode("PUTSP", Putsp), + one_opcode("PUTS", Puts), + one_opcode("IN", In), + one_opcode("HALT", Halt), + ))) + .or(choice(( + one_opcode(".ORIG", Orig), + one_opcode(".FILL", Fill), + one_opcode(".BLKW", Blkw), + one_opcode(".STRINGZ", Stringz), + ))) + .then_ignore(terminator.clone().rewind()) + .map(Token::Opcode); + + let end_pseudo_op = just(".END") + .then_ignore(terminator.clone().rewind()) + .to(Token::End); + + use Reg::*; + let register = choice(( + one_register("R0", R0), + one_register("R1", R1), + one_register("R2", R2), + one_register("R3", R3), + one_register("R4", R4), + one_register("R5", R5), + one_register("R6", R6), + one_register("R7", R7), + )) + .then_ignore(terminator.clone().rewind()) + .map(Token::Register); + + let unqualified_number_literal_base = 10; + let unqualified_number_literal = text::digits(unqualified_number_literal_base) + .try_map(move |digits: String, span| { + Word::from_str_radix(&digits, unqualified_number_literal_base) + .map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow + }) + .then_ignore(terminator.clone().rewind()) + .map(Token::UnqualifiedNumberLiteral); + + let number_literal = choice(( + number_literal_with_base(2, 'B', leniency), + number_literal_with_base(10, '#', leniency), + number_literal_with_base(16, 'X', leniency), + )) + .then_ignore(terminator.clone().rewind()) + .map(Token::NumberLiteral); + + let label = 
text::ident() // C-style identifier. Follows all LC-3 label rules but allows arbitrary length and underscores. + .then_ignore(terminator.rewind()) + .map(Token::Label); + + let token = choice(( + opcode, + end_pseudo_op, + register, + number_literal, + unqualified_number_literal, + string_literal(), + label, + newline, + comma, + comment(), + )) + .recover_with(skip_until([',', ';', ' ', '\t', '\n', '\r', '\x0B', '\x0C', '\u{0085}', '\u{2028}', '\u{2029}'], |_| Token::Invalid)); // TODO: improve? + + token + .map_with_span(|token, span| (token, span)) + .padded_by(non_newline_whitespace) + .repeated() + .then_ignore(end()) +} + +fn is_newline(c: &char) -> bool { + // All line breaks matched by chumsky::text::newline + ['\n', + '\r', + '\x0B', // Vertical tab + '\x0C', // Form feed + '\u{0085}', // Next line + '\u{2028}', // Line separator + '\u{2029}', ].contains(c) // Paragraph separator +} + +fn case_sensitive_tokens() -> impl Parser>, Error=Simple> { + let case_sensitive_token = + choice(( + string_literal(), + comment() + )) + .map(|t| CaseSensitivePassResult::CaseSensitiveToken(t)); + + let case_insensitive_source = + filter(|c| *c != '"' && *c != ';').repeated().at_least(1) + .collect() + .map(|s| CaseSensitivePassResult::CaseInsensitiveSource(s)); + + case_insensitive_source + .or(case_sensitive_token) + .map_with_span(|cspr, s| (cspr, s)) + .repeated() + .then_ignore(end()) +} + +fn case_sensitive_pass(source: &str) -> (Option>>, Vec>) { + case_sensitive_tokens().parse_recovery_verbose(source) +} + +fn case_insensitive_pass(case_sensitive_pass_results: Vec>, leniency: LeniencyLevel) -> (Vec>, Vec>) { + let mut toks: Vec> = Vec::new(); + let mut errors = Vec::new(); + + for (cspr, span) in case_sensitive_pass_results { + match cspr { + CaseSensitivePassResult::CaseInsensitiveSource(s) => { + // TODO: profile CPU + memory to see whether this introduces any inefficiencies. + // This allows chumsky to correctly track spans while parsing this substring. 
+ let uppercase_s = s.to_uppercase(); + let spanned_char_stream = uppercase_s.chars() + .enumerate() + .map(|(i, c)| { + let pos = span.start + i; + (c, pos..(pos + 1)) + }); + let stream = Stream::from_iter(span.end..(span.end + 1), spanned_char_stream); + let (maybe_tokens, lex_errors) = tokens(leniency).parse_recovery_verbose(stream); + + // TODO: confirm that fail case is impossible, or decide on how to handle. `recover_with` should prevent failure + if let Some(ts) = maybe_tokens { + toks.extend(ts); + } + errors.extend(lex_errors); + } + CaseSensitivePassResult::CaseSensitiveToken(t) => { + toks.push((t, span)); + } + } + } + + (toks, errors) +} + +/// Analysis data about the [`Token`]s output during [`lex`]ing. +/// +/// The result of performing some analysis on the tokens +/// after "lexing proper" is complete. Used to produce some error messages +/// during [semantic analysis](crate::analyze), after the tokens +/// have been consumed during the [`parse`](crate::parse) step. +pub struct LexData { + pub(crate) no_tokens: bool, + pub(crate) orig_present: bool, + pub(crate) end_present: bool, +} + +fn contains_token(tokens: &Vec>, token: Token) -> bool { + tokens.iter().any(|t| t.0 == token) +} + +/// Produce a sequence of [`Token`]s representative of the given source string. +/// +/// See the [module-level documentation](crate::lex) for general information and examples. +/// +/// This function also produces index ranges corresponding to each token's location +/// in the source string. It also analyzes the tokens and produces [`LexData`]. +/// Because the tokens are consumed by the [`parse`](crate::parse) step, this data saves the +/// information about the tokens which the [semantic analysis step](crate::analyze) +/// needs to produce some types of error messages. 
+pub fn lex(source: &str, leniency: LeniencyLevel) -> Result<(Vec>, LexData), Vec>> { + let (maybe_csprs, mut errors) = case_sensitive_pass(source); + let tokens = + maybe_csprs + .map(|csprs| { + let (maybe_tokens, cip_errors) = case_insensitive_pass(csprs, leniency); + errors.extend(cip_errors); + maybe_tokens + }); + + match tokens { + None => Err(errors), + Some(ts) => { + let no_tokens = ts.is_empty(); + let orig_present = contains_token(&ts, Token::Opcode(Opcode::Orig)); + let end_present = contains_token(&ts, Token::End); + let lex_data = LexData { no_tokens, orig_present, end_present }; + Ok((ts, lex_data)) + } + } +} + + +#[cfg(test)] +mod tests { + use super::*; + use Token::*; + use Reg::*; + use crate::lex::Opcode::*; + + #[test] + fn lone_error() { + let source = "#OOPS"; + let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); + assert_eq!( + vec![ + (Invalid, 0..5), + ], + tokens); + } + + #[test] + fn error_in_context() { + let source = "ADD R0, R0, #OOPS; <- error"; + let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); + assert_eq!( + vec![ + (Opcode(Add), 0.. 3), + (Register(R0), 4.. 6), + (Comma, 6.. 7), + (Register(R0), 8..10), + (Comma, 10..11), + (Invalid, 12..17), + (Comment, 17..27), + ], + tokens); + } +} \ No newline at end of file diff --git a/assembler/src/lexer.rs b/assembler/src/lexer.rs deleted file mode 100644 index 93158b0..0000000 --- a/assembler/src/lexer.rs +++ /dev/null @@ -1,218 +0,0 @@ -use regex::Regex; - -pub type Span = (usize, usize); - -#[derive(Debug, Clone, Copy, PartialEq)] -pub struct Token<'input> { - pub src: &'input str, - pub span: Span, - pub ty: TokenType, -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum TokenType { - // Insignificant Whitespace - Whitespace, - - Op(Op), - - // String Literals - // Numeric literals starting with x can't be disambiguated from labels, - // so we'll do that later based on position. 
- String, - - // Comments - Comment, - - // Punctuation - Comma, - Newline, - - // Chunk of non-whitespace, non-comma, non-semicolon text. - // Used as a catch-all for tokens that need to be disambiguated at parse-time, - // for example, labels and hex literals which may both start with 'x'. - // In more general terms: labels and operands. - Ambiguous, -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Op { - Opcode(Opcode), - NamedTrap(NamedTrap), - PseudoOp(PseudoOp), -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Opcode { - Add, - And, - Br, - Jmp, - Jsr, - Jsrr, - Ld, - Ldi, - Ldr, - Lea, - Not, - Ret, - Rti, - St, - Sti, - Str, - Trap, -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum PseudoOp { - Orig, - Fill, - Blkw, - Stringz, - End, -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum NamedTrap { - Getc, - Out, - Puts, - In, - Putsp, - Halt, -} - -pub struct Lexer<'input> { - src: &'input str, - patterns: Vec<(Regex, TokenType)>, - cur_pos: usize, -} - -use TokenType::*; -use Opcode::*; -use NamedTrap::*; -use PseudoOp::*; - -impl<'input> Lexer<'input> { - - // The lexer tries to find these patterns in this order. - // Registering a pattern will automatically append some stuff to the regex. - // Notably, it will add ^ to the beginning to ensure that it grabs tokens - // from the beginning of the slice it's examining, so don't use ^. 
- const PATTERNS: [(&'static str, TokenType); 34] = [ - (r"[^\S\r\n]+", Whitespace), - - (r"ADD", Op(Op::Opcode(Add))), - (r"AND", Op(Op::Opcode(And))), - (r"BRn?z?p?", Op(Op::Opcode(Br))), - (r"JMP", Op(Op::Opcode(Jmp))), - (r"JSRR", Op(Op::Opcode(Jsrr))), - (r"JSR", Op(Op::Opcode(Jsr))), - (r"LDI", Op(Op::Opcode(Ldi))), - (r"LDR", Op(Op::Opcode(Ldr))), - (r"LD", Op(Op::Opcode(Ld))), - (r"LEA", Op(Op::Opcode(Lea))), - (r"NOT", Op(Op::Opcode(Not))), - (r"RET", Op(Op::Opcode(Ret))), - (r"RTI", Op(Op::Opcode(Rti))), - (r"STI", Op(Op::Opcode(Sti))), - (r"STR", Op(Op::Opcode(Str))), - (r"ST", Op(Op::Opcode(St))), - (r"TRAP", Op(Op::Opcode(Trap))), - - (r"GETC", Op(Op::NamedTrap(Getc))), - (r"OUT", Op(Op::NamedTrap(Out))), - (r"PUTS", Op(Op::NamedTrap(Puts))), - (r"IN", Op(Op::NamedTrap(In))), - (r"PUTSP", Op(Op::NamedTrap(Putsp))), - (r"HALT", Op(Op::NamedTrap(Halt))), - - (r".ORIG", Op(Op::PseudoOp(Orig))), - (r".FILL", Op(Op::PseudoOp(Fill))), - (r".BLKW", Op(Op::PseudoOp(Blkw))), - (r".STRINGZ", Op(Op::PseudoOp(Stringz))), - (r".END", Op(Op::PseudoOp(End))), - - (r#""([^"\\]|\\.)*""#, String),// quotes with any number of non-quote/backslash chars *or* arbitrary chars escaped with backslashes in between. - - (r";.*", Comment), // semicolon followed by any number of chars that aren't newlines. - - (r",", Comma), - (r"(\r\n|\r|\n)", Newline), - - (r"[^\s,;]+", Ambiguous), // At least one non-whitespace, non-comma, non-semicolon character. 
- ]; - - pub fn new(src: &'input str) -> Lexer<'input> { - let mut this = Lexer { - src, - patterns: Vec::new(), - cur_pos: 0, - }; - - for (pattern, token_type) in Self::PATTERNS.iter() { - this.register_pattern(pattern, *token_type); - } - - this - } - - fn register_pattern(&mut self, pattern: &str, token_type: TokenType) - { - assert!(!pattern.starts_with("^")); - let pattern = format!("^(?i){}", pattern); - let regex = Regex::new(pattern.as_str()).expect("Invalid regex"); - self.patterns.push((regex, token_type)) - } - - fn tail(&self) -> &'input str { - &self.src[self.cur_pos..] - } - -} - - -impl<'input> Iterator for Lexer<'input> { - type Item = Token<'input>; - - fn next(&mut self) -> Option { - let start = self.cur_pos; - if self.src.len() <= start { - return None; - } - - let mut munches = Vec::new(); - for (pattern, token_type) in &self.patterns { - if let Some(match_) = pattern.find(self.tail()) { - munches.push((match_, *token_type)); - } - } - let (max_munch, token_type) = munches.iter() - .rev() // we want to break ties based on order in self.patterns, but max_by_key returns last match - .max_by_key(|munch| munch.0.end()) - .expect("The lexer could not recognize some character pattern you provided. Please contact the maintainers."); // TODO: handle gracefully? 
- - self.cur_pos += max_munch.end(); - let token = Token { - src: max_munch.as_str(), - span: (start, self.cur_pos), - ty: *token_type, - }; - return Some(token); - - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_simple() { // TODO: actually assert some stuff - let input = ".ORIG x3000\nTEST add R0, R0, R0; Tokenize me, cap'n!\nBRnzp TEST\nHALT\n.END"; - let lexer = Lexer::new(input); - for item in lexer { - println!("{:?}", item); - } - } -} diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index b776a89..bdfcf5b 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -1,37 +1,292 @@ - // TODO: docs -// TODO: denys // TODO: docs URL +#![doc = include_str!("../README.md")] + +// TODO: add more lints? +#![deny(unused)] + +use std::convert::{TryFrom, TryInto}; +use std::fmt::Debug; +use std::fs; +use std::path::PathBuf; + +mod util; pub mod error; +pub mod lex; +pub mod parse; +pub mod analyze; +pub mod assemble; +pub mod link; +pub mod layer; -pub mod lexer; +type Span = std::ops::Range; +type Spanned = (T, Span); -pub mod parser; -pub mod ir1_simple_lines; -pub mod ir2_lines; -pub mod ir3_unvalidated_objects; -pub mod cst; -pub mod expanded; +/// A parsed syntax element, or an error if it was skipped, +/// along with any other data necessary to produce an error indicating this syntax element. +pub type WithErrData = Spanned>; -pub mod complete; +fn get(v: &Vec>, i: usize) -> Option<&T> { + v.get(i) + .and_then(|res| get_result(res).as_ref().ok()) +} -pub mod assembler; +fn get_result(v: &WithErrData) -> &Result { + &v.0 +} +fn result(v: WithErrData) -> Result { + v.0 +} + +fn try_result(maybe_v: Option>) -> Result { + result(maybe_v.ok_or(())?) +} + +fn try_map(maybe_v: Option>) -> Result where + U: TryFrom +{ + try_result(maybe_v)? 
+ .try_into() + .map_err(|_| ()) +} -#[cfg(test)] -mod tests { - use super::*; - use lexer::Lexer; - use crate::parser::parse; - use crate::parser::LeniencyLevel::Lenient; +/// An identifier for a unique source file. Produced by [`id`]. +/// +/// Used in error messages to indicate +/// which source file the text in the error is from. +pub type SourceId = String; - #[test] - fn simple() { - let src = ".ORIG x3000;\nLABEL ADD R0, R0, #0\n.end"; - let tokens = Lexer::new(src); - let cst = parse(tokens, Lenient); - println!("{:?}", cst); + +/// Data indicating a substring in a specific source file. +/// +/// Primarily used to identify the exact source code which caused an error. +#[derive(Debug, Clone)] +pub struct SpanWithSource { + id: SourceId, + span: Span, +} + +impl From<(SourceId, Span)> for SpanWithSource { + fn from((id, span): (SourceId, Span)) -> Self { + Self { id, span } } +} + +impl ariadne::Span for SpanWithSource { + type SourceId = SourceId; + + fn source(&self) -> &Self::SourceId { &self.id } + fn start(&self) -> usize { self.span.start } + fn end(&self) -> usize { self.span.end } +} + +/// The level of leniency to be used when parsing and identifying errors. +/// +/// Officially, LC-3 assembly follows strict syntax rules which can be inconvenient. +/// For example, labels officially cannot exceed 20 characters. +/// To enforce these rules, use [`LeniencyLevel::Strict`]. +/// +/// [`LeniencyLevel::Strict`] enforces the following: +/// - Labels cannot contain underscores +/// - Labels cannot exceed 20 characters in length +/// - Labels must be defined on the same line as an instruction, not separately on a previous line +/// - Qualified number literals cannot be prefixed with `0` (i.e., `0x3000` is not allowed, only `x3000`) +/// - Operands must be separated with commas (`,`), not just whitespace. +/// - Condition codes for BR instructions *must* be listed in the following order: `n`, `z`, then `p`. 
+// NOTE TO DEVS (THIS SHOULD NOT BE IN THE DOCS): +// When updating this list, remember to update the command line app's list. +#[derive(Copy, Clone)] +pub enum LeniencyLevel { + /// Indicates that all convenience features are to be allowed. + Lenient, + + /// Indicates that all official rules of the LC-3 assembly language + /// are to be followed, as described in *Introduction to Computing Systems: from Bits & Gates to C/C++ & Beyond (3rd ed.)*, + /// by Patt and Patel. + Strict +} + + +/// Read and cache the given source files for use in printing error messages. +/// +/// To print error messages correctly, this function must +/// be given the same paths to the source files which were +/// input to the function which returned the errors, +/// then the resulting cache must be passed to the error printing function. +/// +/// This function reads all the given source files to memory, +/// so be aware that large files may cause significant memory usage. +pub fn sources(iter: impl IntoIterator) -> Result, std::io::Error> { + let sources = iter.into_iter() + .map(|input| Ok((id(&input), read(&input)?))) + .collect::, std::io::Error>>()?; + Ok(ariadne::sources(sources)) +} + + +/// Read all of the given file. +pub fn read(input: &PathBuf) -> Result { + fs::read_to_string(input.clone()) +} + + +/// Get a [`SourceId`] for the given source file. +/// +/// If working with source code that isn't from a file, +/// you may use the id of an arbitrary path (even `""`) for +/// functions requiring a [`SourceId`]. +pub fn id(input: &PathBuf) -> SourceId { + input.to_string_lossy().to_string() +} + + +/// Check whether the given file contains valid LC-3 assembly code. +/// +/// Reads the given file, then parses and analyzes its contents for errors, +/// returning a syntax tree if successful, +/// and otherwise, the errors that were found. 
+/// +/// # Examples +/// ## Success +/// `add.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/add.asm")] +/// ``` +/// ```ignore +/// # use lc3_assembler::*; +/// let src_path = std::path::PathBuf::from("../docs/tests/add.asm"); +/// let result = parse_and_analyze_file(&src_path, LeniencyLevel::Lenient); +/// assert!(result.is_ok()); +/// ``` +/// +/// ## Error +/// `bad_operand.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/bad_operand.asm")] +/// ``` +/// ```ignore +/// # use lc3_assembler::*; +/// use assert_matches::assert_matches; +/// let src_path = std::path::PathBuf::from("../docs/tests/bad_operand.asm"); +/// let error = parse_and_analyze_file(&src_path, LeniencyLevel::Lenient).unwrap_err(); +/// let first_error = error.get_first_single_error().unwrap(); +/// assert_matches!(first_error, error::SingleError::BadOperand); +/// ``` +pub fn parse_and_analyze_file(input: &PathBuf, leniency: LeniencyLevel) -> Result { + let id = id(&input); + let src = read(input).map_err(|e| (id.clone(), e))?; + parse_and_analyze(&id, &src, leniency) +} + + +/// Check whether the given `String` is valid LC-3 assembly code. +/// +/// Parses, then analyzes the given `String` for errors, +/// returning a syntax tree if successful, +/// and otherwise, the errors that were found. 
+/// +#[doc = include_str!("../docs/id_arg.md")] +/// +/// # Examples +/// ## Success +/// `add.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/add.asm")] +/// ``` +/// ``` +/// # use lc3_assembler::*; +/// let src = include_str!("../docs/tests/add.asm").to_string(); +/// let src_id = id(&std::path::PathBuf::from("../docs/tests/add.asm")); +/// let result = parse_and_analyze(&src_id, &src, LeniencyLevel::Lenient); +/// assert!(result.is_ok()); +/// ``` +/// +/// ## Error +/// `bad_operand.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/bad_operand.asm")] +/// ``` +/// ``` +/// # use lc3_assembler::*; +/// use assert_matches::assert_matches; +/// let src = include_str!("../docs/tests/bad_operand.asm").to_string(); +/// let src_id = id(&std::path::PathBuf::from("../docs/tests/bad_operand.asm")); +/// let error = parse_and_analyze(&src_id, &src, LeniencyLevel::Lenient).unwrap_err(); +/// let first_error = error.get_first_single_error().unwrap(); +/// assert_matches!(first_error, error::SingleError::BadOperand); +/// ``` +pub fn parse_and_analyze(id: &SourceId, src: &String, leniency: LeniencyLevel) -> Result { + let (tokens, lex_data) = lex::lex(src, leniency).map_err(|es| error::into_multiple(id.clone(), es))?; + let file_spanned = parse::parse(id.clone(), src, tokens, leniency).map_err(|es| error::into_multiple(id.clone(), es))?; + let errors = analyze::validate(&lex_data, &file_spanned, leniency); + if !errors.is_empty() { + return Err(errors.into()); + } + let (file, _) = file_spanned; + Ok(file) +} + + +/// Fully assemble the contents of the given file. +/// +/// Reads the given file, then parses, analyzes, assembles, and links its contents, +/// returning an LC-3 executable image if successful, +/// and otherwise, the error(s) that were found. 
+/// +#[doc = include_str!("../docs/no_os_arg.md")] +/// +/// # Examples +/// `add.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/add.asm")] +/// ``` +/// ```ignore +/// # use lc3_assembler::*; +/// # fn main() -> Result<(), error::Error> { +/// let src_path = std::path::PathBuf::from("../docs/tests/add.asm"); +/// let mem = assemble_file(&src_path, LeniencyLevel::Lenient, false)?; +/// assert_eq!(mem[0x3000], 0x1000); +/// # Ok(()) +/// # } +/// ``` +pub fn assemble_file(input: &PathBuf, leniency: LeniencyLevel, no_os: bool) -> Result { + let id = id(&input); + let src = read(input).map_err(|e| (id.clone(), e))?; + assemble(&id, &src, leniency, no_os) +} + +/// Fully assemble the given `String`. +/// +/// Parses, analyzes, assembles, then links the given `String`, +/// returning an LC-3 executable image if successful, +/// and otherwise, the error(s) that were found. +/// +#[doc = include_str!("../docs/id_arg.md")] +/// +#[doc = include_str!("../docs/no_os_arg.md")] +/// +/// # Examples +/// `add.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/add.asm")] +/// ``` +/// ``` +/// # use lc3_assembler::*; +/// # fn main() -> Result<(), error::Error> { +/// let src = include_str!("../docs/tests/add.asm").to_string(); +/// let src_id = id(&std::path::PathBuf::from("../docs/tests/add.asm")); +/// let mem = assemble(&src_id, &src, LeniencyLevel::Lenient, false)?; +/// assert_eq!(mem[0x3000], 0x1000); +/// # Ok(()) +/// # } +/// ``` +pub fn assemble(id: &SourceId, src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { + let file = parse_and_analyze(id, src, leniency)?; + let assemble::Object { symbol_table, blocks } = assemble::assemble(file).map_err(|_| (id.clone(), error::SingleError::Assemble))?; + let blocks = link::link_object_blocks(&symbol_table, blocks).map_err(|e| (id.clone(), e))?; + let mem = layer::layer(blocks, !no_os).map_err(|e| (id.clone(), e))?; + Ok(mem) } diff --git a/assembler/src/link.rs b/assembler/src/link.rs new file mode 
100644 index 0000000..b553d2f --- /dev/null +++ b/assembler/src/link.rs @@ -0,0 +1,90 @@ +//! Functions and data structures for linking [`Object`](crate::assemble::Object)s +//! produced by [initial assembly](crate::assemble::assemble). +//! +//! Linking is the process of assembling instructions in an object which +//! refer to labels in other objects. When writing an LC-3 program, this +//! allows referencing code which *other* programmers have assembled and +//! distributed as objects. +//! +//! This module assumes that all objects share a global namespace for labels. +//! **Linking two or more objects which each define the same label will result in undefined behavior.** + +use std::collections::HashMap; +use lc3_isa::{Addr, Word}; +use crate::assemble::{assemble_instruction, AssemblyResult, Object, ObjectWord, ObjectBlock, SymbolTable}; +use crate::error::SingleError; + +/// A block of LC-3 words and a target starting memory address. +/// +/// `origin` is the intended address of the first word in `words` when loading the block. +pub struct Block { + pub origin: Addr, + pub words: Vec, +} + +fn link_object_block(symbol_table: &SymbolTable, block: ObjectBlock) -> Result { + let mut words = Vec::new(); + let ObjectBlock { origin, words: object_words, .. } = block; + let mut location_counter = origin; + for object_word in object_words { + match object_word { + ObjectWord::Value(word) => { + words.push(word); + location_counter += 1; + }, + ObjectWord::UnlinkedInstruction(instruction) => + match assemble_instruction(&symbol_table, &location_counter, instruction).map_err(|_| SingleError::Link)? 
{ + AssemblyResult::SingleObjectWord(word) => match word { + ObjectWord::Value(word) => { + words.push(word); + location_counter += 1; + } + ObjectWord::UnlinkedInstruction(_) => { return Err(SingleError::Link); } + } + AssemblyResult::MultipleObjectWords(ows) => { + let ws = ows.into_iter() + .map(|ow| match ow { + ObjectWord::Value(word) => Ok(word), + ObjectWord::UnlinkedInstruction(_) => Err(SingleError::Link), + }) + .collect::, SingleError>>()?; + location_counter += ws.len() as u16; + words.extend(ws); + } + } + } + } + Ok(Block { origin, words }) +} + +pub(crate) fn link_object_blocks(symbol_table: &SymbolTable, blocks: Vec) -> Result, SingleError> { + blocks.into_iter() + .map(|block| link_object_block(symbol_table, block)) + .collect() +} + + +/// Links a set of [`Object`](crate::assemble::Object)s to finish +/// assembling them into a set of loadable memory blocks. +/// +/// See the [module-level documentation](crate::link) for details on the linking process. +pub fn link(objects: impl IntoIterator) -> Result, SingleError> { + let objects = objects.into_iter().collect::>(); + + let mut global_symbol_table = HashMap::new(); + for object in objects.iter() { + for (label, addr) in object.symbol_table.iter() { + global_symbol_table.insert(label.clone(), *addr); + } + } + + let blocks = + objects.into_iter() + .map(|object| link_object_blocks(&mut global_symbol_table, object.blocks)) + .collect::>, SingleError>>()? + .into_iter() + .flatten() + .collect(); + + Ok(blocks) +} diff --git a/assembler/src/parse.rs b/assembler/src/parse.rs new file mode 100644 index 0000000..4045ed3 --- /dev/null +++ b/assembler/src/parse.rs @@ -0,0 +1,402 @@ +//! Functions and data structures for parsing LC-3 assembly. +//! +//! Parsing, or syntactic analysis, tries to structure the sequence of tokens produced by [lexing](crate::lex). +//! Tokens between `.ORIG` and `.END` tokens are structured into programs. +//! 
Within those programs, tokens between newlines are structured into instructions. +//! The result is a [`File`], or syntax tree, corresponding to a single source file. +//! In other words, parsing is where the assembler +//! tries to make sense of the order of the tokens. Here's an example: +//! +//! ``` +//! # use lc3_assembler::id; +//! # use lc3_assembler::LeniencyLevel; +//! # use lc3_assembler::lex::lex; +//! # use lc3_assembler::parse::*; +//! # use lc3_assembler::parse::Operand::*; +//! # use lc3_assembler::lex::Opcode::*; +//! # use lc3_assembler::lex::LiteralValue; +//! # use lc3_isa::Reg::*; +//! # use self::*; +//! let id = id(&std::path::PathBuf::from("")); +//! let source = ".ORIG x3000\nADDING ADD R0, R0, #1\n.END"; +//! let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); +//! let (file, _) = parse(id, source, tokens, LeniencyLevel::Lenient).unwrap(); +//! +//! assert_eq!(file.blocks, +//! vec![(Ok(ProgramBlock { +//! orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), +//! instructions: vec![ +//! (Ok(Instruction { +//! label: Some((Ok("ADDING".to_string()), 12..18)), +//! opcode: (Ok(Add), 19..22), +//! operands: (Ok(vec![ +//! (Ok(Register(R0)), 23..25), +//! (Ok(Register(R0)), 27..29), +//! (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33) +//! ]), 23..33) +//! }), 12..33) +//! ], +//! }), 0..38)]); +//! ``` +//! +//! Often times, the order of tokens may be invalid, but mostly correct. For example, +//! the source code may include an invalid token where a label is expected: +//! +//! ``` +//! # use lc3_assembler::id; +//! # use lc3_assembler::LeniencyLevel; +//! # use lc3_assembler::lex::lex; +//! # use lc3_assembler::parse::*; +//! # use lc3_assembler::parse::Operand::*; +//! # use lc3_assembler::lex::Opcode::*; +//! # use lc3_assembler::lex::LiteralValue; +//! # use lc3_isa::Reg::*; +//! # use self::*; +//! let id = id(&std::path::PathBuf::from("")); +//! 
let source = ".ORIG x3000\nA%DDER ADD R0, R0, #1\n.END"; +//! let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); +//! let (file, _) = parse(id, source, tokens, LeniencyLevel::Lenient).unwrap(); +//! +//! assert_eq!(file.blocks, +//! vec![(Ok(ProgramBlock { +//! orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), +//! instructions: vec![ +//! (Ok(Instruction { +//! label: Some((Err(()), 12..18)), // <-- Error here! +//! opcode: (Ok(Add), 19..22), // But everything else parses successfully, +//! operands: (Ok(vec![ // or at least reasonably. +//! (Ok(Register(R0)), 23..25), +//! (Ok(Register(R0)), 27..29), +//! (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33) +//! ]), 23..33) +//! }), 12..33) +//! ], +//! }), 0..38)]); +//! ``` +//! +//! +//! [`parse`] is designed to recover when it encounters a token which is out of order. It replaces +//! the smallest possible part of the syntax tree with an error and tries to make +//! a reasonable guess about where to continue. In the example above, it assumes +//! that the invalid token was supposed to be a label, discards it, and checks for an opcode +//! next. In this way, [`parse`] attempts to produce a syntax tree for any input, +//! valid *or invalid*, but the tree will contain location-specific parse errors +//! which [the semantic analysis step](crate::analyze) can try and determine the cause of. +//! By trying to recover, [`parse`] can produce multiple errors instead of +//! failing at a single early error, and semantic analysis can provide clear reasons +//! for some errors. +//! +//! However, including error data for potentially any element +//! in the syntax tree makes the tree more complex. This is why +//! the examples above have so much "noise" in addition to the main data. +//! Most elements of the syntax tree are paired with error data using +//! [`WithErrData`](crate::WithErrData). We use this type to abstract away +//! 
the error data and make clearer which syntax elements comprise the tree, +//! at least when working with it in code. + +use std::convert::TryFrom; +use chumsky::combinator::Repeated; +use chumsky::prelude::*; +use chumsky::primitive::NoneOf; +use chumsky::Stream; +use lc3_isa::{Reg, Word}; + +use crate::{SourceId, Spanned, WithErrData}; +use crate::LeniencyLevel; +use crate::lex::{LiteralValue, Opcode, Token}; + +/// A representation of a LC-3 assembly file structured based on correct syntax. The root of the syntax tree. +/// +/// Produced by [`parse`]. +/// +/// This assembler allows multiple "program blocks" in the same file, +/// as long as they wouldn't overlap in memory. They must be assembled +/// together and can reference each other's labels. This part of the syntax +/// tree therefore stores a list of program blocks. +#[derive(Debug)] +pub struct File { + pub(crate) id: SourceId, + #[allow(dead_code)] + pub(crate) before_first_orig: Spanned>, // TODO: check that this only contains newlines and comments (at least if strict) + pub blocks: Vec> +} + +/// A representation of an LC-3 assembly program block, starting with `.ORIG` and ending with `.END`. +#[derive(Debug, Eq, PartialEq)] +pub struct ProgramBlock { + pub orig: WithErrData>>, + pub instructions: Vec>, +} + +/// A representation of an LC-3 assembly instruction. +/// +/// When produced by [`parse`], may contain any number or types of operands. +/// Operands are just parsed as an arbitrarily long list, no matter what opcode +/// was used. The number and types of operands are validated during [semantic analysis](crate::analyze). +#[derive(Debug, Eq, PartialEq)] +pub struct Instruction { + pub label: Option>, + pub opcode: WithErrData, + pub operands: WithErrData>>, +} + +/// An operand of an LC-3 assembly instruction. +/// +/// Each variant directly corresponds to a specific [`Token`](crate::lex::Token) variant, +/// noted below. See the [`Token`] documentation for descriptions and examples of each. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Operand { + /// Corresponds to [`Token::Register`](crate::lex::Token::Register). + Register(Reg), + /// Corresponds to [`Token::UnqualifiedNumberLiteral`](crate::lex::Token::UnqualifiedNumberLiteral). + UnqualifiedNumberLiteral(Word), + /// Corresponds to [`Token::NumberLiteral`](crate::lex::Token::NumberLiteral). + NumberLiteral(LiteralValue), + /// Corresponds to [`Token::StringLiteral`](crate::lex::Token::StringLiteral). + StringLiteral(String), + /// Corresponds to [`Token::Label`](crate::lex::Token::Label). + Label(String), +} + +impl TryFrom for Reg { + type Error = (); + + fn try_from(e: Operand) -> Result { + if let Operand::Register(r) = e { + Ok(r) + } else { + Err(()) + } + } +} + +impl TryFrom for LiteralValue { + type Error = (); + + fn try_from(e: Operand) -> Result { + if let Operand::NumberLiteral(v) = e { + Ok(v) + } else { + Err(()) + } + } +} + +impl Operand { + pub(crate) fn get_string(self) -> Option { + if let Self::StringLiteral(s) = self { + Some(s) + } else { + None + } + } + + pub(crate) fn get_label(self) -> Option { + if let Self::Label(l) = self { + Some(l) + } else { + None + } + } + + pub(crate) fn get_unqualified_number_value(self) -> Option { + if let Self::UnqualifiedNumberLiteral(w) = self { + Some(w) + } else { + None + } + } +} + +fn operand() -> impl Parser, Error = Simple> { + + let operand = filter_map(move |span, t: Token| + match t.clone() { + Token::Register(reg) => Ok(Ok(Operand::Register(reg))), + Token::UnqualifiedNumberLiteral(val) => Ok(Ok(Operand::UnqualifiedNumberLiteral(val))), + Token::NumberLiteral(val) => Ok(Ok(Operand::NumberLiteral(val))), + Token::StringLiteral(s) => Ok(Ok(Operand::StringLiteral(s))), + Token::Label(s) => Ok(Ok(Operand::Label(s))), + Token::Opcode(_) + | Token::End + | Token::Invalid => Ok(Err(())), + _ => Err(Simple::expected_input_found(span, None, Some(t))) + } + ); + operand.map_with_span(|o, span| (o, span)) +} + +fn operands(leniency: 
LeniencyLevel) -> impl Parser>>, Error = Simple> { + let operand_separator: Box>> = + match leniency { + LeniencyLevel::Lenient => Box::new(just(Token::Comma).or_not().ignored()), + LeniencyLevel::Strict => Box::new(just(Token::Comma).ignored()), + }; + + operand() + .separated_by(operand_separator) + .map_with_span(|os, span| (Ok(os), span)) +} + +fn instruction(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { + let label = + select! { + Token::Label(s) => Ok(s), + Token::Invalid => Err(()) + } + .map_with_span(|l, s| (l, s)); + + let opcode = + filter_map(move |span, t: Token| + match t.clone() { + Token::Opcode(o) => Ok(Ok(o)), + Token::Invalid => Ok(Err(())), + _ => Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected + }) + .map_with_span(|o, span| (o, span)); + + let terminator = + just(Token::Comment).or_not() + .then(just(Token::Newline).ignored().or(end())) + .ignored(); + + let label_and_separator: Box>, Error=Simple>> = + match leniency { + LeniencyLevel::Lenient => + Box::new(label.or_not() + .then_ignore(comments_and_newlines().or_not())), + LeniencyLevel::Strict => + Box::new(label.or_not()), + }; + + label_and_separator + .then(opcode) + .then(operands(leniency)) + .then_ignore(terminator.rewind()) + .map_with_span(|((l, o), os), span| { + let instruction = Instruction { + label: l, + opcode: o, + operands: os, + }; + (Ok(instruction), span) + }) + // Pseudo-recovery strategy -- take everything until the end of the line. 
Consider replacing with `recover_via` if merged into `chumsky` + .or(none_of([Token::End, Token::Comment, Token::Newline]).repeated().at_least(1) + .map_with_span(|_, span| (Err(()), span))) +} + + +fn comments_and_newlines() -> impl Parser> { + just(Token::Comment).or_not() + .then(just(Token::Newline).repeated().at_least(1)) + .repeated().at_least(1) + .ignored() +} + +fn everything_until_orig() -> Repeated>> { + none_of(Token::Opcode(Opcode::Orig)).repeated() +} + +fn program_block(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { + let orig = + just(Token::Opcode(Opcode::Orig)) + .ignore_then(operands(leniency)); + + orig + .then( + instruction(leniency) + .separated_by(comments_and_newlines()) + .allow_leading() + .allow_trailing() + ) + .then_ignore(just::<_, Token, _>(Token::End)) + .map_with_span(|(orig, instructions), span| { + (Ok(ProgramBlock { orig, instructions }), span) + }) + // Pseudo-recovery strategy -- take everything until next .ORIG + .or(any().then(everything_until_orig()) + .map_with_span(|_, span| (Err(()), span))) +} + +fn file(id: SourceId, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { + everything_until_orig() + .map_with_span(|toks, span| (toks, span)) + .then( + program_block(leniency) + .separated_by(everything_until_orig()) + .allow_trailing() + ) + .then_ignore(end()) + .map_with_span(move |(before_first_orig, blocks), span| + (File { id: id.clone(), before_first_orig, blocks }, span)) +} + +/// Produce a [`File`] (syntax tree) representative of the given tokens. +/// +/// See the [module-level documentation](crate::parse) for general information and examples. +/// +/// `tokens` must be the tokens produced by [`lex`](crate::lex::lex)ing `src`. +/// For the best errors, `id` should also correspond to the same source, but if (for example) the source is not +/// from a file, this isn't strictly necessary. 
+pub fn parse(id: SourceId, src: &str, tokens: Vec>, leniency: LeniencyLevel) -> Result, Vec>> { + let len = src.chars().count(); + let (maybe_file, errors) = + file(id, leniency) + .parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())); + + maybe_file.ok_or(errors) +} + + +#[cfg(test)] +mod tests { + use super::*; + use super::Operand::*; + use super::Reg::*; + use super::Opcode::*; + use crate::lex::lex; + + #[test] + fn capture_tokens_before_first_orig_separately() { + let source = "%some #random junk .ORIG x3000\nADD R0, R0, R0\n.END"; + let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); + let file = parse("".to_string(), source, tokens, LeniencyLevel::Lenient).unwrap(); + + assert_eq!((vec![Token::Invalid, Token::Invalid, Token::Label("JUNK".to_string())], 0..18), + file.0.before_first_orig); + } + + #[test] + fn ignore_after_end() { + let source = ".ORIG x3000\nADD R0, R0, R0\n.END then %some #random junk!"; + let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); + let file = parse("".to_string(), source, tokens, LeniencyLevel::Lenient).unwrap(); + + let f = file.0; + assert_eq!((vec![], 0..5), f.before_first_orig); // TODO: probably doesn't need fixing, but span should probably be 0..0; find source of bug + assert_eq!(vec![(Ok(ProgramBlock { + orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), + instructions: vec![ + (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Ok(Register(R0)), 24..26)]), 16..26) }), 12..26) + ], + }), 0..31)], + f.blocks); + } + + #[test] + fn operand_error() { + let source = ".ORIG x3000\nADD R0, R0, #OOPS; <- error\n.END"; + let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); + let file = parse("".to_string(), source, tokens, LeniencyLevel::Lenient).unwrap(); + + assert_eq!(vec![(Ok(ProgramBlock { + orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 
6..11)]), 6..11), + instructions: vec![ + (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Err(()), 24..29)]), 16..29) }), 12..29) + ], + }), 0..44)], + file.0.blocks); + } + +} \ No newline at end of file diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs deleted file mode 100644 index b4fcc46..0000000 --- a/assembler/src/parser.rs +++ /dev/null @@ -1,27 +0,0 @@ -use crate::cst::{File, CstParser}; -use crate::lexer::Lexer; -use crate::ir1_simple_lines::parse_simple_lines; -use crate::ir2_lines::parse_lines; -use crate::ir3_unvalidated_objects::parse_unvalidated_file; - -pub fn parse(tokens: Lexer, leniency: LeniencyLevel) -> File { - let ir1 = parse_simple_lines(tokens); - let ir2 = parse_lines(ir1); - let ir3 = parse_unvalidated_file(ir2); - CstParser { leniency }.parse_cst(ir3) -} - -// TODO: impl Default? -pub enum LeniencyLevel { - Lenient, - Strict, -} - -impl LeniencyLevel { - pub fn long_labels_allowed(&self) -> bool { - match self { - LeniencyLevel::Lenient => true, - LeniencyLevel::Strict => false - } - } -} diff --git a/assembler/src/util.rs b/assembler/src/util.rs new file mode 100644 index 0000000..67639ee --- /dev/null +++ b/assembler/src/util.rs @@ -0,0 +1,17 @@ +pub(crate) fn min_signed_width(n: i32) -> u8 { + let mut width = 1; + const BASE: i32 = 2; + while n < -BASE.pow(width - 1) || n >= BASE.pow(width - 1) { + width += 1; + } + width as u8 +} + +pub(crate) fn min_unsigned_width(n: i32) -> u8 { + let mut width = 1; + const BASE: i32 = 2; + while n >= BASE.pow(width) { + width += 1; + } + width as u8 +} diff --git a/assembler/tests/inputs/arithmetic_small.asm b/assembler/tests/inputs/arithmetic_small.asm deleted file mode 100644 index 3ad2007..0000000 --- a/assembler/tests/inputs/arithmetic_small.asm +++ /dev/null @@ -1,6 +0,0 @@ -.ORIG x3000 -ADD R0, R1, R2 -AND R3, R4, R5 -NOT R6, R7 -HALT -.END \ No newline at end of file diff --git 
a/assembler/tests/inputs/pseudo_ops.asm b/assembler/tests/inputs/pseudo_ops.asm deleted file mode 100644 index 54129e3..0000000 --- a/assembler/tests/inputs/pseudo_ops.asm +++ /dev/null @@ -1,4 +0,0 @@ -.orig x4000 -.stringz "\"this\\that\"" -.fill xBEEF -.end \ No newline at end of file diff --git a/assembler/tests/inputs/very_many_errors.asm b/assembler/tests/inputs/very_many_errors.asm new file mode 100644 index 0000000..e82cf3b --- /dev/null +++ b/assembler/tests/inputs/very_many_errors.asm @@ -0,0 +1,22 @@ +.ORIG #OOPS ; Bad .ORIG operand +AND R1, , ; Bad instruction (or operands) +LABEL ADD R0 ; Duplicate label +LABEL JMP RET ; Bad operand +.END + +.ORIG x3000 ; Likely overlapping first block +ADD R0, R0, R0 +ADD R0, R0, R0 +.END + +.ORIG x3001 ; Overlaps second block +ADD R0, R0, LABEL ; Operand type mismatch +BR LABEL ; Invalid reference to duplicate label +TOO_FAR .BLKW 0 +.END + +.ORIG x3500 +BR TOO_FAR ; Label too distant for offset to fit +.END + +.ORIG x4000 ; Bad block (missing .END) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 49ecf6d..be1aba7 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -1,25 +1,10 @@ extern crate lc3_assembler; -use lc3_assembler::lexer::Lexer; -use lc3_assembler::parser::parse; -use lc3_assembler::assembler::assemble; -use lc3_isa::Word; -use lc3_assembler::parser::LeniencyLevel::Lenient; - - -#[test] -fn arithmetic_small() { - test( - include_str!("inputs/arithmetic_small.asm"), - 0x3000, - &[ - 0x1042, - 0x5705, - 0x9DFF, - 0xF025, - ] - ); -} +use lc3_isa::{ADDR_MAX_VAL, Word}; +use std::ops::Index; +use lc3_isa::util::MemoryDump; +use lc3_assembler::{assemble, LeniencyLevel, lex, link, parse_and_analyze, parse}; +use lc3_assembler::error::Error; #[test] fn load_store_medium() { @@ -35,46 +20,631 @@ fn load_store_medium() { 0xBDFF, 0x7E3E, 0xF025, - ] + ], + LeniencyLevel::Lenient ); } -#[test] -fn pseudo_ops() { - test( - include_str!("inputs/pseudo_ops.asm"), - 0x4000, - &[ 
- 0x0022, - 0x0074, - 0x0068, - 0x0069, - 0x0073, - 0x005C, - 0x0074, - 0x0068, - 0x0061, - 0x0074, - 0x0022, - 0x0000, - 0xBEEF, - ] - ); + +mod single_instruction { + use super::*; + + fn single_instruction_test(input: &str, expected: Word, leniency: LeniencyLevel) { + multiple_output_test(input, &[expected], leniency); + } + + macro_rules! tests { + ($tests_name:ident + $( + $test_name:ident: $instruction:expr => $expected:expr + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + single_instruction_test($instruction, $expected, LeniencyLevel::Lenient); + } + )+ + } + }; + ($tests_name:ident (Strict) + $( + $test_name:ident: $instruction:expr => $expected:expr + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + single_instruction_test($instruction, $expected, LeniencyLevel::Strict); + } + )+ + } + }; + } + + tests! { alternative_styles + lowercase: "add r0 r0 r0" => 0x1000, + comma_separated: "add r0, r0, r0" => 0x1000, + with_semicolon: "ADD R0 R0 R0;" => 0x1000, + nonpatt_hex_imm: "ADD R7 R7 0xA" => 0x1FEA, + commented: "ADD R0 R0 R0 ; comment" => 0x1000, + } + + tests! { labels + minimal: "A ADD R0 R0 R0" => 0x1000, + on_separate_line: "A\n ADD R0 R0 R0" => 0x1000, + begins_with_opcode: "ADDER ADD R0 R0 R0" => 0x1000, + begins_with_trap: "INIT ADD R0 R0 R0" => 0x1000, + } + + tests! { labels_strict (Strict) + minimal: "A ADD R0, R0, R0" => 0x1000, + begins_with_opcode: "ADDER ADD R0, R0, R0" => 0x1000, + begins_with_trap: "INIT ADD R0, R0, R0" => 0x1000, + } + + tests! { add + minimal: "ADD R0 R0 R0" => 0x1000, + r1_2_3: "ADD R1 R2 R3" => 0x1283, + r4_5_6: "ADD R4 R5 R6" => 0x1946, + r7_imm: "ADD R7 R7 #0" => 0x1FE0, + nonzero_imm: "ADD R7 R7 #1" => 0x1FE1, + max_imm: "ADD R7 R7 #15" => 0x1FEF, + neg_imm: "ADD R7 R7 #-1" => 0x1FFF, + hex_imm: "ADD R7 R7 xA" => 0x1FEA, + } + + tests! 
{ and + minimal: "AND R0 R0 R0" => 0x5000, + r1_2_3: "AND R1 R2 R3" => 0x5283, + r4_5_6: "AND R4 R5 R6" => 0x5946, + r7_imm: "AND R7 R7 #0" => 0x5FE0, + nonzero_imm: "AND R7 R7 #1" => 0x5FE1, + max_imm: "AND R7 R7 #15" => 0x5FEF, + neg_imm: "AND R7 R7 #-1" => 0x5FFF, + } + + tests! { jmp + r0: "JMP R0" => 0xC000, + r1: "JMP R1" => 0xC040, + r2: "JMP R2" => 0xC080, + r3: "JMP R3" => 0xC0C0, + r4: "JMP R4" => 0xC100, + r5: "JMP R5" => 0xC140, + r6: "JMP R6" => 0xC180, + r7: "JMP R7" => 0xC1C0, + } + + tests! { jsrr + r0: "JSRR R0" => 0x4000, + r1: "JSRR R1" => 0x4040, + r2: "JSRR R2" => 0x4080, + r3: "JSRR R3" => 0x40C0, + r4: "JSRR R4" => 0x4100, + r5: "JSRR R5" => 0x4140, + r6: "JSRR R6" => 0x4180, + r7: "JSRR R7" => 0x41C0, + } + + #[test] + fn rti() { + single_instruction_test("RTI", 0x8000, LeniencyLevel::Lenient); + } + + #[test] + fn ret() { + single_instruction_test("RET", 0xC1C0, LeniencyLevel::Lenient); + } + + tests! { ldr + minimal: "LDR R0 R0 #0" => 0x6000, + r1_2: "LDR R1 R2 #3" => 0x6283, + max_imm: "LDR R3 R4 #31" => 0x671F, + neg_imm: "LDR R5 R6 #-1" => 0x6BBF, + min_imm: "LDR R7 R7 #-32" => 0x6FE0, + } + + tests! { not + r0_1: "NOT R0 R1" => 0x907F, + r2_3: "NOT R2 R3" => 0x94FF, + r4_5: "NOT R4 R5" => 0x997F, + r6_7: "NOT R6 R7" => 0x9DFF, + } + + tests! { str + minimal: "STR R0 R0 #0" => 0x7000, + r1_2: "STR R1 R2 #3" => 0x7283, + max_imm: "STR R3 R4 #31" => 0x771F, + neg_imm: "STR R5 R6 #-1" => 0x7BBF, + min_imm: "STR R7 R7 #-32" => 0x7FE0, + } + + tests! { trap + minimal: "TRAP x00" => 0xF000, + halt: "TRAP x25" => 0xF025, + max: "TRAP xFF" => 0xF0FF, + decimal: "TRAP #37" => 0xF025, + } + + tests! { named_traps + getc: "GETC" => 0xF020, + out: "OUT" => 0xF021, + puts: "PUTS" => 0xF022, + in_: "IN" => 0xF023, + putsp: "PUTSP" => 0xF024, + halt: "HALT" => 0xF025, + } + + tests! 
{ br + minimal: "BR #0" => 0x0E00, + n: "BRn #0" => 0x0800, + z: "BRz #0" => 0x0400, + p: "BRp #0" => 0x0200, + nz: "BRnz #0" => 0x0C00, + np: "BRnp #0" => 0x0A00, + zp: "BRzp #0" => 0x0600, + nzp: "BRnzp #0" => 0x0E00, + neg_imm: "BRnzp #-1" => 0x0FFF, + pos_imm: "BRnzp #1" => 0x0E01, + lenient: "BRpnz #1" => 0x0E01, + max_imm: "BRn #255" => 0x08FF, + min_imm: "BRz #-256" => 0x0500, + } + tests! { br_strict (Strict) + minimal: "BR #0" => 0x0E00, + n: "BRn #0" => 0x0800, + z: "BRz #0" => 0x0400, + p: "BRp #0" => 0x0200, + nz: "BRnz #0" => 0x0C00, + np: "BRnp #0" => 0x0A00, + zp: "BRzp #0" => 0x0600, + nzp: "BRnzp #0" => 0x0E00, + neg_imm: "BRnzp #-1" => 0x0FFF, + pos_imm: "BRnzp #1" => 0x0E01, + max_imm: "BRn #255" => 0x08FF, + min_imm: "BRz #-256" => 0x0500, + } + + macro_rules! multiple_output_tests { + ($tests_name:ident + $( + $test_name:ident: $instruction:expr => $expected:expr + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + multiple_output_test($instruction, $expected, LeniencyLevel::Lenient); + } + )+ + } + }; + } + + // TODO: make this more readable :( + // I couldn't find a way to rearrange the macros to create one + // for the boilerplate like "($opcode << 12) + ". + // Consider adding a variant in tests for this case? + macro_rules! reg_and_pcoffset9_instruction_tests { + ( + $( + $name:ident, $name2:ident: $operator:expr => $opcode:expr + ),+ + $(,)* + ) => { + $( + tests! 
{ $name + // OPERANDS RESULT + // -------- ----- + minimal: ($operator.to_string() + " R0 #0").as_str() => (($opcode << 12) + 0x000), + pos_imm: ($operator.to_string() + " R1 #1").as_str() => (($opcode << 12) + 0x201), + neg_imm: ($operator.to_string() + " R2 #-1").as_str() => (($opcode << 12) + 0x5FF), + max_imm: ($operator.to_string() + " R3 #255").as_str() => (($opcode << 12) + 0x6FF), + min_imm: ($operator.to_string() + " R4 #-256").as_str() => (($opcode << 12) + 0x900), + hex_imm: ($operator.to_string() + " R5 xA").as_str() => (($opcode << 12) + 0xA0A), + r5: ($operator.to_string() + " R5 #0").as_str() => (($opcode << 12) + 0xA00), + r6: ($operator.to_string() + " R6 #0").as_str() => (($opcode << 12) + 0xC00), + r7: ($operator.to_string() + " R7 #0").as_str() => (($opcode << 12) + 0xE00), + } + multiple_output_tests! { $name2 + self_label: ("LABEL ".to_string() + $operator + " R0 LABEL").as_str() => &[(($opcode << 12) + 0x1FF)], + next_label: + ($operator.to_string() + " R0 LABEL\n\ + LABEL ADD R0, R0, R0").as_str() + => &[ + (($opcode << 12) + 0x000), + 0x1000], + pos_label: + ($operator.to_string() + " R0 LABEL\n\ + .BLKW 1\n\ + LABEL ADD R0, R0, R0").as_str() + => &[ + (($opcode << 12) + 0x001), + 0x0000, + 0x1000], + neg_label: + ("LABEL ADD R0, R0, R0\n\ + .BLKW 1\n".to_string() + + $operator + " R0, LABEL").as_str() + => &[ + 0x1000, + 0x0000, + (($opcode << 12) + 0x1FD)], + } + )+ + }; + } + + reg_and_pcoffset9_instruction_tests! { + ld, ld_label: "LD" => 0x2, + ldi, ldi_label: "LDI" => 0xA, + lea, lea_label: "LEA" => 0xE, + st, st_label: "ST" => 0x3, + sti, sti_label: "STI" => 0xB, + } + + tests! { jsr + minimal: "JSR #0" => 0x4800, + pos_imm: "JSR #1" => 0x4801, + neg_imm: "JSR #-1" => 0x4FFF, + max_imm: "JSR #1023" => 0x4BFF, + min_imm: "JSR #-1024" => 0x4C00, + hex_imm: "JSR xA" => 0x480A, + } + + mod pseudo_ops { + use super::*; + + tests! 
{ fill + minimal: ".FILL #0" => 0x0000, + pos_imm: ".FILL #1" => 0x0001, + max_imm: ".FILL #65535" => 0xFFFF, + hex_imm: ".FILL xA" => 0x000A, + hex_imm2: ".FILL xBEEF" => 0xBEEF, + max_hex_imm: ".FILL xFFFF" => 0xFFFF, + } + + multiple_output_tests! { blkw + one: ".BLKW 1" => &[0,], + two: ".BLKW 2" => &[0, 0,], + ten: ".BLKW 10" => &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0,], + } + + multiple_output_tests! { stringz + a: ".STRINGZ \"a\"" => &[0x61, 0x00], + double_quote: ".STRINGZ \"\\\"\"" => &[0x22, 0x00], + backslash: ".STRINGZ \"\\\\\"" => &[0x5C, 0x00], + hello_world: ".STRINGZ \"Hello, World!\"" => &[0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x2C, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00], + } + } } +fn multiple_output_test(input: &str, expected: &[Word], leniency: LeniencyLevel) { + let input = format!(".ORIG x3000\n{}\n.END", input); + test(input.as_str(), 0x3000, expected, leniency); +} -fn test(input: &str, orig: usize, expected_mem: &[Word]) { - let lexer = Lexer::new(input); - let cst = parse(lexer, Lenient); +fn test(input: &str, orig: usize, expected_mem: &[Word], leniency: LeniencyLevel) { + let src = input.to_string(); + let mem = assemble(&"".to_string(), &src, leniency, true).unwrap(); - let mem = assemble(cst.objects, None); for i in 0..orig { - assert_eq!(0x0000, mem[i], "differed at {:#x}", i) + assert_mem(&mem, i, 0x0000); } for i in 0..expected_mem.len() { - assert_eq!(expected_mem[i], mem[orig + i], "differed at {:#x}", orig + i) + assert_mem(&mem, orig + i, expected_mem[i]); } - for i in (orig + expected_mem.len())..0xFFFF { - assert_eq!(0x0000, mem[i], "differed at {:#x}", i) + for i in (orig + expected_mem.len())..(ADDR_MAX_VAL as usize) { + assert_mem(&mem, i, 0x0000); } } + +fn assert_mem(mem: &MemoryDump, location: usize, expected: Word) { + let actual = mem[location]; + assert_eq!(expected, actual, "differed at {:#x}: expected {:#x}, was {:#x}", location, expected, actual); +} + + +mod error { + use assert_matches::assert_matches; + use 
lc3_assembler::error::{StrictlyInvalidLabelReason, InvalidReferenceReason, OperandType, SingleError}; + use super::*; + + macro_rules! single_error_tests { + ($tests_name:ident + $( + $test_name:ident $(($leniency:ident))?: $source:expr => $expected:pat + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + let src = $source.to_string(); + let mut leniency = LeniencyLevel::Lenient; + $(leniency = LeniencyLevel::$leniency;)? + match parse_and_analyze(&"".to_string(), &src, leniency) { + Err(error) => { + match error { + Error::Multiple(errors) => { + assert_eq!(errors.len(), 1, "Found too many args: {:?}", errors); + match errors.get(0) { + Some(Error::Single(_, error)) + | Some(Error::Spanned(_, error)) => { + assert_matches!(error, $expected); + } + _ => panic!(), + } + } + _ => panic!(), + } + } + Ok(_) => panic!(), + } + } + )+ + } + }; + } + + single_error_tests! { single_error + no_tokens: + "" + => SingleError::NoTokens, + no_orig: + "ADD R0, R0, R0\n\ + .END" + => SingleError::NoOrig, + bad_instruction: + ".ORIG x3000\n\ + #OOPS\n\ + .END" + => SingleError::BadInstruction, + strict_br_nzp_out_of_order (Strict): + ".ORIG x3000\n\ + LOOP ADD R0, R0, R0\n\ + BRpnz LOOP\n\ + .END" + => SingleError::BadInstruction, // Doesn't have to be this error, specifically. + strict_label_on_separate_line (Strict): + ".ORIG x3000\n\ + LABEL + ADD R0, R0, R0\n\ + .END" + => SingleError::BadInstruction, // Doesn't have to be this error, specifically. + bad_label: + ".ORIG x3000\n\ + #OOPS ADD R0, R0, R0\n\ + .END" + => SingleError::BadLabel, + // TODO: these errors might currently be impossible to generate. 
Review relevant parsing/analysis + // bad_opcode: + // ".ORIG x3000\n\ + // #OOPS R0, R0, R0\n\ + // .END" + // => SingleError::BadOpcode, + // bad_operands: + // ".ORIG x3000\n\ + // ADD #OOPS\n\ + // .END" + // => SingleError::BadOperands, + bad_operand: + ".ORIG x3000\n\ + ADD R0, R0, #OOPS\n\ + .END" + => SingleError::BadOperand, + too_few_operands: + ".ORIG x3000\n\ + ADD R0, R0\n\ + .END" + => SingleError::WrongNumberOfOperands { expected: 3, actual: 2 }, + too_many_operands: + ".ORIG x3000\n\ + ADD R0, R0, R0, R0\n\ + .END" + => SingleError::WrongNumberOfOperands { expected: 3, actual: 4 }, + operand_type_mismatch: + ".ORIG x3000\n\ + ADD \"oops\", R0, R0\n\ + .END" + => SingleError::OperandTypeMismatch { + expected: OperandType::Register, + actual: OperandType::String, + }, + duplicate_label: + ".ORIG x3000\n\ + LABEL ADD R0, R0, R0\n\ + LABEL ADD R0, R0, R0\n\ + .END" + => SingleError::DuplicateLabel { .. }, + undefined_label: + ".ORIG x3000\n\ + BR SOMEWHERE\n\ + .END" + => SingleError::InvalidLabelReference { reason: InvalidReferenceReason::Undefined, .. }, + program_blocks_overlap: + ".ORIG x3000\n\ + ADD R0, R0, R0\n\ + ADD R0, R0, R0\n\ + .END\n\ + \n\ + .ORIG x3001\n\ + ADD R0, R0, R0\n\ + ADD R0, R0, R0\n\ + .END" + => SingleError::ProgramBlocksOverlap { .. }, + label_too_distant: + ".ORIG x3000\n\ + LEA R0, LABEL\n\ + HALT\n\ + .BLKW 255\n\ + LABEL .FILL 0x1234\n\ + .END" + => SingleError::InvalidLabelReference { + reason: InvalidReferenceReason::TooDistant { + est_ref_pos: 0x3000, + est_label_pos: 0x3101, + offset: 0b1_0000_0000, + width: 9, + .. + }, + .. + }, + label_too_distant_negative: + ".ORIG x3000\n\ + HALT\n\ + LABEL .FILL 0x1234\n\ + .BLKW 255\n\ + LEA R0, LABEL\n\ + .END" + => SingleError::InvalidLabelReference { + reason: InvalidReferenceReason::TooDistant { + est_ref_pos: 0x3101, + est_label_pos: 0x3001, + offset: -0b1_0000_0001, + width: 9, + .. + }, + .. 
+ }, + label_contains_underscores (Strict): + ".ORIG x3000\n\ + OH_NO HALT\n\ + .END" + => SingleError::StrictlyInvalidLabel { + reason: StrictlyInvalidLabelReason::ContainsUnderscores, + .. + }, + label_too_long (Strict): + ".ORIG x3000\n\ + REALLYLONGLABEL0123456789 HALT\n\ + .END" + => SingleError::StrictlyInvalidLabel { + reason: StrictlyInvalidLabelReason::TooLong, + .. + }, + label_too_long_and_contains_underscores (Strict): + ".ORIG x3000\n\ + REALLYLONGLABEL_0123456789 HALT\n\ + .END" + => SingleError::StrictlyInvalidLabel { + reason: StrictlyInvalidLabelReason::ContainsUnderscoresAndTooLong, + .. + } + } + + macro_rules! contains_error { + ($errors:expr, $pattern:pat) => { + $errors.iter() + .any(|error| { + match error { + Error::Single(_, error) + | Error::Spanned(_, error) => { + matches!(error, $pattern) + } + _ => false, + } + }) + } + } + + macro_rules! multiple_error_tests { + ($tests_name:ident + $( + $test_name:ident: $source:expr => {$($expected:pat),+ $(,)*} + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + let src = $source.to_string(); + match parse_and_analyze(&"".to_string(), &src, LeniencyLevel::Lenient) { + Err(error) => { + match error { + Error::Multiple(errors) => { + println!("{:?}", errors); + $( + assert!(contains_error!(errors, $expected)); + )+ + } + _ => panic!(), + } + } + Ok(_) => panic!(), + } + } + )+ + } + } + } + + multiple_error_tests! 
{ multiple_errors + no_end: + ".ORIG x3000\n\ + ADD R0, R0, R0" + => + { + SingleError::BadProgramBlock, + SingleError::NoEnd + }, + two_operand_type_mismatches: + ".ORIG x3000\n\ + ADD \"hello\", WORLD, R0\n\ + .END" + => + { + SingleError::OperandTypeMismatch { expected: OperandType::Register, actual: OperandType::String }, + SingleError::OperandTypeMismatch { expected: OperandType::Register, actual: OperandType::Label } + }, + two_wrong_numbers_of_operands: + ".ORIG x3000\n\ + ADD R0\n\ + JMP R0, R0, R0\n\ + .END" + => + { + SingleError::WrongNumberOfOperands { expected: 1, actual: 3 }, + SingleError::WrongNumberOfOperands { expected: 3, actual: 1 }, + }, + very_many: + include_str!("inputs/very_many_errors.asm") + => + { + SingleError::BadOperand, + SingleError::BadInstruction, + SingleError::BadProgramBlock, + SingleError::DuplicateLabel { .. }, + SingleError::WrongNumberOfOperands { expected: 3, actual: 1 }, + SingleError::ProgramBlocksOverlap { .. }, + SingleError::OperandTypeMismatch { .. }, + SingleError::InvalidLabelReference { + reason: InvalidReferenceReason::Duplicated, + .. + }, + SingleError::InvalidLabelReference { + reason: InvalidReferenceReason::TooDistant { .. }, + .. + }, + }, + } + +} \ No newline at end of file