From ce1b521a40be6a6f605d1cd33f2adec57c7f0a6c Mon Sep 17 00:00:00 2001 From: David Gipson Date: Thu, 23 Apr 2020 19:58:04 -0500 Subject: [PATCH 01/82] assembler: start reorganizing --- Cargo.lock | 454 +----------------- assembler/.gitignore | 5 +- assembler/src/analysis/memory_placement.rs | 0 assembler/src/analysis/mod.rs | 9 + assembler/src/analysis/symbol_table.rs | 0 assembler/src/assembler.rs | 6 +- assembler/src/complete.rs | 19 +- assembler/src/error.rs | 10 +- assembler/src/expanded.rs | 24 +- .../ir1_parse_lines.rs} | 10 +- .../ir2_check_line_syntax.rs} | 8 +- .../ir3_group_lines_and_objects.rs} | 4 +- .../ir4_validate_ambiguous_tokens.rs} | 4 +- assembler/src/ir/ir5_expand_pseudo_ops.rs | 0 assembler/src/ir/mod.rs | 20 + assembler/src/lib.rs | 12 +- assembler/src/parser.rs | 8 +- assembler/src/util.rs | 12 + 18 files changed, 100 insertions(+), 505 deletions(-) create mode 100644 assembler/src/analysis/memory_placement.rs create mode 100644 assembler/src/analysis/mod.rs create mode 100644 assembler/src/analysis/symbol_table.rs rename assembler/src/{ir1_simple_lines.rs => ir/ir1_parse_lines.rs} (91%) rename assembler/src/{ir2_lines.rs => ir/ir2_check_line_syntax.rs} (99%) rename assembler/src/{ir3_unvalidated_objects.rs => ir/ir3_group_lines_and_objects.rs} (98%) rename assembler/src/{cst.rs => ir/ir4_validate_ambiguous_tokens.rs} (98%) create mode 100644 assembler/src/ir/ir5_expand_pseudo_ops.rs create mode 100644 assembler/src/ir/mod.rs create mode 100644 assembler/src/util.rs diff --git a/Cargo.lock b/Cargo.lock index a657827..7bfc2a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,15 +27,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "arbitrary" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1148c9b25d393a07c4cc3ef5dd30f82a40a1c261018c4a670611ed8e76cad3ea" -dependencies = [ - "derive_arbitrary", -] - [[package]] name = "atty" version = "0.2.14" @@ -59,39 +50,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" -[[package]] -name = "bstr" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2889e6d50f394968c8bf4240dc3f2a7eb4680844d27308f798229ac9d4725f41" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - -[[package]] -name = "bumpalo" -version = "3.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12ae9db68ad7fac5fe51304d20f016c911539251075a214f8e663babefa35187" - [[package]] name = "byteorder" version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" -[[package]] -name = "cast" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" -dependencies = [ - "rustc_version", -] - [[package]] name = "cfg-if" version = "0.1.10" @@ -124,110 +88,6 @@ dependencies = [ "vec_map", ] -[[package]] -name = "criterion" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc755679c12bda8e5523a71e4d654b6bf2e14bd838dfc48cde6559a05caf7d1" -dependencies = [ - "atty", - "cast", - "clap", - "criterion-plot", - "csv", - "itertools", - "lazy_static", - "num-traits", - "oorandom", - "plotters", - "rayon", - "regex 1.3.7", - "serde", - "serde_derive", - "serde_json", - 
"tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a01e15e0ea58e8234f96146b1f91fa9d0e4dd7a38da93ff7a75d42c0b9d3a545" -dependencies = [ - "cast", - "itertools", -] - -[[package]] -name = "crossbeam-deque" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", - "maybe-uninit", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" -dependencies = [ - "autocfg", - "cfg-if", - "crossbeam-utils", - "lazy_static", - "maybe-uninit", - "memoffset", - "scopeguard", -] - -[[package]] -name = "crossbeam-queue" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" -dependencies = [ - "autocfg", - "cfg-if", - "lazy_static", -] - -[[package]] -name = "csv" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279" -dependencies = [ - "bstr", - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - [[package]] name = "ctor" version = "0.1.13" @@ -238,17 +98,6 @@ dependencies = [ "syn", ] -[[package]] -name = "derive_arbitrary" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9735d3bee627701429c04d0e5bbaa966d22dcd64e50e4296e5aa960a15159881" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "difference" version = "2.0.0" @@ -279,21 +128,6 @@ dependencies = [ "either", ] -[[package]] -name = "itoa" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" - -[[package]] -name = "js-sys" -version = "0.3.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a27d435371a2fa5b6d2b028a74bbdb1234f308da363226a2854ca3ff8ba7055" -dependencies = [ - "wasm-bindgen", -] - [[package]] name = "lazy_static" version = "1.4.0" @@ -306,15 +140,13 @@ version = "0.1.0" dependencies = [ "annotate-snippets", "clap", - "criterion", "itertools", - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/prototype)", - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/prototype?branch=staging)", + "lc3-isa", "lc3-os", "lc3-shims", "num-traits", "pretty_assertions", - "regex 0.2.11", + "regex", ] [[package]] @@ -322,43 +154,22 @@ name = "lc3-baseline-sim" version = "0.1.0" source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e87a9a9ce10f890a1dd983223e" dependencies = [ - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/prototype?branch=staging)", - "lc3-macros 0.1.0 
(git+https://github.com/ut-utp/prototype?branch=staging)", + "lc3-isa", + "lc3-macros", "lc3-traits", "static_assertions", ] -[[package]] -name = "lc3-isa" -version = "0.1.0" -source = "git+https://github.com/ut-utp/prototype#46e0e7d5d9d0e2ec78ab483a3022a3b71e84adc4" -dependencies = [ - "arbitrary", - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/prototype)", - "serde", - "static_assertions", -] - [[package]] name = "lc3-isa" version = "0.1.0" source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e87a9a9ce10f890a1dd983223e" dependencies = [ - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/prototype?branch=staging)", + "lc3-macros", "serde", "static_assertions", ] -[[package]] -name = "lc3-macros" -version = "0.1.0" -source = "git+https://github.com/ut-utp/prototype#46e0e7d5d9d0e2ec78ab483a3022a3b71e84adc4" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "lc3-macros" version = "0.1.0" @@ -376,8 +187,8 @@ source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e dependencies = [ "lazy_static", "lc3-baseline-sim", - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/prototype?branch=staging)", - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/prototype?branch=staging)", + "lc3-isa", + "lc3-macros", "lc3-shims", "lc3-traits", "static_assertions", @@ -390,8 +201,8 @@ source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e dependencies = [ "byteorder", "chrono", - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/prototype?branch=staging)", - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/prototype?branch=staging)", + "lc3-isa", + "lc3-macros", "lc3-traits", "static_assertions", "time", @@ -403,8 +214,8 @@ name = "lc3-traits" version = "0.1.0" source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e87a9a9ce10f890a1dd983223e" dependencies = [ - "lc3-isa 0.1.0 (git+https://github.com/ut-utp/prototype?branch=staging)", - "lc3-macros 0.1.0 (git+https://github.com/ut-utp/prototype?branch=staging)", + "lc3-isa", + "lc3-macros", "log", "serde", "static_assertions", @@ -425,27 +236,12 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "maybe-uninit" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" - [[package]] name = "memchr" version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" -[[package]] -name = "memoffset" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4fc2c02a7e374099d4ee95a193111f72d2110197fe200272371758f6c3643d8" -dependencies = [ - "autocfg", -] - [[package]] name = "num-integer" version = "0.1.42" @@ -465,22 +261,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "oorandom" -version = "11.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebcec7c9c2a95cacc7cd0ecb89d8a8454eca13906f6deb55258ffff0adeb9405" - [[package]] name = "output_vt100" version = "0.1.2" @@ -490,18 +270,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "plotters" -version = "0.2.12" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e3bb8da247d27ae212529352020f3e5ee16e83c0c258061d27b08ab92675eeb" -dependencies = [ - "js-sys", - "num-traits", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "pretty_assertions" version = "0.6.1" @@ -532,30 +300,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rayon" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098" -dependencies = [ - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9" -dependencies = [ - "crossbeam-deque", - "crossbeam-queue", - "crossbeam-utils", - "lazy_static", - "num_cpus", -] - [[package]] name = "redox_syscall" version = "0.1.56" @@ -570,29 +314,11 @@ checksum = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.5.6", + "regex-syntax", "thread_local", "utf8-ranges", ] -[[package]] -name = "regex" -version = "1.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6020f034922e3194c711b82a627453881bc4682166cabb07134a10c26ba7692" -dependencies = [ - "regex-syntax 0.6.17", -] - -[[package]] -name = "regex-automata" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" -dependencies = [ - "byteorder", -] - [[package]] name = "regex-syntax" version = "0.5.6" @@ -602,57 +328,6 @@ dependencies = [ "ucd-util", ] -[[package]] -name = "regex-syntax" -version = "0.6.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae" - -[[package]] -name = "rustc_version" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -dependencies = [ - "semver", -] - -[[package]] -name = "ryu" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "535622e6be132bccd223f4bb2b8ac8d53cda3c7a6394944d3b2b33fb974f9d76" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "semver" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - [[package]] name = "serde" version = "1.0.106" @@ -673,17 +348,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_json" -version = "1.0.51" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"da07b57ee2623368351e9a0488bb0b261322a15a6e0ae53e243cbdc0f4208da9" -dependencies = [ - "itoa", - "ryu", - "serde", -] - [[package]] name = "static_assertions" version = "1.1.0" @@ -745,16 +409,6 @@ dependencies = [ "chrono", ] -[[package]] -name = "tinytemplate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a3c6667d3e65eb1bc3aed6fd14011c6cbc3a0665218ab7f5daf040b9ec371a" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "ucd-util" version = "0.1.8" @@ -785,81 +439,6 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" -[[package]] -name = "walkdir" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" -dependencies = [ - "same-file", - "winapi", - "winapi-util", -] - -[[package]] -name = "wasm-bindgen" -version = "0.2.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cc57ce05287f8376e998cbddfb4c8cb43b84a7ec55cf4551d7c00eef317a47f" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d967d37bf6c16cca2973ca3af071d0a2523392e4a594548155d89a678f4237cd" -dependencies = [ - "bumpalo", - "lazy_static", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bd151b63e1ea881bb742cd20e1d6127cef28399558f3b5d415289bc41eee3a4" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d68a5b36eef1be7868f668632863292e37739656a80fc4b9acec7b0bd35a4931" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf76fe7d25ac79748a37538b7daeed1c7a6867c92d3245c12c6222e4a20d639" - -[[package]] -name = "web-sys" -version = "0.3.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d6f51648d8c56c366144378a33290049eafdd784071077f6fe37dae64c1c4cb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "winapi" version = "0.3.8" @@ -876,15 +455,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa515c5163a99cc82bab70fd3bfdd36d827be85de63737b40fcef2ce084a436e" -dependencies = [ - "winapi", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/assembler/.gitignore b/assembler/.gitignore index f7a9f85..d200436 100644 --- a/assembler/.gitignore +++ b/assembler/.gitignore @@ -1,4 +1,7 @@ /target **/*.rs.bk Cargo.lock -*.iml \ No newline at end of file +*.iml +*.asm +*.obj +*.mem \ No newline at end of file diff --git a/assembler/src/analysis/memory_placement.rs b/assembler/src/analysis/memory_placement.rs new file mode 100644 
index 0000000..e69de29 diff --git a/assembler/src/analysis/mod.rs b/assembler/src/analysis/mod.rs new file mode 100644 index 0000000..535e07a --- /dev/null +++ b/assembler/src/analysis/mod.rs @@ -0,0 +1,9 @@ +/// These modules provide functions that analyze fully-parsed syntax trees +/// and related data structures. + +/// Analyzes whether objects can/will be placed in valid memory locations without overlap. +pub mod memory_placement; + +/// Creates a structure to store the locations of labels. +/// Used for later computing offsets from label operands. +pub mod symbol_table; \ No newline at end of file diff --git a/assembler/src/analysis/symbol_table.rs b/assembler/src/analysis/symbol_table.rs new file mode 100644 index 0000000..e69de29 diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs index c2c012a..f912b41 100644 --- a/assembler/src/assembler.rs +++ b/assembler/src/assembler.rs @@ -1,5 +1,5 @@ use crate::expanded::{expand_pseudo_ops, build_symbol_table, validate_placement, construct_instructions, CompleteObject, InsnOrValue, InsnOrValueWithSrc}; -use crate::cst; +use crate::ir::ir4_validate_ambiguous_tokens; use lc3_isa::{ADDR_SPACE_SIZE_IN_WORDS, Addr}; use lc3_isa::util::MemoryDump; @@ -18,7 +18,7 @@ impl<'input> QueryableObject<'input> { } pub fn assemble<'input, O>(objects: O, background: Option) -> MemoryDump - where O: IntoIterator> + where O: IntoIterator> { let complete_objects = assemble_to_queryable_objects(objects); assemble_queryable_objects(complete_objects, background) @@ -26,7 +26,7 @@ pub fn assemble<'input, O>(objects: O, background: Option) -> Memory pub fn assemble_to_queryable_objects<'input, O>(objects: O) -> QueryableObject<'input> - where O: IntoIterator> + where O: IntoIterator> { let expanded_objects = objects.into_iter().map(expand_pseudo_ops).collect(); validate_placement(&expanded_objects).unwrap(); diff --git a/assembler/src/complete.rs b/assembler/src/complete.rs index 02933c9..6a0f862 100644 --- a/assembler/src/complete.rs +++ b/assembler/src/complete.rs @@ -1,20 +1,15 @@ -/// In my hubris, I thought the CST was complete enough. -/// As it turns out, it was nowhere near. -/// -/// This module is an attempt to rectify my error in one behemoth structure. -/// When `complete` is complete, it will replace `cst` in name and the latter -/// will become `ir4_validated_objects`. -/// -/// The main difference here is that `complete` will store as much data as possible +/// `complete` will store as much data as possible /// relating to the source *and* what it will be assembled to. /// This will allow querying for the source assembled to a memory location, /// the addresses corresponding to labels, and whatever is required in the future /// to provide a nice development environment. -/// -/// `cst` previously stopped where all errors could be represented as part of the tree. -/// `complete` will continue by assembling as much as possible and bringing that data in. 
-pub struct Program { + +pub struct Program { + pub objects: Vec } +pub struct Object { + +} diff --git a/assembler/src/error.rs b/assembler/src/error.rs index b367c6f..5b34a2a 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -4,10 +4,10 @@ use annotate_snippets::snippet::{Snippet, Annotation, Slice, SourceAnnotation, A use ParseError::*; use itertools::Itertools; -use crate::cst; -use crate::cst::{Object, ObjectContent, Operation, Operands}; +use crate::ir::ir4_validate_ambiguous_tokens; +use crate::ir::ir4_validate_ambiguous_tokens::{Object, ObjectContent, Operation, Operands}; use lc3_isa::SignedWord; -use crate::ir2_lines::LineContent::Invalid; +use crate::ir::ir2_check_line_syntax::LineContent::Invalid; use annotate_snippets::display_list::FormatOptions; #[derive(Debug, Clone, Copy, PartialEq)] @@ -171,10 +171,10 @@ impl ParseError { #[derive(Debug, Clone, PartialEq)] pub struct MemoryError(pub String); -pub fn extract_file_errors(cst: cst::File) -> Vec { +pub fn extract_file_errors(cst: ir4_validate_ambiguous_tokens::File) -> Vec { let mut errors = Vec::new(); - let cst::File { objects, .. } = cst; + let ir4_validate_ambiguous_tokens::File { objects, .. } = cst; if objects.len() == 0 { errors.push(ParseError::Misc("File contained no objects.".to_string())); } diff --git a/assembler/src/expanded.rs b/assembler/src/expanded.rs index f0fea4a..70d24b0 100644 --- a/assembler/src/expanded.rs +++ b/assembler/src/expanded.rs @@ -1,6 +1,6 @@ // For expanded pseudo-op structures -use crate::cst; -use crate::cst::{Operands, ImmOrLabel, UnsignedImmOrLabel, Checked}; +use crate::ir::ir4_validate_ambiguous_tokens; +use crate::ir::ir4_validate_ambiguous_tokens::{Operands, ImmOrLabel, UnsignedImmOrLabel, Checked}; use crate::error::MemoryError; use lc3_isa; use lc3_isa::{Word, SignedWord}; @@ -10,7 +10,7 @@ use std::iter::repeat; use itertools::Itertools; pub type SymbolTable<'input> = HashMap<&'input str, Addr>; -pub type File<'input> = Vec>; +pub type File<'input> = Vec>; pub struct Object<'input> { orig: Addr, @@ -19,7 +19,7 @@ pub struct Object<'input> { #[derive(Clone)] pub enum OpOrValue<'input> { - Operation(cst::Operation<'input>), + Operation(ir4_validate_ambiguous_tokens::Operation<'input>), Value(Word), } @@ -60,8 +60,8 @@ pub enum InsnOrValue { pub type Label<'input> = &'input str; -pub fn expand_pseudo_ops(object: cst::Object) -> Object { - let cst::Object { origin, content, .. } = object; +pub fn expand_pseudo_ops(object: ir4_validate_ambiguous_tokens::Object) -> Object { + let ir4_validate_ambiguous_tokens::Object { origin, content, .. } = object; let orig = origin.unwrap(); @@ -133,7 +133,7 @@ pub fn construct_instructions<'input>(object: Object, symbol_table: HashMap<&'in let mut insns_or_values = Vec::new(); for op_or_value in object.ops_or_values { let (insn_or_value, src_lines) = match op_or_value.1 { - OpOrValue::Operation(cst::Operation { operands: Operands::Fill { value }, src_lines, .. }) => { + OpOrValue::Operation(ir4_validate_ambiguous_tokens::Operation { operands: Operands::Fill { value }, src_lines, .. 
}) => { let value = match value.unwrap() { UnsignedImmOrLabel::Imm(immediate) => immediate.unwrap(), UnsignedImmOrLabel::Label(label) => { @@ -148,12 +148,12 @@ pub fn construct_instructions<'input>(object: Object, symbol_table: HashMap<&'in let src_lines = instruction_cst.src_lines; let insn = match instruction_cst.operands { Operands::Add { dr, sr1, sr2_or_imm5 } => match sr2_or_imm5.unwrap() { - cst::Sr2OrImm5::Imm5(immediate) => Instruction::new_add_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - cst::Sr2OrImm5::Sr2(src_reg) => Instruction::new_add_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), + ir4_validate_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_add_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), + ir4_validate_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_add_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), }, Operands::And { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.unwrap() { - cst::Sr2OrImm5::Imm5(immediate) => Instruction::new_and_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - cst::Sr2OrImm5::Sr2(src_reg) => Instruction::new_and_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), + ir4_validate_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_and_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), + ir4_validate_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_and_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), }, Operands::Ld { dr, pc_offset9 } => Instruction::new_ld(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), @@ -203,7 +203,7 @@ pub fn construct_instructions<'input>(object: Object, symbol_table: HashMap<&'in CompleteObject { orig, insns_or_values, symbol_table } } -fn compute_offset(pc_offset: cst::Checked, location: Addr, symbol_table: &HashMap<&str, Addr>) -> SignedWord { +fn compute_offset(pc_offset: ir4_validate_ambiguous_tokens::Checked, location: Addr, symbol_table: &HashMap<&str, Addr>) -> SignedWord { match pc_offset.unwrap() { ImmOrLabel::Label(label) => { let label = label.unwrap(); diff --git a/assembler/src/ir1_simple_lines.rs b/assembler/src/ir/ir1_parse_lines.rs similarity index 91% rename from assembler/src/ir1_simple_lines.rs rename to assembler/src/ir/ir1_parse_lines.rs index 5be2c68..1a6d29a 100644 --- a/assembler/src/ir1_simple_lines.rs +++ b/assembler/src/ir/ir1_parse_lines.rs @@ -1,4 +1,5 @@ use crate::lexer::{Token, Lexer, TokenType}; +use crate::util::reconstruct_src; use std::iter::Peekable; use itertools::Itertools; @@ -54,15 +55,6 @@ fn parse_simple_line<'input>(tokens: &mut Peekable>) -> SimpleLine SimpleLine { src, content, comment, newline } } -fn reconstruct_src<'input>(tokens: impl IntoIterator>) -> String { - let mut vec = tokens.into_iter().collect::>(); - vec.sort_by_key(|token| token.span.0); - vec.dedup(); - vec.into_iter() - .map(|token| token.src) - .join("") -} - #[cfg(test)] mod tests { use super::*; diff --git a/assembler/src/ir2_lines.rs b/assembler/src/ir/ir2_check_line_syntax.rs similarity index 99% rename from assembler/src/ir2_lines.rs rename to assembler/src/ir/ir2_check_line_syntax.rs index 45cd40c..e731eeb 100644 --- a/assembler/src/ir2_lines.rs +++ b/assembler/src/ir/ir2_check_line_syntax.rs @@ -1,8 +1,8 @@ -use crate::lexer::{Token, TokenType, Opcode, Op, NamedTrap, PseudoOp, Span}; -use crate::ir1_simple_lines::{SimpleLines, SimpleLine}; use std::iter::Peekable; -use crate::error::ParseError; use itertools::Itertools; +use crate::lexer::{Token, TokenType, Opcode, Op, NamedTrap, PseudoOp, Span}; +use 
crate::ir::ir1_parse_lines::{SimpleLines, SimpleLine}; +use crate::error::ParseError; pub type Lines<'input> = Vec>; @@ -408,7 +408,7 @@ fn parse_separator<'input, T>(tokens: &mut Peekable) -> Result { diff --git a/assembler/src/cst.rs b/assembler/src/ir/ir4_validate_ambiguous_tokens.rs similarity index 98% rename from assembler/src/cst.rs rename to assembler/src/ir/ir4_validate_ambiguous_tokens.rs index ff12b3b..feb72e2 100644 --- a/assembler/src/cst.rs +++ b/assembler/src/ir/ir4_validate_ambiguous_tokens.rs @@ -1,8 +1,8 @@ use lc3_isa::{Addr, SignedWord, check_signed_imm, Word}; use crate::error::{ParseError, InvalidLabelReason, InvalidRegReason, InvalidImmediateReason}; use crate::lexer::Token; -use crate::ir2_lines::{Line, OperationTokens, OperandTokens}; -use crate::ir3_unvalidated_objects::{UnvalidatedFile, UnvalidatedObject, UnvalidatedLine, UnvalidatedObjectContent}; +use crate::ir::ir2_check_line_syntax::{Line, OperationTokens, OperandTokens}; +use crate::ir::ir3_group_lines_and_objects::{UnvalidatedFile, UnvalidatedObject, UnvalidatedLine, UnvalidatedObjectContent}; use std::convert::TryInto; use num_traits::Num; use std::string::ToString; diff --git a/assembler/src/ir/ir5_expand_pseudo_ops.rs b/assembler/src/ir/ir5_expand_pseudo_ops.rs new file mode 100644 index 0000000..e69de29 diff --git a/assembler/src/ir/mod.rs b/assembler/src/ir/mod.rs new file mode 100644 index 0000000..20f63da --- /dev/null +++ b/assembler/src/ir/mod.rs @@ -0,0 +1,20 @@ +/// The series of intermediate representations (IRs) +/// of the parse tree necessary to provide as many +/// good errors as possible while assembling. + +/// This pass separates the tokens by newline +/// and separates comments. +pub mod ir1_parse_lines; + +/// This pass checks that the sequence of tokens on each line is valid. +pub mod ir2_check_line_syntax; + +/// This pass checks that the sequence of lines is valid +/// and groups them into separate objects if there are multiple .ORIG/.ENDs. +pub mod ir3_group_lines_and_objects; + +/// This pass disambiguates and validates ambiguous immediate operands and labels. +pub mod ir4_validate_ambiguous_tokens; + +/// This pass expands each pseudo-op which fills memory into the appropriate list of values. 
+pub mod ir5_expand_pseudo_ops; diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index b776a89..acf7a07 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -3,19 +3,13 @@ // TODO: denys // TODO: docs URL -pub mod error; - pub mod lexer; - pub mod parser; -pub mod ir1_simple_lines; -pub mod ir2_lines; -pub mod ir3_unvalidated_objects; -pub mod cst; +pub mod ir; +pub mod error; pub mod expanded; - +pub mod util; pub mod complete; - pub mod assembler; diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index b4fcc46..2399385 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -1,8 +1,8 @@ -use crate::cst::{File, CstParser}; use crate::lexer::Lexer; -use crate::ir1_simple_lines::parse_simple_lines; -use crate::ir2_lines::parse_lines; -use crate::ir3_unvalidated_objects::parse_unvalidated_file; +use crate::ir::ir1_parse_lines::parse_simple_lines; +use crate::ir::ir2_check_line_syntax::parse_lines; +use crate::ir::ir3_group_lines_and_objects::parse_unvalidated_file; +use crate::ir::ir4_validate_ambiguous_tokens::{File, CstParser}; pub fn parse(tokens: Lexer, leniency: LeniencyLevel) -> File { let ir1 = parse_simple_lines(tokens); diff --git a/assembler/src/util.rs b/assembler/src/util.rs new file mode 100644 index 0000000..5577c09 --- /dev/null +++ b/assembler/src/util.rs @@ -0,0 +1,12 @@ +use crate::lexer::Token; +use itertools::Itertools; + +pub(crate) fn reconstruct_src<'input>(tokens: impl IntoIterator>) -> String { + let mut vec = tokens.into_iter().collect::>(); + vec.sort_by_key(|token| token.span.0); + vec.dedup(); + vec.into_iter() + .map(|token| token.src) + .join("") +} + From d6a5fc3dc331c3e97a0f3c5f3a19e001589d3fc6 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Thu, 23 Apr 2020 20:00:10 -0500 Subject: [PATCH 02/82] assembler: add LC-3 artifacts to gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index fa46ec1..56e85eb 100755 --- a/.gitignore +++ b/.gitignore @@ -88,3 +88,8 @@ $RECYCLE.BIN/ # Windows shortcuts *.lnk + +# LC-3 Assembly +*.asm +*.mem +*.obj From 378ca7128a5223e96e79badda6f170f2e470c693 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Thu, 23 Apr 2020 20:56:14 -0500 Subject: [PATCH 03/82] assembler: make types in each IR have same name for similar concepts, using module name to disambiguate --- assembler/src/ir/ir1_parse_lines.rs | 16 ++-- assembler/src/ir/ir2_check_line_syntax.rs | 10 +-- .../src/ir/ir3_group_lines_and_objects.rs | 74 +++++++++++-------- .../src/ir/ir4_validate_ambiguous_tokens.rs | 16 ++-- 4 files changed, 63 insertions(+), 53 deletions(-) diff --git a/assembler/src/ir/ir1_parse_lines.rs b/assembler/src/ir/ir1_parse_lines.rs index 1a6d29a..21b46b6 100644 --- a/assembler/src/ir/ir1_parse_lines.rs +++ b/assembler/src/ir/ir1_parse_lines.rs @@ -3,16 +3,16 @@ use crate::util::reconstruct_src; use std::iter::Peekable; use itertools::Itertools; -pub type SimpleLines<'input> = Vec>; +pub type Lines<'input> = Vec>; -pub struct SimpleLine<'input> { +pub struct Line<'input> { pub src: String, pub content: Vec>, pub comment: Option>, pub newline: Option>, } -pub fn parse_simple_lines(lexer: Lexer) -> SimpleLines { +pub fn parse_simple_lines(lexer: Lexer) -> Lines { let mut tokens = lexer.peekable(); let mut simple_lines = Vec::new(); while tokens.peek().is_some() { @@ -22,7 +22,7 @@ pub fn parse_simple_lines(lexer: Lexer) -> SimpleLines { simple_lines } -fn parse_simple_line<'input>(tokens: &mut Peekable>) -> SimpleLine<'input> { +fn 
parse_simple_line<'input>(tokens: &mut Peekable>) -> Line<'input> { let content = tokens.peeking_take_while(|&Token { ty, .. }| ty != TokenType::Comment && ty != TokenType::Newline) .collect::>(); @@ -52,7 +52,7 @@ fn parse_simple_line<'input>(tokens: &mut Peekable>) -> SimpleLine } let src = reconstruct_src(all_tokens); - SimpleLine { src, content, comment, newline } + Line { src, content, comment, newline } } #[cfg(test)] @@ -64,7 +64,7 @@ mod tests { fn no_newline() { let lexer = Lexer::new("ADD"); let simple_lines = parse_simple_lines(lexer); - let SimpleLine { src, content, comment, newline } = simple_lines.get(0).unwrap(); + let Line { src, content, comment, newline } = simple_lines.get(0).unwrap(); assert_eq!(*src, "ADD".to_string()); assert_eq!(content.len(), 1); assert!(comment.is_none()); @@ -75,13 +75,13 @@ mod tests { fn two_lines() { let lexer = Lexer::new("ADD ; test\n.END"); let simple_lines = parse_simple_lines(lexer); - let SimpleLine { src, content, comment, newline } = simple_lines.get(0).unwrap(); + let Line { src, content, comment, newline } = simple_lines.get(0).unwrap(); assert_eq!(*src, "ADD ; test\n".to_string()); assert_eq!(content.len(), 2); assert!(comment.is_some()); assert!(newline.is_some()); - let SimpleLine { src, content, comment, newline } = simple_lines.get(1).unwrap(); + let Line { src, content, comment, newline } = simple_lines.get(1).unwrap(); assert_eq!(*src, ".END".to_string()); assert_eq!(content.len(), 1); assert!(comment.is_none()); diff --git a/assembler/src/ir/ir2_check_line_syntax.rs b/assembler/src/ir/ir2_check_line_syntax.rs index e731eeb..89289a5 100644 --- a/assembler/src/ir/ir2_check_line_syntax.rs +++ b/assembler/src/ir/ir2_check_line_syntax.rs @@ -1,7 +1,7 @@ use std::iter::Peekable; use itertools::Itertools; use crate::lexer::{Token, TokenType, Opcode, Op, NamedTrap, PseudoOp, Span}; -use crate::ir::ir1_parse_lines::{SimpleLines, SimpleLine}; +use crate::ir::ir1_parse_lines; use crate::error::ParseError; pub type Lines<'input> = Vec>; @@ -195,14 +195,14 @@ impl<'input> OperandTokens<'input> { } } -pub fn parse_lines(simple_lines: SimpleLines) -> Lines { - simple_lines.into_iter() +pub fn parse_lines(ir1_lines: ir1_parse_lines::Lines) -> Lines { + ir1_lines.into_iter() .map(parse_line) .collect() } -fn parse_line(simple_line: SimpleLine) -> Line { - let SimpleLine { content: old_content, comment, newline, src, } = simple_line; +fn parse_line(ir1_line: ir1_parse_lines::Line) -> Line { + let ir1_parse_lines::Line { content: old_content, comment, newline, src, } = ir1_line; let backup = old_content.clone(); let mut tokens = old_content.into_iter().peekable(); diff --git a/assembler/src/ir/ir3_group_lines_and_objects.rs b/assembler/src/ir/ir3_group_lines_and_objects.rs index fd58078..4ecc16c 100644 --- a/assembler/src/ir/ir3_group_lines_and_objects.rs +++ b/assembler/src/ir/ir3_group_lines_and_objects.rs @@ -1,31 +1,41 @@ use std::iter::Peekable; use std::mem; use crate::lexer::Token; -use crate::ir::ir2_check_line_syntax::{OperationTokens, Label, Line, Lines, LineContent, OperandTokens}; +use crate::ir::ir2_check_line_syntax; + +// Shorthands +pub type IR2Line<'input> = ir2_check_line_syntax::Line<'input>; +pub type IR2Lines<'input> = ir2_check_line_syntax::Lines<'input>; +pub type IR2LineContent<'input> = ir2_check_line_syntax::LineContent<'input>; + +// Types "part of" this IR +pub type OperationTokens<'input> = ir2_check_line_syntax::OperationTokens<'input>; +pub type OperandTokens<'input> = ir2_check_line_syntax::OperandTokens<'input>; 
+pub type Label<'input> = ir2_check_line_syntax::Label<'input>; #[derive(Clone)] -pub struct UnvalidatedFile<'input> { - pub objects: Vec>, - pub ignored: Vec>, +pub struct File<'input> { + pub objects: Vec>, + pub ignored: Vec>, } #[derive(Clone)] -pub struct UnvalidatedObject<'input> { - pub origin_src: UnvalidatedLine<'input>, +pub struct Object<'input> { + pub origin_src: Line<'input>, pub origin: Token<'input>, - pub content: UnvalidatedObjectContent<'input>, + pub content: ObjectContent<'input>, } #[derive(Clone)] -pub struct UnvalidatedObjectContent<'input> { - pub operations: Vec>, - pub empty_lines: Vec>, - pub hanging_labels: Vec>, - pub invalid_lines: Vec>, +pub struct ObjectContent<'input> { + pub operations: Vec>, + pub empty_lines: Vec>, + pub hanging_labels: Vec>, + pub invalid_lines: Vec>, } #[derive(Clone)] -pub struct UnvalidatedLine<'input> { +pub struct Line<'input> { pub src_lines: Vec, pub label: Option>, pub operation: OperationTokens<'input>, @@ -34,7 +44,7 @@ pub struct UnvalidatedLine<'input> { pub newlines: Vec>, } -pub fn parse_unvalidated_file(lines: Lines) -> UnvalidatedFile { +pub fn parse_unvalidated_file(lines: IR2Lines) -> File { let mut objects = Vec::new(); let mut ignored = Vec::new(); let mut lines = lines.into_iter().peekable(); @@ -45,8 +55,8 @@ pub fn parse_unvalidated_file(lines: Lines) -> UnvalidatedFile { Some(line) => { let line_backup = line.clone(); match line { - Line { - content: LineContent::Valid(label, Some(operation)), + IR2Line { + content: IR2LineContent::Valid(label, Some(operation)), whitespace, comment, newline, src } => { if let OperationTokens { operands: OperandTokens::Orig { origin }, .. } = operation { @@ -59,9 +69,9 @@ pub fn parse_unvalidated_file(lines: Lines) -> UnvalidatedFile { if let Some(newline) = newline { newlines.push(newline); } - let origin_src = UnvalidatedLine { src_lines: vec![src], label, operation, whitespace, comments, newlines }; + let origin_src = Line { src_lines: vec![src], label, operation, whitespace, comments, newlines }; match parse_unvalidated_object_content(&mut lines) { - Ok(content) => { objects.push(UnvalidatedObject { origin_src, origin, content }); }, + Ok(content) => { objects.push(Object { origin_src, origin, content }); }, Err(ObjectParseError { lines_seen, .. 
}) => { ignored.push(line_backup); ignored.extend(lines_seen); @@ -79,15 +89,15 @@ pub fn parse_unvalidated_file(lines: Lines) -> UnvalidatedFile { } } } - UnvalidatedFile { objects, ignored } + File { objects, ignored } } struct ObjectParseError<'input> { - lines_seen: Vec>, + lines_seen: Vec>, } -fn parse_unvalidated_object_content<'input, T>(lines: &mut Peekable) -> Result, ObjectParseError<'input>> - where T: Iterator> +fn parse_unvalidated_object_content<'input, T>(lines: &mut Peekable) -> Result, ObjectParseError<'input>> + where T: Iterator> { let mut operations = Vec::new(); let mut empty_lines = Vec::new(); @@ -111,23 +121,23 @@ fn parse_unvalidated_object_content<'input, T>(lines: &mut Peekable) -> Resul lines_seen.push(line.clone()); let line_backup = line.clone(); - let Line { content, whitespace: line_whitespace, comment, newline, src } = line; + let IR2Line { content, whitespace: line_whitespace, comment, newline, src } = line; if hanging_label.is_some() { - if let LineContent::Valid(None, _) = &content { + if let IR2LineContent::Valid(None, _) = &content { } else { hanging_labels.push(hanging_label.take().unwrap()); } } match content { - LineContent::Invalid(_) => { invalid_lines.push(line_backup); } - LineContent::Valid(None, None) => { empty_lines.push(line_backup); }, - LineContent::Valid(Some(_), None) => { hanging_label = Some(line_backup); }, - LineContent::Valid(label, Some(operation)) => { + IR2LineContent::Invalid(_) => { invalid_lines.push(line_backup); } + IR2LineContent::Valid(None, None) => { empty_lines.push(line_backup); }, + IR2LineContent::Valid(Some(_), None) => { hanging_label = Some(line_backup); }, + IR2LineContent::Valid(label, Some(operation)) => { let label = if hanging_label.is_some() { assert!(label.is_none()); - let Line { + let IR2Line { content: label_content, whitespace: label_whitespace, comment: label_comment, @@ -139,7 +149,7 @@ fn parse_unvalidated_object_content<'input, T>(lines: &mut Peekable) -> Resul src_lines.push(src); if let Some(label_comment) = label_comment { comments.push(label_comment); } if let Some(label_newline) = label_newline { newlines.push(label_newline); } - if let LineContent::Valid(label, None) = label_content { + if let IR2LineContent::Valid(label, None) = label_content { label } else { unreachable!("Hanging label wasn't a line with only a label! Contact the maintainers."); @@ -159,7 +169,7 @@ fn parse_unvalidated_object_content<'input, T>(lines: &mut Peekable) -> Resul if let OperationTokens { operands: OperandTokens::End, .. 
} = operation { found_end = true; } - let unvalidated_line = UnvalidatedLine { + let unvalidated_line = Line { label, operation, src_lines: finished_src_lines, @@ -178,7 +188,7 @@ fn parse_unvalidated_object_content<'input, T>(lines: &mut Peekable) -> Resul } if found_end { - Ok(UnvalidatedObjectContent { operations, empty_lines, hanging_labels, invalid_lines }) + Ok(ObjectContent { operations, empty_lines, hanging_labels, invalid_lines }) } else { Err(ObjectParseError { lines_seen diff --git a/assembler/src/ir/ir4_validate_ambiguous_tokens.rs b/assembler/src/ir/ir4_validate_ambiguous_tokens.rs index feb72e2..20d843b 100644 --- a/assembler/src/ir/ir4_validate_ambiguous_tokens.rs +++ b/assembler/src/ir/ir4_validate_ambiguous_tokens.rs @@ -2,7 +2,7 @@ use lc3_isa::{Addr, SignedWord, check_signed_imm, Word}; use crate::error::{ParseError, InvalidLabelReason, InvalidRegReason, InvalidImmediateReason}; use crate::lexer::Token; use crate::ir::ir2_check_line_syntax::{Line, OperationTokens, OperandTokens}; -use crate::ir::ir3_group_lines_and_objects::{UnvalidatedFile, UnvalidatedObject, UnvalidatedLine, UnvalidatedObjectContent}; +use crate::ir::ir3_group_lines_and_objects; use std::convert::TryInto; use num_traits::Num; use std::string::ToString; @@ -136,17 +136,17 @@ pub struct CstParser { impl CstParser { - pub fn parse_cst<'input>(&self, file: UnvalidatedFile<'input>) -> File<'input> { - let UnvalidatedFile { objects, ignored } = file; + pub fn parse_cst<'input>(&self, file: ir3_group_lines_and_objects::File<'input>) -> File<'input> { + let ir3_group_lines_and_objects::File { objects, ignored } = file; File { objects: objects.into_iter().map(|o| self.validate_object(o)).collect(), ignored } } - fn validate_object<'input>(&self, object: UnvalidatedObject<'input>) -> Object<'input> { - let UnvalidatedObject { origin_src, origin, content } = object; - let UnvalidatedObjectContent { operations, empty_lines, hanging_labels, invalid_lines } = content; + fn validate_object<'input>(&self, object: ir3_group_lines_and_objects::Object<'input>) -> Object<'input> { + let ir3_group_lines_and_objects::Object { origin_src, origin, content } = object; + let ir3_group_lines_and_objects::ObjectContent { operations, empty_lines, hanging_labels, invalid_lines } = content; Object { origin_src: self.validate_line(origin_src), origin: self.validate_numeric_immediate(origin), @@ -159,8 +159,8 @@ impl CstParser { } } - fn validate_line<'input>(&self, line: UnvalidatedLine<'input>) -> Operation<'input> { - let UnvalidatedLine { + fn validate_line<'input>(&self, line: ir3_group_lines_and_objects::Line<'input>) -> Operation<'input> { + let ir3_group_lines_and_objects::Line { label, operation: OperationTokens { operator, From 5ecf289c1601dfd230ec139a745ad9fe206a7c3a Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 24 Apr 2020 01:16:03 -0500 Subject: [PATCH 04/82] assembler: split expanded.rs into appropriate modules --- assembler/src/analysis/extract_errors.rs | 0 assembler/src/analysis/memory_placement.rs | 21 ++ assembler/src/analysis/mod.rs | 8 +- assembler/src/analysis/symbol_table.rs | 21 ++ assembler/src/analysis/validate.rs | 0 assembler/src/assembler.rs | 138 ++++++++++- assembler/src/expanded.rs | 217 ------------------ .../src/ir/ir4_validate_ambiguous_tokens.rs | 24 +- assembler/src/ir/ir5_expand_pseudo_ops.rs | 54 +++++ assembler/src/lib.rs | 2 +- 10 files changed, 252 insertions(+), 233 deletions(-) create mode 100644 assembler/src/analysis/extract_errors.rs create mode 100644 
assembler/src/analysis/validate.rs delete mode 100644 assembler/src/expanded.rs diff --git a/assembler/src/analysis/extract_errors.rs b/assembler/src/analysis/extract_errors.rs new file mode 100644 index 0000000..e69de29 diff --git a/assembler/src/analysis/memory_placement.rs b/assembler/src/analysis/memory_placement.rs index e69de29..7d54d0e 100644 --- a/assembler/src/analysis/memory_placement.rs +++ b/assembler/src/analysis/memory_placement.rs @@ -0,0 +1,21 @@ +use itertools::Itertools; +use lc3_isa::Addr; + +use crate::ir::ir5_expand_pseudo_ops; +use crate::error::MemoryError; + +pub fn validate_placement(objects: &Vec) -> Result<(), MemoryError> { + let starts_and_ends = objects.iter().map(get_start_and_end); + for ((_, prev_end), (next_start, _)) in starts_and_ends.tuple_windows() { + if prev_end > next_start { + return Err(MemoryError("Objects overlap.".to_string())); + } + } + Ok(()) +} + +fn get_start_and_end(object: &ir5_expand_pseudo_ops::Object) -> (Addr, Addr) { + let start = object.orig; + let end = start + object.ops_or_values.len() as Addr; + (start, end) +} diff --git a/assembler/src/analysis/mod.rs b/assembler/src/analysis/mod.rs index 535e07a..fb73117 100644 --- a/assembler/src/analysis/mod.rs +++ b/assembler/src/analysis/mod.rs @@ -6,4 +6,10 @@ pub mod memory_placement; /// Creates a structure to store the locations of labels. /// Used for later computing offsets from label operands. -pub mod symbol_table; \ No newline at end of file +pub mod symbol_table; + +/// Extracts all errors from the parsed structs. +pub mod extract_errors; + +/// Determines whether or not a parsed struct can be successfully assembled. +pub mod validate; \ No newline at end of file diff --git a/assembler/src/analysis/symbol_table.rs b/assembler/src/analysis/symbol_table.rs index e69de29..40ec4ea 100644 --- a/assembler/src/analysis/symbol_table.rs +++ b/assembler/src/analysis/symbol_table.rs @@ -0,0 +1,21 @@ +use std::collections::HashMap; +use lc3_isa::Addr; +use crate::ir::ir5_expand_pseudo_ops; +use crate::error::MemoryError; + +pub type SymbolTable<'input> = HashMap<&'input str, Addr>; + +pub fn build_symbol_table<'input>(object: &ir5_expand_pseudo_ops::Object<'input>) -> Result, MemoryError> { + let mut symbol_table = HashMap::new(); + let mut current_location = object.orig; + for op_or_value in object.ops_or_values.iter() { + if let Some(label) = op_or_value.0 { + let other_location = symbol_table.insert(label.clone(), current_location); + if let Some(_) = other_location { + return Err(MemoryError("Duplicate label at different location.".to_string())) + } + } + current_location += 1; + }; + Ok(symbol_table) +} diff --git a/assembler/src/analysis/validate.rs b/assembler/src/analysis/validate.rs new file mode 100644 index 0000000..e69de29 diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs index f912b41..467cbc2 100644 --- a/assembler/src/assembler.rs +++ b/assembler/src/assembler.rs @@ -1,8 +1,12 @@ -use crate::expanded::{expand_pseudo_ops, build_symbol_table, validate_placement, construct_instructions, CompleteObject, InsnOrValue, InsnOrValueWithSrc}; -use crate::ir::ir4_validate_ambiguous_tokens; -use lc3_isa::{ADDR_SPACE_SIZE_IN_WORDS, Addr}; +use crate::ir::{ir4_validate_ambiguous_tokens, ir5_expand_pseudo_ops}; +use lc3_isa::{ADDR_SPACE_SIZE_IN_WORDS, Addr, Instruction, Word, SignedWord}; use lc3_isa::util::MemoryDump; +use crate::analysis::symbol_table::{SymbolTable, build_symbol_table}; +use std::collections::HashMap; +use 
crate::ir::ir4_validate_ambiguous_tokens::{UnsignedImmOrLabel, Operands, ImmOrLabel}; +use crate::ir::ir5_expand_pseudo_ops::expand_pseudo_ops; +use crate::analysis::memory_placement::validate_placement; pub struct QueryableObject<'input> { segments: Vec> @@ -56,3 +60,131 @@ pub fn assemble_queryable_objects(queryable_object: QueryableObject, background: memory } + +pub struct CompleteObject<'input> { + pub orig: Addr, + pub insns_or_values: Vec, + pub symbol_table: SymbolTable<'input>, +} + +impl<'input> CompleteObject<'input> { + pub fn get_source(&self, address: Addr) -> Option> { + if address < self.orig { + return None; + } + let offset = (address - self.orig) as usize; + let insn_or_value = self.insns_or_values.get(offset); + if let Some(InsnOrValueWithSrc { src_lines, .. }) = insn_or_value { + Some(src_lines.clone()) + } else { + None + } + } + + pub fn get_label_addr(&self, label: &str) -> Option<&Addr> { + self.symbol_table.get(label) + } +} + +pub struct InsnOrValueWithSrc { + pub src_lines: Vec, + pub insn_or_value: InsnOrValue, +} + +pub enum InsnOrValue { + Instruction(Instruction), + Value(Word), +} + +pub type Label<'input> = &'input str; + + +pub fn construct_instructions<'input>(object: ir5_expand_pseudo_ops::Object, symbol_table: HashMap<&'input str, Addr>) -> CompleteObject<'input> { + let orig = object.orig; + let mut current_location = object.orig; + let mut insns_or_values = Vec::new(); + for op_or_value in object.ops_or_values { + use ir5_expand_pseudo_ops::OpOrValue; + let (insn_or_value, src_lines) = match op_or_value.1 { + OpOrValue::Operation(ir4_validate_ambiguous_tokens::Operation { operands: Operands::Fill { value }, src_lines, .. }) => { + let value = match value.unwrap() { + UnsignedImmOrLabel::Imm(immediate) => immediate.unwrap(), + UnsignedImmOrLabel::Label(label) => { + let label = label.unwrap(); + symbol_table.get(label).unwrap().clone() + }, + }; + (InsnOrValue::Value(value), src_lines) + }, + OpOrValue::Operation(instruction_cst) => { + let nzp = instruction_cst.nzp.unwrap(); + let src_lines = instruction_cst.src_lines; + let insn = match instruction_cst.operands { + Operands::Add { dr, sr1, sr2_or_imm5 } => match sr2_or_imm5.unwrap() { + ir4_validate_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_add_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), + ir4_validate_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_add_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), + }, + Operands::And { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.unwrap() { + ir4_validate_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_and_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), + ir4_validate_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_and_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), + }, + + Operands::Ld { dr, pc_offset9 } => Instruction::new_ld(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), + Operands::Ldi { dr, pc_offset9 } => Instruction::new_ldi(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), + Operands::Ldr { dr, base, offset6 } => Instruction::new_ldr(dr.unwrap(), base.unwrap(), offset6.unwrap()), + Operands::Lea { dr, pc_offset9 } => Instruction::new_lea(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), + + Operands::St { sr, pc_offset9 } => Instruction::new_st(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), + Operands::Sti { sr, pc_offset9 } => Instruction::new_sti(sr.unwrap(), 
compute_offset(pc_offset9, current_location, &symbol_table)), + Operands::Str { sr, base, offset6 } => Instruction::new_str(sr.unwrap(), base.unwrap(), offset6.unwrap()), + + Operands::Not { dr, sr } => Instruction::new_not(dr.unwrap(), sr.unwrap()), + + Operands::Br { pc_offset9, .. } => { + let nzp = nzp.unwrap(); + Instruction::new_br(nzp.n, nzp.z, nzp.p, compute_offset(pc_offset9, current_location, &symbol_table)) + } + + Operands::Jmp { base } => Instruction::new_jmp(base.unwrap()), + Operands::Jsr { pc_offset11 } => Instruction::new_jsr(compute_offset(pc_offset11, current_location, &symbol_table)), + Operands::Jsrr { base } => Instruction::new_jsrr(base.unwrap()), + + Operands::Ret => Instruction::new_ret(), + Operands::Rti => Instruction::new_rti(), + + Operands::Trap { trap_vec } => Instruction::new_trap(trap_vec.unwrap()), + Operands::Getc => Instruction::new_trap(0x20), + Operands::Out => Instruction::new_trap(0x21), + Operands::Puts => Instruction::new_trap(0x22), + Operands::In => Instruction::new_trap(0x23), + Operands::Putsp => Instruction::new_trap(0x24), + Operands::Halt => Instruction::new_trap(0x25), + + _ => unreachable!() // TODO: restructure enum to avoid this + }; + (InsnOrValue::Instruction(insn), src_lines) + } + OpOrValue::Value(value) => (InsnOrValue::Value(value), vec![]) + }; + insns_or_values.push(InsnOrValueWithSrc { + insn_or_value, + src_lines + }); + current_location += 1; + } + + CompleteObject { orig, insns_or_values, symbol_table } +} + +fn compute_offset(pc_offset: ir4_validate_ambiguous_tokens::Checked, location: Addr, symbol_table: &HashMap<&str, Addr>) -> SignedWord { + match pc_offset.unwrap() { + ImmOrLabel::Label(label) => { + let label = label.unwrap(); + let label_location = symbol_table.get(label).unwrap().clone(); + let label_location = label_location as i64; + let offset_base = (location + 1) as i64; + (label_location - offset_base) as SignedWord + } + ImmOrLabel::Imm(immediate) => immediate.value.unwrap() + } +} diff --git a/assembler/src/expanded.rs b/assembler/src/expanded.rs deleted file mode 100644 index 70d24b0..0000000 --- a/assembler/src/expanded.rs +++ /dev/null @@ -1,217 +0,0 @@ -// For expanded pseudo-op structures -use crate::ir::ir4_validate_ambiguous_tokens; -use crate::ir::ir4_validate_ambiguous_tokens::{Operands, ImmOrLabel, UnsignedImmOrLabel, Checked}; -use crate::error::MemoryError; -use lc3_isa; -use lc3_isa::{Word, SignedWord}; -use lc3_isa::{Addr, Instruction}; -use std::collections::HashMap; -use std::iter::repeat; -use itertools::Itertools; - -pub type SymbolTable<'input> = HashMap<&'input str, Addr>; -pub type File<'input> = Vec>; - -pub struct Object<'input> { - orig: Addr, - ops_or_values: Vec<(Option>, OpOrValue<'input>)>, -} - -#[derive(Clone)] -pub enum OpOrValue<'input> { - Operation(ir4_validate_ambiguous_tokens::Operation<'input>), - Value(Word), -} - -pub struct CompleteObject<'input> { - pub orig: Addr, - pub insns_or_values: Vec, - pub symbol_table: SymbolTable<'input>, -} - -impl<'input> CompleteObject<'input> { - pub fn get_source(&self, address: Addr) -> Option> { - if address < self.orig { - return None; - } - let offset = (address - self.orig) as usize; - let insn_or_value = self.insns_or_values.get(offset); - if let Some(InsnOrValueWithSrc { src_lines, .. 
}) = insn_or_value { - Some(src_lines.clone()) - } else { - None - } - } - - pub fn get_label_addr(&self, label: &str) -> Option<&Addr> { - self.symbol_table.get(label) - } -} - -pub struct InsnOrValueWithSrc { - pub src_lines: Vec, - pub insn_or_value: InsnOrValue, -} - -pub enum InsnOrValue { - Instruction(Instruction), - Value(Word), -} - -pub type Label<'input> = &'input str; - -pub fn expand_pseudo_ops(object: ir4_validate_ambiguous_tokens::Object) -> Object { - let ir4_validate_ambiguous_tokens::Object { origin, content, .. } = object; - - let orig = origin.unwrap(); - - let mut ops_or_values = Vec::new(); - for operation in content.operations { - let label = operation.label.clone().map(Checked::unwrap); - let mut values = Vec::new(); - match operation.operands { - Operands::Blkw { size, .. } => { - let num_values = size.unwrap() as usize; - let block = repeat((None, OpOrValue::Value(0))).take(num_values); - values.extend(block); - }, - Operands::Stringz { string } => { - for c in string.unwrap().chars() { - values.push((None, OpOrValue::Value(c as Word))); - } - values.push((None, OpOrValue::Value(0))); // null-terminate - }, - Operands::End => { /* ignore */ }, - _ => { - values.push((None, OpOrValue::Operation(operation))); - }, - }; - let first = values.get_mut(0); - if let Some(first_value) = first { // TODO: how to handle other case? - first_value.0 = label; - } - ops_or_values.extend(values); - } - - Object { orig, ops_or_values } -} - -pub fn build_symbol_table<'input>(object: &Object<'input>) -> Result, MemoryError> { - let mut symbol_table = HashMap::new(); - let mut current_location = object.orig; - for op_or_value in object.ops_or_values.iter() { - if let Some(label) = op_or_value.0 { - let other_location = symbol_table.insert(label.clone(), current_location); - if let Some(_) = other_location { - return Err(MemoryError("Duplicate label at different location.".to_string())) - } - } - current_location += 1; - }; - Ok(symbol_table) -} - -pub fn validate_placement(objects: &Vec) -> Result<(), MemoryError> { - let starts_and_ends = objects.iter().map(get_start_and_end); - for ((_, prev_end), (next_start, _)) in starts_and_ends.tuple_windows() { - if prev_end > next_start { - return Err(MemoryError("Objects overlap.".to_string())); - } - } - Ok(()) -} - -fn get_start_and_end(object: &Object) -> (Addr, Addr) { - let start = object.orig; - let end = start + object.ops_or_values.len() as Addr; - (start, end) -} - -pub fn construct_instructions<'input>(object: Object, symbol_table: HashMap<&'input str, Addr>) -> CompleteObject<'input> { - let orig = object.orig; - let mut current_location = object.orig; - let mut insns_or_values = Vec::new(); - for op_or_value in object.ops_or_values { - let (insn_or_value, src_lines) = match op_or_value.1 { - OpOrValue::Operation(ir4_validate_ambiguous_tokens::Operation { operands: Operands::Fill { value }, src_lines, .. 
}) => { - let value = match value.unwrap() { - UnsignedImmOrLabel::Imm(immediate) => immediate.unwrap(), - UnsignedImmOrLabel::Label(label) => { - let label = label.unwrap(); - symbol_table.get(label).unwrap().clone() - }, - }; - (InsnOrValue::Value(value), src_lines) - }, - OpOrValue::Operation(instruction_cst) => { - let nzp = instruction_cst.nzp.unwrap(); - let src_lines = instruction_cst.src_lines; - let insn = match instruction_cst.operands { - Operands::Add { dr, sr1, sr2_or_imm5 } => match sr2_or_imm5.unwrap() { - ir4_validate_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_add_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - ir4_validate_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_add_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), - }, - Operands::And { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.unwrap() { - ir4_validate_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_and_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - ir4_validate_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_and_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), - }, - - Operands::Ld { dr, pc_offset9 } => Instruction::new_ld(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Ldi { dr, pc_offset9 } => Instruction::new_ldi(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Ldr { dr, base, offset6 } => Instruction::new_ldr(dr.unwrap(), base.unwrap(), offset6.unwrap()), - Operands::Lea { dr, pc_offset9 } => Instruction::new_lea(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - - Operands::St { sr, pc_offset9 } => Instruction::new_st(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Sti { sr, pc_offset9 } => Instruction::new_sti(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Str { sr, base, offset6 } => Instruction::new_str(sr.unwrap(), base.unwrap(), offset6.unwrap()), - - Operands::Not { dr, sr } => Instruction::new_not(dr.unwrap(), sr.unwrap()), - - Operands::Br { pc_offset9, .. 
} => { - let nzp = nzp.unwrap(); - Instruction::new_br(nzp.n, nzp.z, nzp.p, compute_offset(pc_offset9, current_location, &symbol_table)) - } - - Operands::Jmp { base } => Instruction::new_jmp(base.unwrap()), - Operands::Jsr { pc_offset11 } => Instruction::new_jsr(compute_offset(pc_offset11, current_location, &symbol_table)), - Operands::Jsrr { base } => Instruction::new_jsrr(base.unwrap()), - - Operands::Ret => Instruction::new_ret(), - Operands::Rti => Instruction::new_rti(), - - Operands::Trap { trap_vec } => Instruction::new_trap(trap_vec.unwrap()), - Operands::Getc => Instruction::new_trap(0x20), - Operands::Out => Instruction::new_trap(0x21), - Operands::Puts => Instruction::new_trap(0x22), - Operands::In => Instruction::new_trap(0x23), - Operands::Putsp => Instruction::new_trap(0x24), - Operands::Halt => Instruction::new_trap(0x25), - - _ => unreachable!() // TODO: restructure enum to avoid this - }; - (InsnOrValue::Instruction(insn), src_lines) - } - OpOrValue::Value(value) => (InsnOrValue::Value(value), vec![]) - }; - insns_or_values.push(InsnOrValueWithSrc { - insn_or_value, - src_lines - }); - current_location += 1; - } - - CompleteObject { orig, insns_or_values, symbol_table } -} - -fn compute_offset(pc_offset: ir4_validate_ambiguous_tokens::Checked, location: Addr, symbol_table: &HashMap<&str, Addr>) -> SignedWord { - match pc_offset.unwrap() { - ImmOrLabel::Label(label) => { - let label = label.unwrap(); - let label_location = symbol_table.get(label).unwrap().clone(); - let label_location = label_location as i64; - let offset_base = (location + 1) as i64; - (label_location - offset_base) as SignedWord - } - ImmOrLabel::Imm(immediate) => immediate.value.unwrap() - } -} \ No newline at end of file diff --git a/assembler/src/ir/ir4_validate_ambiguous_tokens.rs b/assembler/src/ir/ir4_validate_ambiguous_tokens.rs index 20d843b..8efe869 100644 --- a/assembler/src/ir/ir4_validate_ambiguous_tokens.rs +++ b/assembler/src/ir/ir4_validate_ambiguous_tokens.rs @@ -1,17 +1,18 @@ +use std::convert::TryInto; +use num_traits::Num; +use std::string::ToString; use lc3_isa::{Addr, SignedWord, check_signed_imm, Word}; + use crate::error::{ParseError, InvalidLabelReason, InvalidRegReason, InvalidImmediateReason}; use crate::lexer::Token; -use crate::ir::ir2_check_line_syntax::{Line, OperationTokens, OperandTokens}; +use crate::ir::ir2_check_line_syntax; use crate::ir::ir3_group_lines_and_objects; -use std::convert::TryInto; -use num_traits::Num; -use std::string::ToString; use crate::parser::LeniencyLevel; #[derive(Clone, Debug)] pub struct File<'input> { pub objects: Vec>, - pub ignored: Vec>, + pub ignored: Vec>, } #[derive(Clone, Debug)] @@ -24,9 +25,9 @@ pub struct Object<'input> { #[derive(Clone, Debug)] pub struct ObjectContent<'input> { pub operations: Vec>, - pub empty_lines: Vec>, - pub hanging_labels: Vec>, - pub invalid_lines: Vec>, + pub empty_lines: Vec>, + pub hanging_labels: Vec>, + pub invalid_lines: Vec>, } pub type Label<'input> = Checked<'input, &'input str>; @@ -162,7 +163,7 @@ impl CstParser { fn validate_line<'input>(&self, line: ir3_group_lines_and_objects::Line<'input>) -> Operation<'input> { let ir3_group_lines_and_objects::Line { label, - operation: OperationTokens { + operation: ir2_check_line_syntax::OperationTokens { operator, operands, separators, @@ -186,7 +187,8 @@ impl CstParser { } } - fn validate_operand_tokens<'input>(&self, operands: OperandTokens<'input>) -> Operands<'input> { + fn validate_operand_tokens<'input>(&self, operands: 
ir2_check_line_syntax::OperandTokens<'input>) -> Operands<'input> { + use ir2_check_line_syntax::OperandTokens; match operands { OperandTokens::Add { dr, sr1, sr2_or_imm5 } => Operands::Add { @@ -482,7 +484,7 @@ impl CstParser { // remove escape characters string = string .replace(r#"\""#, r#"""#) - .replace(r#"\\"#, r#"\"#) + .replace(r#"\\"#, r#"\"#) // TODO: fix this logic to escape \\ properly (atm \\n becomes '\n', not '\' and 'n') .replace(r#"\n"#, "\n"); let value = Ok(string); Checked { src, value } diff --git a/assembler/src/ir/ir5_expand_pseudo_ops.rs b/assembler/src/ir/ir5_expand_pseudo_ops.rs index e69de29..3b1dcc7 100644 --- a/assembler/src/ir/ir5_expand_pseudo_ops.rs +++ b/assembler/src/ir/ir5_expand_pseudo_ops.rs @@ -0,0 +1,54 @@ +use crate::ir::ir4_validate_ambiguous_tokens; +use lc3_isa::{Word, Addr}; +use crate::ir::ir4_validate_ambiguous_tokens::Checked; +use std::iter::repeat; + +pub type Label<'input> = &'input str; + +pub struct Object<'input> { + pub(crate) orig: Addr, + pub(crate) ops_or_values: Vec<(Option>, OpOrValue<'input>)>, +} + +#[derive(Clone)] +pub enum OpOrValue<'input> { + Operation(ir4_validate_ambiguous_tokens::Operation<'input>), + Value(Word), +} + +pub fn expand_pseudo_ops(object: ir4_validate_ambiguous_tokens::Object) -> Object { + let ir4_validate_ambiguous_tokens::Object { origin, content, .. } = object; + + let orig = origin.unwrap(); + + let mut ops_or_values = Vec::new(); + for operation in content.operations { + let label = operation.label.clone().map(Checked::unwrap); + let mut values = Vec::new(); + use ir4_validate_ambiguous_tokens::Operands; + match operation.operands { + Operands::Blkw { size, .. } => { + let num_values = size.unwrap() as usize; + let block = repeat((None, OpOrValue::Value(0))).take(num_values); + values.extend(block); + }, + Operands::Stringz { string } => { + for c in string.unwrap().chars() { + values.push((None, OpOrValue::Value(c as Word))); + } + values.push((None, OpOrValue::Value(0))); // null-terminate + }, + Operands::End => { /* ignore */ }, + _ => { + values.push((None, OpOrValue::Operation(operation))); + }, + }; + let first = values.get_mut(0); + if let Some(first_value) = first { // TODO: how to handle other case? 
+ first_value.0 = label; + } + ops_or_values.extend(values); + } + + Object { orig, ops_or_values } +} diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index acf7a07..ef2a4b5 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -7,10 +7,10 @@ pub mod lexer; pub mod parser; pub mod ir; pub mod error; -pub mod expanded; pub mod util; pub mod complete; pub mod assembler; +pub mod analysis; #[cfg(test)] From 075f0e1129c29b2a3a22bdcd701f15f246b0906a Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 24 Apr 2020 01:24:15 -0500 Subject: [PATCH 05/82] assembler: rename IR modules and their respective parse functions --- assembler/src/assembler.rs | 20 +++++----- assembler/src/error.rs | 10 ++--- assembler/src/ir/ir1_parse_lines.rs | 6 +-- ...ine_syntax.rs => ir2_parse_line_syntax.rs} | 12 +++--- ...es_and_objects.rs => ir3_parse_objects.rs} | 16 ++++---- ...okens.rs => ir4_parse_ambiguous_tokens.rs} | 38 +++++++++---------- assembler/src/ir/ir5_expand_pseudo_ops.rs | 12 +++--- assembler/src/ir/mod.rs | 6 +-- assembler/src/parser.rs | 19 +++++----- 9 files changed, 70 insertions(+), 69 deletions(-) rename assembler/src/ir/{ir2_check_line_syntax.rs => ir2_parse_line_syntax.rs} (98%) rename assembler/src/ir/{ir3_group_lines_and_objects.rs => ir3_parse_objects.rs} (93%) rename assembler/src/ir/{ir4_validate_ambiguous_tokens.rs => ir4_parse_ambiguous_tokens.rs} (93%) diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs index 467cbc2..e37f4e5 100644 --- a/assembler/src/assembler.rs +++ b/assembler/src/assembler.rs @@ -1,10 +1,10 @@ -use crate::ir::{ir4_validate_ambiguous_tokens, ir5_expand_pseudo_ops}; +use crate::ir::{ir4_parse_ambiguous_tokens, ir5_expand_pseudo_ops}; use lc3_isa::{ADDR_SPACE_SIZE_IN_WORDS, Addr, Instruction, Word, SignedWord}; use lc3_isa::util::MemoryDump; use crate::analysis::symbol_table::{SymbolTable, build_symbol_table}; use std::collections::HashMap; -use crate::ir::ir4_validate_ambiguous_tokens::{UnsignedImmOrLabel, Operands, ImmOrLabel}; +use crate::ir::ir4_parse_ambiguous_tokens::{UnsignedImmOrLabel, Operands, ImmOrLabel}; use crate::ir::ir5_expand_pseudo_ops::expand_pseudo_ops; use crate::analysis::memory_placement::validate_placement; @@ -22,7 +22,7 @@ impl<'input> QueryableObject<'input> { } pub fn assemble<'input, O>(objects: O, background: Option) -> MemoryDump - where O: IntoIterator> + where O: IntoIterator> { let complete_objects = assemble_to_queryable_objects(objects); assemble_queryable_objects(complete_objects, background) @@ -30,7 +30,7 @@ pub fn assemble<'input, O>(objects: O, background: Option) -> Memory pub fn assemble_to_queryable_objects<'input, O>(objects: O) -> QueryableObject<'input> - where O: IntoIterator> + where O: IntoIterator> { let expanded_objects = objects.into_iter().map(expand_pseudo_ops).collect(); validate_placement(&expanded_objects).unwrap(); @@ -106,7 +106,7 @@ pub fn construct_instructions<'input>(object: ir5_expand_pseudo_ops::Object, sym for op_or_value in object.ops_or_values { use ir5_expand_pseudo_ops::OpOrValue; let (insn_or_value, src_lines) = match op_or_value.1 { - OpOrValue::Operation(ir4_validate_ambiguous_tokens::Operation { operands: Operands::Fill { value }, src_lines, .. }) => { + OpOrValue::Operation(ir4_parse_ambiguous_tokens::Operation { operands: Operands::Fill { value }, src_lines, .. 
}) => { let value = match value.unwrap() { UnsignedImmOrLabel::Imm(immediate) => immediate.unwrap(), UnsignedImmOrLabel::Label(label) => { @@ -121,12 +121,12 @@ pub fn construct_instructions<'input>(object: ir5_expand_pseudo_ops::Object, sym let src_lines = instruction_cst.src_lines; let insn = match instruction_cst.operands { Operands::Add { dr, sr1, sr2_or_imm5 } => match sr2_or_imm5.unwrap() { - ir4_validate_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_add_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - ir4_validate_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_add_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), + ir4_parse_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_add_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), + ir4_parse_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_add_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), }, Operands::And { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.unwrap() { - ir4_validate_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_and_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - ir4_validate_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_and_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), + ir4_parse_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_and_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), + ir4_parse_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_and_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), }, Operands::Ld { dr, pc_offset9 } => Instruction::new_ld(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), @@ -176,7 +176,7 @@ pub fn construct_instructions<'input>(object: ir5_expand_pseudo_ops::Object, sym CompleteObject { orig, insns_or_values, symbol_table } } -fn compute_offset(pc_offset: ir4_validate_ambiguous_tokens::Checked, location: Addr, symbol_table: &HashMap<&str, Addr>) -> SignedWord { +fn compute_offset(pc_offset: ir4_parse_ambiguous_tokens::Checked, location: Addr, symbol_table: &HashMap<&str, Addr>) -> SignedWord { match pc_offset.unwrap() { ImmOrLabel::Label(label) => { let label = label.unwrap(); diff --git a/assembler/src/error.rs b/assembler/src/error.rs index 5b34a2a..c3951c9 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -4,10 +4,10 @@ use annotate_snippets::snippet::{Snippet, Annotation, Slice, SourceAnnotation, A use ParseError::*; use itertools::Itertools; -use crate::ir::ir4_validate_ambiguous_tokens; -use crate::ir::ir4_validate_ambiguous_tokens::{Object, ObjectContent, Operation, Operands}; +use crate::ir::ir4_parse_ambiguous_tokens; +use crate::ir::ir4_parse_ambiguous_tokens::{Object, ObjectContent, Operation, Operands}; use lc3_isa::SignedWord; -use crate::ir::ir2_check_line_syntax::LineContent::Invalid; +use crate::ir::ir2_parse_line_syntax::LineContent::Invalid; use annotate_snippets::display_list::FormatOptions; #[derive(Debug, Clone, Copy, PartialEq)] @@ -171,10 +171,10 @@ impl ParseError { #[derive(Debug, Clone, PartialEq)] pub struct MemoryError(pub String); -pub fn extract_file_errors(cst: ir4_validate_ambiguous_tokens::File) -> Vec { +pub fn extract_file_errors(cst: ir4_parse_ambiguous_tokens::File) -> Vec { let mut errors = Vec::new(); - let ir4_validate_ambiguous_tokens::File { objects, .. } = cst; + let ir4_parse_ambiguous_tokens::File { objects, .. 
} = cst; if objects.len() == 0 { errors.push(ParseError::Misc("File contained no objects.".to_string())); } diff --git a/assembler/src/ir/ir1_parse_lines.rs b/assembler/src/ir/ir1_parse_lines.rs index 21b46b6..28cf072 100644 --- a/assembler/src/ir/ir1_parse_lines.rs +++ b/assembler/src/ir/ir1_parse_lines.rs @@ -12,7 +12,7 @@ pub struct Line<'input> { pub newline: Option>, } -pub fn parse_simple_lines(lexer: Lexer) -> Lines { +pub fn parse_lines(lexer: Lexer) -> Lines { let mut tokens = lexer.peekable(); let mut simple_lines = Vec::new(); while tokens.peek().is_some() { @@ -63,7 +63,7 @@ mod tests { #[test] fn no_newline() { let lexer = Lexer::new("ADD"); - let simple_lines = parse_simple_lines(lexer); + let simple_lines = parse_lines(lexer); let Line { src, content, comment, newline } = simple_lines.get(0).unwrap(); assert_eq!(*src, "ADD".to_string()); assert_eq!(content.len(), 1); @@ -74,7 +74,7 @@ mod tests { #[test] fn two_lines() { let lexer = Lexer::new("ADD ; test\n.END"); - let simple_lines = parse_simple_lines(lexer); + let simple_lines = parse_lines(lexer); let Line { src, content, comment, newline } = simple_lines.get(0).unwrap(); assert_eq!(*src, "ADD ; test\n".to_string()); assert_eq!(content.len(), 2); diff --git a/assembler/src/ir/ir2_check_line_syntax.rs b/assembler/src/ir/ir2_parse_line_syntax.rs similarity index 98% rename from assembler/src/ir/ir2_check_line_syntax.rs rename to assembler/src/ir/ir2_parse_line_syntax.rs index 89289a5..282e304 100644 --- a/assembler/src/ir/ir2_check_line_syntax.rs +++ b/assembler/src/ir/ir2_parse_line_syntax.rs @@ -195,7 +195,7 @@ impl<'input> OperandTokens<'input> { } } -pub fn parse_lines(ir1_lines: ir1_parse_lines::Lines) -> Lines { +pub fn parse_line_syntax(ir1_lines: ir1_parse_lines::Lines) -> Lines { ir1_lines.into_iter() .map(parse_line) .collect() @@ -408,13 +408,13 @@ fn parse_separator<'input, T>(tokens: &mut Peekable) -> Result = ir2_check_line_syntax::Line<'input>; -pub type IR2Lines<'input> = ir2_check_line_syntax::Lines<'input>; -pub type IR2LineContent<'input> = ir2_check_line_syntax::LineContent<'input>; +pub type IR2Line<'input> = ir2_parse_line_syntax::Line<'input>; +pub type IR2Lines<'input> = ir2_parse_line_syntax::Lines<'input>; +pub type IR2LineContent<'input> = ir2_parse_line_syntax::LineContent<'input>; // Types "part of" this IR -pub type OperationTokens<'input> = ir2_check_line_syntax::OperationTokens<'input>; -pub type OperandTokens<'input> = ir2_check_line_syntax::OperandTokens<'input>; -pub type Label<'input> = ir2_check_line_syntax::Label<'input>; +pub type OperationTokens<'input> = ir2_parse_line_syntax::OperationTokens<'input>; +pub type OperandTokens<'input> = ir2_parse_line_syntax::OperandTokens<'input>; +pub type Label<'input> = ir2_parse_line_syntax::Label<'input>; #[derive(Clone)] pub struct File<'input> { @@ -44,7 +44,7 @@ pub struct Line<'input> { pub newlines: Vec>, } -pub fn parse_unvalidated_file(lines: IR2Lines) -> File { +pub fn parse_objects(lines: IR2Lines) -> File { let mut objects = Vec::new(); let mut ignored = Vec::new(); let mut lines = lines.into_iter().peekable(); diff --git a/assembler/src/ir/ir4_validate_ambiguous_tokens.rs b/assembler/src/ir/ir4_parse_ambiguous_tokens.rs similarity index 93% rename from assembler/src/ir/ir4_validate_ambiguous_tokens.rs rename to assembler/src/ir/ir4_parse_ambiguous_tokens.rs index 8efe869..09fac65 100644 --- a/assembler/src/ir/ir4_validate_ambiguous_tokens.rs +++ b/assembler/src/ir/ir4_parse_ambiguous_tokens.rs @@ -5,14 +5,14 @@ use lc3_isa::{Addr, 
SignedWord, check_signed_imm, Word}; use crate::error::{ParseError, InvalidLabelReason, InvalidRegReason, InvalidImmediateReason}; use crate::lexer::Token; -use crate::ir::ir2_check_line_syntax; -use crate::ir::ir3_group_lines_and_objects; +use crate::ir::ir2_parse_line_syntax; +use crate::ir::ir3_parse_objects; use crate::parser::LeniencyLevel; #[derive(Clone, Debug)] pub struct File<'input> { pub objects: Vec>, - pub ignored: Vec>, + pub ignored: Vec>, } #[derive(Clone, Debug)] @@ -25,9 +25,9 @@ pub struct Object<'input> { #[derive(Clone, Debug)] pub struct ObjectContent<'input> { pub operations: Vec>, - pub empty_lines: Vec>, - pub hanging_labels: Vec>, - pub invalid_lines: Vec>, + pub empty_lines: Vec>, + pub hanging_labels: Vec>, + pub invalid_lines: Vec>, } pub type Label<'input> = Checked<'input, &'input str>; @@ -131,23 +131,23 @@ pub enum Operands<'input> { End, } -pub struct CstParser { +pub struct AmbiguousTokenParser { pub leniency: LeniencyLevel, } -impl CstParser { +impl AmbiguousTokenParser { - pub fn parse_cst<'input>(&self, file: ir3_group_lines_and_objects::File<'input>) -> File<'input> { - let ir3_group_lines_and_objects::File { objects, ignored } = file; + pub fn parse_ambiguous_tokens<'input>(&self, file: ir3_parse_objects::File<'input>) -> File<'input> { + let ir3_parse_objects::File { objects, ignored } = file; File { objects: objects.into_iter().map(|o| self.validate_object(o)).collect(), ignored } } - fn validate_object<'input>(&self, object: ir3_group_lines_and_objects::Object<'input>) -> Object<'input> { - let ir3_group_lines_and_objects::Object { origin_src, origin, content } = object; - let ir3_group_lines_and_objects::ObjectContent { operations, empty_lines, hanging_labels, invalid_lines } = content; + fn validate_object<'input>(&self, object: ir3_parse_objects::Object<'input>) -> Object<'input> { + let ir3_parse_objects::Object { origin_src, origin, content } = object; + let ir3_parse_objects::ObjectContent { operations, empty_lines, hanging_labels, invalid_lines } = content; Object { origin_src: self.validate_line(origin_src), origin: self.validate_numeric_immediate(origin), @@ -160,10 +160,10 @@ impl CstParser { } } - fn validate_line<'input>(&self, line: ir3_group_lines_and_objects::Line<'input>) -> Operation<'input> { - let ir3_group_lines_and_objects::Line { + fn validate_line<'input>(&self, line: ir3_parse_objects::Line<'input>) -> Operation<'input> { + let ir3_parse_objects::Line { label, - operation: ir2_check_line_syntax::OperationTokens { + operation: ir2_parse_line_syntax::OperationTokens { operator, operands, separators, @@ -187,8 +187,8 @@ impl CstParser { } } - fn validate_operand_tokens<'input>(&self, operands: ir2_check_line_syntax::OperandTokens<'input>) -> Operands<'input> { - use ir2_check_line_syntax::OperandTokens; + fn validate_operand_tokens<'input>(&self, operands: ir2_parse_line_syntax::OperandTokens<'input>) -> Operands<'input> { + use ir2_parse_line_syntax::OperandTokens; match operands { OperandTokens::Add { dr, sr1, sr2_or_imm5 } => Operands::Add { @@ -497,7 +497,7 @@ mod immediate_tests { use pretty_assertions::assert_eq; fn single_test(num: &str, actual: N) { - let p = CstParser { leniency: LeniencyLevel::Lenient }; + let p = AmbiguousTokenParser { leniency: LeniencyLevel::Lenient }; let tok = Token { src: num, span: (0, 0), ty: crate::lexer::TokenType::Ambiguous }; diff --git a/assembler/src/ir/ir5_expand_pseudo_ops.rs b/assembler/src/ir/ir5_expand_pseudo_ops.rs index 3b1dcc7..cc0be3e 100644 --- 
a/assembler/src/ir/ir5_expand_pseudo_ops.rs +++ b/assembler/src/ir/ir5_expand_pseudo_ops.rs @@ -1,6 +1,6 @@ -use crate::ir::ir4_validate_ambiguous_tokens; +use crate::ir::ir4_parse_ambiguous_tokens; use lc3_isa::{Word, Addr}; -use crate::ir::ir4_validate_ambiguous_tokens::Checked; +use crate::ir::ir4_parse_ambiguous_tokens::Checked; use std::iter::repeat; pub type Label<'input> = &'input str; @@ -12,12 +12,12 @@ pub struct Object<'input> { #[derive(Clone)] pub enum OpOrValue<'input> { - Operation(ir4_validate_ambiguous_tokens::Operation<'input>), + Operation(ir4_parse_ambiguous_tokens::Operation<'input>), Value(Word), } -pub fn expand_pseudo_ops(object: ir4_validate_ambiguous_tokens::Object) -> Object { - let ir4_validate_ambiguous_tokens::Object { origin, content, .. } = object; +pub fn expand_pseudo_ops(object: ir4_parse_ambiguous_tokens::Object) -> Object { + let ir4_parse_ambiguous_tokens::Object { origin, content, .. } = object; let orig = origin.unwrap(); @@ -25,7 +25,7 @@ pub fn expand_pseudo_ops(object: ir4_validate_ambiguous_tokens::Object) -> Objec for operation in content.operations { let label = operation.label.clone().map(Checked::unwrap); let mut values = Vec::new(); - use ir4_validate_ambiguous_tokens::Operands; + use ir4_parse_ambiguous_tokens::Operands; match operation.operands { Operands::Blkw { size, .. } => { let num_values = size.unwrap() as usize; diff --git a/assembler/src/ir/mod.rs b/assembler/src/ir/mod.rs index 20f63da..98a88ce 100644 --- a/assembler/src/ir/mod.rs +++ b/assembler/src/ir/mod.rs @@ -7,14 +7,14 @@ pub mod ir1_parse_lines; /// This pass checks that the sequence of tokens on each line is valid. -pub mod ir2_check_line_syntax; +pub mod ir2_parse_line_syntax; /// This pass checks that the sequence of lines is valid /// and groups them into separate objects if there are multiple .ORIG/.ENDs. -pub mod ir3_group_lines_and_objects; +pub mod ir3_parse_objects; /// This pass disambiguates and validates ambiguous immediate operands and labels. -pub mod ir4_validate_ambiguous_tokens; +pub mod ir4_parse_ambiguous_tokens; /// This pass expands each pseudo-op which fills memory into the appropriate list of values. pub mod ir5_expand_pseudo_ops; diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index 2399385..672b31a 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -1,14 +1,15 @@ use crate::lexer::Lexer; -use crate::ir::ir1_parse_lines::parse_simple_lines; -use crate::ir::ir2_check_line_syntax::parse_lines; -use crate::ir::ir3_group_lines_and_objects::parse_unvalidated_file; -use crate::ir::ir4_validate_ambiguous_tokens::{File, CstParser}; +use crate::ir::ir1_parse_lines::parse_lines; +use crate::ir::ir2_parse_line_syntax::parse_line_syntax; +use crate::ir::ir3_parse_objects::parse_objects; +use crate::ir::ir4_parse_ambiguous_tokens; +use crate::ir::ir4_parse_ambiguous_tokens::AmbiguousTokenParser; -pub fn parse(tokens: Lexer, leniency: LeniencyLevel) -> File { - let ir1 = parse_simple_lines(tokens); - let ir2 = parse_lines(ir1); - let ir3 = parse_unvalidated_file(ir2); - CstParser { leniency }.parse_cst(ir3) +pub fn parse(tokens: Lexer, leniency: LeniencyLevel) -> ir4_parse_ambiguous_tokens::File { + let ir1 = parse_lines(tokens); + let ir2 = parse_line_syntax(ir1); + let ir3 = parse_objects(ir2); + AmbiguousTokenParser { leniency }.parse_ambiguous_tokens(ir3) } // TODO: impl Default? 
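After this rename, the public entry point in parser.rs simply chains the four IR passes: parse_lines, parse_line_syntax, parse_objects, then AmbiguousTokenParser::parse_ambiguous_tokens. As a rough usage sketch based only on APIs that appear in these patches (Lexer::new, parse, LeniencyLevel, extract_file_errors); the helper name `check` and the sample source below are illustrative, not part of the crate:

    // Sketch: driving the renamed pipeline end to end and collecting ParseErrors.
    use lc3_assembler::lexer::Lexer;
    use lc3_assembler::parser::{parse, LeniencyLevel};
    use lc3_assembler::error::extract_file_errors;

    // Hypothetical helper, not in the crate: true if the source parses cleanly.
    fn check(src: &str) -> bool {
        // ir1 lines -> ir2 line syntax -> ir3 objects -> ir4 disambiguated tokens,
        // all behind the single `parse` call.
        let lexer = Lexer::new(src);
        let file = parse(lexer, LeniencyLevel::Lenient);
        extract_file_errors(file).is_empty()
    }

    // check(".ORIG x3000\nHALT\n.END") would be expected to return true.
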
From ccc36d247b480033a8637d56f9b4ba8e41c74fb8 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 27 Apr 2020 01:31:58 -0500 Subject: [PATCH 06/82] !BROKEN! assembler: rewrite memory placement, symbol table to work on IR5 --- assembler/src/analysis/memory_placement.rs | 71 +++++++-- assembler/src/analysis/symbol_table.rs | 67 +++++++-- assembler/src/assembler.rs | 13 +- assembler/src/complete.rs | 61 +++++++- assembler/src/error.rs | 3 - assembler/src/ir/ir5_expand_pseudo_ops.rs | 159 +++++++++++++++------ 6 files changed, 295 insertions(+), 79 deletions(-) diff --git a/assembler/src/analysis/memory_placement.rs b/assembler/src/analysis/memory_placement.rs index 7d54d0e..82d97cd 100644 --- a/assembler/src/analysis/memory_placement.rs +++ b/assembler/src/analysis/memory_placement.rs @@ -2,20 +2,73 @@ use itertools::Itertools; use lc3_isa::Addr; use crate::ir::ir5_expand_pseudo_ops; -use crate::error::MemoryError; +use crate::error::ParseError; +use crate::ir::ir4_parse_ambiguous_tokens::Checked; -pub fn validate_placement(objects: &Vec) -> Result<(), MemoryError> { - let starts_and_ends = objects.iter().map(get_start_and_end); - for ((_, prev_end), (next_start, _)) in starts_and_ends.tuple_windows() { +#[derive(Debug, Clone)] +pub enum MemoryPlacementError { + InvalidOrigin { + parse_error: ParseError, + }, + UnknownPseudoOpLength { + parse_error: ParseError, + }, + ObjectsOverlap +} + +pub fn validate_placement(objects: &Vec) -> Result<(), Vec> { + let starts_and_ends = objects.iter() + .map(get_start_and_end) + .collect::>(); + let mut errors = Vec::new(); + for start_and_end in &starts_and_ends { + if let Err(error) = start_and_end { + errors.push(error.clone()); + } + } + if !errors.is_empty() { + return Err(errors); + } + let start_end_pairs = starts_and_ends.iter() + .map(|start_and_end| start_and_end.unwrap()) + .sorted_by_key(|(start, end)| *start) + .tuple_windows(); + for ((_, prev_end), (next_start, _)) in start_end_pairs { if prev_end > next_start { - return Err(MemoryError("Objects overlap.".to_string())); + errors.push(MemoryPlacementError::ObjectsOverlap); } } + if !errors.is_empty() { + return Err(errors); + } Ok(()) } -fn get_start_and_end(object: &ir5_expand_pseudo_ops::Object) -> (Addr, Addr) { - let start = object.orig; - let end = start + object.ops_or_values.len() as Addr; - (start, end) +/// Returns the first memory location the object occupies and the first memory location after the object. +/// The object occupies all locations between the 'start' inclusive and 'end' exclusive. 
+fn get_start_and_end(object: &ir5_expand_pseudo_ops::Object) -> Result<(Addr, Addr), MemoryPlacementError> { + match &object.origin.value { + Err(error) => { + Err(MemoryPlacementError::InvalidOrigin { + parse_error: error.clone() + }) + }, + Ok(origin) => { + let start = *origin; + let mut end = start; + for operation in object.content.operations { + match operation.num_memory_locations_occupied() { + Ok(num_locations) => { + end += num_locations as Addr; + }, + Err(error) => { + return Err(MemoryPlacementError::UnknownPseudoOpLength { + parse_error: error.clone() + }); + } + } + } + Ok((start, end)) + }, + } } diff --git a/assembler/src/analysis/symbol_table.rs b/assembler/src/analysis/symbol_table.rs index 40ec4ea..dc77a19 100644 --- a/assembler/src/analysis/symbol_table.rs +++ b/assembler/src/analysis/symbol_table.rs @@ -1,21 +1,66 @@ use std::collections::HashMap; use lc3_isa::Addr; use crate::ir::ir5_expand_pseudo_ops; -use crate::error::MemoryError; +use crate::lexer::Span; +use crate::error::ParseError; pub type SymbolTable<'input> = HashMap<&'input str, Addr>; -pub fn build_symbol_table<'input>(object: &ir5_expand_pseudo_ops::Object<'input>) -> Result, MemoryError> { +#[derive(Debug)] +pub enum SymbolTableError { + InvalidOrigin { + parse_error: ParseError, + }, + UnknownPseudoOpLength { + parse_error: ParseError, + }, + DuplicateLabel { + ranges: (Span, Span), + label_text: String + } +} + +pub fn build_symbol_table<'input>(object: &ir5_expand_pseudo_ops::Object<'input>) -> Result, Vec> { let mut symbol_table = HashMap::new(); - let mut current_location = object.orig; - for op_or_value in object.ops_or_values.iter() { - if let Some(label) = op_or_value.0 { - let other_location = symbol_table.insert(label.clone(), current_location); - if let Some(_) = other_location { - return Err(MemoryError("Duplicate label at different location.".to_string())) - } + let mut errors = Vec::new(); + match &object.origin.value { + Err(parse_error) => { + errors.push(SymbolTableError::InvalidOrigin { parse_error: parse_error.clone() }); + }, + Ok(origin) => { + let mut current_location = *origin; + for operation in object.content.operations.iter() { + if let Some(label) = &operation.label { + let span = label.src.span; + if let Ok(label_text) = label.value { + let other_value = symbol_table.insert(label_text, (current_location, span)); + if let Some((other_location, other_span)) = other_value { + errors.push(SymbolTableError::DuplicateLabel { // TODO: handle triplicate+ labels in one error + ranges: (other_span, span), + label_text: label_text.to_string() + }); + } + } + } + match operation.num_memory_locations_occupied() { + Ok(num_locations) => { + current_location += num_locations as Addr; + }, + Err(error) => { + errors.push(SymbolTableError::UnknownPseudoOpLength { + parse_error: error.clone() + }); + } + }; + }; } - current_location += 1; }; - Ok(symbol_table) + if errors.is_empty() { + let symbol_table = symbol_table.iter() + .map(|(label, (addr, span))| (*label, *addr)) + .collect(); + Ok(symbol_table) + } else { + Err(errors) + } } diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs index e37f4e5..b8405cd 100644 --- a/assembler/src/assembler.rs +++ b/assembler/src/assembler.rs @@ -28,11 +28,10 @@ pub fn assemble<'input, O>(objects: O, background: Option) -> Memory assemble_queryable_objects(complete_objects, background) } - pub fn assemble_to_queryable_objects<'input, O>(objects: O) -> QueryableObject<'input> where O: IntoIterator> { - let expanded_objects = 
objects.into_iter().map(expand_pseudo_ops).collect(); + let expanded_objects = expand_pseudo_ops(objects); validate_placement(&expanded_objects).unwrap(); let segments = expanded_objects.into_iter() .map(|o| { @@ -98,14 +97,12 @@ pub enum InsnOrValue { pub type Label<'input> = &'input str; - pub fn construct_instructions<'input>(object: ir5_expand_pseudo_ops::Object, symbol_table: HashMap<&'input str, Addr>) -> CompleteObject<'input> { - let orig = object.orig; - let mut current_location = object.orig; + let orig = object.origin.unwrap(); + let mut current_location = orig; let mut insns_or_values = Vec::new(); - for op_or_value in object.ops_or_values { - use ir5_expand_pseudo_ops::OpOrValue; - let (insn_or_value, src_lines) = match op_or_value.1 { + for operation in object.content.operations { + let (insn_or_value, src_lines) = match operation.1 { OpOrValue::Operation(ir4_parse_ambiguous_tokens::Operation { operands: Operands::Fill { value }, src_lines, .. }) => { let value = match value.unwrap() { UnsignedImmOrLabel::Imm(immediate) => immediate.unwrap(), diff --git a/assembler/src/complete.rs b/assembler/src/complete.rs index 6a0f862..7bdbcea 100644 --- a/assembler/src/complete.rs +++ b/assembler/src/complete.rs @@ -1,15 +1,70 @@ +use crate::ir::{ir5_expand_pseudo_ops, ir2_parse_line_syntax, ir4_parse_ambiguous_tokens}; +use lc3_isa::{Addr, Word, Instruction}; +use crate::lexer::Token; +use crate::analysis::symbol_table::{SymbolTable, SymbolTableError}; +use crate::error::ParseError; + /// `complete` will store as much data as possible /// relating to the source *and* what it will be assembled to. /// This will allow querying for the source assembled to a memory location, /// the addresses corresponding to labels, and whatever is required in the future /// to provide a nice development environment. 
+pub type Label<'input> = ir5_expand_pseudo_ops::Label<'input>; +pub type Immediate<'input, Addr> = ir5_expand_pseudo_ops::Immediate<'input, Addr>; -pub struct Program { - pub objects: Vec +pub struct Program<'input> { + pub objects: Vec> } -pub struct Object { +pub struct Object<'input> { + pub origin_src: Operation<'input>, + pub origin: Immediate<'input, Addr>, + pub content: ObjectContent<'input>, + pub symbol_table: Result, SymbolTableError> +} +pub struct ObjectContent<'input> { + pub operations: Vec>, + pub empty_lines: Vec>, + pub hanging_labels: Vec>, + pub invalid_lines: Vec>, } +pub struct Operation<'input> { + pub label: Option>, + pub operator: Token<'input>, + pub nzp: Result, ParseError>, + pub operands: Operands<'input>, + + pub src_lines: Vec, + pub separators: Vec>, + pub whitespace: Vec>, + pub comments: Vec>, + pub newlines: Vec>, + + pub instruction_or_values: Result>, +} + +pub enum InstructionOrValues { + Instruction(Instruction, Word), + Values(Vec), +} + +impl<'input> Operation<'input> { + + pub fn num_memory_locations_occupied(&self) -> Result> { + use InstructionOrValues::*; + match &self.instruction_or_values { + Ok(Instruction(_, _)) => Ok(1), + Ok(Values(values)) => Ok(values.len()), + Err(errors) => Err(errors.clone()) + } + } + +} + +pub type Operands<'input> = ir5_expand_pseudo_ops::Operands<'input>; +pub type ConditionCodes = ir5_expand_pseudo_ops::ConditionCodes; +pub type Separator<'input> = ir5_expand_pseudo_ops::Separator<'input>; + diff --git a/assembler/src/error.rs b/assembler/src/error.rs index c3951c9..a4088be 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -168,9 +168,6 @@ impl ParseError { } -#[derive(Debug, Clone, PartialEq)] -pub struct MemoryError(pub String); - pub fn extract_file_errors(cst: ir4_parse_ambiguous_tokens::File) -> Vec { let mut errors = Vec::new(); diff --git a/assembler/src/ir/ir5_expand_pseudo_ops.rs b/assembler/src/ir/ir5_expand_pseudo_ops.rs index cc0be3e..b92b38c 100644 --- a/assembler/src/ir/ir5_expand_pseudo_ops.rs +++ b/assembler/src/ir/ir5_expand_pseudo_ops.rs @@ -1,54 +1,123 @@ -use crate::ir::ir4_parse_ambiguous_tokens; use lc3_isa::{Word, Addr}; -use crate::ir::ir4_parse_ambiguous_tokens::Checked; use std::iter::repeat; -pub type Label<'input> = &'input str; +use crate::ir::{ir4_parse_ambiguous_tokens, ir2_parse_line_syntax}; +use crate::ir::ir4_parse_ambiguous_tokens::Checked; +use crate::lexer::Token; +use crate::error::ParseError; + +pub type Label<'input> = ir4_parse_ambiguous_tokens::Label<'input>; +pub type Immediate<'input, Addr> = Checked<'input, Addr>; + +pub type File<'input> = Vec>; pub struct Object<'input> { - pub(crate) orig: Addr, - pub(crate) ops_or_values: Vec<(Option>, OpOrValue<'input>)>, -} - -#[derive(Clone)] -pub enum OpOrValue<'input> { - Operation(ir4_parse_ambiguous_tokens::Operation<'input>), - Value(Word), -} - -pub fn expand_pseudo_ops(object: ir4_parse_ambiguous_tokens::Object) -> Object { - let ir4_parse_ambiguous_tokens::Object { origin, content, .. } = object; - - let orig = origin.unwrap(); - - let mut ops_or_values = Vec::new(); - for operation in content.operations { - let label = operation.label.clone().map(Checked::unwrap); - let mut values = Vec::new(); - use ir4_parse_ambiguous_tokens::Operands; - match operation.operands { - Operands::Blkw { size, .. 
} => { - let num_values = size.unwrap() as usize; - let block = repeat((None, OpOrValue::Value(0))).take(num_values); - values.extend(block); - }, - Operands::Stringz { string } => { - for c in string.unwrap().chars() { - values.push((None, OpOrValue::Value(c as Word))); - } - values.push((None, OpOrValue::Value(0))); // null-terminate - }, - Operands::End => { /* ignore */ }, - _ => { - values.push((None, OpOrValue::Operation(operation))); - }, - }; - let first = values.get_mut(0); - if let Some(first_value) = first { // TODO: how to handle other case? - first_value.0 = label; + pub origin_src: Operation<'input>, + pub origin: Immediate<'input, Addr>, + pub content: ObjectContent<'input>, +} + +pub struct ObjectContent<'input> { + pub operations: Vec>, + pub empty_lines: Vec>, + pub hanging_labels: Vec>, + pub invalid_lines: Vec>, +} + +pub struct Operation<'input> { + pub label: Option>, + pub operator: Token<'input>, + pub nzp: Result, ParseError>, + pub operands: Operands<'input>, + + pub src_lines: Vec, + pub separators: Vec>, + pub whitespace: Vec>, + pub comments: Vec>, + pub newlines: Vec>, + + // Option::Some only if this Operation is a pseudo-op. + // Result::Ok only if the operands for the pseudo-op are Ok. + pub expanded: Option, ParseError>>, +} + +impl<'input> Operation<'input> { + + pub fn num_memory_locations_occupied(&self) -> Result { + match &self.expanded { + None => Ok(1), + Some(Ok(values)) => Ok(values.len()), + Some(Err(error)) => Err(error.clone()) } - ops_or_values.extend(values); } - Object { orig, ops_or_values } +} + +pub type Operands<'input> = ir4_parse_ambiguous_tokens::Operands<'input>; +pub type ConditionCodes = ir4_parse_ambiguous_tokens::ConditionCodes; +pub type Separator<'input> = ir4_parse_ambiguous_tokens::Separator<'input>; + +pub fn expand_pseudo_ops<'input, O>(objects: O) -> File<'input> + where O: IntoIterator> +{ + objects.into_iter() + .map(expand_object_pseudo_ops) + .collect() +} + +pub fn expand_object_pseudo_ops(object: ir4_parse_ambiguous_tokens::Object) -> Object { + let ir4_parse_ambiguous_tokens::Object { origin_src, origin, content, } = object; + let origin_src = expand_operation(origin_src); + let content = expand_object_content(content); + Object { origin_src, origin, content } +} + +fn expand_object_content(object_content: ir4_parse_ambiguous_tokens::ObjectContent) -> ObjectContent { + let ir4_parse_ambiguous_tokens::ObjectContent { operations, empty_lines, hanging_labels, invalid_lines, } = object_content; + let operations = operations.into_iter() + .map(expand_operation) + .collect(); + ObjectContent { operations, empty_lines, hanging_labels, invalid_lines, } +} + +// TODO: make symbol table calculate addresses without this IR, +// so we can use it here to calculate .FILLs with a label operand. +fn expand_operation(operation: ir4_parse_ambiguous_tokens::Operation) -> Operation { + use ir4_parse_ambiguous_tokens::Operands; + + let ir4_parse_ambiguous_tokens::Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines } = operation; + + let expanded = match &operands { + Operands::Blkw { size, .. 
} => { + match &size.value { + Ok(size) => { + let num_values = *size as usize; + let block = repeat(0).take(num_values).collect(); + Some(Ok(block)) + }, + Err(err) => { + Some(Err(err.clone())) + } + } + }, + Operands::Stringz { string } => { + match &string.value { + Ok(string) => { + let mut values = Vec::new(); + for c in string.chars() { + values.push(c as Word); + } + values.push(0); // null-terminate + Some(Ok(values)) + }, + Err(err) => { + Some(Err(err.clone())) + } + } + }, + Operands::Orig { .. } | Operands::End => { Some(Ok(vec![])) }, // Expand, but to no values + _ => { None }, // Normal operation, does not expand + }; + + Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines, expanded, } } From 5b22b2550aed6cf7fa875bee7722559b3258d5d7 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 27 Apr 2020 14:16:37 -0500 Subject: [PATCH 07/82] trash --- assembler/bin/as.rs | 1 - assembler/src/assembler.rs | 187 ------------------------------------- assembler/src/complete.rs | 183 ++++++++++++++++++++++++++++++++++-- assembler/src/lib.rs | 1 - assembler/src/parser.rs | 8 +- assembler/tests/integ.rs | 1 - 6 files changed, 181 insertions(+), 200 deletions(-) delete mode 100644 assembler/src/assembler.rs diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 047816f..fdcb5b2 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -4,7 +4,6 @@ use std::{env, fs}; use std::path::{Path, PathBuf}; use lc3_assembler::lexer::Lexer; use lc3_assembler::parser::parse; -use lc3_assembler::assembler::assemble; use lc3_shims::memory::FileBackedMemoryShim; use clap::clap_app; use lc3_assembler::parser::LeniencyLevel::*; diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs deleted file mode 100644 index b8405cd..0000000 --- a/assembler/src/assembler.rs +++ /dev/null @@ -1,187 +0,0 @@ -use crate::ir::{ir4_parse_ambiguous_tokens, ir5_expand_pseudo_ops}; -use lc3_isa::{ADDR_SPACE_SIZE_IN_WORDS, Addr, Instruction, Word, SignedWord}; - -use lc3_isa::util::MemoryDump; -use crate::analysis::symbol_table::{SymbolTable, build_symbol_table}; -use std::collections::HashMap; -use crate::ir::ir4_parse_ambiguous_tokens::{UnsignedImmOrLabel, Operands, ImmOrLabel}; -use crate::ir::ir5_expand_pseudo_ops::expand_pseudo_ops; -use crate::analysis::memory_placement::validate_placement; - -pub struct QueryableObject<'input> { - segments: Vec> -} - -impl<'input> QueryableObject<'input> { - pub fn get_source(&self, address: Addr) -> Option> { - self.segments.iter() - .map(|o| o.get_source(address)) - .find(Option::is_some) - .flatten() - } -} - -pub fn assemble<'input, O>(objects: O, background: Option) -> MemoryDump - where O: IntoIterator> -{ - let complete_objects = assemble_to_queryable_objects(objects); - assemble_queryable_objects(complete_objects, background) -} - -pub fn assemble_to_queryable_objects<'input, O>(objects: O) -> QueryableObject<'input> - where O: IntoIterator> -{ - let expanded_objects = expand_pseudo_ops(objects); - validate_placement(&expanded_objects).unwrap(); - let segments = expanded_objects.into_iter() - .map(|o| { - let symbol_table = build_symbol_table(&o).unwrap(); - construct_instructions(o, symbol_table) - }) - .collect(); - QueryableObject { segments } -} - - -pub fn assemble_queryable_objects(queryable_object: QueryableObject, background: Option) -> MemoryDump { - let mut memory = background.unwrap_or(MemoryDump([0x0000; ADDR_SPACE_SIZE_IN_WORDS])); - for complete_object in queryable_object.segments { - let mut i = 
complete_object.orig as usize; - for insn_or_value_with_src in complete_object.insns_or_values { - let InsnOrValueWithSrc { insn_or_value, .. } = insn_or_value_with_src; - memory[i] = match insn_or_value { - InsnOrValue::Instruction(insn) => insn.into(), - InsnOrValue::Value(value) => value, - }; - i += 1; - } - } - - memory -} - -pub struct CompleteObject<'input> { - pub orig: Addr, - pub insns_or_values: Vec, - pub symbol_table: SymbolTable<'input>, -} - -impl<'input> CompleteObject<'input> { - pub fn get_source(&self, address: Addr) -> Option> { - if address < self.orig { - return None; - } - let offset = (address - self.orig) as usize; - let insn_or_value = self.insns_or_values.get(offset); - if let Some(InsnOrValueWithSrc { src_lines, .. }) = insn_or_value { - Some(src_lines.clone()) - } else { - None - } - } - - pub fn get_label_addr(&self, label: &str) -> Option<&Addr> { - self.symbol_table.get(label) - } -} - -pub struct InsnOrValueWithSrc { - pub src_lines: Vec, - pub insn_or_value: InsnOrValue, -} - -pub enum InsnOrValue { - Instruction(Instruction), - Value(Word), -} - -pub type Label<'input> = &'input str; - -pub fn construct_instructions<'input>(object: ir5_expand_pseudo_ops::Object, symbol_table: HashMap<&'input str, Addr>) -> CompleteObject<'input> { - let orig = object.origin.unwrap(); - let mut current_location = orig; - let mut insns_or_values = Vec::new(); - for operation in object.content.operations { - let (insn_or_value, src_lines) = match operation.1 { - OpOrValue::Operation(ir4_parse_ambiguous_tokens::Operation { operands: Operands::Fill { value }, src_lines, .. }) => { - let value = match value.unwrap() { - UnsignedImmOrLabel::Imm(immediate) => immediate.unwrap(), - UnsignedImmOrLabel::Label(label) => { - let label = label.unwrap(); - symbol_table.get(label).unwrap().clone() - }, - }; - (InsnOrValue::Value(value), src_lines) - }, - OpOrValue::Operation(instruction_cst) => { - let nzp = instruction_cst.nzp.unwrap(); - let src_lines = instruction_cst.src_lines; - let insn = match instruction_cst.operands { - Operands::Add { dr, sr1, sr2_or_imm5 } => match sr2_or_imm5.unwrap() { - ir4_parse_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_add_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - ir4_parse_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_add_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), - }, - Operands::And { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.unwrap() { - ir4_parse_ambiguous_tokens::Sr2OrImm5::Imm5(immediate) => Instruction::new_and_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - ir4_parse_ambiguous_tokens::Sr2OrImm5::Sr2(src_reg) => Instruction::new_and_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), - }, - - Operands::Ld { dr, pc_offset9 } => Instruction::new_ld(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Ldi { dr, pc_offset9 } => Instruction::new_ldi(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Ldr { dr, base, offset6 } => Instruction::new_ldr(dr.unwrap(), base.unwrap(), offset6.unwrap()), - Operands::Lea { dr, pc_offset9 } => Instruction::new_lea(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - - Operands::St { sr, pc_offset9 } => Instruction::new_st(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Sti { sr, pc_offset9 } => Instruction::new_sti(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Str { sr, base, 
offset6 } => Instruction::new_str(sr.unwrap(), base.unwrap(), offset6.unwrap()), - - Operands::Not { dr, sr } => Instruction::new_not(dr.unwrap(), sr.unwrap()), - - Operands::Br { pc_offset9, .. } => { - let nzp = nzp.unwrap(); - Instruction::new_br(nzp.n, nzp.z, nzp.p, compute_offset(pc_offset9, current_location, &symbol_table)) - } - - Operands::Jmp { base } => Instruction::new_jmp(base.unwrap()), - Operands::Jsr { pc_offset11 } => Instruction::new_jsr(compute_offset(pc_offset11, current_location, &symbol_table)), - Operands::Jsrr { base } => Instruction::new_jsrr(base.unwrap()), - - Operands::Ret => Instruction::new_ret(), - Operands::Rti => Instruction::new_rti(), - - Operands::Trap { trap_vec } => Instruction::new_trap(trap_vec.unwrap()), - Operands::Getc => Instruction::new_trap(0x20), - Operands::Out => Instruction::new_trap(0x21), - Operands::Puts => Instruction::new_trap(0x22), - Operands::In => Instruction::new_trap(0x23), - Operands::Putsp => Instruction::new_trap(0x24), - Operands::Halt => Instruction::new_trap(0x25), - - _ => unreachable!() // TODO: restructure enum to avoid this - }; - (InsnOrValue::Instruction(insn), src_lines) - } - OpOrValue::Value(value) => (InsnOrValue::Value(value), vec![]) - }; - insns_or_values.push(InsnOrValueWithSrc { - insn_or_value, - src_lines - }); - current_location += 1; - } - - CompleteObject { orig, insns_or_values, symbol_table } -} - -fn compute_offset(pc_offset: ir4_parse_ambiguous_tokens::Checked, location: Addr, symbol_table: &HashMap<&str, Addr>) -> SignedWord { - match pc_offset.unwrap() { - ImmOrLabel::Label(label) => { - let label = label.unwrap(); - let label_location = symbol_table.get(label).unwrap().clone(); - let label_location = label_location as i64; - let offset_base = (location + 1) as i64; - (label_location - offset_base) as SignedWord - } - ImmOrLabel::Imm(immediate) => immediate.value.unwrap() - } -} diff --git a/assembler/src/complete.rs b/assembler/src/complete.rs index 7bdbcea..3543e14 100644 --- a/assembler/src/complete.rs +++ b/assembler/src/complete.rs @@ -1,8 +1,11 @@ -use crate::ir::{ir5_expand_pseudo_ops, ir2_parse_line_syntax, ir4_parse_ambiguous_tokens}; -use lc3_isa::{Addr, Word, Instruction}; -use crate::lexer::Token; -use crate::analysis::symbol_table::{SymbolTable, SymbolTableError}; +use lc3_isa::{Addr, Word, Instruction, SignedWord}; +use crate::lexer::{Token, Span}; +use crate::analysis::symbol_table; +use crate::analysis::symbol_table::{SymbolTableError, build_symbol_table}; use crate::error::ParseError; +use crate::ir::ir4_parse_ambiguous_tokens::{UnsignedImmOrLabel, ImmOrLabel, Sr2OrImm5}; +use crate::ir::{ir2_parse_line_syntax, ir4_parse_ambiguous_tokens, ir5_expand_pseudo_ops,}; +use std::collections::HashMap; /// `complete` will store as much data as possible /// relating to the source *and* what it will be assembled to. 
@@ -12,16 +15,17 @@ use crate::error::ParseError; pub type Label<'input> = ir5_expand_pseudo_ops::Label<'input>; pub type Immediate<'input, Addr> = ir5_expand_pseudo_ops::Immediate<'input, Addr>; +pub type SymbolTable<'input> = Result, Vec>; pub struct Program<'input> { pub objects: Vec> } pub struct Object<'input> { - pub origin_src: Operation<'input>, + pub origin_src: ir5_expand_pseudo_ops::Operation<'input>, pub origin: Immediate<'input, Addr>, pub content: ObjectContent<'input>, - pub symbol_table: Result, SymbolTableError> + pub symbol_table: SymbolTable<'input>, } pub struct ObjectContent<'input> { @@ -43,7 +47,7 @@ pub struct Operation<'input> { pub comments: Vec>, pub newlines: Vec>, - pub instruction_or_values: Result>, + pub instruction_or_values: Result>, } pub enum InstructionOrValues { @@ -53,7 +57,7 @@ pub enum InstructionOrValues { impl<'input> Operation<'input> { - pub fn num_memory_locations_occupied(&self) -> Result> { + pub fn num_memory_locations_occupied(&self) -> Result> { use InstructionOrValues::*; match &self.instruction_or_values { Ok(Instruction(_, _)) => Ok(1), @@ -68,3 +72,166 @@ pub type Operands<'input> = ir5_expand_pseudo_ops::Operands<'input>; pub type ConditionCodes = ir5_expand_pseudo_ops::ConditionCodes; pub type Separator<'input> = ir5_expand_pseudo_ops::Separator<'input>; +pub fn construct_all_instructions(file: ir5_expand_pseudo_ops::File) -> Program { + file.into_iter() + .map(construct_instructions) + .collect() +} + +pub fn construct_instructions(object: ir5_expand_pseudo_ops::Object) -> Object { + let symbol_table = build_symbol_table(&object); + let ir5_expand_pseudo_ops::Object { origin_src, origin, content } = object; + let content = construct_object_content_instructions(content, &origin, &symbol_table); + Object { origin_src, origin, content, symbol_table } +} + +fn construct_object_content_instructions<'input>(content: ir5_expand_pseudo_ops::ObjectContent<'input>, origin: &Immediate<'input, Addr>, symbol_table: &SymbolTable<'input>) -> ObjectContent<'input> { + let ir5_expand_pseudo_ops::ObjectContent { operations, empty_lines, hanging_labels, invalid_lines } = content; + let operations = construct_operations_instructions(operations, origin, symbol_table); + ObjectContent { operations, empty_lines, hanging_labels, invalid_lines, } +} + +fn construct_operations_instructions<'input, O>(operations: O, origin: &Immediate<'input, Addr>, symbol_table: &SymbolTable<'input>) -> Vec> + where O: IntoIterator> +{ + use ConstructInstructionError::*; + + let mut operations = Vec::new(); + let mut current_location = match &origin.value { + Err(error) => Err(ConstructInstructionError { error }), + Ok(origin) => Ok(origin), + }; + for operation in operations { + let operation = construct_instruction_for_operation(operation, , symbol_table); + operations.push(operation); + } + + operations +}; + +pub enum ConstructInstructionError { + EarlierParseError { + error: ParseError + }, + InvalidLabel { + span: Span, + label: String, + }, +} + +fn construct_instruction_for_operation<'input>( + operation: ir5_expand_pseudo_ops::Operation<'input>, + location: Result>, + symbol_table: &SymbolTable<'input> +) -> Operation<'input> { + + use ConstructInstructionError::*; + + let ir5_expand_pseudo_ops::Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines, expanded } = operation; + + let instruction_or_values = match location { + Err(errors) => Err(errors), + Ok(current_location) => { + let mut errors = Vec::new(); + match 
&operands { + Operands::Fill { value } => { + let value = match &value.value { + Err(error) => { errors.push(EarlierParseError { error: error.clone() }); Err(errors) } + Ok(UnsignedImmOrLabel::Imm(immediate)) => match &immediate.value { + Err(error) => { errors.push(EarlierParseError { error: error.clone() }); Err(errors) } + Ok(immediate) => Ok(immediate), + }, + Ok(UnsignedImmOrLabel::Label(label)) => match &label.value { + Err(error) => { errors.push(EarlierParseError { error: error.clone() }); Err(errors) } + Ok(label_value) => { + match symbol_table.get(label_value) { + Some(addr) => Ok(addr), + None => { + errors.push(InvalidLabel { + span: label.src.span, + label: label_value.to_string(), + }); + Err(errors) + } + } + }, + }, + }; + value.map(|value| InstructionOrValues::Values(vec![value])) + }, + Operands::Add { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.value { + Err(error) => { errors.push(EarlierParseError { error: error.clone() }); Err(errors) } + Ok(Sr2OrImm5::Imm5(immediate)) => { + Instruction::new_add_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), + }, + Ok(Sr2OrImm5::Sr2(src_reg)) => { + Instruction::new_add_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), + } + }, + Operands::And { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.unwrap() { + Sr2OrImm5::Imm5(immediate) => Instruction::new_and_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), + Sr2OrImm5::Sr2(src_reg) => Instruction::new_and_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), + }, + + Operands::Ld { dr, pc_offset9 } => Instruction::new_ld(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), + Operands::Ldi { dr, pc_offset9 } => Instruction::new_ldi(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), + Operands::Ldr { dr, base, offset6 } => Instruction::new_ldr(dr.unwrap(), base.unwrap(), offset6.unwrap()), + Operands::Lea { dr, pc_offset9 } => Instruction::new_lea(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), + + Operands::St { sr, pc_offset9 } => Instruction::new_st(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), + Operands::Sti { sr, pc_offset9 } => Instruction::new_sti(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), + Operands::Str { sr, base, offset6 } => Instruction::new_str(sr.unwrap(), base.unwrap(), offset6.unwrap()), + + Operands::Not { dr, sr } => Instruction::new_not(dr.unwrap(), sr.unwrap()), + + Operands::Br { pc_offset9, .. } => { + let nzp = nzp.unwrap(); + Instruction::new_br(nzp.n, nzp.z, nzp.p, compute_offset(pc_offset9, current_location, &symbol_table)) + } + + Operands::Jmp { base } => Instruction::new_jmp(base.unwrap()), + Operands::Jsr { pc_offset11 } => Instruction::new_jsr(compute_offset(pc_offset11, current_location, &symbol_table)), + Operands::Jsrr { base } => Instruction::new_jsrr(base.unwrap()), + + Operands::Ret => Instruction::new_ret(), + Operands::Rti => Instruction::new_rti(), + + Operands::Trap { trap_vec } => Instruction::new_trap(trap_vec.unwrap()), + Operands::Getc => Instruction::new_trap(0x20), + Operands::Out => Instruction::new_trap(0x21), + Operands::Puts => Instruction::new_trap(0x22), + Operands::In => Instruction::new_trap(0x23), + Operands::Putsp => Instruction::new_trap(0x24), + Operands::Halt => Instruction::new_trap(0x25), + + Operands::Orig => { + + } + Operands::Stringz { .. } + | Operands::Blkw { .. } + | Operands::End { .. 
} => { + match expanded.unwrap() { + Err(error) => { errors.push(EarlierParseError { error: error.clone() }); Err(errors) } + Ok(values) => { Ok(InstructionOrValues::Values(values)) } + } + } + } + } + }; + + Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines, instruction_or_values, } +} + +fn compute_offset(pc_offset: ir5_expand_pseudo_ops::Checked, location: Addr, symbol_table: &HashMap<&str, Addr>) -> SignedWord { + use ImmOrLabel::*; + match pc_offset.unwrap() { + Label(label) => { + let label = label.unwrap(); + let label_location = symbol_table.get(label).unwrap().clone(); + let label_location = label_location as i64; + let offset_base = (location + 1) as i64; + (label_location - offset_base) as SignedWord + } + Imm(immediate) => immediate.value.unwrap() + } +} diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index ef2a4b5..1c5cb3c 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -9,7 +9,6 @@ pub mod ir; pub mod error; pub mod util; pub mod complete; -pub mod assembler; pub mod analysis; diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index 672b31a..ce08b4f 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -4,12 +4,16 @@ use crate::ir::ir2_parse_line_syntax::parse_line_syntax; use crate::ir::ir3_parse_objects::parse_objects; use crate::ir::ir4_parse_ambiguous_tokens; use crate::ir::ir4_parse_ambiguous_tokens::AmbiguousTokenParser; +use crate::complete; +use crate::ir::ir5_expand_pseudo_ops::expand_pseudo_ops; -pub fn parse(tokens: Lexer, leniency: LeniencyLevel) -> ir4_parse_ambiguous_tokens::File { +pub fn parse(tokens: Lexer, leniency: LeniencyLevel) -> complete::Program { let ir1 = parse_lines(tokens); let ir2 = parse_line_syntax(ir1); let ir3 = parse_objects(ir2); - AmbiguousTokenParser { leniency }.parse_ambiguous_tokens(ir3) + let ir4 = AmbiguousTokenParser { leniency }.parse_ambiguous_tokens(ir3); + let ir5 = expand_pseudo_ops(ir4); + } // TODO: impl Default? 
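Throughout these rewrites, compute_offset keeps the same PC-relative rule: the offset for a label operand is measured from the address after the instruction, i.e. label_location - (location + 1). A minimal standalone sketch of just that arithmetic (the function and test below are illustrative only; the crate's version also unwraps Checked values and consults the symbol table):

    // Standalone sketch of the PC-relative offset rule used by compute_offset.
    // LC-3 increments the PC before applying the offset, so the base is location + 1.
    fn pc_relative_offset(label_location: u16, insn_location: u16) -> i16 {
        (label_location as i64 - (insn_location as i64 + 1)) as i16
    }

    #[cfg(test)]
    mod pc_offset_sketch {
        use super::pc_relative_offset;

        #[test]
        fn forward_and_backward_branches() {
            // A BR at x3000 targeting a label at x3005 encodes pc_offset9 = 4,
            assert_eq!(pc_relative_offset(0x3005, 0x3000), 4);
            // and a BR at x3005 targeting a label at x3000 encodes -6.
            assert_eq!(pc_relative_offset(0x3000, 0x3005), -6);
        }
    }
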
diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 49ecf6d..375ecf2 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -2,7 +2,6 @@ extern crate lc3_assembler; use lc3_assembler::lexer::Lexer; use lc3_assembler::parser::parse; -use lc3_assembler::assembler::assemble; use lc3_isa::Word; use lc3_assembler::parser::LeniencyLevel::Lenient; From 4b62c361b201f4044c23b6b10a447d0aba108db3 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 27 Apr 2020 23:01:17 -0500 Subject: [PATCH 08/82] assembler: finish first pass at more complete CST --- assembler/src/analysis/memory_placement.rs | 4 +- assembler/src/analysis/symbol_table.rs | 2 +- assembler/src/complete.rs | 288 ++++++++++++--------- assembler/src/ir/ir5_expand_pseudo_ops.rs | 15 +- assembler/src/parser.rs | 3 +- 5 files changed, 182 insertions(+), 130 deletions(-) diff --git a/assembler/src/analysis/memory_placement.rs b/assembler/src/analysis/memory_placement.rs index 82d97cd..7fb62ce 100644 --- a/assembler/src/analysis/memory_placement.rs +++ b/assembler/src/analysis/memory_placement.rs @@ -29,7 +29,7 @@ pub fn validate_placement(objects: &Vec) -> Resul if !errors.is_empty() { return Err(errors); } - let start_end_pairs = starts_and_ends.iter() + let start_end_pairs = starts_and_ends.into_iter() .map(|start_and_end| start_and_end.unwrap()) .sorted_by_key(|(start, end)| *start) .tuple_windows(); @@ -56,7 +56,7 @@ fn get_start_and_end(object: &ir5_expand_pseudo_ops::Object) -> Result<(Addr, Ad Ok(origin) => { let start = *origin; let mut end = start; - for operation in object.content.operations { + for operation in &object.content.operations { match operation.num_memory_locations_occupied() { Ok(num_locations) => { end += num_locations as Addr; diff --git a/assembler/src/analysis/symbol_table.rs b/assembler/src/analysis/symbol_table.rs index dc77a19..a194531 100644 --- a/assembler/src/analysis/symbol_table.rs +++ b/assembler/src/analysis/symbol_table.rs @@ -6,7 +6,7 @@ use crate::error::ParseError; pub type SymbolTable<'input> = HashMap<&'input str, Addr>; -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum SymbolTableError { InvalidOrigin { parse_error: ParseError, diff --git a/assembler/src/complete.rs b/assembler/src/complete.rs index 3543e14..03f9bdc 100644 --- a/assembler/src/complete.rs +++ b/assembler/src/complete.rs @@ -3,7 +3,7 @@ use crate::lexer::{Token, Span}; use crate::analysis::symbol_table; use crate::analysis::symbol_table::{SymbolTableError, build_symbol_table}; use crate::error::ParseError; -use crate::ir::ir4_parse_ambiguous_tokens::{UnsignedImmOrLabel, ImmOrLabel, Sr2OrImm5}; +use crate::ir::ir4_parse_ambiguous_tokens::{UnsignedImmOrLabel, ImmOrLabel, Sr2OrImm5, Checked}; use crate::ir::{ir2_parse_line_syntax, ir4_parse_ambiguous_tokens, ir5_expand_pseudo_ops,}; use std::collections::HashMap; @@ -18,7 +18,8 @@ pub type Immediate<'input, Addr> = ir5_expand_pseudo_ops::Immediate<'input, Addr pub type SymbolTable<'input> = Result, Vec>; pub struct Program<'input> { - pub objects: Vec> + pub objects: Vec>, + pub ignored: Vec>, } pub struct Object<'input> { @@ -47,7 +48,7 @@ pub struct Operation<'input> { pub comments: Vec>, pub newlines: Vec>, - pub instruction_or_values: Result>, + pub instruction_or_values: Result, } pub enum InstructionOrValues { @@ -57,12 +58,12 @@ pub enum InstructionOrValues { impl<'input> Operation<'input> { - pub fn num_memory_locations_occupied(&self) -> Result> { + pub fn num_memory_locations_occupied(&self) -> Result { use InstructionOrValues::*; match 
&self.instruction_or_values { Ok(Instruction(_, _)) => Ok(1), Ok(Values(values)) => Ok(values.len()), - Err(errors) => Err(errors.clone()) + Err(error) => Err(error.clone()) } } @@ -72,10 +73,26 @@ pub type Operands<'input> = ir5_expand_pseudo_ops::Operands<'input>; pub type ConditionCodes = ir5_expand_pseudo_ops::ConditionCodes; pub type Separator<'input> = ir5_expand_pseudo_ops::Separator<'input>; +#[derive(Clone)] +pub enum ConstructInstructionError { + EarlierParseError { + error: ParseError, + }, + SymbolTableInvalid { + errors: Vec, + }, + InvalidLabel { + span: Span, + label: String, + }, +} + pub fn construct_all_instructions(file: ir5_expand_pseudo_ops::File) -> Program { - file.into_iter() + let ir5_expand_pseudo_ops::File { objects, ignored } = file; + let objects = objects.into_iter() .map(construct_instructions) - .collect() + .collect(); + Program { objects, ignored } } pub fn construct_instructions(object: ir5_expand_pseudo_ops::Object) -> Object { @@ -96,142 +113,173 @@ fn construct_operations_instructions<'input, O>(operations: O, origin: &Immediat { use ConstructInstructionError::*; - let mut operations = Vec::new(); let mut current_location = match &origin.value { - Err(error) => Err(ConstructInstructionError { error }), - Ok(origin) => Ok(origin), + Err(error) => Err(error.into()), + Ok(origin) => Ok(*origin), }; + let mut new_operations = Vec::new(); for operation in operations { - let operation = construct_instruction_for_operation(operation, , symbol_table); - operations.push(operation); + let increment = operation.num_memory_locations_occupied(); + let new_operation = construct_instruction_for_operation(operation, ¤t_location, symbol_table); + new_operations.push(new_operation); + current_location = match (current_location, increment) { + (Ok(location), Ok(amount)) => Ok(location + amount as u16), + (Ok(_), Err(error)) => Err(error.into()), + (error, _) => error, + }; } - operations -}; - -pub enum ConstructInstructionError { - EarlierParseError { - error: ParseError - }, - InvalidLabel { - span: Span, - label: String, - }, + new_operations } + fn construct_instruction_for_operation<'input>( operation: ir5_expand_pseudo_ops::Operation<'input>, - location: Result>, + location: &Result, symbol_table: &SymbolTable<'input> ) -> Operation<'input> { + let ir5_expand_pseudo_ops::Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines, expanded } = operation; + let instruction_or_values = construct_instruction_or_values(location, symbol_table, &nzp, operands.clone(), &expanded); + Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines, instruction_or_values, } +} - use ConstructInstructionError::*; +impl From for ConstructInstructionError { + fn from(error: ParseError) -> Self { + ConstructInstructionError::EarlierParseError { + error + } + } +} +impl From<&ParseError> for ConstructInstructionError { + fn from(error: &ParseError) -> Self { + ConstructInstructionError::EarlierParseError { + error: error.clone() + } + } +} - let ir5_expand_pseudo_ops::Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines, expanded } = operation; +fn construct_instruction_or_values( + location: &Result, + symbol_table: &SymbolTable, + nzp: &Result, ParseError>, + operands: ir5_expand_pseudo_ops::Operands, + expanded: &Option, ParseError>>, +) -> Result { + use ConstructInstructionError::*; - let instruction_or_values = match location { - Err(errors) => Err(errors), - 
Ok(current_location) => { - let mut errors = Vec::new(); - match &operands { - Operands::Fill { value } => { - let value = match &value.value { - Err(error) => { errors.push(EarlierParseError { error: error.clone() }); Err(errors) } - Ok(UnsignedImmOrLabel::Imm(immediate)) => match &immediate.value { - Err(error) => { errors.push(EarlierParseError { error: error.clone() }); Err(errors) } - Ok(immediate) => Ok(immediate), - }, - Ok(UnsignedImmOrLabel::Label(label)) => match &label.value { - Err(error) => { errors.push(EarlierParseError { error: error.clone() }); Err(errors) } - Ok(label_value) => { - match symbol_table.get(label_value) { - Some(addr) => Ok(addr), - None => { - errors.push(InvalidLabel { - span: label.src.span, - label: label_value.to_string(), - }); - Err(errors) - } - } - }, - }, - }; - value.map(|value| InstructionOrValues::Values(vec![value])) + let location = location.clone()?; + match operands { + Operands::Fill { value } => { + let value = value.value?; // TODO: lol + let value = match value { + UnsignedImmOrLabel::Imm(immediate) => { + let immediate = immediate.value?; + Ok(immediate) }, - Operands::Add { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.value { - Err(error) => { errors.push(EarlierParseError { error: error.clone() }); Err(errors) } - Ok(Sr2OrImm5::Imm5(immediate)) => { - Instruction::new_add_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - }, - Ok(Sr2OrImm5::Sr2(src_reg)) => { - Instruction::new_add_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), + UnsignedImmOrLabel::Label(label) => { + let label_value = &label.value?; + if let Err(errors) = symbol_table { + return Err(ConstructInstructionError::SymbolTableInvalid { errors: errors.clone() }) + } + let symbol_table = symbol_table.as_ref().unwrap(); + match symbol_table.get(label_value) { + None => { + Err(InvalidLabel { + span: label.src.span, + label: label_value.to_string(), + }) + }, + Some(addr) => Ok(*addr), } }, - Operands::And { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.unwrap() { - Sr2OrImm5::Imm5(immediate) => Instruction::new_and_imm(dr.unwrap(), sr1.unwrap(), immediate.unwrap()), - Sr2OrImm5::Sr2(src_reg) => Instruction::new_and_reg(dr.unwrap(), sr1.unwrap(), src_reg.unwrap()), - }, + }; + value.map(|value| InstructionOrValues::Values(vec![value])) + }, + Operands::Add { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.value? { + Sr2OrImm5::Imm5(immediate) => { Ok(Instruction::new_add_imm(dr.value?, sr1.value?, immediate.value?,).into()) }, + Sr2OrImm5::Sr2(src_reg) => { Ok(Instruction::new_add_reg(dr.value?, sr1.value?, src_reg.value?,).into()) } + }, + Operands::And { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.value? 
{ + Sr2OrImm5::Imm5(immediate) => { Ok(Instruction::new_and_imm(dr.value?, sr1.value?, immediate.value?,).into()) }, + Sr2OrImm5::Sr2(src_reg) => { Ok(Instruction::new_and_reg(dr.value?, sr1.value?, src_reg.value?,).into()) } + }, + Operands::Ld { dr, pc_offset9 } => Ok(Instruction::new_ld(dr.value?, compute_offset(pc_offset9, location, symbol_table)?).into()), + Operands::Ldi { dr, pc_offset9 } => Ok(Instruction::new_ldi(dr.value?, compute_offset(pc_offset9, location, symbol_table)?).into()), + Operands::Ldr { dr, base, offset6 } => Ok(Instruction::new_ldr(dr.value?, base.value?, offset6.value?).into()), + Operands::Lea { dr, pc_offset9 } => Ok(Instruction::new_lea(dr.value?, compute_offset(pc_offset9, location, symbol_table)?).into()), + + Operands::St { sr, pc_offset9 } => Ok(Instruction::new_st(sr.value?, compute_offset(pc_offset9, location, symbol_table)?).into()), + Operands::Sti { sr, pc_offset9 } => Ok(Instruction::new_sti(sr.value?, compute_offset(pc_offset9, location, symbol_table)?).into()), + Operands::Str { sr, base, offset6 } => Ok(Instruction::new_str(sr.value?, base.value?, offset6.value?).into()), + + Operands::Not { dr, sr } => Ok(Instruction::new_not(dr.value?, sr.value?).into()), + + Operands::Br { pc_offset9, .. } => { + let nzp = nzp.clone()?.unwrap(); + Ok(Instruction::new_br( + nzp.n, nzp.z, nzp.p, + compute_offset(pc_offset9, location, symbol_table)? + ).into()) + } - Operands::Ld { dr, pc_offset9 } => Instruction::new_ld(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Ldi { dr, pc_offset9 } => Instruction::new_ldi(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Ldr { dr, base, offset6 } => Instruction::new_ldr(dr.unwrap(), base.unwrap(), offset6.unwrap()), - Operands::Lea { dr, pc_offset9 } => Instruction::new_lea(dr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - - Operands::St { sr, pc_offset9 } => Instruction::new_st(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Sti { sr, pc_offset9 } => Instruction::new_sti(sr.unwrap(), compute_offset(pc_offset9, current_location, &symbol_table)), - Operands::Str { sr, base, offset6 } => Instruction::new_str(sr.unwrap(), base.unwrap(), offset6.unwrap()), - - Operands::Not { dr, sr } => Instruction::new_not(dr.unwrap(), sr.unwrap()), - - Operands::Br { pc_offset9, .. } => { - let nzp = nzp.unwrap(); - Instruction::new_br(nzp.n, nzp.z, nzp.p, compute_offset(pc_offset9, current_location, &symbol_table)) - } - - Operands::Jmp { base } => Instruction::new_jmp(base.unwrap()), - Operands::Jsr { pc_offset11 } => Instruction::new_jsr(compute_offset(pc_offset11, current_location, &symbol_table)), - Operands::Jsrr { base } => Instruction::new_jsrr(base.unwrap()), - - Operands::Ret => Instruction::new_ret(), - Operands::Rti => Instruction::new_rti(), - - Operands::Trap { trap_vec } => Instruction::new_trap(trap_vec.unwrap()), - Operands::Getc => Instruction::new_trap(0x20), - Operands::Out => Instruction::new_trap(0x21), - Operands::Puts => Instruction::new_trap(0x22), - Operands::In => Instruction::new_trap(0x23), - Operands::Putsp => Instruction::new_trap(0x24), - Operands::Halt => Instruction::new_trap(0x25), - - Operands::Orig => { - - } - Operands::Stringz { .. } - | Operands::Blkw { .. } - | Operands::End { .. 
} => { - match expanded.unwrap() { - Err(error) => { errors.push(EarlierParseError { error: error.clone() }); Err(errors) } - Ok(values) => { Ok(InstructionOrValues::Values(values)) } - } - } - } + Operands::Jmp { base } => Ok(Instruction::new_jmp(base.value?).into()), + Operands::Jsr { pc_offset11 } => Ok(Instruction::new_jsr(compute_offset(pc_offset11, location, symbol_table)?).into()), + Operands::Jsrr { base } => Ok(Instruction::new_jsrr(base.value?).into()), + + Operands::Ret => Ok(Instruction::new_ret().into()), + Operands::Rti => Ok(Instruction::new_rti().into()), + + Operands::Trap { trap_vec } => Ok(Instruction::new_trap(trap_vec.value?).into()), + Operands::Getc => Ok(Instruction::new_trap(0x20).into()), + Operands::Out => Ok(Instruction::new_trap(0x21).into()), + Operands::Puts => Ok(Instruction::new_trap(0x22).into()), + Operands::In => Ok(Instruction::new_trap(0x23).into()), + Operands::Putsp => Ok(Instruction::new_trap(0x24).into()), + Operands::Halt => Ok(Instruction::new_trap(0x25).into()), + + Operands::Stringz { .. } + | Operands::Blkw { .. } + | Operands::End { .. } => { + let expanded_inner = expanded.as_ref().unwrap(); + let values = expanded_inner.as_ref()?; + Ok(InstructionOrValues::Values(values.clone())) } - }; - Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines, instruction_or_values, } + Operands::Orig { .. } => { unreachable!("Unexpected attempt to assemble a .ORIG.") } + } +} + +impl From for InstructionOrValues { + fn from(inst: Instruction) -> Self { + InstructionOrValues::Instruction(inst, inst.into()) + } } -fn compute_offset(pc_offset: ir5_expand_pseudo_ops::Checked, location: Addr, symbol_table: &HashMap<&str, Addr>) -> SignedWord { +fn compute_offset(pc_offset: Checked, location: Addr, symbol_table: &SymbolTable) -> Result { use ImmOrLabel::*; - match pc_offset.unwrap() { + + let pc_offset = pc_offset.value?; + if let Err(errors) = symbol_table { + return Err(ConstructInstructionError::SymbolTableInvalid { errors: errors.clone() }) + } + let symbol_table = symbol_table.as_ref().unwrap(); + match pc_offset { + Imm(immediate) => Ok(immediate.value?), Label(label) => { - let label = label.unwrap(); - let label_location = symbol_table.get(label).unwrap().clone(); - let label_location = label_location as i64; - let offset_base = (location + 1) as i64; - (label_location - offset_base) as SignedWord - } - Imm(immediate) => immediate.value.unwrap() + let label_value = label.value?; + match symbol_table.get(label_value) { + None => { + Err(ConstructInstructionError::InvalidLabel { + span: label.src.span, + label: label_value.to_string(), + }) + }, + Some(addr) => { + let label_location = *addr as i64; + let offset_base = (location + 1) as i64; + Ok((label_location - offset_base) as SignedWord) + }, + } + }, } } diff --git a/assembler/src/ir/ir5_expand_pseudo_ops.rs b/assembler/src/ir/ir5_expand_pseudo_ops.rs index b92b38c..7d16d57 100644 --- a/assembler/src/ir/ir5_expand_pseudo_ops.rs +++ b/assembler/src/ir/ir5_expand_pseudo_ops.rs @@ -9,7 +9,10 @@ use crate::error::ParseError; pub type Label<'input> = ir4_parse_ambiguous_tokens::Label<'input>; pub type Immediate<'input, Addr> = Checked<'input, Addr>; -pub type File<'input> = Vec>; +pub struct File<'input> { + pub objects: Vec>, + pub ignored: Vec>, +} pub struct Object<'input> { pub origin_src: Operation<'input>, @@ -57,12 +60,12 @@ pub type Operands<'input> = ir4_parse_ambiguous_tokens::Operands<'input>; pub type ConditionCodes = 
ir4_parse_ambiguous_tokens::ConditionCodes; pub type Separator<'input> = ir4_parse_ambiguous_tokens::Separator<'input>; -pub fn expand_pseudo_ops<'input, O>(objects: O) -> File<'input> - where O: IntoIterator> -{ - objects.into_iter() +pub fn expand_pseudo_ops(file: ir4_parse_ambiguous_tokens::File) -> File { + let ir4_parse_ambiguous_tokens::File { objects, ignored } = file; + let objects = objects.into_iter() .map(expand_object_pseudo_ops) - .collect() + .collect(); + File { objects, ignored } } pub fn expand_object_pseudo_ops(object: ir4_parse_ambiguous_tokens::Object) -> Object { diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index ce08b4f..f165385 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -6,6 +6,7 @@ use crate::ir::ir4_parse_ambiguous_tokens; use crate::ir::ir4_parse_ambiguous_tokens::AmbiguousTokenParser; use crate::complete; use crate::ir::ir5_expand_pseudo_ops::expand_pseudo_ops; +use crate::complete::construct_all_instructions; pub fn parse(tokens: Lexer, leniency: LeniencyLevel) -> complete::Program { let ir1 = parse_lines(tokens); @@ -13,7 +14,7 @@ pub fn parse(tokens: Lexer, leniency: LeniencyLevel) -> complete::Program { let ir3 = parse_objects(ir2); let ir4 = AmbiguousTokenParser { leniency }.parse_ambiguous_tokens(ir3); let ir5 = expand_pseudo_ops(ir4); - + construct_all_instructions(ir5) } // TODO: impl Default? From 6c6436d78b4d30bc3c1b35408a70c9b5249a6a55 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Thu, 30 Apr 2020 20:36:17 -0500 Subject: [PATCH 09/82] assembler: update error extraction --- assembler/src/analysis/extract_errors.rs | 180 ++++++++++++++ assembler/src/analysis/memory_placement.rs | 9 +- assembler/src/complete.rs | 5 +- assembler/src/error.rs | 234 ++++++------------ .../src/ir/ir4_parse_ambiguous_tokens.rs | 8 +- 5 files changed, 269 insertions(+), 167 deletions(-) diff --git a/assembler/src/analysis/extract_errors.rs b/assembler/src/analysis/extract_errors.rs index e69de29..9491d36 100644 --- a/assembler/src/analysis/extract_errors.rs +++ b/assembler/src/analysis/extract_errors.rs @@ -0,0 +1,180 @@ +use crate::complete::{Program, Object, ObjectContent, Operation, Operands}; +use crate::error::{Error, ParseError}; +use crate::ir::ir4_parse_ambiguous_tokens; +use crate::ir::ir5_expand_pseudo_ops; + +pub fn extract_errors(program: &Program) -> Vec { + let mut errors = Vec::new(); + + let Program { objects, memory_placement_errors, .. } = program; + if objects.len() == 0 { + errors.push(ParseError::NoObjects.into()); + } + + for object in objects { + extract_object_errors_into(object, &mut errors); + } + + for memory_placement_error in memory_placement_errors { + errors.push(memory_placement_error.into()); + } + + errors +} + +fn extract_object_errors_into(object: &Object, errors: &mut Vec) { + let Object { origin_src, origin, content, symbol_table, } = object; + + extract_ir5_operation_errors(origin_src, errors); + origin.extract_error_into(errors); + extract_object_content_errors(content, errors); + + if let Err(symbol_table_errors) = symbol_table { + for symbol_table_error in symbol_table_errors { + errors.push(symbol_table_error.into()); + } + } +} + +fn extract_object_content_errors(object_content: &ObjectContent, errors: &mut Vec) { + let ObjectContent { operations, hanging_labels, invalid_lines, .. 
} = object_content; + + for operation in operations { + extract_operation_errors(operation, errors); + } + + for hanging_label in hanging_labels { + let range = hanging_label.span().unwrap(); + errors.push(ParseError::HangingLabel { range }.into()); + } + + for invalid_line in invalid_lines { + let range = invalid_line.span(); + errors.push(ParseError::InvalidLine { range }.into()); + } + +} + +fn extract_operation_errors(operation: &Operation, errors: &mut Vec) { + let Operation { label, operands, nzp, instruction_or_values, .. } = operation; + + if let Some(label) = label { + label.extract_error_into(errors); + } + + extract_operands_errors(operands, errors); + + if let Err(error) = nzp { + errors.push(error.into()); + } + + if let Err(inst_error) = instruction_or_values { + errors.push(inst_error.into()); + } +} + +fn extract_ir5_operation_errors(operation: &ir5_expand_pseudo_ops::Operation, errors: &mut Vec) { + let ir5_expand_pseudo_ops::Operation { label, operands, nzp, expanded, .. } = operation; + + if let Some(label) = label { + label.extract_error_into(errors); + } + + extract_operands_errors(operands, errors); + + if let Err(error) = nzp { + errors.push(error.into()); + } + + if let Some(Err(parse_error)) = expanded { + errors.push(parse_error.into()); + } + +} + +fn extract_operands_errors(operands: &Operands, errors: &mut Vec) { + use ir4_parse_ambiguous_tokens::Operands::*; + + match operands { + Add { dr, sr1, sr2_or_imm5 } => { + dr.extract_error_into(errors); + sr1.extract_error_into(errors); + sr2_or_imm5.extract_error_into(errors); + }, + And { dr, sr1, sr2_or_imm5 } => { + dr.extract_error_into(errors); + sr1.extract_error_into(errors); + sr2_or_imm5.extract_error_into(errors); + }, + Br { pc_offset9 } => { + pc_offset9.extract_error_into(errors); + }, + Jmp { base } => { + base.extract_error_into(errors); + }, + Jsr { pc_offset11 } => { + pc_offset11.extract_error_into(errors); + }, + Jsrr { base } => { + base.extract_error_into(errors); + }, + Ld { dr, pc_offset9 } => { + dr.extract_error_into(errors); + pc_offset9.extract_error_into(errors); + }, + Ldi { dr, pc_offset9 } => { + dr.extract_error_into(errors); + pc_offset9.extract_error_into(errors); + }, + Ldr { dr, base, offset6 } => { + dr.extract_error_into(errors); + base.extract_error_into(errors); + offset6.extract_error_into(errors); + }, + Lea { dr, pc_offset9 } => { + dr.extract_error_into(errors); + pc_offset9.extract_error_into(errors); + }, + Not { dr, sr } => { + dr.extract_error_into(errors); + sr.extract_error_into(errors); + }, + St { sr, pc_offset9 } => { + sr.extract_error_into(errors); + pc_offset9.extract_error_into(errors); + } + Sti { sr, pc_offset9 } => { + sr.extract_error_into(errors); + pc_offset9.extract_error_into(errors); + } + Str { sr, base, offset6 } => { + sr.extract_error_into(errors); + base.extract_error_into(errors); + offset6.extract_error_into(errors); + } + Trap { trap_vec } => { + trap_vec.extract_error_into(errors); + } + Orig { origin } => { + origin.extract_error_into(errors); + } + Fill { value } => { + value.extract_error_into(errors); + } + Blkw { size, .. } => { + size.extract_error_into(errors); + } + Stringz { .. 
} => {} + + // Putting these in instead of _ to avoid forgetting to change + Ret + | Rti + | Getc + | Out + | Puts + | In + | Putsp + | Halt + | End => {} + }; +} diff --git a/assembler/src/analysis/memory_placement.rs b/assembler/src/analysis/memory_placement.rs index 7fb62ce..ac2a474 100644 --- a/assembler/src/analysis/memory_placement.rs +++ b/assembler/src/analysis/memory_placement.rs @@ -16,7 +16,7 @@ pub enum MemoryPlacementError { ObjectsOverlap } -pub fn validate_placement(objects: &Vec<ir5_expand_pseudo_ops::Object>) -> Result<(), Vec<MemoryPlacementError>> { +pub fn validate_placement(objects: &Vec<ir5_expand_pseudo_ops::Object>) -> Vec<MemoryPlacementError> { let starts_and_ends = objects.iter() .map(get_start_and_end) .collect::<Vec<_>>(); @@ -27,7 +27,7 @@ pub fn validate_placement(objects: &Vec<ir5_expand_pseudo_ops::Object>) -> Resul } } if !errors.is_empty() { - return Err(errors); + return errors; } let start_end_pairs = starts_and_ends.into_iter() .map(|start_and_end| start_and_end.unwrap()) @@ -38,10 +38,7 @@ pub fn validate_placement(objects: &Vec<ir5_expand_pseudo_ops::Object>) -> Resul errors.push(MemoryPlacementError::ObjectsOverlap); } } - if !errors.is_empty() { - return Err(errors); - } - Ok(()) + errors } /// Returns the first memory location the object occupies and the first memory location after the object. diff --git a/assembler/src/complete.rs b/assembler/src/complete.rs index 03f9bdc..f6d649f 100644 --- a/assembler/src/complete.rs +++ b/assembler/src/complete.rs @@ -6,6 +6,7 @@ use crate::error::ParseError; use crate::ir::ir4_parse_ambiguous_tokens::{UnsignedImmOrLabel, ImmOrLabel, Sr2OrImm5, Checked}; use crate::ir::{ir2_parse_line_syntax, ir4_parse_ambiguous_tokens, ir5_expand_pseudo_ops,}; use std::collections::HashMap; +use crate::analysis::memory_placement::{MemoryPlacementError, validate_placement}; /// `complete` will store as much data as possible /// relating to the source *and* what it will be assembled to.
@@ -19,6 +20,7 @@ pub type SymbolTable<'input> = Result<symbol_table::SymbolTable<'input>, Vec<SymbolTableError>>; pub struct Program<'input> { pub objects: Vec<Object<'input>>, + pub memory_placement_errors: Vec<MemoryPlacementError>, pub ignored: Vec<ir2_parse_line_syntax::Line<'input>>, } @@ -89,10 +91,11 @@ pub enum ConstructInstructionError { pub fn construct_all_instructions(file: ir5_expand_pseudo_ops::File) -> Program { let ir5_expand_pseudo_ops::File { objects, ignored } = file; + let memory_placement_errors = validate_placement(&objects); let objects = objects.into_iter() .map(construct_instructions) .collect(); - Program { objects, ignored } + Program { objects, memory_placement_errors, ignored } } pub fn construct_instructions(object: ir5_expand_pseudo_ops::Object) -> Object { diff --git a/assembler/src/error.rs b/assembler/src/error.rs index a4088be..663b3c1 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -9,6 +9,9 @@ use crate::ir::ir4_parse_ambiguous_tokens::{Object, ObjectContent, Operation, Op use lc3_isa::SignedWord; use crate::ir::ir2_parse_line_syntax::LineContent::Invalid; use annotate_snippets::display_list::FormatOptions; +use crate::analysis::memory_placement::MemoryPlacementError; +use crate::analysis::symbol_table::SymbolTableError; +use crate::complete::ConstructInstructionError; #[derive(Debug, Clone, Copy, PartialEq)] pub enum LexError { @@ -18,6 +21,7 @@ pub enum ParseError { + NoObjects, InvalidReg { range: Span, reason: InvalidRegReason @@ -49,6 +53,75 @@ pub enum ParseError { Misc(String), } +pub enum Error { + Lex(LexError), + Parse(ParseError), + MemoryPlacement(MemoryPlacementError), + SymbolTable(SymbolTableError), + ConstructInstruction(ConstructInstructionError), +} + +// TODO: write macro for these From impls +impl From<LexError> for Error { + fn from(error: LexError) -> Self { + Error::Lex(error) + } +} + +impl From<&LexError> for Error { + fn from(error: &LexError) -> Self { + Error::Lex(error.clone()) + } +} + +impl From<ParseError> for Error { + fn from(error: ParseError) -> Self { + Error::Parse(error) + } +} + +impl From<&ParseError> for Error { + fn from(error: &ParseError) -> Self { + Error::Parse(error.clone()) + } +} + +impl From<MemoryPlacementError> for Error { + fn from(error: MemoryPlacementError) -> Self { + Error::MemoryPlacement(error) + } +} + +impl From<&MemoryPlacementError> for Error { + fn from(error: &MemoryPlacementError) -> Self { + Error::MemoryPlacement(error.clone()) + } +} + +impl From<SymbolTableError> for Error { + fn from(error: SymbolTableError) -> Self { + Error::SymbolTable(error) + } +} + +impl From<&SymbolTableError> for Error { + fn from(error: &SymbolTableError) -> Self { + Error::SymbolTable(error.clone()) + } +} + +impl From<ConstructInstructionError> for Error { + fn from(error: ConstructInstructionError) -> Self { + Error::ConstructInstruction(error) + } +} + +impl From<&ConstructInstructionError> for Error { + fn from(error: &ConstructInstructionError) -> Self { + Error::ConstructInstruction(error.clone()) + } +} + #[derive(Debug, Clone)] pub enum InvalidRegReason { FirstChar, @@ -131,6 +204,9 @@ impl ParseError { invalid_label_reasons.iter().map(InvalidLabelReason::to_string).join(", "), invalid_immediate_reason) } + NoObjects => { + format!("no objects (.ORIG/.END blocks) found in file") + } } } @@ -160,7 +236,8 @@ impl ParseError { InvalidImmediate { range, .. } => { push_annotation!(range, "invalid immediate here"); } InvalidRegOrImm5 { range, .. } => { push_annotation!(range, "invalid register or immediate here"); } InvalidLabelOrImmediate { range, ..
} => { push_annotation!(range, "invalid label or immediate here"); } - Misc(_) => {}, + NoObjects + | Misc(_) => {}, } annotations } @@ -168,158 +245,3 @@ impl ParseError { } -pub fn extract_file_errors(cst: ir4_parse_ambiguous_tokens::File) -> Vec { - let mut errors = Vec::new(); - - let ir4_parse_ambiguous_tokens::File { objects, .. } = cst; - if objects.len() == 0 { - errors.push(ParseError::Misc("File contained no objects.".to_string())); - } - - for object in objects { - errors.extend(extract_object_errors(object)) - } - - errors -} - -fn extract_object_errors(object: Object) -> Vec { - let mut errors = Vec::new(); - - let Object { origin, content, .. } = object; - - origin.extract_error_into(&mut errors); - errors.extend(extract_object_content_errors(content)); - - errors -} - -fn extract_object_content_errors(object_content: ObjectContent) -> Vec { - let mut errors = Vec::new(); - - let ObjectContent { operations, hanging_labels, invalid_lines, .. } = object_content; - - for operation in operations { - errors.extend(extract_operation_errors(operation)); - } - - for hanging_label in hanging_labels { - let range = hanging_label.span().unwrap(); - errors.push(ParseError::HangingLabel { range }); - } - - for invalid_line in invalid_lines { - let range = invalid_line.span(); - errors.push(ParseError::InvalidLine { range }); - } - - errors -} - -fn extract_operation_errors(operation: Operation) -> Vec { - let mut errors = Vec::new(); - - let Operation { label, operands, nzp, .. } = operation; - - if let Some(label) = label { - label.extract_error_into(&mut errors); - } - - errors.extend(extract_operands_errors(operands)); - - if let Err(error) = nzp { - errors.push(error); - } - - errors -} - -fn extract_operands_errors(operands: Operands) -> Vec { - use Operands::*; - - let mut errors = Vec::new(); - match operands { - Add { dr, sr1, sr2_or_imm5 } => { - dr.extract_error_into(&mut errors); - sr1.extract_error_into(&mut errors); - sr2_or_imm5.extract_error_into(&mut errors); - }, - And { dr, sr1, sr2_or_imm5 } => { - dr.extract_error_into(&mut errors); - sr1.extract_error_into(&mut errors); - sr2_or_imm5.extract_error_into(&mut errors); - }, - Br { pc_offset9 } => { - pc_offset9.extract_error_into(&mut errors); - }, - Jmp { base } => { - base.extract_error_into(&mut errors); - }, - Jsr { pc_offset11 } => { - pc_offset11.extract_error_into(&mut errors); - }, - Jsrr { base } => { - base.extract_error_into(&mut errors); - }, - Ld { dr, pc_offset9 } => { - dr.extract_error_into(&mut errors); - pc_offset9.extract_error_into(&mut errors); - }, - Ldi { dr, pc_offset9 } => { - dr.extract_error_into(&mut errors); - pc_offset9.extract_error_into(&mut errors); - }, - Ldr { dr, base, offset6 } => { - dr.extract_error_into(&mut errors); - base.extract_error_into(&mut errors); - offset6.extract_error_into(&mut errors); - }, - Lea { dr, pc_offset9 } => { - dr.extract_error_into(&mut errors); - pc_offset9.extract_error_into(&mut errors); - }, - Not { dr, sr } => { - dr.extract_error_into(&mut errors); - sr.extract_error_into(&mut errors); - }, - St { sr, pc_offset9 } => { - sr.extract_error_into(&mut errors); - pc_offset9.extract_error_into(&mut errors); - } - Sti { sr, pc_offset9 } => { - sr.extract_error_into(&mut errors); - pc_offset9.extract_error_into(&mut errors); - } - Str { sr, base, offset6 } => { - sr.extract_error_into(&mut errors); - base.extract_error_into(&mut errors); - offset6.extract_error_into(&mut errors); - } - Trap { trap_vec } => { - trap_vec.extract_error_into(&mut errors); - } - 
Orig { origin } => { - origin.extract_error_into(&mut errors); - } - Fill { value } => { - value.extract_error_into(&mut errors); - } - Blkw { size, .. } => { - size.extract_error_into(&mut errors); - } - Stringz { .. } => {} - - // Putting these in instead of _ to avoid forgetting to change - Ret - | Rti - | Getc - | Out - | Puts - | In - | Putsp - | Halt - | End => {} - }; - - errors -} diff --git a/assembler/src/ir/ir4_parse_ambiguous_tokens.rs b/assembler/src/ir/ir4_parse_ambiguous_tokens.rs index 09fac65..3ace3bb 100644 --- a/assembler/src/ir/ir4_parse_ambiguous_tokens.rs +++ b/assembler/src/ir/ir4_parse_ambiguous_tokens.rs @@ -3,7 +3,7 @@ use num_traits::Num; use std::string::ToString; use lc3_isa::{Addr, SignedWord, check_signed_imm, Word}; -use crate::error::{ParseError, InvalidLabelReason, InvalidRegReason, InvalidImmediateReason}; +use crate::error::{ParseError, InvalidLabelReason, InvalidRegReason, InvalidImmediateReason, Error}; use crate::lexer::Token; use crate::ir::ir2_parse_line_syntax; use crate::ir::ir3_parse_objects; @@ -60,9 +60,9 @@ impl<'input, T> Checked<'input, T> { self.value.unwrap() } - pub fn extract_error_into(self, errors: &mut Vec) { - if let Err(error) = self.value { - errors.push(error); + pub fn extract_error_into(&self, errors: &mut Vec) { + if let Err(error) = &self.value { + errors.push(error.into()); } } } From a8cbfc79894ebafd29c7bffd4ace35740a5129a7 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Thu, 30 Apr 2020 20:38:31 -0500 Subject: [PATCH 10/82] assembler: add validate analysis function --- assembler/src/analysis/validate.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/assembler/src/analysis/validate.rs b/assembler/src/analysis/validate.rs index e69de29..8aa2839 100644 --- a/assembler/src/analysis/validate.rs +++ b/assembler/src/analysis/validate.rs @@ -0,0 +1,12 @@ +use crate::error::Error; +use crate::complete::Program; +use crate::analysis::extract_errors::extract_errors; + +pub fn validate(program: &Program) -> Result<(), Vec> { + let errors = extract_errors(program); + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } +} \ No newline at end of file From a3df64f11ff637c1b020b333f2a23de9f5f3f0db Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 1 May 2020 03:34:41 -0500 Subject: [PATCH 11/82] assembler: add assemble method to complete::Program --- assembler/bin/as.rs | 8 ++--- assembler/src/complete.rs | 40 +++++++++++++++++++++-- assembler/src/ir/ir5_expand_pseudo_ops.rs | 1 + 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index fdcb5b2..e2889fe 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -7,7 +7,7 @@ use lc3_assembler::parser::parse; use lc3_shims::memory::FileBackedMemoryShim; use clap::clap_app; use lc3_assembler::parser::LeniencyLevel::*; -use lc3_assembler::error::{extract_file_errors, ParseError}; +use lc3_assembler::analysis::extract_errors::extract_errors; use annotate_snippets::display_list::{DisplayList, FormatOptions}; use annotate_snippets::snippet::{Snippet, Annotation, Slice, AnnotationType, SourceAnnotation}; @@ -41,9 +41,9 @@ fn as_() { let string = fs::read_to_string(path).unwrap(); let src = string.as_str(); let lexer = Lexer::new(src); - let cst = parse(lexer, leniency); + let program = parse(lexer, leniency); - let errors = extract_file_errors(cst.clone()); + let errors = extract_errors(&program); if errors.len() > 0 { for error in errors { let label_string = error.message(); @@ -61,7 +61,7 @@ fn as_() { 
println!("{}: No errors found.", path_str); } else { let background = if matches.is_present("with_os") { Some(lc3_os::OS_IMAGE.clone()) } else { None }; - let mem = assemble(cst.objects, background); + let mem = cst.assemble(background); let mut output_path = PathBuf::from(path_str); output_path.set_extension(MEM_DUMP_FILE_EXTENSION); diff --git a/assembler/src/complete.rs b/assembler/src/complete.rs index f6d649f..56513a3 100644 --- a/assembler/src/complete.rs +++ b/assembler/src/complete.rs @@ -2,11 +2,12 @@ use lc3_isa::{Addr, Word, Instruction, SignedWord}; use crate::lexer::{Token, Span}; use crate::analysis::symbol_table; use crate::analysis::symbol_table::{SymbolTableError, build_symbol_table}; -use crate::error::ParseError; +use crate::error::{ParseError, Error}; use crate::ir::ir4_parse_ambiguous_tokens::{UnsignedImmOrLabel, ImmOrLabel, Sr2OrImm5, Checked}; use crate::ir::{ir2_parse_line_syntax, ir4_parse_ambiguous_tokens, ir5_expand_pseudo_ops,}; use std::collections::HashMap; use crate::analysis::memory_placement::{MemoryPlacementError, validate_placement}; +use lc3_isa::util::MemoryDump; /// `complete` will store as much data as possible /// relating to the source *and* what it will be assembled to. @@ -18,12 +19,44 @@ pub type Label<'input> = ir5_expand_pseudo_ops::Label<'input>; pub type Immediate<'input, Addr> = ir5_expand_pseudo_ops::Immediate<'input, Addr>; pub type SymbolTable<'input> = Result, Vec>; +#[derive(Debug)] pub struct Program<'input> { pub objects: Vec>, pub memory_placement_errors: Vec, pub ignored: Vec>, } +impl<'input> Program<'input> { + + pub fn assemble(&self, background: Option) -> MemoryDump { + let mut memory = if let Some(bg) = background { + MemoryDump::from(bg) + } else { + MemoryDump::blank() + }; + for object in &self.objects { + let mut i = *object.origin.value.as_ref().unwrap() as usize; + for operation in &object.content.operations { + match operation.instruction_or_values.as_ref().unwrap() { + InstructionOrValues::Instruction(_, word) => { + memory[i] = *word; + i += 1; + }, + InstructionOrValues::Values(values) => { + for value in values { + memory[i] = *value; + i += 1; + } + }, + } + } + } + memory + } + +} + +#[derive(Debug)] pub struct Object<'input> { pub origin_src: ir5_expand_pseudo_ops::Operation<'input>, pub origin: Immediate<'input, Addr>, @@ -31,6 +64,7 @@ pub struct Object<'input> { pub symbol_table: SymbolTable<'input>, } +#[derive(Debug)] pub struct ObjectContent<'input> { pub operations: Vec>, pub empty_lines: Vec>, @@ -38,6 +72,7 @@ pub struct ObjectContent<'input> { pub invalid_lines: Vec>, } +#[derive(Debug)] pub struct Operation<'input> { pub label: Option>, pub operator: Token<'input>, @@ -53,6 +88,7 @@ pub struct Operation<'input> { pub instruction_or_values: Result, } +#[derive(Debug)] pub enum InstructionOrValues { Instruction(Instruction, Word), Values(Vec), @@ -75,7 +111,7 @@ pub type Operands<'input> = ir5_expand_pseudo_ops::Operands<'input>; pub type ConditionCodes = ir5_expand_pseudo_ops::ConditionCodes; pub type Separator<'input> = ir5_expand_pseudo_ops::Separator<'input>; -#[derive(Clone)] +#[derive(Debug, Clone)] pub enum ConstructInstructionError { EarlierParseError { error: ParseError, diff --git a/assembler/src/ir/ir5_expand_pseudo_ops.rs b/assembler/src/ir/ir5_expand_pseudo_ops.rs index 7d16d57..b5ca680 100644 --- a/assembler/src/ir/ir5_expand_pseudo_ops.rs +++ b/assembler/src/ir/ir5_expand_pseudo_ops.rs @@ -27,6 +27,7 @@ pub struct ObjectContent<'input> { pub invalid_lines: Vec>, } +#[derive(Debug)] pub 
struct Operation<'input> { pub label: Option>, pub operator: Token<'input>, From 3d056a53f99304e807a11ce108d566a88a28e945 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 1 May 2020 04:24:48 -0500 Subject: [PATCH 12/82] assembler: add messages, annotations for new errors (BUILD FIXED) --- assembler/bin/as.rs | 26 ++++----- assembler/src/analysis/memory_placement.rs | 29 ++++++++++ assembler/src/analysis/symbol_table.rs | 52 ++++++++++++++++++ assembler/src/complete.rs | 46 ++++++++++++++++ assembler/src/error.rs | 62 +++++++++++++++++++++- assembler/tests/integ.rs | 2 +- 6 files changed, 203 insertions(+), 14 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index e2889fe..e2df30a 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -26,10 +26,10 @@ fn as_() { (version: env!("CARGO_PKG_VERSION")) (author: env!("CARGO_PKG_AUTHORS")) (about: env!("CARGO_PKG_DESCRIPTION")) - (@arg strict: -s --strict "Enforces all rules of the original LC-3 assembly language when validating the program") - (@arg check: -c --check "Checks the correctness of the program without attempting to assemble it") - (@arg with_os: -o --with_os "Overlays the program onto an image of the OS from lc3-os") - (@arg INPUT: +required ... "Paths to the programs to assemble") + (@arg strict: -s --strict "Enforces all rules of the original LC-3 assembly language when validating the program") + (@arg check: -c --check "Checks the correctness of the program without attempting to assemble it") + (@arg with_os: -o --with_os "Overlays the program onto an image of the OS from lc3-os") + (@arg INPUT: +required ... "Paths to the programs to assemble") ).get_matches(); for path_str in matches.values_of("INPUT").unwrap() { @@ -46,13 +46,15 @@ fn as_() { let errors = extract_errors(&program); if errors.len() > 0 { for error in errors { - let label_string = error.message(); - let label = label_string.as_str(); - let annotations = error.annotations(); - let slices = slices(annotations, src, Some(path_str)); - let snippet = create_snippet(label, slices); - let dl = DisplayList::from(snippet); - println!("{}", dl); + if error.should_show() { + let label_string = error.message(); + let label = label_string.as_str(); + let annotations = error.annotations(); + let slices = slices(annotations, src, Some(path_str)); + let snippet = create_snippet(label, slices); + let dl = DisplayList::from(snippet); + println!("{}", dl); + } } break; } @@ -61,7 +63,7 @@ fn as_() { println!("{}: No errors found.", path_str); } else { let background = if matches.is_present("with_os") { Some(lc3_os::OS_IMAGE.clone()) } else { None }; - let mem = cst.assemble(background); + let mem = program.assemble(background); let mut output_path = PathBuf::from(path_str); output_path.set_extension(MEM_DUMP_FILE_EXTENSION); diff --git a/assembler/src/analysis/memory_placement.rs b/assembler/src/analysis/memory_placement.rs index ac2a474..2ed5efd 100644 --- a/assembler/src/analysis/memory_placement.rs +++ b/assembler/src/analysis/memory_placement.rs @@ -4,6 +4,7 @@ use lc3_isa::Addr; use crate::ir::ir5_expand_pseudo_ops; use crate::error::ParseError; use crate::ir::ir4_parse_ambiguous_tokens::Checked; +use annotate_snippets::snippet::SourceAnnotation; #[derive(Debug, Clone)] pub enum MemoryPlacementError { @@ -16,6 +17,34 @@ pub enum MemoryPlacementError { ObjectsOverlap } +impl MemoryPlacementError { + + pub fn message(&self) -> String { + use MemoryPlacementError::*; + match self { + InvalidOrigin { .. 
} => "could not validate memory placement due to error parsing .ORIG", + UnknownPseudoOpLength { .. } => "could not validate memory placement due to error parsing pseudo-op", + ObjectsOverlap => "two objects (.ORIG/.END blocks) would occupy same memory locations", + }.to_string() + } + + pub fn annotations(&self) -> Vec { + vec![] + } + + pub fn should_show(&self) -> bool { + use MemoryPlacementError::*; + + match self { + InvalidOrigin { .. } + | UnknownPseudoOpLength { .. } => false, + ObjectsOverlap => true, + } + } + +} + + pub fn validate_placement(objects: &Vec) -> Vec { let starts_and_ends = objects.iter() .map(get_start_and_end) diff --git a/assembler/src/analysis/symbol_table.rs b/assembler/src/analysis/symbol_table.rs index a194531..4eb0caf 100644 --- a/assembler/src/analysis/symbol_table.rs +++ b/assembler/src/analysis/symbol_table.rs @@ -3,6 +3,7 @@ use lc3_isa::Addr; use crate::ir::ir5_expand_pseudo_ops; use crate::lexer::Span; use crate::error::ParseError; +use annotate_snippets::snippet::{AnnotationType, SourceAnnotation}; pub type SymbolTable<'input> = HashMap<&'input str, Addr>; @@ -20,6 +21,57 @@ pub enum SymbolTableError { } } +impl SymbolTableError { + + pub fn message(&self) -> String { + use SymbolTableError::*; + match self { + InvalidOrigin { .. } => format!("could not validate memory placement due to error parsing .ORIG"), + UnknownPseudoOpLength { .. } => format!("could not validate memory placement due to error parsing pseudo-op"), + DuplicateLabel { label_text, .. } => format!("duplicate label {}", label_text), + } + } + + pub fn annotations(&self) -> Vec { + use SymbolTableError::*; + let mut annotations = Vec::new(); + + macro_rules! push_annotation { + ($range:expr, $label:expr) => { + annotations.push( + SourceAnnotation { + range: $range.clone(), + label: $label, + annotation_type: AnnotationType::Error, + } + ); + } + } + + match self { + InvalidOrigin { .. } + | UnknownPseudoOpLength { .. } => {}, + DuplicateLabel { ranges: (range1, range2), .. } => { + push_annotation!(range1, "first instance here"); + push_annotation!(range2, "second instance here"); + }, + } + annotations + } + + pub fn should_show(&self) -> bool { + use SymbolTableError::*; + + match self { + InvalidOrigin { .. } + | UnknownPseudoOpLength { .. } => false, + DuplicateLabel { .. } => true, + } + } + +} + + pub fn build_symbol_table<'input>(object: &ir5_expand_pseudo_ops::Object<'input>) -> Result, Vec> { let mut symbol_table = HashMap::new(); let mut errors = Vec::new(); diff --git a/assembler/src/complete.rs b/assembler/src/complete.rs index 56513a3..9f41923 100644 --- a/assembler/src/complete.rs +++ b/assembler/src/complete.rs @@ -8,6 +8,7 @@ use crate::ir::{ir2_parse_line_syntax, ir4_parse_ambiguous_tokens, ir5_expand_ps use std::collections::HashMap; use crate::analysis::memory_placement::{MemoryPlacementError, validate_placement}; use lc3_isa::util::MemoryDump; +use annotate_snippets::snippet::{AnnotationType, SourceAnnotation}; /// `complete` will store as much data as possible /// relating to the source *and* what it will be assembled to. @@ -125,6 +126,51 @@ pub enum ConstructInstructionError { }, } +impl ConstructInstructionError { + + pub fn message(&self) -> String { + use ConstructInstructionError::*; + match self { + EarlierParseError { .. } => format!("failed to construct instruction due to previous error parsing"), + SymbolTableInvalid { .. } => format!("failed to construct instruction due to previous errors constructing symbol table"), + InvalidLabel { label, .. 
} => format!("instruction references invalid label {}", label), + } + } + + pub fn annotations(&self) -> Vec { + use ConstructInstructionError::*; + + let mut annotations = Vec::new(); + + macro_rules! push_annotation { + ($range:expr, $label:expr) => { + annotations.push( + SourceAnnotation { + range: $range.clone(), + label: $label, + annotation_type: AnnotationType::Error, + } + ); + } + } + match self { + EarlierParseError { .. } + | SymbolTableInvalid { .. } => {}, + InvalidLabel { span, .. } => { push_annotation!(span, "invalid label here") }, + } + annotations + } + + pub fn should_show(&self) -> bool { + use ConstructInstructionError::*; + match self { + EarlierParseError { .. } + | SymbolTableInvalid { .. } => false, + InvalidLabel { .. } => true, + } + } +} + pub fn construct_all_instructions(file: ir5_expand_pseudo_ops::File) -> Program { let ir5_expand_pseudo_ops::File { objects, ignored } = file; let memory_placement_errors = validate_placement(&objects); diff --git a/assembler/src/error.rs b/assembler/src/error.rs index 663b3c1..072e025 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -18,6 +18,27 @@ pub enum LexError { Unknown, } +impl LexError { + + pub fn message(&self) -> String { + match self { + LexError::Unknown => "encountered unknown token when lexing", + }.to_string() + } + + pub fn annotations(&self) -> Vec { + match self { + LexError::Unknown => vec![], + } + } + + pub fn should_show(&self) -> bool { + match self { + LexError::Unknown => true, + } + } +} + #[derive(Debug, Clone)] pub enum ParseError { @@ -61,6 +82,42 @@ pub enum Error { ConstructInstruction(ConstructInstructionError), } +impl Error { + pub fn message(&self) -> String { + use Error::*; + match self { + Lex(error) => error.message(), + Parse(error) => error.message(), + MemoryPlacement(error) => error.message(), + SymbolTable(error) => error.message(), + ConstructInstruction(error) => error.message(), + } + } + + pub fn annotations(&self) -> Vec { + use Error::*; + match self { + Lex(error) => error.annotations(), + Parse(error) => error.annotations(), + MemoryPlacement(error) => error.annotations(), + SymbolTable(error) => error.annotations(), + ConstructInstruction(error) => error.annotations(), + } + } + + pub fn should_show(&self) -> bool { + use Error::*; + match self { + Lex(error) => error.should_show(), + Parse(error) => error.should_show(), + MemoryPlacement(error) => error.should_show(), + SymbolTable(error) => error.should_show(), + ConstructInstruction(error) => error.should_show(), + } + } + +} + // TODO: write macro for these From impls impl From for Error { fn from(error: LexError) -> Self { @@ -241,7 +298,10 @@ impl ParseError { } annotations } - + + pub fn should_show(&self) -> bool { + true + } } diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 375ecf2..b409955 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -66,7 +66,7 @@ fn test(input: &str, orig: usize, expected_mem: &[Word]) { let lexer = Lexer::new(input); let cst = parse(lexer, Lenient); - let mem = assemble(cst.objects, None); + let mem = cst.assemble(None); for i in 0..orig { assert_eq!(0x0000, mem[i], "differed at {:#x}", i) } From c175162adca3ad8496403d2239e284c041f5f82a Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 1 May 2020 05:03:34 -0500 Subject: [PATCH 13/82] assembler: add some query methods to complete::Program CST --- assembler/src/complete.rs | 71 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git 
a/assembler/src/complete.rs b/assembler/src/complete.rs index 9f41923..8e4e973 100644 --- a/assembler/src/complete.rs +++ b/assembler/src/complete.rs @@ -9,6 +9,7 @@ use std::collections::HashMap; use crate::analysis::memory_placement::{MemoryPlacementError, validate_placement}; use lc3_isa::util::MemoryDump; use annotate_snippets::snippet::{AnnotationType, SourceAnnotation}; +use crate::analysis::validate::validate; /// `complete` will store as much data as possible /// relating to the source *and* what it will be assembled to. @@ -55,6 +56,31 @@ impl<'input> Program<'input> { memory } + pub fn assemble_safe(&self, background: Option) -> Result> { + validate(self) + .map(|_| self.assemble(background)) + } + +} + +impl<'input> Program<'input> { + pub fn get_source(&self, address: Addr) -> Option> { + for object in &self.objects { + if let Some(source) = object.get_source(address) { + return Some(source); + } + } + None + } + + pub fn get_label_addr(&self, label: &str) -> Option { + for object in &self.objects { + if let Some(addr) = object.get_label_addr(label) { + return Some(addr); + } + } + None + } } #[derive(Debug)] @@ -65,6 +91,51 @@ pub struct Object<'input> { pub symbol_table: SymbolTable<'input>, } +impl<'input> Object<'input> { + pub fn get_source(&self, address: Addr) -> Option> { + match &self.origin.value { + Err(err) => None, + Ok(origin) => { + if address < *origin { + return None; + } + let mut i = *origin as usize; + for operation in &self.content.operations { + if i as Addr == address { + return Some(operation.src_lines.clone()); + } + match operation.num_memory_locations_occupied() { + Err(_) => { return None; }, + Ok(len) => { i += len }, + } + } + None + } + } + } + + pub fn get_label_addr(&self, label_to_find: &str) -> Option { + match &self.origin.value { + Err(err) => None, + Ok(origin) => { + let mut i = *origin as usize; + for operation in &self.content.operations { + if let Some(Checked { value: Ok(label), .. 
}) = &operation.label { + if *label == label_to_find { + return Some(i as Addr); + } + } + match operation.num_memory_locations_occupied() { + Err(_) => { return None; }, + Ok(len) => { i += len }, + } + } + None + } + } + } +} + #[derive(Debug)] pub struct ObjectContent<'input> { pub operations: Vec>, From 5509870200e009b212b82289e47b1f3740a28504 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sun, 28 Jun 2020 23:43:59 -0500 Subject: [PATCH 14/82] assembler: add single instruction integ tests for ADD, AND --- assembler/tests/integ.rs | 50 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index b409955..154191a 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -4,6 +4,8 @@ use lc3_assembler::lexer::Lexer; use lc3_assembler::parser::parse; use lc3_isa::Word; use lc3_assembler::parser::LeniencyLevel::Lenient; +use std::ops::Index; +use lc3_isa::util::MemoryDump; #[test] @@ -61,6 +63,31 @@ fn pseudo_ops() { ); } +#[test] +fn add() { + single_instruction_tests(&[ + ("ADD R0 R0 R0", 0x1000), + ("ADD R1 R2 R3", 0x1283), + ("ADD R4 R5 R6", 0x1946), + ("ADD R7 R7 #0", 0x1FE0), + ("ADD R7 R7 #1", 0x1FE1), + ("ADD R7 R7 #15", 0x1FEF), + ("ADD R7 R7 #-1", 0x1FFF), + ]); +} + +#[test] +fn and() { + single_instruction_tests(&[ + ("AND R0 R0 R0", 0x5000), + ("AND R1 R2 R3", 0x5283), + ("AND R4 R5 R6", 0x5946), + ("AND R7 R7 #0", 0x5FE0), + ("AND R7 R7 #1", 0x5FE1), + ("AND R7 R7 #15", 0x5FEF), + ("AND R7 R7 #-1", 0x5FFF), + ]); +} fn test(input: &str, orig: usize, expected_mem: &[Word]) { let lexer = Lexer::new(input); @@ -68,12 +95,29 @@ fn test(input: &str, orig: usize, expected_mem: &[Word]) { let mem = cst.assemble(None); for i in 0..orig { - assert_eq!(0x0000, mem[i], "differed at {:#x}", i) + assert_mem(&mem, i, 0x0000); } for i in 0..expected_mem.len() { - assert_eq!(expected_mem[i], mem[orig + i], "differed at {:#x}", orig + i) + assert_mem(&mem, orig + i, expected_mem[i]); } for i in (orig + expected_mem.len())..0xFFFF { - assert_eq!(0x0000, mem[i], "differed at {:#x}", i) + assert_mem(&mem, i, 0x0000); } } + +fn single_instruction_tests(tests: &[(&str, Word)]) { + for (input, expected) in tests { + single_instruction_test(input, *expected); + } +} + +fn single_instruction_test(input: &str, expected: Word) { + let input = format!(".ORIG x3000\n{}\n.END", input); + test(input.as_str(), 0x3000, &[expected]); +} + + +fn assert_mem(mem: &MemoryDump, location: usize, expected: Word) { + let actual = mem[location]; + assert_eq!(expected, actual, "differed at {:#x}: expected {:#x}, was {:#x}", location, expected, actual); +} From 8347a2d72b69c20dc5cd6d58f9deb8ca8d8ce015 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 29 Jun 2020 22:08:29 -0500 Subject: [PATCH 15/82] assembler: add single-instruction tests for JMP, JSRR, RTI, RET, LDR, NOT, STR, and TRAP --- assembler/tests/integ.rs | 90 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 154191a..257aeba 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -89,6 +89,96 @@ fn and() { ]); } +#[test] +fn jmp() { + single_instruction_tests(&[ + ("JMP R0", 0xC000), + ("JMP R1", 0xC040), + ("JMP R2", 0xC080), + ("JMP R3", 0xC0C0), + ("JMP R4", 0xC100), + ("JMP R5", 0xC140), + ("JMP R6", 0xC180), + ("JMP R7", 0xC1C0), + ]) +} + +#[test] +fn jsrr() { + single_instruction_tests(&[ + ("JSRR R0", 0x4000), + ("JSRR R1", 
0x4040), + ("JSRR R2", 0x4080), + ("JSRR R3", 0x40C0), + ("JSRR R4", 0x4100), + ("JSRR R5", 0x4140), + ("JSRR R6", 0x4180), + ("JSRR R7", 0x41C0), + ]) +} + +#[test] +fn rti() { + single_instruction_test("RTI", 0x8000); +} + +#[test] +fn ret() { + single_instruction_test("RET", 0xC1C0); +} + +#[test] +fn ldr() { + single_instruction_tests(&[ + ("LDR R0 R0 #0", 0x6000), + ("LDR R1 R2 #3", 0x6283), + ("LDR R3 R4 #31", 0x671F), + ("LDR R5 R6 #-1", 0x6BBF), + ("LDR R7 R7 #-32", 0x6FE0), + ]) +} + +#[test] +fn not() { + single_instruction_tests(&[ + ("NOT R0 R1", 0x907F), + ("NOT R2 R3", 0x94FF), + ("NOT R4 R5", 0x997F), + ("NOT R6 R7", 0x9DFF), + ]) +} + +#[test] +fn str() { + single_instruction_tests(&[ + ("STR R0 R0 #0", 0x7000), + ("STR R1 R2 #3", 0x7283), + ("STR R3 R4 #31", 0x771F), + ("STR R5 R6 #-1", 0x7BBF), + ("STR R7 R7 #-32", 0x7FE0), + ]) +} + +#[test] +fn trap() { + single_instruction_tests(&[ + ("TRAP x00", 0xF000), + ("TRAP x25", 0xF025), + ("TRAP xFF", 0xF0FF), + ("TRAP #37", 0xF025), + ]) +} + +// TODO: BR +// TODO: JSR +// TODO: LD +// TODO: LDI +// TODO: ST +// TODO: STI +// TODO: LEA +// TODO: Named TRAPs +// TODO: Pseudo-ops + fn test(input: &str, orig: usize, expected_mem: &[Word]) { let lexer = Lexer::new(input); let cst = parse(lexer, Lenient); From 5bae06410f41c15e5e81db5e7eaa425049539e98 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 1 Jul 2020 01:57:56 -0500 Subject: [PATCH 16/82] assembler: add named trap tests, add macro to run single-instruction tests individually --- assembler/tests/integ.rs | 220 ++++++++++++++++++++------------------- 1 file changed, 111 insertions(+), 109 deletions(-) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 257aeba..95da57f 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -63,111 +63,124 @@ fn pseudo_ops() { ); } -#[test] -fn add() { - single_instruction_tests(&[ - ("ADD R0 R0 R0", 0x1000), - ("ADD R1 R2 R3", 0x1283), - ("ADD R4 R5 R6", 0x1946), - ("ADD R7 R7 #0", 0x1FE0), - ("ADD R7 R7 #1", 0x1FE1), - ("ADD R7 R7 #15", 0x1FEF), - ("ADD R7 R7 #-1", 0x1FFF), - ]); -} +mod single_instruction { + use super::*; -#[test] -fn and() { - single_instruction_tests(&[ - ("AND R0 R0 R0", 0x5000), - ("AND R1 R2 R3", 0x5283), - ("AND R4 R5 R6", 0x5946), - ("AND R7 R7 #0", 0x5FE0), - ("AND R7 R7 #1", 0x5FE1), - ("AND R7 R7 #15", 0x5FEF), - ("AND R7 R7 #-1", 0x5FFF), - ]); -} + fn single_instruction_test(input: &str, expected: Word) { + let input = format!(".ORIG x3000\n{}\n.END", input); + test(input.as_str(), 0x3000, &[expected]); + } -#[test] -fn jmp() { - single_instruction_tests(&[ - ("JMP R0", 0xC000), - ("JMP R1", 0xC040), - ("JMP R2", 0xC080), - ("JMP R3", 0xC0C0), - ("JMP R4", 0xC100), - ("JMP R5", 0xC140), - ("JMP R6", 0xC180), - ("JMP R7", 0xC1C0), - ]) -} + macro_rules! single_instruction_tests { + ($tests_name:ident + $( + $test_name:ident: $instruction:expr => $expected:expr + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + single_instruction_test($instruction, $expected); + } + )+ + } + }; + } -#[test] -fn jsrr() { - single_instruction_tests(&[ - ("JSRR R0", 0x4000), - ("JSRR R1", 0x4040), - ("JSRR R2", 0x4080), - ("JSRR R3", 0x40C0), - ("JSRR R4", 0x4100), - ("JSRR R5", 0x4140), - ("JSRR R6", 0x4180), - ("JSRR R7", 0x41C0), - ]) -} + single_instruction_tests! 
{ add + minimal: "ADD R0 R0 R0" => 0x1000, + r1_2_3: "ADD R1 R2 R3" => 0x1283, + r4_5_6: "ADD R4 R5 R6" => 0x1946, + r7_imm: "ADD R7 R7 #0" => 0x1FE0, + nonzero_imm: "ADD R7 R7 #1" => 0x1FE1, + max_imm: "ADD R7 R7 #15" => 0x1FEF, + neg_imm: "ADD R7 R7 #-1" => 0x1FFF, + } -#[test] -fn rti() { - single_instruction_test("RTI", 0x8000); -} + single_instruction_tests! { and + minimal: "AND R0 R0 R0" => 0x5000, + r1_2_3: "AND R1 R2 R3" => 0x5283, + r4_5_6: "AND R4 R5 R6" => 0x5946, + r7_imm: "AND R7 R7 #0" => 0x5FE0, + nonzero_imm: "AND R7 R7 #1" => 0x5FE1, + max_imm: "AND R7 R7 #15" => 0x5FEF, + neg_imm: "AND R7 R7 #-1" => 0x5FFF, + } -#[test] -fn ret() { - single_instruction_test("RET", 0xC1C0); -} + single_instruction_tests! { jmp + r0: "JMP R0" => 0xC000, + r1: "JMP R1" => 0xC040, + r2: "JMP R2" => 0xC080, + r3: "JMP R3" => 0xC0C0, + r4: "JMP R4" => 0xC100, + r5: "JMP R5" => 0xC140, + r6: "JMP R6" => 0xC180, + r7: "JMP R7" => 0xC1C0, + } -#[test] -fn ldr() { - single_instruction_tests(&[ - ("LDR R0 R0 #0", 0x6000), - ("LDR R1 R2 #3", 0x6283), - ("LDR R3 R4 #31", 0x671F), - ("LDR R5 R6 #-1", 0x6BBF), - ("LDR R7 R7 #-32", 0x6FE0), - ]) -} + single_instruction_tests! { jsrr + r0: "JSRR R0" => 0x4000, + r1: "JSRR R1" => 0x4040, + r2: "JSRR R2" => 0x4080, + r3: "JSRR R3" => 0x40C0, + r4: "JSRR R4" => 0x4100, + r5: "JSRR R5" => 0x4140, + r6: "JSRR R6" => 0x4180, + r7: "JSRR R7" => 0x41C0, + } -#[test] -fn not() { - single_instruction_tests(&[ - ("NOT R0 R1", 0x907F), - ("NOT R2 R3", 0x94FF), - ("NOT R4 R5", 0x997F), - ("NOT R6 R7", 0x9DFF), - ]) -} + #[test] + fn rti() { + single_instruction_test("RTI", 0x8000); + } -#[test] -fn str() { - single_instruction_tests(&[ - ("STR R0 R0 #0", 0x7000), - ("STR R1 R2 #3", 0x7283), - ("STR R3 R4 #31", 0x771F), - ("STR R5 R6 #-1", 0x7BBF), - ("STR R7 R7 #-32", 0x7FE0), - ]) -} + #[test] + fn ret() { + single_instruction_test("RET", 0xC1C0); + } -#[test] -fn trap() { - single_instruction_tests(&[ - ("TRAP x00", 0xF000), - ("TRAP x25", 0xF025), - ("TRAP xFF", 0xF0FF), - ("TRAP #37", 0xF025), - ]) -} + single_instruction_tests! { ldr + minimal: "LDR R0 R0 #0" => 0x6000, + r1_2: "LDR R1 R2 #3" => 0x6283, + max_imm: "LDR R3 R4 #31" => 0x671F, + neg_imm: "LDR R5 R6 #-1" => 0x6BBF, + min_imm: "LDR R7 R7 #-32" => 0x6FE0, + } + + single_instruction_tests! { not + r0_1: "NOT R0 R1" => 0x907F, + r2_3: "NOT R2 R3" => 0x94FF, + r4_5: "NOT R4 R5" => 0x997F, + r6_7: "NOT R6 R7" => 0x9DFF, + } + + single_instruction_tests! { str + minimal: "STR R0 R0 #0" => 0x7000, + r1_2: "STR R1 R2 #3" => 0x7283, + max_imm: "STR R3 R4 #31" => 0x771F, + neg_imm: "STR R5 R6 #-1" => 0x7BBF, + min_imm: "STR R7 R7 #-32" => 0x7FE0, + } + + single_instruction_tests! { trap + minimal: "TRAP x00" => 0xF000, + halt: "TRAP x25" => 0xF025, + max: "TRAP xFF" => 0xF0FF, + decimal: "TRAP #37" => 0xF025, + } + + single_instruction_tests! 
{ named_traps + getc: "GETC" => 0xF020, + out: "OUT" => 0xF021, + puts: "PUTS" => 0xF022, + in_: "IN" => 0xF023, + putsp: "PUTSP" => 0xF024, + halt: "HALT" => 0xF025, + } // TODO: BR // TODO: JSR @@ -176,9 +189,10 @@ fn trap() { // TODO: ST // TODO: STI // TODO: LEA -// TODO: Named TRAPs // TODO: Pseudo-ops +} + fn test(input: &str, orig: usize, expected_mem: &[Word]) { let lexer = Lexer::new(input); let cst = parse(lexer, Lenient); @@ -195,18 +209,6 @@ fn test(input: &str, orig: usize, expected_mem: &[Word]) { } } -fn single_instruction_tests(tests: &[(&str, Word)]) { - for (input, expected) in tests { - single_instruction_test(input, *expected); - } -} - -fn single_instruction_test(input: &str, expected: Word) { - let input = format!(".ORIG x3000\n{}\n.END", input); - test(input.as_str(), 0x3000, &[expected]); -} - - fn assert_mem(mem: &MemoryDump, location: usize, expected: Word) { let actual = mem[location]; assert_eq!(expected, actual, "differed at {:#x}: expected {:#x}, was {:#x}", location, expected, actual); From 1a19e309666c684d59be339dd1e01d7b62084775 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 3 Jul 2020 01:37:11 -0500 Subject: [PATCH 17/82] =?UTF-8?q?assembler:=20add=20BR,=20LD,=20LDI,=20ST,?= =?UTF-8?q?=20STI=20single-instruction=20tests=20(just=20broke=20100!=20?= =?UTF-8?q?=F0=9F=98=81=F0=9F=98=AC)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- assembler/tests/integ.rs | 47 +++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 95da57f..37956a1 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -182,12 +182,49 @@ mod single_instruction { halt: "HALT" => 0xF025, } -// TODO: BR + single_instruction_tests! { br + minimal: "BR #0" => 0x0E00, + n: "BRn #0" => 0x0800, + z: "BRz #0" => 0x0400, + p: "BRp #0" => 0x0200, + nz: "BRnz #0" => 0x0C00, + np: "BRnp #0" => 0x0A00, + zp: "BRzp #0" => 0x0600, + nzp: "BRnzp #0" => 0x0E00, + neg_imm: "BRnzp #-1" => 0x0FFF, + pos_imm: "BRnzp #1" => 0x0E01, + max_imm: "BRn #255" => 0x08FF, + min_imm: "BRz #-256" => 0x0500, + } + + // TODO: make this more readable :( + // I couldn't find a way to rearrange the macros to create one + // for the boilerplate like "($opcode << 12) + ". + // Consider adding a variant in single_instruction_tests for this case? + macro_rules! reg_and_pcoffset9_instruction_tests { + ($name:ident, $operator:expr, $opcode:expr) => { + single_instruction_tests! { $name + // OPERANDS RESULT + // -------- ----- + minimal: ($operator + " R0 #0").as_str() => (($opcode << 12) + 0x000), + pos_imm: ($operator + " R1 #1").as_str() => (($opcode << 12) + 0x201), + neg_imm: ($operator + " R2 #-1").as_str() => (($opcode << 12) + 0x5FF), + max_imm: ($operator + " R3 #255").as_str() => (($opcode << 12) + 0x6FF), + min_imm: ($operator + " R4 #-256").as_str() => (($opcode << 12) + 0x900), + // hex_imm: ($operator + " R5 xA").as_str() => (($opcode << 12) + 0xA0A), TODO: We currently assume an argument not starting in # is a label. Allow hex literals? 
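+                // A rough worked example of the encoding these expected words assume:
+                // DR goes in bits 11..9 and the two's-complement PCoffset9 in bits 8..0, i.e.
+                //     word = (opcode << 12) | (dr << 9) | (offset9 & 0x1FF)
+                // so for LD (opcode 0x2), "LD R2 #-1" is 0x2000 | 0x400 | 0x1FF = 0x25FF,
+                // which is exactly what the neg_imm case above expands to.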
+ r5: ($operator + " R5 #0").as_str() => (($opcode << 12) + 0xA00), + r6: ($operator + " R6 #0").as_str() => (($opcode << 12) + 0xC00), + r7: ($operator + " R7 #0").as_str() => (($opcode << 12) + 0xE00), + } + }; + } + + reg_and_pcoffset9_instruction_tests!(ld, "LD".to_string(), 0x2); + reg_and_pcoffset9_instruction_tests!(ldi, "LDI".to_string(), 0xA); + reg_and_pcoffset9_instruction_tests!(st, "ST".to_string(), 0x3); + reg_and_pcoffset9_instruction_tests!(sti, "STI".to_string(), 0xB); + // TODO: JSR -// TODO: LD -// TODO: LDI -// TODO: ST -// TODO: STI // TODO: LEA // TODO: Pseudo-ops From a3e4bc8c26be64f9a20bf18a90fabca5f79b1b95 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 3 Jul 2020 15:13:49 -0500 Subject: [PATCH 18/82] assembler: add LEA, JSR tests; adjust reg/pcoffset9 macro --- assembler/tests/integ.rs | 57 ++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 37956a1..456e7d1 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -202,30 +202,47 @@ mod single_instruction { // for the boilerplate like "($opcode << 12) + ". // Consider adding a variant in single_instruction_tests for this case? macro_rules! reg_and_pcoffset9_instruction_tests { - ($name:ident, $operator:expr, $opcode:expr) => { - single_instruction_tests! { $name - // OPERANDS RESULT - // -------- ----- - minimal: ($operator + " R0 #0").as_str() => (($opcode << 12) + 0x000), - pos_imm: ($operator + " R1 #1").as_str() => (($opcode << 12) + 0x201), - neg_imm: ($operator + " R2 #-1").as_str() => (($opcode << 12) + 0x5FF), - max_imm: ($operator + " R3 #255").as_str() => (($opcode << 12) + 0x6FF), - min_imm: ($operator + " R4 #-256").as_str() => (($opcode << 12) + 0x900), - // hex_imm: ($operator + " R5 xA").as_str() => (($opcode << 12) + 0xA0A), TODO: We currently assume an argument not starting in # is a label. Allow hex literals? - r5: ($operator + " R5 #0").as_str() => (($opcode << 12) + 0xA00), - r6: ($operator + " R6 #0").as_str() => (($opcode << 12) + 0xC00), - r7: ($operator + " R7 #0").as_str() => (($opcode << 12) + 0xE00), - } + ( + $( + $name:ident: $operator:expr => $opcode:expr + ),+ + $(,)* + ) => { + $( + single_instruction_tests! { $name + // OPERANDS RESULT + // -------- ----- + minimal: ($operator + " R0 #0").as_str() => (($opcode << 12) + 0x000), + pos_imm: ($operator + " R1 #1").as_str() => (($opcode << 12) + 0x201), + neg_imm: ($operator + " R2 #-1").as_str() => (($opcode << 12) + 0x5FF), + max_imm: ($operator + " R3 #255").as_str() => (($opcode << 12) + 0x6FF), + min_imm: ($operator + " R4 #-256").as_str() => (($opcode << 12) + 0x900), + // hex_imm: ($operator + " R5 xA").as_str() => (($opcode << 12) + 0xA0A), TODO: We currently assume an argument not starting in # is a label. Allow hex literals? + r5: ($operator + " R5 #0").as_str() => (($opcode << 12) + 0xA00), + r6: ($operator + " R6 #0").as_str() => (($opcode << 12) + 0xC00), + r7: ($operator + " R7 #0").as_str() => (($opcode << 12) + 0xE00), + } + )+ }; } - reg_and_pcoffset9_instruction_tests!(ld, "LD".to_string(), 0x2); - reg_and_pcoffset9_instruction_tests!(ldi, "LDI".to_string(), 0xA); - reg_and_pcoffset9_instruction_tests!(st, "ST".to_string(), 0x3); - reg_and_pcoffset9_instruction_tests!(sti, "STI".to_string(), 0xB); + reg_and_pcoffset9_instruction_tests! 
{ + ld: "LD".to_string() => 0x2, + ldi: "LDI".to_string() => 0xA, + lea: "LEA".to_string() => 0xE, + st: "ST".to_string() => 0x3, + sti: "STI".to_string() => 0xB, + } + + single_instruction_tests! { jsr + minimal: "JSR #0" => 0x4800, + pos_imm: "JSR #1" => 0x4801, + neg_imm: "JSR #-1" => 0x4FFF, + max_imm: "JSR #1023" => 0x4BFF, + min_imm: "JSR #-1024" => 0x4C00, + // hex_imm: "JSR xA" => 0x480A, // TODO: We currently assume an argument not starting in # is a label. Allow hex literals? + } -// TODO: JSR -// TODO: LEA // TODO: Pseudo-ops } From 928bd71284ff3aa1c4abfdf73207cb5905595cfd Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 3 Jul 2020 15:18:22 -0500 Subject: [PATCH 19/82] assembler: tweak reg/pcoffset9 macro to remove boilerplate from uses --- assembler/tests/integ.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 456e7d1..61e939e 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -210,28 +210,28 @@ mod single_instruction { ) => { $( single_instruction_tests! { $name - // OPERANDS RESULT - // -------- ----- - minimal: ($operator + " R0 #0").as_str() => (($opcode << 12) + 0x000), - pos_imm: ($operator + " R1 #1").as_str() => (($opcode << 12) + 0x201), - neg_imm: ($operator + " R2 #-1").as_str() => (($opcode << 12) + 0x5FF), - max_imm: ($operator + " R3 #255").as_str() => (($opcode << 12) + 0x6FF), - min_imm: ($operator + " R4 #-256").as_str() => (($opcode << 12) + 0x900), - // hex_imm: ($operator + " R5 xA").as_str() => (($opcode << 12) + 0xA0A), TODO: We currently assume an argument not starting in # is a label. Allow hex literals? - r5: ($operator + " R5 #0").as_str() => (($opcode << 12) + 0xA00), - r6: ($operator + " R6 #0").as_str() => (($opcode << 12) + 0xC00), - r7: ($operator + " R7 #0").as_str() => (($opcode << 12) + 0xE00), + // OPERANDS RESULT + // -------- ----- + minimal: ($operator.to_string() + " R0 #0").as_str() => (($opcode << 12) + 0x000), + pos_imm: ($operator.to_string() + " R1 #1").as_str() => (($opcode << 12) + 0x201), + neg_imm: ($operator.to_string() + " R2 #-1").as_str() => (($opcode << 12) + 0x5FF), + max_imm: ($operator.to_string() + " R3 #255").as_str() => (($opcode << 12) + 0x6FF), + min_imm: ($operator.to_string() + " R4 #-256").as_str() => (($opcode << 12) + 0x900), + // hex_imm: ($operator.to_string() + " R5 xA").as_str() => (($opcode << 12) + 0xA0A), TODO: We currently assume an argument not starting in # is a label. Allow hex literals? + r5: ($operator.to_string() + " R5 #0").as_str() => (($opcode << 12) + 0xA00), + r6: ($operator.to_string() + " R6 #0").as_str() => (($opcode << 12) + 0xC00), + r7: ($operator.to_string() + " R7 #0").as_str() => (($opcode << 12) + 0xE00), } )+ }; } reg_and_pcoffset9_instruction_tests! { - ld: "LD".to_string() => 0x2, - ldi: "LDI".to_string() => 0xA, - lea: "LEA".to_string() => 0xE, - st: "ST".to_string() => 0x3, - sti: "STI".to_string() => 0xB, + ld: "LD" => 0x2, + ldi: "LDI" => 0xA, + lea: "LEA" => 0xE, + st: "ST" => 0x3, + sti: "STI" => 0xB, } single_instruction_tests! 
{ jsr From 6c18893971c4fd36f5c170e237f92a645dadb22d Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 6 Jul 2020 01:46:39 -0500 Subject: [PATCH 20/82] assembler: finish remaining single instruction tests (.FILL, .BLKW, .STRINGZ) --- assembler/tests/integ.rs | 53 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 61e939e..2e64b47 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -66,9 +66,13 @@ fn pseudo_ops() { mod single_instruction { use super::*; - fn single_instruction_test(input: &str, expected: Word) { + fn single_instruction_multiple_output_test(input: &str, expected: &[Word]) { let input = format!(".ORIG x3000\n{}\n.END", input); - test(input.as_str(), 0x3000, &[expected]); + test(input.as_str(), 0x3000, expected); + } + + fn single_instruction_test(input: &str, expected: Word) { + single_instruction_multiple_output_test(input, &[expected]); } macro_rules! single_instruction_tests { @@ -243,8 +247,51 @@ mod single_instruction { // hex_imm: "JSR xA" => 0x480A, // TODO: We currently assume an argument not starting in # is a label. Allow hex literals? } -// TODO: Pseudo-ops + mod pseudo_ops { + use super::*; + + single_instruction_tests! { fill + minimal: ".FILL #0" => 0x0000, + pos_imm: ".FILL #1" => 0x0001, + max_imm: ".FILL #65535" => 0xFFFF, + hex_imm: ".FILL xA" => 0x000A, + hex_imm2: ".FILL xBEEF" => 0xBEEF, + max_hex_imm: ".FILL xFFFF" => 0xFFFF, + } + macro_rules! single_instruction_multiple_output_tests { + ($tests_name:ident + $( + $test_name:ident: $instruction:expr => $expected:expr + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + single_instruction_multiple_output_test($instruction, $expected); + } + )+ + } + }; + } + + single_instruction_multiple_output_tests! { blkw + one: ".BLKW 1" => &[0,], + two: ".BLKW 2" => &[0, 0,], + ten: ".BLKW 10" => &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0,], + } + + single_instruction_multiple_output_tests! 
{ stringz + a: ".STRINGZ \"a\"" => &[0x61, 0x00], + double_quote: ".STRINGZ \"\\\"\"" => &[0x22, 0x00], + backslash: ".STRINGZ \"\\\\\"" => &[0x5C, 0x00], + hello_world: ".STRINGZ \"Hello, World!\"" => &[0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x2C, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00], + } + } } fn test(input: &str, orig: usize, expected_mem: &[Word]) { From ff3833dc6b30fff295dc93c15e61b1ecd4948fcb Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 8 Jul 2020 00:32:07 -0500 Subject: [PATCH 21/82] assembler: remove stale integ tests, add alternative style tests --- assembler/tests/inputs/arithmetic_small.asm | 6 --- assembler/tests/inputs/pseudo_ops.asm | 4 -- assembler/tests/integ.rs | 47 ++++----------------- 3 files changed, 9 insertions(+), 48 deletions(-) delete mode 100644 assembler/tests/inputs/arithmetic_small.asm delete mode 100644 assembler/tests/inputs/pseudo_ops.asm diff --git a/assembler/tests/inputs/arithmetic_small.asm b/assembler/tests/inputs/arithmetic_small.asm deleted file mode 100644 index 3ad2007..0000000 --- a/assembler/tests/inputs/arithmetic_small.asm +++ /dev/null @@ -1,6 +0,0 @@ -.ORIG x3000 -ADD R0, R1, R2 -AND R3, R4, R5 -NOT R6, R7 -HALT -.END \ No newline at end of file diff --git a/assembler/tests/inputs/pseudo_ops.asm b/assembler/tests/inputs/pseudo_ops.asm deleted file mode 100644 index 54129e3..0000000 --- a/assembler/tests/inputs/pseudo_ops.asm +++ /dev/null @@ -1,4 +0,0 @@ -.orig x4000 -.stringz "\"this\\that\"" -.fill xBEEF -.end \ No newline at end of file diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 2e64b47..d19a0f6 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -7,21 +7,6 @@ use lc3_assembler::parser::LeniencyLevel::Lenient; use std::ops::Index; use lc3_isa::util::MemoryDump; - -#[test] -fn arithmetic_small() { - test( - include_str!("inputs/arithmetic_small.asm"), - 0x3000, - &[ - 0x1042, - 0x5705, - 0x9DFF, - 0xF025, - ] - ); -} - #[test] fn load_store_medium() { test( @@ -40,29 +25,6 @@ fn load_store_medium() { ); } -#[test] -fn pseudo_ops() { - test( - include_str!("inputs/pseudo_ops.asm"), - 0x4000, - &[ - 0x0022, - 0x0074, - 0x0068, - 0x0069, - 0x0073, - 0x005C, - 0x0074, - 0x0068, - 0x0061, - 0x0074, - 0x0022, - 0x0000, - 0xBEEF, - ] - ); -} - mod single_instruction { use super::*; @@ -95,6 +57,14 @@ mod single_instruction { }; } + single_instruction_tests! { alternative_styles + lowercase: "add r0 r0 r0" => 0x1000, + comma_separated: "add r0, r0, r0" => 0x1000, + with_semicolon: "ADD R0 R0 R0;" => 0x1000, + nonpatt_hex_imm: "ADD R7 R7 0xA" => 0x1FEA, + commented: "ADD R0 R0 R0 ; comment" => 0x1000, + } + single_instruction_tests! { add minimal: "ADD R0 R0 R0" => 0x1000, r1_2_3: "ADD R1 R2 R3" => 0x1283, @@ -103,6 +73,7 @@ mod single_instruction { nonzero_imm: "ADD R7 R7 #1" => 0x1FE1, max_imm: "ADD R7 R7 #15" => 0x1FEF, neg_imm: "ADD R7 R7 #-1" => 0x1FFF, + hex_imm: "ADD R7 R7 xA" => 0x1FEA, } single_instruction_tests! { and From c9d2925d368d0490fe26ff77d16338cd8efcaaf5 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Tue, 3 May 2022 22:52:32 -0500 Subject: [PATCH 22/82] misc: update UTP dependencies --- Cargo.lock | 14 ++++++++------ assembler/Cargo.toml | 6 +++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7bfc2a2..d4f2caf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+version = 3 + [[package]] name = "aho-corasick" version = "0.6.10" @@ -152,7 +154,7 @@ dependencies = [ [[package]] name = "lc3-baseline-sim" version = "0.1.0" -source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e87a9a9ce10f890a1dd983223e" +source = "git+https://github.com/ut-utp/core?branch=master#1e9a83d39fdaa630b988108749f55d26e8134ac3" dependencies = [ "lc3-isa", "lc3-macros", @@ -163,7 +165,7 @@ dependencies = [ [[package]] name = "lc3-isa" version = "0.1.0" -source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e87a9a9ce10f890a1dd983223e" +source = "git+https://github.com/ut-utp/core?branch=master#1e9a83d39fdaa630b988108749f55d26e8134ac3" dependencies = [ "lc3-macros", "serde", @@ -173,7 +175,7 @@ dependencies = [ [[package]] name = "lc3-macros" version = "0.1.0" -source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e87a9a9ce10f890a1dd983223e" +source = "git+https://github.com/ut-utp/core?branch=master#1e9a83d39fdaa630b988108749f55d26e8134ac3" dependencies = [ "proc-macro2", "quote", @@ -183,7 +185,7 @@ dependencies = [ [[package]] name = "lc3-os" version = "0.1.0" -source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e87a9a9ce10f890a1dd983223e" +source = "git+https://github.com/ut-utp/core?branch=master#1e9a83d39fdaa630b988108749f55d26e8134ac3" dependencies = [ "lazy_static", "lc3-baseline-sim", @@ -197,7 +199,7 @@ dependencies = [ [[package]] name = "lc3-shims" version = "0.1.0" -source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e87a9a9ce10f890a1dd983223e" +source = "git+https://github.com/ut-utp/core?branch=master#1e9a83d39fdaa630b988108749f55d26e8134ac3" dependencies = [ "byteorder", "chrono", @@ -212,7 +214,7 @@ dependencies = [ [[package]] name = "lc3-traits" version = "0.1.0" -source = "git+https://github.com/ut-utp/prototype?branch=staging#e58a261e7a0314e87a9a9ce10f890a1dd983223e" +source = "git+https://github.com/ut-utp/core?branch=master#1e9a83d39fdaa630b988108749f55d26e8134ac3" dependencies = [ "lc3-isa", "lc3-macros", diff --git a/assembler/Cargo.toml b/assembler/Cargo.toml index 6cdadbf..478b17d 100644 --- a/assembler/Cargo.toml +++ b/assembler/Cargo.toml @@ -39,9 +39,9 @@ num-traits = "0.2.11" annotate-snippets = { version = "0.8.0", features = ["color"] } clap = "2.33.0" -lc3-isa = { git = "https://github.com/ut-utp/prototype", branch = "staging", version = "0.1.0", default-features = false } -lc3-shims = { git = "https://github.com/ut-utp/prototype", branch = "staging", version = "0.1.0", default-features = false } -lc3-os = { git = "https://github.com/ut-utp/prototype", branch = "staging", version = "0.1.0", default-features = false } +lc3-isa = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } +lc3-shims = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } +lc3-os = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } # TODO: ^ eventually don't pull these from git [dev-dependencies] From 02d283cb724763606bec79270e79a51cced7f404 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Thu, 5 May 2022 18:47:55 -0500 Subject: [PATCH 23/82] assembler: try chumsky lexer (untested) --- Cargo.lock | 91 +++++++++++++++- assembler/Cargo.toml | 1 + assembler/src/lib.rs | 2 + assembler/src/new.rs | 250 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 341 insertions(+), 3 
deletions(-) create mode 100644 assembler/src/new.rs diff --git a/Cargo.lock b/Cargo.lock index d4f2caf..e39408c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" +dependencies = [ + "const-random", +] + [[package]] name = "aho-corasick" version = "0.6.10" @@ -64,6 +73,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "chrono" version = "0.4.11" @@ -75,6 +90,15 @@ dependencies = [ "time", ] +[[package]] +name = "chumsky" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4" +dependencies = [ + "ahash", +] + [[package]] name = "clap" version = "2.33.0" @@ -90,6 +114,34 @@ dependencies = [ "vec_map", ] +[[package]] +name = "const-random" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4" +dependencies = [ + "const-random-macro", + "proc-macro-hack", +] + +[[package]] +name = "const-random-macro" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" +dependencies = [ + "getrandom", + "lazy_static", + "proc-macro-hack", + "tiny-keccak", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "ctor" version = "0.1.13" @@ -112,6 +164,17 @@ version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" +[[package]] +name = "getrandom" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi", +] + [[package]] name = "hermit-abi" version = "0.1.11" @@ -141,6 +204,7 @@ name = "lc3-assembler" version = "0.1.0" dependencies = [ "annotate-snippets", + "chumsky", "clap", "itertools", "lc3-isa", @@ -225,9 +289,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.69" +version = "0.2.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99e85c08494b21a9054e7fe1374a732aeadaff3980b6990b94bfd3a70f690005" +checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b" [[package]] name = "log" @@ -235,7 +299,7 @@ version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", ] [[package]] @@ -284,6 +348,12 @@ dependencies = [ "output_vt100", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + [[package]] name = "proc-macro2" version = "1.0.10" @@ -411,6 +481,15 @@ dependencies = [ "chrono", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "ucd-util" version = "0.1.8" @@ -441,6 +520,12 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "winapi" version = "0.3.8" diff --git a/assembler/Cargo.toml b/assembler/Cargo.toml index 478b17d..15b3bba 100644 --- a/assembler/Cargo.toml +++ b/assembler/Cargo.toml @@ -38,6 +38,7 @@ itertools = "0.8.2" num-traits = "0.2.11" annotate-snippets = { version = "0.8.0", features = ["color"] } clap = "2.33.0" +chumsky = "0.8.0" lc3-isa = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } lc3-shims = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 1c5cb3c..03e779e 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -11,6 +11,8 @@ pub mod util; pub mod complete; pub mod analysis; +pub mod new; + #[cfg(test)] mod tests { diff --git a/assembler/src/new.rs b/assembler/src/new.rs new file mode 100644 index 0000000..6d75d59 --- /dev/null +++ b/assembler/src/new.rs @@ -0,0 +1,250 @@ +use chumsky::prelude::*; +use lc3_isa::{Reg, SignedWord, Word}; + +pub type Span = std::ops::Range; + +#[derive(Clone, Debug)] +enum Token { + Opcode(Opcode), + Register(Reg), + NumberLiteral(LiteralValue), + StringLiteral(String), + Label(String), + + Newline, + Comma, + + Comment, +} + +#[derive(Clone, Debug)] +enum LiteralValue { + Word(Word), + SignedWord(SignedWord), +} + +#[derive(Clone, Debug)] +pub struct ConditionCodes { + n: bool, + z: bool, + p: bool, +} + +#[derive(Clone, Debug)] +pub enum Opcode { + Add, + And, + Br(ConditionCodes), + Jmp, + Jsr, + Jsrr, + Ld, + Ldi, + Ldr, + Lea, + Not, + Ret, + Rti, + St, + Sti, + Str, + Trap, + + // Pseudo-ops + Orig, + Fill, + Blkw, + Stringz, + End, + + // Named TRAP routines + Getc, + Out, + Puts, + In, + Putsp, + Halt, +} + +fn number_literal_with_base(base: u32, prefix: char) -> impl Parser> { + just(prefix) + .ignore_then(just('-').ignored().or_not()) + .then(text::digits(base)) + .try_map(move |(maybe_sign, digits): (Option<()>, String), span| { + let parse_result = if maybe_sign.is_some() { + SignedWord::from_str_radix(&format!("-{}", digits), base) + .map(LiteralValue::SignedWord) + } else { + Word::from_str_radix(&digits, base) + .map(LiteralValue::Word) + }; + parse_result.map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow + }) +} + +fn one_opcode(pattern: &'static str, output_opcode: Opcode) -> impl Parser> { + just_to(pattern, output_opcode) +} + +fn one_register(pattern: &'static str, output_reg: Reg) -> impl Parser> { + just_to(pattern, output_reg) +} + +fn just_to(pattern: &'static str, output: O) -> impl Parser> { + just(pattern).to(output) +} + +fn 
lexer() -> impl Parser, Error=Simple> { + let newline = text::newline() + .to(Token::Newline); + + use Opcode::*; + let branch_opcode = + just("BR") + .ignore_then(one_of("NZP").repeated().at_most(3)) + .map::(|cond_code_chars| { + let cond_codes = + if cond_code_chars.is_empty() { + ConditionCodes { n: true, z: true, p: true } + } else { + let n = cond_code_chars.contains(&'N'); + let z = cond_code_chars.contains(&'Z'); + let p = cond_code_chars.contains(&'P'); + ConditionCodes { n, z, p } + }; + Br(cond_codes) + }); + + // These options are separated by `or` instead of all belonging + // to one tuple passed to `choice` because `choice` only supports + // tuples with up to 26 elements. + // The grouping by 'opcode type' was chosen arbitrarily. + let opcode = choice(( + one_opcode("ADD", Add), + one_opcode("AND", And), + branch_opcode, + one_opcode("JMP", Jmp), + one_opcode("JSRR", Jsrr), + one_opcode("JSR", Jsr), + one_opcode("LDI", Ldi), + one_opcode("LDR", Ldr), + one_opcode("LD", Ld), + one_opcode("LEA", Lea), + one_opcode("NOT", Not), + one_opcode("RET", Ret), + one_opcode("RTI", Rti), + one_opcode("STI", Sti), + one_opcode("STR", Str), + one_opcode("ST", St), + one_opcode("TRAP", Trap), + )) + .or(choice(( + one_opcode("GETC", Getc), + one_opcode("OUT", Out), + one_opcode("PUTSP", Putsp), + one_opcode("PUTS", Puts), + one_opcode("IN", In), + one_opcode("HALT", Halt), + ))) + .or(choice(( + one_opcode(".ORIG", Orig), + one_opcode(".FILL", Fill), + one_opcode(".BLKW", Blkw), + one_opcode(".STRINGZ", Stringz), + one_opcode(".END", End), + ))) + .map(Token::Opcode); + + use Reg::*; + let register = choice(( + one_register("R0", R0), + one_register("R1", R1), + one_register("R2", R2), + one_register("R3", R3), + one_register("R4", R4), + one_register("R5", R5), + one_register("R6", R6), + one_register("R7", R7), + )) + .map(Token::Register); + + // `escape` and `string_literal` are based on JSON parser example + // https://github.com/zesterer/chumsky/blob/d4102128315d9dbbea901a91dc5eaa0fc9a790f7/examples/json.rs#L39 + let escape = just::<_, _, Simple>('\\').ignore_then( + just('\\') + .or(just('"')) + .or(just('b').to('\x08')) + .or(just('f').to('\x0C')) + .or(just('n').to('\n')) + .or(just('r').to('\r')) + .or(just('t').to('\t')) + ); + + let string_literal = just('"') + .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated()) + .then_ignore(just('"')) + .collect::() + .map(Token::StringLiteral); + + let number_literal = choice(( + number_literal_with_base(2, 'B'), + number_literal_with_base(10, '#'), + number_literal_with_base(16, 'X'), + )) + .map(Token::NumberLiteral); + + let label = text::ident() // C-style identifier. Follows all LC-3 label rules but allows arbitrary length and underscores. + .map(Token::Label); // TODO: validate length, underscores in strict mode + + let comment = just(';') + .then(filter(|c| !is_newline(c)).repeated()) + .to(Token::Comment); + + let comma = just(',') + .to(Token::Comma); + + let token = choice(( + opcode, + register, + number_literal, + string_literal, + label, + newline, + comma, + comment, + )) + .recover_with(skip_then_retry_until([])); // TODO: improve? 
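+    // Rough sketch of the intended recovery behavior: when a character matches no token
+    // rule, skip_then_retry_until([]) should just skip it and retry, so input like
+    // "ADD @R0, R0, #1" would still yield the surrounding tokens plus an error for the
+    // stray '@' rather than abandoning the rest of the line.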
+ + let non_newline_whitespace = + filter(|c: &char| c.is_whitespace() && !is_newline(c)).repeated(); + + token + .map_with_span(|token, span| (token, span)) + .padded_by(non_newline_whitespace) + .repeated() + .then_ignore(end()) +} + +fn is_newline(c: &char) -> bool { + // All line breaks matched by chumsky::text::newline + ['\n', + '\r', + '\x0B', // Vertical tab + '\x0C', // Form feed + '\u{0085}', // Next line + '\u{2028}', // Line separator + '\u{2029}', ].contains(c) // Paragraph separator +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simple() { + let src = ".ORIG x3000;\nLABEL ADD R0, R0, #70000\n.end"; + let (tokens, errs) = lexer().parse_recovery(src.to_uppercase()); + println!("{:?}", tokens); + println!("{:?}", errs); + } +} \ No newline at end of file From 84fd53eee22aa7eefe01c550d01b9cda5937437e Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 6 May 2022 00:54:57 -0500 Subject: [PATCH 24/82] assembler: add chumsky instruction parser (untested) --- assembler/src/new.rs | 117 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 108 insertions(+), 9 deletions(-) diff --git a/assembler/src/new.rs b/assembler/src/new.rs index 6d75d59..06b33c8 100644 --- a/assembler/src/new.rs +++ b/assembler/src/new.rs @@ -1,9 +1,12 @@ use chumsky::prelude::*; +use chumsky::Stream; +use itertools::Itertools; use lc3_isa::{Reg, SignedWord, Word}; -pub type Span = std::ops::Range; +type Span = std::ops::Range; +type Spanned = (T, Span); -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] enum Token { Opcode(Opcode), Register(Reg), @@ -17,20 +20,20 @@ enum Token { Comment, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] enum LiteralValue { Word(Word), SignedWord(SignedWord), } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct ConditionCodes { n: bool, z: bool, p: bool, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum Opcode { Add, And, @@ -66,6 +69,8 @@ pub enum Opcode { Halt, } +// Lexer + fn number_literal_with_base(base: u32, prefix: char) -> impl Parser> { just(prefix) .ignore_then(just('-').ignored().or_not()) @@ -94,7 +99,7 @@ fn just_to(pattern: &'static str, output: O) -> impl Parser impl Parser, Error=Simple> { +fn lexer() -> impl Parser>, Error=Simple> { let newline = text::newline() .to(Token::Newline); @@ -236,15 +241,109 @@ fn is_newline(c: &char) -> bool { '\u{2029}', ].contains(c) // Paragraph separator } +fn lex(source: &str) -> (Option>>, Vec>) { + lexer().parse_recovery(source.to_uppercase()) +} + + +type WithErrData = Spanned>>; + +// Parser +#[derive(Debug)] +struct Program { + instructions: Vec>, +} + +#[derive(Debug)] +struct Instruction { + label: Option>, + opcode: WithErrData, + operands: Vec>, +} + +#[derive(Debug)] +enum Operand { + Register(Reg), + NumberLiteral(LiteralValue), + StringLiteral(String), + Label(String), +} + +fn operand() -> impl Parser, Error = Simple> { + let operand = select! { + Token::Register(reg) => Operand::Register(reg), + Token::NumberLiteral(val) => Operand::NumberLiteral(val), + Token::StringLiteral(s) => Operand::StringLiteral(s), + Token::Label(s) => Operand::Label(s), + }; + operand.map_with_span(|o, span| (o, span)) +} + +fn instruction() -> impl Parser, Error = Simple> { + let label = + select! { Token::Label(s) => s } + .map_with_span(|s, span| (Ok(s), span)) + .or_not(); + + let opcode = + select! 
{ Token::Opcode(o) => o } + .map_with_span(|o, span| (Ok(o), span)); + + let operands = + operand() + .map(|(o, span)| (Ok(o), span)) + .separated_by::(just(Token::Comma)); + + + label + .then_ignore(just(Token::Newline).repeated()) + .then(opcode) + .then(operands) + .map_with_span(|((l, o), os), span| { + let instruction = Instruction { + label: l, + opcode: o, + operands: os, + }; + (instruction, span) + }) +} + +fn program() -> impl Parser, Error = Simple> { + instruction() + .map(|(i, span)| (Ok(i), span)) + .separated_by( + just(Token::Comment).or_not() + .then(just(Token::Newline).repeated().at_least(1)) + .repeated() ) + .allow_leading() + .allow_trailing() + .map_with_span(|instructions, span| { + (Program { instructions }, span) + }) +} + +fn parse(src: &str, tokens: Vec>) -> (Option>, Vec>) { + let len = src.chars().count(); + program().parse_recovery(Stream::from_iter(len..len + 1, tokens.into_iter())) +} + + #[cfg(test)] mod tests { use super::*; #[test] fn simple() { - let src = ".ORIG x3000;\nLABEL ADD R0, R0, #70000\n.end"; - let (tokens, errs) = lexer().parse_recovery(src.to_uppercase()); + let src = ".ORIG x3000;\nLABEL ADD R0, R0, #5000\n.end"; + let (tokens, lex_errs) = lex(src); println!("{:?}", tokens); - println!("{:?}", errs); + println!("{:?}", lex_errs); + + let parse_results = tokens.map(|ts| parse(src, ts)); + if let Some((program, parse_errs)) = parse_results { + println!("{:?}", program); + println!("{:?}", parse_errs); + } } } \ No newline at end of file From 0ffcb8fb7a205c176d5d951f76f2e701eee5e968 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 7 May 2022 22:49:45 -0500 Subject: [PATCH 25/82] assembler: add full file chumsky parser (untested) --- assembler/src/new.rs | 99 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 79 insertions(+), 20 deletions(-) diff --git a/assembler/src/new.rs b/assembler/src/new.rs index 06b33c8..8b7bc2d 100644 --- a/assembler/src/new.rs +++ b/assembler/src/new.rs @@ -246,9 +246,10 @@ fn lex(source: &str) -> (Option>>, Vec>) { } +// Parser + type WithErrData = Spanned>>; -// Parser #[derive(Debug)] struct Program { instructions: Vec>, @@ -258,7 +259,7 @@ struct Program { struct Instruction { label: Option>, opcode: WithErrData, - operands: Vec>, + operands: WithErrData>>, } #[derive(Debug)] @@ -279,25 +280,63 @@ fn operand() -> impl Parser, Error = Simple> { operand.map_with_span(|o, span| (o, span)) } -fn instruction() -> impl Parser, Error = Simple> { +fn any_opcode_but(denied: Opcode) -> impl Parser, Error = Simple> { + filter_map(move |span, t: Token| + if let Token::Opcode(o) = t.clone() { + if o == denied { + Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected + } else { + Ok(o) + } + } else { + Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected + }) + .map_with_span(|o, span| (o, span)) +} + +fn opcode(expected: Opcode) -> impl Parser, Error = Simple> { + let expected_token = Token::Opcode(expected); + filter_map(move |span, t| + if t == expected_token { + if let Token::Opcode(o) = t { + Ok(o) + } else { unreachable!() } + } else { + Err(Simple::expected_input_found(span, [Some(expected_token.clone())], Some(t))) + }) + .map_with_span(|o, span| (o, span)) +} + +enum OpcodeFilter { + OnlyOrig, + AnyButEnd, + OnlyEnd, +} + +fn instruction(oc_filter: OpcodeFilter) -> impl Parser, Error = Simple> { let label = select! { Token::Label(s) => s } .map_with_span(|s, span| (Ok(s), span)) .or_not(); - let opcode = - select! 
{ Token::Opcode(o) => o } - .map_with_span(|o, span| (Ok(o), span)); + use OpcodeFilter::*; + let oc: Box, Error = Simple>> = + match oc_filter { + OnlyOrig => Box::new(opcode(Opcode::Orig)), + AnyButEnd => Box::new(any_opcode_but(Opcode::End)), + OnlyEnd => Box::new(opcode(Opcode::End)), + }; + let oc_with_err_data = oc.map(|(oc, span)| (Ok(oc), span)); let operands = operand() .map(|(o, span)| (Ok(o), span)) - .separated_by::(just(Token::Comma)); - + .separated_by::(just(Token::Comma)) + .map_with_span(|os, span| (Ok(os), span)); label .then_ignore(just(Token::Newline).repeated()) - .then(opcode) + .then(oc_with_err_data) .then(operands) .map_with_span(|((l, o), os), span| { let instruction = Instruction { @@ -309,23 +348,43 @@ fn instruction() -> impl Parser, Error = Simple impl Parser> { + just(Token::Comment).or_not() + .then(just(Token::Newline).repeated().at_least(1)) + .repeated() + .ignored() +} + fn program() -> impl Parser, Error = Simple> { - instruction() - .map(|(i, span)| (Ok(i), span)) - .separated_by( - just(Token::Comment).or_not() - .then(just(Token::Newline).repeated().at_least(1)) - .repeated() ) - .allow_leading() - .allow_trailing() - .map_with_span(|instructions, span| { + comments_and_newlines() + .ignore_then(instruction(OpcodeFilter::OnlyOrig)) + .then( + instruction(OpcodeFilter::AnyButEnd) + .map(|(i, span)| (Ok(i), span)) + .separated_by(comments_and_newlines()) + .allow_leading() + .allow_trailing() + ) + .then(instruction(OpcodeFilter::OnlyEnd)) + .then_ignore(comments_and_newlines()) + .then_ignore(end()) + .map_with_span(|((orig, instructions), end), span| { (Program { instructions }, span) }) } +fn file() -> impl Parser>>, Error = Simple> { + program() + .map(|(p, span)| (Ok(p), span)) + .separated_by(comments_and_newlines()) + .allow_leading() + .allow_trailing() + .map_with_span(|programs, span| (programs, span)) +} + fn parse(src: &str, tokens: Vec>) -> (Option>, Vec>) { let len = src.chars().count(); - program().parse_recovery(Stream::from_iter(len..len + 1, tokens.into_iter())) + program().parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) } @@ -335,7 +394,7 @@ mod tests { #[test] fn simple() { - let src = ".ORIG x3000;\nLABEL ADD R0, R0, #5000\n.end"; + let src = ".ORIG x3000;\nLABEL ADD R0, R0, #7000\n.end"; let (tokens, lex_errs) = lex(src); println!("{:?}", tokens); println!("{:?}", lex_errs); From 35e92736a24f238e928579a136cdb9f910fd9fa5 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 7 May 2022 23:17:29 -0500 Subject: [PATCH 26/82] assembler: move new lexer, parser into separate files --- assembler/src/new.rs | 408 ------------------------------------ assembler/src/new/lexer.rs | 178 ++++++++++++++++ assembler/src/new/mod.rs | 93 ++++++++ assembler/src/new/parser.rs | 150 +++++++++++++ 4 files changed, 421 insertions(+), 408 deletions(-) delete mode 100644 assembler/src/new.rs create mode 100644 assembler/src/new/lexer.rs create mode 100644 assembler/src/new/mod.rs create mode 100644 assembler/src/new/parser.rs diff --git a/assembler/src/new.rs b/assembler/src/new.rs deleted file mode 100644 index 8b7bc2d..0000000 --- a/assembler/src/new.rs +++ /dev/null @@ -1,408 +0,0 @@ -use chumsky::prelude::*; -use chumsky::Stream; -use itertools::Itertools; -use lc3_isa::{Reg, SignedWord, Word}; - -type Span = std::ops::Range; -type Spanned = (T, Span); - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -enum Token { - Opcode(Opcode), - Register(Reg), - NumberLiteral(LiteralValue), - StringLiteral(String), - Label(String), - - 
Newline, - Comma, - - Comment, -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -enum LiteralValue { - Word(Word), - SignedWord(SignedWord), -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct ConditionCodes { - n: bool, - z: bool, - p: bool, -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub enum Opcode { - Add, - And, - Br(ConditionCodes), - Jmp, - Jsr, - Jsrr, - Ld, - Ldi, - Ldr, - Lea, - Not, - Ret, - Rti, - St, - Sti, - Str, - Trap, - - // Pseudo-ops - Orig, - Fill, - Blkw, - Stringz, - End, - - // Named TRAP routines - Getc, - Out, - Puts, - In, - Putsp, - Halt, -} - -// Lexer - -fn number_literal_with_base(base: u32, prefix: char) -> impl Parser> { - just(prefix) - .ignore_then(just('-').ignored().or_not()) - .then(text::digits(base)) - .try_map(move |(maybe_sign, digits): (Option<()>, String), span| { - let parse_result = if maybe_sign.is_some() { - SignedWord::from_str_radix(&format!("-{}", digits), base) - .map(LiteralValue::SignedWord) - } else { - Word::from_str_radix(&digits, base) - .map(LiteralValue::Word) - }; - parse_result.map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow - }) -} - -fn one_opcode(pattern: &'static str, output_opcode: Opcode) -> impl Parser> { - just_to(pattern, output_opcode) -} - -fn one_register(pattern: &'static str, output_reg: Reg) -> impl Parser> { - just_to(pattern, output_reg) -} - -fn just_to(pattern: &'static str, output: O) -> impl Parser> { - just(pattern).to(output) -} - -fn lexer() -> impl Parser>, Error=Simple> { - let newline = text::newline() - .to(Token::Newline); - - use Opcode::*; - let branch_opcode = - just("BR") - .ignore_then(one_of("NZP").repeated().at_most(3)) - .map::(|cond_code_chars| { - let cond_codes = - if cond_code_chars.is_empty() { - ConditionCodes { n: true, z: true, p: true } - } else { - let n = cond_code_chars.contains(&'N'); - let z = cond_code_chars.contains(&'Z'); - let p = cond_code_chars.contains(&'P'); - ConditionCodes { n, z, p } - }; - Br(cond_codes) - }); - - // These options are separated by `or` instead of all belonging - // to one tuple passed to `choice` because `choice` only supports - // tuples with up to 26 elements. - // The grouping by 'opcode type' was chosen arbitrarily. 
- let opcode = choice(( - one_opcode("ADD", Add), - one_opcode("AND", And), - branch_opcode, - one_opcode("JMP", Jmp), - one_opcode("JSRR", Jsrr), - one_opcode("JSR", Jsr), - one_opcode("LDI", Ldi), - one_opcode("LDR", Ldr), - one_opcode("LD", Ld), - one_opcode("LEA", Lea), - one_opcode("NOT", Not), - one_opcode("RET", Ret), - one_opcode("RTI", Rti), - one_opcode("STI", Sti), - one_opcode("STR", Str), - one_opcode("ST", St), - one_opcode("TRAP", Trap), - )) - .or(choice(( - one_opcode("GETC", Getc), - one_opcode("OUT", Out), - one_opcode("PUTSP", Putsp), - one_opcode("PUTS", Puts), - one_opcode("IN", In), - one_opcode("HALT", Halt), - ))) - .or(choice(( - one_opcode(".ORIG", Orig), - one_opcode(".FILL", Fill), - one_opcode(".BLKW", Blkw), - one_opcode(".STRINGZ", Stringz), - one_opcode(".END", End), - ))) - .map(Token::Opcode); - - use Reg::*; - let register = choice(( - one_register("R0", R0), - one_register("R1", R1), - one_register("R2", R2), - one_register("R3", R3), - one_register("R4", R4), - one_register("R5", R5), - one_register("R6", R6), - one_register("R7", R7), - )) - .map(Token::Register); - - // `escape` and `string_literal` are based on JSON parser example - // https://github.com/zesterer/chumsky/blob/d4102128315d9dbbea901a91dc5eaa0fc9a790f7/examples/json.rs#L39 - let escape = just::<_, _, Simple>('\\').ignore_then( - just('\\') - .or(just('"')) - .or(just('b').to('\x08')) - .or(just('f').to('\x0C')) - .or(just('n').to('\n')) - .or(just('r').to('\r')) - .or(just('t').to('\t')) - ); - - let string_literal = just('"') - .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated()) - .then_ignore(just('"')) - .collect::() - .map(Token::StringLiteral); - - let number_literal = choice(( - number_literal_with_base(2, 'B'), - number_literal_with_base(10, '#'), - number_literal_with_base(16, 'X'), - )) - .map(Token::NumberLiteral); - - let label = text::ident() // C-style identifier. Follows all LC-3 label rules but allows arbitrary length and underscores. - .map(Token::Label); // TODO: validate length, underscores in strict mode - - let comment = just(';') - .then(filter(|c| !is_newline(c)).repeated()) - .to(Token::Comment); - - let comma = just(',') - .to(Token::Comma); - - let token = choice(( - opcode, - register, - number_literal, - string_literal, - label, - newline, - comma, - comment, - )) - .recover_with(skip_then_retry_until([])); // TODO: improve? - - let non_newline_whitespace = - filter(|c: &char| c.is_whitespace() && !is_newline(c)).repeated(); - - token - .map_with_span(|token, span| (token, span)) - .padded_by(non_newline_whitespace) - .repeated() - .then_ignore(end()) -} - -fn is_newline(c: &char) -> bool { - // All line breaks matched by chumsky::text::newline - ['\n', - '\r', - '\x0B', // Vertical tab - '\x0C', // Form feed - '\u{0085}', // Next line - '\u{2028}', // Line separator - '\u{2029}', ].contains(c) // Paragraph separator -} - -fn lex(source: &str) -> (Option>>, Vec>) { - lexer().parse_recovery(source.to_uppercase()) -} - - -// Parser - -type WithErrData = Spanned>>; - -#[derive(Debug)] -struct Program { - instructions: Vec>, -} - -#[derive(Debug)] -struct Instruction { - label: Option>, - opcode: WithErrData, - operands: WithErrData>>, -} - -#[derive(Debug)] -enum Operand { - Register(Reg), - NumberLiteral(LiteralValue), - StringLiteral(String), - Label(String), -} - -fn operand() -> impl Parser, Error = Simple> { - let operand = select! 
{ - Token::Register(reg) => Operand::Register(reg), - Token::NumberLiteral(val) => Operand::NumberLiteral(val), - Token::StringLiteral(s) => Operand::StringLiteral(s), - Token::Label(s) => Operand::Label(s), - }; - operand.map_with_span(|o, span| (o, span)) -} - -fn any_opcode_but(denied: Opcode) -> impl Parser, Error = Simple> { - filter_map(move |span, t: Token| - if let Token::Opcode(o) = t.clone() { - if o == denied { - Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected - } else { - Ok(o) - } - } else { - Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected - }) - .map_with_span(|o, span| (o, span)) -} - -fn opcode(expected: Opcode) -> impl Parser, Error = Simple> { - let expected_token = Token::Opcode(expected); - filter_map(move |span, t| - if t == expected_token { - if let Token::Opcode(o) = t { - Ok(o) - } else { unreachable!() } - } else { - Err(Simple::expected_input_found(span, [Some(expected_token.clone())], Some(t))) - }) - .map_with_span(|o, span| (o, span)) -} - -enum OpcodeFilter { - OnlyOrig, - AnyButEnd, - OnlyEnd, -} - -fn instruction(oc_filter: OpcodeFilter) -> impl Parser, Error = Simple> { - let label = - select! { Token::Label(s) => s } - .map_with_span(|s, span| (Ok(s), span)) - .or_not(); - - use OpcodeFilter::*; - let oc: Box, Error = Simple>> = - match oc_filter { - OnlyOrig => Box::new(opcode(Opcode::Orig)), - AnyButEnd => Box::new(any_opcode_but(Opcode::End)), - OnlyEnd => Box::new(opcode(Opcode::End)), - }; - let oc_with_err_data = oc.map(|(oc, span)| (Ok(oc), span)); - - let operands = - operand() - .map(|(o, span)| (Ok(o), span)) - .separated_by::(just(Token::Comma)) - .map_with_span(|os, span| (Ok(os), span)); - - label - .then_ignore(just(Token::Newline).repeated()) - .then(oc_with_err_data) - .then(operands) - .map_with_span(|((l, o), os), span| { - let instruction = Instruction { - label: l, - opcode: o, - operands: os, - }; - (instruction, span) - }) -} - -fn comments_and_newlines() -> impl Parser> { - just(Token::Comment).or_not() - .then(just(Token::Newline).repeated().at_least(1)) - .repeated() - .ignored() -} - -fn program() -> impl Parser, Error = Simple> { - comments_and_newlines() - .ignore_then(instruction(OpcodeFilter::OnlyOrig)) - .then( - instruction(OpcodeFilter::AnyButEnd) - .map(|(i, span)| (Ok(i), span)) - .separated_by(comments_and_newlines()) - .allow_leading() - .allow_trailing() - ) - .then(instruction(OpcodeFilter::OnlyEnd)) - .then_ignore(comments_and_newlines()) - .then_ignore(end()) - .map_with_span(|((orig, instructions), end), span| { - (Program { instructions }, span) - }) -} - -fn file() -> impl Parser>>, Error = Simple> { - program() - .map(|(p, span)| (Ok(p), span)) - .separated_by(comments_and_newlines()) - .allow_leading() - .allow_trailing() - .map_with_span(|programs, span| (programs, span)) -} - -fn parse(src: &str, tokens: Vec>) -> (Option>, Vec>) { - let len = src.chars().count(); - program().parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) -} - - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn simple() { - let src = ".ORIG x3000;\nLABEL ADD R0, R0, #7000\n.end"; - let (tokens, lex_errs) = lex(src); - println!("{:?}", tokens); - println!("{:?}", lex_errs); - - let parse_results = tokens.map(|ts| parse(src, ts)); - if let Some((program, parse_errs)) = parse_results { - println!("{:?}", program); - println!("{:?}", parse_errs); - } - } -} \ No newline at end of file diff --git a/assembler/src/new/lexer.rs 
b/assembler/src/new/lexer.rs new file mode 100644 index 0000000..023c132 --- /dev/null +++ b/assembler/src/new/lexer.rs @@ -0,0 +1,178 @@ +use chumsky::prelude::*; +use lc3_isa::{SignedWord, Word}; +use super::{LiteralValue, Opcode, Reg, Token, Spanned, ConditionCodes }; + +fn number_literal_with_base(base: u32, prefix: char) -> impl Parser> { + just(prefix) + .ignore_then(just('-').ignored().or_not()) + .then(text::digits(base)) + .try_map(move |(maybe_sign, digits): (Option<()>, String), span| { + let parse_result = if maybe_sign.is_some() { + SignedWord::from_str_radix(&format!("-{}", digits), base) + .map(LiteralValue::SignedWord) + } else { + Word::from_str_radix(&digits, base) + .map(LiteralValue::Word) + }; + parse_result.map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow + }) +} + +fn one_opcode(pattern: &'static str, output_opcode: Opcode) -> impl Parser> { + just_to(pattern, output_opcode) +} + +fn one_register(pattern: &'static str, output_reg: Reg) -> impl Parser> { + just_to(pattern, output_reg) +} + +fn just_to(pattern: &'static str, output: O) -> impl Parser> { + just(pattern).to(output) +} + +fn lexer() -> impl Parser>, Error=Simple> { + let newline = text::newline() + .to(Token::Newline); + + use Opcode::*; + let branch_opcode = + just("BR") + .ignore_then(one_of("NZP").repeated().at_most(3)) + .map::(|cond_code_chars| { + let cond_codes = + if cond_code_chars.is_empty() { + ConditionCodes { n: true, z: true, p: true } + } else { + let n = cond_code_chars.contains(&'N'); + let z = cond_code_chars.contains(&'Z'); + let p = cond_code_chars.contains(&'P'); + ConditionCodes { n, z, p } + }; + Br(cond_codes) + }); + + // These options are separated by `or` instead of all belonging + // to one tuple passed to `choice` because `choice` only supports + // tuples with up to 26 elements. + // The grouping by 'opcode type' was chosen arbitrarily. 
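+    // e.g. choice((a, b)).or(choice((c, d))) should behave like one flat choice((a, b, c, d)):
+    // alternatives are tried in order, which is presumably why the longer mnemonics
+    // (JSRR, LDI, LDR, STI, STR, PUTSP) are listed before the shorter ones they start with.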
+ let opcode = choice(( + one_opcode("ADD", Add), + one_opcode("AND", And), + branch_opcode, + one_opcode("JMP", Jmp), + one_opcode("JSRR", Jsrr), + one_opcode("JSR", Jsr), + one_opcode("LDI", Ldi), + one_opcode("LDR", Ldr), + one_opcode("LD", Ld), + one_opcode("LEA", Lea), + one_opcode("NOT", Not), + one_opcode("RET", Ret), + one_opcode("RTI", Rti), + one_opcode("STI", Sti), + one_opcode("STR", Str), + one_opcode("ST", St), + one_opcode("TRAP", Trap), + )) + .or(choice(( + one_opcode("GETC", Getc), + one_opcode("OUT", Out), + one_opcode("PUTSP", Putsp), + one_opcode("PUTS", Puts), + one_opcode("IN", In), + one_opcode("HALT", Halt), + ))) + .or(choice(( + one_opcode(".ORIG", Orig), + one_opcode(".FILL", Fill), + one_opcode(".BLKW", Blkw), + one_opcode(".STRINGZ", Stringz), + one_opcode(".END", End), + ))) + .map(Token::Opcode); + + use Reg::*; + let register = choice(( + one_register("R0", R0), + one_register("R1", R1), + one_register("R2", R2), + one_register("R3", R3), + one_register("R4", R4), + one_register("R5", R5), + one_register("R6", R6), + one_register("R7", R7), + )) + .map(Token::Register); + + // `escape` and `string_literal` are based on JSON parser example + // https://github.com/zesterer/chumsky/blob/d4102128315d9dbbea901a91dc5eaa0fc9a790f7/examples/json.rs#L39 + let escape = just::<_, _, Simple>('\\').ignore_then( + just('\\') + .or(just('"')) + .or(just('b').to('\x08')) + .or(just('f').to('\x0C')) + .or(just('n').to('\n')) + .or(just('r').to('\r')) + .or(just('t').to('\t')) + ); + + let string_literal = just('"') + .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated()) + .then_ignore(just('"')) + .collect::() + .map(Token::StringLiteral); + + let number_literal = choice(( + number_literal_with_base(2, 'B'), + number_literal_with_base(10, '#'), + number_literal_with_base(16, 'X'), + )) + .map(Token::NumberLiteral); + + let label = text::ident() // C-style identifier. Follows all LC-3 label rules but allows arbitrary length and underscores. + .map(Token::Label); // TODO: validate length, underscores in strict mode + + let comment = just(';') + .then(filter(|c| !is_newline(c)).repeated()) + .to(Token::Comment); + + let comma = just(',') + .to(Token::Comma); + + let token = choice(( + opcode, + register, + number_literal, + string_literal, + label, + newline, + comma, + comment, + )) + .recover_with(skip_then_retry_until([])); // TODO: improve? 
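+    // As a rough illustration of the end result, a line like "ADD R0, R0, #1" should lex
+    // to something like [Opcode(Add), Register(R0), Comma, Register(R0), Comma,
+    // NumberLiteral(Word(1)), Newline]: spaces and tabs are consumed as padding below,
+    // but newlines survive as tokens because the instruction grammar is line-oriented.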
+ + let non_newline_whitespace = + filter(|c: &char| c.is_whitespace() && !is_newline(c)).repeated(); + + token + .map_with_span(|token, span| (token, span)) + .padded_by(non_newline_whitespace) + .repeated() + .then_ignore(end()) +} + +fn is_newline(c: &char) -> bool { + // All line breaks matched by chumsky::text::newline + ['\n', + '\r', + '\x0B', // Vertical tab + '\x0C', // Form feed + '\u{0085}', // Next line + '\u{2028}', // Line separator + '\u{2029}', ].contains(c) // Paragraph separator +} + +pub(crate) fn lex(source: &str) -> (Option>>, Vec>) { + lexer().parse_recovery(source.to_uppercase()) +} + diff --git a/assembler/src/new/mod.rs b/assembler/src/new/mod.rs new file mode 100644 index 0000000..983002f --- /dev/null +++ b/assembler/src/new/mod.rs @@ -0,0 +1,93 @@ +mod lexer; +mod parser; + +use chumsky::prelude::*; +use chumsky::Stream; +use itertools::Itertools; +use lc3_isa::{Reg, SignedWord, Word}; + +type Span = std::ops::Range; +type Spanned = (T, Span); + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub(crate) enum Token { + Opcode(Opcode), + Register(Reg), + NumberLiteral(LiteralValue), + StringLiteral(String), + Label(String), + + Newline, + Comma, + + Comment, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub(crate) enum LiteralValue { + Word(Word), + SignedWord(SignedWord), +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct ConditionCodes { + n: bool, + z: bool, + p: bool, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum Opcode { + Add, + And, + Br(ConditionCodes), + Jmp, + Jsr, + Jsrr, + Ld, + Ldi, + Ldr, + Lea, + Not, + Ret, + Rti, + St, + Sti, + Str, + Trap, + + // Pseudo-ops + Orig, + Fill, + Blkw, + Stringz, + End, + + // Named TRAP routines + Getc, + Out, + Puts, + In, + Putsp, + Halt, +} + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simple() { + let src = ".ORIG x3000;\nLABEL ADD R0, R0, #7000\n.end"; + let (tokens, lex_errs) = lexer::lex(src); + println!("{:?}", tokens); + println!("{:?}", lex_errs); + + let parse_results = tokens.map(|ts| parser::parse(src, ts)); + if let Some((program, parse_errs)) = parse_results { + println!("{:?}", program); + println!("{:?}", parse_errs); + } + } +} \ No newline at end of file diff --git a/assembler/src/new/parser.rs b/assembler/src/new/parser.rs new file mode 100644 index 0000000..bfa0737 --- /dev/null +++ b/assembler/src/new/parser.rs @@ -0,0 +1,150 @@ +use chumsky::prelude::*; +use chumsky::Stream; +use super::{Spanned, Token, Opcode, Reg, LiteralValue, }; + +type WithErrData = Spanned>>; + +#[derive(Debug)] +pub(crate) struct Program { + orig: WithErrData, + instructions: Vec>, + end: WithErrData, +} + +#[derive(Debug)] +struct Instruction { + label: Option>, + opcode: WithErrData, + operands: WithErrData>>, +} + +#[derive(Debug)] +enum Operand { + Register(Reg), + NumberLiteral(LiteralValue), + StringLiteral(String), + Label(String), +} + +fn operand() -> impl Parser, Error = Simple> { + let operand = select! 
{ + Token::Register(reg) => Operand::Register(reg), + Token::NumberLiteral(val) => Operand::NumberLiteral(val), + Token::StringLiteral(s) => Operand::StringLiteral(s), + Token::Label(s) => Operand::Label(s), + }; + operand.map_with_span(|o, span| (o, span)) +} + +fn any_opcode_but(denied: Opcode) -> impl Parser, Error = Simple> { + filter_map(move |span, t: Token| + if let Token::Opcode(o) = t.clone() { + if o == denied { + Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected + } else { + Ok(o) + } + } else { + Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected + }) + .map_with_span(|o, span| (o, span)) +} + +fn opcode(expected: Opcode) -> impl Parser, Error = Simple> { + let expected_token = Token::Opcode(expected); + filter_map(move |span, t| + if t == expected_token { + if let Token::Opcode(o) = t { + Ok(o) + } else { unreachable!() } + } else { + Err(Simple::expected_input_found(span, [Some(expected_token.clone())], Some(t))) + }) + .map_with_span(|o, span| (o, span)) +} + +enum OpcodeFilter { + OnlyOrig, + AnyButEnd, + OnlyEnd, +} + +fn instruction(oc_filter: OpcodeFilter) -> impl Parser, Error = Simple> { + let label = + select! { Token::Label(s) => s } + .map_with_span(|s, span| (Ok(s), span)) + .or_not(); + + use OpcodeFilter::*; + let oc: Box, Error = Simple>> = + match oc_filter { + OnlyOrig => Box::new(opcode(Opcode::Orig)), + AnyButEnd => Box::new(any_opcode_but(Opcode::End)), + OnlyEnd => Box::new(opcode(Opcode::End)), + }; + let oc_with_err_data = oc.map(|(oc, span)| (Ok(oc), span)); + + let operands = + operand() + .map(|(o, span)| (Ok(o), span)) + .separated_by::(just(Token::Comma)) + .map_with_span(|os, span| (Ok(os), span)); + + label + .then_ignore(just(Token::Newline).repeated()) + .then(oc_with_err_data) + .then(operands) + .map_with_span(|((l, o), os), span| { + let instruction = Instruction { + label: l, + opcode: o, + operands: os, + }; + (instruction, span) + }) +} + +fn comments_and_newlines() -> impl Parser> { + just(Token::Comment).or_not() + .then(just(Token::Newline).repeated().at_least(1)) + .repeated() + .ignored() +} + +fn program() -> impl Parser, Error = Simple> { + comments_and_newlines() + .ignore_then( + instruction(OpcodeFilter::OnlyOrig) + .map(|(i, span)| (Ok(i), span))) + .then( + instruction(OpcodeFilter::AnyButEnd) + .map(|(i, span)| (Ok(i), span)) + .separated_by(comments_and_newlines()) + .allow_leading() + .allow_trailing() + ) + .then( + instruction(OpcodeFilter::OnlyEnd) + .map(|(i, span)| (Ok(i), span))) + .then_ignore(comments_and_newlines()) + .then_ignore(end()) + .map_with_span(|((orig, instructions), end), span| { + (Program { orig, instructions, end }, span) + }) +} + +type File = Vec>; + +fn file() -> impl Parser>>, Error = Simple> { + program() + .map(|(p, span)| (Ok(p), span)) + .separated_by(comments_and_newlines()) + .allow_leading() + .allow_trailing() + .map_with_span(|programs, span| (programs, span)) +} + +pub(crate) fn parse(src: &str, tokens: Vec>) -> (Option>, Vec>) { + let len = src.chars().count(); + file().parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) +} From 9b389a970ea5345f438193d28e3c6dab02d77756 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 9 May 2022 23:51:04 -0500 Subject: [PATCH 27/82] assembler: switch integration tests to new parser --- assembler/src/lib.rs | 2 + assembler/src/new/assembler.rs | 559 +++++++++++++++++++++++++++++++++ assembler/src/new/lexer.rs | 259 +++++++++++++-- 
assembler/src/new/linker.rs | 75 +++++ assembler/src/new/mod.rs | 81 +---- assembler/src/new/parser.rs | 112 +++++-- assembler/tests/integ.rs | 21 +- 7 files changed, 973 insertions(+), 136 deletions(-) create mode 100644 assembler/src/new/assembler.rs create mode 100644 assembler/src/new/linker.rs diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 03e779e..c227290 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -3,6 +3,8 @@ // TODO: denys // TODO: docs URL +extern crate core; + pub mod lexer; pub mod parser; pub mod ir; diff --git a/assembler/src/new/assembler.rs b/assembler/src/new/assembler.rs new file mode 100644 index 0000000..efaa15f --- /dev/null +++ b/assembler/src/new/assembler.rs @@ -0,0 +1,559 @@ +use std::collections::HashMap; +use std::convert::{TryFrom, TryInto}; +use std::fmt::Debug; +use std::num::{ParseIntError, TryFromIntError}; +use lc3_isa::{Addr, Reg, SignedWord, Word}; +use crate::new::lexer::{ConditionCodes, LiteralValue, Opcode}; +use crate::new::parser::Operand; +use super::parser; +use super::parser::{Program, WithErrData}; + +pub(crate) type SymbolTable = HashMap; + +#[derive(Clone)] +pub(crate) enum Sr2OrImm5 { + Sr2(Reg), + Imm5(SignedWord) +} + +impl TryFrom for Sr2OrImm5 { + type Error = (); + + fn try_from(value: Operand) -> Result { + Reg::try_from(value.clone()) + .map(Sr2OrImm5::Sr2) + .or_else(|_| + LiteralValue::try_from(value) + .unwrap_try_into() + .map(Sr2OrImm5::Imm5) + .map_err(|_| ())) + } +} + +impl TryFrom for PcOffset { + type Error = (); + + fn try_from(value: Operand) -> Result { + LiteralValue::try_from(value.clone()) + .map(|lv| { + let sw = lv.try_into().unwrap(); + PcOffset::Number(sw) + }) + .or_else(|_| Ok(PcOffset::Label(value.label()))) + } +} + +impl TryFrom for SignedWord { + type Error = TryFromIntError; + + fn try_from(value: Operand) -> Result { + LiteralValue::try_from(value).unwrap_try_into() + } +} + +impl TryFrom for Word { + type Error = TryFromIntError; + + fn try_from(value: Operand) -> Result { + LiteralValue::try_from(value).unwrap_try_into() + } +} + +impl TryFrom for u8 { + type Error = TryFromIntError; + + fn try_from(value: Operand) -> Result { + LiteralValue::try_from(value).unwrap_try_into() + } +} + +impl TryFrom for FillValue { + type Error = (); + + fn try_from(value: Operand) -> Result { + LiteralValue::try_from(value.clone()) + .unwrap_try_into() + .map(FillValue::Number) + .or_else(|_| Ok(FillValue::Label(value.label()))) + } +} + +#[derive(Clone)] +pub(crate) enum PcOffset { + Number(SignedWord), + Label(String), +} + +#[derive(Clone)] +pub(crate) enum FillValue { + Number(Word), + Label(String), +} + +#[derive(Clone)] +pub(crate) enum Instruction { + Add { dr: Reg, sr1: Reg, sr2_or_imm5: Sr2OrImm5 }, + And { dr: Reg, sr1: Reg, sr2_or_imm5: Sr2OrImm5 }, + Br { cond_codes: ConditionCodes, pc_offset9: PcOffset }, + Jmp { base: Reg }, + Jsr { pc_offset11: PcOffset }, + Jsrr { base: Reg }, + Ld { dr: Reg, pc_offset9: PcOffset }, + Ldi { dr: Reg, pc_offset9: PcOffset }, + Ldr { dr: Reg, base: Reg, offset6: SignedWord }, + Lea { dr: Reg, pc_offset9: PcOffset }, + Not { dr: Reg, sr: Reg }, + Ret, + Rti, + St { sr: Reg, pc_offset9: PcOffset }, + Sti { sr: Reg, pc_offset9: PcOffset }, + Str { sr: Reg, base: Reg, offset6: SignedWord }, + Trap { trap_vec: u8 }, + + Fill { value: FillValue }, + Blkw { size: Addr }, // Addr used here to signify a number of locations, as max value of Addr is number of possible Addrs. 
+ Stringz { string: String }, +} + +impl Instruction { + fn addresses_occupied(&self) -> Addr { + use Instruction::*; + + match self { + Add { .. } + | And { .. } + | Br { .. } + | Jmp { .. } + | Jsr { .. } + | Jsrr { .. } + | Ld { .. } + | Ldi { .. } + | Ldr { .. } + | Lea { .. } + | Not { .. } + | Ret + | Rti + | St { .. } + | Sti { .. } + | Str { .. } + | Trap { .. } + | Fill { .. } => 1, + + Blkw { size } => *size, + + // +1 is to count the null-terminator + Stringz { string } => (string.len() + 1) as Addr, // TODO: correct for escape characters + } + } +} + +pub struct Object { + pub(crate) symbol_table: SymbolTable, + pub(crate) origin: Addr, + pub(crate) words: Vec, +} + +#[derive(Clone)] +pub(crate) enum ObjectWord { + Value(Word), + UnlinkedInstruction(Instruction), +} + +fn unwrap(v: WithErrData) -> T { + v.0.unwrap() +} + +trait UnwrapTryFrom where + Self: Sized +{ + type Error; + + fn unwrap_try_from(v: T) -> Result; +} + +trait UnwrapTryInto { + type Error; + + fn unwrap_try_into(self) -> Result; +} + +impl UnwrapTryInto for U where + T: UnwrapTryFrom +{ + type Error = E; + + fn unwrap_try_into(self) -> Result { + T::unwrap_try_from(self) + } +} + +impl UnwrapTryFrom> for U where + U: TryFrom +{ + type Error = E; + + fn unwrap_try_from(v: WithErrData) -> Result { + unwrap(v).try_into() + } +} + +impl UnwrapTryFrom> for U where + U: TryFrom +{ + type Error = E; + + fn unwrap_try_from(v: Result) -> Result { + v.unwrap().try_into() + } +} + +fn unwrap_into(maybe_v: Option) -> U where + E: Debug, + U: UnwrapTryFrom +{ + maybe_v.unwrap().unwrap_try_into().unwrap() +} + + +impl TryFrom for Instruction { + type Error = (); + + fn try_from(i: parser::Instruction) -> Result { + + let parser::Instruction { opcode: raw_opcode, operands: raw_operands, .. 
} = i; + let operands = unwrap(raw_operands); + match unwrap(raw_opcode) { + Opcode::Add => { + let mut os = operands.into_iter(); + let dr = unwrap_into(os.next()); + let sr1 = unwrap_into(os.next()); + let sr2_or_imm5 = unwrap_into(os.next()); + Ok(Instruction::Add { dr, sr1, sr2_or_imm5 }) + } + Opcode::And => { + let mut os = operands.into_iter(); + let dr = unwrap_into(os.next()); + let sr1 = unwrap_into(os.next()); + let sr2_or_imm5 = unwrap_into(os.next()); + Ok(Instruction::And { dr, sr1, sr2_or_imm5 }) + } + Opcode::Br(cond_codes) => { + let mut os = operands.into_iter(); + let pc_offset9 = unwrap_into(os.next()); + Ok(Instruction::Br { cond_codes, pc_offset9 }) + } + Opcode::Jmp => { + let mut os = operands.into_iter(); + let base = unwrap_into(os.next()); + Ok(Instruction::Jmp { base }) + } + Opcode::Jsr => { + let mut os = operands.into_iter(); + let pc_offset11 = unwrap_into(os.next()); + Ok(Instruction::Jsr { pc_offset11 }) + } + Opcode::Jsrr => { + let mut os = operands.into_iter(); + let base = unwrap_into(os.next()); + Ok(Instruction::Jsrr { base }) + } + Opcode::Ld => { + let mut os = operands.into_iter(); + let dr = unwrap_into(os.next()); + let pc_offset9 = unwrap_into(os.next()); + Ok(Instruction::Ld { dr, pc_offset9 }) + } + Opcode::Ldi => { + let mut os = operands.into_iter(); + let dr = unwrap_into(os.next()); + let pc_offset9 = unwrap_into(os.next()); + Ok(Instruction::Ldi { dr, pc_offset9 }) + } + Opcode::Ldr => { + let mut os = operands.into_iter(); + let dr = unwrap_into(os.next()); + let base = unwrap_into(os.next()); + let offset6 = unwrap_into(os.next()); + Ok(Instruction::Ldr { dr, base, offset6 }) + } + Opcode::Lea => { + let mut os = operands.into_iter(); + let dr = unwrap_into(os.next()); + let pc_offset9 = unwrap_into(os.next()); + Ok(Instruction::Lea { dr, pc_offset9 }) + } + Opcode::Not => { + let mut os = operands.into_iter(); + let dr = unwrap_into(os.next()); + let sr = unwrap_into(os.next()); + Ok(Instruction::Not { dr, sr }) + } + Opcode::Ret => Ok(Instruction::Ret), + Opcode::Rti => Ok(Instruction::Rti), + Opcode::St => { + let mut os = operands.into_iter(); + let sr = unwrap_into(os.next()); + let pc_offset9 = unwrap_into(os.next()); + Ok(Instruction::St { sr, pc_offset9 }) + } + Opcode::Sti => { + let mut os = operands.into_iter(); + let sr = unwrap_into(os.next()); + let pc_offset9 = unwrap_into(os.next()); + Ok(Instruction::Sti { sr, pc_offset9 }) + } + Opcode::Str => { + let mut os = operands.into_iter(); + let sr = unwrap_into(os.next()); + let base = unwrap_into(os.next()); + let offset6 = unwrap_into(os.next()); + Ok(Instruction::Str { sr, base, offset6 }) + } + Opcode::Trap => { + let mut os = operands.into_iter(); + let trap_vec = unwrap_into(os.next()); + Ok(Instruction::Trap { trap_vec }) + } + + // TODO: improve error + Opcode::Orig => Err(()), + Opcode::End => Err(()), + + Opcode::Fill => { + let mut os = operands.into_iter(); + let value = unwrap_into(os.next()); + Ok(Instruction::Fill { value }) + } + Opcode::Blkw => { + let mut os = operands.into_iter(); + let size = unwrap(os.next().unwrap()).unqualified_number_value(); + Ok(Instruction::Blkw { size }) + } + Opcode::Stringz => { + let mut os = operands.into_iter(); + let string = unwrap(os.next().unwrap()).string(); + Ok(Instruction::Stringz { string }) + } + + Opcode::Getc => Ok(Instruction::Trap { trap_vec: 0x20 }), + Opcode::Out => Ok(Instruction::Trap { trap_vec: 0x21 }), + Opcode::Puts => Ok(Instruction::Trap { trap_vec: 0x22 }), + Opcode::In => Ok(Instruction::Trap { 
trap_vec: 0x23 }), + Opcode::Putsp => Ok(Instruction::Trap { trap_vec: 0x24 }), + Opcode::Halt => Ok(Instruction::Trap { trap_vec: 0x25 }), + } + } +} + +impl TryFrom for ObjectWord { + type Error = (); + + fn try_from(value: parser::Instruction) -> Result { + Instruction::try_from(value) + .map(ObjectWord::UnlinkedInstruction) + } +} + +pub(crate) enum AssemblyResult { + SingleObjectWord(ObjectWord), + MultipleObjectWords(Vec), +} + +fn calculate_offset(location_counter: &Addr, label_address: &Addr) -> SignedWord { + let lc = *location_counter as i32; + let la = *label_address as i32; + (la - (lc + 1)) as SignedWord +} + +pub(crate) fn try_assemble(symbol_table: &SymbolTable, location_counter: &Addr, instruction: Instruction) -> AssemblyResult { + use AssemblyResult::*; + use ObjectWord::*; + + match instruction { + Instruction::Add { dr, sr1, sr2_or_imm5 } => { + let word = + match sr2_or_imm5 { + Sr2OrImm5::Sr2(sr2) => lc3_isa::Instruction::new_add_reg(dr, sr1, sr2), + Sr2OrImm5::Imm5(imm5) => lc3_isa::Instruction::new_add_imm(dr, sr1, imm5), + }.into(); + SingleObjectWord(Value(word)) + } + Instruction::And { dr, sr1, sr2_or_imm5 } => { + let word = + match sr2_or_imm5 { + Sr2OrImm5::Sr2(sr2) => lc3_isa::Instruction::new_and_reg(dr, sr1, sr2), + Sr2OrImm5::Imm5(imm5) => lc3_isa::Instruction::new_and_imm(dr, sr1, imm5), + }.into(); + SingleObjectWord(Value(word)) + } + Instruction::Br { cond_codes: ConditionCodes { n, z, p }, pc_offset9 } => { + match pc_offset9 { + PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_br(n, z, p, sw).into())), + PcOffset::Label(label) => + match symbol_table.get(&label) { + Some(addr) => { + let offset = calculate_offset(location_counter, addr); + SingleObjectWord(Value(lc3_isa::Instruction::new_br(n, z, p, offset).into())) + } + None => SingleObjectWord(UnlinkedInstruction(Instruction::Br { cond_codes: ConditionCodes { n, z, p }, pc_offset9: PcOffset::Label(label) })), + } + } + } + Instruction::Jmp { base } => SingleObjectWord(Value(lc3_isa::Instruction::new_jmp(base).into())), + Instruction::Jsr { pc_offset11 } => { + match pc_offset11 { + PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_jsr(sw).into())), + PcOffset::Label(label) => + match symbol_table.get(&label) { + Some(addr) => { + let offset = calculate_offset(location_counter, addr); + SingleObjectWord(Value(lc3_isa::Instruction::new_jsr(offset).into())) + } + None => SingleObjectWord(UnlinkedInstruction(Instruction::Jsr { pc_offset11: PcOffset::Label(label) })), + } + } + } + Instruction::Jsrr { base } => SingleObjectWord(Value(lc3_isa::Instruction::new_jsrr(base).into())), + Instruction::Ld { dr, pc_offset9 } => { + match pc_offset9 { + PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_ld(dr, sw).into())), + PcOffset::Label(label) => + match symbol_table.get(&label) { + Some(addr) => { + let offset = calculate_offset(location_counter, addr); + SingleObjectWord(Value(lc3_isa::Instruction::new_ld(dr, offset).into())) + } + None => SingleObjectWord(UnlinkedInstruction(Instruction::Ld { dr, pc_offset9: PcOffset::Label(label)})), + } + } + } + Instruction::Ldi { dr, pc_offset9 } => { + match pc_offset9 { + PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_ldi(dr, sw).into())), + PcOffset::Label(label) => + match symbol_table.get(&label) { + Some(addr) => { + let offset = calculate_offset(location_counter, addr); + SingleObjectWord(Value(lc3_isa::Instruction::new_ldi(dr, offset).into())) + } + None => 
SingleObjectWord(UnlinkedInstruction(Instruction::Ldi { dr, pc_offset9: PcOffset::Label(label)})), + } + } + } + Instruction::Ldr { dr, base, offset6 } => SingleObjectWord(Value(lc3_isa::Instruction::new_ldr(dr, base, offset6).into())), + Instruction::Lea { dr, pc_offset9 } => { + match pc_offset9 { + PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_lea(dr, sw).into())), + PcOffset::Label(label) => + match symbol_table.get(&label) { + Some(addr) => { + let offset = calculate_offset(location_counter, addr); + SingleObjectWord(Value(lc3_isa::Instruction::new_lea(dr, offset).into())) + } + None => SingleObjectWord(UnlinkedInstruction(Instruction::Lea { dr, pc_offset9: PcOffset::Label(label)})), + } + } + } + Instruction::Not { dr, sr } => SingleObjectWord(Value(lc3_isa::Instruction::new_not(dr, sr).into())), + Instruction::Ret => SingleObjectWord(Value(lc3_isa::Instruction::new_ret().into())), + Instruction::Rti => SingleObjectWord(Value(lc3_isa::Instruction::new_rti().into())), + Instruction::St { sr, pc_offset9 } => { + match pc_offset9 { + PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_st(sr, sw).into())), + PcOffset::Label(label) => + match symbol_table.get(&label) { + Some(addr) => { + let offset = calculate_offset(location_counter, addr); + SingleObjectWord(Value(lc3_isa::Instruction::new_st(sr, offset).into())) + } + None => SingleObjectWord(UnlinkedInstruction(Instruction::St { sr, pc_offset9: PcOffset::Label(label)})), + } + } + } + Instruction::Sti { sr, pc_offset9 } => { + match pc_offset9 { + PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_sti(sr, sw).into())), + PcOffset::Label(label) => + match symbol_table.get(&label) { + Some(addr) => { + let offset = calculate_offset(location_counter, addr); + SingleObjectWord(Value(lc3_isa::Instruction::new_sti(sr, offset).into())) + } + None => SingleObjectWord(UnlinkedInstruction(Instruction::Sti { sr, pc_offset9: PcOffset::Label(label)})), + } + } + } + Instruction::Str { sr, base, offset6 } => SingleObjectWord(Value(lc3_isa::Instruction::new_str(sr, base, offset6).into())), + Instruction::Trap { trap_vec } => SingleObjectWord(Value(lc3_isa::Instruction::new_trap(trap_vec).into())), + + Instruction::Fill { value } => { + match value { + FillValue::Number(sw) => SingleObjectWord(Value(sw)), + FillValue::Label(label) => + match symbol_table.get(&label) { + Some(addr) => SingleObjectWord(Value(*addr)), + None => SingleObjectWord(UnlinkedInstruction(Instruction::Fill { value: FillValue::Label(label) })), + } + } + } + + Instruction::Blkw { size } => MultipleObjectWords( + std::iter::repeat(Value(0x00)) + .take(size as usize) + .collect()), + Instruction::Stringz { string } => { + let mut chars = string.chars() + .map(|c| Value(c as Word)) // TODO: correct for escape chars + .collect::>(); + chars.push(Value(0x00)); // null-terminator + MultipleObjectWords(chars) + } + } +} + +fn first_pass(origin: Addr, instructions: Vec>) -> (Vec, SymbolTable) { + let mut symbol_table = HashMap::new(); + let mut words = Vec::new(); + let mut location_counter = origin; + + for raw_instruction in instructions.into_iter() { + let parser_instruction = unwrap(raw_instruction); + if let Some(l) = parser_instruction.label.clone() { // TODO: label not needed for conversion to Instruction; consider changing to TryFrom<(Opcode, Operands)> to avoid clone + symbol_table.insert(unwrap(l), location_counter); + }; + + let instruction: Instruction = parser_instruction.try_into().unwrap(); + let 
addresses_used = instruction.addresses_occupied(); + words.push(instruction); + + location_counter += addresses_used; + } + + (words, symbol_table) +} + +fn second_pass(symbol_table: SymbolTable, origin: Addr, instructions: Vec) -> Object { + let mut location_counter = origin; + let mut words = Vec::new(); + + for instruction in instructions.into_iter() { + let addresses_used = instruction.addresses_occupied(); + match try_assemble(&symbol_table, &location_counter, instruction) { + AssemblyResult::SingleObjectWord(wd) => { words.push(wd); } + AssemblyResult::MultipleObjectWords(wds) => { words.extend(wds); } + } + location_counter += addresses_used; + } + + Object { origin, symbol_table, words } +} + +pub fn assemble(program: Program) -> Object { + let Program { orig, instructions: parser_instructions, .. } = program; + let parser::Instruction { operands: raw_orig_operands, .. } = unwrap(orig); + let orig_operand = unwrap(raw_orig_operands).remove(0); + let origin = LiteralValue::unwrap_try_from(orig_operand).unwrap_try_into().unwrap(); + + let (instructions, symbol_table) = first_pass(origin, parser_instructions); + second_pass(symbol_table, origin, instructions) +} \ No newline at end of file diff --git a/assembler/src/new/lexer.rs b/assembler/src/new/lexer.rs index 023c132..b31a074 100644 --- a/assembler/src/new/lexer.rs +++ b/assembler/src/new/lexer.rs @@ -1,9 +1,116 @@ use chumsky::prelude::*; -use lc3_isa::{SignedWord, Word}; -use super::{LiteralValue, Opcode, Reg, Token, Spanned, ConditionCodes }; +use lc3_isa::{Addr, Reg, SignedWord, Word}; +use std::convert::{TryFrom, TryInto}; +use super::Spanned; +use std::num::TryFromIntError; +use chumsky::Stream; +use crate::new::LeniencyLevel; -fn number_literal_with_base(base: u32, prefix: char) -> impl Parser> { - just(prefix) +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum Token { + Opcode(Opcode), + Register(Reg), + UnqualifiedNumberLiteral(Word), + NumberLiteral(LiteralValue), + StringLiteral(String), + Label(String), + + Newline, + Comma, + + Comment, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum LiteralValue { + Word(Word), + SignedWord(SignedWord), +} + +impl TryFrom for Addr { + type Error = TryFromIntError; + + fn try_from(value: LiteralValue) -> Result { + match value { + LiteralValue::Word(word) => Ok(word), + LiteralValue::SignedWord(signed_word) => signed_word.try_into(), + } + } +} + +impl TryFrom for SignedWord { + type Error = TryFromIntError; + + fn try_from(value: LiteralValue) -> Result { + match value { + LiteralValue::Word(word) => word.try_into(), + LiteralValue::SignedWord(signed_word) => Ok(signed_word), + } + } +} + +impl TryFrom for u8 { + type Error = TryFromIntError; + + fn try_from(value: LiteralValue) -> Result { + match value { + LiteralValue::Word(word) => word.try_into(), + LiteralValue::SignedWord(signed_word) => signed_word.try_into(), + } + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct ConditionCodes { + pub(crate) n: bool, + pub(crate) z: bool, + pub(crate) p: bool, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum Opcode { + Add, + And, + Br(ConditionCodes), + Jmp, + Jsr, + Jsrr, + Ld, + Ldi, + Ldr, + Lea, + Not, + Ret, + Rti, + St, + Sti, + Str, + Trap, + + // Pseudo-ops + Orig, + Fill, + Blkw, + Stringz, + End, + + // Named TRAP routines + Getc, + Out, + Puts, + In, + Putsp, + Halt, +} + +enum CaseSensitivePassResult { + CaseInsensitiveSource(String), + CaseSensitiveToken(Token), +} + +fn number_literal_with_base(base: u32, prefix: char, leniency: 
LeniencyLevel) -> impl Parser> { + let strict_literal = + just(prefix) .ignore_then(just('-').ignored().or_not()) .then(text::digits(base)) .try_map(move |(maybe_sign, digits): (Option<()>, String), span| { @@ -15,7 +122,13 @@ fn number_literal_with_base(base: u32, prefix: char) -> impl Parser>> = + match leniency { + LeniencyLevel::Lenient => Box::new(just("0").or_not().ignore_then(strict_literal)), + LeniencyLevel::Strict => Box::new(strict_literal), + }; + literal } fn one_opcode(pattern: &'static str, output_opcode: Opcode) -> impl Parser> { @@ -30,7 +143,33 @@ fn just_to(pattern: &'static str, output: O) -> impl Parser impl Parser>, Error=Simple> { +fn string_literal() -> impl Parser> { + // `escape` and `string_literal` are based on JSON parser example + // https://github.com/zesterer/chumsky/blob/d4102128315d9dbbea901a91dc5eaa0fc9a790f7/examples/json.rs#L39 + let escape = just::<_, _, Simple>('\\').ignore_then( + just('\\') + .or(just('"')) + .or(just('b').to('\x08')) + .or(just('f').to('\x0C')) + .or(just('n').to('\n')) + .or(just('r').to('\r')) + .or(just('t').to('\t')) + ); + + just('"') + .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated()) + .then_ignore(just('"')) + .collect::() + .map(Token::StringLiteral) +} + +fn comment() -> impl Parser> { + just(';') + .then(filter(|c| !is_newline(c)).repeated()) + .to(Token::Comment) +} + +fn tokens(leniency: LeniencyLevel) -> impl Parser>, Error=Simple> { let newline = text::newline() .to(Token::Newline); @@ -104,38 +243,24 @@ fn lexer() -> impl Parser>, Error=Simple> { )) .map(Token::Register); - // `escape` and `string_literal` are based on JSON parser example - // https://github.com/zesterer/chumsky/blob/d4102128315d9dbbea901a91dc5eaa0fc9a790f7/examples/json.rs#L39 - let escape = just::<_, _, Simple>('\\').ignore_then( - just('\\') - .or(just('"')) - .or(just('b').to('\x08')) - .or(just('f').to('\x0C')) - .or(just('n').to('\n')) - .or(just('r').to('\r')) - .or(just('t').to('\t')) - ); - - let string_literal = just('"') - .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated()) - .then_ignore(just('"')) - .collect::() - .map(Token::StringLiteral); + let unqualified_number_literal_base = 10; + let unqualified_number_literal = text::digits(unqualified_number_literal_base) + .try_map(move |digits: String, span| { + Word::from_str_radix(&digits, unqualified_number_literal_base) + .map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow + }) + .map(Token::UnqualifiedNumberLiteral); let number_literal = choice(( - number_literal_with_base(2, 'B'), - number_literal_with_base(10, '#'), - number_literal_with_base(16, 'X'), + number_literal_with_base(2, 'B', leniency), + number_literal_with_base(10, '#', leniency), + number_literal_with_base(16, 'X', leniency), )) .map(Token::NumberLiteral); let label = text::ident() // C-style identifier. Follows all LC-3 label rules but allows arbitrary length and underscores. .map(Token::Label); // TODO: validate length, underscores in strict mode - let comment = just(';') - .then(filter(|c| !is_newline(c)).repeated()) - .to(Token::Comment); - let comma = just(',') .to(Token::Comma); @@ -143,11 +268,12 @@ fn lexer() -> impl Parser>, Error=Simple> { opcode, register, number_literal, - string_literal, + unqualified_number_literal, + string_literal(), label, newline, comma, - comment, + comment(), )) .recover_with(skip_then_retry_until([])); // TODO: improve? 
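The hunk below replaces the whole-source `to_uppercase` call with a two-pass lexer: a case-sensitive pass keeps string literals and comments verbatim, and every other chunk of source is uppercased before tokenizing, with spans offset so they still point into the original source. The fragment that follows is only an illustrative sketch of that idea in plain `std` Rust (no chumsky, no span tracking); the function name `uppercase_outside_strings_and_comments` and its single-string output are assumptions for illustration, not part of the patch, and escaped quotes inside string literals are ignored for brevity.

    fn uppercase_outside_strings_and_comments(source: &str) -> String {
        // Sketch only: uppercase the case-insensitive parts of LC-3 source while
        // leaving string literals ("...") and comments (; to end of line) untouched.
        let mut out = String::with_capacity(source.len());
        let mut chars = source.chars().peekable();
        while let Some(c) = chars.next() {
            match c {
                '"' => {
                    // Copy the string literal verbatim, including the closing quote.
                    out.push(c);
                    while let Some(s) = chars.next() {
                        out.push(s);
                        if s == '"' { break; }
                    }
                }
                ';' => {
                    // Copy the comment verbatim up to (not including) the newline.
                    out.push(c);
                    while let Some(&s) = chars.peek() {
                        if s == '\n' { break; }
                        out.push(s);
                        chars.next();
                    }
                }
                _ => out.extend(c.to_uppercase()),
            }
        }
        out
    }

For example, applied to a line like `.stringz "Hello, world!" ; greeting`, this sketch would uppercase the directive but leave the quoted text and the comment text exactly as written, which is the behavior the two-pass lexer achieves while also preserving token spans.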
@@ -172,7 +298,72 @@ fn is_newline(c: &char) -> bool { '\u{2029}', ].contains(c) // Paragraph separator } -pub(crate) fn lex(source: &str) -> (Option>>, Vec>) { - lexer().parse_recovery(source.to_uppercase()) +fn case_sensitive_tokens() -> impl Parser>, Error=Simple> { + let case_sensitive_token = + choice(( + string_literal(), + comment() + )) + .map(|t| CaseSensitivePassResult::CaseSensitiveToken(t)); + + let case_insensitive_source = + filter(|c| *c != '"' && *c != ';').repeated().at_least(1) + .collect() + .map(|s| CaseSensitivePassResult::CaseInsensitiveSource(s)); + + case_insensitive_source + .or(case_sensitive_token) + .map_with_span(|cspr, s| (cspr, s)) + .repeated() + .then_ignore(end()) +} + +fn case_sensitive_pass(source: &str) -> (Option>>, Vec>) { + case_sensitive_tokens().parse_recovery_verbose(source) } +fn case_insensitive_pass(case_sensitive_pass_results: Vec>, leniency: LeniencyLevel) -> (Option>>, Vec>) { + let mut toks: Option>> = None; + let mut errors = Vec::new(); + + for (cspr, span) in case_sensitive_pass_results { + match cspr { + CaseSensitivePassResult::CaseInsensitiveSource(s) => { + // TODO: profile CPU + memory to see whether this introduces any inefficiencies. + // This allows chumsky to correctly track spans while parsing this substring. + let uppercase_s = s.to_uppercase(); + let spanned_char_stream = uppercase_s.chars() + .enumerate() + .map(|(i, c)| { + let pos = span.start + i; + (c, pos..(pos + 1)) + }); + let stream = Stream::from_iter(span.end..(span.end + 1), spanned_char_stream); + let (maybe_tokens, lex_errors) = tokens(leniency).parse_recovery_verbose(stream); + + if let Some(ts) = maybe_tokens { + toks.get_or_insert(Vec::new()).extend(ts); + } + errors.extend(lex_errors); + } + CaseSensitivePassResult::CaseSensitiveToken(t) => { + toks.get_or_insert(Vec::new()).push((t, span)); + } + } + } + + (toks, errors) +} + +pub fn lex(source: &str, leniency: LeniencyLevel) -> (Option>>, Vec>) { + let (maybe_csprs, mut errors) = case_sensitive_pass(source); + let tokens = + if let Some(csprs) = maybe_csprs { + let (maybe_tokens, cip_errors) = case_insensitive_pass(csprs, leniency); + errors.extend(cip_errors); + maybe_tokens + } else { + None + }; + (tokens, errors) +} diff --git a/assembler/src/new/linker.rs b/assembler/src/new/linker.rs new file mode 100644 index 0000000..90baa29 --- /dev/null +++ b/assembler/src/new/linker.rs @@ -0,0 +1,75 @@ +use std::collections::HashMap; +use chumsky::chain::Chain; +use chumsky::Parser; +use lc3_isa::util::MemoryDump; +use lc3_isa::{Addr, ADDR_SPACE_SIZE_IN_WORDS, Word}; +use crate::new::assembler::try_assemble; +use super::assembler::{SymbolTable, Object, ObjectWord, AssemblyResult}; + +struct LinkedObject { + origin: Addr, + words: Vec, +} + +fn layer_object(image: &mut [Word; ADDR_SPACE_SIZE_IN_WORDS], object: LinkedObject) { + let LinkedObject { origin, words } = object; + let mut addr = origin as usize; + for word in words { + image[addr] = word; + addr += 1; + } +} + +fn link_object(symbol_table: &SymbolTable, object: Object) -> LinkedObject { + let mut words = Vec::new(); + let Object { origin, words: object_words, .. 
} = object; + let mut location_counter = origin; + for object_word in object_words { + match object_word { + ObjectWord::Value(word) => { + words.push(word); + location_counter += 1; + }, + ObjectWord::UnlinkedInstruction(instruction) => + match try_assemble(&symbol_table, &location_counter, instruction) { + AssemblyResult::SingleObjectWord(word) => match word { + ObjectWord::Value(word) => { + words.push(word); + location_counter += 1; + } + ObjectWord::UnlinkedInstruction(_) => panic!("Failed to link an instruction") + } + AssemblyResult::MultipleObjectWords(ows) => { + let mut ws = ows.into_iter() + .map(|ow| match ow { + ObjectWord::Value(word) => word, + ObjectWord::UnlinkedInstruction(_) => panic!("Unexpected unlinked instruction") + }) + .collect::>(); + location_counter += ws.len() as u16; + words.extend(ws); + } + } + } + } + LinkedObject { origin, words } +} + +pub fn link(objects: impl IntoIterator) -> MemoryDump { + let objects = objects.into_iter().collect::>(); + + let mut symbol_table = HashMap::new(); + for object in objects.iter() { + for (label, addr) in object.symbol_table.iter() { + symbol_table.insert(label.clone(), *addr); + } + } + + let mut image = [0; ADDR_SPACE_SIZE_IN_WORDS]; + for object in objects { + let linked_object = link_object(&symbol_table, object); + layer_object(&mut image, linked_object); + } + + image.into() +} \ No newline at end of file diff --git a/assembler/src/new/mod.rs b/assembler/src/new/mod.rs index 983002f..86453b9 100644 --- a/assembler/src/new/mod.rs +++ b/assembler/src/new/mod.rs @@ -1,78 +1,17 @@ -mod lexer; -mod parser; - -use chumsky::prelude::*; -use chumsky::Stream; -use itertools::Itertools; -use lc3_isa::{Reg, SignedWord, Word}; +pub mod lexer; +pub mod parser; +pub mod assembler; +pub mod linker; type Span = std::ops::Range; type Spanned = (T, Span); -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub(crate) enum Token { - Opcode(Opcode), - Register(Reg), - NumberLiteral(LiteralValue), - StringLiteral(String), - Label(String), - - Newline, - Comma, - - Comment, -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub(crate) enum LiteralValue { - Word(Word), - SignedWord(SignedWord), -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct ConditionCodes { - n: bool, - z: bool, - p: bool, -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub enum Opcode { - Add, - And, - Br(ConditionCodes), - Jmp, - Jsr, - Jsrr, - Ld, - Ldi, - Ldr, - Lea, - Not, - Ret, - Rti, - St, - Sti, - Str, - Trap, - - // Pseudo-ops - Orig, - Fill, - Blkw, - Stringz, - End, - - // Named TRAP routines - Getc, - Out, - Puts, - In, - Putsp, - Halt, +#[derive(Copy, Clone)] +pub enum LeniencyLevel { + Lenient, + Strict } - #[cfg(test)] mod tests { use super::*; @@ -80,11 +19,11 @@ mod tests { #[test] fn simple() { let src = ".ORIG x3000;\nLABEL ADD R0, R0, #7000\n.end"; - let (tokens, lex_errs) = lexer::lex(src); + let (tokens, lex_errs) = lexer::lex(src, LeniencyLevel::Lenient); println!("{:?}", tokens); println!("{:?}", lex_errs); - let parse_results = tokens.map(|ts| parser::parse(src, ts)); + let parse_results = tokens.map(|ts| parser::parse(src, ts, LeniencyLevel::Strict)); if let Some((program, parse_errs)) = parse_results { println!("{:?}", program); println!("{:?}", parse_errs); diff --git a/assembler/src/new/parser.rs b/assembler/src/new/parser.rs index bfa0737..70d1544 100644 --- a/assembler/src/new/parser.rs +++ b/assembler/src/new/parser.rs @@ -1,37 +1,93 @@ +use std::convert::TryFrom; use chumsky::prelude::*; use chumsky::Stream; -use 
super::{Spanned, Token, Opcode, Reg, LiteralValue, }; +use crate::new::LeniencyLevel; +use crate::new::lexer::{LiteralValue, Opcode, Token}; +use super::Spanned; +use lc3_isa::{Reg, Word}; -type WithErrData = Spanned>>; +pub(crate) type WithErrData = Spanned>>; #[derive(Debug)] -pub(crate) struct Program { - orig: WithErrData, - instructions: Vec>, +pub struct Program { + pub(crate) orig: WithErrData, + pub(crate) instructions: Vec>, end: WithErrData, } #[derive(Debug)] -struct Instruction { - label: Option>, - opcode: WithErrData, - operands: WithErrData>>, +pub(crate) struct Instruction { + pub(crate) label: Option>, + pub(crate) opcode: WithErrData, + pub(crate) operands: WithErrData>>, } -#[derive(Debug)] -enum Operand { +#[derive(Clone, Debug)] +pub(crate) enum Operand { Register(Reg), + UnqualifiedNumberLiteral(Word), NumberLiteral(LiteralValue), StringLiteral(String), Label(String), } +impl TryFrom for Reg { + type Error = (); + + fn try_from(e: Operand) -> Result { + if let Operand::Register(r) = e { + Ok(r) + } else { + Err(()) + } + } +} + +impl TryFrom for LiteralValue { + type Error = (); + + fn try_from(e: Operand) -> Result { + if let Operand::NumberLiteral(v) = e { + Ok(v) + } else { + Err(()) + } + } +} + +impl Operand { + pub(crate) fn string(self) -> String { + if let Self::StringLiteral(s) = self { + s + } else { + panic!("Not a string literal") + } + } + + pub(crate) fn label(self) -> String { + if let Self::Label(l) = self { + l + } else { + panic!("Not a label") + } + } + + pub(crate) fn unqualified_number_value(self) -> Word { + if let Self::UnqualifiedNumberLiteral(w) = self { + w + } else { + panic!("Not an unqualified number literal") + } + } +} + fn operand() -> impl Parser, Error = Simple> { let operand = select! { - Token::Register(reg) => Operand::Register(reg), - Token::NumberLiteral(val) => Operand::NumberLiteral(val), - Token::StringLiteral(s) => Operand::StringLiteral(s), - Token::Label(s) => Operand::Label(s), + Token::Register(reg) => Operand::Register(reg), + Token::UnqualifiedNumberLiteral(val) => Operand::UnqualifiedNumberLiteral(val), + Token::NumberLiteral(val) => Operand::NumberLiteral(val), + Token::StringLiteral(s) => Operand::StringLiteral(s), + Token::Label(s) => Operand::Label(s), }; operand.map_with_span(|o, span| (o, span)) } @@ -69,7 +125,7 @@ enum OpcodeFilter { OnlyEnd, } -fn instruction(oc_filter: OpcodeFilter) -> impl Parser, Error = Simple> { +fn instruction(oc_filter: OpcodeFilter, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { let label = select! 
{ Token::Label(s) => s } .map_with_span(|s, span| (Ok(s), span)) @@ -84,10 +140,16 @@ fn instruction(oc_filter: OpcodeFilter) -> impl Parser>> = + match leniency { + LeniencyLevel::Lenient => Box::new(just(Token::Comma).or_not().ignored()), + LeniencyLevel::Strict => Box::new(just(Token::Comma).ignored()), + }; + let operands = operand() .map(|(o, span)| (Ok(o), span)) - .separated_by::(just(Token::Comma)) + .separated_by(operand_separator) .map_with_span(|os, span| (Ok(os), span)); label @@ -111,20 +173,20 @@ fn comments_and_newlines() -> impl Parser> { .ignored() } -fn program() -> impl Parser, Error = Simple> { +fn program(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { comments_and_newlines() .ignore_then( - instruction(OpcodeFilter::OnlyOrig) + instruction(OpcodeFilter::OnlyOrig, leniency) .map(|(i, span)| (Ok(i), span))) .then( - instruction(OpcodeFilter::AnyButEnd) + instruction(OpcodeFilter::AnyButEnd, leniency) .map(|(i, span)| (Ok(i), span)) .separated_by(comments_and_newlines()) .allow_leading() .allow_trailing() ) .then( - instruction(OpcodeFilter::OnlyEnd) + instruction(OpcodeFilter::OnlyEnd, leniency) .map(|(i, span)| (Ok(i), span))) .then_ignore(comments_and_newlines()) .then_ignore(end()) @@ -135,8 +197,8 @@ fn program() -> impl Parser, Error = Simple> { type File = Vec>; -fn file() -> impl Parser>>, Error = Simple> { - program() +fn file(leniency: LeniencyLevel) -> impl Parser>>, Error = Simple> { + program(leniency) .map(|(p, span)| (Ok(p), span)) .separated_by(comments_and_newlines()) .allow_leading() @@ -144,7 +206,7 @@ fn file() -> impl Parser>>, Error = Simp .map_with_span(|programs, span| (programs, span)) } -pub(crate) fn parse(src: &str, tokens: Vec>) -> (Option>, Vec>) { +pub fn parse(src: &str, tokens: Vec>, leniency: LeniencyLevel) -> (Option>, Vec>) { let len = src.chars().count(); - file().parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) + file(leniency).parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) } diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index d19a0f6..93bd3cf 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -2,8 +2,7 @@ extern crate lc3_assembler; use lc3_assembler::lexer::Lexer; use lc3_assembler::parser::parse; -use lc3_isa::Word; -use lc3_assembler::parser::LeniencyLevel::Lenient; +use lc3_isa::{ADDR_MAX_VAL, Word}; use std::ops::Index; use lc3_isa::util::MemoryDump; @@ -266,17 +265,27 @@ mod single_instruction { } fn test(input: &str, orig: usize, expected_mem: &[Word]) { - let lexer = Lexer::new(input); - let cst = parse(lexer, Lenient); + use lc3_assembler::new::*; + + let (maybe_tokens, lex_errs) = lexer::lex(input, LeniencyLevel::Lenient); + let tokens = maybe_tokens.expect("lexing failed"); + println!("{:?}", tokens); + + let (maybe_file, parse_errs) = parser::parse(input, tokens, LeniencyLevel::Lenient); + let (mut file, span) = maybe_file.expect("parsing failed"); + assert_eq!(1, file.len(), "parsed unexpected number of programs: {}", file.len()); + let program = file.remove(0).0.expect("parse error in program"); + let object = assembler::assemble(program); + + let mem = linker::link([object]); - let mem = cst.assemble(None); for i in 0..orig { assert_mem(&mem, i, 0x0000); } for i in 0..expected_mem.len() { assert_mem(&mem, orig + i, expected_mem[i]); } - for i in (orig + expected_mem.len())..0xFFFF { + for i in (orig + expected_mem.len())..(ADDR_MAX_VAL as usize) { assert_mem(&mem, i, 0x0000); } } From 
b21f196a6538c7b306b4391de589e749ae7d2fcf Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 18 May 2022 01:02:38 -0500 Subject: [PATCH 28/82] assembler: remove original assembler --- assembler/bin/as.rs | 68 +-- assembler/src/analysis/extract_errors.rs | 180 ------ assembler/src/analysis/memory_placement.rs | 100 ---- assembler/src/analysis/mod.rs | 15 - assembler/src/analysis/symbol_table.rs | 118 ---- assembler/src/analysis/validate.rs | 12 - assembler/src/{new => }/assembler.rs | 8 +- assembler/src/complete.rs | 441 --------------- assembler/src/error.rs | 307 ----------- assembler/src/ir/ir1_parse_lines.rs | 91 --- assembler/src/ir/ir2_parse_line_syntax.rs | 447 --------------- assembler/src/ir/ir3_parse_objects.rs | 198 ------- .../src/ir/ir4_parse_ambiguous_tokens.rs | 520 ------------------ assembler/src/ir/ir5_expand_pseudo_ops.rs | 127 ----- assembler/src/ir/mod.rs | 20 - assembler/src/lexer.rs | 464 ++++++++++------ assembler/src/lib.rs | 33 +- assembler/src/{new => }/linker.rs | 3 +- assembler/src/new/lexer.rs | 369 ------------- assembler/src/new/mod.rs | 32 -- assembler/src/new/parser.rs | 212 ------- assembler/src/parser.rs | 240 +++++++- assembler/src/util.rs | 12 - assembler/tests/integ.rs | 6 +- 24 files changed, 558 insertions(+), 3465 deletions(-) delete mode 100644 assembler/src/analysis/extract_errors.rs delete mode 100644 assembler/src/analysis/memory_placement.rs delete mode 100644 assembler/src/analysis/mod.rs delete mode 100644 assembler/src/analysis/symbol_table.rs delete mode 100644 assembler/src/analysis/validate.rs rename assembler/src/{new => }/assembler.rs (99%) delete mode 100644 assembler/src/complete.rs delete mode 100644 assembler/src/error.rs delete mode 100644 assembler/src/ir/ir1_parse_lines.rs delete mode 100644 assembler/src/ir/ir2_parse_line_syntax.rs delete mode 100644 assembler/src/ir/ir3_parse_objects.rs delete mode 100644 assembler/src/ir/ir4_parse_ambiguous_tokens.rs delete mode 100644 assembler/src/ir/ir5_expand_pseudo_ops.rs delete mode 100644 assembler/src/ir/mod.rs rename assembler/src/{new => }/linker.rs (95%) delete mode 100644 assembler/src/new/lexer.rs delete mode 100644 assembler/src/new/mod.rs delete mode 100644 assembler/src/new/parser.rs delete mode 100644 assembler/src/util.rs diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index e2df30a..f355538 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -2,14 +2,13 @@ extern crate lc3_assembler; use std::{env, fs}; use std::path::{Path, PathBuf}; -use lc3_assembler::lexer::Lexer; use lc3_assembler::parser::parse; use lc3_shims::memory::FileBackedMemoryShim; use clap::clap_app; -use lc3_assembler::parser::LeniencyLevel::*; -use lc3_assembler::analysis::extract_errors::extract_errors; -use annotate_snippets::display_list::{DisplayList, FormatOptions}; -use annotate_snippets::snippet::{Snippet, Annotation, Slice, AnnotationType, SourceAnnotation}; +use lc3_assembler::assembler::assemble; +use lc3_assembler::LeniencyLevel; +use lc3_assembler::lexer::lex; +use lc3_assembler::linker::link; const MEM_DUMP_FILE_EXTENSION: &'static str = "mem"; @@ -36,34 +35,26 @@ fn as_() { let path = Path::new(path_str); assert!(path.is_file()); - let leniency = if matches.is_present("strict") { Strict } else { Lenient }; + let leniency = if matches.is_present("strict") { LeniencyLevel::Strict } else { LeniencyLevel::Lenient }; let string = fs::read_to_string(path).unwrap(); let src = string.as_str(); - let lexer = Lexer::new(src); - let program = parse(lexer, leniency); - let errors = 
extract_errors(&program); - if errors.len() > 0 { - for error in errors { - if error.should_show() { - let label_string = error.message(); - let label = label_string.as_str(); - let annotations = error.annotations(); - let slices = slices(annotations, src, Some(path_str)); - let snippet = create_snippet(label, slices); - let dl = DisplayList::from(snippet); - println!("{}", dl); - } - } - break; - } + let (maybe_tokens, lex_errs) = lex(src, leniency); + let tokens = maybe_tokens.expect("lexing failed"); + + let (maybe_file, parse_errs) = parse(src, tokens, leniency); + let (mut file, span) = maybe_file.expect("parsing failed"); + assert_eq!(1, file.len(), "parsed unexpected number of programs: {}", file.len()); + let program = file.remove(0).0.expect("parse error in program"); if matches.is_present("check") { println!("{}: No errors found.", path_str); } else { let background = if matches.is_present("with_os") { Some(lc3_os::OS_IMAGE.clone()) } else { None }; - let mem = program.assemble(background); + + let object = assemble(program); + let mem = link([object]); let mut output_path = PathBuf::from(path_str); output_path.set_extension(MEM_DUMP_FILE_EXTENSION); @@ -72,32 +63,3 @@ fn as_() { } } } - -fn create_snippet<'input>(label: &'input str, slices: Vec>) -> Snippet<'input> { - Snippet { - title: Some(Annotation { - label: Some(label), - id: None, - annotation_type: AnnotationType::Error - }), - footer: vec![], - slices, - opt: FormatOptions { color: true, anonymized_line_numbers: false } - } -} - -pub fn slices<'input>(annotations: Vec>, source: &'input str, origin: Option<&'input str>) -> Vec> { - let mut slices = Vec::new(); - if !annotations.is_empty() { - slices.push( - Slice { - source, - origin, - line_start: 1, - fold: true, - annotations, - } - ); - } - slices -} diff --git a/assembler/src/analysis/extract_errors.rs b/assembler/src/analysis/extract_errors.rs deleted file mode 100644 index 9491d36..0000000 --- a/assembler/src/analysis/extract_errors.rs +++ /dev/null @@ -1,180 +0,0 @@ -use crate::complete::{Program, Object, ObjectContent, Operation, Operands}; -use crate::error::{Error, ParseError}; -use crate::ir::ir4_parse_ambiguous_tokens; -use crate::ir::ir5_expand_pseudo_ops; - -pub fn extract_errors(program: &Program) -> Vec { - let mut errors = Vec::new(); - - let Program { objects, memory_placement_errors, .. } = program; - if objects.len() == 0 { - errors.push(ParseError::NoObjects.into()); - } - - for object in objects { - extract_object_errors_into(object, &mut errors); - } - - for memory_placement_error in memory_placement_errors { - errors.push(memory_placement_error.into()); - } - - errors -} - -fn extract_object_errors_into(object: &Object, errors: &mut Vec) { - let Object { origin_src, origin, content, symbol_table, } = object; - - extract_ir5_operation_errors(origin_src, errors); - origin.extract_error_into(errors); - extract_object_content_errors(content, errors); - - if let Err(symbol_table_errors) = symbol_table { - for symbol_table_error in symbol_table_errors { - errors.push(symbol_table_error.into()); - } - } -} - -fn extract_object_content_errors(object_content: &ObjectContent, errors: &mut Vec) { - let ObjectContent { operations, hanging_labels, invalid_lines, .. 
} = object_content; - - for operation in operations { - extract_operation_errors(operation, errors); - } - - for hanging_label in hanging_labels { - let range = hanging_label.span().unwrap(); - errors.push(ParseError::HangingLabel { range }.into()); - } - - for invalid_line in invalid_lines { - let range = invalid_line.span(); - errors.push(ParseError::InvalidLine { range }.into()); - } - -} - -fn extract_operation_errors(operation: &Operation, errors: &mut Vec) { - let Operation { label, operands, nzp, instruction_or_values, .. } = operation; - - if let Some(label) = label { - label.extract_error_into(errors); - } - - extract_operands_errors(operands, errors); - - if let Err(error) = nzp { - errors.push(error.into()); - } - - if let Err(inst_error) = instruction_or_values { - errors.push(inst_error.into()); - } -} - -fn extract_ir5_operation_errors(operation: &ir5_expand_pseudo_ops::Operation, errors: &mut Vec) { - let ir5_expand_pseudo_ops::Operation { label, operands, nzp, expanded, .. } = operation; - - if let Some(label) = label { - label.extract_error_into(errors); - } - - extract_operands_errors(operands, errors); - - if let Err(error) = nzp { - errors.push(error.into()); - } - - if let Some(Err(parse_error)) = expanded { - errors.push(parse_error.into()); - } - -} - -fn extract_operands_errors(operands: &Operands, errors: &mut Vec) { - use ir4_parse_ambiguous_tokens::Operands::*; - - match operands { - Add { dr, sr1, sr2_or_imm5 } => { - dr.extract_error_into(errors); - sr1.extract_error_into(errors); - sr2_or_imm5.extract_error_into(errors); - }, - And { dr, sr1, sr2_or_imm5 } => { - dr.extract_error_into(errors); - sr1.extract_error_into(errors); - sr2_or_imm5.extract_error_into(errors); - }, - Br { pc_offset9 } => { - pc_offset9.extract_error_into(errors); - }, - Jmp { base } => { - base.extract_error_into(errors); - }, - Jsr { pc_offset11 } => { - pc_offset11.extract_error_into(errors); - }, - Jsrr { base } => { - base.extract_error_into(errors); - }, - Ld { dr, pc_offset9 } => { - dr.extract_error_into(errors); - pc_offset9.extract_error_into(errors); - }, - Ldi { dr, pc_offset9 } => { - dr.extract_error_into(errors); - pc_offset9.extract_error_into(errors); - }, - Ldr { dr, base, offset6 } => { - dr.extract_error_into(errors); - base.extract_error_into(errors); - offset6.extract_error_into(errors); - }, - Lea { dr, pc_offset9 } => { - dr.extract_error_into(errors); - pc_offset9.extract_error_into(errors); - }, - Not { dr, sr } => { - dr.extract_error_into(errors); - sr.extract_error_into(errors); - }, - St { sr, pc_offset9 } => { - sr.extract_error_into(errors); - pc_offset9.extract_error_into(errors); - } - Sti { sr, pc_offset9 } => { - sr.extract_error_into(errors); - pc_offset9.extract_error_into(errors); - } - Str { sr, base, offset6 } => { - sr.extract_error_into(errors); - base.extract_error_into(errors); - offset6.extract_error_into(errors); - } - Trap { trap_vec } => { - trap_vec.extract_error_into(errors); - } - Orig { origin } => { - origin.extract_error_into(errors); - } - Fill { value } => { - value.extract_error_into(errors); - } - Blkw { size, .. } => { - size.extract_error_into(errors); - } - Stringz { .. 
} => {} - - // Putting these in instead of _ to avoid forgetting to change - Ret - | Rti - | Getc - | Out - | Puts - | In - | Putsp - | Halt - | End => {} - }; -} diff --git a/assembler/src/analysis/memory_placement.rs b/assembler/src/analysis/memory_placement.rs deleted file mode 100644 index 2ed5efd..0000000 --- a/assembler/src/analysis/memory_placement.rs +++ /dev/null @@ -1,100 +0,0 @@ -use itertools::Itertools; -use lc3_isa::Addr; - -use crate::ir::ir5_expand_pseudo_ops; -use crate::error::ParseError; -use crate::ir::ir4_parse_ambiguous_tokens::Checked; -use annotate_snippets::snippet::SourceAnnotation; - -#[derive(Debug, Clone)] -pub enum MemoryPlacementError { - InvalidOrigin { - parse_error: ParseError, - }, - UnknownPseudoOpLength { - parse_error: ParseError, - }, - ObjectsOverlap -} - -impl MemoryPlacementError { - - pub fn message(&self) -> String { - use MemoryPlacementError::*; - match self { - InvalidOrigin { .. } => "could not validate memory placement due to error parsing .ORIG", - UnknownPseudoOpLength { .. } => "could not validate memory placement due to error parsing pseudo-op", - ObjectsOverlap => "two objects (.ORIG/.END blocks) would occupy same memory locations", - }.to_string() - } - - pub fn annotations(&self) -> Vec { - vec![] - } - - pub fn should_show(&self) -> bool { - use MemoryPlacementError::*; - - match self { - InvalidOrigin { .. } - | UnknownPseudoOpLength { .. } => false, - ObjectsOverlap => true, - } - } - -} - - -pub fn validate_placement(objects: &Vec) -> Vec { - let starts_and_ends = objects.iter() - .map(get_start_and_end) - .collect::>(); - let mut errors = Vec::new(); - for start_and_end in &starts_and_ends { - if let Err(error) = start_and_end { - errors.push(error.clone()); - } - } - if !errors.is_empty() { - return errors; - } - let start_end_pairs = starts_and_ends.into_iter() - .map(|start_and_end| start_and_end.unwrap()) - .sorted_by_key(|(start, end)| *start) - .tuple_windows(); - for ((_, prev_end), (next_start, _)) in start_end_pairs { - if prev_end > next_start { - errors.push(MemoryPlacementError::ObjectsOverlap); - } - } - errors -} - -/// Returns the first memory location the object occupies and the first memory location after the object. -/// The object occupies all locations between the 'start' inclusive and 'end' exclusive. -fn get_start_and_end(object: &ir5_expand_pseudo_ops::Object) -> Result<(Addr, Addr), MemoryPlacementError> { - match &object.origin.value { - Err(error) => { - Err(MemoryPlacementError::InvalidOrigin { - parse_error: error.clone() - }) - }, - Ok(origin) => { - let start = *origin; - let mut end = start; - for operation in &object.content.operations { - match operation.num_memory_locations_occupied() { - Ok(num_locations) => { - end += num_locations as Addr; - }, - Err(error) => { - return Err(MemoryPlacementError::UnknownPseudoOpLength { - parse_error: error.clone() - }); - } - } - } - Ok((start, end)) - }, - } -} diff --git a/assembler/src/analysis/mod.rs b/assembler/src/analysis/mod.rs deleted file mode 100644 index fb73117..0000000 --- a/assembler/src/analysis/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -/// These modules provide functions that analyze fully-parsed syntax trees -/// and related data structures. - -/// Analyzes whether objects can/will be placed in valid memory locations without overlap. -pub mod memory_placement; - -/// Creates a structure to store the locations of labels. -/// Used for later computing offsets from label operands. 
-pub mod symbol_table; - -/// Extracts all errors from the parsed structs. -pub mod extract_errors; - -/// Determines whether or not a parsed struct can be successfully assembled. -pub mod validate; \ No newline at end of file diff --git a/assembler/src/analysis/symbol_table.rs b/assembler/src/analysis/symbol_table.rs deleted file mode 100644 index 4eb0caf..0000000 --- a/assembler/src/analysis/symbol_table.rs +++ /dev/null @@ -1,118 +0,0 @@ -use std::collections::HashMap; -use lc3_isa::Addr; -use crate::ir::ir5_expand_pseudo_ops; -use crate::lexer::Span; -use crate::error::ParseError; -use annotate_snippets::snippet::{AnnotationType, SourceAnnotation}; - -pub type SymbolTable<'input> = HashMap<&'input str, Addr>; - -#[derive(Debug, Clone)] -pub enum SymbolTableError { - InvalidOrigin { - parse_error: ParseError, - }, - UnknownPseudoOpLength { - parse_error: ParseError, - }, - DuplicateLabel { - ranges: (Span, Span), - label_text: String - } -} - -impl SymbolTableError { - - pub fn message(&self) -> String { - use SymbolTableError::*; - match self { - InvalidOrigin { .. } => format!("could not validate memory placement due to error parsing .ORIG"), - UnknownPseudoOpLength { .. } => format!("could not validate memory placement due to error parsing pseudo-op"), - DuplicateLabel { label_text, .. } => format!("duplicate label {}", label_text), - } - } - - pub fn annotations(&self) -> Vec { - use SymbolTableError::*; - let mut annotations = Vec::new(); - - macro_rules! push_annotation { - ($range:expr, $label:expr) => { - annotations.push( - SourceAnnotation { - range: $range.clone(), - label: $label, - annotation_type: AnnotationType::Error, - } - ); - } - } - - match self { - InvalidOrigin { .. } - | UnknownPseudoOpLength { .. } => {}, - DuplicateLabel { ranges: (range1, range2), .. } => { - push_annotation!(range1, "first instance here"); - push_annotation!(range2, "second instance here"); - }, - } - annotations - } - - pub fn should_show(&self) -> bool { - use SymbolTableError::*; - - match self { - InvalidOrigin { .. } - | UnknownPseudoOpLength { .. } => false, - DuplicateLabel { .. 
} => true, - } - } - -} - - -pub fn build_symbol_table<'input>(object: &ir5_expand_pseudo_ops::Object<'input>) -> Result, Vec> { - let mut symbol_table = HashMap::new(); - let mut errors = Vec::new(); - match &object.origin.value { - Err(parse_error) => { - errors.push(SymbolTableError::InvalidOrigin { parse_error: parse_error.clone() }); - }, - Ok(origin) => { - let mut current_location = *origin; - for operation in object.content.operations.iter() { - if let Some(label) = &operation.label { - let span = label.src.span; - if let Ok(label_text) = label.value { - let other_value = symbol_table.insert(label_text, (current_location, span)); - if let Some((other_location, other_span)) = other_value { - errors.push(SymbolTableError::DuplicateLabel { // TODO: handle triplicate+ labels in one error - ranges: (other_span, span), - label_text: label_text.to_string() - }); - } - } - } - match operation.num_memory_locations_occupied() { - Ok(num_locations) => { - current_location += num_locations as Addr; - }, - Err(error) => { - errors.push(SymbolTableError::UnknownPseudoOpLength { - parse_error: error.clone() - }); - } - }; - }; - } - }; - if errors.is_empty() { - let symbol_table = symbol_table.iter() - .map(|(label, (addr, span))| (*label, *addr)) - .collect(); - Ok(symbol_table) - } else { - Err(errors) - } -} diff --git a/assembler/src/analysis/validate.rs b/assembler/src/analysis/validate.rs deleted file mode 100644 index 8aa2839..0000000 --- a/assembler/src/analysis/validate.rs +++ /dev/null @@ -1,12 +0,0 @@ -use crate::error::Error; -use crate::complete::Program; -use crate::analysis::extract_errors::extract_errors; - -pub fn validate(program: &Program) -> Result<(), Vec> { - let errors = extract_errors(program); - if errors.is_empty() { - Ok(()) - } else { - Err(errors) - } -} \ No newline at end of file diff --git a/assembler/src/new/assembler.rs b/assembler/src/assembler.rs similarity index 99% rename from assembler/src/new/assembler.rs rename to assembler/src/assembler.rs index efaa15f..28f6cdb 100644 --- a/assembler/src/new/assembler.rs +++ b/assembler/src/assembler.rs @@ -3,10 +3,10 @@ use std::convert::{TryFrom, TryInto}; use std::fmt::Debug; use std::num::{ParseIntError, TryFromIntError}; use lc3_isa::{Addr, Reg, SignedWord, Word}; -use crate::new::lexer::{ConditionCodes, LiteralValue, Opcode}; -use crate::new::parser::Operand; -use super::parser; -use super::parser::{Program, WithErrData}; +use crate::lexer::{ConditionCodes, LiteralValue, Opcode}; +use crate::parser::Operand; +use crate::parser; +use crate::parser::{Program, WithErrData}; pub(crate) type SymbolTable = HashMap; diff --git a/assembler/src/complete.rs b/assembler/src/complete.rs deleted file mode 100644 index 8e4e973..0000000 --- a/assembler/src/complete.rs +++ /dev/null @@ -1,441 +0,0 @@ -use lc3_isa::{Addr, Word, Instruction, SignedWord}; -use crate::lexer::{Token, Span}; -use crate::analysis::symbol_table; -use crate::analysis::symbol_table::{SymbolTableError, build_symbol_table}; -use crate::error::{ParseError, Error}; -use crate::ir::ir4_parse_ambiguous_tokens::{UnsignedImmOrLabel, ImmOrLabel, Sr2OrImm5, Checked}; -use crate::ir::{ir2_parse_line_syntax, ir4_parse_ambiguous_tokens, ir5_expand_pseudo_ops,}; -use std::collections::HashMap; -use crate::analysis::memory_placement::{MemoryPlacementError, validate_placement}; -use lc3_isa::util::MemoryDump; -use annotate_snippets::snippet::{AnnotationType, SourceAnnotation}; -use crate::analysis::validate::validate; - -/// `complete` will store as much data as possible 
-/// relating to the source *and* what it will be assembled to. -/// This will allow querying for the source assembled to a memory location, -/// the addresses corresponding to labels, and whatever is required in the future -/// to provide a nice development environment. - -pub type Label<'input> = ir5_expand_pseudo_ops::Label<'input>; -pub type Immediate<'input, Addr> = ir5_expand_pseudo_ops::Immediate<'input, Addr>; -pub type SymbolTable<'input> = Result, Vec>; - -#[derive(Debug)] -pub struct Program<'input> { - pub objects: Vec>, - pub memory_placement_errors: Vec, - pub ignored: Vec>, -} - -impl<'input> Program<'input> { - - pub fn assemble(&self, background: Option) -> MemoryDump { - let mut memory = if let Some(bg) = background { - MemoryDump::from(bg) - } else { - MemoryDump::blank() - }; - for object in &self.objects { - let mut i = *object.origin.value.as_ref().unwrap() as usize; - for operation in &object.content.operations { - match operation.instruction_or_values.as_ref().unwrap() { - InstructionOrValues::Instruction(_, word) => { - memory[i] = *word; - i += 1; - }, - InstructionOrValues::Values(values) => { - for value in values { - memory[i] = *value; - i += 1; - } - }, - } - } - } - memory - } - - pub fn assemble_safe(&self, background: Option) -> Result> { - validate(self) - .map(|_| self.assemble(background)) - } - -} - -impl<'input> Program<'input> { - pub fn get_source(&self, address: Addr) -> Option> { - for object in &self.objects { - if let Some(source) = object.get_source(address) { - return Some(source); - } - } - None - } - - pub fn get_label_addr(&self, label: &str) -> Option { - for object in &self.objects { - if let Some(addr) = object.get_label_addr(label) { - return Some(addr); - } - } - None - } -} - -#[derive(Debug)] -pub struct Object<'input> { - pub origin_src: ir5_expand_pseudo_ops::Operation<'input>, - pub origin: Immediate<'input, Addr>, - pub content: ObjectContent<'input>, - pub symbol_table: SymbolTable<'input>, -} - -impl<'input> Object<'input> { - pub fn get_source(&self, address: Addr) -> Option> { - match &self.origin.value { - Err(err) => None, - Ok(origin) => { - if address < *origin { - return None; - } - let mut i = *origin as usize; - for operation in &self.content.operations { - if i as Addr == address { - return Some(operation.src_lines.clone()); - } - match operation.num_memory_locations_occupied() { - Err(_) => { return None; }, - Ok(len) => { i += len }, - } - } - None - } - } - } - - pub fn get_label_addr(&self, label_to_find: &str) -> Option { - match &self.origin.value { - Err(err) => None, - Ok(origin) => { - let mut i = *origin as usize; - for operation in &self.content.operations { - if let Some(Checked { value: Ok(label), .. 
}) = &operation.label { - if *label == label_to_find { - return Some(i as Addr); - } - } - match operation.num_memory_locations_occupied() { - Err(_) => { return None; }, - Ok(len) => { i += len }, - } - } - None - } - } - } -} - -#[derive(Debug)] -pub struct ObjectContent<'input> { - pub operations: Vec>, - pub empty_lines: Vec>, - pub hanging_labels: Vec>, - pub invalid_lines: Vec>, -} - -#[derive(Debug)] -pub struct Operation<'input> { - pub label: Option>, - pub operator: Token<'input>, - pub nzp: Result, ParseError>, - pub operands: Operands<'input>, - - pub src_lines: Vec, - pub separators: Vec>, - pub whitespace: Vec>, - pub comments: Vec>, - pub newlines: Vec>, - - pub instruction_or_values: Result, -} - -#[derive(Debug)] -pub enum InstructionOrValues { - Instruction(Instruction, Word), - Values(Vec), -} - -impl<'input> Operation<'input> { - - pub fn num_memory_locations_occupied(&self) -> Result { - use InstructionOrValues::*; - match &self.instruction_or_values { - Ok(Instruction(_, _)) => Ok(1), - Ok(Values(values)) => Ok(values.len()), - Err(error) => Err(error.clone()) - } - } - -} - -pub type Operands<'input> = ir5_expand_pseudo_ops::Operands<'input>; -pub type ConditionCodes = ir5_expand_pseudo_ops::ConditionCodes; -pub type Separator<'input> = ir5_expand_pseudo_ops::Separator<'input>; - -#[derive(Debug, Clone)] -pub enum ConstructInstructionError { - EarlierParseError { - error: ParseError, - }, - SymbolTableInvalid { - errors: Vec, - }, - InvalidLabel { - span: Span, - label: String, - }, -} - -impl ConstructInstructionError { - - pub fn message(&self) -> String { - use ConstructInstructionError::*; - match self { - EarlierParseError { .. } => format!("failed to construct instruction due to previous error parsing"), - SymbolTableInvalid { .. } => format!("failed to construct instruction due to previous errors constructing symbol table"), - InvalidLabel { label, .. } => format!("instruction references invalid label {}", label), - } - } - - pub fn annotations(&self) -> Vec { - use ConstructInstructionError::*; - - let mut annotations = Vec::new(); - - macro_rules! push_annotation { - ($range:expr, $label:expr) => { - annotations.push( - SourceAnnotation { - range: $range.clone(), - label: $label, - annotation_type: AnnotationType::Error, - } - ); - } - } - match self { - EarlierParseError { .. } - | SymbolTableInvalid { .. } => {}, - InvalidLabel { span, .. } => { push_annotation!(span, "invalid label here") }, - } - annotations - } - - pub fn should_show(&self) -> bool { - use ConstructInstructionError::*; - match self { - EarlierParseError { .. } - | SymbolTableInvalid { .. } => false, - InvalidLabel { .. 
} => true, - } - } -} - -pub fn construct_all_instructions(file: ir5_expand_pseudo_ops::File) -> Program { - let ir5_expand_pseudo_ops::File { objects, ignored } = file; - let memory_placement_errors = validate_placement(&objects); - let objects = objects.into_iter() - .map(construct_instructions) - .collect(); - Program { objects, memory_placement_errors, ignored } -} - -pub fn construct_instructions(object: ir5_expand_pseudo_ops::Object) -> Object { - let symbol_table = build_symbol_table(&object); - let ir5_expand_pseudo_ops::Object { origin_src, origin, content } = object; - let content = construct_object_content_instructions(content, &origin, &symbol_table); - Object { origin_src, origin, content, symbol_table } -} - -fn construct_object_content_instructions<'input>(content: ir5_expand_pseudo_ops::ObjectContent<'input>, origin: &Immediate<'input, Addr>, symbol_table: &SymbolTable<'input>) -> ObjectContent<'input> { - let ir5_expand_pseudo_ops::ObjectContent { operations, empty_lines, hanging_labels, invalid_lines } = content; - let operations = construct_operations_instructions(operations, origin, symbol_table); - ObjectContent { operations, empty_lines, hanging_labels, invalid_lines, } -} - -fn construct_operations_instructions<'input, O>(operations: O, origin: &Immediate<'input, Addr>, symbol_table: &SymbolTable<'input>) -> Vec> - where O: IntoIterator> -{ - use ConstructInstructionError::*; - - let mut current_location = match &origin.value { - Err(error) => Err(error.into()), - Ok(origin) => Ok(*origin), - }; - let mut new_operations = Vec::new(); - for operation in operations { - let increment = operation.num_memory_locations_occupied(); - let new_operation = construct_instruction_for_operation(operation, ¤t_location, symbol_table); - new_operations.push(new_operation); - current_location = match (current_location, increment) { - (Ok(location), Ok(amount)) => Ok(location + amount as u16), - (Ok(_), Err(error)) => Err(error.into()), - (error, _) => error, - }; - } - - new_operations -} - - -fn construct_instruction_for_operation<'input>( - operation: ir5_expand_pseudo_ops::Operation<'input>, - location: &Result, - symbol_table: &SymbolTable<'input> -) -> Operation<'input> { - let ir5_expand_pseudo_ops::Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines, expanded } = operation; - let instruction_or_values = construct_instruction_or_values(location, symbol_table, &nzp, operands.clone(), &expanded); - Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines, instruction_or_values, } -} - -impl From for ConstructInstructionError { - fn from(error: ParseError) -> Self { - ConstructInstructionError::EarlierParseError { - error - } - } -} -impl From<&ParseError> for ConstructInstructionError { - fn from(error: &ParseError) -> Self { - ConstructInstructionError::EarlierParseError { - error: error.clone() - } - } -} - -fn construct_instruction_or_values( - location: &Result, - symbol_table: &SymbolTable, - nzp: &Result, ParseError>, - operands: ir5_expand_pseudo_ops::Operands, - expanded: &Option, ParseError>>, -) -> Result { - use ConstructInstructionError::*; - - let location = location.clone()?; - match operands { - Operands::Fill { value } => { - let value = value.value?; // TODO: lol - let value = match value { - UnsignedImmOrLabel::Imm(immediate) => { - let immediate = immediate.value?; - Ok(immediate) - }, - UnsignedImmOrLabel::Label(label) => { - let label_value = &label.value?; - if let 
Err(errors) = symbol_table { - return Err(ConstructInstructionError::SymbolTableInvalid { errors: errors.clone() }) - } - let symbol_table = symbol_table.as_ref().unwrap(); - match symbol_table.get(label_value) { - None => { - Err(InvalidLabel { - span: label.src.span, - label: label_value.to_string(), - }) - }, - Some(addr) => Ok(*addr), - } - }, - }; - value.map(|value| InstructionOrValues::Values(vec![value])) - }, - Operands::Add { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.value? { - Sr2OrImm5::Imm5(immediate) => { Ok(Instruction::new_add_imm(dr.value?, sr1.value?, immediate.value?,).into()) }, - Sr2OrImm5::Sr2(src_reg) => { Ok(Instruction::new_add_reg(dr.value?, sr1.value?, src_reg.value?,).into()) } - }, - Operands::And { dr, sr1, sr2_or_imm5, } => match sr2_or_imm5.value? { - Sr2OrImm5::Imm5(immediate) => { Ok(Instruction::new_and_imm(dr.value?, sr1.value?, immediate.value?,).into()) }, - Sr2OrImm5::Sr2(src_reg) => { Ok(Instruction::new_and_reg(dr.value?, sr1.value?, src_reg.value?,).into()) } - }, - Operands::Ld { dr, pc_offset9 } => Ok(Instruction::new_ld(dr.value?, compute_offset(pc_offset9, location, symbol_table)?).into()), - Operands::Ldi { dr, pc_offset9 } => Ok(Instruction::new_ldi(dr.value?, compute_offset(pc_offset9, location, symbol_table)?).into()), - Operands::Ldr { dr, base, offset6 } => Ok(Instruction::new_ldr(dr.value?, base.value?, offset6.value?).into()), - Operands::Lea { dr, pc_offset9 } => Ok(Instruction::new_lea(dr.value?, compute_offset(pc_offset9, location, symbol_table)?).into()), - - Operands::St { sr, pc_offset9 } => Ok(Instruction::new_st(sr.value?, compute_offset(pc_offset9, location, symbol_table)?).into()), - Operands::Sti { sr, pc_offset9 } => Ok(Instruction::new_sti(sr.value?, compute_offset(pc_offset9, location, symbol_table)?).into()), - Operands::Str { sr, base, offset6 } => Ok(Instruction::new_str(sr.value?, base.value?, offset6.value?).into()), - - Operands::Not { dr, sr } => Ok(Instruction::new_not(dr.value?, sr.value?).into()), - - Operands::Br { pc_offset9, .. } => { - let nzp = nzp.clone()?.unwrap(); - Ok(Instruction::new_br( - nzp.n, nzp.z, nzp.p, - compute_offset(pc_offset9, location, symbol_table)? - ).into()) - } - - Operands::Jmp { base } => Ok(Instruction::new_jmp(base.value?).into()), - Operands::Jsr { pc_offset11 } => Ok(Instruction::new_jsr(compute_offset(pc_offset11, location, symbol_table)?).into()), - Operands::Jsrr { base } => Ok(Instruction::new_jsrr(base.value?).into()), - - Operands::Ret => Ok(Instruction::new_ret().into()), - Operands::Rti => Ok(Instruction::new_rti().into()), - - Operands::Trap { trap_vec } => Ok(Instruction::new_trap(trap_vec.value?).into()), - Operands::Getc => Ok(Instruction::new_trap(0x20).into()), - Operands::Out => Ok(Instruction::new_trap(0x21).into()), - Operands::Puts => Ok(Instruction::new_trap(0x22).into()), - Operands::In => Ok(Instruction::new_trap(0x23).into()), - Operands::Putsp => Ok(Instruction::new_trap(0x24).into()), - Operands::Halt => Ok(Instruction::new_trap(0x25).into()), - - Operands::Stringz { .. } - | Operands::Blkw { .. } - | Operands::End { .. } => { - let expanded_inner = expanded.as_ref().unwrap(); - let values = expanded_inner.as_ref()?; - Ok(InstructionOrValues::Values(values.clone())) - } - - Operands::Orig { .. 
} => { unreachable!("Unexpected attempt to assemble a .ORIG.") } - } -} - -impl From for InstructionOrValues { - fn from(inst: Instruction) -> Self { - InstructionOrValues::Instruction(inst, inst.into()) - } -} - -fn compute_offset(pc_offset: Checked, location: Addr, symbol_table: &SymbolTable) -> Result { - use ImmOrLabel::*; - - let pc_offset = pc_offset.value?; - if let Err(errors) = symbol_table { - return Err(ConstructInstructionError::SymbolTableInvalid { errors: errors.clone() }) - } - let symbol_table = symbol_table.as_ref().unwrap(); - match pc_offset { - Imm(immediate) => Ok(immediate.value?), - Label(label) => { - let label_value = label.value?; - match symbol_table.get(label_value) { - None => { - Err(ConstructInstructionError::InvalidLabel { - span: label.src.span, - label: label_value.to_string(), - }) - }, - Some(addr) => { - let label_location = *addr as i64; - let offset_base = (location + 1) as i64; - Ok((label_location - offset_base) as SignedWord) - }, - } - }, - } -} diff --git a/assembler/src/error.rs b/assembler/src/error.rs deleted file mode 100644 index 072e025..0000000 --- a/assembler/src/error.rs +++ /dev/null @@ -1,307 +0,0 @@ -use std::fmt::{Display, Formatter, Result}; -use crate::lexer::Span; -use annotate_snippets::snippet::{Snippet, Annotation, Slice, SourceAnnotation, AnnotationType}; - -use ParseError::*; -use itertools::Itertools; -use crate::ir::ir4_parse_ambiguous_tokens; -use crate::ir::ir4_parse_ambiguous_tokens::{Object, ObjectContent, Operation, Operands}; -use lc3_isa::SignedWord; -use crate::ir::ir2_parse_line_syntax::LineContent::Invalid; -use annotate_snippets::display_list::FormatOptions; -use crate::analysis::memory_placement::MemoryPlacementError; -use crate::analysis::symbol_table::SymbolTableError; -use crate::complete::ConstructInstructionError; - -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum LexError { - Unknown, -} - -impl LexError { - - pub fn message(&self) -> String { - match self { - LexError::Unknown => "encountered unknown token when lexing", - }.to_string() - } - - pub fn annotations(&self) -> Vec { - match self { - LexError::Unknown => vec![], - } - } - - pub fn should_show(&self) -> bool { - match self { - LexError::Unknown => true, - } - } -} - - -#[derive(Debug, Clone)] -pub enum ParseError { - NoObjects, - InvalidReg { - range: Span, - reason: InvalidRegReason - }, - InvalidLabel { - range: Span, - reasons: Vec, - }, - InvalidImmediate { - range: Span, - reason: InvalidImmediateReason - }, - HangingLabel { - range: Span, - }, - InvalidLine { - range: Option, - }, - InvalidRegOrImm5 { - range: Span, - invalid_reg_reason: InvalidRegReason, - invalid_imm5_reason: InvalidImmediateReason, - }, - InvalidLabelOrImmediate { - range: Span, - invalid_label_reasons: Vec, - invalid_immediate_reason: InvalidImmediateReason, - }, - Misc(String), -} - -pub enum Error { - Lex(LexError), - Parse(ParseError), - MemoryPlacement(MemoryPlacementError), - SymbolTable(SymbolTableError), - ConstructInstruction(ConstructInstructionError), -} - -impl Error { - pub fn message(&self) -> String { - use Error::*; - match self { - Lex(error) => error.message(), - Parse(error) => error.message(), - MemoryPlacement(error) => error.message(), - SymbolTable(error) => error.message(), - ConstructInstruction(error) => error.message(), - } - } - - pub fn annotations(&self) -> Vec { - use Error::*; - match self { - Lex(error) => error.annotations(), - Parse(error) => error.annotations(), - MemoryPlacement(error) => error.annotations(), - SymbolTable(error) 
=> error.annotations(), - ConstructInstruction(error) => error.annotations(), - } - } - - pub fn should_show(&self) -> bool { - use Error::*; - match self { - Lex(error) => error.should_show(), - Parse(error) => error.should_show(), - MemoryPlacement(error) => error.should_show(), - SymbolTable(error) => error.should_show(), - ConstructInstruction(error) => error.should_show(), - } - } - -} - -// TODO: write macro for these From impls -impl From for Error { - fn from(error: LexError) -> Self { - Error::Lex(error) - } -} - -impl From<&LexError> for Error { - fn from(error: &LexError) -> Self { - Error::Lex(error.clone()) - } -} - -impl From for Error { - fn from(error: ParseError) -> Self { - Error::Parse(error) - } -} - -impl From<&ParseError> for Error { - fn from(error: &ParseError) -> Self { - Error::Parse(error.clone()) - } -} - -impl From for Error { - fn from(error: MemoryPlacementError) -> Self { - Error::MemoryPlacement(error) - } -} - -impl From<&MemoryPlacementError> for Error { - fn from(error: &MemoryPlacementError) -> Self { - Error::MemoryPlacement(error.clone()) - } -} - -impl From for Error { - fn from(error: SymbolTableError) -> Self { - Error::SymbolTable(error) - } -} - -impl From<&SymbolTableError> for Error { - fn from(error: &SymbolTableError) -> Self { - Error::SymbolTable(error.clone()) - } -} - -impl From for Error { - fn from(error: ConstructInstructionError) -> Self { - Error::ConstructInstruction(error) - } -} - -impl From<&ConstructInstructionError> for Error { - fn from(error: &ConstructInstructionError) -> Self { - Error::ConstructInstruction(error.clone()) - } -} - -#[derive(Debug, Clone)] -pub enum InvalidRegReason { - FirstChar, - Number, -} - -impl Display for InvalidRegReason { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - use InvalidRegReason::*; - match self { - FirstChar => { write!(f, "didn't start with R") } - Number => { write!(f, "didn't follow R with only 0-7") } - } - } -} - -#[derive(Debug, Clone)] -pub enum InvalidImmediateReason { - NoChars, - RadixChar { actual: String }, - NoNumber, - Number { actual: String }, - OutOfRange { value: SignedWord, num_bits: u32 }, -} - -impl Display for InvalidImmediateReason { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - use InvalidImmediateReason::*; - match self { - NoChars => { write!(f, "didn't have any characters") } - NoNumber => { write!(f, "didn't follow radix sign with number") } - RadixChar { actual } => { write!(f, "didn't use valid radix sign (was: {})", actual) } - Number { actual } => { write!(f, "couldn't parse number (was: {})", actual) } - OutOfRange { value, num_bits } => { write!(f, "value {} can't be represented in {} bits", value, num_bits)} - } - } -} - -#[derive(Debug, Clone)] -pub enum InvalidLabelReason { - Length { actual: usize }, - FirstChar { actual: Option }, - OtherChars { actual: String }, -} - -impl Display for InvalidLabelReason { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - use InvalidLabelReason::*; - match self { - Length { actual } => { write!(f, "not between 1-20 chars (was: {})", actual) } - FirstChar { actual } => { write!(f, "first char not alphabetic (was: {:?})", actual) }, - OtherChars { actual } => { write!(f, "other chars not alphanumeric or underscores (bad chars: {})", actual) }, - } - } -} - -impl ParseError { - pub fn message(&self) -> String { - match self { - InvalidLabel { reasons, .. } => { - format!("invalid label, reasons -- {}", reasons.iter().map(InvalidLabelReason::to_string).join(", ")) - }, - InvalidReg { reason, .. 
} => { - format!("invalid register, {}", reason) - } - Misc(message) => message.clone(), - HangingLabel { .. } => { format!("hanging label") } - InvalidLine { .. } => { format!("invalid line") } - InvalidImmediate { reason, .. } => { format!("invalid immediate, {}", reason) } - InvalidRegOrImm5 { invalid_reg_reason, invalid_imm5_reason, .. } => { - format!("invalid register or 5-bit immediate,\n\ - invalid as register because: {}\n\ - invalid as immediate because: {}", - invalid_reg_reason, invalid_imm5_reason) - } - InvalidLabelOrImmediate { invalid_label_reasons, invalid_immediate_reason, .. } => { - format!("invalid label or immediate,\n\ - invalid as label because: {}\n\ - invalid as immediate because: {}", - invalid_label_reasons.iter().map(InvalidLabelReason::to_string).join(", "), - invalid_immediate_reason) - } - NoObjects => { - format!("no objects (.ORIG/.END blocks) found in file") - } - } - } - - pub fn annotations(&self) -> Vec { - let mut annotations = Vec::new(); - - macro_rules! push_annotation { - ($range:expr, $label:expr) => { - annotations.push( - SourceAnnotation { - range: $range.clone(), - label: $label, - annotation_type: AnnotationType::Error, - } - ); - } - } - match self { - InvalidLabel { range, .. } => { push_annotation!(range, "invalid label here"); }, - InvalidReg { range, .. } => { push_annotation!(range, "invalid reg here"); }, - HangingLabel { range } => { push_annotation!(range, "hanging label here"); }, - InvalidLine { range } => { - if let Some(range) = range { - push_annotation!(range, "invalid line here"); - } - } - InvalidImmediate { range, .. } => { push_annotation!(range, "invalid immediate here"); } - InvalidRegOrImm5 { range, .. } => { push_annotation!(range, "invalid register or immediate here"); } - InvalidLabelOrImmediate { range, .. } => { push_annotation!(range, "invalid label or immediate here"); } - NoObjects - | Misc(_) => {}, - } - annotations - } - - pub fn should_show(&self) -> bool { - true - } - -} - diff --git a/assembler/src/ir/ir1_parse_lines.rs b/assembler/src/ir/ir1_parse_lines.rs deleted file mode 100644 index 28cf072..0000000 --- a/assembler/src/ir/ir1_parse_lines.rs +++ /dev/null @@ -1,91 +0,0 @@ -use crate::lexer::{Token, Lexer, TokenType}; -use crate::util::reconstruct_src; -use std::iter::Peekable; -use itertools::Itertools; - -pub type Lines<'input> = Vec>; - -pub struct Line<'input> { - pub src: String, - pub content: Vec>, - pub comment: Option>, - pub newline: Option>, -} - -pub fn parse_lines(lexer: Lexer) -> Lines { - let mut tokens = lexer.peekable(); - let mut simple_lines = Vec::new(); - while tokens.peek().is_some() { - let simple_line = parse_simple_line(&mut tokens); - simple_lines.push(simple_line); - } - simple_lines -} - -fn parse_simple_line<'input>(tokens: &mut Peekable>) -> Line<'input> { - let content = tokens.peeking_take_while(|&Token { ty, .. }| - ty != TokenType::Comment && ty != TokenType::Newline) - .collect::>(); - let next = tokens.next(); - let (comment, newline) = match next { - Some(Token { ty, .. }) => match ty { - TokenType::Comment => { - let newline = tokens.next(); - if let Some(Token { ty, .. 
}) = newline { - assert_eq!(ty, TokenType::Newline); - } - (next, newline) - } - TokenType::Newline => (None, next), - _ => unreachable!("Found more non-comment, non-newline content after skipping to comment or newline."), - } - None => (None, None), - }; - - let mut all_tokens = vec![]; - all_tokens.extend(content.clone()); - if let Some(token) = comment { - all_tokens.push(token); - } - if let Some(token) = newline { - all_tokens.push(token); - } - let src = reconstruct_src(all_tokens); - - Line { src, content, comment, newline } -} - -#[cfg(test)] -mod tests { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn no_newline() { - let lexer = Lexer::new("ADD"); - let simple_lines = parse_lines(lexer); - let Line { src, content, comment, newline } = simple_lines.get(0).unwrap(); - assert_eq!(*src, "ADD".to_string()); - assert_eq!(content.len(), 1); - assert!(comment.is_none()); - assert!(newline.is_none()); - } - - #[test] - fn two_lines() { - let lexer = Lexer::new("ADD ; test\n.END"); - let simple_lines = parse_lines(lexer); - let Line { src, content, comment, newline } = simple_lines.get(0).unwrap(); - assert_eq!(*src, "ADD ; test\n".to_string()); - assert_eq!(content.len(), 2); - assert!(comment.is_some()); - assert!(newline.is_some()); - - let Line { src, content, comment, newline } = simple_lines.get(1).unwrap(); - assert_eq!(*src, ".END".to_string()); - assert_eq!(content.len(), 1); - assert!(comment.is_none()); - assert!(newline.is_none()); - } -} - diff --git a/assembler/src/ir/ir2_parse_line_syntax.rs b/assembler/src/ir/ir2_parse_line_syntax.rs deleted file mode 100644 index 282e304..0000000 --- a/assembler/src/ir/ir2_parse_line_syntax.rs +++ /dev/null @@ -1,447 +0,0 @@ -use std::iter::Peekable; -use itertools::Itertools; -use crate::lexer::{Token, TokenType, Opcode, Op, NamedTrap, PseudoOp, Span}; -use crate::ir::ir1_parse_lines; -use crate::error::ParseError; - -pub type Lines<'input> = Vec>; - -#[derive(Clone, Debug)] -pub struct Line<'input> { - pub src: String, - pub content: LineContent<'input>, - pub whitespace: Vec>, // Only includes whitespace around operation - pub comment: Option>, - pub newline: Option>, -} - -impl<'input> Line<'input> { - pub fn span(&self) -> Option { - let tokens = self.tokens(); - let start = tokens.iter().map(|token| token.span.0).min(); - let end = tokens.iter().map(|token| token.span.1).max(); - if let (Some(start), Some(end)) = (start, end) { - Some((start, end)) - } else { - None - } - } - - fn tokens(&self) -> Vec<&Token> { - let mut tokens = Vec::new(); - let Line { content, whitespace, comment, newline, .. 
} = self; - tokens.extend(content.tokens()); - tokens.extend(whitespace); - if let Some(comment) = comment { - tokens.push(comment); - } - if let Some(newline) = newline { - tokens.push(newline); - } - tokens - } -} - -pub type Label<'input> = Token<'input>; - -#[derive(Clone, Debug)] -pub enum LineContent<'input> { - Valid(Option>, Option>), - Invalid(Vec>) -} - -impl<'input> LineContent<'input> { - fn tokens(&self) -> Vec<&Token> { - match self { - LineContent::Valid(maybe_label, maybe_operation_tokens) => { - let mut tokens = Vec::new(); - if let Some(label) = maybe_label { - tokens.push(label); - } - if let Some(operation_tokens) = maybe_operation_tokens { - tokens.extend(operation_tokens.tokens()) - } - tokens - } - LineContent::Invalid(tokens) => tokens.iter().collect() - } - } -} - - -#[derive(Clone, Debug)] -pub struct OperationTokens<'input> { - pub operator: Token<'input>, - pub operands: OperandTokens<'input>, - pub separators: Vec>, // To include internal whitespace, but not surrounding -} - -impl<'input> OperationTokens<'input> { - fn tokens(&self) -> Vec<&Token> { - let mut tokens = Vec::new(); - let OperationTokens { operator, operands, separators } = self; - tokens.push(operator); - tokens.extend(operands.tokens()); - tokens.extend(separators); - tokens - } -} - -#[derive(Clone, Debug)] -pub enum OperandTokens<'input> { - Add { dr: Token<'input>, sr1: Token<'input>, sr2_or_imm5: Token<'input> }, - And { dr: Token<'input>, sr1: Token<'input>, sr2_or_imm5: Token<'input> }, - Br { label: Label<'input> }, - Jmp { base: Token<'input> }, - Jsr { label: Label<'input> }, - Jsrr { base: Token<'input> }, - Ld { dr: Token<'input>, label: Label<'input>, }, - Ldi { dr: Token<'input>, label: Label<'input>, }, - Ldr { dr: Token<'input>, base: Token<'input>, offset6: Token<'input> }, - Lea { dr: Token<'input>, label: Label<'input> }, - Not { dr: Token<'input>, sr: Token<'input> }, - Ret, - Rti, - St { sr: Token<'input>, label: Label<'input> }, - Sti { sr: Token<'input>, label: Label<'input> }, - Str { sr: Token<'input>, base: Token<'input>, offset6: Token<'input> }, - Trap { trap_vec: Token<'input> }, - - Getc, - Out, - Puts, - In, - Putsp, - Halt, - - Orig { origin: Token<'input> }, - Fill { value: Token<'input> }, - Blkw { size: Token<'input> }, - Stringz { string: Token<'input> }, - End, -} - -impl<'input> OperandTokens<'input> { - fn tokens(&self) -> Vec<&Token> { - use OperandTokens::*; - - let mut tokens = Vec::new(); - match self { - Add { dr, sr1, sr2_or_imm5 } => { - tokens.push(dr); - tokens.push(sr1); - tokens.push(sr2_or_imm5); - }, - And { dr, sr1, sr2_or_imm5 } => { - tokens.push(dr); - tokens.push(sr1); - tokens.push(sr2_or_imm5); - }, - Br { label } => { tokens.push(label); }, - Jmp { base } => { tokens.push(base); }, - Jsr { label } => { tokens.push(label); }, - Jsrr { base } => { tokens.push(base); }, - Ld { dr, label } => { - tokens.push(dr); - tokens.push(label); - }, - Ldi { dr, label } => { - tokens.push(dr); - tokens.push(label); - }, - Ldr { dr, base, offset6 } => { - tokens.push(dr); - tokens.push(base); - tokens.push(offset6); - }, - Lea { dr, label } => { - tokens.push(dr); - tokens.push(label); - }, - Not { dr, sr } => { - tokens.push(dr); - tokens.push(sr); - }, - St { sr, label, } => { - tokens.push(sr); - tokens.push(label); - }, - Sti { sr, label, } => { - tokens.push(sr); - tokens.push(label); - }, - Str { sr, base, offset6, } => { - tokens.push(sr); - tokens.push(base); - tokens.push(offset6); - }, - Trap { trap_vec } => { tokens.push(trap_vec); }, - Orig 
{ origin } => { tokens.push(origin); }, - Fill { value } => { tokens.push(value); }, - Blkw { size } => { tokens.push(size); }, - Stringz { string } => { tokens.push(string); }, - - Ret - | Rti - | Getc - | Out - | Puts - | In - | Putsp - | Halt - | End => {}, - } - tokens - } -} - -pub fn parse_line_syntax(ir1_lines: ir1_parse_lines::Lines) -> Lines { - ir1_lines.into_iter() - .map(parse_line) - .collect() -} - -fn parse_line(ir1_line: ir1_parse_lines::Line) -> Line { - let ir1_parse_lines::Line { content: old_content, comment, newline, src, } = ir1_line; - let backup = old_content.clone(); - - let mut tokens = old_content.into_iter().peekable(); - let mut whitespace = Vec::new(); - skip_and_collect_whitespace(&mut tokens, &mut whitespace); - let label = parse_ambiguous(&mut tokens).ok(); - skip_and_collect_whitespace(&mut tokens, &mut whitespace); - let content = parse_operation_tokens(&mut tokens, &mut whitespace).map_or( - LineContent::Invalid(backup), - |operation_tokens| { LineContent::Valid(label, operation_tokens) } - ); - skip_and_collect_whitespace(&mut tokens, &mut whitespace); - Line { content, whitespace, comment, newline, src, } -} - -fn parse_ambiguous<'input, T>(tokens: &mut Peekable) -> Result, ParseError> - where T: Iterator> -{ - parse_token(tokens, TokenType::Ambiguous) -} - -fn parse_string<'input, T>(tokens: &mut Peekable) -> Result, ParseError> - where T: Iterator> -{ - parse_token(tokens, TokenType::String) -} - -fn parse_token<'input, T>(tokens: &mut Peekable, target_type: TokenType) -> Result, ParseError> - where T: Iterator> -{ - if let Some(&Token { ty, .. }) = tokens.peek() { - if ty == target_type { - return Ok(tokens.next().unwrap()); - } - } - Err(ParseError::Misc("Didn't find ambiguous token next.".to_string())) -} - -// Expands to the necessary steps to parse operands into a given OperandTokens struct variant. -// Ex: fill_operands! { 3; Add { dr, sr1, sr2_or_imm5, }; tokens, separators } -// expands to: -// let whitespace = parse_whitespace(tokens)?; -// separators.extend(whitespace); -// let mut operand_buffer: [Option>; 3] = [None; 3]; -// parse_operands(tokens, &mut separators, &mut operand_buffer)?; -// OperandTokens::Add { -// dr: operand_buffer[0].unwrap(), -// sr1: operand_buffer[1].unwrap(), -// sr2_or_imm5: operand_buffer[2].unwrap(), -// } - -// TODO: put inside parse_operand_tokens to make it so we don't have to pass in references to tokens and separators -macro_rules! fill_operands { - (@munch ($op_buf:ident) -> { $name:ident, $(($field:ident, $value:expr))* }) => { - OperandTokens::$name { - $($field: $value),* - } - }; - - (@munch ($i:expr, $op_buf:ident, $id:ident,) -> { $($output:tt)* }) => { - fill_operands! { @munch ($op_buf) -> { $($output)* ($id, $op_buf[$i].unwrap()) } } - }; - - (@munch ($i:expr, $op_buf:ident, $id:ident, $($next:tt)*) -> { $($output:tt)* }) => { - fill_operands! { @munch ($i+1usize, $op_buf, $($next)*) -> { $($output)* ($id, $op_buf[$i].unwrap()) } } - }; - - ($num:expr; $name:ident { $($input:tt)+ }; $tokens:ident, $separators:ident) => { - let whitespace = parse_whitespace($tokens)?; - $separators.extend(whitespace); - let mut operand_buffer: [Option>; $num] = [None; $num]; // TODO: write inner macro to munch and get size of array - parse_operands($tokens, &mut $separators, &mut operand_buffer)?; - fill_operands! 
{ @munch (0usize, operand_buffer, $($input)+) -> { $name, } } - }; -} - - -fn parse_operand_tokens<'input, T>(op: Op, tokens: &mut Peekable, mut separators: &mut Vec>) -> Result, ParseError> - where T: Iterator> -{ - let operands = match op { - Op::Opcode(opcode) => match opcode { - Opcode::Add => { fill_operands! { 3; Add { dr, sr1, sr2_or_imm5, }; tokens, separators } }, - Opcode::And => { fill_operands! { 3; And { dr, sr1, sr2_or_imm5, }; tokens, separators } }, - Opcode::Br => { fill_operands! { 1; Br { label, }; tokens, separators } }, - Opcode::Jmp => { fill_operands! { 1; Jmp { base, }; tokens, separators } }, - Opcode::Jsr => { fill_operands! { 1; Jsr { label, }; tokens, separators } }, - Opcode::Jsrr => { fill_operands! { 1; Jsrr { base, }; tokens, separators } }, - Opcode::Ld => { fill_operands! { 2; Ld { dr, label, }; tokens, separators } }, - Opcode::Ldi => { fill_operands! { 2; Ldi { dr, label, }; tokens, separators } }, - Opcode::Ldr => { fill_operands! { 3; Ldr { dr, base, offset6, }; tokens, separators } }, - Opcode::Lea => { fill_operands! { 2; Lea { dr, label, }; tokens, separators } }, - Opcode::Not => { fill_operands! { 2; Not { dr, sr, }; tokens, separators } }, - Opcode::Ret => OperandTokens::Ret, - Opcode::Rti => OperandTokens::Rti, - Opcode::St => { fill_operands! { 2; St { sr, label, }; tokens, separators } }, - Opcode::Sti => { fill_operands! { 2; Sti { sr, label, }; tokens, separators } }, - Opcode::Str => { fill_operands! { 3; Str { sr, base, offset6, }; tokens, separators } }, - Opcode::Trap => { fill_operands! { 1; Trap { trap_vec, }; tokens, separators } }, - }, - Op::NamedTrap(named_trap) => match named_trap { - NamedTrap::Getc => OperandTokens::Getc, - NamedTrap::Out => OperandTokens::Out, - NamedTrap::Puts => OperandTokens::Puts, - NamedTrap::In => OperandTokens::In, - NamedTrap::Putsp => OperandTokens::Putsp, - NamedTrap::Halt => OperandTokens::Halt, - }, - Op::PseudoOp(pseudo_op) => match pseudo_op { - PseudoOp::Orig => { fill_operands! { 1; Orig { origin, }; tokens, separators } }, - PseudoOp::Fill => { fill_operands! { 1; Fill { value, }; tokens, separators } }, - PseudoOp::Blkw => { fill_operands! { 1; Blkw { size, }; tokens, separators } }, - PseudoOp::Stringz => { - let whitespace = parse_whitespace(tokens)?; - separators.extend(whitespace); - let string = parse_string(tokens)?; - OperandTokens::Stringz { string } - }, - PseudoOp::End => OperandTokens::End, - }, - }; - Ok(operands) -} - -// Return None if no operation but valid line (i.e. only whitespace (optionally)) -// ^^^ assumes whitespace has already been skipped. 
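-// Return Some(OperationTokens) only if the next token is an operator and all of its
-// required operands parse, with no tokens left over on the line.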
-// Return Err if line doesn't have valid pattern of tokens -fn parse_operation_tokens<'input, T>(mut tokens: &mut Peekable, mut whitespace: &mut Vec>) -> Result>, ParseError> - where T: Iterator> -{ - match tokens.next() { - Some(token) => match token.ty { - TokenType::Op(op) => { - let mut separators = Vec::new(); - let operands = parse_operand_tokens(op, tokens, &mut separators)?; - skip_and_collect_whitespace(&mut tokens, &mut whitespace); - if tokens.peek().is_some() { - Err(ParseError::Misc("Extra tokens at end of line.".to_string())) - } else { - Ok(Some(OperationTokens { operator: token, operands, separators })) - } - } - TokenType::Whitespace => unreachable!("Function was called without first skipping whitespace."), - _ => Err(ParseError::Misc("Unexpected non-operator token at beginning of 'instruction'".to_string())) - } - None => Ok(None), - } -} - -// Returns Ok if operands parsed correctly and fills operands with Some(token) -// Otherwise, returns Err -fn parse_operands<'input, T>(tokens: &mut Peekable, separators: &mut Vec>, operands: &mut [Option>]) -> Result<(), ParseError> - where T: Iterator> -{ - for i in 0..operands.len() { - let operand = parse_ambiguous(tokens)?; - operands[i] = Some(operand); - if i < operands.len() - 1 { - let separator = parse_separator(tokens)?; - separators.extend(separator); - } - } - Ok(()) -} - -fn skip_and_collect_whitespace<'input, T>(tokens: &mut Peekable, whitespace: &mut Vec>) - where T: Iterator> -{ - let leading_whitespace = tokens.peeking_take_while(|&Token { ty, .. }| ty == TokenType::Whitespace); - whitespace.extend(leading_whitespace); -} - -fn parse_whitespace<'input, T>(tokens: &mut Peekable) -> Result>, ParseError> - where T: Iterator> -{ - let whitespace = tokens.peeking_take_while(|&Token { ty, .. }| ty == TokenType::Whitespace) - .collect::>(); - if whitespace.is_empty() { - Err(ParseError::Misc("Missing required whitespace.".to_string())) - } else { - Ok(whitespace) - } -} - -fn parse_separator<'input, T>(tokens: &mut Peekable) -> Result>, ParseError> - where T: Iterator> -{ - let separator = tokens.peeking_take_while(|&Token { ty, .. }| ty == TokenType::Whitespace || ty == TokenType::Comma) - .collect::>(); - let num_commas = separator.iter() - .filter(|&Token { ty, .. }| *ty == TokenType::Comma) - .count(); - if num_commas > 1 { - Err(ParseError::Misc("Too many comma separators.".to_string())) - } else if separator.is_empty() { - Err(ParseError::Misc("Missing separator.".to_string())) - } else { - Ok(separator) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::lexer::Lexer; - use crate::ir::ir1_parse_lines::parse_lines; - - #[test] - fn add() { - let lexer = Lexer::new("ADD R0, R0, R0"); - let simple_lines = parse_lines(lexer); - let lines = parse_line_syntax(simple_lines); - let Line { content, .. } = lines.get(0).unwrap(); - let matches = if let LineContent::Valid(None, Some(operation_tokens)) = content { - if let OperationTokens { operands: OperandTokens::Add { .. }, ..} = operation_tokens { - true - } else { false } - } else { false }; - assert!(matches); - } - - #[test] - fn labeled_add() { - let lexer = Lexer::new("LABEL\n\tADD R0, R1, #1"); - let simple_lines = parse_lines(lexer); - let lines = parse_line_syntax(simple_lines); - - let Line { content, .. } = lines.get(0).unwrap(); - let line_0_matches = if let LineContent::Valid(Some(_), None) = content { true } else { false }; - assert!(line_0_matches); - - let Line { content, .. 
} = lines.get(1).unwrap(); - let line_1_matches = if let LineContent::Valid(None, Some(operation_tokens)) = content { - if let OperationTokens { operands: OperandTokens::Add { .. }, .. } = operation_tokens { - true - } else { false } - } else { false }; - assert!(line_1_matches); - } - -} - diff --git a/assembler/src/ir/ir3_parse_objects.rs b/assembler/src/ir/ir3_parse_objects.rs deleted file mode 100644 index d16ddf7..0000000 --- a/assembler/src/ir/ir3_parse_objects.rs +++ /dev/null @@ -1,198 +0,0 @@ -use std::iter::Peekable; -use std::mem; -use crate::lexer::Token; -use crate::ir::ir2_parse_line_syntax; - -// Shorthands -pub type IR2Line<'input> = ir2_parse_line_syntax::Line<'input>; -pub type IR2Lines<'input> = ir2_parse_line_syntax::Lines<'input>; -pub type IR2LineContent<'input> = ir2_parse_line_syntax::LineContent<'input>; - -// Types "part of" this IR -pub type OperationTokens<'input> = ir2_parse_line_syntax::OperationTokens<'input>; -pub type OperandTokens<'input> = ir2_parse_line_syntax::OperandTokens<'input>; -pub type Label<'input> = ir2_parse_line_syntax::Label<'input>; - -#[derive(Clone)] -pub struct File<'input> { - pub objects: Vec>, - pub ignored: Vec>, -} - -#[derive(Clone)] -pub struct Object<'input> { - pub origin_src: Line<'input>, - pub origin: Token<'input>, - pub content: ObjectContent<'input>, -} - -#[derive(Clone)] -pub struct ObjectContent<'input> { - pub operations: Vec>, - pub empty_lines: Vec>, - pub hanging_labels: Vec>, - pub invalid_lines: Vec>, -} - -#[derive(Clone)] -pub struct Line<'input> { - pub src_lines: Vec, - pub label: Option>, - pub operation: OperationTokens<'input>, - pub whitespace: Vec>, - pub comments: Vec>, - pub newlines: Vec>, -} - -pub fn parse_objects(lines: IR2Lines) -> File { - let mut objects = Vec::new(); - let mut ignored = Vec::new(); - let mut lines = lines.into_iter().peekable(); - loop { - let maybe_line = lines.next(); - match maybe_line { - None => { break; }, - Some(line) => { - let line_backup = line.clone(); - match line { - IR2Line { - content: IR2LineContent::Valid(label, Some(operation)), - whitespace, comment, newline, src - } => { - if let OperationTokens { operands: OperandTokens::Orig { origin }, .. } = operation { - let mut comments = Vec::new(); - if let Some(comment) = comment { - comments.push(comment); - } - - let mut newlines = Vec::new(); - if let Some(newline) = newline { - newlines.push(newline); - } - let origin_src = Line { src_lines: vec![src], label, operation, whitespace, comments, newlines }; - match parse_unvalidated_object_content(&mut lines) { - Ok(content) => { objects.push(Object { origin_src, origin, content }); }, - Err(ObjectParseError { lines_seen, .. 
}) => { - ignored.push(line_backup); - ignored.extend(lines_seen); - }, - } - } else { - ignored.push(line_backup); - } - }, - line => { - ignored.push(line); - } - } - - } - } - } - File { objects, ignored } -} - -struct ObjectParseError<'input> { - lines_seen: Vec>, -} - -fn parse_unvalidated_object_content<'input, T>(lines: &mut Peekable) -> Result, ObjectParseError<'input>> - where T: Iterator> -{ - let mut operations = Vec::new(); - let mut empty_lines = Vec::new(); - let mut hanging_labels = Vec::new(); - let mut invalid_lines = Vec::new(); - - let mut lines_seen = Vec::new(); - let mut found_end = false; - - let mut hanging_label = None; - let mut src_lines = Vec::new(); - let mut whitespace = Vec::new(); - let mut comments = Vec::new(); - let mut newlines = Vec::new(); - - loop { - let maybe_line = lines.next(); - match maybe_line { - None => { break; } - Some(line) => { - lines_seen.push(line.clone()); - let line_backup = line.clone(); - - let IR2Line { content, whitespace: line_whitespace, comment, newline, src } = line; - - if hanging_label.is_some() { - if let IR2LineContent::Valid(None, _) = &content { - } else { - hanging_labels.push(hanging_label.take().unwrap()); - } - } - - match content { - IR2LineContent::Invalid(_) => { invalid_lines.push(line_backup); } - IR2LineContent::Valid(None, None) => { empty_lines.push(line_backup); }, - IR2LineContent::Valid(Some(_), None) => { hanging_label = Some(line_backup); }, - IR2LineContent::Valid(label, Some(operation)) => { - let label = if hanging_label.is_some() { - assert!(label.is_none()); - let IR2Line { - content: label_content, - whitespace: label_whitespace, - comment: label_comment, - newline: label_newline, - src - } = hanging_label.take().unwrap(); - - whitespace.extend(label_whitespace); - src_lines.push(src); - if let Some(label_comment) = label_comment { comments.push(label_comment); } - if let Some(label_newline) = label_newline { newlines.push(label_newline); } - if let IR2LineContent::Valid(label, None) = label_content { - label - } else { - unreachable!("Hanging label wasn't a line with only a label! Contact the maintainers."); - } - } else { - label - }; - - whitespace.extend(line_whitespace); - src_lines.push(src); - if let Some(comment) = comment { comments.push(comment); } - if let Some(newline) = newline { newlines.push(newline); } - let finished_src_lines = mem::replace(&mut src_lines, Vec::new()); - let finished_whitespace = mem::replace(&mut whitespace, Vec::new()); - let finished_comments = mem::replace(&mut comments, Vec::new()); - let finished_newlines = mem::replace(&mut newlines, Vec::new()); - if let OperationTokens { operands: OperandTokens::End, .. 
} = operation { - found_end = true; - } - let unvalidated_line = Line { - label, - operation, - src_lines: finished_src_lines, - whitespace: finished_whitespace, - comments: finished_comments, - newlines: finished_newlines, - }; - operations.push(unvalidated_line); - if found_end { - break; - } - }, - } - } - } - } - - if found_end { - Ok(ObjectContent { operations, empty_lines, hanging_labels, invalid_lines }) - } else { - Err(ObjectParseError { - lines_seen - }) - } -} - diff --git a/assembler/src/ir/ir4_parse_ambiguous_tokens.rs b/assembler/src/ir/ir4_parse_ambiguous_tokens.rs deleted file mode 100644 index 3ace3bb..0000000 --- a/assembler/src/ir/ir4_parse_ambiguous_tokens.rs +++ /dev/null @@ -1,520 +0,0 @@ -use std::convert::TryInto; -use num_traits::Num; -use std::string::ToString; -use lc3_isa::{Addr, SignedWord, check_signed_imm, Word}; - -use crate::error::{ParseError, InvalidLabelReason, InvalidRegReason, InvalidImmediateReason, Error}; -use crate::lexer::Token; -use crate::ir::ir2_parse_line_syntax; -use crate::ir::ir3_parse_objects; -use crate::parser::LeniencyLevel; - -#[derive(Clone, Debug)] -pub struct File<'input> { - pub objects: Vec>, - pub ignored: Vec>, -} - -#[derive(Clone, Debug)] -pub struct Object<'input> { - pub origin_src: Operation<'input>, - pub origin: Immediate<'input, Addr>, - pub content: ObjectContent<'input>, -} - -#[derive(Clone, Debug)] -pub struct ObjectContent<'input> { - pub operations: Vec>, - pub empty_lines: Vec>, - pub hanging_labels: Vec>, - pub invalid_lines: Vec>, -} - -pub type Label<'input> = Checked<'input, &'input str>; -pub type Separator<'input> = Token<'input>; - -// Different from lc3_isa::Instruction in that offsets from labels aren't computed. -// Also covers pseudo-ops. -#[derive(Clone, Debug)] -pub struct Operation<'input> { - pub label: Option>, - pub operator: Token<'input>, - pub nzp: Result, ParseError>, - pub operands: Operands<'input>, - - pub src_lines: Vec, - pub separators: Vec>, - pub whitespace: Vec>, - pub comments: Vec>, - pub newlines: Vec>, -} - -#[derive(Clone, Debug)] -pub struct Checked<'input, T> { - pub src: Token<'input>, - pub value: Result, -} - -impl<'input, T> Checked<'input, T> { - pub fn unwrap(self) -> T { - self.value.unwrap() - } - - pub fn extract_error_into(&self, errors: &mut Vec) { - if let Err(error) = &self.value { - errors.push(error.into()); - } - } -} - -pub type Reg<'input> = Checked<'input, lc3_isa::Reg>; -pub type Immediate<'input, T> = Checked<'input, T>; - -#[derive(Clone, Debug)] -pub enum Sr2OrImm5<'input> { - Sr2(Reg<'input>), - Imm5(Immediate<'input, SignedWord>), -} - -#[derive(Clone, Debug)] -pub enum ImmOrLabel<'input> { - Imm(Immediate<'input, SignedWord>), - Label(Label<'input>), -} - -#[derive(Clone, Debug)] -pub enum UnsignedImmOrLabel<'input> { - Imm(Immediate<'input, Word>), - Label(Label<'input>), -} - -#[derive(Clone, Debug)] -pub struct ConditionCodes { - pub n: bool, - pub z: bool, - pub p: bool, -} - -type PCOffset<'input> = Checked<'input, ImmOrLabel<'input>>; - -#[derive(Clone, Debug)] -pub enum Operands<'input> { - Add { dr: Reg<'input>, sr1: Reg<'input>, sr2_or_imm5: Checked<'input, Sr2OrImm5<'input>> }, - And { dr: Reg<'input>, sr1: Reg<'input>, sr2_or_imm5: Checked<'input, Sr2OrImm5<'input>> }, - Br { pc_offset9: PCOffset<'input> }, - Jmp { base: Reg<'input> }, - Jsr { pc_offset11: PCOffset<'input> }, - Jsrr { base: Reg<'input> }, - Ld { dr: Reg<'input>, pc_offset9: PCOffset<'input> }, - Ldi { dr: Reg<'input>, pc_offset9: PCOffset<'input> }, - Ldr { dr: Reg<'input>, 
base: Reg<'input>, offset6: Immediate<'input, SignedWord> }, - Lea { dr: Reg<'input>, pc_offset9: PCOffset<'input> }, - Not { dr: Reg<'input>, sr: Reg<'input> }, - Ret, - Rti, - St { sr: Reg<'input>, pc_offset9: PCOffset<'input> }, - Sti { sr: Reg<'input>, pc_offset9: PCOffset<'input> }, - Str { sr: Reg<'input>, base: Reg<'input>, offset6: Immediate<'input, SignedWord> }, - Trap { trap_vec: Immediate<'input, u8> }, - - Getc, - Out, - Puts, - In, - Putsp, - Halt, - - Orig { origin: Immediate<'input, Addr> }, - Fill { value: Checked<'input, UnsignedImmOrLabel<'input>> }, - Blkw { size_src: Token<'input>, size: Immediate<'input, Addr> }, // Addr used here to signify a number of locations. Max is number of possible Addrs. - Stringz { string: Checked<'input, String> }, - End, -} - -pub struct AmbiguousTokenParser { - pub leniency: LeniencyLevel, -} - -impl AmbiguousTokenParser { - - pub fn parse_ambiguous_tokens<'input>(&self, file: ir3_parse_objects::File<'input>) -> File<'input> { - let ir3_parse_objects::File { objects, ignored } = file; - File { - objects: objects.into_iter().map(|o| self.validate_object(o)).collect(), - ignored - } - } - - fn validate_object<'input>(&self, object: ir3_parse_objects::Object<'input>) -> Object<'input> { - let ir3_parse_objects::Object { origin_src, origin, content } = object; - let ir3_parse_objects::ObjectContent { operations, empty_lines, hanging_labels, invalid_lines } = content; - Object { - origin_src: self.validate_line(origin_src), - origin: self.validate_numeric_immediate(origin), - content: ObjectContent { - operations: operations.into_iter().map(|o| self.validate_line(o)).collect(), - empty_lines, - hanging_labels, - invalid_lines - } - } - } - - fn validate_line<'input>(&self, line: ir3_parse_objects::Line<'input>) -> Operation<'input> { - let ir3_parse_objects::Line { - label, - operation: ir2_parse_line_syntax::OperationTokens { - operator, - operands, - separators, - }, - whitespace, - comments, - newlines, - src_lines, - } = line.clone(); - - Operation { - label: label.map(|l| self.validate_label(l)), - operator, - nzp: self.validate_condition_codes(&operator), - operands: self.validate_operand_tokens(operands), - separators, - whitespace, - comments, - newlines, - src_lines, - } - } - - fn validate_operand_tokens<'input>(&self, operands: ir2_parse_line_syntax::OperandTokens<'input>) -> Operands<'input> { - use ir2_parse_line_syntax::OperandTokens; - match operands { - OperandTokens::Add { dr, sr1, sr2_or_imm5 } => - Operands::Add { - dr: self.validate_reg(dr), - sr1: self.validate_reg(sr1), - sr2_or_imm5: self.validate_sr2_or_imm5(sr2_or_imm5) - }, - OperandTokens::And { dr, sr1, sr2_or_imm5 } => - Operands::And { - dr: self.validate_reg(dr), - sr1: self.validate_reg(sr1), - sr2_or_imm5: self.validate_sr2_or_imm5(sr2_or_imm5) - }, - OperandTokens::Br { label } => { - Operands::Br { pc_offset9: self.validate_imm_or_label(label, 9), } - }, - OperandTokens::Jmp { base } => Operands::Jmp { base: self.validate_reg(base) }, - OperandTokens::Jsr { label } => Operands::Jsr { pc_offset11: self.validate_imm_or_label(label, 11) }, - OperandTokens::Jsrr { base } => Operands::Jsrr { base: self.validate_reg(base) }, - OperandTokens::Ld { dr, label } => Operands::Ld { dr: self.validate_reg(dr), pc_offset9: self.validate_imm_or_label(label, 9) }, - OperandTokens::Ldi { dr, label } => Operands::Ldi { dr: self.validate_reg(dr), pc_offset9: self.validate_imm_or_label(label, 9) }, - OperandTokens::Ldr { dr, base, offset6 } => - Operands::Ldr { - dr: 
self.validate_reg(dr), - base: self.validate_reg(base), - offset6: self.validate_signed_immediate(offset6, 6), - }, - OperandTokens::Lea { dr, label } => Operands::Lea { dr: self.validate_reg(dr), pc_offset9: self.validate_imm_or_label(label, 9) }, - OperandTokens::Not { dr, sr } => Operands::Not { dr: self.validate_reg(dr), sr: self.validate_reg(sr) }, - OperandTokens::Ret => Operands::Ret, - OperandTokens::Rti => Operands::Rti, - OperandTokens::St { sr, label } => Operands::St { sr: self.validate_reg(sr), pc_offset9: self.validate_imm_or_label(label, 9) }, - OperandTokens::Sti { sr, label } => Operands::Sti { sr: self.validate_reg(sr), pc_offset9: self.validate_imm_or_label(label, 9) }, - OperandTokens::Str { sr, base, offset6 } => - Operands::Str { - sr: self.validate_reg(sr), - base: self.validate_reg(base), - offset6: self.validate_signed_immediate(offset6, 6), - }, - OperandTokens::Trap { trap_vec } => Operands::Trap { trap_vec: self.validate_numeric_immediate(trap_vec) }, - - OperandTokens::Getc => Operands::Getc, - OperandTokens::Out => Operands::Out, - OperandTokens::Puts => Operands::Puts, - OperandTokens::In => Operands::In, - OperandTokens::Putsp => Operands::Putsp, - OperandTokens::Halt => Operands::Halt, - - OperandTokens::Orig { origin } => Operands::Orig { origin: self.validate_numeric_immediate(origin) }, - OperandTokens::Fill { value } => Operands::Fill { value: self.validate_unsigned_imm_or_label(value) }, - OperandTokens::Blkw { size } => Operands::Blkw { size_src: size, size: self.validate_blkw_immediate(size) }, - OperandTokens::Stringz { string } => Operands::Stringz { string: self.validate_string(string) }, - OperandTokens::End => Operands::End, - } - } - - fn validate_sr2_or_imm5<'input>(&self, src: Token<'input>) -> Checked<'input, Sr2OrImm5<'input>> { - let reg = self.validate_reg(src); - let imm5 = self.validate_signed_immediate(src, 5); - let value = if let Reg { value: Ok(_), .. } = reg { - Ok(Sr2OrImm5::Sr2(reg)) - } else if let Immediate { value: Ok(_), .. } = imm5 { - Ok(Sr2OrImm5::Imm5(imm5)) - } else { - if let Reg { value: Err(ParseError::InvalidReg { reason: invalid_reg_reason, .. }), .. } = reg { - if let Immediate { value: Err(ParseError::InvalidImmediate { reason: invalid_imm5_reason, .. }), .. } = imm5 { - Err(ParseError::InvalidRegOrImm5 { - range: src.span, - invalid_reg_reason, - invalid_imm5_reason, - }) - } else { - unreachable!() - } - } else { - unreachable!() // TODO: use something cleaner like a match for this - } - }; - Checked { src, value } - } - - fn validate_reg<'input>(&self, src: Token<'input>) -> Reg<'input> { - let value = if let Some("r") | Some("R") = src.src.get(..=0) { - src.src.get(1..) - .filter(|s| s.len() == 1) - .and_then(|s| s.parse::().ok()) - .and_then(|i| i.try_into().ok()) - .ok_or(ParseError::InvalidReg { - range: src.span, - reason: InvalidRegReason::Number, - }) - } else { - Err(ParseError::InvalidReg { - range: src.span, - reason: InvalidRegReason::FirstChar, - }) - }; - Reg { src, value } - } - - fn validate_numeric_immediate<'input, T: Num>(&self, src: Token<'input>) -> Immediate<'input, T> { - let Token { src: str, span, .. } = src; - let value = if let Some(str_head) = str.get(..=0) { - let (str_head, offset) = match str.get(0..2) { - Some("0b") | Some("0x") => (str.get(1..2).unwrap(), 2), - Some(_) => (str_head, 1), - // If we don't have two chars, just pass it along? 
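-                // (single-char tokens like "x" or "#" land here; their empty digit
-                // string then fails to parse below, yielding an invalid-immediate error)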
- None => (str_head, 1), - }; - - let radix = match str_head { - "b" => Some(2), - "#" => Some(10), - "x" => Some(16), - _ => None - }; - if let Some(radix) = radix { - if let Some(src_tail) = src.src.get(offset..) { - T::from_str_radix(src_tail, radix) - .map_err(|_| InvalidImmediateReason::Number { actual: src_tail.to_string() }) - } else { - Err(InvalidImmediateReason::NoNumber) - } - } else { - Err(InvalidImmediateReason::RadixChar { actual: str_head.to_string() }) - } - } else { - Err(InvalidImmediateReason::NoChars) - }.map_err(|reason| ParseError::InvalidImmediate { - range: span, - reason - }); - - Immediate { src, value } - } - - fn validate_signed_immediate<'input>(&self, src: Token<'input>, num_bits: u32) -> Immediate<'input, SignedWord> { - let Immediate { src, value } = self.validate_numeric_immediate(src); - let value = match value { - Ok(i) => { - if check_signed_imm(i, num_bits) { - Ok(i) - } else { - Err(ParseError::InvalidImmediate { - range: src.span, - reason: InvalidImmediateReason::OutOfRange { value: i, num_bits } - }) - } - } - error => error // TODO: look for appropriate combinator(s)? - }; - Immediate { src, value } - } - - fn validate_imm_or_label<'input>(&self, src: Token<'input>, num_bits: u32) -> Checked<'input, ImmOrLabel<'input>> { - let label = self.validate_label(src); - let imm = self.validate_signed_immediate(src, num_bits); - let value = if let Label { value: Ok(_), .. } = label { - Ok(ImmOrLabel::Label(label)) - } else if let Immediate { value: Ok(_), .. } = imm { - Ok(ImmOrLabel::Imm(imm)) - } else { - if let Label { value: Err(ParseError::InvalidLabel { reasons: invalid_label_reasons, .. }), .. } = label { - if let Immediate { value: Err(ParseError::InvalidImmediate { reason: invalid_immediate_reason, .. }), .. } = imm { - Err(ParseError::InvalidLabelOrImmediate { - range: src.span, - invalid_label_reasons, - invalid_immediate_reason - }) - } else { - unreachable!() - } - } else { - unreachable!() // TODO: use something cleaner like a match for this - } - }; - Checked { src, value } - } - - fn validate_unsigned_imm_or_label<'input>(&self, src: Token<'input>) -> Checked<'input, UnsignedImmOrLabel<'input>> { - let label = self.validate_label(src); - let imm = self.validate_numeric_immediate(src); - let value = if let Immediate { value: Ok(_), .. } = imm { - Ok(UnsignedImmOrLabel::Imm(imm)) - } else if let Label { value: Ok(_), .. } = label { - Ok(UnsignedImmOrLabel::Label(label)) - } else { - if let Label { value: Err(ParseError::InvalidLabel { reasons: invalid_label_reasons, .. }), .. } = label { - if let Immediate { value: Err(ParseError::InvalidImmediate { reason: invalid_immediate_reason, .. }), .. 
} = imm { - Err(ParseError::InvalidLabelOrImmediate { - range: src.span, - invalid_label_reasons, - invalid_immediate_reason - }) - } else { - unreachable!() - } - } else { - unreachable!() // TODO: use something cleaner like a match for this - } - }; - Checked { src, value } - } - - fn validate_label<'input>(&self, src: Token<'input>) -> Label<'input> { - let label = src.src; - - let length = label.len(); - let valid_length = if self.leniency.long_labels_allowed() { - length >= 1 - } else { - (1..=20).contains(&length) - }; - - let mut chars = label.chars(); - let first_char = chars.next(); - let first_char_alphabetic = first_char.filter(|c| c.is_alphabetic()).is_some(); - - let mut other_chars = chars.collect::>(); - other_chars.retain(|&c| !(c.is_alphanumeric() || c == '_')); - let other_chars_alphanumeric = other_chars.len() == 0; - - let mut invalidation_reasons = Vec::new(); - if !valid_length { - invalidation_reasons.push(InvalidLabelReason::Length { actual: length.clone() }); - } - if !first_char_alphabetic { - invalidation_reasons.push(InvalidLabelReason::FirstChar { actual: first_char }); - } - if !other_chars_alphanumeric { - invalidation_reasons.push(InvalidLabelReason::OtherChars { actual: other_chars.into_iter().collect::() }); - } - - let value = if invalidation_reasons.len() == 0 { - Ok(label) - } else { - Err(ParseError::InvalidLabel { - range: src.span, - reasons: invalidation_reasons, - }) - }; - - Label { src, value } - } - - fn validate_condition_codes(&self, src: &Token) -> Result, ParseError> { - let str = src.src; - if str.to_uppercase().starts_with("BR") { - let mut n = false; - let mut z = false; - let mut p = false; - for c in str[2..].to_lowercase().chars() { - match c { - // TODO: prettify with macro or non-iterative solution - 'n' => { - if n { return Err(ParseError::Misc("Duplicate condition code n.".to_string())); } - n = true; - }, - 'z' => { - if z { return Err(ParseError::Misc("Duplicate condition code z.".to_string())); } - z = true; - }, - 'p' => { - if p { return Err(ParseError::Misc("Duplicate condition code p.".to_string())); } - p = true; - }, - _ => { return Err(ParseError::Misc("Invalid condition codes.".to_string())) }, - } - } - if !(n || z || p) { - n = true; - z = true; - p = true; - } - Ok(Some(ConditionCodes { n, z, p })) - } else { - Ok(None) - } - } - - fn validate_blkw_immediate<'input>(&self, src: Token<'input>) -> Immediate<'input, Addr> { - Immediate { - src, - value: src.src.parse().map_err(|_| ParseError::Misc("Invalid BLKW immediate.".to_string())) - } - } - - fn validate_string<'input>(&self, src: Token<'input>) -> Checked<'input, String> { - let mut string = src.src.to_string(); - // remove start and end quote - string.pop(); - string.remove(0); - // remove escape characters - string = string - .replace(r#"\""#, r#"""#) - .replace(r#"\\"#, r#"\"#) // TODO: fix this logic to escape \\ properly (atm \\n becomes '\n', not '\' and 'n') - .replace(r#"\n"#, "\n"); - let value = Ok(string); - Checked { src, value } - } -} - -#[cfg(test)] -mod immediate_tests { - use super::*; - use pretty_assertions::assert_eq; - - fn single_test(num: &str, actual: N) { - let p = AmbiguousTokenParser { leniency: LeniencyLevel::Lenient }; - - let tok = Token { src: num, span: (0, 0), ty: crate::lexer::TokenType::Ambiguous }; - - assert_eq!(actual, p.validate_numeric_immediate(tok).value.unwrap()); - } - - #[test] - fn regular() { - single_test("0x123", 0x123); - single_test("0x0123", 0x0123); - single_test("0b0101", 0b0101); - } - - #[test] - fn 
patt_style() { - single_test("#100", 100); - single_test("x456", 0x456); - single_test("b0101", 0b0101); - } -} diff --git a/assembler/src/ir/ir5_expand_pseudo_ops.rs b/assembler/src/ir/ir5_expand_pseudo_ops.rs deleted file mode 100644 index b5ca680..0000000 --- a/assembler/src/ir/ir5_expand_pseudo_ops.rs +++ /dev/null @@ -1,127 +0,0 @@ -use lc3_isa::{Word, Addr}; -use std::iter::repeat; - -use crate::ir::{ir4_parse_ambiguous_tokens, ir2_parse_line_syntax}; -use crate::ir::ir4_parse_ambiguous_tokens::Checked; -use crate::lexer::Token; -use crate::error::ParseError; - -pub type Label<'input> = ir4_parse_ambiguous_tokens::Label<'input>; -pub type Immediate<'input, Addr> = Checked<'input, Addr>; - -pub struct File<'input> { - pub objects: Vec>, - pub ignored: Vec>, -} - -pub struct Object<'input> { - pub origin_src: Operation<'input>, - pub origin: Immediate<'input, Addr>, - pub content: ObjectContent<'input>, -} - -pub struct ObjectContent<'input> { - pub operations: Vec>, - pub empty_lines: Vec>, - pub hanging_labels: Vec>, - pub invalid_lines: Vec>, -} - -#[derive(Debug)] -pub struct Operation<'input> { - pub label: Option>, - pub operator: Token<'input>, - pub nzp: Result, ParseError>, - pub operands: Operands<'input>, - - pub src_lines: Vec, - pub separators: Vec>, - pub whitespace: Vec>, - pub comments: Vec>, - pub newlines: Vec>, - - // Option::Some only if this Operation is a pseudo-op. - // Result::Ok only if the operands for the pseudo-op are Ok. - pub expanded: Option, ParseError>>, -} - -impl<'input> Operation<'input> { - - pub fn num_memory_locations_occupied(&self) -> Result { - match &self.expanded { - None => Ok(1), - Some(Ok(values)) => Ok(values.len()), - Some(Err(error)) => Err(error.clone()) - } - } - -} - -pub type Operands<'input> = ir4_parse_ambiguous_tokens::Operands<'input>; -pub type ConditionCodes = ir4_parse_ambiguous_tokens::ConditionCodes; -pub type Separator<'input> = ir4_parse_ambiguous_tokens::Separator<'input>; - -pub fn expand_pseudo_ops(file: ir4_parse_ambiguous_tokens::File) -> File { - let ir4_parse_ambiguous_tokens::File { objects, ignored } = file; - let objects = objects.into_iter() - .map(expand_object_pseudo_ops) - .collect(); - File { objects, ignored } -} - -pub fn expand_object_pseudo_ops(object: ir4_parse_ambiguous_tokens::Object) -> Object { - let ir4_parse_ambiguous_tokens::Object { origin_src, origin, content, } = object; - let origin_src = expand_operation(origin_src); - let content = expand_object_content(content); - Object { origin_src, origin, content } -} - -fn expand_object_content(object_content: ir4_parse_ambiguous_tokens::ObjectContent) -> ObjectContent { - let ir4_parse_ambiguous_tokens::ObjectContent { operations, empty_lines, hanging_labels, invalid_lines, } = object_content; - let operations = operations.into_iter() - .map(expand_operation) - .collect(); - ObjectContent { operations, empty_lines, hanging_labels, invalid_lines, } -} - -// TODO: make symbol table calculate addresses without this IR, -// so we can use it here to calculate .FILLs with a label operand. -fn expand_operation(operation: ir4_parse_ambiguous_tokens::Operation) -> Operation { - use ir4_parse_ambiguous_tokens::Operands; - - let ir4_parse_ambiguous_tokens::Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines } = operation; - - let expanded = match &operands { - Operands::Blkw { size, .. 
} => { - match &size.value { - Ok(size) => { - let num_values = *size as usize; - let block = repeat(0).take(num_values).collect(); - Some(Ok(block)) - }, - Err(err) => { - Some(Err(err.clone())) - } - } - }, - Operands::Stringz { string } => { - match &string.value { - Ok(string) => { - let mut values = Vec::new(); - for c in string.chars() { - values.push(c as Word); - } - values.push(0); // null-terminate - Some(Ok(values)) - }, - Err(err) => { - Some(Err(err.clone())) - } - } - }, - Operands::Orig { .. } | Operands::End => { Some(Ok(vec![])) }, // Expand, but to no values - _ => { None }, // Normal operation, does not expand - }; - - Operation { label, operator, nzp, operands, src_lines, separators, whitespace, comments, newlines, expanded, } -} diff --git a/assembler/src/ir/mod.rs b/assembler/src/ir/mod.rs deleted file mode 100644 index 98a88ce..0000000 --- a/assembler/src/ir/mod.rs +++ /dev/null @@ -1,20 +0,0 @@ -/// The series of intermediate representations (IRs) -/// of the parse tree necessary to provide as many -/// good errors as possible while assembling. - -/// This pass separates the tokens by newline -/// and separates comments. -pub mod ir1_parse_lines; - -/// This pass checks that the sequence of tokens on each line is valid. -pub mod ir2_parse_line_syntax; - -/// This pass checks that the sequence of lines is valid -/// and groups them into separate objects if there are multiple .ORIG/.ENDs. -pub mod ir3_parse_objects; - -/// This pass disambiguates and validates ambiguous immediate operands and labels. -pub mod ir4_parse_ambiguous_tokens; - -/// This pass expands each pseudo-op which fills memory into the appropriate list of values. -pub mod ir5_expand_pseudo_ops; diff --git a/assembler/src/lexer.rs b/assembler/src/lexer.rs index 93158b0..5e40cb7 100644 --- a/assembler/src/lexer.rs +++ b/assembler/src/lexer.rs @@ -1,52 +1,78 @@ -use regex::Regex; +use chumsky::prelude::*; +use lc3_isa::{Addr, Reg, SignedWord, Word}; +use std::convert::{TryFrom, TryInto}; +use std::num::TryFromIntError; +use chumsky::Stream; -pub type Span = (usize, usize); +use crate::Spanned; +use crate::LeniencyLevel; -#[derive(Debug, Clone, Copy, PartialEq)] -pub struct Token<'input> { - pub src: &'input str, - pub span: Span, - pub ty: TokenType, +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum Token { + Opcode(Opcode), + Register(Reg), + UnqualifiedNumberLiteral(Word), + NumberLiteral(LiteralValue), + StringLiteral(String), + Label(String), + + Newline, + Comma, + + Comment, } -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum TokenType { - // Insignificant Whitespace - Whitespace, +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum LiteralValue { + Word(Word), + SignedWord(SignedWord), +} - Op(Op), +impl TryFrom for Addr { + type Error = TryFromIntError; - // String Literals - // Numeric literals starting with x can't be disambiguated from labels, - // so we'll do that later based on position. 
- String, + fn try_from(value: LiteralValue) -> Result { + match value { + LiteralValue::Word(word) => Ok(word), + LiteralValue::SignedWord(signed_word) => signed_word.try_into(), + } + } +} - // Comments - Comment, +impl TryFrom for SignedWord { + type Error = TryFromIntError; - // Punctuation - Comma, - Newline, + fn try_from(value: LiteralValue) -> Result { + match value { + LiteralValue::Word(word) => word.try_into(), + LiteralValue::SignedWord(signed_word) => Ok(signed_word), + } + } +} + +impl TryFrom for u8 { + type Error = TryFromIntError; - // Chunk of non-whitespace, non-comma, non-semicolon text. - // Used as a catch-all for tokens that need to be disambiguated at parse-time, - // for example, labels and hex literals which may both start with 'x'. - // In more general terms: labels and operands. - Ambiguous, + fn try_from(value: LiteralValue) -> Result { + match value { + LiteralValue::Word(word) => word.try_into(), + LiteralValue::SignedWord(signed_word) => signed_word.try_into(), + } + } } -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Op { - Opcode(Opcode), - NamedTrap(NamedTrap), - PseudoOp(PseudoOp), +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct ConditionCodes { + pub(crate) n: bool, + pub(crate) z: bool, + pub(crate) p: bool, } -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum Opcode { Add, And, - Br, + Br(ConditionCodes), Jmp, Jsr, Jsrr, @@ -61,19 +87,15 @@ pub enum Opcode { Sti, Str, Trap, -} -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum PseudoOp { + // Pseudo-ops Orig, Fill, Blkw, Stringz, End, -} -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum NamedTrap { + // Named TRAP routines Getc, Out, Puts, @@ -82,137 +104,267 @@ pub enum NamedTrap { Halt, } -pub struct Lexer<'input> { - src: &'input str, - patterns: Vec<(Regex, TokenType)>, - cur_pos: usize, -} - -use TokenType::*; -use Opcode::*; -use NamedTrap::*; -use PseudoOp::*; - -impl<'input> Lexer<'input> { - - // The lexer tries to find these patterns in this order. - // Registering a pattern will automatically append some stuff to the regex. - // Notably, it will add ^ to the beginning to ensure that it grabs tokens - // from the beginning of the slice it's examining, so don't use ^. - const PATTERNS: [(&'static str, TokenType); 34] = [ - (r"[^\S\r\n]+", Whitespace), - - (r"ADD", Op(Op::Opcode(Add))), - (r"AND", Op(Op::Opcode(And))), - (r"BRn?z?p?", Op(Op::Opcode(Br))), - (r"JMP", Op(Op::Opcode(Jmp))), - (r"JSRR", Op(Op::Opcode(Jsrr))), - (r"JSR", Op(Op::Opcode(Jsr))), - (r"LDI", Op(Op::Opcode(Ldi))), - (r"LDR", Op(Op::Opcode(Ldr))), - (r"LD", Op(Op::Opcode(Ld))), - (r"LEA", Op(Op::Opcode(Lea))), - (r"NOT", Op(Op::Opcode(Not))), - (r"RET", Op(Op::Opcode(Ret))), - (r"RTI", Op(Op::Opcode(Rti))), - (r"STI", Op(Op::Opcode(Sti))), - (r"STR", Op(Op::Opcode(Str))), - (r"ST", Op(Op::Opcode(St))), - (r"TRAP", Op(Op::Opcode(Trap))), - - (r"GETC", Op(Op::NamedTrap(Getc))), - (r"OUT", Op(Op::NamedTrap(Out))), - (r"PUTS", Op(Op::NamedTrap(Puts))), - (r"IN", Op(Op::NamedTrap(In))), - (r"PUTSP", Op(Op::NamedTrap(Putsp))), - (r"HALT", Op(Op::NamedTrap(Halt))), - - (r".ORIG", Op(Op::PseudoOp(Orig))), - (r".FILL", Op(Op::PseudoOp(Fill))), - (r".BLKW", Op(Op::PseudoOp(Blkw))), - (r".STRINGZ", Op(Op::PseudoOp(Stringz))), - (r".END", Op(Op::PseudoOp(End))), - - (r#""([^"\\]|\\.)*""#, String),// quotes with any number of non-quote/backslash chars *or* arbitrary chars escaped with backslashes in between. 
- - (r";.*", Comment), // semicolon followed by any number of chars that aren't newlines. - - (r",", Comma), - (r"(\r\n|\r|\n)", Newline), - - (r"[^\s,;]+", Ambiguous), // At least one non-whitespace, non-comma, non-semicolon character. - ]; - - pub fn new(src: &'input str) -> Lexer<'input> { - let mut this = Lexer { - src, - patterns: Vec::new(), - cur_pos: 0, +enum CaseSensitivePassResult { + CaseInsensitiveSource(String), + CaseSensitiveToken(Token), +} + +fn number_literal_with_base(base: u32, prefix: char, leniency: LeniencyLevel) -> impl Parser> { + let strict_literal = + just(prefix) + .ignore_then(just('-').ignored().or_not()) + .then(text::digits(base)) + .try_map(move |(maybe_sign, digits): (Option<()>, String), span| { + let parse_result = if maybe_sign.is_some() { + SignedWord::from_str_radix(&format!("-{}", digits), base) + .map(LiteralValue::SignedWord) + } else { + Word::from_str_radix(&digits, base) + .map(LiteralValue::Word) + }; + parse_result.map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow + }); + let literal: Box>> = + match leniency { + LeniencyLevel::Lenient => Box::new(just("0").or_not().ignore_then(strict_literal)), + LeniencyLevel::Strict => Box::new(strict_literal), }; + literal +} - for (pattern, token_type) in Self::PATTERNS.iter() { - this.register_pattern(pattern, *token_type); - } +fn one_opcode(pattern: &'static str, output_opcode: Opcode) -> impl Parser> { + just_to(pattern, output_opcode) +} - this - } +fn one_register(pattern: &'static str, output_reg: Reg) -> impl Parser> { + just_to(pattern, output_reg) +} - fn register_pattern(&mut self, pattern: &str, token_type: TokenType) - { - assert!(!pattern.starts_with("^")); - let pattern = format!("^(?i){}", pattern); - let regex = Regex::new(pattern.as_str()).expect("Invalid regex"); - self.patterns.push((regex, token_type)) - } +fn just_to(pattern: &'static str, output: O) -> impl Parser> { + just(pattern).to(output) +} - fn tail(&self) -> &'input str { - &self.src[self.cur_pos..] - } +fn string_literal() -> impl Parser> { + // `escape` and `string_literal` are based on JSON parser example + // https://github.com/zesterer/chumsky/blob/d4102128315d9dbbea901a91dc5eaa0fc9a790f7/examples/json.rs#L39 + let escape = just::<_, _, Simple>('\\').ignore_then( + just('\\') + .or(just('"')) + .or(just('b').to('\x08')) + .or(just('f').to('\x0C')) + .or(just('n').to('\n')) + .or(just('r').to('\r')) + .or(just('t').to('\t')) + ); + + just('"') + .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated()) + .then_ignore(just('"')) + .collect::() + .map(Token::StringLiteral) +} +fn comment() -> impl Parser> { + just(';') + .then(filter(|c| !is_newline(c)).repeated()) + .to(Token::Comment) } +fn tokens(leniency: LeniencyLevel) -> impl Parser>, Error=Simple> { + let newline = text::newline() + .to(Token::Newline); + + use Opcode::*; + let branch_opcode = + just("BR") + .ignore_then(one_of("NZP").repeated().at_most(3)) + .map::(|cond_code_chars| { + let cond_codes = + if cond_code_chars.is_empty() { + ConditionCodes { n: true, z: true, p: true } + } else { + let n = cond_code_chars.contains(&'N'); + let z = cond_code_chars.contains(&'Z'); + let p = cond_code_chars.contains(&'P'); + ConditionCodes { n, z, p } + }; + Br(cond_codes) + }); + + // These options are separated by `or` instead of all belonging + // to one tuple passed to `choice` because `choice` only supports + // tuples with up to 26 elements. 
+ // The grouping by 'opcode type' was chosen arbitrarily. + let opcode = choice(( + one_opcode("ADD", Add), + one_opcode("AND", And), + branch_opcode, + one_opcode("JMP", Jmp), + one_opcode("JSRR", Jsrr), + one_opcode("JSR", Jsr), + one_opcode("LDI", Ldi), + one_opcode("LDR", Ldr), + one_opcode("LD", Ld), + one_opcode("LEA", Lea), + one_opcode("NOT", Not), + one_opcode("RET", Ret), + one_opcode("RTI", Rti), + one_opcode("STI", Sti), + one_opcode("STR", Str), + one_opcode("ST", St), + one_opcode("TRAP", Trap), + )) + .or(choice(( + one_opcode("GETC", Getc), + one_opcode("OUT", Out), + one_opcode("PUTSP", Putsp), + one_opcode("PUTS", Puts), + one_opcode("IN", In), + one_opcode("HALT", Halt), + ))) + .or(choice(( + one_opcode(".ORIG", Orig), + one_opcode(".FILL", Fill), + one_opcode(".BLKW", Blkw), + one_opcode(".STRINGZ", Stringz), + one_opcode(".END", End), + ))) + .map(Token::Opcode); + + use Reg::*; + let register = choice(( + one_register("R0", R0), + one_register("R1", R1), + one_register("R2", R2), + one_register("R3", R3), + one_register("R4", R4), + one_register("R5", R5), + one_register("R6", R6), + one_register("R7", R7), + )) + .map(Token::Register); + + let unqualified_number_literal_base = 10; + let unqualified_number_literal = text::digits(unqualified_number_literal_base) + .try_map(move |digits: String, span| { + Word::from_str_radix(&digits, unqualified_number_literal_base) + .map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow + }) + .map(Token::UnqualifiedNumberLiteral); + + let number_literal = choice(( + number_literal_with_base(2, 'B', leniency), + number_literal_with_base(10, '#', leniency), + number_literal_with_base(16, 'X', leniency), + )) + .map(Token::NumberLiteral); + + let label = text::ident() // C-style identifier. Follows all LC-3 label rules but allows arbitrary length and underscores. + .map(Token::Label); // TODO: validate length, underscores in strict mode + + let comma = just(',') + .to(Token::Comma); + + let token = choice(( + opcode, + register, + number_literal, + unqualified_number_literal, + string_literal(), + label, + newline, + comma, + comment(), + )) + .recover_with(skip_then_retry_until([])); // TODO: improve? 
+ + let non_newline_whitespace = + filter(|c: &char| c.is_whitespace() && !is_newline(c)).repeated(); + + token + .map_with_span(|token, span| (token, span)) + .padded_by(non_newline_whitespace) + .repeated() + .then_ignore(end()) +} -impl<'input> Iterator for Lexer<'input> { - type Item = Token<'input>; +fn is_newline(c: &char) -> bool { + // All line breaks matched by chumsky::text::newline + ['\n', + '\r', + '\x0B', // Vertical tab + '\x0C', // Form feed + '\u{0085}', // Next line + '\u{2028}', // Line separator + '\u{2029}', ].contains(c) // Paragraph separator +} - fn next(&mut self) -> Option { - let start = self.cur_pos; - if self.src.len() <= start { - return None; - } +fn case_sensitive_tokens() -> impl Parser>, Error=Simple> { + let case_sensitive_token = + choice(( + string_literal(), + comment() + )) + .map(|t| CaseSensitivePassResult::CaseSensitiveToken(t)); + + let case_insensitive_source = + filter(|c| *c != '"' && *c != ';').repeated().at_least(1) + .collect() + .map(|s| CaseSensitivePassResult::CaseInsensitiveSource(s)); + + case_insensitive_source + .or(case_sensitive_token) + .map_with_span(|cspr, s| (cspr, s)) + .repeated() + .then_ignore(end()) +} + +fn case_sensitive_pass(source: &str) -> (Option>>, Vec>) { + case_sensitive_tokens().parse_recovery_verbose(source) +} - let mut munches = Vec::new(); - for (pattern, token_type) in &self.patterns { - if let Some(match_) = pattern.find(self.tail()) { - munches.push((match_, *token_type)); +fn case_insensitive_pass(case_sensitive_pass_results: Vec>, leniency: LeniencyLevel) -> (Option>>, Vec>) { + let mut toks: Option>> = None; + let mut errors = Vec::new(); + + for (cspr, span) in case_sensitive_pass_results { + match cspr { + CaseSensitivePassResult::CaseInsensitiveSource(s) => { + // TODO: profile CPU + memory to see whether this introduces any inefficiencies. + // This allows chumsky to correctly track spans while parsing this substring. + let uppercase_s = s.to_uppercase(); + let spanned_char_stream = uppercase_s.chars() + .enumerate() + .map(|(i, c)| { + let pos = span.start + i; + (c, pos..(pos + 1)) + }); + let stream = Stream::from_iter(span.end..(span.end + 1), spanned_char_stream); + let (maybe_tokens, lex_errors) = tokens(leniency).parse_recovery_verbose(stream); + + if let Some(ts) = maybe_tokens { + toks.get_or_insert(Vec::new()).extend(ts); + } + errors.extend(lex_errors); + } + CaseSensitivePassResult::CaseSensitiveToken(t) => { + toks.get_or_insert(Vec::new()).push((t, span)); } } - let (max_munch, token_type) = munches.iter() - .rev() // we want to break ties based on order in self.patterns, but max_by_key returns last match - .max_by_key(|munch| munch.0.end()) - .expect("The lexer could not recognize some character pattern you provided. Please contact the maintainers."); // TODO: handle gracefully? 
- - self.cur_pos += max_munch.end(); - let token = Token { - src: max_munch.as_str(), - span: (start, self.cur_pos), - ty: *token_type, - }; - return Some(token); - } -} -#[cfg(test)] -mod tests { - use super::*; + (toks, errors) +} - #[test] - fn test_simple() { // TODO: actually assert some stuff - let input = ".ORIG x3000\nTEST add R0, R0, R0; Tokenize me, cap'n!\nBRnzp TEST\nHALT\n.END"; - let lexer = Lexer::new(input); - for item in lexer { - println!("{:?}", item); - } - } +pub fn lex(source: &str, leniency: LeniencyLevel) -> (Option>>, Vec>) { + let (maybe_csprs, mut errors) = case_sensitive_pass(source); + let tokens = + if let Some(csprs) = maybe_csprs { + let (maybe_tokens, cip_errors) = case_insensitive_pass(csprs, leniency); + errors.extend(cip_errors); + maybe_tokens + } else { + None + }; + (tokens, errors) } diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index c227290..1ad1e6b 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -7,28 +7,33 @@ extern crate core; pub mod lexer; pub mod parser; -pub mod ir; -pub mod error; -pub mod util; -pub mod complete; -pub mod analysis; +pub mod assembler; +pub mod linker; -pub mod new; +type Span = std::ops::Range; +type Spanned = (T, Span); +#[derive(Copy, Clone)] +pub enum LeniencyLevel { + Lenient, + Strict +} #[cfg(test)] mod tests { use super::*; - use lexer::Lexer; - use crate::parser::parse; - use crate::parser::LeniencyLevel::Lenient; #[test] fn simple() { - let src = ".ORIG x3000;\nLABEL ADD R0, R0, #0\n.end"; - let tokens = Lexer::new(src); - let cst = parse(tokens, Lenient); - println!("{:?}", cst); + let src = ".ORIG x3000;\nLABEL ADD R0, R0, #7000\n.end"; + let (tokens, lex_errs) = lexer::lex(src, LeniencyLevel::Lenient); + println!("{:?}", tokens); + println!("{:?}", lex_errs); + + let parse_results = tokens.map(|ts| parser::parse(src, ts, LeniencyLevel::Strict)); + if let Some((program, parse_errs)) = parse_results { + println!("{:?}", program); + println!("{:?}", parse_errs); + } } - } diff --git a/assembler/src/new/linker.rs b/assembler/src/linker.rs similarity index 95% rename from assembler/src/new/linker.rs rename to assembler/src/linker.rs index 90baa29..0e6abb0 100644 --- a/assembler/src/new/linker.rs +++ b/assembler/src/linker.rs @@ -3,8 +3,7 @@ use chumsky::chain::Chain; use chumsky::Parser; use lc3_isa::util::MemoryDump; use lc3_isa::{Addr, ADDR_SPACE_SIZE_IN_WORDS, Word}; -use crate::new::assembler::try_assemble; -use super::assembler::{SymbolTable, Object, ObjectWord, AssemblyResult}; +use crate::assembler::{try_assemble, SymbolTable, Object, ObjectWord, AssemblyResult}; struct LinkedObject { origin: Addr, diff --git a/assembler/src/new/lexer.rs b/assembler/src/new/lexer.rs deleted file mode 100644 index b31a074..0000000 --- a/assembler/src/new/lexer.rs +++ /dev/null @@ -1,369 +0,0 @@ -use chumsky::prelude::*; -use lc3_isa::{Addr, Reg, SignedWord, Word}; -use std::convert::{TryFrom, TryInto}; -use super::Spanned; -use std::num::TryFromIntError; -use chumsky::Stream; -use crate::new::LeniencyLevel; - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub enum Token { - Opcode(Opcode), - Register(Reg), - UnqualifiedNumberLiteral(Word), - NumberLiteral(LiteralValue), - StringLiteral(String), - Label(String), - - Newline, - Comma, - - Comment, -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub enum LiteralValue { - Word(Word), - SignedWord(SignedWord), -} - -impl TryFrom for Addr { - type Error = TryFromIntError; - - fn try_from(value: LiteralValue) -> Result { - match value { - 
LiteralValue::Word(word) => Ok(word), - LiteralValue::SignedWord(signed_word) => signed_word.try_into(), - } - } -} - -impl TryFrom for SignedWord { - type Error = TryFromIntError; - - fn try_from(value: LiteralValue) -> Result { - match value { - LiteralValue::Word(word) => word.try_into(), - LiteralValue::SignedWord(signed_word) => Ok(signed_word), - } - } -} - -impl TryFrom for u8 { - type Error = TryFromIntError; - - fn try_from(value: LiteralValue) -> Result { - match value { - LiteralValue::Word(word) => word.try_into(), - LiteralValue::SignedWord(signed_word) => signed_word.try_into(), - } - } -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct ConditionCodes { - pub(crate) n: bool, - pub(crate) z: bool, - pub(crate) p: bool, -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub enum Opcode { - Add, - And, - Br(ConditionCodes), - Jmp, - Jsr, - Jsrr, - Ld, - Ldi, - Ldr, - Lea, - Not, - Ret, - Rti, - St, - Sti, - Str, - Trap, - - // Pseudo-ops - Orig, - Fill, - Blkw, - Stringz, - End, - - // Named TRAP routines - Getc, - Out, - Puts, - In, - Putsp, - Halt, -} - -enum CaseSensitivePassResult { - CaseInsensitiveSource(String), - CaseSensitiveToken(Token), -} - -fn number_literal_with_base(base: u32, prefix: char, leniency: LeniencyLevel) -> impl Parser> { - let strict_literal = - just(prefix) - .ignore_then(just('-').ignored().or_not()) - .then(text::digits(base)) - .try_map(move |(maybe_sign, digits): (Option<()>, String), span| { - let parse_result = if maybe_sign.is_some() { - SignedWord::from_str_radix(&format!("-{}", digits), base) - .map(LiteralValue::SignedWord) - } else { - Word::from_str_radix(&digits, base) - .map(LiteralValue::Word) - }; - parse_result.map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow - }); - let literal: Box>> = - match leniency { - LeniencyLevel::Lenient => Box::new(just("0").or_not().ignore_then(strict_literal)), - LeniencyLevel::Strict => Box::new(strict_literal), - }; - literal -} - -fn one_opcode(pattern: &'static str, output_opcode: Opcode) -> impl Parser> { - just_to(pattern, output_opcode) -} - -fn one_register(pattern: &'static str, output_reg: Reg) -> impl Parser> { - just_to(pattern, output_reg) -} - -fn just_to(pattern: &'static str, output: O) -> impl Parser> { - just(pattern).to(output) -} - -fn string_literal() -> impl Parser> { - // `escape` and `string_literal` are based on JSON parser example - // https://github.com/zesterer/chumsky/blob/d4102128315d9dbbea901a91dc5eaa0fc9a790f7/examples/json.rs#L39 - let escape = just::<_, _, Simple>('\\').ignore_then( - just('\\') - .or(just('"')) - .or(just('b').to('\x08')) - .or(just('f').to('\x0C')) - .or(just('n').to('\n')) - .or(just('r').to('\r')) - .or(just('t').to('\t')) - ); - - just('"') - .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated()) - .then_ignore(just('"')) - .collect::() - .map(Token::StringLiteral) -} - -fn comment() -> impl Parser> { - just(';') - .then(filter(|c| !is_newline(c)).repeated()) - .to(Token::Comment) -} - -fn tokens(leniency: LeniencyLevel) -> impl Parser>, Error=Simple> { - let newline = text::newline() - .to(Token::Newline); - - use Opcode::*; - let branch_opcode = - just("BR") - .ignore_then(one_of("NZP").repeated().at_most(3)) - .map::(|cond_code_chars| { - let cond_codes = - if cond_code_chars.is_empty() { - ConditionCodes { n: true, z: true, p: true } - } else { - let n = cond_code_chars.contains(&'N'); - let z = cond_code_chars.contains(&'Z'); - let p = 
cond_code_chars.contains(&'P'); - ConditionCodes { n, z, p } - }; - Br(cond_codes) - }); - - // These options are separated by `or` instead of all belonging - // to one tuple passed to `choice` because `choice` only supports - // tuples with up to 26 elements. - // The grouping by 'opcode type' was chosen arbitrarily. - let opcode = choice(( - one_opcode("ADD", Add), - one_opcode("AND", And), - branch_opcode, - one_opcode("JMP", Jmp), - one_opcode("JSRR", Jsrr), - one_opcode("JSR", Jsr), - one_opcode("LDI", Ldi), - one_opcode("LDR", Ldr), - one_opcode("LD", Ld), - one_opcode("LEA", Lea), - one_opcode("NOT", Not), - one_opcode("RET", Ret), - one_opcode("RTI", Rti), - one_opcode("STI", Sti), - one_opcode("STR", Str), - one_opcode("ST", St), - one_opcode("TRAP", Trap), - )) - .or(choice(( - one_opcode("GETC", Getc), - one_opcode("OUT", Out), - one_opcode("PUTSP", Putsp), - one_opcode("PUTS", Puts), - one_opcode("IN", In), - one_opcode("HALT", Halt), - ))) - .or(choice(( - one_opcode(".ORIG", Orig), - one_opcode(".FILL", Fill), - one_opcode(".BLKW", Blkw), - one_opcode(".STRINGZ", Stringz), - one_opcode(".END", End), - ))) - .map(Token::Opcode); - - use Reg::*; - let register = choice(( - one_register("R0", R0), - one_register("R1", R1), - one_register("R2", R2), - one_register("R3", R3), - one_register("R4", R4), - one_register("R5", R5), - one_register("R6", R6), - one_register("R7", R7), - )) - .map(Token::Register); - - let unqualified_number_literal_base = 10; - let unqualified_number_literal = text::digits(unqualified_number_literal_base) - .try_map(move |digits: String, span| { - Word::from_str_radix(&digits, unqualified_number_literal_base) - .map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow - }) - .map(Token::UnqualifiedNumberLiteral); - - let number_literal = choice(( - number_literal_with_base(2, 'B', leniency), - number_literal_with_base(10, '#', leniency), - number_literal_with_base(16, 'X', leniency), - )) - .map(Token::NumberLiteral); - - let label = text::ident() // C-style identifier. Follows all LC-3 label rules but allows arbitrary length and underscores. - .map(Token::Label); // TODO: validate length, underscores in strict mode - - let comma = just(',') - .to(Token::Comma); - - let token = choice(( - opcode, - register, - number_literal, - unqualified_number_literal, - string_literal(), - label, - newline, - comma, - comment(), - )) - .recover_with(skip_then_retry_until([])); // TODO: improve? 
- - let non_newline_whitespace = - filter(|c: &char| c.is_whitespace() && !is_newline(c)).repeated(); - - token - .map_with_span(|token, span| (token, span)) - .padded_by(non_newline_whitespace) - .repeated() - .then_ignore(end()) -} - -fn is_newline(c: &char) -> bool { - // All line breaks matched by chumsky::text::newline - ['\n', - '\r', - '\x0B', // Vertical tab - '\x0C', // Form feed - '\u{0085}', // Next line - '\u{2028}', // Line separator - '\u{2029}', ].contains(c) // Paragraph separator -} - -fn case_sensitive_tokens() -> impl Parser>, Error=Simple> { - let case_sensitive_token = - choice(( - string_literal(), - comment() - )) - .map(|t| CaseSensitivePassResult::CaseSensitiveToken(t)); - - let case_insensitive_source = - filter(|c| *c != '"' && *c != ';').repeated().at_least(1) - .collect() - .map(|s| CaseSensitivePassResult::CaseInsensitiveSource(s)); - - case_insensitive_source - .or(case_sensitive_token) - .map_with_span(|cspr, s| (cspr, s)) - .repeated() - .then_ignore(end()) -} - -fn case_sensitive_pass(source: &str) -> (Option>>, Vec>) { - case_sensitive_tokens().parse_recovery_verbose(source) -} - -fn case_insensitive_pass(case_sensitive_pass_results: Vec>, leniency: LeniencyLevel) -> (Option>>, Vec>) { - let mut toks: Option>> = None; - let mut errors = Vec::new(); - - for (cspr, span) in case_sensitive_pass_results { - match cspr { - CaseSensitivePassResult::CaseInsensitiveSource(s) => { - // TODO: profile CPU + memory to see whether this introduces any inefficiencies. - // This allows chumsky to correctly track spans while parsing this substring. - let uppercase_s = s.to_uppercase(); - let spanned_char_stream = uppercase_s.chars() - .enumerate() - .map(|(i, c)| { - let pos = span.start + i; - (c, pos..(pos + 1)) - }); - let stream = Stream::from_iter(span.end..(span.end + 1), spanned_char_stream); - let (maybe_tokens, lex_errors) = tokens(leniency).parse_recovery_verbose(stream); - - if let Some(ts) = maybe_tokens { - toks.get_or_insert(Vec::new()).extend(ts); - } - errors.extend(lex_errors); - } - CaseSensitivePassResult::CaseSensitiveToken(t) => { - toks.get_or_insert(Vec::new()).push((t, span)); - } - } - } - - (toks, errors) -} - -pub fn lex(source: &str, leniency: LeniencyLevel) -> (Option>>, Vec>) { - let (maybe_csprs, mut errors) = case_sensitive_pass(source); - let tokens = - if let Some(csprs) = maybe_csprs { - let (maybe_tokens, cip_errors) = case_insensitive_pass(csprs, leniency); - errors.extend(cip_errors); - maybe_tokens - } else { - None - }; - (tokens, errors) -} diff --git a/assembler/src/new/mod.rs b/assembler/src/new/mod.rs deleted file mode 100644 index 86453b9..0000000 --- a/assembler/src/new/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -pub mod lexer; -pub mod parser; -pub mod assembler; -pub mod linker; - -type Span = std::ops::Range; -type Spanned = (T, Span); - -#[derive(Copy, Clone)] -pub enum LeniencyLevel { - Lenient, - Strict -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn simple() { - let src = ".ORIG x3000;\nLABEL ADD R0, R0, #7000\n.end"; - let (tokens, lex_errs) = lexer::lex(src, LeniencyLevel::Lenient); - println!("{:?}", tokens); - println!("{:?}", lex_errs); - - let parse_results = tokens.map(|ts| parser::parse(src, ts, LeniencyLevel::Strict)); - if let Some((program, parse_errs)) = parse_results { - println!("{:?}", program); - println!("{:?}", parse_errs); - } - } -} \ No newline at end of file diff --git a/assembler/src/new/parser.rs b/assembler/src/new/parser.rs deleted file mode 100644 index 70d1544..0000000 --- 
a/assembler/src/new/parser.rs +++ /dev/null @@ -1,212 +0,0 @@ -use std::convert::TryFrom; -use chumsky::prelude::*; -use chumsky::Stream; -use crate::new::LeniencyLevel; -use crate::new::lexer::{LiteralValue, Opcode, Token}; -use super::Spanned; -use lc3_isa::{Reg, Word}; - -pub(crate) type WithErrData = Spanned>>; - -#[derive(Debug)] -pub struct Program { - pub(crate) orig: WithErrData, - pub(crate) instructions: Vec>, - end: WithErrData, -} - -#[derive(Debug)] -pub(crate) struct Instruction { - pub(crate) label: Option>, - pub(crate) opcode: WithErrData, - pub(crate) operands: WithErrData>>, -} - -#[derive(Clone, Debug)] -pub(crate) enum Operand { - Register(Reg), - UnqualifiedNumberLiteral(Word), - NumberLiteral(LiteralValue), - StringLiteral(String), - Label(String), -} - -impl TryFrom for Reg { - type Error = (); - - fn try_from(e: Operand) -> Result { - if let Operand::Register(r) = e { - Ok(r) - } else { - Err(()) - } - } -} - -impl TryFrom for LiteralValue { - type Error = (); - - fn try_from(e: Operand) -> Result { - if let Operand::NumberLiteral(v) = e { - Ok(v) - } else { - Err(()) - } - } -} - -impl Operand { - pub(crate) fn string(self) -> String { - if let Self::StringLiteral(s) = self { - s - } else { - panic!("Not a string literal") - } - } - - pub(crate) fn label(self) -> String { - if let Self::Label(l) = self { - l - } else { - panic!("Not a label") - } - } - - pub(crate) fn unqualified_number_value(self) -> Word { - if let Self::UnqualifiedNumberLiteral(w) = self { - w - } else { - panic!("Not an unqualified number literal") - } - } -} - -fn operand() -> impl Parser, Error = Simple> { - let operand = select! { - Token::Register(reg) => Operand::Register(reg), - Token::UnqualifiedNumberLiteral(val) => Operand::UnqualifiedNumberLiteral(val), - Token::NumberLiteral(val) => Operand::NumberLiteral(val), - Token::StringLiteral(s) => Operand::StringLiteral(s), - Token::Label(s) => Operand::Label(s), - }; - operand.map_with_span(|o, span| (o, span)) -} - -fn any_opcode_but(denied: Opcode) -> impl Parser, Error = Simple> { - filter_map(move |span, t: Token| - if let Token::Opcode(o) = t.clone() { - if o == denied { - Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected - } else { - Ok(o) - } - } else { - Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected - }) - .map_with_span(|o, span| (o, span)) -} - -fn opcode(expected: Opcode) -> impl Parser, Error = Simple> { - let expected_token = Token::Opcode(expected); - filter_map(move |span, t| - if t == expected_token { - if let Token::Opcode(o) = t { - Ok(o) - } else { unreachable!() } - } else { - Err(Simple::expected_input_found(span, [Some(expected_token.clone())], Some(t))) - }) - .map_with_span(|o, span| (o, span)) -} - -enum OpcodeFilter { - OnlyOrig, - AnyButEnd, - OnlyEnd, -} - -fn instruction(oc_filter: OpcodeFilter, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { - let label = - select! 
{ Token::Label(s) => s } - .map_with_span(|s, span| (Ok(s), span)) - .or_not(); - - use OpcodeFilter::*; - let oc: Box, Error = Simple>> = - match oc_filter { - OnlyOrig => Box::new(opcode(Opcode::Orig)), - AnyButEnd => Box::new(any_opcode_but(Opcode::End)), - OnlyEnd => Box::new(opcode(Opcode::End)), - }; - let oc_with_err_data = oc.map(|(oc, span)| (Ok(oc), span)); - - let operand_separator: Box>> = - match leniency { - LeniencyLevel::Lenient => Box::new(just(Token::Comma).or_not().ignored()), - LeniencyLevel::Strict => Box::new(just(Token::Comma).ignored()), - }; - - let operands = - operand() - .map(|(o, span)| (Ok(o), span)) - .separated_by(operand_separator) - .map_with_span(|os, span| (Ok(os), span)); - - label - .then_ignore(just(Token::Newline).repeated()) - .then(oc_with_err_data) - .then(operands) - .map_with_span(|((l, o), os), span| { - let instruction = Instruction { - label: l, - opcode: o, - operands: os, - }; - (instruction, span) - }) -} - -fn comments_and_newlines() -> impl Parser> { - just(Token::Comment).or_not() - .then(just(Token::Newline).repeated().at_least(1)) - .repeated() - .ignored() -} - -fn program(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { - comments_and_newlines() - .ignore_then( - instruction(OpcodeFilter::OnlyOrig, leniency) - .map(|(i, span)| (Ok(i), span))) - .then( - instruction(OpcodeFilter::AnyButEnd, leniency) - .map(|(i, span)| (Ok(i), span)) - .separated_by(comments_and_newlines()) - .allow_leading() - .allow_trailing() - ) - .then( - instruction(OpcodeFilter::OnlyEnd, leniency) - .map(|(i, span)| (Ok(i), span))) - .then_ignore(comments_and_newlines()) - .then_ignore(end()) - .map_with_span(|((orig, instructions), end), span| { - (Program { orig, instructions, end }, span) - }) -} - -type File = Vec>; - -fn file(leniency: LeniencyLevel) -> impl Parser>>, Error = Simple> { - program(leniency) - .map(|(p, span)| (Ok(p), span)) - .separated_by(comments_and_newlines()) - .allow_leading() - .allow_trailing() - .map_with_span(|programs, span| (programs, span)) -} - -pub fn parse(src: &str, tokens: Vec>, leniency: LeniencyLevel) -> (Option>, Vec>) { - let len = src.chars().count(); - file(leniency).parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) -} diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index f165385..9ac3160 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -1,33 +1,213 @@ -use crate::lexer::Lexer; -use crate::ir::ir1_parse_lines::parse_lines; -use crate::ir::ir2_parse_line_syntax::parse_line_syntax; -use crate::ir::ir3_parse_objects::parse_objects; -use crate::ir::ir4_parse_ambiguous_tokens; -use crate::ir::ir4_parse_ambiguous_tokens::AmbiguousTokenParser; -use crate::complete; -use crate::ir::ir5_expand_pseudo_ops::expand_pseudo_ops; -use crate::complete::construct_all_instructions; - -pub fn parse(tokens: Lexer, leniency: LeniencyLevel) -> complete::Program { - let ir1 = parse_lines(tokens); - let ir2 = parse_line_syntax(ir1); - let ir3 = parse_objects(ir2); - let ir4 = AmbiguousTokenParser { leniency }.parse_ambiguous_tokens(ir3); - let ir5 = expand_pseudo_ops(ir4); - construct_all_instructions(ir5) -} - -// TODO: impl Default? 
-pub enum LeniencyLevel { - Lenient, - Strict, -} - -impl LeniencyLevel { - pub fn long_labels_allowed(&self) -> bool { - match self { - LeniencyLevel::Lenient => true, - LeniencyLevel::Strict => false +use std::convert::TryFrom; +use chumsky::prelude::*; +use chumsky::Stream; +use lc3_isa::{Reg, Word}; + +use crate::Spanned; +use crate::LeniencyLevel; +use crate::lexer::{LiteralValue, Opcode, Token}; + +pub(crate) type WithErrData = Spanned>>; + +#[derive(Debug)] +pub struct Program { + pub(crate) orig: WithErrData, + pub(crate) instructions: Vec>, + end: WithErrData, +} + +#[derive(Debug)] +pub(crate) struct Instruction { + pub(crate) label: Option>, + pub(crate) opcode: WithErrData, + pub(crate) operands: WithErrData>>, +} + +#[derive(Clone, Debug)] +pub(crate) enum Operand { + Register(Reg), + UnqualifiedNumberLiteral(Word), + NumberLiteral(LiteralValue), + StringLiteral(String), + Label(String), +} + +impl TryFrom for Reg { + type Error = (); + + fn try_from(e: Operand) -> Result { + if let Operand::Register(r) = e { + Ok(r) + } else { + Err(()) + } + } +} + +impl TryFrom for LiteralValue { + type Error = (); + + fn try_from(e: Operand) -> Result { + if let Operand::NumberLiteral(v) = e { + Ok(v) + } else { + Err(()) + } + } +} + +impl Operand { + pub(crate) fn string(self) -> String { + if let Self::StringLiteral(s) = self { + s + } else { + panic!("Not a string literal") + } + } + + pub(crate) fn label(self) -> String { + if let Self::Label(l) = self { + l + } else { + panic!("Not a label") + } + } + + pub(crate) fn unqualified_number_value(self) -> Word { + if let Self::UnqualifiedNumberLiteral(w) = self { + w + } else { + panic!("Not an unqualified number literal") } } } + +fn operand() -> impl Parser, Error = Simple> { + let operand = select! { + Token::Register(reg) => Operand::Register(reg), + Token::UnqualifiedNumberLiteral(val) => Operand::UnqualifiedNumberLiteral(val), + Token::NumberLiteral(val) => Operand::NumberLiteral(val), + Token::StringLiteral(s) => Operand::StringLiteral(s), + Token::Label(s) => Operand::Label(s), + }; + operand.map_with_span(|o, span| (o, span)) +} + +fn any_opcode_but(denied: Opcode) -> impl Parser, Error = Simple> { + filter_map(move |span, t: Token| + if let Token::Opcode(o) = t.clone() { + if o == denied { + Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected + } else { + Ok(o) + } + } else { + Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected + }) + .map_with_span(|o, span| (o, span)) +} + +fn opcode(expected: Opcode) -> impl Parser, Error = Simple> { + let expected_token = Token::Opcode(expected); + filter_map(move |span, t| + if t == expected_token { + if let Token::Opcode(o) = t { + Ok(o) + } else { unreachable!() } + } else { + Err(Simple::expected_input_found(span, [Some(expected_token.clone())], Some(t))) + }) + .map_with_span(|o, span| (o, span)) +} + +enum OpcodeFilter { + OnlyOrig, + AnyButEnd, + OnlyEnd, +} + +fn instruction(oc_filter: OpcodeFilter, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { + let label = + select! 
{ Token::Label(s) => s } + .map_with_span(|s, span| (Ok(s), span)) + .or_not(); + + use OpcodeFilter::*; + let oc: Box, Error = Simple>> = + match oc_filter { + OnlyOrig => Box::new(opcode(Opcode::Orig)), + AnyButEnd => Box::new(any_opcode_but(Opcode::End)), + OnlyEnd => Box::new(opcode(Opcode::End)), + }; + let oc_with_err_data = oc.map(|(oc, span)| (Ok(oc), span)); + + let operand_separator: Box>> = + match leniency { + LeniencyLevel::Lenient => Box::new(just(Token::Comma).or_not().ignored()), + LeniencyLevel::Strict => Box::new(just(Token::Comma).ignored()), + }; + + let operands = + operand() + .map(|(o, span)| (Ok(o), span)) + .separated_by(operand_separator) + .map_with_span(|os, span| (Ok(os), span)); + + label + .then_ignore(just(Token::Newline).repeated()) + .then(oc_with_err_data) + .then(operands) + .map_with_span(|((l, o), os), span| { + let instruction = Instruction { + label: l, + opcode: o, + operands: os, + }; + (instruction, span) + }) +} + +fn comments_and_newlines() -> impl Parser> { + just(Token::Comment).or_not() + .then(just(Token::Newline).repeated().at_least(1)) + .repeated() + .ignored() +} + +fn program(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { + comments_and_newlines() + .ignore_then( + instruction(OpcodeFilter::OnlyOrig, leniency) + .map(|(i, span)| (Ok(i), span))) + .then( + instruction(OpcodeFilter::AnyButEnd, leniency) + .map(|(i, span)| (Ok(i), span)) + .separated_by(comments_and_newlines()) + .allow_leading() + .allow_trailing() + ) + .then( + instruction(OpcodeFilter::OnlyEnd, leniency) + .map(|(i, span)| (Ok(i), span))) + .then_ignore(comments_and_newlines()) + .then_ignore(end()) + .map_with_span(|((orig, instructions), end), span| { + (Program { orig, instructions, end }, span) + }) +} + +type File = Vec>; + +fn file(leniency: LeniencyLevel) -> impl Parser>>, Error = Simple> { + program(leniency) + .map(|(p, span)| (Ok(p), span)) + .separated_by(comments_and_newlines()) + .allow_leading() + .allow_trailing() + .map_with_span(|programs, span| (programs, span)) +} + +pub fn parse(src: &str, tokens: Vec>, leniency: LeniencyLevel) -> (Option>, Vec>) { + let len = src.chars().count(); + file(leniency).parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) +} diff --git a/assembler/src/util.rs b/assembler/src/util.rs deleted file mode 100644 index 5577c09..0000000 --- a/assembler/src/util.rs +++ /dev/null @@ -1,12 +0,0 @@ -use crate::lexer::Token; -use itertools::Itertools; - -pub(crate) fn reconstruct_src<'input>(tokens: impl IntoIterator>) -> String { - let mut vec = tokens.into_iter().collect::>(); - vec.sort_by_key(|token| token.span.0); - vec.dedup(); - vec.into_iter() - .map(|token| token.src) - .join("") -} - diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 93bd3cf..6ff2385 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -1,10 +1,9 @@ extern crate lc3_assembler; -use lc3_assembler::lexer::Lexer; -use lc3_assembler::parser::parse; use lc3_isa::{ADDR_MAX_VAL, Word}; use std::ops::Index; use lc3_isa::util::MemoryDump; +use lc3_assembler::{assembler, lexer, linker, parser, LeniencyLevel}; #[test] fn load_store_medium() { @@ -265,11 +264,8 @@ mod single_instruction { } fn test(input: &str, orig: usize, expected_mem: &[Word]) { - use lc3_assembler::new::*; - let (maybe_tokens, lex_errs) = lexer::lex(input, LeniencyLevel::Lenient); let tokens = maybe_tokens.expect("lexing failed"); - println!("{:?}", tokens); let (maybe_file, parse_errs) = parser::parse(input, tokens, 
LeniencyLevel::Lenient); let (mut file, span) = maybe_file.expect("parsing failed"); From aab715aa5e90815a49b0fcae5428112802a1d269 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 18 May 2022 11:57:17 -0500 Subject: [PATCH 29/82] assembler: allow assembling onto OS image again --- assembler/bin/as.rs | 2 +- assembler/src/linker.rs | 8 ++++++-- assembler/tests/integ.rs | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index f355538..3f4c39a 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -54,7 +54,7 @@ fn as_() { let background = if matches.is_present("with_os") { Some(lc3_os::OS_IMAGE.clone()) } else { None }; let object = assemble(program); - let mem = link([object]); + let mem = link([object], background); let mut output_path = PathBuf::from(path_str); output_path.set_extension(MEM_DUMP_FILE_EXTENSION); diff --git a/assembler/src/linker.rs b/assembler/src/linker.rs index 0e6abb0..8bc6b72 100644 --- a/assembler/src/linker.rs +++ b/assembler/src/linker.rs @@ -54,7 +54,7 @@ fn link_object(symbol_table: &SymbolTable, object: Object) -> LinkedObject { LinkedObject { origin, words } } -pub fn link(objects: impl IntoIterator) -> MemoryDump { +pub fn link(objects: impl IntoIterator, background: Option) -> MemoryDump { let objects = objects.into_iter().collect::>(); let mut symbol_table = HashMap::new(); @@ -64,7 +64,11 @@ pub fn link(objects: impl IntoIterator) -> MemoryDump { } } - let mut image = [0; ADDR_SPACE_SIZE_IN_WORDS]; + let mut image = + match background { + Some(mem) => mem.0, + None => [0; ADDR_SPACE_SIZE_IN_WORDS] + }; for object in objects { let linked_object = link_object(&symbol_table, object); layer_object(&mut image, linked_object); diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 6ff2385..223e990 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -273,7 +273,7 @@ fn test(input: &str, orig: usize, expected_mem: &[Word]) { let program = file.remove(0).0.expect("parse error in program"); let object = assembler::assemble(program); - let mem = linker::link([object]); + let mem = linker::link([object], None); for i in 0..orig { assert_mem(&mem, i, 0x0000); From 418c328c6f0728f7c3f3332ddfefa04193285be9 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 18 May 2022 12:04:43 -0500 Subject: [PATCH 30/82] assembler: assemble all objects in given file (no overlap check) --- assembler/bin/as.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 3f4c39a..9c734be 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -45,16 +45,17 @@ fn as_() { let (maybe_file, parse_errs) = parse(src, tokens, leniency); let (mut file, span) = maybe_file.expect("parsing failed"); - assert_eq!(1, file.len(), "parsed unexpected number of programs: {}", file.len()); - let program = file.remove(0).0.expect("parse error in program"); if matches.is_present("check") { println!("{}: No errors found.", path_str); } else { let background = if matches.is_present("with_os") { Some(lc3_os::OS_IMAGE.clone()) } else { None }; - let object = assemble(program); - let mem = link([object], background); + let objects = + file.into_iter() + .map(|program| assemble(program.0.unwrap())); + + let mem = link(objects, background); let mut output_path = PathBuf::from(path_str); output_path.set_extension(MEM_DUMP_FILE_EXTENSION); From 31478b3a13dd825442359062f051b97f501ee0c7 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 18 
May 2022 13:49:05 -0500 Subject: [PATCH 31/82] assembler: require terminators for opcodes et al to emulate max munch --- assembler/bin/as.rs | 2 +- assembler/src/lexer.rs | 21 +++++++++++++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 9c734be..f4d42ea 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -54,7 +54,7 @@ fn as_() { let objects = file.into_iter() .map(|program| assemble(program.0.unwrap())); - + let mem = link(objects, background); let mut output_path = PathBuf::from(path_str); diff --git a/assembler/src/lexer.rs b/assembler/src/lexer.rs index 5e40cb7..6b5bb4a 100644 --- a/assembler/src/lexer.rs +++ b/assembler/src/lexer.rs @@ -174,6 +174,16 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err let newline = text::newline() .to(Token::Newline); + let comma = just(',') + .to(Token::Comma); + + let non_newline_whitespace = + filter(|c: &char| c.is_whitespace() && !is_newline(c)).repeated(); + + let terminator = + filter(|c: &char| c.is_whitespace() || *c == ',' || *c == ';').ignored() + .or(end().ignored()); + use Opcode::*; let branch_opcode = just("BR") @@ -229,6 +239,7 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err one_opcode(".STRINGZ", Stringz), one_opcode(".END", End), ))) + .then_ignore(terminator.clone().rewind()) .map(Token::Opcode); use Reg::*; @@ -242,6 +253,7 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err one_register("R6", R6), one_register("R7", R7), )) + .then_ignore(terminator.clone().rewind()) .map(Token::Register); let unqualified_number_literal_base = 10; @@ -250,6 +262,7 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err Word::from_str_radix(&digits, unqualified_number_literal_base) .map_err(|e| Simple::custom(span, e.to_string())) // TODO: parse error should only be on overflow or underflow }) + .then_ignore(terminator.clone().rewind()) .map(Token::UnqualifiedNumberLiteral); let number_literal = choice(( @@ -257,14 +270,13 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err number_literal_with_base(10, '#', leniency), number_literal_with_base(16, 'X', leniency), )) + .then_ignore(terminator.clone().rewind()) .map(Token::NumberLiteral); let label = text::ident() // C-style identifier. Follows all LC-3 label rules but allows arbitrary length and underscores. + .then_ignore(terminator.rewind()) .map(Token::Label); // TODO: validate length, underscores in strict mode - let comma = just(',') - .to(Token::Comma); - let token = choice(( opcode, register, @@ -278,9 +290,6 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err )) .recover_with(skip_then_retry_until([])); // TODO: improve? - let non_newline_whitespace = - filter(|c: &char| c.is_whitespace() && !is_newline(c)).repeated(); - token .map_with_span(|token, span| (token, span)) .padded_by(non_newline_whitespace) From 2d2785ab94dc25760a32fb10d29310ae8b47f663 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 18 May 2022 14:51:58 -0500 Subject: [PATCH 32/82] assembler: add label lexing tests --- assembler/tests/integ.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 223e990..1654e99 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -63,6 +63,12 @@ mod single_instruction { commented: "ADD R0 R0 R0 ; comment" => 0x1000, } + single_instruction_tests! 
{ labels + minimal: "A ADD R0 R0 R0" => 0x1000, + begins_with_opcode: "ADDER ADD R0 R0 R0" => 0x1000, + begins_with_trap: "INIT ADD R0 R0 R0" => 0x1000, + } + single_instruction_tests! { add minimal: "ADD R0 R0 R0" => 0x1000, r1_2_3: "ADD R1 R2 R3" => 0x1283, From a5ce20c8d7d709f662cc0c3e56d051166aa892bb Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 18 May 2022 17:20:47 -0500 Subject: [PATCH 33/82] assembler: make lexer error-tolerant --- assembler/src/lexer.rs | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/assembler/src/lexer.rs b/assembler/src/lexer.rs index 6b5bb4a..9edffbc 100644 --- a/assembler/src/lexer.rs +++ b/assembler/src/lexer.rs @@ -20,6 +20,8 @@ pub enum Token { Comma, Comment, + + Error, } #[derive(Clone, Debug, Eq, Hash, PartialEq)] @@ -288,7 +290,7 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err comma, comment(), )) - .recover_with(skip_then_retry_until([])); // TODO: improve? + .recover_with(skip_until([',', ';', ' ', '\t', '\n', '\r', '\x0B', '\x0C', '\u{0085}', '\u{2028}', '\u{2029}'], |_| Token::Error)); // TODO: improve? token .map_with_span(|token, span| (token, span)) @@ -377,3 +379,40 @@ pub fn lex(source: &str, leniency: LeniencyLevel) -> (Option> }; (tokens, errors) } + + +#[cfg(test)] +mod tests { + use super::*; + use Token::*; + use Reg::*; + use crate::lexer::Opcode::*; + + #[test] + fn lone_error() { + let source = "#OOPS"; + let (tokens, _) = lex(source, LeniencyLevel::Lenient); + assert_eq!( + Some(vec![ + (Error, 0..5), + ]), + tokens); + } + + #[test] + fn error_in_context() { + let source = "ADD R0, R0, #OOPS; <- error"; + let (tokens, _) = lex(source, LeniencyLevel::Lenient); + assert_eq!( + Some(vec![ + (Opcode(Add), 0.. 3), + (Register(R0), 4.. 6), + (Comma, 6.. 7), + (Register(R0), 8..10), + (Comma, 10..11), + (Error, 12..17), + (Comment, 17..27), + ]), + tokens); + } +} \ No newline at end of file From e84d908c04ddd866c9e2d821f5daabb3e7d93fe1 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Thu, 19 May 2022 00:30:13 -0500 Subject: [PATCH 34/82] assembler: make parser aware of lex errors --- assembler/src/parser.rs | 105 +++++++++++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 29 deletions(-) diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index 9ac3160..cab557f 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -7,23 +7,23 @@ use crate::Spanned; use crate::LeniencyLevel; use crate::lexer::{LiteralValue, Opcode, Token}; -pub(crate) type WithErrData = Spanned>>; +pub(crate) type WithErrData = Spanned>; -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub struct Program { pub(crate) orig: WithErrData, pub(crate) instructions: Vec>, end: WithErrData, } -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub(crate) struct Instruction { pub(crate) label: Option>, pub(crate) opcode: WithErrData, pub(crate) operands: WithErrData>>, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub(crate) enum Operand { Register(Reg), UnqualifiedNumberLiteral(Word), @@ -82,38 +82,42 @@ impl Operand { } } -fn operand() -> impl Parser, Error = Simple> { +fn operand() -> impl Parser, Error = Simple> { let operand = select! 
{ - Token::Register(reg) => Operand::Register(reg), - Token::UnqualifiedNumberLiteral(val) => Operand::UnqualifiedNumberLiteral(val), - Token::NumberLiteral(val) => Operand::NumberLiteral(val), - Token::StringLiteral(s) => Operand::StringLiteral(s), - Token::Label(s) => Operand::Label(s), + Token::Register(reg) => Ok(Operand::Register(reg)), + Token::UnqualifiedNumberLiteral(val) => Ok(Operand::UnqualifiedNumberLiteral(val)), + Token::NumberLiteral(val) => Ok(Operand::NumberLiteral(val)), + Token::StringLiteral(s) => Ok(Operand::StringLiteral(s)), + Token::Label(s) => Ok(Operand::Label(s)), + Token::Error => Err(()), }; operand.map_with_span(|o, span| (o, span)) } -fn any_opcode_but(denied: Opcode) -> impl Parser, Error = Simple> { +fn any_opcode_but(denied: Opcode) -> impl Parser, Error = Simple> { filter_map(move |span, t: Token| - if let Token::Opcode(o) = t.clone() { - if o == denied { - Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected - } else { - Ok(o) - } - } else { - Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected + match t.clone() { + Token::Opcode(o) => + if o == denied { + Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected + } else { + Ok(Ok(o)) + }, + Token::Error => Ok(Err(())), + _ => Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected }) .map_with_span(|o, span| (o, span)) } -fn opcode(expected: Opcode) -> impl Parser, Error = Simple> { +fn opcode(expected: Opcode) -> impl Parser, Error = Simple> { let expected_token = Token::Opcode(expected); filter_map(move |span, t| if t == expected_token { if let Token::Opcode(o) = t { - Ok(o) + Ok(Ok(o)) } else { unreachable!() } + } else if let Token::Error = t { + Ok(Err(())) } else { Err(Simple::expected_input_found(span, [Some(expected_token.clone())], Some(t))) }) @@ -128,18 +132,19 @@ enum OpcodeFilter { fn instruction(oc_filter: OpcodeFilter, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { let label = - select! { Token::Label(s) => s } - .map_with_span(|s, span| (Ok(s), span)) - .or_not(); + select! 
{ + Token::Label(s) => Ok(s), + Token::Error => Err(()) + } + .map_with_span(|l, s| (l, s)); use OpcodeFilter::*; - let oc: Box, Error = Simple>> = + let oc: Box, Error = Simple>> = match oc_filter { OnlyOrig => Box::new(opcode(Opcode::Orig)), AnyButEnd => Box::new(any_opcode_but(Opcode::End)), OnlyEnd => Box::new(opcode(Opcode::End)), }; - let oc_with_err_data = oc.map(|(oc, span)| (Ok(oc), span)); let operand_separator: Box>> = match leniency { @@ -149,13 +154,12 @@ fn instruction(oc_filter: OpcodeFilter, leniency: LeniencyLevel) -> impl Parser< let operands = operand() - .map(|(o, span)| (Ok(o), span)) .separated_by(operand_separator) .map_with_span(|os, span| (Ok(os), span)); - label + label.or_not() .then_ignore(just(Token::Newline).repeated()) - .then(oc_with_err_data) + .then(oc) .then(operands) .map_with_span(|((l, o), os), span| { let instruction = Instruction { @@ -211,3 +215,46 @@ pub fn parse(src: &str, tokens: Vec>, leniency: LeniencyLevel) -> let len = src.chars().count(); file(leniency).parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) } + + +#[cfg(test)] +mod tests { + use super::*; + use super::Operand::*; + use super::Reg::*; + use super::Opcode::*; + use crate::lexer::lex; + + #[test] + fn operand_error() { + let source = ".ORIG x3000\nADD R0, R0, #OOPS; <- error\n.END"; + let (maybe_tokens, _) = lex(source, LeniencyLevel::Lenient); + let tokens = maybe_tokens.unwrap(); + let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); + + assert_eq!(Some((vec![(Ok(Program { + orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), + instructions: vec![ + (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Err(()), 24..29)]), 16..29) }), 12..29) + ], + end: (Ok(Instruction { label: None, opcode: (Ok(End), 40..44), operands: (Ok(vec![]), 44..44) }), 40..44) }), 0..44)], 0..44)), + file); + } + + #[test] + fn label_error() { + let source = ".ORIG x3000\nA%DDER ADD R0, R0, #1; <- error\n.END"; + let (maybe_tokens, _) = lex(source, LeniencyLevel::Lenient); + let tokens = maybe_tokens.unwrap(); + let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); + + assert_eq!(Some((vec![(Ok(Program { + orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), + instructions: vec![ + (Ok(Instruction { label: Some((Err(()), 12..18)), opcode: (Ok(Add), 19..22), operands: (Ok(vec![(Ok(Register(R0)), 23..25), (Ok(Register(R0)), 27..29), (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33)]), 23..33) }), 12..33) + ], + end: (Ok(Instruction { label: None, opcode: (Ok(End), 44..48), operands: (Ok(vec![]), 48..48) }), 44..48) }), 0..48)], 0..48)), + file); + } + +} \ No newline at end of file From f53d1de10f895435b6e28907bcd94ae37ff1683c Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 21 May 2022 02:42:13 -0500 Subject: [PATCH 35/82] assembler: add basic operand type analysis --- assembler/src/analysis.rs | 150 ++++++++++++++++++++++++++++++++++++++ assembler/src/lib.rs | 1 + assembler/src/parser.rs | 4 +- 3 files changed, 153 insertions(+), 2 deletions(-) create mode 100644 assembler/src/analysis.rs diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs new file mode 100644 index 0000000..310f827 --- /dev/null +++ b/assembler/src/analysis.rs @@ -0,0 +1,150 @@ +use 
std::ops::Range; +use itertools::zip; +use crate::lexer::Opcode; +use crate::parser::{File, Instruction, Operand, Program, WithErrData}; +use crate::Spanned; + +type ErrorList = Vec>; + +use Error::*; +enum Error { + BadProgram, + BadInstruction, + BadLabel, + BadOpcode, + BadOperands, + BadOperand, + WrongNumberOfOperands { expected: usize, actual: usize }, + OperandTypeMismatch { expected: OperandType, actual: OperandType } +} + +use OperandType::*; +enum OperandType { + Register, + UnqualifiedNumber, + Number, + String, + Label, + Or(Box, Box) +} + +impl OperandType { + pub(crate) fn reg_or_imm() -> Self { + Or(Box::new(Register), Box::new(Number)) + } + + pub(crate) fn pc_offset() -> Self { + Or(Box::new(Label), Box::new(Number)) + } + + pub(crate) fn check(&self, operand: &Operand) -> bool { + match self { + Register => matches!(operand, Operand::Register(_)), + UnqualifiedNumber => matches!(operand, Operand::UnqualifiedNumberLiteral(_)), + Number => matches!(operand, Operand::NumberLiteral(_)), + String => matches!(operand, Operand::StringLiteral(_)), + Label => matches!(operand, Operand::Label(_)), + Or(t1, t2) => t1.check(operand) || t2.check(operand), + } + } + + pub(crate) fn of(operand: &Operand) -> Self { + match operand { + Operand::Register(_) => Register, + Operand::UnqualifiedNumberLiteral(_) => UnqualifiedNumber, + Operand::NumberLiteral(_) => Number, + Operand::StringLiteral(_) => String, + Operand::Label(_) => Label, + } + } +} + +fn check_result_then(errors: &mut ErrorList, wed: &WithErrData, error: Error, f: impl FnOnce(&mut ErrorList, &T, &Range)) { + let (res, span) = wed; + match res { + Err(_) => { errors.push((error, span.clone())); } + Ok(v) => { f(errors, v, span); } + } +} + +fn validate(file: &File) -> ErrorList { + let mut errors = Vec::new(); + for program in file { + validate_program(&mut errors, program); + } + errors +} + +fn validate_program(errors: &mut ErrorList, program: &WithErrData) { + check_result_then(errors, program, BadProgram, |es, prog, _| { + let Program { orig, instructions, end } = prog; + validate_instruction(es, orig); + for instruction in instructions { + validate_instruction(es, instruction); + } + validate_instruction(es, end); + }); +} + +fn validate_instruction(errors: &mut ErrorList, instruction: &WithErrData) { + check_result_then(errors, instruction, BadInstruction, |es, inst, _| { + let Instruction { label, opcode, operands } = inst; + + if let Some((Err(_), label_span)) = label { + es.push((BadLabel, label_span.clone())); + } + + let (oc_res, opcode_span) = opcode; + if let Err(_) = oc_res { + es.push((BadOpcode, opcode_span.clone())); + } + + use Opcode::*; + let expected_operands = match oc_res { + Err(_) => None, + Ok(oc) => Some(match oc { + Add | And => vec![Register, Register, OperandType::reg_or_imm()], + Br(_) | Jsr => vec![OperandType::pc_offset()], + Jmp | Jsrr => vec![Register], + Ld | Ldi | Lea + | St | Sti => vec![Register, OperandType::pc_offset()], + Ldr | Str => vec![Register, Register, Number], + Not => vec![Register, Register], + Ret | Rti + | Getc | Out + | Puts | In + | Putsp | Halt + | End => vec![], + Trap + | Orig | Blkw => vec![Number], + Fill => vec![Or(Box::new(Label), Box::new(Number))], + Stringz => vec![String], + }), + }; + validate_operands(es, operands, expected_operands); + }); +} + +fn validate_operands(errors: &mut ErrorList, operands: &WithErrData>>, expected_types: Option>) { + check_result_then(errors, operands, BadOperands, |es, ops, ops_span| { + if let Some(expected) = expected_types { + 
// TODO: create longest common subsequence diff for more precise errors + let ops_len = ops.len(); + let exp_len = expected.len(); + if ops_len != exp_len { + es.push((WrongNumberOfOperands { expected: exp_len, actual: ops_len }, ops_span.clone())) + } else { + for ((op_res, op_span), exp_ty) in zip(ops, expected) { + match op_res { + Err(_) => { es.push((BadOperand, op_span.clone())) } + Ok(op) => { + if !exp_ty.check(op) { + es.push((OperandTypeMismatch { expected: exp_ty, actual: OperandType::of(op) }, op_span.clone())); + } + } + } + } + } + } + }); +} \ No newline at end of file diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 1ad1e6b..00dbcf0 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -9,6 +9,7 @@ pub mod lexer; pub mod parser; pub mod assembler; pub mod linker; +pub mod analysis; type Span = std::ops::Range; type Spanned = (T, Span); diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index cab557f..ebbe4cb 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -13,7 +13,7 @@ pub(crate) type WithErrData = Spanned>; pub struct Program { pub(crate) orig: WithErrData, pub(crate) instructions: Vec>, - end: WithErrData, + pub(crate) end: WithErrData, } #[derive(Debug, Eq, PartialEq)] @@ -200,7 +200,7 @@ fn program(leniency: LeniencyLevel) -> impl Parser, Erro }) } -type File = Vec>; +pub(crate) type File = Vec>; fn file(leniency: LeniencyLevel) -> impl Parser>>, Error = Simple> { program(leniency) From b014e1726546d394ff7d8894e5b5034cfbe92c1f Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 21 May 2022 17:01:21 -0500 Subject: [PATCH 36/82] assembler: add error reporting with ariadne --- Cargo.lock | 16 +++++++++++++ assembler/Cargo.toml | 1 + assembler/bin/as.rs | 14 ++++++++++++ assembler/src/analysis.rs | 47 ++++++++++++++++++++++++++++++++++++--- 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e39408c..71c76da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,6 +38,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "ariadne" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1cb2a2046bea8ce5e875551f5772024882de0b540c7f93dfc5d6cf1ca8b030c" +dependencies = [ + "yansi", +] + [[package]] name = "atty" version = "0.2.14" @@ -204,6 +213,7 @@ name = "lc3-assembler" version = "0.1.0" dependencies = [ "annotate-snippets", + "ariadne", "chumsky", "clap", "itertools", @@ -548,6 +558,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + [[package]] name = "yansi-term" version = "0.1.1" diff --git a/assembler/Cargo.toml b/assembler/Cargo.toml index 15b3bba..1d869a1 100644 --- a/assembler/Cargo.toml +++ b/assembler/Cargo.toml @@ -39,6 +39,7 @@ num-traits = "0.2.11" annotate-snippets = { version = "0.8.0", features = ["color"] } clap = "2.33.0" chumsky = "0.8.0" +ariadne = "0.1.5" lc3-isa = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } lc3-shims = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index f4d42ea..4612b69 100644 --- a/assembler/bin/as.rs +++ 
b/assembler/bin/as.rs @@ -2,9 +2,12 @@ extern crate lc3_assembler; use std::{env, fs}; use std::path::{Path, PathBuf}; +use std::process::exit; +use ariadne::Source; use lc3_assembler::parser::parse; use lc3_shims::memory::FileBackedMemoryShim; use clap::clap_app; +use lc3_assembler::analysis::{report, validate}; use lc3_assembler::assembler::assemble; use lc3_assembler::LeniencyLevel; use lc3_assembler::lexer::lex; @@ -46,6 +49,17 @@ fn as_() { let (maybe_file, parse_errs) = parse(src, tokens, leniency); let (mut file, span) = maybe_file.expect("parsing failed"); + println!("{:?}", file); + let errors = validate(&file); + + if !errors.is_empty() { + for error in errors { + let report = report(error); + report.eprint(Source::from(src)); + } + continue; + } + if matches.is_present("check") { println!("{}: No errors found.", path_str); } else { diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 310f827..4f98d91 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -1,5 +1,8 @@ +use std::fmt::{Display, format, Formatter}; use std::ops::Range; +use std::string::String; use itertools::zip; +use ariadne::{Label, Report, ReportBuilder, ReportKind}; use crate::lexer::Opcode; use crate::parser::{File, Instruction, Operand, Program, WithErrData}; use crate::Spanned; @@ -7,7 +10,7 @@ use crate::Spanned; type ErrorList = Vec>; use Error::*; -enum Error { +pub enum Error { BadProgram, BadInstruction, BadLabel, @@ -18,8 +21,33 @@ enum Error { OperandTypeMismatch { expected: OperandType, actual: OperandType } } +impl Error { + fn message(&self) -> String { + match self { + BadProgram => String::from("invalid program"), + BadInstruction => String::from("invalid instruction"), + BadLabel => String::from("invalid label"), + BadOpcode => String::from("invalid opcode"), + BadOperands => String::from("invalid operand list"), + BadOperand => String::from("invalid operand"), + WrongNumberOfOperands { expected, actual } => + format!("wrong number of operands; expected {}, found: {}", expected, actual), + OperandTypeMismatch { expected, actual } => + format!("wrong operand type; expected {}, found: {}", expected, actual), + } + } +} + +pub fn report(spanned_error: Spanned) -> Report { + let (error, span) = spanned_error; + Report::build(ReportKind::Error, (), 0) + .with_message(error.message()) + .with_label(Label::new(span).with_message("here")) + .finish() +} + use OperandType::*; -enum OperandType { +pub enum OperandType { Register, UnqualifiedNumber, Number, @@ -28,6 +56,19 @@ enum OperandType { Or(Box, Box) } +impl Display for OperandType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Register => write!(f, "Register"), + UnqualifiedNumber => write!(f, "Unqualified Number"), + Number => write!(f, "Number"), + String => write!(f, "String"), + Label => write!(f, "Label"), + Or(t1, t2) => write!(f, "{} or {}", t1, t2), + } + } +} + impl OperandType { pub(crate) fn reg_or_imm() -> Self { Or(Box::new(Register), Box::new(Number)) @@ -67,7 +108,7 @@ fn check_result_then(errors: &mut ErrorList, wed: &WithErrData, error: Err } } -fn validate(file: &File) -> ErrorList { +pub fn validate(file: &File) -> ErrorList { let mut errors = Vec::new(); for program in file { validate_program(&mut errors, program); From e06502829cd3091b70d59c97b98c33322c782fc9 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sun, 22 May 2022 13:37:17 -0500 Subject: [PATCH 37/82] assembler: update to clap v3, improve CLI help info --- Cargo.lock | 175 
++++++++++++++++++++++++++----------------- assembler/Cargo.toml | 6 +- assembler/bin/as.rs | 61 ++++++++++----- 3 files changed, 150 insertions(+), 92 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 71c76da..742d686 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,15 +29,6 @@ dependencies = [ "yansi-term", ] -[[package]] -name = "ansi_term" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -dependencies = [ - "winapi", -] - [[package]] name = "ariadne" version = "0.1.5" @@ -110,17 +101,41 @@ dependencies = [ [[package]] name = "clap" -version = "2.33.0" +version = "3.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" +checksum = "d2dbdf4bdacb33466e854ce889eee8dfd5729abf7ccd7664d0a2d60cd384440b" dependencies = [ - "ansi_term", "atty", "bitflags", + "clap_derive", + "clap_lex", + "indexmap", + "lazy_static", "strsim", + "termcolor", "textwrap", - "unicode-width", - "vec_map", +] + +[[package]] +name = "clap_derive" +version = "3.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25320346e922cffe59c0bbc5410c8d8784509efb321488971081313cb1e1a33c" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213" +dependencies = [ + "os_str_bytes", ] [[package]] @@ -151,22 +166,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" -[[package]] -name = "ctor" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47c5e5ac752e18207b12e16b10631ae5f7f68f8805f335f9b817ead83d9ffce1" -dependencies = [ - "quote", - "syn", -] - -[[package]] -name = "difference" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" - [[package]] name = "either" version = "1.5.3" @@ -184,6 +183,18 @@ dependencies = [ "wasi", ] +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + +[[package]] +name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + [[package]] name = "hermit-abi" version = "0.1.11" @@ -193,6 +204,16 @@ dependencies = [ "libc", ] +[[package]] +name = "indexmap" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee" +dependencies = [ + "autocfg", + "hashbrown", +] + [[package]] name = "itertools" version = "0.8.2" @@ -221,7 +242,7 @@ dependencies = [ "lc3-os", "lc3-shims", "num-traits", - "pretty_assertions", + "quote", "regex", ] @@ -338,24 +359,33 @@ dependencies = [ ] [[package]] -name = "output_vt100" -version = "0.1.2" +name = "os_str_bytes" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53cdc5b785b7a58c5aad8216b3dfa114df64b0b06ae6e1501cef91df2fbdf8f9" 
+checksum = "029d8d0b2f198229de29dca79676f2738ff952edf3fde542eb8bf94d8c21b435" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ - "winapi", + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", ] [[package]] -name = "pretty_assertions" -version = "0.6.1" +name = "proc-macro-error-attr" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f81e1644e1b54f5a68959a29aa86cde704219254669da328ecfdf6a1f09d427" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "ansi_term", - "ctor", - "difference", - "output_vt100", + "proc-macro2", + "quote", + "version_check", ] [[package]] @@ -366,18 +396,18 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.10" +version = "1.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df246d292ff63439fea9bc8c0a270bed0e390d5ebd4db4ba15aba81111b5abe3" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" dependencies = [ - "unicode-xid", + "unicode-ident", ] [[package]] name = "quote" -version = "1.0.3" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bdc6c187c65bca4260c9011c9e3132efe4909da44726bad24cf7572ae338d7f" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" dependencies = [ "proc-macro2", ] @@ -438,30 +468,36 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strsim" -version = "0.8.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.17" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0df0eb663f387145cab623dea85b09c2c5b4b0aef44e945d928e682fce71bb03" +checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-ident", ] [[package]] -name = "textwrap" -version = "0.11.0" +name = "termcolor" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ - "unicode-width", + "winapi-util", ] +[[package]] +name = "textwrap" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" + [[package]] name = "thread_local" version = "0.3.6" @@ -507,16 +543,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c85f514e095d348c279b1e5cd76795082cf15bd59b93207832abe0b1d8fed236" [[package]] -name = "unicode-width" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" - -[[package]] -name = "unicode-xid" -version = "0.2.0" +name = "unicode-ident" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" [[package]] name = "utf8-ranges" @@ -525,10 +555,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" [[package]] -name = "vec_map" -version = "0.8.1" +name = "version_check" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasi" @@ -552,6 +582,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/assembler/Cargo.toml b/assembler/Cargo.toml index 1d869a1..dc595f4 100644 --- a/assembler/Cargo.toml +++ b/assembler/Cargo.toml @@ -37,14 +37,12 @@ regex = "0.2.1" itertools = "0.8.2" num-traits = "0.2.11" annotate-snippets = { version = "0.8.0", features = ["color"] } -clap = "2.33.0" +clap = { version = "3.1.18", features = ["derive"] } chumsky = "0.8.0" ariadne = "0.1.5" +quote = "1.0.18" lc3-isa = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } lc3-shims = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } lc3-os = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } # TODO: ^ eventually don't pull these from git - -[dev-dependencies] -pretty_assertions = "0.6.1" diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 4612b69..ed58aa1 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -6,7 +6,7 @@ use std::process::exit; use ariadne::Source; use lc3_assembler::parser::parse; use lc3_shims::memory::FileBackedMemoryShim; -use clap::clap_app; +use clap::{Parser}; use lc3_assembler::analysis::{report, validate}; use lc3_assembler::assembler::assemble; use lc3_assembler::LeniencyLevel; @@ -15,6 +15,37 @@ use lc3_assembler::linker::link; const MEM_DUMP_FILE_EXTENSION: &'static str = "mem"; +#[derive(Parser)] +#[clap(author, version, about, + long_about = "Analyzes, assembles, and/or links LC-3 assembly and object files. \ + Each given assembly file is assembled to a single object file, \ + then all assembled or given object files are linked into a single executable image \ + of LC-3 machine code." + )] +struct Args { + /// Input file paths + #[clap(required = true, parse(from_os_str), value_name = "INPUT_FILE")] + input: Vec, + + /// Enforce all rules of the original LC-3 assembly language + /// + /// By default, the assembler is lenient about restrictions such as label length. + /// This option enforces restrictions specified in Patt and Patel's Introduction to Computing Systems, 3rd edition. 
+ // TODO: provide full list of restrictions + #[clap(long, short)] + strict: bool, + + /// Check the correctness of the program without assembling + #[clap(long, short)] + check: bool, + + /// Link executable image without OS + /// + /// If not specified, the program is overlaid onto an image of the OS from lc3-os at link time. + #[clap(long, short)] + no_os: bool, +} + fn main() { std::thread::Builder::new() .name("main_greater_stack_size".to_string()) @@ -24,23 +55,14 @@ fn main() { } fn as_() { - let matches = clap_app!(assemble_lc3 => - (version: env!("CARGO_PKG_VERSION")) - (author: env!("CARGO_PKG_AUTHORS")) - (about: env!("CARGO_PKG_DESCRIPTION")) - (@arg strict: -s --strict "Enforces all rules of the original LC-3 assembly language when validating the program") - (@arg check: -c --check "Checks the correctness of the program without attempting to assemble it") - (@arg with_os: -o --with_os "Overlays the program onto an image of the OS from lc3-os") - (@arg INPUT: +required ... "Paths to the programs to assemble") - ).get_matches(); - - for path_str in matches.values_of("INPUT").unwrap() { - let path = Path::new(path_str); + let args = Args::parse(); + + for path in args.input { assert!(path.is_file()); - let leniency = if matches.is_present("strict") { LeniencyLevel::Strict } else { LeniencyLevel::Lenient }; + let leniency = if args.strict { LeniencyLevel::Strict } else { LeniencyLevel::Lenient }; - let string = fs::read_to_string(path).unwrap(); + let string = fs::read_to_string(path.clone()).unwrap(); let src = string.as_str(); let (maybe_tokens, lex_errs) = lex(src, leniency); @@ -49,7 +71,6 @@ fn as_() { let (maybe_file, parse_errs) = parse(src, tokens, leniency); let (mut file, span) = maybe_file.expect("parsing failed"); - println!("{:?}", file); let errors = validate(&file); if !errors.is_empty() { @@ -60,10 +81,10 @@ fn as_() { continue; } - if matches.is_present("check") { - println!("{}: No errors found.", path_str); + if args.check { + println!("{}: No errors found.", path.to_str().unwrap()); } else { - let background = if matches.is_present("with_os") { Some(lc3_os::OS_IMAGE.clone()) } else { None }; + let background = if args.no_os { None } else { Some(lc3_os::OS_IMAGE.clone()) }; let objects = file.into_iter() @@ -71,7 +92,7 @@ fn as_() { let mem = link(objects, background); - let mut output_path = PathBuf::from(path_str); + let mut output_path = path.clone(); output_path.set_extension(MEM_DUMP_FILE_EXTENSION); let mut file_backed_mem = FileBackedMemoryShim::with_initialized_memory(output_path, mem); file_backed_mem.flush_all_changes().unwrap(); From 74ab585fd86d757c8101ab0e431fcf6b004637e1 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sun, 22 May 2022 18:51:06 -0500 Subject: [PATCH 38/82] assembler: validate bounds of numbers --- assembler/src/analysis.rs | 159 +++++++++++++++++++++++++++++++------- 1 file changed, 132 insertions(+), 27 deletions(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 4f98d91..72cea56 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -3,7 +3,8 @@ use std::ops::Range; use std::string::String; use itertools::zip; use ariadne::{Label, Report, ReportBuilder, ReportKind}; -use crate::lexer::Opcode; +use lc3_isa::{SignedWord, Word}; +use crate::lexer::{LiteralValue, Opcode}; use crate::parser::{File, Instruction, Operand, Program, WithErrData}; use crate::Spanned; @@ -50,7 +51,7 @@ use OperandType::*; pub enum OperandType { Register, UnqualifiedNumber, - Number, + Number { signed: bool, 
width: u8 }, String, Label, Or(Box, Box) @@ -59,33 +60,89 @@ pub enum OperandType { impl Display for OperandType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - Register => write!(f, "Register"), - UnqualifiedNumber => write!(f, "Unqualified Number"), - Number => write!(f, "Number"), - String => write!(f, "String"), - Label => write!(f, "Label"), - Or(t1, t2) => write!(f, "{} or {}", t1, t2), + Register => write!(f, "Register"), + UnqualifiedNumber => write!(f, "Unqualified Number"), + Number { signed, width } => write!(f, "Number ({}-bit, {})", width, (if *signed { "signed" } else { "unsigned" })), + String => write!(f, "String"), + Label => write!(f, "Label"), + Or(t1, t2) => write!(f, "{} or {}", t1, t2), + } + } +} + +pub(crate) enum AcceptedNumberSigns { + Signed, + Unsigned, + None, + Any +} + +impl AcceptedNumberSigns { + pub(crate) fn or(&self, other: &Self) -> Self { + use AcceptedNumberSigns::*; + match (self, other) { + (Unsigned, Signed) + | (Signed, Unsigned) + | (Any, _) + | (_, Any) => Any, + (Signed, _) + | (_, Signed) => Signed, + (Unsigned, _) + | (_, Unsigned) => Unsigned, + (None, None) => None } } } impl OperandType { - pub(crate) fn reg_or_imm() -> Self { - Or(Box::new(Register), Box::new(Number)) + pub(crate) fn accepted_number_signs(&self) -> AcceptedNumberSigns { + use AcceptedNumberSigns::*; + + match self { + Number { signed, .. } => if *signed { Signed } else { Unsigned }, + Or(t1, t2) => t1.accepted_number_signs().or(&t2.accepted_number_signs()), + _ => None + } + } + pub(crate) fn signed_or_unsigned_number(width: u8) -> Self { + Or(Box::new(Number { signed: false, width }), + Box::new(Number { signed: true, width })) + } + + pub(crate) fn reg_or_imm5() -> Self { + Or(Box::new(Register), Box::new(Number { signed: true, width: 5 })) } - pub(crate) fn pc_offset() -> Self { - Or(Box::new(Label), Box::new(Number)) + pub(crate) fn pc_offset(width: u8) -> Self { + Or(Box::new(Label), Box::new(Number { signed: true, width })) } pub(crate) fn check(&self, operand: &Operand) -> bool { match self { - Register => matches!(operand, Operand::Register(_)), - UnqualifiedNumber => matches!(operand, Operand::UnqualifiedNumberLiteral(_)), - Number => matches!(operand, Operand::NumberLiteral(_)), - String => matches!(operand, Operand::StringLiteral(_)), - Label => matches!(operand, Operand::Label(_)), - Or(t1, t2) => t1.check(operand) || t2.check(operand), + Register => matches!(operand, Operand::Register(_)), + UnqualifiedNumber => matches!(operand, Operand::UnqualifiedNumberLiteral(_)), + Number { signed: expected_signed, width: expected_width } => { + if let Number { signed, width } = OperandType::of(operand) { + match (signed, expected_signed) { + (true, false) => { + if let Operand::NumberLiteral(LiteralValue::SignedWord(sw)) = operand { + *sw >= 0 && (width - 1) <= *expected_width + } else { + // TODO: find way to couple OperandType::of and value extraction to avoid this case + unreachable!("Detected operand as signed type but could not extract signed value"); + } + } + (false, true) => width <= (expected_width - 1), + _ => width <= *expected_width + } + + } else { + false + } + } + String => matches!(operand, Operand::StringLiteral(_)), + Label => matches!(operand, Operand::Label(_)), + Or(t1, t2) => t1.check(operand) || t2.check(operand), } } @@ -93,11 +150,51 @@ impl OperandType { match operand { Operand::Register(_) => Register, Operand::UnqualifiedNumberLiteral(_) => UnqualifiedNumber, - Operand::NumberLiteral(_) => Number, + 
Operand::NumberLiteral(lv) => OperandType::of_number_literal(lv, None), Operand::StringLiteral(_) => String, Operand::Label(_) => Label, } } + + pub(crate) fn of_number_literal(literal_value: &LiteralValue, interpret_as: Option) -> Self { + use AcceptedNumberSigns::*; + + let value = + match literal_value { + LiteralValue::Word(value) => *value as i32, + LiteralValue::SignedWord(value) => *value as i32, + }; + let unsigned_interpretation = Number { signed: false, width: min_unsigned_width(value) }; + let signed_interpretation = Number { signed: true, width: min_signed_width(value) }; + match interpret_as { + Option::None | Some(None) => match literal_value { + LiteralValue::Word(_) => unsigned_interpretation, + LiteralValue::SignedWord(_) => signed_interpretation, + } + Some(Signed) => signed_interpretation, + Some(Unsigned) => unsigned_interpretation, + Some(Any) => Or(Box::new(signed_interpretation), + Box::new(unsigned_interpretation)), + } + } +} + +fn min_signed_width(n: i32) -> u8 { + let mut width = 1; + const BASE: i32 = 2; + while n < -BASE.pow(width - 1) || n >= BASE.pow(width - 1) { + width += 1; + } + width as u8 +} + +fn min_unsigned_width(n: i32) -> u8 { + let mut width = 1; + const BASE: i32 = 2; + while n >= BASE.pow(width) { + width += 1; + } + width as u8 } fn check_result_then(errors: &mut ErrorList, wed: &WithErrData, error: Error, f: impl FnOnce(&mut ErrorList, &T, &Range)) { @@ -144,21 +241,24 @@ fn validate_instruction(errors: &mut ErrorList, instruction: &WithErrData None, Ok(oc) => Some(match oc { - Add | And => vec![Register, Register, OperandType::reg_or_imm()], - Br(_) | Jsr => vec![OperandType::pc_offset()], + Add | And => vec![Register, Register, OperandType::reg_or_imm5()], + Br(_) => vec![OperandType::pc_offset(9)], + Jsr => vec![OperandType::pc_offset(11)], Jmp | Jsrr => vec![Register], Ld | Ldi | Lea - | St | Sti => vec![Register, OperandType::pc_offset()], - Ldr | Str => vec![Register, Register, Number], + | St | Sti => vec![Register, OperandType::pc_offset(9)], + Ldr | Str => vec![Register, Register, Number { signed: true, width: 6 }], Not => vec![Register, Register], Ret | Rti | Getc | Out | Puts | In | Putsp | Halt | End => vec![], - Trap - | Orig | Blkw => vec![Number], - Fill => vec![Or(Box::new(Label), Box::new(Number))], + Trap => vec![OperandType::signed_or_unsigned_number(8)], + Orig => vec![OperandType::signed_or_unsigned_number(16)], // TODO: Disallow signed? 
+ Blkw => vec![UnqualifiedNumber], + Fill => vec![Or(Box::new(Label), + Box::new(OperandType::signed_or_unsigned_number(16)))], Stringz => vec![String], }), }; @@ -180,7 +280,12 @@ fn validate_operands(errors: &mut ErrorList, operands: &WithErrData { es.push((BadOperand, op_span.clone())) } Ok(op) => { if !exp_ty.check(op) { - es.push((OperandTypeMismatch { expected: exp_ty, actual: OperandType::of(op) }, op_span.clone())); + let actual = if let Operand::NumberLiteral(value) = op { + OperandType::of_number_literal(value, Some(exp_ty.accepted_number_signs())) + } else { + OperandType::of(op) + }; + es.push((OperandTypeMismatch { expected: exp_ty, actual }, op_span.clone())); } } } From 5c29c45274b299ee4cd9de0fbb03ab70f6dec963 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 23 May 2022 01:00:55 -0500 Subject: [PATCH 39/82] assembler: separate analysis checks for extension --- assembler/src/analysis.rs | 267 ++++++++++++++++++++++++++------------ 1 file changed, 187 insertions(+), 80 deletions(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 72cea56..1842ecc 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -1,12 +1,11 @@ use std::fmt::{Display, format, Formatter}; -use std::ops::Range; use std::string::String; use itertools::zip; use ariadne::{Label, Report, ReportBuilder, ReportKind}; use lc3_isa::{SignedWord, Word}; use crate::lexer::{LiteralValue, Opcode}; use crate::parser::{File, Instruction, Operand, Program, WithErrData}; -use crate::Spanned; +use crate::{Span, Spanned}; type ErrorList = Vec>; @@ -48,6 +47,7 @@ pub fn report(spanned_error: Spanned) -> Report { } use OperandType::*; +#[derive(Clone)] pub enum OperandType { Register, UnqualifiedNumber, @@ -197,100 +197,207 @@ fn min_unsigned_width(n: i32) -> u8 { width as u8 } -fn check_result_then(errors: &mut ErrorList, wed: &WithErrData, error: Error, f: impl FnOnce(&mut ErrorList, &T, &Range)) { - let (res, span) = wed; - match res { - Err(_) => { errors.push((error, span.clone())); } - Ok(v) => { f(errors, v, span); } - } +use Analysis::*; +enum Analysis { + OperandTypes { expected_operands: Option> }, } -pub fn validate(file: &File) -> ErrorList { - let mut errors = Vec::new(); - for program in file { - validate_program(&mut errors, program); +impl Analysis { + fn operand_types() -> Self { + OperandTypes { expected_operands: None } } - errors -} -fn validate_program(errors: &mut ErrorList, program: &WithErrData) { - check_result_then(errors, program, BadProgram, |es, prog, _| { - let Program { orig, instructions, end } = prog; - validate_instruction(es, orig); - for instruction in instructions { - validate_instruction(es, instruction); + fn visit_file(&mut self, errors: &mut ErrorList, file: &File) { + match self { + _ => {} } - validate_instruction(es, end); - }); -} + } -fn validate_instruction(errors: &mut ErrorList, instruction: &WithErrData) { - check_result_then(errors, instruction, BadInstruction, |es, inst, _| { - let Instruction { label, opcode, operands } = inst; + fn visit_program(&mut self, errors: &mut ErrorList, program: &Program, span: &Span) { + match self { + _ => {} + } + } - if let Some((Err(_), label_span)) = label { - es.push((BadLabel, label_span.clone())); + fn visit_instruction(&mut self, errors: &mut ErrorList, instruction: &Instruction, span: &Span) { + match self { + OperandTypes { expected_operands } => { + use Opcode::*; + *expected_operands = match &instruction.opcode.0 { + Err(_) => None, + Ok(oc) => Some(match oc { + Add | And => 
vec![Register, Register, OperandType::reg_or_imm5()], + Br(_) => vec![OperandType::pc_offset(9)], + Jsr => vec![OperandType::pc_offset(11)], + Jmp | Jsrr => vec![Register], + Ld | Ldi | Lea + | St | Sti => vec![Register, OperandType::pc_offset(9)], + Ldr | Str => vec![Register, Register, Number { signed: true, width: 6 }], + Not => vec![Register, Register], + Ret | Rti + | Getc | Out + | Puts | In + | Putsp | Halt + | End => vec![], + Trap => vec![OperandType::signed_or_unsigned_number(8)], + Orig => vec![OperandType::signed_or_unsigned_number(16)], // TODO: Disallow signed? + Blkw => vec![UnqualifiedNumber], + Fill => vec![Or(Box::new(Label), + Box::new(OperandType::signed_or_unsigned_number(16)))], + Stringz => vec![String], + }), + }; + } } + } - let (oc_res, opcode_span) = opcode; - if let Err(_) = oc_res { - es.push((BadOpcode, opcode_span.clone())); + fn visit_label(&mut self, errors: &mut ErrorList, label: &String, span: &Span) { + match self { + _ => {} } + } - use Opcode::*; - let expected_operands = match oc_res { - Err(_) => None, - Ok(oc) => Some(match oc { - Add | And => vec![Register, Register, OperandType::reg_or_imm5()], - Br(_) => vec![OperandType::pc_offset(9)], - Jsr => vec![OperandType::pc_offset(11)], - Jmp | Jsrr => vec![Register], - Ld | Ldi | Lea - | St | Sti => vec![Register, OperandType::pc_offset(9)], - Ldr | Str => vec![Register, Register, Number { signed: true, width: 6 }], - Not => vec![Register, Register], - Ret | Rti - | Getc | Out - | Puts | In - | Putsp | Halt - | End => vec![], - Trap => vec![OperandType::signed_or_unsigned_number(8)], - Orig => vec![OperandType::signed_or_unsigned_number(16)], // TODO: Disallow signed? - Blkw => vec![UnqualifiedNumber], - Fill => vec![Or(Box::new(Label), - Box::new(OperandType::signed_or_unsigned_number(16)))], - Stringz => vec![String], - }), - }; - validate_operands(es, operands, expected_operands); - }); + fn visit_operands(&mut self, errors: &mut ErrorList, operands: &Vec>, span: &Span) { + match self { + OperandTypes { expected_operands } => { + if let Some(expected) = expected_operands { + // TODO: create longest common subsequence diff for more precise errors + let ops_len = operands.len(); + let exp_len = expected.len(); + if ops_len != exp_len { + errors.push((WrongNumberOfOperands { expected: exp_len, actual: ops_len }, span.clone())) + } else { + for ((op_res, op_span), exp_ty) in zip(operands, expected) { + if let Ok(op) = op_res { + if !exp_ty.check(op) { + let actual = if let Operand::NumberLiteral(value) = op { + OperandType::of_number_literal(value, Some(exp_ty.accepted_number_signs())) + } else { + OperandType::of(op) + }; + errors.push((OperandTypeMismatch { expected: exp_ty.clone(), actual }, op_span.clone())); + } + } + } + } + } + } + + } + } + + fn visit_operand(&mut self, errors: &mut ErrorList, operand: &Operand, span: &Span) { + match self { + _ => {} + } + } + +} + +struct Analyzer { + errors: ErrorList, + analyses: [Analysis; 1], } -fn validate_operands(errors: &mut ErrorList, operands: &WithErrData>>, expected_types: Option>) { - check_result_then(errors, operands, BadOperands, |es, ops, ops_span| { - if let Some(expected) = expected_types { - // TODO: create longest common subsequence diff for more precise errors - let ops_len = ops.len(); - let exp_len = expected.len(); - if ops_len != exp_len { - es.push((WrongNumberOfOperands { expected: exp_len, actual: ops_len }, ops_span.clone())) - } else { - for ((op_res, op_span), exp_ty) in zip(ops, expected) { - match op_res { - Err(_) => { 
es.push((BadOperand, op_span.clone())) } - Ok(op) => { - if !exp_ty.check(op) { - let actual = if let Operand::NumberLiteral(value) = op { - OperandType::of_number_literal(value, Some(exp_ty.accepted_number_signs())) - } else { - OperandType::of(op) - }; - es.push((OperandTypeMismatch { expected: exp_ty, actual }, op_span.clone())); +impl Analyzer { + fn new() -> Self { + Self { + errors: Vec::new(), + analyses: [ + Analysis::operand_types() + ] + } + } + + fn analyze(&mut self, file: &File) { + for analysis in self.analyses.iter_mut() { + analysis.visit_file(&mut self.errors, file); + } + for program in file { + self.analyze_program(program); + } + } + + fn analyze_program(&mut self, program: &WithErrData) { + let (program_res, program_span) = program; + match program_res { + Err(_) => { self.errors.push((BadProgram, program_span.clone())); } + Ok(prog) => { + for analysis in self.analyses.iter_mut() { + analysis.visit_program(&mut self.errors, prog, program_span); + } + + let Program { orig, instructions, end } = prog; + self.analyze_instruction(orig); + for instruction in instructions { + self.analyze_instruction(instruction); + } + self.analyze_instruction(end); + } + } + } + + fn analyze_instruction(&mut self, instruction: &WithErrData) { + let (instruction_res, instruction_span) = instruction; + match instruction_res { + Err(_) => { self.errors.push((BadInstruction, instruction_span.clone())); }, + Ok(inst) => { + for analysis in self.analyses.iter_mut() { + analysis.visit_instruction(&mut self.errors, inst, instruction_span); + } + + let Instruction { label, opcode, operands } = inst; + + if let Some(l_wed) = label { + let (label_res, label_span) = l_wed; + match label_res { + Err(_) => { self.errors.push((BadLabel, label_span.clone())) }, + Ok(l) => { + for analysis in self.analyses.iter_mut() { + analysis.visit_label(&mut self.errors, l, label_span); } } } } + + let (oc_res, opcode_span) = opcode; + if let Err(_) = oc_res { + self.errors.push((BadOpcode, opcode_span.clone())); + } + + self.analyze_operands(operands); } } - }); -} \ No newline at end of file + } + + fn analyze_operands(&mut self, operands: &WithErrData>>) { + let (operands_res, operands_span) = operands; + match operands_res { + Err(_) => { self.errors.push((BadOperands, operands_span.clone())); } + Ok(ops) => { + for analysis in self.analyses.iter_mut() { + analysis.visit_operands(&mut self.errors, ops, operands_span); + } + } + } + } + + fn analyze_operand(&mut self, operand: &WithErrData) { + let (operand_res, operand_span) = operand; + match operand_res { + Err(_) => { self.errors.push((BadOperand, operand_span.clone())); } + Ok(op) => { + for analysis in self.analyses.iter_mut() { + analysis.visit_operand(&mut self.errors, op, operand_span); + } + } + } + } +} + +pub fn validate(file: &File) -> ErrorList { + let mut analyzer = Analyzer::new(); + analyzer.analyze(file); + analyzer.errors +} + From 00947f44a01e4fc40f963755ab97f47e26759f8b Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 23 May 2022 13:05:10 -0500 Subject: [PATCH 40/82] assembler: check for duplicate labels --- assembler/src/analysis.rs | 68 ++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 1842ecc..aa34502 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -1,3 +1,4 @@ +use std::collections::{HashMap, HashSet}; use std::fmt::{Display, format, Formatter}; use std::string::String; use itertools::zip; @@ -18,7 
+19,8 @@ pub enum Error { BadOperands, BadOperand, WrongNumberOfOperands { expected: usize, actual: usize }, - OperandTypeMismatch { expected: OperandType, actual: OperandType } + OperandTypeMismatch { expected: OperandType, actual: OperandType }, + DuplicateLabel { label: String, occurrences: Vec, }, } impl Error { @@ -34,16 +36,35 @@ impl Error { format!("wrong number of operands; expected {}, found: {}", expected, actual), OperandTypeMismatch { expected, actual } => format!("wrong operand type; expected {}, found: {}", expected, actual), + DuplicateLabel { label, .. } => + format!("same label used for multiple locations: {}", label) } } } pub fn report(spanned_error: Spanned) -> Report { let (error, span) = spanned_error; - Report::build(ReportKind::Error, (), 0) - .with_message(error.message()) - .with_label(Label::new(span).with_message("here")) - .finish() + let mut r = + Report::build(ReportKind::Error, (), 0) + .with_message(error.message()); + match error { + DuplicateLabel { occurrences, .. } => { + let mut first_declaration_labeled = false; + for occurrence in occurrences { + let label_message = if !first_declaration_labeled { + first_declaration_labeled = true; + "first used here" + } else { + "also used here" + }; + r = r.with_label(Label::new(occurrence).with_message(label_message)) + } + } + _ => { + r = r.with_label(Label::new(span).with_message("here")); + } + } + r.finish() } use OperandType::*; @@ -200,6 +221,7 @@ fn min_unsigned_width(n: i32) -> u8 { use Analysis::*; enum Analysis { OperandTypes { expected_operands: Option> }, + DuplicateLabels { labels: HashMap>, } } impl Analysis { @@ -207,6 +229,10 @@ impl Analysis { OperandTypes { expected_operands: None } } + fn duplicate_labels() -> Self { + DuplicateLabels { labels: HashMap::new() } + } + fn visit_file(&mut self, errors: &mut ErrorList, file: &File) { match self { _ => {} @@ -248,11 +274,16 @@ impl Analysis { }), }; } + _ => {} } } fn visit_label(&mut self, errors: &mut ErrorList, label: &String, span: &Span) { match self { + DuplicateLabels { labels } => { + let occurrences = labels.entry(label.clone()).or_insert(Vec::new()); + occurrences.push(span.clone()); + } _ => {} } } @@ -282,7 +313,7 @@ impl Analysis { } } } - + _ => {} } } @@ -292,11 +323,28 @@ impl Analysis { } } + fn exit_file(&mut self, errors: &mut ErrorList, file: &File) { + match self { + DuplicateLabels { labels } => { + labels.iter() + .filter(|(_, occurrences)| occurrences.len() > 1) + .map(|(label, occurrences)| + (DuplicateLabel { + label: label.clone(), + occurrences: occurrences.clone() + }, 0..0) // TODO: dummy span, refactor so not required for errors with alternate span data + ) + .for_each(|e| errors.push(e)); + } + _ => {} + } + } + } struct Analyzer { errors: ErrorList, - analyses: [Analysis; 1], + analyses: [Analysis; 2], } impl Analyzer { @@ -304,7 +352,8 @@ impl Analyzer { Self { errors: Vec::new(), analyses: [ - Analysis::operand_types() + Analysis::operand_types(), + Analysis::duplicate_labels() ] } } @@ -316,6 +365,9 @@ impl Analyzer { for program in file { self.analyze_program(program); } + for analysis in self.analyses.iter_mut() { + analysis.exit_file(&mut self.errors, file); + } } fn analyze_program(&mut self, program: &WithErrData) { From bce1ff38493584cf71ac8f832fa0d617fbf09ec1 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 23 May 2022 21:02:07 -0500 Subject: [PATCH 41/82] assembler: ignore tokens outside programs as specified --- assembler/src/analysis.rs | 6 +-- assembler/src/assembler.rs | 1 - 
assembler/src/lexer.rs | 16 +++--- assembler/src/parser.rs | 100 ++++++++++++++++++------------------- 4 files changed, 62 insertions(+), 61 deletions(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index aa34502..1b58b9f 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -263,8 +263,7 @@ impl Analysis { Ret | Rti | Getc | Out | Puts | In - | Putsp | Halt - | End => vec![], + | Putsp | Halt => vec![], Trap => vec![OperandType::signed_or_unsigned_number(8)], Orig => vec![OperandType::signed_or_unsigned_number(16)], // TODO: Disallow signed? Blkw => vec![UnqualifiedNumber], @@ -379,12 +378,11 @@ impl Analyzer { analysis.visit_program(&mut self.errors, prog, program_span); } - let Program { orig, instructions, end } = prog; + let Program { orig, instructions } = prog; self.analyze_instruction(orig); for instruction in instructions { self.analyze_instruction(instruction); } - self.analyze_instruction(end); } } } diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs index 28f6cdb..3bc7ecf 100644 --- a/assembler/src/assembler.rs +++ b/assembler/src/assembler.rs @@ -317,7 +317,6 @@ impl TryFrom for Instruction { // TODO: improve error Opcode::Orig => Err(()), - Opcode::End => Err(()), Opcode::Fill => { let mut os = operands.into_iter(); diff --git a/assembler/src/lexer.rs b/assembler/src/lexer.rs index 9edffbc..a365d7a 100644 --- a/assembler/src/lexer.rs +++ b/assembler/src/lexer.rs @@ -15,13 +15,14 @@ pub enum Token { NumberLiteral(LiteralValue), StringLiteral(String), Label(String), + End, Newline, Comma, Comment, - Error, + Invalid, } #[derive(Clone, Debug, Eq, Hash, PartialEq)] @@ -95,7 +96,6 @@ pub enum Opcode { Fill, Blkw, Stringz, - End, // Named TRAP routines Getc, @@ -239,11 +239,14 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err one_opcode(".FILL", Fill), one_opcode(".BLKW", Blkw), one_opcode(".STRINGZ", Stringz), - one_opcode(".END", End), ))) .then_ignore(terminator.clone().rewind()) .map(Token::Opcode); + let end_pseudo_op = just(".END") + .then_ignore(terminator.clone().rewind()) + .to(Token::End); + use Reg::*; let register = choice(( one_register("R0", R0), @@ -281,6 +284,7 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err let token = choice(( opcode, + end_pseudo_op, register, number_literal, unqualified_number_literal, @@ -290,7 +294,7 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err comma, comment(), )) - .recover_with(skip_until([',', ';', ' ', '\t', '\n', '\r', '\x0B', '\x0C', '\u{0085}', '\u{2028}', '\u{2029}'], |_| Token::Error)); // TODO: improve? + .recover_with(skip_until([',', ';', ' ', '\t', '\n', '\r', '\x0B', '\x0C', '\u{0085}', '\u{2028}', '\u{2029}'], |_| Token::Invalid)); // TODO: improve? token .map_with_span(|token, span| (token, span)) @@ -394,7 +398,7 @@ mod tests { let (tokens, _) = lex(source, LeniencyLevel::Lenient); assert_eq!( Some(vec![ - (Error, 0..5), + (Invalid, 0..5), ]), tokens); } @@ -410,7 +414,7 @@ mod tests { (Comma, 6.. 
7), (Register(R0), 8..10), (Comma, 10..11), - (Error, 12..17), + (Invalid, 12..17), (Comment, 17..27), ]), tokens); diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index ebbe4cb..f864f43 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -13,7 +13,6 @@ pub(crate) type WithErrData = Spanned>; pub struct Program { pub(crate) orig: WithErrData, pub(crate) instructions: Vec>, - pub(crate) end: WithErrData, } #[derive(Debug, Eq, PartialEq)] @@ -89,21 +88,16 @@ fn operand() -> impl Parser, Error = Simple> Token::NumberLiteral(val) => Ok(Operand::NumberLiteral(val)), Token::StringLiteral(s) => Ok(Operand::StringLiteral(s)), Token::Label(s) => Ok(Operand::Label(s)), - Token::Error => Err(()), + Token::Invalid => Err(()), }; operand.map_with_span(|o, span| (o, span)) } -fn any_opcode_but(denied: Opcode) -> impl Parser, Error = Simple> { +fn any_opcode() -> impl Parser, Error = Simple> { filter_map(move |span, t: Token| match t.clone() { - Token::Opcode(o) => - if o == denied { - Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected - } else { - Ok(Ok(o)) - }, - Token::Error => Ok(Err(())), + Token::Opcode(o) => Ok(Ok(o)), + Token::Invalid => Ok(Err(())), _ => Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected }) .map_with_span(|o, span| (o, span)) @@ -116,7 +110,7 @@ fn opcode(expected: Opcode) -> impl Parser, Error = S if let Token::Opcode(o) = t { Ok(Ok(o)) } else { unreachable!() } - } else if let Token::Error = t { + } else if let Token::Invalid = t { Ok(Err(())) } else { Err(Simple::expected_input_found(span, [Some(expected_token.clone())], Some(t))) @@ -124,26 +118,19 @@ fn opcode(expected: Opcode) -> impl Parser, Error = S .map_with_span(|o, span| (o, span)) } -enum OpcodeFilter { - OnlyOrig, - AnyButEnd, - OnlyEnd, -} - -fn instruction(oc_filter: OpcodeFilter, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { +fn instruction(orig: bool, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { let label = select! 
{ Token::Label(s) => Ok(s), - Token::Error => Err(()) + Token::Invalid => Err(()) } .map_with_span(|l, s| (l, s)); - use OpcodeFilter::*; let oc: Box, Error = Simple>> = - match oc_filter { - OnlyOrig => Box::new(opcode(Opcode::Orig)), - AnyButEnd => Box::new(any_opcode_but(Opcode::End)), - OnlyEnd => Box::new(opcode(Opcode::End)), + if orig { + Box::new(opcode(Opcode::Orig)) + } else { + Box::new(any_opcode()) }; let operand_separator: Box>> = @@ -179,35 +166,32 @@ fn comments_and_newlines() -> impl Parser> { } fn program(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { - comments_and_newlines() - .ignore_then( - instruction(OpcodeFilter::OnlyOrig, leniency) - .map(|(i, span)| (Ok(i), span))) + instruction(true, leniency) + .map(|(i, span)| (Ok(i), span)) .then( - instruction(OpcodeFilter::AnyButEnd, leniency) + instruction(false, leniency) .map(|(i, span)| (Ok(i), span)) .separated_by(comments_and_newlines()) .allow_leading() .allow_trailing() ) - .then( - instruction(OpcodeFilter::OnlyEnd, leniency) - .map(|(i, span)| (Ok(i), span))) - .then_ignore(comments_and_newlines()) - .then_ignore(end()) - .map_with_span(|((orig, instructions), end), span| { - (Program { orig, instructions, end }, span) + .then_ignore(just::<_, Token, _>(Token::End)) + .map_with_span(|(orig, instructions), span| { + (Program { orig, instructions }, span) }) } pub(crate) type File = Vec>; fn file(leniency: LeniencyLevel) -> impl Parser>>, Error = Simple> { - program(leniency) - .map(|(p, span)| (Ok(p), span)) - .separated_by(comments_and_newlines()) - .allow_leading() - .allow_trailing() + comments_and_newlines() + .ignore_then( + program(leniency) + .map(|(p, span)| (Ok(p), span)) + .separated_by(none_of(Token::Opcode(Opcode::Orig)).repeated()) + .allow_trailing() + ) + .then_ignore(end()) .map_with_span(|programs, span| (programs, span)) } @@ -226,8 +210,8 @@ mod tests { use crate::lexer::lex; #[test] - fn operand_error() { - let source = ".ORIG x3000\nADD R0, R0, #OOPS; <- error\n.END"; + fn ignore_after_end() { + let source = ".ORIG x3000\nADD R0, R0, R0\n.END then %some #random junk!"; let (maybe_tokens, _) = lex(source, LeniencyLevel::Lenient); let tokens = maybe_tokens.unwrap(); let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); @@ -235,9 +219,25 @@ mod tests { assert_eq!(Some((vec![(Ok(Program { orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), instructions: vec![ - (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Err(()), 24..29)]), 16..29) }), 12..29) + (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Ok(Register(R0)), 24..26)]), 16..26) }), 12..26) ], - end: (Ok(Instruction { label: None, opcode: (Ok(End), 40..44), operands: (Ok(vec![]), 44..44) }), 40..44) }), 0..44)], 0..44)), + }), 0..31)], 0..56)), + file); + } + + #[test] + fn operand_error() { + let source = ".ORIG x3000\nADD R0, R0, #OOPS; <- error\n.END"; + let (maybe_tokens, _) = lex(source, LeniencyLevel::Lenient); + let tokens = maybe_tokens.unwrap(); + let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); + + assert_eq!(Some((vec![(Ok(Program { + orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), + instructions: vec![ + 
(Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Err(()), 24..29)]), 16..29) }), 12..29) + ], + }), 0..44)], 0..44)), file); } @@ -249,11 +249,11 @@ mod tests { let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); assert_eq!(Some((vec![(Ok(Program { - orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), - instructions: vec![ - (Ok(Instruction { label: Some((Err(()), 12..18)), opcode: (Ok(Add), 19..22), operands: (Ok(vec![(Ok(Register(R0)), 23..25), (Ok(Register(R0)), 27..29), (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33)]), 23..33) }), 12..33) - ], - end: (Ok(Instruction { label: None, opcode: (Ok(End), 44..48), operands: (Ok(vec![]), 48..48) }), 44..48) }), 0..48)], 0..48)), + orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), + instructions: vec![ + (Ok(Instruction { label: Some((Err(()), 12..18)), opcode: (Ok(Add), 19..22), operands: (Ok(vec![(Ok(Register(R0)), 23..25), (Ok(Register(R0)), 27..29), (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33)]), 23..33) }), 12..33) + ], + }), 0..48)], 0..48)), file); } From 2e38af56870a41aa13af285f47bc7cb955218b3d Mon Sep 17 00:00:00 2001 From: David Gipson Date: Tue, 24 May 2022 16:30:14 -0500 Subject: [PATCH 42/82] assembler: recover if text before .ORIG invalid --- assembler/bin/as.rs | 2 +- assembler/src/analysis.rs | 2 +- assembler/src/parser.rs | 71 ++++++++++++++++++++++++++------------- assembler/tests/integ.rs | 4 +-- 4 files changed, 51 insertions(+), 28 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index ed58aa1..1b5deb4 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -87,7 +87,7 @@ fn as_() { let background = if args.no_os { None } else { Some(lc3_os::OS_IMAGE.clone()) }; let objects = - file.into_iter() + file.programs.into_iter() .map(|program| assemble(program.0.unwrap())); let mem = link(objects, background); diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 1b58b9f..0fce840 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -361,7 +361,7 @@ impl Analyzer { for analysis in self.analyses.iter_mut() { analysis.visit_file(&mut self.errors, file); } - for program in file { + for program in file.programs.iter() { self.analyze_program(program); } for analysis in self.analyses.iter_mut() { diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index f864f43..e8aa0eb 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -1,9 +1,10 @@ use std::convert::TryFrom; use chumsky::prelude::*; +use chumsky::recovery::SkipUntil; use chumsky::Stream; use lc3_isa::{Reg, Word}; -use crate::Spanned; +use crate::{Span, Spanned}; use crate::LeniencyLevel; use crate::lexer::{LiteralValue, Opcode, Token}; @@ -110,8 +111,8 @@ fn opcode(expected: Opcode) -> impl Parser, Error = S if let Token::Opcode(o) = t { Ok(Ok(o)) } else { unreachable!() } - } else if let Token::Invalid = t { - Ok(Err(())) + // } else if let Token::Invalid = t { + // Ok(Err(())) } else { Err(Simple::expected_input_found(span, [Some(expected_token.clone())], Some(t))) }) @@ -145,7 +146,7 @@ fn instruction(orig: bool, leniency: LeniencyLevel) -> impl Parser impl Parser, Erro }) } -pub(crate) type File = Vec>; +#[derive(Debug)] +pub struct File { + pub(crate) before_first_orig: 
Spanned>, // TODO: check that this only contains newlines and comments (at least if strict) + pub programs: Vec> +} -fn file(leniency: LeniencyLevel) -> impl Parser>>, Error = Simple> { - comments_and_newlines() - .ignore_then( +fn file(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { + none_of(Token::Opcode(Opcode::Orig)).repeated() + .map_with_span(|toks, span| (toks, span)) + .then( program(leniency) .map(|(p, span)| (Ok(p), span)) .separated_by(none_of(Token::Opcode(Opcode::Orig)).repeated()) .allow_trailing() ) .then_ignore(end()) - .map_with_span(|programs, span| (programs, span)) + .map_with_span(|(before_first_orig, programs), span| + (File { before_first_orig, programs }, span)) } pub fn parse(src: &str, tokens: Vec>, leniency: LeniencyLevel) -> (Option>, Vec>) { let len = src.chars().count(); - file(leniency).parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) + file(leniency) + .parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) } @@ -209,6 +217,18 @@ mod tests { use super::Opcode::*; use crate::lexer::lex; + #[test] + fn capture_tokens_before_first_orig_separately() { + let source = "%some #random junk .ORIG x3000\nADD R0, R0, R0\n.END"; + let (maybe_tokens, _) = lex(source, LeniencyLevel::Lenient); + let tokens = maybe_tokens.unwrap(); + let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); + + println!("{:?}", file); + assert_eq!((vec![Token::Invalid, Token::Invalid, Token::Label("JUNK".to_string())], 0..18), + file.unwrap().0.before_first_orig); + } + #[test] fn ignore_after_end() { let source = ".ORIG x3000\nADD R0, R0, R0\n.END then %some #random junk!"; @@ -216,13 +236,16 @@ mod tests { let tokens = maybe_tokens.unwrap(); let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); - assert_eq!(Some((vec![(Ok(Program { - orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), - instructions: vec![ - (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Ok(Register(R0)), 24..26)]), 16..26) }), 12..26) - ], - }), 0..31)], 0..56)), - file); + + let f = file.unwrap().0; + assert_eq!((vec![], 0..5), f.before_first_orig); // TODO: probably doesn't need fixing, but span should probably be 0..0; find source of bug + assert_eq!(vec![(Ok(Program { + orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), + instructions: vec![ + (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Ok(Register(R0)), 24..26)]), 16..26) }), 12..26) + ], + }), 0..31)], + f.programs); } #[test] @@ -232,13 +255,13 @@ mod tests { let tokens = maybe_tokens.unwrap(); let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); - assert_eq!(Some((vec![(Ok(Program { + assert_eq!(vec![(Ok(Program { orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), instructions: vec![ (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Err(()), 24..29)]), 16..29) }), 12..29) ], - }), 0..44)], 0..44)), - file); + }), 0..44)], + file.unwrap().0.programs); } #[test] @@ -248,13 +271,13 @@ mod tests { let tokens = 
maybe_tokens.unwrap(); let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); - assert_eq!(Some((vec![(Ok(Program { + assert_eq!(vec![(Ok(Program { orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), instructions: vec![ - (Ok(Instruction { label: Some((Err(()), 12..18)), opcode: (Ok(Add), 19..22), operands: (Ok(vec![(Ok(Register(R0)), 23..25), (Ok(Register(R0)), 27..29), (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33)]), 23..33) }), 12..33) + (Ok(Instruction { label: Some((Err(()), 12..18)), opcode: (Ok(Add), 19..22), operands: (Ok(vec![(Ok(Register(R0)), 23..25), (Ok(Register(R0)), 27..29), (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33)]), 23..33) }), 12..33) ], - }), 0..48)], 0..48)), - file); + }), 0..48)], + file.unwrap().0.programs); } } \ No newline at end of file diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 1654e99..7ca260a 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -275,8 +275,8 @@ fn test(input: &str, orig: usize, expected_mem: &[Word]) { let (maybe_file, parse_errs) = parser::parse(input, tokens, LeniencyLevel::Lenient); let (mut file, span) = maybe_file.expect("parsing failed"); - assert_eq!(1, file.len(), "parsed unexpected number of programs: {}", file.len()); - let program = file.remove(0).0.expect("parse error in program"); + assert_eq!(1, file.programs.len(), "parsed unexpected number of programs: {}", file.programs.len()); + let program = file.programs.remove(0).0.expect("parse error in program"); let object = assembler::assemble(program); let mem = linker::link([object], None); From b5da038b399b634390bc4faf05b2d37964b6ad27 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 1 Jun 2022 00:21:02 -0500 Subject: [PATCH 43/82] assembler: refactor analysis visitor pattern --- assembler/src/analysis.rs | 427 +++++++++++++++++++++---------------- assembler/src/assembler.rs | 3 +- assembler/src/parser.rs | 79 +++---- 3 files changed, 275 insertions(+), 234 deletions(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 0fce840..5a58f31 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -1,9 +1,9 @@ use std::collections::{HashMap, HashSet}; use std::fmt::{Display, format, Formatter}; use std::string::String; -use itertools::zip; +use itertools::{concat, zip}; use ariadne::{Label, Report, ReportBuilder, ReportKind}; -use lc3_isa::{SignedWord, Word}; +use lc3_isa::{Addr, SignedWord, Word}; use crate::lexer::{LiteralValue, Opcode}; use crate::parser::{File, Instruction, Operand, Program, WithErrData}; use crate::{Span, Spanned}; @@ -218,236 +218,297 @@ fn min_unsigned_width(n: i32) -> u8 { width as u8 } -use Analysis::*; -enum Analysis { - OperandTypes { expected_operands: Option> }, - DuplicateLabels { labels: HashMap>, } +enum InvalidSymbolReason { + InvalidOrig, + PriorInvalidInstruction { estimated_addr: Addr }, + Duplicated, + OutOfBounds, } -impl Analysis { - fn operand_types() -> Self { - OperandTypes { expected_operands: None } +type SymbolTableValue = Result; + + +#[derive(Default)] +struct ParseErrors { + errors: ErrorList +} + +impl ParseErrors { + fn new() -> Self { + Default::default() } - fn duplicate_labels() -> Self { - DuplicateLabels { labels: HashMap::new() } + fn push_error(&mut self, error: Error, span: &Span) { + self.errors.push((error, span.clone())); } +} - fn visit_file(&mut self, errors: &mut ErrorList, file: &File) { - match self { - _ 
=> {} - } +impl MutVisitor for ParseErrors { + fn enter_program_error(&mut self, span: &Span) { + self.push_error(BadProgram, span); + } + fn enter_orig_error(&mut self, span: &Span) { + self.push_error(BadOperands, span); + } + fn enter_instruction_error(&mut self, span: &Span) { + self.push_error(BadInstruction, span); + } + fn enter_label_error(&mut self, span: &Span) { + self.push_error(BadLabel, span); + } + fn enter_opcode_error(&mut self, span: &Span) { + self.push_error(BadOpcode, span); } + fn enter_operands_error(&mut self, span: &Span) { + self.push_error(BadOperands, span); + } + fn enter_operand_error(&mut self, span: &Span) { + self.push_error(BadOperand, span); + } +} - fn visit_program(&mut self, errors: &mut ErrorList, program: &Program, span: &Span) { - match self { - _ => {} - } + +#[derive(Default)] +struct DuplicateLabels { + errors: ErrorList, + labels: HashMap>, +} + +impl DuplicateLabels { + fn new() -> Self { + Default::default() } +} - fn visit_instruction(&mut self, errors: &mut ErrorList, instruction: &Instruction, span: &Span) { - match self { - OperandTypes { expected_operands } => { - use Opcode::*; - *expected_operands = match &instruction.opcode.0 { - Err(_) => None, - Ok(oc) => Some(match oc { - Add | And => vec![Register, Register, OperandType::reg_or_imm5()], - Br(_) => vec![OperandType::pc_offset(9)], - Jsr => vec![OperandType::pc_offset(11)], - Jmp | Jsrr => vec![Register], - Ld | Ldi | Lea - | St | Sti => vec![Register, OperandType::pc_offset(9)], - Ldr | Str => vec![Register, Register, Number { signed: true, width: 6 }], - Not => vec![Register, Register], - Ret | Rti - | Getc | Out - | Puts | In - | Putsp | Halt => vec![], - Trap => vec![OperandType::signed_or_unsigned_number(8)], - Orig => vec![OperandType::signed_or_unsigned_number(16)], // TODO: Disallow signed? - Blkw => vec![UnqualifiedNumber], - Fill => vec![Or(Box::new(Label), - Box::new(OperandType::signed_or_unsigned_number(16)))], - Stringz => vec![String], - }), - }; - } - _ => {} - } +impl MutVisitor for DuplicateLabels { + fn exit_file(&mut self, _file: &File) { + let DuplicateLabels { errors, labels } = self; + labels.iter() + .filter(|(_, occurrences)| occurrences.len() > 1) + .map(|(label, occurrences)| + (DuplicateLabel { + label: label.clone(), + occurrences: occurrences.clone() + }, 0..0) // TODO: dummy span, refactor so not required for errors with alternate span data + ) + .for_each(|e| errors.push(e)); } - fn visit_label(&mut self, errors: &mut ErrorList, label: &String, span: &Span) { - match self { - DuplicateLabels { labels } => { - let occurrences = labels.entry(label.clone()).or_insert(Vec::new()); - occurrences.push(span.clone()); + fn enter_label(&mut self, label: &String, span: &Span) { + let occurrences = self.labels.entry(label.clone()).or_insert(Vec::new()); + occurrences.push(span.clone()); + } +} + + +#[derive(Default)] +struct OperandTypes { + errors: ErrorList, + expected_operands: Option> +} + +impl OperandTypes { + fn new() -> Self { + Default::default() + } +} + +fn orig_expected_operands() -> Vec { + vec![OperandType::signed_or_unsigned_number(16)] // TODO: Disallow signed? 
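    // This helper is shared by enter_orig and the Opcode::Orig arm of enter_opcode
    // below, so .ORIG operands get the same type check in both paths.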
+} + +impl MutVisitor for OperandTypes { + fn enter_orig(&mut self, orig: &Vec>, span: &Span) { + self.expected_operands = Some(orig_expected_operands()); + self.enter_operands(orig, span); + } + + fn enter_opcode_error(&mut self, _span: &Span) { + self.expected_operands = None; + } + + fn enter_opcode(&mut self, opcode: &Opcode, _span: &Span) { + use Opcode::*; + self.expected_operands = Some( + match opcode { + Add | And => vec![Register, Register, OperandType::reg_or_imm5()], + Br(_) => vec![OperandType::pc_offset(9)], + Jsr => vec![OperandType::pc_offset(11)], + Jmp | Jsrr => vec![Register], + Ld | Ldi | Lea + | St | Sti => vec![Register, OperandType::pc_offset(9)], + Ldr | Str => vec![Register, Register, Number { signed: true, width: 6 }], + Not => vec![Register, Register], + Ret | Rti + | Getc | Out + | Puts | In + | Putsp | Halt => vec![], + Trap => vec![OperandType::signed_or_unsigned_number(8)], + Orig => orig_expected_operands(), + Blkw => vec![UnqualifiedNumber], + Fill => vec![Or(Box::new(Label), + Box::new(OperandType::signed_or_unsigned_number(16)))], + Stringz => vec![String], } - _ => {} - } + ); } - fn visit_operands(&mut self, errors: &mut ErrorList, operands: &Vec>, span: &Span) { - match self { - OperandTypes { expected_operands } => { - if let Some(expected) = expected_operands { - // TODO: create longest common subsequence diff for more precise errors - let ops_len = operands.len(); - let exp_len = expected.len(); - if ops_len != exp_len { - errors.push((WrongNumberOfOperands { expected: exp_len, actual: ops_len }, span.clone())) - } else { - for ((op_res, op_span), exp_ty) in zip(operands, expected) { - if let Ok(op) = op_res { - if !exp_ty.check(op) { - let actual = if let Operand::NumberLiteral(value) = op { - OperandType::of_number_literal(value, Some(exp_ty.accepted_number_signs())) - } else { - OperandType::of(op) - }; - errors.push((OperandTypeMismatch { expected: exp_ty.clone(), actual }, op_span.clone())); - } - } + fn enter_operands(&mut self, operands: &Vec>, span: &Span) { + if let Some(expected) = &self.expected_operands { + // TODO: create longest common subsequence diff for more precise errors + let ops_len = operands.len(); + let exp_len = expected.len(); + if ops_len != exp_len { + self.errors.push((WrongNumberOfOperands { expected: exp_len, actual: ops_len }, span.clone())) + } else { + for ((op_res, op_span), exp_ty) in zip(operands, expected) { + if let Ok(op) = op_res { + if !exp_ty.check(op) { + let actual = if let Operand::NumberLiteral(value) = op { + OperandType::of_number_literal(value, Some(exp_ty.accepted_number_signs())) + } else { + OperandType::of(op) + }; + self.errors.push((OperandTypeMismatch { expected: exp_ty.clone(), actual }, op_span.clone())); } } } } - _ => {} } } +} - fn visit_operand(&mut self, errors: &mut ErrorList, operand: &Operand, span: &Span) { - match self { - _ => {} - } +fn visit(v: &mut impl MutVisitor, file: &File) { + v.enter_file(file); + for program in file.programs.iter() { + visit_program(v, program); } + v.exit_file(file); +} - fn exit_file(&mut self, errors: &mut ErrorList, file: &File) { - match self { - DuplicateLabels { labels } => { - labels.iter() - .filter(|(_, occurrences)| occurrences.len() > 1) - .map(|(label, occurrences)| - (DuplicateLabel { - label: label.clone(), - occurrences: occurrences.clone() - }, 0..0) // TODO: dummy span, refactor so not required for errors with alternate span data - ) - .for_each(|e| errors.push(e)); +fn visit_program(v: &mut impl MutVisitor, program: &WithErrData) { + 
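    // Walks a single program node: enter_program_error fires when the node failed to
    // parse, enter_program when it parsed, and the orig line and each instruction are
    // then visited in source order so every analysis sees them the same way.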
let (program_res, span) = program; + match program_res { + Err(_) => { v.enter_program_error(span); } + Ok(p) => { + v.enter_program( p, span); + + let Program { orig, instructions } = p; + visit_orig(v, orig); + for instruction in instructions { + visit_instruction(v, instruction); } - _ => {} } } - } -struct Analyzer { - errors: ErrorList, - analyses: [Analysis; 2], +fn visit_orig(v: &mut impl MutVisitor, orig: &WithErrData>>) { + let (orig_res, span) = orig; + match orig_res { + Err(_) => { v.enter_orig_error(span); } + Ok(o) => { + v.enter_orig( o, span); + for operand in o { + visit_operand(v, operand); + } + } + } } -impl Analyzer { - fn new() -> Self { - Self { - errors: Vec::new(), - analyses: [ - Analysis::operand_types(), - Analysis::duplicate_labels() - ] +fn visit_instruction(v: &mut impl MutVisitor, instruction: &WithErrData) { + let (inst_res, span) = instruction; + match inst_res { + Err(_) => { v.enter_instruction_error(span); } + Ok(i) => { + v.enter_instruction(i, span); + + let Instruction { label, opcode, operands } = i; + if let Some(l) = label { + visit_label(v, l); + } + visit_opcode(v, opcode); + visit_operands(v, operands); } } +} - fn analyze(&mut self, file: &File) { - for analysis in self.analyses.iter_mut() { - analysis.visit_file(&mut self.errors, file); - } - for program in file.programs.iter() { - self.analyze_program(program); - } - for analysis in self.analyses.iter_mut() { - analysis.exit_file(&mut self.errors, file); - } +fn visit_label(v: &mut impl MutVisitor, label: &WithErrData) { + let (label_res, span) = label; + match label_res { + Err(_) => { v.enter_label_error(span); } + Ok(l) => { v.enter_label( l, span); } } +} - fn analyze_program(&mut self, program: &WithErrData) { - let (program_res, program_span) = program; - match program_res { - Err(_) => { self.errors.push((BadProgram, program_span.clone())); } - Ok(prog) => { - for analysis in self.analyses.iter_mut() { - analysis.visit_program(&mut self.errors, prog, program_span); - } +fn visit_opcode(v: &mut impl MutVisitor, opcode: &WithErrData) { + let (opcode_res, span) = opcode; + match opcode_res { + Err(_) => { v.enter_opcode_error(span); } + Ok(oc) => { v.enter_opcode( oc, span); } + } +} - let Program { orig, instructions } = prog; - self.analyze_instruction(orig); - for instruction in instructions { - self.analyze_instruction(instruction); - } +fn visit_operands(v: &mut impl MutVisitor, operands: &WithErrData>>) { + let (ops_res, span) = operands; + match ops_res { + Err(_) => { v.enter_operands_error(span); } + Ok(o) => { + v.enter_operands( o, span); + for operand in o { + visit_operand(v, operand); } } } +} + +fn visit_operand(v: &mut impl MutVisitor, operand: &WithErrData) { + let (op_res, span) = operand; + match op_res { + Err(_) => { v.enter_operand_error(span); } + Ok(o) => { v.enter_operand( o, span); } + } +} - fn analyze_instruction(&mut self, instruction: &WithErrData) { - let (instruction_res, instruction_span) = instruction; - match instruction_res { - Err(_) => { self.errors.push((BadInstruction, instruction_span.clone())); }, - Ok(inst) => { - for analysis in self.analyses.iter_mut() { - analysis.visit_instruction(&mut self.errors, inst, instruction_span); - } +trait MutVisitor { + fn enter_file(&mut self, _file: &File) {} + fn exit_file(&mut self, _file: &File) {} - let Instruction { label, opcode, operands } = inst; + fn enter_program_error(&mut self, _span: &Span) {} + fn enter_program(&mut self, _program: &Program, _span: &Span) {} - if let Some(l_wed) = label { - let 
(label_res, label_span) = l_wed; - match label_res { - Err(_) => { self.errors.push((BadLabel, label_span.clone())) }, - Ok(l) => { - for analysis in self.analyses.iter_mut() { - analysis.visit_label(&mut self.errors, l, label_span); - } - } - } - } + fn enter_orig_error(&mut self, _span: &Span) {} + fn enter_orig(&mut self, _orig: &Vec>, _span: &Span) {} - let (oc_res, opcode_span) = opcode; - if let Err(_) = oc_res { - self.errors.push((BadOpcode, opcode_span.clone())); - } + fn enter_instruction_error(&mut self, _span: &Span) {} + fn enter_instruction(&mut self, _instruction: &Instruction, _span: &Span) {} - self.analyze_operands(operands); - } - } - } + fn enter_label_error(&mut self, _span: &Span) {} + fn enter_label(&mut self, _label: &String, _span: &Span) {} - fn analyze_operands(&mut self, operands: &WithErrData>>) { - let (operands_res, operands_span) = operands; - match operands_res { - Err(_) => { self.errors.push((BadOperands, operands_span.clone())); } - Ok(ops) => { - for analysis in self.analyses.iter_mut() { - analysis.visit_operands(&mut self.errors, ops, operands_span); - } - } - } - } + fn enter_opcode_error(&mut self, _span: &Span) {} + fn enter_opcode(&mut self, _opcode: &Opcode, _span: &Span) {} - fn analyze_operand(&mut self, operand: &WithErrData) { - let (operand_res, operand_span) = operand; - match operand_res { - Err(_) => { self.errors.push((BadOperand, operand_span.clone())); } - Ok(op) => { - for analysis in self.analyses.iter_mut() { - analysis.visit_operand(&mut self.errors, op, operand_span); - } - } - } - } + fn enter_operands_error(&mut self, _span: &Span) {} + fn enter_operands(&mut self, _operands: &Vec>, _span: &Span) {} + + fn enter_operand_error(&mut self, _span: &Span) {} + fn enter_operand(&mut self, _operand: &Operand, _span: &Span) {} } pub fn validate(file: &File) -> ErrorList { - let mut analyzer = Analyzer::new(); - analyzer.analyze(file); - analyzer.errors + let mut pe = ParseErrors::new(); + visit(&mut pe, file); + + let mut dl = DuplicateLabels::new(); + visit(&mut dl, file); + + let mut ot = OperandTypes::new(); + visit(&mut ot, file); + + concat([ + pe.errors, + dl.errors, + ot.errors + ]) } diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs index 3bc7ecf..37c4f91 100644 --- a/assembler/src/assembler.rs +++ b/assembler/src/assembler.rs @@ -548,8 +548,7 @@ fn second_pass(symbol_table: SymbolTable, origin: Addr, instructions: Vec Object { - let Program { orig, instructions: parser_instructions, .. } = program; - let parser::Instruction { operands: raw_orig_operands, .. } = unwrap(orig); + let Program { orig: raw_orig_operands, instructions: parser_instructions, .. 
} = program; let orig_operand = unwrap(raw_orig_operands).remove(0); let origin = LiteralValue::unwrap_try_from(orig_operand).unwrap_try_into().unwrap(); diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index e8aa0eb..b55f379 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -12,7 +12,7 @@ pub(crate) type WithErrData = Spanned>; #[derive(Debug, Eq, PartialEq)] pub struct Program { - pub(crate) orig: WithErrData, + pub(crate) orig: WithErrData>>, pub(crate) instructions: Vec>, } @@ -94,32 +94,19 @@ fn operand() -> impl Parser, Error = Simple> operand.map_with_span(|o, span| (o, span)) } -fn any_opcode() -> impl Parser, Error = Simple> { - filter_map(move |span, t: Token| - match t.clone() { - Token::Opcode(o) => Ok(Ok(o)), - Token::Invalid => Ok(Err(())), - _ => Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected - }) - .map_with_span(|o, span| (o, span)) -} +fn operands(leniency: LeniencyLevel) -> impl Parser>>, Error = Simple> { + let operand_separator: Box>> = + match leniency { + LeniencyLevel::Lenient => Box::new(just(Token::Comma).or_not().ignored()), + LeniencyLevel::Strict => Box::new(just(Token::Comma).ignored()), + }; -fn opcode(expected: Opcode) -> impl Parser, Error = Simple> { - let expected_token = Token::Opcode(expected); - filter_map(move |span, t| - if t == expected_token { - if let Token::Opcode(o) = t { - Ok(Ok(o)) - } else { unreachable!() } - // } else if let Token::Invalid = t { - // Ok(Err(())) - } else { - Err(Simple::expected_input_found(span, [Some(expected_token.clone())], Some(t))) - }) - .map_with_span(|o, span| (o, span)) + operand() + .separated_by(operand_separator) + .map_with_span(|os, span| (Ok(os), span)) } -fn instruction(orig: bool, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { +fn instruction(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { let label = select! 
{ Token::Label(s) => Ok(s), @@ -127,28 +114,19 @@ fn instruction(orig: bool, leniency: LeniencyLevel) -> impl Parser, Error = Simple>> = - if orig { - Box::new(opcode(Opcode::Orig)) - } else { - Box::new(any_opcode()) - }; - - let operand_separator: Box>> = - match leniency { - LeniencyLevel::Lenient => Box::new(just(Token::Comma).or_not().ignored()), - LeniencyLevel::Strict => Box::new(just(Token::Comma).ignored()), - }; - - let operands = - operand() - .separated_by(operand_separator) - .map_with_span(|os, span| (Ok(os), span)); + let opcode = + filter_map(move |span, t: Token| + match t.clone() { + Token::Opcode(o) => Ok(Ok(o)), + Token::Invalid => Ok(Err(())), + _ => Err(Simple::expected_input_found(span, None, Some(t))) // TODO: improve error, expected + }) + .map_with_span(|o, span| (o, span)); label.or_not() .then_ignore(comments_and_newlines()) - .then(oc) - .then(operands) + .then(opcode) + .then(operands(leniency)) .map_with_span(|((l, o), os), span| { let instruction = Instruction { label: l, @@ -167,10 +145,13 @@ fn comments_and_newlines() -> impl Parser> { } fn program(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { - instruction(true, leniency) - .map(|(i, span)| (Ok(i), span)) + let orig = + just(Token::Opcode(Opcode::Orig)) + .ignore_then(operands(leniency)); + + orig .then( - instruction(false, leniency) + instruction(leniency) .map(|(i, span)| (Ok(i), span)) .separated_by(comments_and_newlines()) .allow_leading() @@ -240,7 +221,7 @@ mod tests { let f = file.unwrap().0; assert_eq!((vec![], 0..5), f.before_first_orig); // TODO: probably doesn't need fixing, but span should probably be 0..0; find source of bug assert_eq!(vec![(Ok(Program { - orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), + orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), instructions: vec![ (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Ok(Register(R0)), 24..26)]), 16..26) }), 12..26) ], @@ -256,7 +237,7 @@ mod tests { let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); assert_eq!(vec![(Ok(Program { - orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), + orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), instructions: vec![ (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Err(()), 24..29)]), 16..29) }), 12..29) ], @@ -272,7 +253,7 @@ mod tests { let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); assert_eq!(vec![(Ok(Program { - orig: (Ok(Instruction { label: None, opcode: (Ok(Orig), 0..5), operands: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11) }), 0..11), + orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), instructions: vec![ (Ok(Instruction { label: Some((Err(()), 12..18)), opcode: (Ok(Add), 19..22), operands: (Ok(vec![(Ok(Register(R0)), 23..25), (Ok(Register(R0)), 27..29), (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33)]), 23..33) }), 12..33) ], From 18eee9cefad5cced29fd03909e478e9047d21517 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 3 Jun 2022 13:44:42 -0500 Subject: [PATCH 44/82] assembler: remove mid-level unwraps, add symbol table analysis --- 
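The core of this change is swapping the panicking unwrap helpers for Result-returning ones (result, try_result, try_map), so a missing or malformed operand surfaces as Err(()) from assemble() instead of a panic, alongside a SymbolTableAnalysis that estimates label addresses even when earlier lines failed to parse. Below is a minimal, self-contained sketch of the helper shape only; Span and WithErrData here are hypothetical stand-ins for the crate's aliases, not code from this patch.

use std::convert::{TryFrom, TryInto};
use std::ops::Range;

// Hypothetical stand-ins for the crate's Span and WithErrData aliases.
type Span = Range<usize>;
type WithErrData<T> = (Result<T, ()>, Span);

// Same shape as parser::try_map introduced below: a missing operand and a failed
// conversion both collapse into Err(()) instead of panicking mid-assembly.
fn try_map<T, U: TryFrom<T>>(maybe_v: Option<WithErrData<T>>) -> Result<U, ()> {
    let (res, _span) = maybe_v.ok_or(())?;
    res?.try_into().map_err(|_| ())
}

fn main() {
    let operand: Option<WithErrData<i64>> = Some((Ok(300), 0..3));
    let narrowed: Result<u8, ()> = try_map(operand); // 300 does not fit in a u8
    assert_eq!(Err(()), narrowed);

    let missing: Result<u8, ()> = try_map(None::<WithErrData<i64>>);
    assert_eq!(Err(()), missing);
}
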
assembler/bin/as.rs | 2 +- assembler/src/analysis.rs | 177 +++++++++++++++++++++++++---- assembler/src/assembler.rs | 225 +++++++++++++------------------------ assembler/src/linker.rs | 4 +- assembler/src/parser.rs | 60 +++++++--- assembler/tests/integ.rs | 2 +- 6 files changed, 282 insertions(+), 188 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 1b5deb4..8c0d4b3 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -88,7 +88,7 @@ fn as_() { let objects = file.programs.into_iter() - .map(|program| assemble(program.0.unwrap())); + .map(|program| assemble(program.0.expect("Found invalid object.")).expect("Failed to assemble object.")); let mem = link(objects, background); diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 5a58f31..ebdd246 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -1,11 +1,12 @@ use std::collections::{HashMap, HashSet}; +use std::convert::{TryFrom, TryInto}; use std::fmt::{Display, format, Formatter}; use std::string::String; use itertools::{concat, zip}; use ariadne::{Label, Report, ReportBuilder, ReportKind}; use lc3_isa::{Addr, SignedWord, Word}; use crate::lexer::{LiteralValue, Opcode}; -use crate::parser::{File, Instruction, Operand, Program, WithErrData}; +use crate::parser::{File, get_first, get_result, Instruction, Operand, Program, result, WithErrData}; use crate::{Span, Spanned}; type ErrorList = Vec>; @@ -68,6 +69,8 @@ pub fn report(spanned_error: Spanned) -> Report { } use OperandType::*; +use crate::assembler::get_orig; + #[derive(Clone)] pub enum OperandType { Register, @@ -218,22 +221,13 @@ fn min_unsigned_width(n: i32) -> u8 { width as u8 } -enum InvalidSymbolReason { - InvalidOrig, - PriorInvalidInstruction { estimated_addr: Addr }, - Duplicated, - OutOfBounds, -} - -type SymbolTableValue = Result; - #[derive(Default)] -struct ParseErrors { +struct ParseErrorsAnalysis { errors: ErrorList } -impl ParseErrors { +impl ParseErrorsAnalysis { fn new() -> Self { Default::default() } @@ -243,7 +237,7 @@ impl ParseErrors { } } -impl MutVisitor for ParseErrors { +impl MutVisitor for ParseErrorsAnalysis { fn enter_program_error(&mut self, span: &Span) { self.push_error(BadProgram, span); } @@ -269,20 +263,20 @@ impl MutVisitor for ParseErrors { #[derive(Default)] -struct DuplicateLabels { +struct DuplicateLabelsAnalysis { errors: ErrorList, labels: HashMap>, } -impl DuplicateLabels { +impl DuplicateLabelsAnalysis { fn new() -> Self { Default::default() } } -impl MutVisitor for DuplicateLabels { +impl MutVisitor for DuplicateLabelsAnalysis { fn exit_file(&mut self, _file: &File) { - let DuplicateLabels { errors, labels } = self; + let DuplicateLabelsAnalysis { errors, labels } = self; labels.iter() .filter(|(_, occurrences)| occurrences.len() > 1) .map(|(label, occurrences)| @@ -301,13 +295,147 @@ impl MutVisitor for DuplicateLabels { } +type RoughAddr = u32; + +#[derive(Debug)] +enum InvalidSymbolError { + InvalidOrig { estimated_addr: RoughAddr }, + PriorInvalidInstruction { estimated_addr: RoughAddr }, + Duplicated, + OutOfBounds, +} + +type SymbolTableValue = Result; + +#[derive(Debug)] +enum SymbolTableState { + Valid, + InvalidOrig, + InvalidInstruction, +} + +impl Default for SymbolTableState { + fn default() -> Self { + SymbolTableState::Valid + } +} + +enum AddressesOccupiedError { + BadOpcode, + BadOperand +} + +impl Instruction { + fn get_label(&self) -> Option<&String> { + self.label.as_ref() + .and_then(|res| get_result(res).as_ref().ok()) + } + + fn 
get_first_operand(&self) -> Option<&Operand> { + get_result(&self.operands).as_ref().ok() + .and_then(|ops| get_first(ops)) + } + + fn addresses_occupied(&self) -> Result { + match get_result(&self.opcode) { + Err(()) => Err(AddressesOccupiedError::BadOpcode), + Ok(oc) => match oc { + Opcode::Stringz => + self.get_first_operand() + .and_then(|op| op.clone().get_string()) + .ok_or(AddressesOccupiedError::BadOperand) + .map(|s| s.len() as Addr), + Opcode::Blkw => + self.get_first_operand() + .and_then(|op| op.clone().get_unqualified_number_value()) + .ok_or(AddressesOccupiedError::BadOperand), + _ => Ok(1) + } + } + } +} + +type SymbolTable = HashMap; + +#[derive(Debug, Default)] +struct SymbolTableAnalysis { + location_counter: RoughAddr, + state: SymbolTableState, + symbol_table: SymbolTable, +} + +impl SymbolTableAnalysis { + fn new() -> Self { + Default::default() + } + + fn invalidate_state(&mut self, state: SymbolTableState) { + if let SymbolTableState::Valid = self.state { + self.state = state; + } + } +} + +const ORIG_ERROR_STARTING_ADDRESS_ESTIMATE: RoughAddr = 0x3000; +const INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE: RoughAddr = 1; + +impl MutVisitor for SymbolTableAnalysis { + fn enter_orig_error(&mut self, _span: &Span) { + self.location_counter = ORIG_ERROR_STARTING_ADDRESS_ESTIMATE; + self.invalidate_state(SymbolTableState::InvalidOrig); + } + + fn enter_orig(&mut self, orig: &Vec>, _span: &Span) { + self.location_counter = get_first(orig) + .and_then(|op| Word::try_from(op.clone()).map(|w| w as RoughAddr).ok()) + .unwrap_or_else(| | { + self.state = SymbolTableState::InvalidOrig; + ORIG_ERROR_STARTING_ADDRESS_ESTIMATE + }); + } + + fn enter_instruction_error(&mut self, _span: &Span) { + self.location_counter += INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE; + self.invalidate_state(SymbolTableState::InvalidInstruction); + } + + fn enter_instruction(&mut self, instruction: &Instruction, _span: &Span) { + if let Some(label) = instruction.get_label() { + self.symbol_table.entry(label.clone()) + .and_modify(|e| *e = Err(InvalidSymbolError::Duplicated)) + .or_insert( + match self.state { + SymbolTableState::Valid => + self.location_counter.try_into() + .map_err(|_| InvalidSymbolError::OutOfBounds), + SymbolTableState::InvalidOrig => + Err(InvalidSymbolError::InvalidOrig { + estimated_addr: self.location_counter + }), + SymbolTableState::InvalidInstruction => + Err(InvalidSymbolError::PriorInvalidInstruction { + estimated_addr: self.location_counter + }), + } + ); + } + self.location_counter += instruction.addresses_occupied() + .unwrap_or_else(|_| { + self.state = SymbolTableState::InvalidInstruction; + INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE as Addr + }) as RoughAddr; + } +} + + + #[derive(Default)] -struct OperandTypes { +struct OperandTypesAnalysis { errors: ErrorList, expected_operands: Option> } -impl OperandTypes { +impl OperandTypesAnalysis { fn new() -> Self { Default::default() } @@ -317,7 +445,7 @@ fn orig_expected_operands() -> Vec { vec![OperandType::signed_or_unsigned_number(16)] // TODO: Disallow signed? 
} -impl MutVisitor for OperandTypes { +impl MutVisitor for OperandTypesAnalysis { fn enter_orig(&mut self, orig: &Vec>, span: &Span) { self.expected_operands = Some(orig_expected_operands()); self.enter_operands(orig, span); @@ -496,15 +624,18 @@ trait MutVisitor { } pub fn validate(file: &File) -> ErrorList { - let mut pe = ParseErrors::new(); + let mut pe = ParseErrorsAnalysis::new(); visit(&mut pe, file); - let mut dl = DuplicateLabels::new(); + let mut dl = DuplicateLabelsAnalysis::new(); visit(&mut dl, file); - let mut ot = OperandTypes::new(); + let mut ot = OperandTypesAnalysis::new(); visit(&mut ot, file); + let mut st = SymbolTableAnalysis::new(); + visit(&mut st, file); + concat([ pe.errors, dl.errors, diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs index 37c4f91..56727ac 100644 --- a/assembler/src/assembler.rs +++ b/assembler/src/assembler.rs @@ -4,7 +4,7 @@ use std::fmt::Debug; use std::num::{ParseIntError, TryFromIntError}; use lc3_isa::{Addr, Reg, SignedWord, Word}; use crate::lexer::{ConditionCodes, LiteralValue, Opcode}; -use crate::parser::Operand; +use crate::parser::{Operand, result, try_map, try_result}; use crate::parser; use crate::parser::{Program, WithErrData}; @@ -20,11 +20,10 @@ impl TryFrom for Sr2OrImm5 { type Error = (); fn try_from(value: Operand) -> Result { - Reg::try_from(value.clone()) + value.clone().try_into() .map(Sr2OrImm5::Sr2) .or_else(|_| - LiteralValue::try_from(value) - .unwrap_try_into() + value.try_into() .map(Sr2OrImm5::Imm5) .map_err(|_| ())) } @@ -34,36 +33,42 @@ impl TryFrom for PcOffset { type Error = (); fn try_from(value: Operand) -> Result { - LiteralValue::try_from(value.clone()) - .map(|lv| { - let sw = lv.try_into().unwrap(); - PcOffset::Number(sw) - }) - .or_else(|_| Ok(PcOffset::Label(value.label()))) + value.clone().try_into() + .map(PcOffset::Number) + .or_else(|_| + value.get_label() + .ok_or(()) + .map(PcOffset::Label)) } } impl TryFrom for SignedWord { - type Error = TryFromIntError; + type Error = (); fn try_from(value: Operand) -> Result { - LiteralValue::try_from(value).unwrap_try_into() + LiteralValue::try_from(value)? + .try_into() + .map_err(|_| ()) } } impl TryFrom for Word { - type Error = TryFromIntError; + type Error = (); fn try_from(value: Operand) -> Result { - LiteralValue::try_from(value).unwrap_try_into() + LiteralValue::try_from(value)? + .try_into() + .map_err(|_| ()) } } impl TryFrom for u8 { - type Error = TryFromIntError; + type Error = (); fn try_from(value: Operand) -> Result { - LiteralValue::try_from(value).unwrap_try_into() + LiteralValue::try_from(value)? + .try_into() + .map_err(|_| ()) } } @@ -71,10 +76,12 @@ impl TryFrom for FillValue { type Error = (); fn try_from(value: Operand) -> Result { - LiteralValue::try_from(value.clone()) - .unwrap_try_into() + value.clone().try_into() .map(FillValue::Number) - .or_else(|_| Ok(FillValue::Label(value.label()))) + .or_else(|_| + value.get_label() + .ok_or(()) + .map(FillValue::Label)) } } @@ -117,32 +124,12 @@ pub(crate) enum Instruction { impl Instruction { fn addresses_occupied(&self) -> Addr { - use Instruction::*; - match self { - Add { .. } - | And { .. } - | Br { .. } - | Jmp { .. } - | Jsr { .. } - | Jsrr { .. } - | Ld { .. } - | Ldi { .. } - | Ldr { .. } - | Lea { .. } - | Not { .. } - | Ret - | Rti - | St { .. } - | Sti { .. } - | Str { .. } - | Trap { .. } - | Fill { .. 
} => 1, - - Blkw { size } => *size, + Instruction::Blkw { size } => *size, // +1 is to count the null-terminator - Stringz { string } => (string.len() + 1) as Addr, // TODO: correct for escape characters + Instruction::Stringz { string } => (string.len() + 1) as Addr, + _ => 1, } } } @@ -159,61 +146,6 @@ pub(crate) enum ObjectWord { UnlinkedInstruction(Instruction), } -fn unwrap(v: WithErrData) -> T { - v.0.unwrap() -} - -trait UnwrapTryFrom where - Self: Sized -{ - type Error; - - fn unwrap_try_from(v: T) -> Result; -} - -trait UnwrapTryInto { - type Error; - - fn unwrap_try_into(self) -> Result; -} - -impl UnwrapTryInto for U where - T: UnwrapTryFrom -{ - type Error = E; - - fn unwrap_try_into(self) -> Result { - T::unwrap_try_from(self) - } -} - -impl UnwrapTryFrom> for U where - U: TryFrom -{ - type Error = E; - - fn unwrap_try_from(v: WithErrData) -> Result { - unwrap(v).try_into() - } -} - -impl UnwrapTryFrom> for U where - U: TryFrom -{ - type Error = E; - - fn unwrap_try_from(v: Result) -> Result { - v.unwrap().try_into() - } -} - -fn unwrap_into(maybe_v: Option) -> U where - E: Debug, - U: UnwrapTryFrom -{ - maybe_v.unwrap().unwrap_try_into().unwrap() -} - impl TryFrom for Instruction { type Error = (); @@ -221,97 +153,97 @@ impl TryFrom for Instruction { fn try_from(i: parser::Instruction) -> Result { let parser::Instruction { opcode: raw_opcode, operands: raw_operands, .. } = i; - let operands = unwrap(raw_operands); - match unwrap(raw_opcode) { + let operands = result(raw_operands)?; + match result(raw_opcode)? { Opcode::Add => { let mut os = operands.into_iter(); - let dr = unwrap_into(os.next()); - let sr1 = unwrap_into(os.next()); - let sr2_or_imm5 = unwrap_into(os.next()); + let dr = try_map(os.next())?; + let sr1 = try_map(os.next())?; + let sr2_or_imm5 = try_map(os.next())?; Ok(Instruction::Add { dr, sr1, sr2_or_imm5 }) } Opcode::And => { let mut os = operands.into_iter(); - let dr = unwrap_into(os.next()); - let sr1 = unwrap_into(os.next()); - let sr2_or_imm5 = unwrap_into(os.next()); + let dr = try_map(os.next())?; + let sr1 = try_map(os.next())?; + let sr2_or_imm5 = try_map(os.next())?; Ok(Instruction::And { dr, sr1, sr2_or_imm5 }) } Opcode::Br(cond_codes) => { let mut os = operands.into_iter(); - let pc_offset9 = unwrap_into(os.next()); + let pc_offset9 = try_map(os.next())?; Ok(Instruction::Br { cond_codes, pc_offset9 }) } Opcode::Jmp => { let mut os = operands.into_iter(); - let base = unwrap_into(os.next()); + let base = try_map(os.next())?; Ok(Instruction::Jmp { base }) } Opcode::Jsr => { let mut os = operands.into_iter(); - let pc_offset11 = unwrap_into(os.next()); + let pc_offset11 = try_map(os.next())?; Ok(Instruction::Jsr { pc_offset11 }) } Opcode::Jsrr => { let mut os = operands.into_iter(); - let base = unwrap_into(os.next()); + let base = try_map(os.next())?; Ok(Instruction::Jsrr { base }) } Opcode::Ld => { let mut os = operands.into_iter(); - let dr = unwrap_into(os.next()); - let pc_offset9 = unwrap_into(os.next()); + let dr = try_map(os.next())?; + let pc_offset9 = try_map(os.next())?; Ok(Instruction::Ld { dr, pc_offset9 }) } Opcode::Ldi => { let mut os = operands.into_iter(); - let dr = unwrap_into(os.next()); - let pc_offset9 = unwrap_into(os.next()); + let dr = try_map(os.next())?; + let pc_offset9 = try_map(os.next())?; Ok(Instruction::Ldi { dr, pc_offset9 }) } Opcode::Ldr => { let mut os = operands.into_iter(); - let dr = unwrap_into(os.next()); - let base = unwrap_into(os.next()); - let offset6 = unwrap_into(os.next()); + let dr = 
try_map(os.next())?; + let base = try_map(os.next())?; + let offset6 = try_map(os.next())?; Ok(Instruction::Ldr { dr, base, offset6 }) } Opcode::Lea => { let mut os = operands.into_iter(); - let dr = unwrap_into(os.next()); - let pc_offset9 = unwrap_into(os.next()); + let dr = try_map(os.next())?; + let pc_offset9 = try_map(os.next())?; Ok(Instruction::Lea { dr, pc_offset9 }) } Opcode::Not => { let mut os = operands.into_iter(); - let dr = unwrap_into(os.next()); - let sr = unwrap_into(os.next()); + let dr = try_map(os.next())?; + let sr = try_map(os.next())?; Ok(Instruction::Not { dr, sr }) } Opcode::Ret => Ok(Instruction::Ret), Opcode::Rti => Ok(Instruction::Rti), Opcode::St => { let mut os = operands.into_iter(); - let sr = unwrap_into(os.next()); - let pc_offset9 = unwrap_into(os.next()); + let sr = try_map(os.next())?; + let pc_offset9 = try_map(os.next())?; Ok(Instruction::St { sr, pc_offset9 }) } Opcode::Sti => { let mut os = operands.into_iter(); - let sr = unwrap_into(os.next()); - let pc_offset9 = unwrap_into(os.next()); + let sr = try_map(os.next())?; + let pc_offset9 = try_map(os.next())?; Ok(Instruction::Sti { sr, pc_offset9 }) } Opcode::Str => { let mut os = operands.into_iter(); - let sr = unwrap_into(os.next()); - let base = unwrap_into(os.next()); - let offset6 = unwrap_into(os.next()); + let sr = try_map(os.next())?; + let base = try_map(os.next())?; + let offset6 = try_map(os.next())?; Ok(Instruction::Str { sr, base, offset6 }) } Opcode::Trap => { let mut os = operands.into_iter(); - let trap_vec = unwrap_into(os.next()); + let trap_vec = try_map(os.next())?; Ok(Instruction::Trap { trap_vec }) } @@ -320,17 +252,17 @@ impl TryFrom for Instruction { Opcode::Fill => { let mut os = operands.into_iter(); - let value = unwrap_into(os.next()); + let value = try_map(os.next())?; Ok(Instruction::Fill { value }) } Opcode::Blkw => { let mut os = operands.into_iter(); - let size = unwrap(os.next().unwrap()).unqualified_number_value(); + let size = try_result(os.next())?.get_unqualified_number_value().ok_or(())?; Ok(Instruction::Blkw { size }) } Opcode::Stringz => { let mut os = operands.into_iter(); - let string = unwrap(os.next().unwrap()).string(); + let string = try_result(os.next())?.get_string().ok_or(())?; Ok(Instruction::Stringz { string }) } @@ -364,7 +296,7 @@ fn calculate_offset(location_counter: &Addr, label_address: &Addr) -> SignedWord (la - (lc + 1)) as SignedWord } -pub(crate) fn try_assemble(symbol_table: &SymbolTable, location_counter: &Addr, instruction: Instruction) -> AssemblyResult { +pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: &Addr, instruction: Instruction) -> AssemblyResult { use AssemblyResult::*; use ObjectWord::*; @@ -502,7 +434,7 @@ pub(crate) fn try_assemble(symbol_table: &SymbolTable, location_counter: &Addr, .collect()), Instruction::Stringz { string } => { let mut chars = string.chars() - .map(|c| Value(c as Word)) // TODO: correct for escape chars + .map(|c| Value(c as Word)) .collect::>(); chars.push(Value(0x00)); // null-terminator MultipleObjectWords(chars) @@ -510,25 +442,25 @@ pub(crate) fn try_assemble(symbol_table: &SymbolTable, location_counter: &Addr, } } -fn first_pass(origin: Addr, instructions: Vec>) -> (Vec, SymbolTable) { +fn first_pass(origin: Addr, instructions: Vec>) -> Result<(Vec, SymbolTable), ()> { let mut symbol_table = HashMap::new(); let mut words = Vec::new(); let mut location_counter = origin; for raw_instruction in instructions.into_iter() { - let parser_instruction = 
unwrap(raw_instruction); + let parser_instruction = result(raw_instruction)?; if let Some(l) = parser_instruction.label.clone() { // TODO: label not needed for conversion to Instruction; consider changing to TryFrom<(Opcode, Operands)> to avoid clone - symbol_table.insert(unwrap(l), location_counter); + symbol_table.insert(result(l)?, location_counter); }; - let instruction: Instruction = parser_instruction.try_into().unwrap(); + let instruction: Instruction = parser_instruction.try_into()?; let addresses_used = instruction.addresses_occupied(); words.push(instruction); location_counter += addresses_used; } - (words, symbol_table) + Ok((words, symbol_table)) } fn second_pass(symbol_table: SymbolTable, origin: Addr, instructions: Vec) -> Object { @@ -537,7 +469,7 @@ fn second_pass(symbol_table: SymbolTable, origin: Addr, instructions: Vec { words.push(wd); } AssemblyResult::MultipleObjectWords(wds) => { words.extend(wds); } } @@ -547,11 +479,14 @@ fn second_pass(symbol_table: SymbolTable, origin: Addr, instructions: Vec Object { - let Program { orig: raw_orig_operands, instructions: parser_instructions, .. } = program; - let orig_operand = unwrap(raw_orig_operands).remove(0); - let origin = LiteralValue::unwrap_try_from(orig_operand).unwrap_try_into().unwrap(); +pub(crate) fn get_orig(orig_operands: WithErrData>>) -> Result { + let orig_operand = result(orig_operands)?.remove(0); + result(orig_operand)?.try_into() +} - let (instructions, symbol_table) = first_pass(origin, parser_instructions); - second_pass(symbol_table, origin, instructions) +pub fn assemble(program: Program) -> Result { + let Program { orig, instructions: parser_instructions, .. } = program; + let origin = get_orig(orig)?; + let (instructions, symbol_table) = first_pass(origin, parser_instructions)?; + Ok(second_pass(symbol_table, origin, instructions)) } \ No newline at end of file diff --git a/assembler/src/linker.rs b/assembler/src/linker.rs index 8bc6b72..fe0a5fd 100644 --- a/assembler/src/linker.rs +++ b/assembler/src/linker.rs @@ -3,7 +3,7 @@ use chumsky::chain::Chain; use chumsky::Parser; use lc3_isa::util::MemoryDump; use lc3_isa::{Addr, ADDR_SPACE_SIZE_IN_WORDS, Word}; -use crate::assembler::{try_assemble, SymbolTable, Object, ObjectWord, AssemblyResult}; +use crate::assembler::{assemble_instruction, SymbolTable, Object, ObjectWord, AssemblyResult}; struct LinkedObject { origin: Addr, @@ -30,7 +30,7 @@ fn link_object(symbol_table: &SymbolTable, object: Object) -> LinkedObject { location_counter += 1; }, ObjectWord::UnlinkedInstruction(instruction) => - match try_assemble(&symbol_table, &location_counter, instruction) { + match assemble_instruction(&symbol_table, &location_counter, instruction) { AssemblyResult::SingleObjectWord(word) => match word { ObjectWord::Value(word) => { words.push(word); diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index b55f379..f8bc162 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -1,4 +1,4 @@ -use std::convert::TryFrom; +use std::convert::{TryFrom, TryInto}; use chumsky::prelude::*; use chumsky::recovery::SkipUntil; use chumsky::Stream; @@ -10,6 +10,31 @@ use crate::lexer::{LiteralValue, Opcode, Token}; pub(crate) type WithErrData = Spanned>; +pub(crate) fn get_first(v: &Vec>) -> Option<&T> { + v.get(0) + .and_then(|res| get_result(res).as_ref().ok()) +} + +pub(crate) fn get_result(v: &WithErrData) -> &Result { + &v.0 +} + +pub(crate) fn result(v: WithErrData) -> Result { + v.0 +} + +pub(crate) fn try_result(maybe_v: Option>) -> Result { + 
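    // A missing operand (None) and an operand that failed to parse (Err) both
    // collapse to Err(()) here.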
result(maybe_v.ok_or(())?) +} + +pub(crate) fn try_map(maybe_v: Option>) -> Result where + U: TryFrom +{ + try_result(maybe_v)? + .try_into() + .map_err(|_| ()) +} + #[derive(Debug, Eq, PartialEq)] pub struct Program { pub(crate) orig: WithErrData>>, @@ -57,27 +82,27 @@ impl TryFrom for LiteralValue { } impl Operand { - pub(crate) fn string(self) -> String { + pub(crate) fn get_string(self) -> Option { if let Self::StringLiteral(s) = self { - s + Some(s) } else { - panic!("Not a string literal") + None } } - pub(crate) fn label(self) -> String { + pub(crate) fn get_label(self) -> Option { if let Self::Label(l) = self { - l + Some(l) } else { - panic!("Not a label") + None } } - pub(crate) fn unqualified_number_value(self) -> Word { + pub(crate) fn get_unqualified_number_value(self) -> Option { if let Self::UnqualifiedNumberLiteral(w) = self { - w + Some(w) } else { - panic!("Not an unqualified number literal") + None } } } @@ -106,7 +131,7 @@ fn operands(leniency: LeniencyLevel) -> impl Parser impl Parser, Error = Simple> { +fn instruction(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { let label = select! { Token::Label(s) => Ok(s), @@ -124,7 +149,7 @@ fn instruction(leniency: LeniencyLevel) -> impl Parser impl Parser impl Parser> { just(Token::Comment).or_not() .then(just(Token::Newline).repeated().at_least(1)) - .repeated() + .repeated().at_least(1) .ignored() } @@ -152,7 +182,6 @@ fn program(leniency: LeniencyLevel) -> impl Parser, Erro orig .then( instruction(leniency) - .map(|(i, span)| (Ok(i), span)) .separated_by(comments_and_newlines()) .allow_leading() .allow_trailing() @@ -205,7 +234,6 @@ mod tests { let tokens = maybe_tokens.unwrap(); let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); - println!("{:?}", file); assert_eq!((vec![Token::Invalid, Token::Invalid, Token::Label("JUNK".to_string())], 0..18), file.unwrap().0.before_first_orig); } diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 7ca260a..aeb49c2 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -277,7 +277,7 @@ fn test(input: &str, orig: usize, expected_mem: &[Word]) { let (mut file, span) = maybe_file.expect("parsing failed"); assert_eq!(1, file.programs.len(), "parsed unexpected number of programs: {}", file.programs.len()); let program = file.programs.remove(0).0.expect("parse error in program"); - let object = assembler::assemble(program); + let object = assembler::assemble(program).expect("assembly failed"); let mem = linker::link([object], None); From 6d0629f5b3fdf2fcb5edf1fbcdd48e31a359189e Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 3 Jun 2022 18:54:42 -0500 Subject: [PATCH 45/82] assembler: check label offset bounds --- assembler/bin/as.rs | 2 +- assembler/src/analysis.rs | 215 ++++++++++++++++++++++++++++++++----- assembler/src/assembler.rs | 39 +++---- assembler/src/linker.rs | 13 +-- assembler/src/parser.rs | 4 +- assembler/tests/integ.rs | 2 +- 6 files changed, 218 insertions(+), 57 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 8c0d4b3..1bddea1 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -90,7 +90,7 @@ fn as_() { file.programs.into_iter() .map(|program| assemble(program.0.expect("Found invalid object.")).expect("Failed to assemble object.")); - let mem = link(objects, background); + let mem = link(objects, background).expect("linking failed"); let mut output_path = path.clone(); output_path.set_extension(MEM_DUMP_FILE_EXTENSION); diff --git a/assembler/src/analysis.rs 
b/assembler/src/analysis.rs index ebdd246..ba98fb9 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -1,12 +1,13 @@ +use std::cmp::min; use std::collections::{HashMap, HashSet}; use std::convert::{TryFrom, TryInto}; -use std::fmt::{Display, format, Formatter}; +use std::fmt::{Debug, Display, format, Formatter}; use std::string::String; use itertools::{concat, zip}; use ariadne::{Label, Report, ReportBuilder, ReportKind}; use lc3_isa::{Addr, SignedWord, Word}; use crate::lexer::{LiteralValue, Opcode}; -use crate::parser::{File, get_first, get_result, Instruction, Operand, Program, result, WithErrData}; +use crate::parser::{File, get, get_result, Instruction, Operand, Program, result, WithErrData}; use crate::{Span, Spanned}; type ErrorList = Vec>; @@ -22,6 +23,14 @@ pub enum Error { WrongNumberOfOperands { expected: usize, actual: usize }, OperandTypeMismatch { expected: OperandType, actual: OperandType }, DuplicateLabel { label: String, occurrences: Vec, }, + InvalidLabelReference { label: String, reason: InvalidReferenceReason }, + LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr }, +} + +pub enum InvalidReferenceReason { + Undefined, + Duplicated, + OutOfBounds, } impl Error { @@ -38,7 +47,21 @@ impl Error { OperandTypeMismatch { expected, actual } => format!("wrong operand type; expected {}, found: {}", expected, actual), DuplicateLabel { label, .. } => - format!("same label used for multiple locations: {}", label) + format!("same label used for multiple locations: {}", label), + InvalidLabelReference { label, reason } => { + let reason_str = match reason { + InvalidReferenceReason::Undefined => "not previously defined", + InvalidReferenceReason::Duplicated => "defined in multiple locations", + InvalidReferenceReason::OutOfBounds => "defined at invalid address", + }; + format!("reference to label {} invalid: {}", label, reason_str) + } + LabelTooDistant { label, width, est_ref_pos, est_label_pos } => { + format!("label {} at {:#0label_pos_width$X} referenced at {:#0ref_pos_width$X}; too distant, cannot represent in available bits: {}", + label, est_label_pos, est_ref_pos, width, + label_pos_width = min(4, min_signed_width(*est_ref_pos) as usize), + ref_pos_width = min(4, min_signed_width(*est_label_pos) as usize),) + } } } } @@ -69,7 +92,7 @@ pub fn report(spanned_error: Spanned) -> Report { } use OperandType::*; -use crate::assembler::get_orig; +use crate::assembler::{calculate_offset, get_orig}; #[derive(Clone)] pub enum OperandType { @@ -295,7 +318,7 @@ impl MutVisitor for DuplicateLabelsAnalysis { } -type RoughAddr = u32; +type RoughAddr = i32; #[derive(Debug)] enum InvalidSymbolError { @@ -333,7 +356,7 @@ impl Instruction { fn get_first_operand(&self) -> Option<&Operand> { get_result(&self.operands).as_ref().ok() - .and_then(|ops| get_first(ops)) + .and_then(|ops| get(ops, 0)) } fn addresses_occupied(&self) -> Result { @@ -386,10 +409,10 @@ impl MutVisitor for SymbolTableAnalysis { } fn enter_orig(&mut self, orig: &Vec>, _span: &Span) { - self.location_counter = get_first(orig) + self.location_counter = get(orig, 0) .and_then(|op| Word::try_from(op.clone()).map(|w| w as RoughAddr).ok()) .unwrap_or_else(| | { - self.state = SymbolTableState::InvalidOrig; + self.invalidate_state(SymbolTableState::InvalidOrig); ORIG_ERROR_STARTING_ADDRESS_ESTIMATE }); } @@ -399,34 +422,163 @@ impl MutVisitor for SymbolTableAnalysis { self.invalidate_state(SymbolTableState::InvalidInstruction); } - fn enter_instruction(&mut self, 
instruction: &Instruction, _span: &Span) { - if let Some(label) = instruction.get_label() { - self.symbol_table.entry(label.clone()) - .and_modify(|e| *e = Err(InvalidSymbolError::Duplicated)) - .or_insert( - match self.state { - SymbolTableState::Valid => - self.location_counter.try_into() - .map_err(|_| InvalidSymbolError::OutOfBounds), - SymbolTableState::InvalidOrig => - Err(InvalidSymbolError::InvalidOrig { - estimated_addr: self.location_counter - }), - SymbolTableState::InvalidInstruction => - Err(InvalidSymbolError::PriorInvalidInstruction { - estimated_addr: self.location_counter - }), - } - ); - } + fn exit_instruction(&mut self, instruction: &Instruction, _span: &Span) { self.location_counter += instruction.addresses_occupied() .unwrap_or_else(|_| { self.state = SymbolTableState::InvalidInstruction; INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE as Addr }) as RoughAddr; } + + fn enter_label(&mut self, label: &String, _span: &Span) { + self.symbol_table.entry(label.clone()) + .and_modify(|e| *e = Err(InvalidSymbolError::Duplicated)) + .or_insert( + match self.state { + SymbolTableState::Valid => + self.location_counter.try_into() + .map_err(|_| InvalidSymbolError::OutOfBounds), + SymbolTableState::InvalidOrig => + Err(InvalidSymbolError::InvalidOrig { + estimated_addr: self.location_counter + }), + SymbolTableState::InvalidInstruction => + Err(InvalidSymbolError::PriorInvalidInstruction { + estimated_addr: self.location_counter + }), + } + ); + } +} + + +struct ExpectedLabel { + width: u8, + position: usize +} + +struct LabelOffsetBoundsAnalysis<'a> { + errors: ErrorList, + symbol_table: &'a SymbolTable, + location_counter: RoughAddr, + expected_label: Option +} + +impl<'a> LabelOffsetBoundsAnalysis<'a> { + fn new(symbol_table: &'a SymbolTable) -> Self { + Self { + errors: Default::default(), + symbol_table, + location_counter: Default::default(), + expected_label: Default::default(), + } + } + + fn check_offset(&mut self, label: &String, span: &Span, width: u8, label_addr: RoughAddr) { + match calculate_offset(self.location_counter, label_addr) { + Err(_) => { + // TODO: make more precise. This case shouldn't be possible unless one of the estimated addresses is far out of bounds. 
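                // calculate_offset only fails when (label_addr - (ref_addr + 1)) overflows a
                // SignedWord, so the reference is reported as out of bounds instead of guessing
                // at a representable width.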
+ self.errors.push( + (InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::OutOfBounds + }, span.clone())); + } + Ok(offset) => { + if min_signed_width(offset as i32) > width { + self.errors.push( + (LabelTooDistant { + label: label.clone(), + width, + est_ref_pos: self.location_counter, + est_label_pos: label_addr, + }, span.clone())) + } + } + } + } } +impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { + fn enter_orig_error(&mut self, _span: &Span) { + self.location_counter = ORIG_ERROR_STARTING_ADDRESS_ESTIMATE; + } + + fn enter_orig(&mut self, orig: &Vec>, _span: &Span) { + self.location_counter = get(orig, 0) + .and_then(|op| Word::try_from(op.clone()).map(|w| w as RoughAddr).ok()) + .unwrap_or(ORIG_ERROR_STARTING_ADDRESS_ESTIMATE); + } + + fn enter_instruction_error(&mut self, _span: &Span) { + self.location_counter += INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE; + } + + fn exit_instruction(&mut self, instruction: &Instruction, _span: &Span) { + self.location_counter += instruction.addresses_occupied() + .unwrap_or(INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE as Addr) + as RoughAddr; + } + + fn enter_opcode_error(&mut self, _span: &Span) { + self.expected_label = None; + } + + fn enter_opcode(&mut self, opcode: &Opcode, _span: &Span) { + use Opcode::*; + self.expected_label = + match opcode { + Ld | Ldi | Lea + | St | Sti => Some(ExpectedLabel { width: 9, position: 1 }), + Br(_) => Some(ExpectedLabel { width: 9, position: 0 }), + Jsr => Some(ExpectedLabel { width: 11, position: 0 }), + Fill => Some(ExpectedLabel { width: 16, position: 0 }), + _ => None, + } + } + + fn enter_operands(&mut self, operands: &Vec>, _span: &Span) { + if let Some(ExpectedLabel { width, position }) = &self.expected_label { + if let Some((Ok(Operand::Label(label)), op_span)) = operands.get(*position) { + match self.symbol_table.get(label) { + None => { + self.errors.push( + (InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::Undefined + }, op_span.clone())); + } + Some(stv) => match stv { + Ok(addr) => { + self.check_offset(label, op_span, *width, *addr as RoughAddr); + } + Err(ste) => match ste { + InvalidSymbolError::InvalidOrig { estimated_addr } + | InvalidSymbolError::PriorInvalidInstruction { estimated_addr } => { + self.check_offset(label, op_span, *width, *estimated_addr); + } + InvalidSymbolError::Duplicated => { + self.errors.push( + (InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::Duplicated + }, op_span.clone())); + } + InvalidSymbolError::OutOfBounds => { + self.errors.push( + (InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::OutOfBounds + }, op_span.clone())); + } + } + } + } + } + } + } + +} #[derive(Default)] @@ -609,6 +761,7 @@ trait MutVisitor { fn enter_instruction_error(&mut self, _span: &Span) {} fn enter_instruction(&mut self, _instruction: &Instruction, _span: &Span) {} + fn exit_instruction(&mut self, _instruction: &Instruction, _span: &Span) {} fn enter_label_error(&mut self, _span: &Span) {} fn enter_label(&mut self, _label: &String, _span: &Span) {} @@ -636,10 +789,14 @@ pub fn validate(file: &File) -> ErrorList { let mut st = SymbolTableAnalysis::new(); visit(&mut st, file); + let mut lob = LabelOffsetBoundsAnalysis::new(&st.symbol_table); + visit(&mut lob, file); + concat([ pe.errors, dl.errors, - ot.errors + ot.errors, + lob.errors, ]) } diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs index 56727ac..632e9c3 100644 --- 
a/assembler/src/assembler.rs +++ b/assembler/src/assembler.rs @@ -290,17 +290,19 @@ pub(crate) enum AssemblyResult { MultipleObjectWords(Vec), } -fn calculate_offset(location_counter: &Addr, label_address: &Addr) -> SignedWord { - let lc = *location_counter as i32; - let la = *label_address as i32; - (la - (lc + 1)) as SignedWord +fn calculate_addr_offset(location_counter: &Addr, label_address: &Addr) -> Result { + calculate_offset(*location_counter as i32, *label_address as i32) } -pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: &Addr, instruction: Instruction) -> AssemblyResult { +pub(crate) fn calculate_offset(location_counter: i32, label_address: i32) -> Result { + (label_address - (location_counter + 1)).try_into() +} + +pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: &Addr, instruction: Instruction) -> Result { use AssemblyResult::*; use ObjectWord::*; - match instruction { + let res = match instruction { Instruction::Add { dr, sr1, sr2_or_imm5 } => { let word = match sr2_or_imm5 { @@ -323,7 +325,7 @@ pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: PcOffset::Label(label) => match symbol_table.get(&label) { Some(addr) => { - let offset = calculate_offset(location_counter, addr); + let offset = calculate_addr_offset(location_counter, addr)?; SingleObjectWord(Value(lc3_isa::Instruction::new_br(n, z, p, offset).into())) } None => SingleObjectWord(UnlinkedInstruction(Instruction::Br { cond_codes: ConditionCodes { n, z, p }, pc_offset9: PcOffset::Label(label) })), @@ -337,7 +339,7 @@ pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: PcOffset::Label(label) => match symbol_table.get(&label) { Some(addr) => { - let offset = calculate_offset(location_counter, addr); + let offset = calculate_addr_offset(location_counter, addr)?; SingleObjectWord(Value(lc3_isa::Instruction::new_jsr(offset).into())) } None => SingleObjectWord(UnlinkedInstruction(Instruction::Jsr { pc_offset11: PcOffset::Label(label) })), @@ -351,7 +353,7 @@ pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: PcOffset::Label(label) => match symbol_table.get(&label) { Some(addr) => { - let offset = calculate_offset(location_counter, addr); + let offset = calculate_addr_offset(location_counter, addr)?; SingleObjectWord(Value(lc3_isa::Instruction::new_ld(dr, offset).into())) } None => SingleObjectWord(UnlinkedInstruction(Instruction::Ld { dr, pc_offset9: PcOffset::Label(label)})), @@ -364,7 +366,7 @@ pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: PcOffset::Label(label) => match symbol_table.get(&label) { Some(addr) => { - let offset = calculate_offset(location_counter, addr); + let offset = calculate_addr_offset(location_counter, addr)?; SingleObjectWord(Value(lc3_isa::Instruction::new_ldi(dr, offset).into())) } None => SingleObjectWord(UnlinkedInstruction(Instruction::Ldi { dr, pc_offset9: PcOffset::Label(label)})), @@ -378,7 +380,7 @@ pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: PcOffset::Label(label) => match symbol_table.get(&label) { Some(addr) => { - let offset = calculate_offset(location_counter, addr); + let offset = calculate_addr_offset(location_counter, addr)?; SingleObjectWord(Value(lc3_isa::Instruction::new_lea(dr, offset).into())) } None => SingleObjectWord(UnlinkedInstruction(Instruction::Lea { dr, pc_offset9: PcOffset::Label(label)})), @@ -394,7 +396,7 @@ pub(crate) fn 
assemble_instruction(symbol_table: &SymbolTable, location_counter: PcOffset::Label(label) => match symbol_table.get(&label) { Some(addr) => { - let offset = calculate_offset(location_counter, addr); + let offset = calculate_addr_offset(location_counter, addr)?; SingleObjectWord(Value(lc3_isa::Instruction::new_st(sr, offset).into())) } None => SingleObjectWord(UnlinkedInstruction(Instruction::St { sr, pc_offset9: PcOffset::Label(label)})), @@ -407,7 +409,7 @@ pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: PcOffset::Label(label) => match symbol_table.get(&label) { Some(addr) => { - let offset = calculate_offset(location_counter, addr); + let offset = calculate_addr_offset(location_counter, addr)?; SingleObjectWord(Value(lc3_isa::Instruction::new_sti(sr, offset).into())) } None => SingleObjectWord(UnlinkedInstruction(Instruction::Sti { sr, pc_offset9: PcOffset::Label(label)})), @@ -439,7 +441,8 @@ pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: chars.push(Value(0x00)); // null-terminator MultipleObjectWords(chars) } - } + }; + Ok(res) } fn first_pass(origin: Addr, instructions: Vec>) -> Result<(Vec, SymbolTable), ()> { @@ -463,20 +466,20 @@ fn first_pass(origin: Addr, instructions: Vec>) Ok((words, symbol_table)) } -fn second_pass(symbol_table: SymbolTable, origin: Addr, instructions: Vec) -> Object { +fn second_pass(symbol_table: SymbolTable, origin: Addr, instructions: Vec) -> Result { let mut location_counter = origin; let mut words = Vec::new(); for instruction in instructions.into_iter() { let addresses_used = instruction.addresses_occupied(); - match assemble_instruction(&symbol_table, &location_counter, instruction) { + match assemble_instruction(&symbol_table, &location_counter, instruction)? { AssemblyResult::SingleObjectWord(wd) => { words.push(wd); } AssemblyResult::MultipleObjectWords(wds) => { words.extend(wds); } } location_counter += addresses_used; } - Object { origin, symbol_table, words } + Ok(Object { origin, symbol_table, words }) } pub(crate) fn get_orig(orig_operands: WithErrData>>) -> Result { @@ -488,5 +491,5 @@ pub fn assemble(program: Program) -> Result { let Program { orig, instructions: parser_instructions, .. } = program; let origin = get_orig(orig)?; let (instructions, symbol_table) = first_pass(origin, parser_instructions)?; - Ok(second_pass(symbol_table, origin, instructions)) + second_pass(symbol_table, origin, instructions).map_err(|_| ()) } \ No newline at end of file diff --git a/assembler/src/linker.rs b/assembler/src/linker.rs index fe0a5fd..af23f2d 100644 --- a/assembler/src/linker.rs +++ b/assembler/src/linker.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::num::{ParseIntError, TryFromIntError}; use chumsky::chain::Chain; use chumsky::Parser; use lc3_isa::util::MemoryDump; @@ -19,7 +20,7 @@ fn layer_object(image: &mut [Word; ADDR_SPACE_SIZE_IN_WORDS], object: LinkedObje } } -fn link_object(symbol_table: &SymbolTable, object: Object) -> LinkedObject { +fn link_object(symbol_table: &SymbolTable, object: Object) -> Result { let mut words = Vec::new(); let Object { origin, words: object_words, .. } = object; let mut location_counter = origin; @@ -30,7 +31,7 @@ fn link_object(symbol_table: &SymbolTable, object: Object) -> LinkedObject { location_counter += 1; }, ObjectWord::UnlinkedInstruction(instruction) => - match assemble_instruction(&symbol_table, &location_counter, instruction) { + match assemble_instruction(&symbol_table, &location_counter, instruction)? 
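[Reviewer note, not part of the patch] The offset rework in this diff encodes LC-3 PC-relative operands against the incremented PC, i.e. `label - (location_counter + 1)`, and surfaces overflow through `TryFrom` instead of silently truncating with `as`. A standalone sketch of the same idea, using `i16` in place of the crate's `SignedWord` and an illustrative width check alongside it:

use std::convert::TryInto;
use std::num::TryFromIntError;

/// PC-relative offset: the PC has already moved past the referencing
/// instruction by the time the offset is applied.
fn calculate_offset(location_counter: i32, label_address: i32) -> Result<i16, TryFromIntError> {
    (label_address - (location_counter + 1)).try_into()
}

/// Does `offset` fit in a `width`-bit signed immediate (e.g. 9 bits for LD/ST)?
fn fits_in_signed_width(offset: i32, width: u8) -> bool {
    let min = -(1i32 << (width - 1));
    let max = (1i32 << (width - 1)) - 1;
    (min..=max).contains(&offset)
}

fn main() {
    // Label one word past the next instruction: offset 1.
    assert_eq!(calculate_offset(0x3000, 0x3002).unwrap(), 1);
    // Backward reference.
    assert_eq!(calculate_offset(0x3005, 0x3000).unwrap(), -6);
    // 9-bit PCoffset range is -256..=255.
    assert!(fits_in_signed_width(255, 9));
    assert!(!fits_in_signed_width(256, 9));
    // Far outside i16 entirely: reported as an error instead of wrapping.
    assert!(calculate_offset(0, 0x4_0000).is_err());
}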
{ AssemblyResult::SingleObjectWord(word) => match word { ObjectWord::Value(word) => { words.push(word); @@ -51,10 +52,10 @@ fn link_object(symbol_table: &SymbolTable, object: Object) -> LinkedObject { } } } - LinkedObject { origin, words } + Ok(LinkedObject { origin, words }) } -pub fn link(objects: impl IntoIterator, background: Option) -> MemoryDump { +pub fn link(objects: impl IntoIterator, background: Option) -> Result { let objects = objects.into_iter().collect::>(); let mut symbol_table = HashMap::new(); @@ -70,9 +71,9 @@ pub fn link(objects: impl IntoIterator, background: Option [0; ADDR_SPACE_SIZE_IN_WORDS] }; for object in objects { - let linked_object = link_object(&symbol_table, object); + let linked_object = link_object(&symbol_table, object)?; layer_object(&mut image, linked_object); } - image.into() + Ok(image.into()) } \ No newline at end of file diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index f8bc162..244d8d2 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -10,8 +10,8 @@ use crate::lexer::{LiteralValue, Opcode, Token}; pub(crate) type WithErrData = Spanned>; -pub(crate) fn get_first(v: &Vec>) -> Option<&T> { - v.get(0) +pub(crate) fn get(v: &Vec>, i: usize) -> Option<&T> { + v.get(i) .and_then(|res| get_result(res).as_ref().ok()) } diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index aeb49c2..cca642f 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -279,7 +279,7 @@ fn test(input: &str, orig: usize, expected_mem: &[Word]) { let program = file.programs.remove(0).0.expect("parse error in program"); let object = assembler::assemble(program).expect("assembly failed"); - let mem = linker::link([object], None); + let mem = linker::link([object], None).expect("linking failed"); for i in 0..orig { assert_mem(&mem, i, 0x0000); From 6767d96ac5dd4deb069ac672ee9c13c5088e6d27 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 4 Jun 2022 16:33:01 -0500 Subject: [PATCH 46/82] assembler: add call to exit_instruction in visit algo --- assembler/src/analysis.rs | 2 ++ assembler/src/parser.rs | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index ba98fb9..72c881c 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -708,6 +708,8 @@ fn visit_instruction(v: &mut impl MutVisitor, instruction: &WithErrData impl Parser Date: Sat, 4 Jun 2022 17:47:07 -0500 Subject: [PATCH 47/82] assembler: add offset to distant label error message, correct math --- assembler/src/analysis.rs | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 72c881c..493c7df 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -1,4 +1,4 @@ -use std::cmp::min; +use std::cmp::{max, min}; use std::collections::{HashMap, HashSet}; use std::convert::{TryFrom, TryInto}; use std::fmt::{Debug, Display, format, Formatter}; @@ -24,7 +24,7 @@ pub enum Error { OperandTypeMismatch { expected: OperandType, actual: OperandType }, DuplicateLabel { label: String, occurrences: Vec, }, InvalidLabelReference { label: String, reason: InvalidReferenceReason }, - LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr }, + LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, } pub enum InvalidReferenceReason { @@ -56,16 +56,24 @@ impl Error { }; 
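[Reviewer note, not part of the patch] The `exit_instruction` call added to the visit algorithm is what lets a pass react only after an instruction's children have been visited. A stripped-down sketch of the enter/exit visitor shape, with stand-in node types rather than the crate's real AST:

#[derive(Debug)]
struct Instruction { label: Option<String>, words: u16 }

trait Visitor {
    // Default no-op hooks, so each pass overrides only what it needs.
    fn enter_instruction(&mut self, _i: &Instruction) {}
    fn exit_instruction(&mut self, _i: &Instruction) {}
    fn enter_label(&mut self, _label: &str) {}
}

/// The traversal owns the order: enter, then children, then exit.
fn visit(v: &mut impl Visitor, program: &[Instruction]) {
    for i in program {
        v.enter_instruction(i);
        if let Some(label) = &i.label {
            v.enter_label(label);
        }
        v.exit_instruction(i); // fires after everything inside the instruction
    }
}

/// Example pass: tally size, but only once each instruction is fully visited.
#[derive(Default)]
struct SizeAnalysis { total_words: u32 }

impl Visitor for SizeAnalysis {
    fn exit_instruction(&mut self, i: &Instruction) {
        self.total_words += i.words as u32;
    }
}

fn main() {
    let program = vec![
        Instruction { label: Some("LOOP".into()), words: 1 },
        Instruction { label: None, words: 3 }, // e.g. a short .STRINGZ
    ];
    let mut pass = SizeAnalysis::default();
    visit(&mut pass, &program);
    assert_eq!(pass.total_words, 4);
}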
format!("reference to label {} invalid: {}", label, reason_str) } - LabelTooDistant { label, width, est_ref_pos, est_label_pos } => { - format!("label {} at {:#0label_pos_width$X} referenced at {:#0ref_pos_width$X}; too distant, cannot represent in available bits: {}", - label, est_label_pos, est_ref_pos, width, - label_pos_width = min(4, min_signed_width(*est_ref_pos) as usize), - ref_pos_width = min(4, min_signed_width(*est_label_pos) as usize),) + LabelTooDistant { label, width, est_ref_pos, est_label_pos, offset } => { + format!("label {} at {:#0label_pos_width$X} referenced at {:#0ref_pos_width$X}; too distant, cannot represent offset of {} in available bits: {}", + label, est_label_pos, est_ref_pos, offset, width, + // TODO: Rust '#X' formatter automatically fixes width to multiple of 4... find or implement workaround to control sign-extension; for example, for 9-bit signed offsets, we would want to display 0x2FF, not 0xFEFF. Showing as decimal for now. + label_pos_width = max(4, min_signed_hex_digits_required(*est_ref_pos) as usize), + ref_pos_width = max(4, min_signed_hex_digits_required(*est_label_pos) as usize),) } } } } +fn min_signed_hex_digits_required(n: i32) -> u8 { + let bin_digits = min_signed_width(n); + let extra = if bin_digits % 4 == 0 { 0 } else { 1 }; + bin_digits / 4 + extra +} + + pub fn report(spanned_error: Spanned) -> Report { let (error, span) = spanned_error; let mut r = @@ -491,6 +499,7 @@ impl<'a> LabelOffsetBoundsAnalysis<'a> { label: label.clone(), width, est_ref_pos: self.location_counter, + offset, est_label_pos: label_addr, }, span.clone())) } From 1c3375b34e0e60f30660397221f8ccf8c9b407e7 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sun, 5 Jun 2022 23:47:14 -0500 Subject: [PATCH 48/82] assembler: lift location counting from analysis passes to visit algo --- assembler/src/analysis.rs | 297 +++++++++++++++++++------------------- 1 file changed, 145 insertions(+), 152 deletions(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 493c7df..c7dd43a 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -275,19 +275,19 @@ impl MutVisitor for ParseErrorsAnalysis { fn enter_orig_error(&mut self, span: &Span) { self.push_error(BadOperands, span); } - fn enter_instruction_error(&mut self, span: &Span) { + fn enter_instruction_error(&mut self, span: &Span, _location: &LocationCounter) { self.push_error(BadInstruction, span); } - fn enter_label_error(&mut self, span: &Span) { + fn enter_label_error(&mut self, span: &Span, _location: &LocationCounter) { self.push_error(BadLabel, span); } - fn enter_opcode_error(&mut self, span: &Span) { + fn enter_opcode_error(&mut self, span: &Span, _location: &LocationCounter) { self.push_error(BadOpcode, span); } - fn enter_operands_error(&mut self, span: &Span) { + fn enter_operands_error(&mut self, span: &Span, _location: &LocationCounter) { self.push_error(BadOperands, span); } - fn enter_operand_error(&mut self, span: &Span) { + fn enter_operand_error(&mut self, span: &Span, _location: &LocationCounter) { self.push_error(BadOperand, span); } } @@ -319,7 +319,7 @@ impl MutVisitor for DuplicateLabelsAnalysis { .for_each(|e| errors.push(e)); } - fn enter_label(&mut self, label: &String, span: &Span) { + fn enter_label(&mut self, label: &String, span: &Span, _location: &LocationCounter) { let occurrences = self.labels.entry(label.clone()).or_insert(Vec::new()); occurrences.push(span.clone()); } @@ -338,19 +338,6 @@ enum InvalidSymbolError { type SymbolTableValue = Result; 
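[Reviewer note, not part of the patch] The error formatting above pads addresses to a hex width derived from the value's minimal signed bit width. A self-contained sketch of that arithmetic; the crate's own `min_signed_width` is defined elsewhere, so an illustrative stand-in is included here:

/// Smallest number of bits that hold `n` as a two's-complement value.
/// (Stand-in for the crate's own helper, for illustration only.)
fn min_signed_width(n: i32) -> u8 {
    let mut w: u8 = 1;
    // Widen until n fits in [-(2^(w-1)), 2^(w-1) - 1].
    while (n as i64) < -(1i64 << (w - 1)) || (n as i64) > (1i64 << (w - 1)) - 1 {
        w += 1;
    }
    w
}

/// Hex digits needed for that many bits: one digit per 4 bits, rounded up.
fn min_signed_hex_digits_required(n: i32) -> u8 {
    let bits = min_signed_width(n);
    bits / 4 + if bits % 4 == 0 { 0 } else { 1 }
}

fn main() {
    assert_eq!(min_signed_width(255), 9); // 0x0FF needs 9 signed bits
    assert_eq!(min_signed_hex_digits_required(255), 3);
    assert_eq!(min_signed_hex_digits_required(0x3000), 4);

    // The digit count can then drive zero-padded hex formatting:
    let addr = 0x30A;
    let digits = min_signed_hex_digits_required(addr).max(4) as usize;
    println!("{:#0width$X}", addr, width = digits + 2); // "+ 2" covers the "0x" prefix
}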
-#[derive(Debug)] -enum SymbolTableState { - Valid, - InvalidOrig, - InvalidInstruction, -} - -impl Default for SymbolTableState { - fn default() -> Self { - SymbolTableState::Valid - } -} - enum AddressesOccupiedError { BadOpcode, BadOperand @@ -390,8 +377,6 @@ type SymbolTable = HashMap; #[derive(Debug, Default)] struct SymbolTableAnalysis { - location_counter: RoughAddr, - state: SymbolTableState, symbol_table: SymbolTable, } @@ -399,60 +384,27 @@ impl SymbolTableAnalysis { fn new() -> Self { Default::default() } - - fn invalidate_state(&mut self, state: SymbolTableState) { - if let SymbolTableState::Valid = self.state { - self.state = state; - } - } } const ORIG_ERROR_STARTING_ADDRESS_ESTIMATE: RoughAddr = 0x3000; const INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE: RoughAddr = 1; impl MutVisitor for SymbolTableAnalysis { - fn enter_orig_error(&mut self, _span: &Span) { - self.location_counter = ORIG_ERROR_STARTING_ADDRESS_ESTIMATE; - self.invalidate_state(SymbolTableState::InvalidOrig); - } - - fn enter_orig(&mut self, orig: &Vec>, _span: &Span) { - self.location_counter = get(orig, 0) - .and_then(|op| Word::try_from(op.clone()).map(|w| w as RoughAddr).ok()) - .unwrap_or_else(| | { - self.invalidate_state(SymbolTableState::InvalidOrig); - ORIG_ERROR_STARTING_ADDRESS_ESTIMATE - }); - } - - fn enter_instruction_error(&mut self, _span: &Span) { - self.location_counter += INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE; - self.invalidate_state(SymbolTableState::InvalidInstruction); - } - - fn exit_instruction(&mut self, instruction: &Instruction, _span: &Span) { - self.location_counter += instruction.addresses_occupied() - .unwrap_or_else(|_| { - self.state = SymbolTableState::InvalidInstruction; - INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE as Addr - }) as RoughAddr; - } - - fn enter_label(&mut self, label: &String, _span: &Span) { + fn enter_label(&mut self, label: &String, _span: &Span, location: &LocationCounter) { self.symbol_table.entry(label.clone()) .and_modify(|e| *e = Err(InvalidSymbolError::Duplicated)) .or_insert( - match self.state { - SymbolTableState::Valid => - self.location_counter.try_into() + match location.state { + LocationCounterState::Valid => + location.value.try_into() .map_err(|_| InvalidSymbolError::OutOfBounds), - SymbolTableState::InvalidOrig => + LocationCounterState::InvalidOrig => Err(InvalidSymbolError::InvalidOrig { - estimated_addr: self.location_counter + estimated_addr: location.value }), - SymbolTableState::InvalidInstruction => + LocationCounterState::InvalidInstruction => Err(InvalidSymbolError::PriorInvalidInstruction { - estimated_addr: self.location_counter + estimated_addr: location.value }), } ); @@ -468,7 +420,6 @@ struct ExpectedLabel { struct LabelOffsetBoundsAnalysis<'a> { errors: ErrorList, symbol_table: &'a SymbolTable, - location_counter: RoughAddr, expected_label: Option } @@ -477,13 +428,12 @@ impl<'a> LabelOffsetBoundsAnalysis<'a> { Self { errors: Default::default(), symbol_table, - location_counter: Default::default(), expected_label: Default::default(), } } - fn check_offset(&mut self, label: &String, span: &Span, width: u8, label_addr: RoughAddr) { - match calculate_offset(self.location_counter, label_addr) { + fn check_offset(&mut self, label: &String, span: &Span, width: u8, label_addr: RoughAddr, ref_addr: RoughAddr) { + match calculate_offset(ref_addr, label_addr) { Err(_) => { // TODO: make more precise. This case shouldn't be possible unless one of the estimated addresses is far out of bounds. 
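[Reviewer note, not part of the patch] This patch moves location counting out of the individual analyses and into the traversal itself: the driver owns a location counter (value plus validity state), advances it between nodes, and hands each pass only a shared reference. A minimal sketch of that shape under simplified stand-in types:

type RoughAddr = i32;

#[derive(Debug, PartialEq)]
enum CounterState { Valid, InvalidOrig }

struct LocationCounter { value: RoughAddr, state: CounterState }

struct Instruction { words: RoughAddr }

/// The driver owns the counter; analyses only get a read-only view.
fn visit(program: &[Instruction], origin: Option<RoughAddr>,
         mut on_instruction: impl FnMut(&Instruction, &LocationCounter)) {
    const ORIG_ERROR_ESTIMATE: RoughAddr = 0x3000;
    let mut lc = match origin {
        Some(value) => LocationCounter { value, state: CounterState::Valid },
        // A bad .ORIG still lets later passes run on an estimated address.
        None => LocationCounter { value: ORIG_ERROR_ESTIMATE, state: CounterState::InvalidOrig },
    };
    for i in program {
        on_instruction(i, &lc);
        lc.value += i.words; // only the driver advances the counter
    }
}

fn main() {
    let program = [Instruction { words: 1 }, Instruction { words: 3 }, Instruction { words: 1 }];

    // With a good .ORIG, a pass sees exact addresses...
    let mut addrs = Vec::new();
    visit(&program, Some(0x3000), |_, lc| addrs.push(lc.value));
    assert_eq!(addrs, vec![0x3000, 0x3001, 0x3004]);

    // ...and with a bad one it can tell the addresses are only estimates.
    let mut estimated = false;
    visit(&program, None, |_, lc| estimated |= lc.state == CounterState::InvalidOrig);
    assert!(estimated);
}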
self.errors.push( @@ -498,7 +448,7 @@ impl<'a> LabelOffsetBoundsAnalysis<'a> { (LabelTooDistant { label: label.clone(), width, - est_ref_pos: self.location_counter, + est_ref_pos: ref_addr, offset, est_label_pos: label_addr, }, span.clone())) @@ -509,31 +459,11 @@ impl<'a> LabelOffsetBoundsAnalysis<'a> { } impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { - fn enter_orig_error(&mut self, _span: &Span) { - self.location_counter = ORIG_ERROR_STARTING_ADDRESS_ESTIMATE; - } - - fn enter_orig(&mut self, orig: &Vec>, _span: &Span) { - self.location_counter = get(orig, 0) - .and_then(|op| Word::try_from(op.clone()).map(|w| w as RoughAddr).ok()) - .unwrap_or(ORIG_ERROR_STARTING_ADDRESS_ESTIMATE); - } - - fn enter_instruction_error(&mut self, _span: &Span) { - self.location_counter += INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE; - } - - fn exit_instruction(&mut self, instruction: &Instruction, _span: &Span) { - self.location_counter += instruction.addresses_occupied() - .unwrap_or(INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE as Addr) - as RoughAddr; - } - - fn enter_opcode_error(&mut self, _span: &Span) { + fn enter_opcode_error(&mut self, _span: &Span, _location: &LocationCounter) { self.expected_label = None; } - fn enter_opcode(&mut self, opcode: &Opcode, _span: &Span) { + fn enter_opcode(&mut self, opcode: &Opcode, _span: &Span, _location: &LocationCounter) { use Opcode::*; self.expected_label = match opcode { @@ -546,7 +476,7 @@ impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { } } - fn enter_operands(&mut self, operands: &Vec>, _span: &Span) { + fn enter_operands(&mut self, operands: &Vec>, _span: &Span, location: &LocationCounter) { if let Some(ExpectedLabel { width, position }) = &self.expected_label { if let Some((Ok(Operand::Label(label)), op_span)) = operands.get(*position) { match self.symbol_table.get(label) { @@ -559,12 +489,12 @@ impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { } Some(stv) => match stv { Ok(addr) => { - self.check_offset(label, op_span, *width, *addr as RoughAddr); + self.check_offset(label, op_span, *width, *addr as RoughAddr, location.value); } Err(ste) => match ste { InvalidSymbolError::InvalidOrig { estimated_addr } | InvalidSymbolError::PriorInvalidInstruction { estimated_addr } => { - self.check_offset(label, op_span, *width, *estimated_addr); + self.check_offset(label, op_span, *width, *estimated_addr, location.value); } InvalidSymbolError::Duplicated => { self.errors.push( @@ -600,6 +530,30 @@ impl OperandTypesAnalysis { fn new() -> Self { Default::default() } + + fn check_operands(&mut self, operands: &Vec>, span: &Span) { + if let Some(expected) = &self.expected_operands { + // TODO: create longest common subsequence diff for more precise errors + let ops_len = operands.len(); + let exp_len = expected.len(); + if ops_len != exp_len { + self.errors.push((WrongNumberOfOperands { expected: exp_len, actual: ops_len }, span.clone())) + } else { + for ((op_res, op_span), exp_ty) in zip(operands, expected) { + if let Ok(op) = op_res { + if !exp_ty.check(op) { + let actual = if let Operand::NumberLiteral(value) = op { + OperandType::of_number_literal(value, Some(exp_ty.accepted_number_signs())) + } else { + OperandType::of(op) + }; + self.errors.push((OperandTypeMismatch { expected: exp_ty.clone(), actual }, op_span.clone())); + } + } + } + } + } + } } fn orig_expected_operands() -> Vec { @@ -609,14 +563,14 @@ fn orig_expected_operands() -> Vec { impl MutVisitor for OperandTypesAnalysis { fn enter_orig(&mut self, orig: &Vec>, span: &Span) { 
self.expected_operands = Some(orig_expected_operands()); - self.enter_operands(orig, span); + self.check_operands(orig, span); } - fn enter_opcode_error(&mut self, _span: &Span) { + fn enter_opcode_error(&mut self, _span: &Span, _location: &LocationCounter) { self.expected_operands = None; } - fn enter_opcode(&mut self, opcode: &Opcode, _span: &Span) { + fn enter_opcode(&mut self, opcode: &Opcode, _span: &Span, _location: &LocationCounter) { use Opcode::*; self.expected_operands = Some( match opcode { @@ -642,27 +596,43 @@ impl MutVisitor for OperandTypesAnalysis { ); } - fn enter_operands(&mut self, operands: &Vec>, span: &Span) { - if let Some(expected) = &self.expected_operands { - // TODO: create longest common subsequence diff for more precise errors - let ops_len = operands.len(); - let exp_len = expected.len(); - if ops_len != exp_len { - self.errors.push((WrongNumberOfOperands { expected: exp_len, actual: ops_len }, span.clone())) - } else { - for ((op_res, op_span), exp_ty) in zip(operands, expected) { - if let Ok(op) = op_res { - if !exp_ty.check(op) { - let actual = if let Operand::NumberLiteral(value) = op { - OperandType::of_number_literal(value, Some(exp_ty.accepted_number_signs())) - } else { - OperandType::of(op) - }; - self.errors.push((OperandTypeMismatch { expected: exp_ty.clone(), actual }, op_span.clone())); - } - } - } - } + fn enter_operands(&mut self, operands: &Vec>, span: &Span, _location: &LocationCounter) { + self.check_operands(operands, span); + } +} + +struct LocationCounter { + value: RoughAddr, + state: LocationCounterState, +} + +impl LocationCounter { + fn new() -> Self { + Self { + value: Default::default(), + state: LocationCounterState::Valid, + } + } + +} + +#[derive(Debug)] +enum LocationCounterState { + Valid, + InvalidOrig, + InvalidInstruction, +} + +impl Default for LocationCounterState { + fn default() -> Self { + LocationCounterState::Valid + } +} + +impl LocationCounterState { + fn if_valid_set(&mut self, state: LocationCounterState) { + if let LocationCounterState::Valid = self { + *self = state; } } } @@ -682,81 +652,104 @@ fn visit_program(v: &mut impl MutVisitor, program: &WithErrData) { Ok(p) => { v.enter_program( p, span); + let mut location_counter = LocationCounter::new(); + let Program { orig, instructions } = p; - visit_orig(v, orig); + visit_orig(v, orig, &mut location_counter); for instruction in instructions { - visit_instruction(v, instruction); + visit_instruction(v, instruction, &mut location_counter); } } } } -fn visit_orig(v: &mut impl MutVisitor, orig: &WithErrData>>) { +fn visit_orig(v: &mut impl MutVisitor, orig: &WithErrData>>, location_counter: &mut LocationCounter) { let (orig_res, span) = orig; match orig_res { - Err(_) => { v.enter_orig_error(span); } + Err(_) => { + location_counter.value = ORIG_ERROR_STARTING_ADDRESS_ESTIMATE; + location_counter.state.if_valid_set(LocationCounterState::InvalidOrig); + v.enter_orig_error(span); + } Ok(o) => { + location_counter.value = get(o, 0) + .and_then(|op| Word::try_from(op.clone()).map(|w| w as RoughAddr).ok()) + .unwrap_or_else(| | { + location_counter.state.if_valid_set(LocationCounterState::InvalidOrig); + ORIG_ERROR_STARTING_ADDRESS_ESTIMATE + }); + v.enter_orig( o, span); for operand in o { - visit_operand(v, operand); + visit_operand(v, operand, location_counter); } } } } -fn visit_instruction(v: &mut impl MutVisitor, instruction: &WithErrData) { +fn visit_instruction(v: &mut impl MutVisitor, instruction: &WithErrData, location_counter: &mut LocationCounter) { let 
(inst_res, span) = instruction; match inst_res { - Err(_) => { v.enter_instruction_error(span); } + Err(_) => { + v.enter_instruction_error(span, location_counter); + location_counter.value += INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE; + location_counter.state.if_valid_set(LocationCounterState::InvalidInstruction); + } Ok(i) => { - v.enter_instruction(i, span); + v.enter_instruction(i, span, location_counter); let Instruction { label, opcode, operands } = i; if let Some(l) = label { - visit_label(v, l); + visit_label(v, l, location_counter); } - visit_opcode(v, opcode); - visit_operands(v, operands); + visit_opcode(v, opcode, location_counter); + visit_operands(v, operands, location_counter); + + v.exit_instruction(i, span, location_counter); - v.exit_instruction(i, span); + location_counter.value += i.addresses_occupied() + .unwrap_or_else(|_| { + location_counter.state.if_valid_set(LocationCounterState::InvalidInstruction); + INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE as Addr + }) as RoughAddr; } } } -fn visit_label(v: &mut impl MutVisitor, label: &WithErrData) { +fn visit_label(v: &mut impl MutVisitor, label: &WithErrData, location_counter: &mut LocationCounter) { let (label_res, span) = label; match label_res { - Err(_) => { v.enter_label_error(span); } - Ok(l) => { v.enter_label( l, span); } + Err(_) => { v.enter_label_error(span, location_counter); } + Ok(l) => { v.enter_label( l, span, location_counter); } } } -fn visit_opcode(v: &mut impl MutVisitor, opcode: &WithErrData) { +fn visit_opcode(v: &mut impl MutVisitor, opcode: &WithErrData, location_counter: &mut LocationCounter) { let (opcode_res, span) = opcode; match opcode_res { - Err(_) => { v.enter_opcode_error(span); } - Ok(oc) => { v.enter_opcode( oc, span); } + Err(_) => { v.enter_opcode_error(span, location_counter); } + Ok(oc) => { v.enter_opcode( oc, span, location_counter); } } } -fn visit_operands(v: &mut impl MutVisitor, operands: &WithErrData>>) { +fn visit_operands(v: &mut impl MutVisitor, operands: &WithErrData>>, location_counter: &mut LocationCounter) { let (ops_res, span) = operands; match ops_res { - Err(_) => { v.enter_operands_error(span); } + Err(_) => { v.enter_operands_error(span, location_counter); } Ok(o) => { - v.enter_operands( o, span); + v.enter_operands( o, span, location_counter); for operand in o { - visit_operand(v, operand); + visit_operand(v, operand, location_counter); } } } } -fn visit_operand(v: &mut impl MutVisitor, operand: &WithErrData) { +fn visit_operand(v: &mut impl MutVisitor, operand: &WithErrData, location_counter: &mut LocationCounter) { let (op_res, span) = operand; match op_res { - Err(_) => { v.enter_operand_error(span); } - Ok(o) => { v.enter_operand( o, span); } + Err(_) => { v.enter_operand_error(span, location_counter); } + Ok(o) => { v.enter_operand( o, span, location_counter); } } } @@ -770,21 +763,21 @@ trait MutVisitor { fn enter_orig_error(&mut self, _span: &Span) {} fn enter_orig(&mut self, _orig: &Vec>, _span: &Span) {} - fn enter_instruction_error(&mut self, _span: &Span) {} - fn enter_instruction(&mut self, _instruction: &Instruction, _span: &Span) {} - fn exit_instruction(&mut self, _instruction: &Instruction, _span: &Span) {} + fn enter_instruction_error(&mut self, _span: &Span, _location: &LocationCounter) {} + fn enter_instruction(&mut self, _instruction: &Instruction, _span: &Span, _location: &LocationCounter) {} + fn exit_instruction(&mut self, _instruction: &Instruction, _span: &Span, _location: &LocationCounter) {} - fn enter_label_error(&mut self, 
_span: &Span) {} - fn enter_label(&mut self, _label: &String, _span: &Span) {} + fn enter_label_error(&mut self, _span: &Span, _location: &LocationCounter) {} + fn enter_label(&mut self, _label: &String, _span: &Span, _location: &LocationCounter) {} - fn enter_opcode_error(&mut self, _span: &Span) {} - fn enter_opcode(&mut self, _opcode: &Opcode, _span: &Span) {} + fn enter_opcode_error(&mut self, _span: &Span, _location: &LocationCounter) {} + fn enter_opcode(&mut self, _opcode: &Opcode, _span: &Span, _location: &LocationCounter) {} - fn enter_operands_error(&mut self, _span: &Span) {} - fn enter_operands(&mut self, _operands: &Vec>, _span: &Span) {} + fn enter_operands_error(&mut self, _span: &Span, _location: &LocationCounter) {} + fn enter_operands(&mut self, _operands: &Vec>, _span: &Span, _location: &LocationCounter) {} - fn enter_operand_error(&mut self, _span: &Span) {} - fn enter_operand(&mut self, _operand: &Operand, _span: &Span) {} + fn enter_operand_error(&mut self, _span: &Span, _location: &LocationCounter) {} + fn enter_operand(&mut self, _operand: &Operand, _span: &Span, _location: &LocationCounter) {} } pub fn validate(file: &File) -> ErrorList { From daf1ab3b62a577abd12ef89ca5493202d364898c Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 6 Jun 2022 13:04:52 -0500 Subject: [PATCH 49/82] assembler: insert pointer to ORIG when linking OS --- assembler/bin/as.rs | 4 +--- assembler/src/linker.rs | 14 ++++++++++---- assembler/tests/integ.rs | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 1bddea1..3c4aef8 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -84,13 +84,11 @@ fn as_() { if args.check { println!("{}: No errors found.", path.to_str().unwrap()); } else { - let background = if args.no_os { None } else { Some(lc3_os::OS_IMAGE.clone()) }; - let objects = file.programs.into_iter() .map(|program| assemble(program.0.expect("Found invalid object.")).expect("Failed to assemble object.")); - let mem = link(objects, background).expect("linking failed"); + let mem = link(objects, !args.no_os).expect("linking failed"); let mut output_path = path.clone(); output_path.set_extension(MEM_DUMP_FILE_EXTENSION); diff --git a/assembler/src/linker.rs b/assembler/src/linker.rs index af23f2d..99f2fd4 100644 --- a/assembler/src/linker.rs +++ b/assembler/src/linker.rs @@ -55,7 +55,7 @@ fn link_object(symbol_table: &SymbolTable, object: Object) -> Result, background: Option) -> Result { +pub fn link(objects: impl IntoIterator, overlay_on_os: bool) -> Result { let objects = objects.into_iter().collect::>(); let mut symbol_table = HashMap::new(); @@ -66,9 +66,15 @@ pub fn link(objects: impl IntoIterator, background: Option mem.0, - None => [0; ADDR_SPACE_SIZE_IN_WORDS] + if overlay_on_os { + let mut os = lc3_os::OS_IMAGE.clone().0; + os[lc3_isa::USER_PROGRAM_START_ADDR as usize] = + objects.get(0) + .expect("Found no objects in file; could not find origin.") + .origin; // TODO: fail gracefully + os + } else { + [0; ADDR_SPACE_SIZE_IN_WORDS] }; for object in objects { let linked_object = link_object(&symbol_table, object)?; diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index cca642f..51e673d 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -279,7 +279,7 @@ fn test(input: &str, orig: usize, expected_mem: &[Word]) { let program = file.programs.remove(0).0.expect("parse error in program"); let object = assembler::assemble(program).expect("assembly failed"); - let mem = 
linker::link([object], None).expect("linking failed"); + let mem = linker::link([object], false).expect("linking failed"); for i in 0..orig { assert_mem(&mem, i, 0x0000); From 443f3bf8382ff1a38543b90232ae903bbdb301e3 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 6 Jun 2022 13:22:50 -0500 Subject: [PATCH 50/82] assembler: re-enable bugged hex imm tests --- assembler/tests/integ.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 51e673d..cc90eb0 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -196,7 +196,7 @@ mod single_instruction { neg_imm: ($operator.to_string() + " R2 #-1").as_str() => (($opcode << 12) + 0x5FF), max_imm: ($operator.to_string() + " R3 #255").as_str() => (($opcode << 12) + 0x6FF), min_imm: ($operator.to_string() + " R4 #-256").as_str() => (($opcode << 12) + 0x900), - // hex_imm: ($operator.to_string() + " R5 xA").as_str() => (($opcode << 12) + 0xA0A), TODO: We currently assume an argument not starting in # is a label. Allow hex literals? + hex_imm: ($operator.to_string() + " R5 xA").as_str() => (($opcode << 12) + 0xA0A), r5: ($operator.to_string() + " R5 #0").as_str() => (($opcode << 12) + 0xA00), r6: ($operator.to_string() + " R6 #0").as_str() => (($opcode << 12) + 0xC00), r7: ($operator.to_string() + " R7 #0").as_str() => (($opcode << 12) + 0xE00), @@ -219,7 +219,7 @@ mod single_instruction { neg_imm: "JSR #-1" => 0x4FFF, max_imm: "JSR #1023" => 0x4BFF, min_imm: "JSR #-1024" => 0x4C00, - // hex_imm: "JSR xA" => 0x480A, // TODO: We currently assume an argument not starting in # is a label. Allow hex literals? + hex_imm: "JSR xA" => 0x480A, } mod pseudo_ops { From 0a478f70b51f53f6ee92a71d31310ebc50894da6 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 6 Jun 2022 13:30:34 -0500 Subject: [PATCH 51/82] assembler: account for null-terminator in addresses occupied by string --- assembler/src/analysis.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index c7dd43a..6fd6109 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -362,7 +362,7 @@ impl Instruction { self.get_first_operand() .and_then(|op| op.clone().get_string()) .ok_or(AddressesOccupiedError::BadOperand) - .map(|s| s.len() as Addr), + .map(|s| (s.len() + 1) as Addr), Opcode::Blkw => self.get_first_operand() .and_then(|op| op.clone().get_unqualified_number_value()) From c3e9e5da16e56bb061a1c6c2de18a89037979177 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 6 Jun 2022 19:14:32 -0500 Subject: [PATCH 52/82] assembler: add check for object overlap --- assembler/src/analysis.rs | 104 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 100 insertions(+), 4 deletions(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 6fd6109..294f76b 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -2,8 +2,9 @@ use std::cmp::{max, min}; use std::collections::{HashMap, HashSet}; use std::convert::{TryFrom, TryInto}; use std::fmt::{Debug, Display, format, Formatter}; +use std::ops::Range; use std::string::String; -use itertools::{concat, zip}; +use itertools::{concat, Itertools, zip}; use ariadne::{Label, Report, ReportBuilder, ReportKind}; use lc3_isa::{Addr, SignedWord, Word}; use crate::lexer::{LiteralValue, Opcode}; @@ -25,6 +26,7 @@ pub enum Error { DuplicateLabel { label: String, occurrences: Vec, }, InvalidLabelReference { label: String, reason: 
InvalidReferenceReason }, LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, + ObjectsOverlap { placement1: ObjectPlacement, placement2: ObjectPlacement } } pub enum InvalidReferenceReason { @@ -34,6 +36,16 @@ pub enum InvalidReferenceReason { } impl Error { + fn objects_overlap(p1: ObjectPlacement, p2: ObjectPlacement) -> Self { + let (placement1, placement2) = + if p1.span_in_memory.start <= p2.span_in_memory.start { + (p1, p2) + } else { + (p2, p1) + }; + ObjectsOverlap { placement1, placement2 } + } + fn message(&self) -> String { match self { BadProgram => String::from("invalid program"), @@ -63,6 +75,20 @@ impl Error { label_pos_width = max(4, min_signed_hex_digits_required(*est_ref_pos) as usize), ref_pos_width = max(4, min_signed_hex_digits_required(*est_label_pos) as usize),) } + ObjectsOverlap { placement1, placement2 } => { + format!("object {} in file occupying [{:#0o1s_width$X}, {:#0o1e_width$X}) overlaps object {} occupying [{:#0o2s_width$X}, {:#0o2e_width$X})", + placement1.position_in_file, + placement1.span_in_memory.start, + placement1.span_in_memory.end, + placement2.position_in_file, + placement2.span_in_memory.start, + placement2.span_in_memory.end, + o1s_width = max(4, min_signed_hex_digits_required(placement1.span_in_memory.start) as usize), + o1e_width = max(4, min_signed_hex_digits_required(placement1.span_in_memory.end) as usize), + o2s_width = max(4, min_signed_hex_digits_required(placement2.span_in_memory.start) as usize), + o2e_width = max(4, min_signed_hex_digits_required(placement2.span_in_memory.end) as usize), + ) + } } } } @@ -92,6 +118,12 @@ pub fn report(spanned_error: Spanned) -> Report { r = r.with_label(Label::new(occurrence).with_message(label_message)) } } + ObjectsOverlap { placement1, placement2 } => { + r = r.with_label(Label::new(placement1.span_in_file) + .with_message("end of this object overlaps the other")) + .with_label(Label::new(placement2.span_in_file) + .with_message("start of this object overlaps the other")); + } _ => { r = r.with_label(Label::new(span).with_message("here")); } @@ -561,7 +593,7 @@ fn orig_expected_operands() -> Vec { } impl MutVisitor for OperandTypesAnalysis { - fn enter_orig(&mut self, orig: &Vec>, span: &Span) { + fn enter_orig(&mut self, orig: &Vec>, span: &Span, _location: &LocationCounter) { self.expected_operands = Some(orig_expected_operands()); self.check_operands(orig, span); } @@ -601,6 +633,60 @@ impl MutVisitor for OperandTypesAnalysis { } } + +struct ObjectPlacementAnalysis { + errors: ErrorList, + last_start: RoughAddr, + object_index: usize, + object_spans: Vec, +} + +#[derive(Clone)] +pub struct ObjectPlacement { + position_in_file: usize, + span_in_file: Span, + span_in_memory: Range, +} + +impl ObjectPlacementAnalysis { + fn new() -> Self { + Self { + errors: Default::default(), + last_start: ORIG_ERROR_STARTING_ADDRESS_ESTIMATE, + object_index: 0, + object_spans: Default::default(), + } + } +} + +impl MutVisitor for ObjectPlacementAnalysis { + fn exit_file(&mut self, _file: &File) { + self.object_spans.sort_unstable_by_key(|span| span.span_in_memory.start); + for (op1, op2) in self.object_spans.iter().tuple_windows() { + if op2.span_in_memory.start < op1.span_in_memory.end { + self.errors.push(( + Error::objects_overlap(op1.clone(), op2.clone()), + 0..0)); // TODO: refactor to avoid dummy span + } + } + } + + fn exit_program(&mut self, _program: &Program, span: &Span, location: &LocationCounter) { + self.object_spans.push( + 
ObjectPlacement { + position_in_file: self.object_index, + span_in_file: span.clone(), + span_in_memory: self.last_start..location.value + }); + self.object_index += 1; + } + + fn exit_orig(&mut self, _orig: &Vec>, _span: &Span, location: &LocationCounter) { + self.last_start = location.value; + } +} + + struct LocationCounter { value: RoughAddr, state: LocationCounterState, @@ -659,6 +745,8 @@ fn visit_program(v: &mut impl MutVisitor, program: &WithErrData) { for instruction in instructions { visit_instruction(v, instruction, &mut location_counter); } + + v.exit_program(p, span, &mut location_counter); } } } @@ -679,10 +767,12 @@ fn visit_orig(v: &mut impl MutVisitor, orig: &WithErrData>, _span: &Span) {} + fn enter_orig(&mut self, _orig: &Vec>, _span: &Span, _location: &LocationCounter) {} + fn exit_orig(&mut self, _orig: &Vec>, _span: &Span, _location: &LocationCounter) {} fn enter_instruction_error(&mut self, _span: &Span, _location: &LocationCounter) {} fn enter_instruction(&mut self, _instruction: &Instruction, _span: &Span, _location: &LocationCounter) {} @@ -796,11 +888,15 @@ pub fn validate(file: &File) -> ErrorList { let mut lob = LabelOffsetBoundsAnalysis::new(&st.symbol_table); visit(&mut lob, file); + let mut op = ObjectPlacementAnalysis::new(); + visit(&mut op, file); + concat([ pe.errors, dl.errors, ot.errors, lob.errors, + op.errors, ]) } From 9670d1f4d1274b2ee3d45f8f1386bcf4583431bf Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 6 Jun 2022 20:39:24 -0500 Subject: [PATCH 53/82] assembler: improve object overlap error label presentation --- assembler/src/analysis.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index 294f76b..d401400 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -119,10 +119,16 @@ pub fn report(spanned_error: Spanned) -> Report { } } ObjectsOverlap { placement1, placement2 } => { - r = r.with_label(Label::new(placement1.span_in_file) - .with_message("end of this object overlaps the other")) - .with_label(Label::new(placement2.span_in_file) - .with_message("start of this object overlaps the other")); + let (first, first_pos_text, second, second_pos_text) = + if placement1.position_in_file < placement2.position_in_file { + (placement1, "end", placement2, "start") + } else { + (placement2, "start", placement1, "end") + }; + r = r.with_label(Label::new(first.span_in_file) + .with_message(format!("{} of this object overlaps the other", first_pos_text))) + .with_label(Label::new(second.span_in_file) + .with_message(format!("{} of this object overlaps the other", second_pos_text))); } _ => { r = r.with_label(Label::new(span).with_message("here")); From 74d8e675b4eda6395b6ace4c9af285f06f642ffd Mon Sep 17 00:00:00 2001 From: David Gipson Date: Mon, 6 Jun 2022 22:48:36 -0500 Subject: [PATCH 54/82] assembler: correct address used to set OS user program pointer --- assembler/src/linker.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assembler/src/linker.rs b/assembler/src/linker.rs index 99f2fd4..5c248ca 100644 --- a/assembler/src/linker.rs +++ b/assembler/src/linker.rs @@ -68,7 +68,7 @@ pub fn link(objects: impl IntoIterator, overlay_on_os: bool) -> Res let mut image = if overlay_on_os { let mut os = lc3_os::OS_IMAGE.clone().0; - os[lc3_isa::USER_PROGRAM_START_ADDR as usize] = + os[lc3_os::USER_PROG_START_ADDR as usize] = objects.get(0) .expect("Found no objects in file; could not find origin.") .origin; // TODO: fail 
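[Reviewer note, not part of the patch] The object-overlap check added above sorts the objects' memory spans by start address and then only has to compare neighbours. A sketch of that core idea; the real code carries file positions and source spans for reporting, while this keeps only the ranges:

use std::ops::Range;

fn find_overlaps(mut spans: Vec<Range<i32>>) -> Vec<(Range<i32>, Range<i32>)> {
    spans.sort_unstable_by_key(|s| s.start);
    spans.windows(2)
        // Half-open ranges: touching is fine, overlapping is not.
        .filter(|w| w[1].start < w[0].end)
        .map(|w| (w[0].clone(), w[1].clone()))
        .collect()
}

fn main() {
    // An object at x3000 (5 words) collides with one at x3003; x4000 is fine.
    let spans = vec![0x3000..0x3005, 0x4000..0x4002, 0x3003..0x3005];
    let overlaps = find_overlaps(spans);
    assert_eq!(overlaps, vec![(0x3000..0x3005, 0x3003..0x3005)]);

    // Adjacent objects do not overlap.
    assert!(find_overlaps(vec![0x3000..0x3005, 0x3005..0x3006]).is_empty());
}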
gracefully From 8d8f7b7e117fca7b6b2a504366dafc6525d57dfc Mon Sep 17 00:00:00 2001 From: David Gipson Date: Thu, 9 Jun 2022 17:02:56 -0500 Subject: [PATCH 55/82] assembler: cover lex/parse errors more gracefully, esp. missing .ORIG/.END --- assembler/bin/as.rs | 23 ++++++++++++---- assembler/src/analysis.rs | 51 ++++++++++++++++++++++++++++++------ assembler/src/lexer.rs | 55 ++++++++++++++++++++++++++++----------- assembler/src/lib.rs | 2 +- assembler/src/parser.rs | 26 +++++++++++------- assembler/tests/integ.rs | 6 ++--- 6 files changed, 122 insertions(+), 41 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 3c4aef8..eede0e2 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -62,16 +62,28 @@ fn as_() { let leniency = if args.strict { LeniencyLevel::Strict } else { LeniencyLevel::Lenient }; - let string = fs::read_to_string(path.clone()).unwrap(); + let string = fs::read_to_string(path.clone()).expect(&format!("Could not read file at: {:?}", path)); let src = string.as_str(); - let (maybe_tokens, lex_errs) = lex(src, leniency); - let tokens = maybe_tokens.expect("lexing failed"); + let (maybe_tokens, lex_data, lex_errs) = lex(src, leniency); + if let None = maybe_tokens { + for lex_err in lex_errs { + println!("Lex error: {}", lex_err); + } + continue; + } + let tokens = maybe_tokens.expect("Lexing failed, but produced no errors."); let (maybe_file, parse_errs) = parse(src, tokens, leniency); - let (mut file, span) = maybe_file.expect("parsing failed"); + if let None = maybe_file { + for parse_err in parse_errs { + println!("{}", parse_err); + } + continue; + } + let spanned_file = maybe_file.expect("Parsing failed, but produced no errors."); - let errors = validate(&file); + let errors = validate(&lex_data, &spanned_file); if !errors.is_empty() { for error in errors { @@ -84,6 +96,7 @@ fn as_() { if args.check { println!("{}: No errors found.", path.to_str().unwrap()); } else { + let mut file = spanned_file.0; let objects = file.programs.into_iter() .map(|program| assemble(program.0.expect("Found invalid object.")).expect("Failed to assemble object.")); diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index d401400..edb97f1 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -7,7 +7,7 @@ use std::string::String; use itertools::{concat, Itertools, zip}; use ariadne::{Label, Report, ReportBuilder, ReportKind}; use lc3_isa::{Addr, SignedWord, Word}; -use crate::lexer::{LiteralValue, Opcode}; +use crate::lexer::{LexData, LiteralValue, Opcode}; use crate::parser::{File, get, get_result, Instruction, Operand, Program, result, WithErrData}; use crate::{Span, Spanned}; @@ -26,7 +26,10 @@ pub enum Error { DuplicateLabel { label: String, occurrences: Vec, }, InvalidLabelReference { label: String, reason: InvalidReferenceReason }, LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, - ObjectsOverlap { placement1: ObjectPlacement, placement2: ObjectPlacement } + ObjectsOverlap { placement1: ObjectPlacement, placement2: ObjectPlacement }, + NoTokens, + NoOrig, + NoEnd, } pub enum InvalidReferenceReason { @@ -89,6 +92,15 @@ impl Error { o2e_width = max(4, min_signed_hex_digits_required(placement2.span_in_memory.end) as usize), ) } + NoTokens => { + "no LC-3 assembly in file".to_string() + } + NoOrig => { + "no .ORIG pseudo-op in file".to_string() + } + NoEnd => { + "no .END pseudo-op in file".to_string() + } } } } @@ -130,6 +142,7 @@ pub fn report(spanned_error: 
Spanned) -> Report { .with_label(Label::new(second.span_in_file) .with_message(format!("{} of this object overlaps the other", second_pos_text))); } + NoTokens => {}, _ => { r = r.with_label(Label::new(span).with_message("here")); } @@ -878,7 +891,28 @@ trait MutVisitor { fn enter_operand(&mut self, _operand: &Operand, _span: &Span, _location: &LocationCounter) {} } -pub fn validate(file: &File) -> ErrorList { + +fn analyze_lex_data(lex_data: &LexData, file_span: &Span) -> ErrorList { + let mut errors = Vec::new(); + if lex_data.no_tokens { + errors.push((NoTokens, 0..0)) + } else { + if !lex_data.orig_present { + errors.push((NoOrig, file_span.start..file_span.start)); + } + if !lex_data.end_present { + errors.push((NoEnd, file_span.end..file_span.end)); + } + } + errors +} + + +pub fn validate(lex_data: &LexData, file_spanned: &Spanned) -> ErrorList { + let (file, file_span) = file_spanned; + + let errors_from_lex_data = analyze_lex_data(&lex_data, file_span); + let mut pe = ParseErrorsAnalysis::new(); visit(&mut pe, file); @@ -898,11 +932,12 @@ pub fn validate(file: &File) -> ErrorList { visit(&mut op, file); concat([ - pe.errors, - dl.errors, - ot.errors, - lob.errors, - op.errors, + errors_from_lex_data, + pe.errors, + dl.errors, + ot.errors, + lob.errors, + op.errors, ]) } diff --git a/assembler/src/lexer.rs b/assembler/src/lexer.rs index a365d7a..753aeaf 100644 --- a/assembler/src/lexer.rs +++ b/assembler/src/lexer.rs @@ -1,6 +1,7 @@ use chumsky::prelude::*; use lc3_isa::{Addr, Reg, SignedWord, Word}; use std::convert::{TryFrom, TryInto}; +use std::fmt::{Display, Formatter}; use std::num::TryFromIntError; use chumsky::Stream; @@ -25,6 +26,12 @@ pub enum Token { Invalid, } +impl Display for Token { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum LiteralValue { Word(Word), @@ -106,6 +113,7 @@ pub enum Opcode { Halt, } +#[derive(Debug)] enum CaseSensitivePassResult { CaseInsensitiveSource(String), CaseSensitiveToken(Token), @@ -338,8 +346,8 @@ fn case_sensitive_pass(source: &str) -> (Option>, leniency: LeniencyLevel) -> (Option>>, Vec>) { - let mut toks: Option>> = None; +fn case_insensitive_pass(case_sensitive_pass_results: Vec>, leniency: LeniencyLevel) -> (Vec>, Vec>) { + let mut toks: Vec> = Vec::new(); let mut errors = Vec::new(); for (cspr, span) in case_sensitive_pass_results { @@ -357,13 +365,14 @@ fn case_insensitive_pass(case_sensitive_pass_results: Vec { - toks.get_or_insert(Vec::new()).push((t, span)); + toks.push((t, span)); } } } @@ -371,17 +380,33 @@ fn case_insensitive_pass(case_sensitive_pass_results: Vec (Option>>, Vec>) { +pub struct LexData { + pub(crate) no_tokens: bool, + pub(crate) orig_present: bool, + pub(crate) end_present: bool, +} + +fn contains_token(tokens: &Option>>, token: Token) -> bool { + match tokens { + None => false, + Some(ts) => ts.iter().any(|t| t.0 == token) + } +} + +pub fn lex(source: &str, leniency: LeniencyLevel) -> (Option>>, LexData, Vec>) { let (maybe_csprs, mut errors) = case_sensitive_pass(source); let tokens = - if let Some(csprs) = maybe_csprs { - let (maybe_tokens, cip_errors) = case_insensitive_pass(csprs, leniency); - errors.extend(cip_errors); - maybe_tokens - } else { - None - }; - (tokens, errors) + maybe_csprs + .map(|csprs| { + let (maybe_tokens, cip_errors) = case_insensitive_pass(csprs, leniency); + errors.extend(cip_errors); + maybe_tokens + }); + let no_tokens = if let Some(ts) = &tokens { ts.is_empty() } else { true }; + 
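[Reviewer note, not part of the patch] The missing-.ORIG/.END reporting introduced here relies on a cheap pre-parse scan of the token stream, so the diagnostics survive even when parsing yields nothing usable. A simplified sketch with a stand-in `Token` type in place of the crate's lexer token:

#[derive(Debug, PartialEq)]
enum Token { Orig, End, Label(String), Other }

struct LexData { no_tokens: bool, orig_present: bool, end_present: bool }

/// One pass over the tokens, recording only presence flags.
fn analyze_tokens(tokens: &[Token]) -> LexData {
    LexData {
        no_tokens: tokens.is_empty(),
        orig_present: tokens.iter().any(|t| *t == Token::Orig),
        end_present: tokens.iter().any(|t| *t == Token::End),
    }
}

fn missing_pseudo_op_errors(data: &LexData) -> Vec<&'static str> {
    let mut errors = Vec::new();
    if data.no_tokens {
        errors.push("no LC-3 assembly in file");
    } else {
        if !data.orig_present { errors.push("no .ORIG pseudo-op in file"); }
        if !data.end_present { errors.push("no .END pseudo-op in file"); }
    }
    errors
}

fn main() {
    let tokens = vec![Token::Label("LOOP".into()), Token::Other, Token::End];
    let data = analyze_tokens(&tokens);
    assert_eq!(missing_pseudo_op_errors(&data), vec!["no .ORIG pseudo-op in file"]);
}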
let orig_present = contains_token(&tokens, Token::Opcode(Opcode::Orig)); + let end_present = contains_token(&tokens, Token::End); + let lex_data = LexData { no_tokens, orig_present, end_present }; + (tokens, lex_data, errors) } @@ -395,7 +420,7 @@ mod tests { #[test] fn lone_error() { let source = "#OOPS"; - let (tokens, _) = lex(source, LeniencyLevel::Lenient); + let (tokens, _, _) = lex(source, LeniencyLevel::Lenient); assert_eq!( Some(vec![ (Invalid, 0..5), @@ -406,7 +431,7 @@ mod tests { #[test] fn error_in_context() { let source = "ADD R0, R0, #OOPS; <- error"; - let (tokens, _) = lex(source, LeniencyLevel::Lenient); + let (tokens, _, _) = lex(source, LeniencyLevel::Lenient); assert_eq!( Some(vec![ (Opcode(Add), 0.. 3), diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 00dbcf0..b77bed0 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -27,7 +27,7 @@ mod tests { #[test] fn simple() { let src = ".ORIG x3000;\nLABEL ADD R0, R0, #7000\n.end"; - let (tokens, lex_errs) = lexer::lex(src, LeniencyLevel::Lenient); + let (tokens, _, lex_errs) = lexer::lex(src, LeniencyLevel::Lenient); println!("{:?}", tokens); println!("{:?}", lex_errs); diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index d9960d1..3aef6a7 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -1,5 +1,7 @@ use std::convert::{TryFrom, TryInto}; +use chumsky::combinator::Repeated; use chumsky::prelude::*; +use chumsky::primitive::NoneOf; use chumsky::recovery::SkipUntil; use chumsky::Stream; use lc3_isa::{Reg, Word}; @@ -173,7 +175,11 @@ fn comments_and_newlines() -> impl Parser> { .ignored() } -fn program(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { +fn everything_until_orig() -> Repeated>> { + none_of(Token::Opcode(Opcode::Orig)).repeated() +} + +fn program(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { let orig = just(Token::Opcode(Opcode::Orig)) .ignore_then(operands(leniency)); @@ -187,8 +193,11 @@ fn program(leniency: LeniencyLevel) -> impl Parser, Erro ) .then_ignore(just::<_, Token, _>(Token::End)) .map_with_span(|(orig, instructions), span| { - (Program { orig, instructions }, span) + (Ok(Program { orig, instructions }), span) }) + // Pseudo-recovery strategy -- take everything until next .ORIG + .or(any().then(everything_until_orig()) + .map_with_span(|_, span| (Err(()), span))) } #[derive(Debug)] @@ -198,12 +207,11 @@ pub struct File { } fn file(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { - none_of(Token::Opcode(Opcode::Orig)).repeated() + everything_until_orig() .map_with_span(|toks, span| (toks, span)) .then( program(leniency) - .map(|(p, span)| (Ok(p), span)) - .separated_by(none_of(Token::Opcode(Opcode::Orig)).repeated()) + .separated_by(everything_until_orig()) .allow_trailing() ) .then_ignore(end()) @@ -229,7 +237,7 @@ mod tests { #[test] fn capture_tokens_before_first_orig_separately() { let source = "%some #random junk .ORIG x3000\nADD R0, R0, R0\n.END"; - let (maybe_tokens, _) = lex(source, LeniencyLevel::Lenient); + let (maybe_tokens, _, _) = lex(source, LeniencyLevel::Lenient); let tokens = maybe_tokens.unwrap(); let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); @@ -240,7 +248,7 @@ mod tests { #[test] fn ignore_after_end() { let source = ".ORIG x3000\nADD R0, R0, R0\n.END then %some #random junk!"; - let (maybe_tokens, _) = lex(source, LeniencyLevel::Lenient); + let (maybe_tokens, _, _) = lex(source, LeniencyLevel::Lenient); let tokens = maybe_tokens.unwrap(); let (file, _) = parse(source, tokens, 
LeniencyLevel::Lenient); @@ -259,7 +267,7 @@ mod tests { #[test] fn operand_error() { let source = ".ORIG x3000\nADD R0, R0, #OOPS; <- error\n.END"; - let (maybe_tokens, _) = lex(source, LeniencyLevel::Lenient); + let (maybe_tokens, _, _) = lex(source, LeniencyLevel::Lenient); let tokens = maybe_tokens.unwrap(); let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); @@ -275,7 +283,7 @@ mod tests { #[test] fn label_error() { let source = ".ORIG x3000\nA%DDER ADD R0, R0, #1; <- error\n.END"; - let (maybe_tokens, _) = lex(source, LeniencyLevel::Lenient); + let (maybe_tokens, _, _) = lex(source, LeniencyLevel::Lenient); let tokens = maybe_tokens.unwrap(); let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index cc90eb0..a54ea09 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -270,11 +270,11 @@ mod single_instruction { } fn test(input: &str, orig: usize, expected_mem: &[Word]) { - let (maybe_tokens, lex_errs) = lexer::lex(input, LeniencyLevel::Lenient); + let (maybe_tokens, _, _) = lexer::lex(input, LeniencyLevel::Lenient); let tokens = maybe_tokens.expect("lexing failed"); - let (maybe_file, parse_errs) = parser::parse(input, tokens, LeniencyLevel::Lenient); - let (mut file, span) = maybe_file.expect("parsing failed"); + let (maybe_file, _) = parser::parse(input, tokens, LeniencyLevel::Lenient); + let (mut file, _) = maybe_file.expect("parsing failed"); assert_eq!(1, file.programs.len(), "parsed unexpected number of programs: {}", file.programs.len()); let program = file.programs.remove(0).0.expect("parse error in program"); let object = assembler::assemble(program).expect("assembly failed"); From 49ff7b256019118ffe88fbbb5432c0bdd6ec9355 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Tue, 14 Jun 2022 16:51:14 -0500 Subject: [PATCH 56/82] assembler: unify errors, provide API for main workflow --- assembler/bin/as.rs | 102 ++++++++++++------------- assembler/src/analysis.rs | 151 ++++++++++++++++++++++--------------- assembler/src/assembler.rs | 86 ++++++++++++++------- assembler/src/lexer.rs | 37 ++++----- assembler/src/lib.rs | 79 +++++++++++++++---- assembler/src/linker.rs | 35 +++++---- assembler/src/parser.rs | 38 +++++----- assembler/tests/integ.rs | 14 +--- 8 files changed, 327 insertions(+), 215 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index eede0e2..621bf5c 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -4,14 +4,11 @@ use std::{env, fs}; use std::path::{Path, PathBuf}; use std::process::exit; use ariadne::Source; -use lc3_assembler::parser::parse; +use lc3_assembler::parser::{File, parse}; use lc3_shims::memory::FileBackedMemoryShim; use clap::{Parser}; -use lc3_assembler::analysis::{report, validate}; -use lc3_assembler::assembler::assemble; -use lc3_assembler::LeniencyLevel; -use lc3_assembler::lexer::lex; -use lc3_assembler::linker::link; +use lc3_isa::util::MemoryDump; +use lc3_assembler::{assemble, assemble_file, LeniencyLevel, parse_and_analyze, parse_and_analyze_file}; const MEM_DUMP_FILE_EXTENSION: &'static str = "mem"; @@ -23,9 +20,9 @@ const MEM_DUMP_FILE_EXTENSION: &'static str = "mem"; of LC-3 machine code." 
)] struct Args { - /// Input file paths + /// Input file path #[clap(required = true, parse(from_os_str), value_name = "INPUT_FILE")] - input: Vec, + input: PathBuf, /// Enforce all rules of the original LC-3 assembly language /// @@ -54,59 +51,62 @@ fn main() { .join().unwrap(); } -fn as_() { - let args = Args::parse(); +enum Error { + Io(std::io::Error), + MemoryShim(lc3_shims::memory::error::MemoryShimError), + Assembler +} - for path in args.input { - assert!(path.is_file()); +impl From for Error { + fn from(e: std::io::Error) -> Self { + Error::Io(e) + } +} - let leniency = if args.strict { LeniencyLevel::Strict } else { LeniencyLevel::Lenient }; +impl From for Error { + fn from(e: lc3_shims::memory::error::MemoryShimError) -> Self { + Error::MemoryShim(e) + } +} - let string = fs::read_to_string(path.clone()).expect(&format!("Could not read file at: {:?}", path)); - let src = string.as_str(); +fn as_() -> Result<(), Error> { + let args = Args::parse(); - let (maybe_tokens, lex_data, lex_errs) = lex(src, leniency); - if let None = maybe_tokens { - for lex_err in lex_errs { - println!("Lex error: {}", lex_err); - } - continue; - } - let tokens = maybe_tokens.expect("Lexing failed, but produced no errors."); + let leniency = if args.strict { LeniencyLevel::Strict } else { LeniencyLevel::Lenient }; + + let src = fs::read_to_string(args.input.clone())?; - let (maybe_file, parse_errs) = parse(src, tokens, leniency); - if let None = maybe_file { - for parse_err in parse_errs { - println!("{}", parse_err); + if args.check { + match parse_and_analyze(&src, leniency) { + Ok(_) => { + println!("{}: No errors found.", args.input.display()); + Ok(()) } - continue; + Err(error) => print_errors(error, &src) } - let spanned_file = maybe_file.expect("Parsing failed, but produced no errors."); - - let errors = validate(&lex_data, &spanned_file); - - if !errors.is_empty() { - for error in errors { - let report = report(error); - report.eprint(Source::from(src)); + } else { + match assemble(&src, leniency, args.no_os) { + Ok(mem) => { + let mut output_path = args.input.clone(); + output_path.set_extension(MEM_DUMP_FILE_EXTENSION); + let mut file_backed_mem = FileBackedMemoryShim::with_initialized_memory(output_path, mem); + file_backed_mem.flush_all_changes()?; + + Ok(()) } - continue; + Err(error) => print_errors(error, &src) } + } +} - if args.check { - println!("{}: No errors found.", path.to_str().unwrap()); - } else { - let mut file = spanned_file.0; - let objects = - file.programs.into_iter() - .map(|program| assemble(program.0.expect("Found invalid object.")).expect("Failed to assemble object.")); - - let mem = link(objects, !args.no_os).expect("linking failed"); +fn print_errors(error: lc3_assembler::analysis::Error, src: &String) -> Result<(), Error> { + let print_results = + error.report().into_iter() + .map(|report| report.eprint(Source::from(src))) + .collect::>(); - let mut output_path = path.clone(); - output_path.set_extension(MEM_DUMP_FILE_EXTENSION); - let mut file_backed_mem = FileBackedMemoryShim::with_initialized_memory(output_path, mem); - file_backed_mem.flush_all_changes().unwrap(); - } + for print_result in print_results { + print_result? 
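[Reviewer note, not part of the patch] The unified error shape this patch introduces wraps either a bare error, a source-located error, or a batch of them, and flattens recursively when it is time to print. A minimal sketch of that structure, using plain strings in place of ariadne reports purely to stay self-contained:

type Span = std::ops::Range<usize>;

enum SingleError { NoOrig, BadLabel }

enum Error {
    Single(SingleError),
    Spanned(Span, SingleError),
    Multiple(Vec<Error>),
}

impl SingleError {
    fn message(&self) -> &'static str {
        match self {
            SingleError::NoOrig => "no .ORIG pseudo-op in file",
            SingleError::BadLabel => "invalid label",
        }
    }
}

impl Error {
    /// One error value can expand into any number of printable reports.
    fn report(self) -> Vec<String> {
        match self {
            Error::Single(e) => vec![e.message().to_string()],
            Error::Spanned(span, e) => vec![format!("{} at {:?}", e.message(), span)],
            Error::Multiple(errors) => errors.into_iter().flat_map(Error::report).collect(),
        }
    }
}

fn main() {
    let error = Error::Multiple(vec![
        Error::Single(SingleError::NoOrig),
        Error::Spanned(12..18, SingleError::BadLabel),
    ]);
    for line in error.report() {
        eprintln!("{}", line);
    }
}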
} + Err(Error::Assembler) } diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index edb97f1..a86471f 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -11,10 +11,42 @@ use crate::lexer::{LexData, LiteralValue, Opcode}; use crate::parser::{File, get, get_result, Instruction, Operand, Program, result, WithErrData}; use crate::{Span, Spanned}; -type ErrorList = Vec>; +type ErrorList = Vec; use Error::*; +#[derive(Debug)] pub enum Error { + Single(SingleError), + Spanned(Span, SingleError), + Multiple(Vec), +} + +impl Error { + pub fn report(self) -> Vec { + match self { + Single(error) => vec![report_single(error).finish()], + Spanned(span, error) => vec![ + report_single(error) + .with_label(Label::new(span).with_message("here")) + .finish() + ], + Multiple(errors) => + errors.into_iter() + .flat_map(|e| e.report()) + .collect() + } + } +} + +use SingleError::*; +#[derive(Debug)] +pub enum SingleError { + Io(std::io::Error), + Lex(chumsky::error::Simple), + Parse(chumsky::error::Simple), + Assemble, + Link, + BadProgram, BadInstruction, BadLabel, @@ -32,13 +64,14 @@ pub enum Error { NoEnd, } +#[derive(Debug)] pub enum InvalidReferenceReason { Undefined, Duplicated, OutOfBounds, } -impl Error { +impl SingleError { fn objects_overlap(p1: ObjectPlacement, p2: ObjectPlacement) -> Self { let (placement1, placement2) = if p1.span_in_memory.start <= p2.span_in_memory.start { @@ -92,15 +125,14 @@ impl Error { o2e_width = max(4, min_signed_hex_digits_required(placement2.span_in_memory.end) as usize), ) } - NoTokens => { - "no LC-3 assembly in file".to_string() - } - NoOrig => { - "no .ORIG pseudo-op in file".to_string() - } - NoEnd => { - "no .END pseudo-op in file".to_string() - } + NoTokens => "no LC-3 assembly in file".to_string(), + NoOrig => "no .ORIG pseudo-op in file".to_string(), + NoEnd => "no .END pseudo-op in file".to_string(), + Io(ioe) => ioe.to_string(), + Lex(le) => le.to_string(), + Parse(pe) => pe.to_string(), + Assemble => "unexpected assembly error".to_string(), + Link => "unexpected link error".to_string(), } } } @@ -112,11 +144,9 @@ fn min_signed_hex_digits_required(n: i32) -> u8 { } -pub fn report(spanned_error: Spanned) -> Report { - let (error, span) = spanned_error; - let mut r = - Report::build(ReportKind::Error, (), 0) - .with_message(error.message()); +fn report_single(error: SingleError) -> ReportBuilder { + let mut r = Report::build(ReportKind::Error, (), 0) + .with_message(error.message()); match error { DuplicateLabel { occurrences, .. 
} => { let mut first_declaration_labeled = false; @@ -138,22 +168,20 @@ pub fn report(spanned_error: Spanned) -> Report { (placement2, "start", placement1, "end") }; r = r.with_label(Label::new(first.span_in_file) - .with_message(format!("{} of this object overlaps the other", first_pos_text))) - .with_label(Label::new(second.span_in_file) + .with_message(format!("{} of this object overlaps the other", first_pos_text))) + .with_label(Label::new(second.span_in_file) .with_message(format!("{} of this object overlaps the other", second_pos_text))); } - NoTokens => {}, - _ => { - r = r.with_label(Label::new(span).with_message("here")); - } + _ => {} } - r.finish() + r } + use OperandType::*; use crate::assembler::{calculate_offset, get_orig}; -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum OperandType { Register, UnqualifiedNumber, @@ -314,8 +342,8 @@ impl ParseErrorsAnalysis { Default::default() } - fn push_error(&mut self, error: Error, span: &Span) { - self.errors.push((error, span.clone())); + fn push_error(&mut self, single_error: SingleError, span: &Span) { + self.errors.push(Spanned(span.clone(), single_error)); } } @@ -362,10 +390,11 @@ impl MutVisitor for DuplicateLabelsAnalysis { labels.iter() .filter(|(_, occurrences)| occurrences.len() > 1) .map(|(label, occurrences)| - (DuplicateLabel { + Single( + DuplicateLabel { label: label.clone(), occurrences: occurrences.clone() - }, 0..0) // TODO: dummy span, refactor so not required for errors with alternate span data + }) ) .for_each(|e| errors.push(e)); } @@ -488,21 +517,23 @@ impl<'a> LabelOffsetBoundsAnalysis<'a> { Err(_) => { // TODO: make more precise. This case shouldn't be possible unless one of the estimated addresses is far out of bounds. self.errors.push( - (InvalidLabelReference { - label: label.clone(), - reason: InvalidReferenceReason::OutOfBounds - }, span.clone())); + Spanned(span.clone(), + InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::OutOfBounds + })); } Ok(offset) => { if min_signed_width(offset as i32) > width { self.errors.push( - (LabelTooDistant { - label: label.clone(), - width, - est_ref_pos: ref_addr, - offset, - est_label_pos: label_addr, - }, span.clone())) + Spanned(span.clone(), + LabelTooDistant { + label: label.clone(), + width, + est_ref_pos: ref_addr, + offset, + est_label_pos: label_addr, + })); } } } @@ -533,10 +564,11 @@ impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { match self.symbol_table.get(label) { None => { self.errors.push( - (InvalidLabelReference { + Spanned(op_span.clone(), + InvalidLabelReference { label: label.clone(), reason: InvalidReferenceReason::Undefined - }, op_span.clone())); + })); } Some(stv) => match stv { Ok(addr) => { @@ -549,17 +581,19 @@ impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { } InvalidSymbolError::Duplicated => { self.errors.push( - (InvalidLabelReference { - label: label.clone(), - reason: InvalidReferenceReason::Duplicated - }, op_span.clone())); + Spanned(op_span.clone(), + InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::Duplicated + })); } InvalidSymbolError::OutOfBounds => { self.errors.push( - (InvalidLabelReference { - label: label.clone(), - reason: InvalidReferenceReason::OutOfBounds - }, op_span.clone())); + Spanned(op_span.clone(), + InvalidLabelReference { + label: label.clone(), + reason: InvalidReferenceReason::OutOfBounds + })); } } } @@ -588,7 +622,7 @@ impl OperandTypesAnalysis { let ops_len = operands.len(); let exp_len = expected.len(); if ops_len != exp_len 
{ - self.errors.push((WrongNumberOfOperands { expected: exp_len, actual: ops_len }, span.clone())) + self.errors.push(Spanned(span.clone(), WrongNumberOfOperands { expected: exp_len, actual: ops_len })) } else { for ((op_res, op_span), exp_ty) in zip(operands, expected) { if let Ok(op) = op_res { @@ -598,7 +632,7 @@ impl OperandTypesAnalysis { } else { OperandType::of(op) }; - self.errors.push((OperandTypeMismatch { expected: exp_ty.clone(), actual }, op_span.clone())); + self.errors.push(Spanned(op_span.clone(), OperandTypeMismatch { expected: exp_ty.clone(), actual })); } } } @@ -660,7 +694,7 @@ struct ObjectPlacementAnalysis { object_spans: Vec, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct ObjectPlacement { position_in_file: usize, span_in_file: Span, @@ -683,9 +717,7 @@ impl MutVisitor for ObjectPlacementAnalysis { self.object_spans.sort_unstable_by_key(|span| span.span_in_memory.start); for (op1, op2) in self.object_spans.iter().tuple_windows() { if op2.span_in_memory.start < op1.span_in_memory.end { - self.errors.push(( - Error::objects_overlap(op1.clone(), op2.clone()), - 0..0)); // TODO: refactor to avoid dummy span + self.errors.push(Single(SingleError::objects_overlap(op1.clone(), op2.clone()))); } } } @@ -895,20 +927,19 @@ trait MutVisitor { fn analyze_lex_data(lex_data: &LexData, file_span: &Span) -> ErrorList { let mut errors = Vec::new(); if lex_data.no_tokens { - errors.push((NoTokens, 0..0)) + errors.push(Single(NoTokens)) } else { if !lex_data.orig_present { - errors.push((NoOrig, file_span.start..file_span.start)); + errors.push(Spanned(file_span.start..file_span.start, NoOrig)); } if !lex_data.end_present { - errors.push((NoEnd, file_span.end..file_span.end)); + errors.push(Spanned(file_span.end..file_span.end, NoEnd)); } } errors } - -pub fn validate(lex_data: &LexData, file_spanned: &Spanned) -> ErrorList { +pub fn validate(lex_data: &LexData, file_spanned: &Spanned) -> Vec { let (file, file_span) = file_spanned; let errors_from_lex_data = analyze_lex_data(&lex_data, file_span); diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs index 632e9c3..685b5fe 100644 --- a/assembler/src/assembler.rs +++ b/assembler/src/assembler.rs @@ -134,8 +134,17 @@ impl Instruction { } } +struct FirstPassRegion { + origin: Addr, + instructions: Vec, +} + pub struct Object { pub(crate) symbol_table: SymbolTable, + pub(crate) regions: Vec, +} + +pub struct Region { pub(crate) origin: Addr, pub(crate) words: Vec, } @@ -147,12 +156,10 @@ pub(crate) enum ObjectWord { } -impl TryFrom for Instruction { +impl TryFrom<(WithErrData, WithErrData>>)> for Instruction { type Error = (); - fn try_from(i: parser::Instruction) -> Result { - - let parser::Instruction { opcode: raw_opcode, operands: raw_operands, .. } = i; + fn try_from((raw_opcode, raw_operands): (WithErrData, WithErrData>>)) -> Result { let operands = result(raw_operands)?; match result(raw_opcode)? 
{ Opcode::Add => { @@ -280,7 +287,7 @@ impl TryFrom for ObjectWord { type Error = (); fn try_from(value: parser::Instruction) -> Result { - Instruction::try_from(value) + (value.opcode, value.operands).try_into() .map(ObjectWord::UnlinkedInstruction) } } @@ -445,30 +452,40 @@ pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: Ok(res) } -fn first_pass(origin: Addr, instructions: Vec>) -> Result<(Vec, SymbolTable), ()> { +type ParserInstructions = Vec>; + +fn first_pass(region_data: impl IntoIterator) -> Result<(Vec, SymbolTable), ()> { + let mut fp_regions = Vec::new(); let mut symbol_table = HashMap::new(); - let mut words = Vec::new(); - let mut location_counter = origin; - for raw_instruction in instructions.into_iter() { - let parser_instruction = result(raw_instruction)?; - if let Some(l) = parser_instruction.label.clone() { // TODO: label not needed for conversion to Instruction; consider changing to TryFrom<(Opcode, Operands)> to avoid clone - symbol_table.insert(result(l)?, location_counter); - }; + for (origin, parser_instructions) in region_data { + let mut instructions = Vec::new(); + let mut location_counter = origin; - let instruction: Instruction = parser_instruction.try_into()?; - let addresses_used = instruction.addresses_occupied(); - words.push(instruction); + for raw_instruction in parser_instructions.into_iter() { + let parser_instruction = result(raw_instruction)?; + if let Some(l) = parser_instruction.label { + symbol_table.insert(result(l)?, location_counter); + }; - location_counter += addresses_used; + let instruction: Instruction = (parser_instruction.opcode, parser_instruction.operands).try_into()?; + let addresses_used = instruction.addresses_occupied(); + instructions.push(instruction); + + location_counter += addresses_used; + } + + fp_regions.push(FirstPassRegion { origin, instructions }); } - Ok((words, symbol_table)) + Ok((fp_regions, symbol_table)) } -fn second_pass(symbol_table: SymbolTable, origin: Addr, instructions: Vec) -> Result { - let mut location_counter = origin; +fn second_pass_one_region(symbol_table: &SymbolTable, fp_region: FirstPassRegion) -> Result { + let FirstPassRegion { origin, instructions } = fp_region; + let mut words = Vec::new(); + let mut location_counter = origin; for instruction in instructions.into_iter() { let addresses_used = instruction.addresses_occupied(); @@ -479,7 +496,16 @@ fn second_pass(symbol_table: SymbolTable, origin: Addr, instructions: Vec) -> Result { + let regions = + fp_regions.into_iter() + .map(|fp_region| second_pass_one_region(&symbol_table, fp_region)) + .collect::, TryFromIntError>>()?; + + Ok(Object { symbol_table, regions }) } pub(crate) fn get_orig(orig_operands: WithErrData>>) -> Result { @@ -487,9 +513,17 @@ pub(crate) fn get_orig(orig_operands: WithErrData>>) -> result(orig_operand)?.try_into() } -pub fn assemble(program: Program) -> Result { - let Program { orig, instructions: parser_instructions, .. 
} = program; - let origin = get_orig(orig)?; - let (instructions, symbol_table) = first_pass(origin, parser_instructions)?; - second_pass(symbol_table, origin, instructions).map_err(|_| ()) +pub fn assemble(file: parser::File) -> Result { + let region_data = + file.programs.into_iter() + .map(|p| { + let parser::Program { orig, instructions } = result(p)?; + let origin = get_orig(orig)?; + Ok((origin, instructions)) + }) + .collect::, ()>>()?; + + let (fp_regions, symbol_table) = first_pass(region_data)?; + + second_pass(symbol_table, fp_regions).map_err(|_| ()) } \ No newline at end of file diff --git a/assembler/src/lexer.rs b/assembler/src/lexer.rs index 753aeaf..cecb531 100644 --- a/assembler/src/lexer.rs +++ b/assembler/src/lexer.rs @@ -386,14 +386,11 @@ pub struct LexData { pub(crate) end_present: bool, } -fn contains_token(tokens: &Option>>, token: Token) -> bool { - match tokens { - None => false, - Some(ts) => ts.iter().any(|t| t.0 == token) - } +fn contains_token(tokens: &Vec>, token: Token) -> bool { + tokens.iter().any(|t| t.0 == token) } -pub fn lex(source: &str, leniency: LeniencyLevel) -> (Option>>, LexData, Vec>) { +pub fn lex(source: &str, leniency: LeniencyLevel) -> Result<(Vec>, LexData), Vec>> { let (maybe_csprs, mut errors) = case_sensitive_pass(source); let tokens = maybe_csprs @@ -402,11 +399,17 @@ pub fn lex(source: &str, leniency: LeniencyLevel) -> (Option> errors.extend(cip_errors); maybe_tokens }); - let no_tokens = if let Some(ts) = &tokens { ts.is_empty() } else { true }; - let orig_present = contains_token(&tokens, Token::Opcode(Opcode::Orig)); - let end_present = contains_token(&tokens, Token::End); - let lex_data = LexData { no_tokens, orig_present, end_present }; - (tokens, lex_data, errors) + + match tokens { + None => Err(errors), + Some(ts) => { + let no_tokens = ts.is_empty(); + let orig_present = contains_token(&ts, Token::Opcode(Opcode::Orig)); + let end_present = contains_token(&ts, Token::End); + let lex_data = LexData { no_tokens, orig_present, end_present }; + Ok((ts, lex_data)) + } + } } @@ -420,20 +423,20 @@ mod tests { #[test] fn lone_error() { let source = "#OOPS"; - let (tokens, _, _) = lex(source, LeniencyLevel::Lenient); + let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); assert_eq!( - Some(vec![ + vec![ (Invalid, 0..5), - ]), + ], tokens); } #[test] fn error_in_context() { let source = "ADD R0, R0, #OOPS; <- error"; - let (tokens, _, _) = lex(source, LeniencyLevel::Lenient); + let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); assert_eq!( - Some(vec![ + vec![ (Opcode(Add), 0.. 3), (Register(R0), 4.. 6), (Comma, 6.. 
7), @@ -441,7 +444,7 @@ mod tests { (Comma, 10..11), (Invalid, 12..17), (Comment, 17..27), - ]), + ], tokens); } } \ No newline at end of file diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index b77bed0..03b83b5 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -5,6 +5,9 @@ extern crate core; +use std::fs; +use std::path::PathBuf; + pub mod lexer; pub mod parser; pub mod assembler; @@ -20,21 +23,69 @@ pub enum LeniencyLevel { Strict } -#[cfg(test)] -mod tests { - use super::*; +const MEM_DUMP_FILE_EXTENSION: &'static str = "mem"; + +impl From for analysis::SingleError { + fn from(error: std::io::Error) -> Self { + analysis::SingleError::Io(error) + } +} - #[test] - fn simple() { - let src = ".ORIG x3000;\nLABEL ADD R0, R0, #7000\n.end"; - let (tokens, _, lex_errs) = lexer::lex(src, LeniencyLevel::Lenient); - println!("{:?}", tokens); - println!("{:?}", lex_errs); +impl From> for analysis::Error + where E: Into +{ + fn from(errors: Vec) -> Self { + let es = errors.into_iter() + .map(|e| e.into()) + .collect(); + analysis::Error::Multiple(es) + } +} - let parse_results = tokens.map(|ts| parser::parse(src, ts, LeniencyLevel::Strict)); - if let Some((program, parse_errs)) = parse_results { - println!("{:?}", program); - println!("{:?}", parse_errs); - } +impl From for analysis::Error + where E: Into +{ + fn from(error: E) -> Self { + analysis::Error::Single(error.into()) } } + +impl From> for analysis::SingleError { + fn from(error: chumsky::error::Simple) -> Self { + analysis::SingleError::Lex(error) + } +} + +impl From> for analysis::SingleError { + fn from(error: chumsky::error::Simple) -> Self { + analysis::SingleError::Parse(error) + } +} + +pub fn parse_and_analyze_file(input: PathBuf, leniency: LeniencyLevel) -> Result { + let src = fs::read_to_string(input)?; + parse_and_analyze(&src, leniency) +} + +pub fn parse_and_analyze(src: &String, leniency: LeniencyLevel) -> Result { + let (tokens, lex_data) = lexer::lex(src, leniency)?; + let file_spanned = parser::parse(src, tokens, leniency)?; + let errors = analysis::validate(&lex_data, &file_spanned); + if !errors.is_empty() { + return Err(errors.into()); + } + let (file, _) = file_spanned; + Ok(file) +} + +pub fn assemble_file(input: PathBuf, leniency: LeniencyLevel, no_os: bool) -> Result { + let src = fs::read_to_string(input)?; + assemble(&src, leniency, no_os) +} + +pub fn assemble(src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { + let file = parse_and_analyze(src, leniency)?; + let object = assembler::assemble(file).map_err(|_| analysis::SingleError::Assemble)?; + let mem = linker::link([object], !no_os)?; + Ok(mem) +} diff --git a/assembler/src/linker.rs b/assembler/src/linker.rs index 5c248ca..d4448f9 100644 --- a/assembler/src/linker.rs +++ b/assembler/src/linker.rs @@ -4,15 +4,15 @@ use chumsky::chain::Chain; use chumsky::Parser; use lc3_isa::util::MemoryDump; use lc3_isa::{Addr, ADDR_SPACE_SIZE_IN_WORDS, Word}; -use crate::assembler::{assemble_instruction, SymbolTable, Object, ObjectWord, AssemblyResult}; +use crate::assembler::{assemble_instruction, SymbolTable, Object, ObjectWord, AssemblyResult, Region}; -struct LinkedObject { +struct LinkedRegion { origin: Addr, words: Vec, } -fn layer_object(image: &mut [Word; ADDR_SPACE_SIZE_IN_WORDS], object: LinkedObject) { - let LinkedObject { origin, words } = object; +fn layer_region(image: &mut [Word; ADDR_SPACE_SIZE_IN_WORDS], object: LinkedRegion) { + let LinkedRegion { origin, words } = object; let mut addr = origin as usize; for word in 
words { image[addr] = word; @@ -20,12 +20,12 @@ fn layer_object(image: &mut [Word; ADDR_SPACE_SIZE_IN_WORDS], object: LinkedObje } } -fn link_object(symbol_table: &SymbolTable, object: Object) -> Result { +fn link_region(symbol_table: &SymbolTable, region: Region) -> Result { let mut words = Vec::new(); - let Object { origin, words: object_words, .. } = object; + let Region { origin, words: region_words, .. } = region; let mut location_counter = origin; - for object_word in object_words { - match object_word { + for region_word in region_words { + match region_word { ObjectWord::Value(word) => { words.push(word); location_counter += 1; @@ -52,10 +52,10 @@ fn link_object(symbol_table: &SymbolTable, object: Object) -> Result, overlay_on_os: bool) -> Result { +pub fn link(objects: impl IntoIterator, overlay_on_os: bool) -> Result { let objects = objects.into_iter().collect::>(); let mut symbol_table = HashMap::new(); @@ -67,18 +67,21 @@ pub fn link(objects: impl IntoIterator, overlay_on_os: bool) -> Res let mut image = if overlay_on_os { + let first_object = objects.get(0).ok_or(crate::analysis::SingleError::Link)?; + let first_region = first_object.regions.get(0).ok_or(crate::analysis::SingleError::Link)?; + let mut os = lc3_os::OS_IMAGE.clone().0; - os[lc3_os::USER_PROG_START_ADDR as usize] = - objects.get(0) - .expect("Found no objects in file; could not find origin.") - .origin; // TODO: fail gracefully + os[lc3_os::USER_PROG_START_ADDR as usize] = first_region.origin; + os } else { [0; ADDR_SPACE_SIZE_IN_WORDS] }; for object in objects { - let linked_object = link_object(&symbol_table, object)?; - layer_object(&mut image, linked_object); + for region in object.regions { + let linked_region = link_region(&symbol_table, region).map_err(|_| crate::analysis::SingleError::Link)?; + layer_region(&mut image, linked_region); + } } Ok(image.into()) diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index 3aef6a7..1d2a238 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -219,10 +219,13 @@ fn file(leniency: LeniencyLevel) -> impl Parser, Error = Si (File { before_first_orig, programs }, span)) } -pub fn parse(src: &str, tokens: Vec>, leniency: LeniencyLevel) -> (Option>, Vec>) { +pub fn parse(src: &str, tokens: Vec>, leniency: LeniencyLevel) -> Result, Vec>> { let len = src.chars().count(); - file(leniency) - .parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())) + let (maybe_file, errors) = + file(leniency) + .parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())); + + maybe_file.ok_or(errors) } @@ -237,23 +240,20 @@ mod tests { #[test] fn capture_tokens_before_first_orig_separately() { let source = "%some #random junk .ORIG x3000\nADD R0, R0, R0\n.END"; - let (maybe_tokens, _, _) = lex(source, LeniencyLevel::Lenient); - let tokens = maybe_tokens.unwrap(); - let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); + let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); + let file = parse(source, tokens, LeniencyLevel::Lenient).unwrap(); assert_eq!((vec![Token::Invalid, Token::Invalid, Token::Label("JUNK".to_string())], 0..18), - file.unwrap().0.before_first_orig); + file.0.before_first_orig); } #[test] fn ignore_after_end() { let source = ".ORIG x3000\nADD R0, R0, R0\n.END then %some #random junk!"; - let (maybe_tokens, _, _) = lex(source, LeniencyLevel::Lenient); - let tokens = maybe_tokens.unwrap(); - let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); - + let (tokens, _) = lex(source, 
LeniencyLevel::Lenient).unwrap(); + let file = parse(source, tokens, LeniencyLevel::Lenient).unwrap(); - let f = file.unwrap().0; + let f = file.0; assert_eq!((vec![], 0..5), f.before_first_orig); // TODO: probably doesn't need fixing, but span should probably be 0..0; find source of bug assert_eq!(vec![(Ok(Program { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), @@ -267,9 +267,8 @@ mod tests { #[test] fn operand_error() { let source = ".ORIG x3000\nADD R0, R0, #OOPS; <- error\n.END"; - let (maybe_tokens, _, _) = lex(source, LeniencyLevel::Lenient); - let tokens = maybe_tokens.unwrap(); - let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); + let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); + let file = parse(source, tokens, LeniencyLevel::Lenient).unwrap(); assert_eq!(vec![(Ok(Program { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), @@ -277,15 +276,14 @@ mod tests { (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Err(()), 24..29)]), 16..29) }), 12..29) ], }), 0..44)], - file.unwrap().0.programs); + file.0.programs); } #[test] fn label_error() { let source = ".ORIG x3000\nA%DDER ADD R0, R0, #1; <- error\n.END"; - let (maybe_tokens, _, _) = lex(source, LeniencyLevel::Lenient); - let tokens = maybe_tokens.unwrap(); - let (file, _) = parse(source, tokens, LeniencyLevel::Lenient); + let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); + let file = parse(source, tokens, LeniencyLevel::Lenient).unwrap(); assert_eq!(vec![(Ok(Program { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), @@ -293,7 +291,7 @@ mod tests { (Ok(Instruction { label: Some((Err(()), 12..18)), opcode: (Ok(Add), 19..22), operands: (Ok(vec![(Ok(Register(R0)), 23..25), (Ok(Register(R0)), 27..29), (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33)]), 23..33) }), 12..33) ], }), 0..48)], - file.unwrap().0.programs); + file.0.programs); } } \ No newline at end of file diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index a54ea09..9bb1d5d 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -3,7 +3,7 @@ extern crate lc3_assembler; use lc3_isa::{ADDR_MAX_VAL, Word}; use std::ops::Index; use lc3_isa::util::MemoryDump; -use lc3_assembler::{assembler, lexer, linker, parser, LeniencyLevel}; +use lc3_assembler::{assembler, lexer, linker, parser, LeniencyLevel, assemble}; #[test] fn load_store_medium() { @@ -270,16 +270,8 @@ mod single_instruction { } fn test(input: &str, orig: usize, expected_mem: &[Word]) { - let (maybe_tokens, _, _) = lexer::lex(input, LeniencyLevel::Lenient); - let tokens = maybe_tokens.expect("lexing failed"); - - let (maybe_file, _) = parser::parse(input, tokens, LeniencyLevel::Lenient); - let (mut file, _) = maybe_file.expect("parsing failed"); - assert_eq!(1, file.programs.len(), "parsed unexpected number of programs: {}", file.programs.len()); - let program = file.programs.remove(0).0.expect("parse error in program"); - let object = assembler::assemble(program).expect("assembly failed"); - - let mem = linker::link([object], false).expect("linking failed"); + let src = input.to_string(); + let mem = assemble(&src, LeniencyLevel::Lenient, true).unwrap(); for i in 0..orig { assert_mem(&mem, i, 0x0000); From 452e38e6eab94e177bfa175931747bd0cb5b8665 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 15 Jun 2022 00:37:10 -0500 Subject: [PATCH 57/82] assembler: add 
minimal tests for single errors --- Cargo.lock | 7 ++ assembler/Cargo.toml | 3 + assembler/src/analysis.rs | 20 ++--- assembler/tests/integ.rs | 154 +++++++++++++++++++++++++++++++++++++- 4 files changed, 173 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 742d686..c714495 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,6 +38,12 @@ dependencies = [ "yansi", ] +[[package]] +name = "assert_matches" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" + [[package]] name = "atty" version = "0.2.14" @@ -235,6 +241,7 @@ version = "0.1.0" dependencies = [ "annotate-snippets", "ariadne", + "assert_matches", "chumsky", "clap", "itertools", diff --git a/assembler/Cargo.toml b/assembler/Cargo.toml index dc595f4..654f9bb 100644 --- a/assembler/Cargo.toml +++ b/assembler/Cargo.toml @@ -46,3 +46,6 @@ lc3-isa = { git = "https://github.com/ut-utp/core", branch = "master", version = lc3-shims = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } lc3-os = { git = "https://github.com/ut-utp/core", branch = "master", version = "0.1.0", default-features = false } # TODO: ^ eventually don't pull these from git + +[dev-dependencies] +assert_matches = "1.5.0" diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index a86471f..cdf27d5 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -58,7 +58,7 @@ pub enum SingleError { DuplicateLabel { label: String, occurrences: Vec, }, InvalidLabelReference { label: String, reason: InvalidReferenceReason }, LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, - ObjectsOverlap { placement1: ObjectPlacement, placement2: ObjectPlacement }, + RegionsOverlap { placement1: RegionPlacement, placement2: RegionPlacement }, NoTokens, NoOrig, NoEnd, @@ -72,14 +72,14 @@ pub enum InvalidReferenceReason { } impl SingleError { - fn objects_overlap(p1: ObjectPlacement, p2: ObjectPlacement) -> Self { + fn regions_overlap(p1: RegionPlacement, p2: RegionPlacement) -> Self { let (placement1, placement2) = if p1.span_in_memory.start <= p2.span_in_memory.start { (p1, p2) } else { (p2, p1) }; - ObjectsOverlap { placement1, placement2 } + RegionsOverlap { placement1, placement2 } } fn message(&self) -> String { @@ -111,8 +111,8 @@ impl SingleError { label_pos_width = max(4, min_signed_hex_digits_required(*est_ref_pos) as usize), ref_pos_width = max(4, min_signed_hex_digits_required(*est_label_pos) as usize),) } - ObjectsOverlap { placement1, placement2 } => { - format!("object {} in file occupying [{:#0o1s_width$X}, {:#0o1e_width$X}) overlaps object {} occupying [{:#0o2s_width$X}, {:#0o2e_width$X})", + RegionsOverlap { placement1, placement2 } => { + format!("region {} in file occupying [{:#0o1s_width$X}, {:#0o1e_width$X}) overlaps region {} occupying [{:#0o2s_width$X}, {:#0o2e_width$X})", placement1.position_in_file, placement1.span_in_memory.start, placement1.span_in_memory.end, @@ -160,7 +160,7 @@ fn report_single(error: SingleError) -> ReportBuilder { r = r.with_label(Label::new(occurrence).with_message(label_message)) } } - ObjectsOverlap { placement1, placement2 } => { + RegionsOverlap { placement1, placement2 } => { let (first, first_pos_text, second, second_pos_text) = if placement1.position_in_file < placement2.position_in_file { (placement1, "end", placement2, "start") @@ -691,11 +691,11 @@ struct 
ObjectPlacementAnalysis { errors: ErrorList, last_start: RoughAddr, object_index: usize, - object_spans: Vec, + object_spans: Vec, } #[derive(Clone, Debug)] -pub struct ObjectPlacement { +pub struct RegionPlacement { position_in_file: usize, span_in_file: Span, span_in_memory: Range, @@ -717,14 +717,14 @@ impl MutVisitor for ObjectPlacementAnalysis { self.object_spans.sort_unstable_by_key(|span| span.span_in_memory.start); for (op1, op2) in self.object_spans.iter().tuple_windows() { if op2.span_in_memory.start < op1.span_in_memory.end { - self.errors.push(Single(SingleError::objects_overlap(op1.clone(), op2.clone()))); + self.errors.push(Single(SingleError::regions_overlap(op1.clone(), op2.clone()))); } } } fn exit_program(&mut self, _program: &Program, span: &Span, location: &LocationCounter) { self.object_spans.push( - ObjectPlacement { + RegionPlacement { position_in_file: self.object_index, span_in_file: span.clone(), span_in_memory: self.last_start..location.value diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 9bb1d5d..b075e4c 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -3,7 +3,8 @@ extern crate lc3_assembler; use lc3_isa::{ADDR_MAX_VAL, Word}; use std::ops::Index; use lc3_isa::util::MemoryDump; -use lc3_assembler::{assembler, lexer, linker, parser, LeniencyLevel, assemble}; +use lc3_assembler::{assembler, lexer, linker, parser, LeniencyLevel, parse_and_analyze, assemble}; +use lc3_assembler::analysis::Error; #[test] fn load_store_medium() { @@ -288,3 +289,154 @@ fn assert_mem(mem: &MemoryDump, location: usize, expected: Word) { let actual = mem[location]; assert_eq!(expected, actual, "differed at {:#x}: expected {:#x}, was {:#x}", location, expected, actual); } + + + +mod error { + use assert_matches::assert_matches; + use lc3_assembler::analysis::{SingleError, OperandType, InvalidReferenceReason}; + use super::*; + + macro_rules! single_error_tests { + ($tests_name:ident + $( + $test_name:ident: $source:expr => $expected:pat + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + let src = $source.to_string(); + match parse_and_analyze(&src, LeniencyLevel::Lenient) { + Err(error) => { + match error { + Error::Multiple(errors) => { + assert_eq!(errors.len(), 1, "Found too many args: {:?}", errors); + match errors.get(0) { + Some(Error::Single(error)) + | Some(Error::Spanned(_, error)) => { + assert_matches!(error, $expected); + } + _ => panic!(), + } + } + _ => panic!(), + } + } + Ok(_) => panic!(), + } + } + )+ + } + }; + } + + single_error_tests! { single_error + no_tokens: + "" + => SingleError::NoTokens, + no_orig: + "ADD R0, R0, R0\n\ + .END" + => SingleError::NoOrig, + bad_instruction: + ".ORIG x3000\n\ + #OOPS\n\ + .END" + => SingleError::BadInstruction, + bad_label: + ".ORIG x3000\n\ + #OOPS ADD R0, R0, R0\n\ + .END" + => SingleError::BadLabel, + // TODO: these errors might currently be impossible to generate. 
Review relevant parsing/analysis + // bad_opcode: + // ".ORIG x3000\n\ + // #OOPS R0, R0, R0\n\ + // .END" + // => SingleError::BadOpcode, + // bad_operands: + // ".ORIG x3000\n\ + // ADD #OOPS\n\ + // .END" + // => SingleError::BadOperands, + bad_operand: + ".ORIG x3000\n\ + ADD R0, R0, #OOPS\n\ + .END" + => SingleError::BadOperand, + too_few_operands: + ".ORIG x3000\n\ + ADD R0, R0\n\ + .END" + => SingleError::WrongNumberOfOperands { expected: 3, actual: 2 }, + too_many_operands: + ".ORIG x3000\n\ + ADD R0, R0, R0, R0\n\ + .END" + => SingleError::WrongNumberOfOperands { expected: 3, actual: 4 }, + operand_type_mismatch: + ".ORIG x3000\n\ + ADD \"oops\", R0, R0\n\ + .END" + => SingleError::OperandTypeMismatch { + expected: OperandType::Register, + actual: OperandType::String, + }, + duplicate_label: + ".ORIG x3000\n\ + LABEL ADD R0, R0, R0\n\ + LABEL ADD R0, R0, R0\n\ + .END" + => SingleError::DuplicateLabel { .. }, + undefined_label: + ".ORIG x3000\n\ + BR SOMEWHERE\n\ + .END" + => SingleError::InvalidLabelReference { reason: InvalidReferenceReason::Undefined, .. }, + regions_overlap: + ".ORIG x3000\n\ + ADD R0, R0, R0\n\ + ADD R0, R0, R0\n\ + .END\n\ + \n\ + .ORIG x3001\n\ + ADD R0, R0, R0\n\ + ADD R0, R0, R0\n\ + .END" + => SingleError::RegionsOverlap { .. }, + label_too_distant: + ".ORIG x3000\n\ + LEA R0, LABEL\n\ + HALT\n\ + .BLKW 255\n\ + LABEL .FILL 0x1234\n\ + .END" + => SingleError::LabelTooDistant { + est_ref_pos: 0x3000, + est_label_pos: 0x3101, + offset: 0b1_0000_0000, + width: 9, + .. + }, + label_too_distant_negative: + ".ORIG x3000\n\ + HALT\n\ + LABEL .FILL 0x1234\n\ + .BLKW 255\n\ + LEA R0, LABEL\n\ + .END" + => SingleError::LabelTooDistant { + est_ref_pos: 0x3101, + est_label_pos: 0x3001, + offset: -0b1_0000_0001, + width: 9, + .. + }, + } + +} \ No newline at end of file From 58c579fa4d8809ed138635f6a5d20f6af55d20b6 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 15 Jun 2022 23:50:17 -0500 Subject: [PATCH 58/82] assembler: rename test macros, add basic label reference tests --- assembler/tests/integ.rs | 119 +++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 47 deletions(-) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index b075e4c..5591e2a 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -27,16 +27,16 @@ fn load_store_medium() { mod single_instruction { use super::*; - fn single_instruction_multiple_output_test(input: &str, expected: &[Word]) { + fn multiple_output_test(input: &str, expected: &[Word]) { let input = format!(".ORIG x3000\n{}\n.END", input); test(input.as_str(), 0x3000, expected); } fn single_instruction_test(input: &str, expected: Word) { - single_instruction_multiple_output_test(input, &[expected]); + multiple_output_test(input, &[expected]); } - macro_rules! single_instruction_tests { + macro_rules! tests { ($tests_name:ident $( $test_name:ident: $instruction:expr => $expected:expr @@ -56,7 +56,7 @@ mod single_instruction { }; } - single_instruction_tests! { alternative_styles + tests! { alternative_styles lowercase: "add r0 r0 r0" => 0x1000, comma_separated: "add r0, r0, r0" => 0x1000, with_semicolon: "ADD R0 R0 R0;" => 0x1000, @@ -64,13 +64,13 @@ mod single_instruction { commented: "ADD R0 R0 R0 ; comment" => 0x1000, } - single_instruction_tests! { labels + tests! { labels minimal: "A ADD R0 R0 R0" => 0x1000, begins_with_opcode: "ADDER ADD R0 R0 R0" => 0x1000, begins_with_trap: "INIT ADD R0 R0 R0" => 0x1000, } - single_instruction_tests! { add + tests! 
{ add minimal: "ADD R0 R0 R0" => 0x1000, r1_2_3: "ADD R1 R2 R3" => 0x1283, r4_5_6: "ADD R4 R5 R6" => 0x1946, @@ -81,7 +81,7 @@ mod single_instruction { hex_imm: "ADD R7 R7 xA" => 0x1FEA, } - single_instruction_tests! { and + tests! { and minimal: "AND R0 R0 R0" => 0x5000, r1_2_3: "AND R1 R2 R3" => 0x5283, r4_5_6: "AND R4 R5 R6" => 0x5946, @@ -91,7 +91,7 @@ mod single_instruction { neg_imm: "AND R7 R7 #-1" => 0x5FFF, } - single_instruction_tests! { jmp + tests! { jmp r0: "JMP R0" => 0xC000, r1: "JMP R1" => 0xC040, r2: "JMP R2" => 0xC080, @@ -102,7 +102,7 @@ mod single_instruction { r7: "JMP R7" => 0xC1C0, } - single_instruction_tests! { jsrr + tests! { jsrr r0: "JSRR R0" => 0x4000, r1: "JSRR R1" => 0x4040, r2: "JSRR R2" => 0x4080, @@ -123,7 +123,7 @@ mod single_instruction { single_instruction_test("RET", 0xC1C0); } - single_instruction_tests! { ldr + tests! { ldr minimal: "LDR R0 R0 #0" => 0x6000, r1_2: "LDR R1 R2 #3" => 0x6283, max_imm: "LDR R3 R4 #31" => 0x671F, @@ -131,14 +131,14 @@ mod single_instruction { min_imm: "LDR R7 R7 #-32" => 0x6FE0, } - single_instruction_tests! { not + tests! { not r0_1: "NOT R0 R1" => 0x907F, r2_3: "NOT R2 R3" => 0x94FF, r4_5: "NOT R4 R5" => 0x997F, r6_7: "NOT R6 R7" => 0x9DFF, } - single_instruction_tests! { str + tests! { str minimal: "STR R0 R0 #0" => 0x7000, r1_2: "STR R1 R2 #3" => 0x7283, max_imm: "STR R3 R4 #31" => 0x771F, @@ -146,14 +146,14 @@ mod single_instruction { min_imm: "STR R7 R7 #-32" => 0x7FE0, } - single_instruction_tests! { trap + tests! { trap minimal: "TRAP x00" => 0xF000, halt: "TRAP x25" => 0xF025, max: "TRAP xFF" => 0xF0FF, decimal: "TRAP #37" => 0xF025, } - single_instruction_tests! { named_traps + tests! { named_traps getc: "GETC" => 0xF020, out: "OUT" => 0xF021, puts: "PUTS" => 0xF022, @@ -162,7 +162,7 @@ mod single_instruction { halt: "HALT" => 0xF025, } - single_instruction_tests! { br + tests! { br minimal: "BR #0" => 0x0E00, n: "BRn #0" => 0x0800, z: "BRz #0" => 0x0400, @@ -177,19 +177,39 @@ mod single_instruction { min_imm: "BRz #-256" => 0x0500, } + macro_rules! multiple_output_tests { + ($tests_name:ident + $( + $test_name:ident: $instruction:expr => $expected:expr + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + multiple_output_test($instruction, $expected); + } + )+ + } + }; + } + // TODO: make this more readable :( // I couldn't find a way to rearrange the macros to create one // for the boilerplate like "($opcode << 12) + ". - // Consider adding a variant in single_instruction_tests for this case? + // Consider adding a variant in tests for this case? macro_rules! reg_and_pcoffset9_instruction_tests { ( $( - $name:ident: $operator:expr => $opcode:expr + $name:ident, $name2:ident: $operator:expr => $opcode:expr ),+ $(,)* ) => { $( - single_instruction_tests! { $name + tests! { $name // OPERANDS RESULT // -------- ----- minimal: ($operator.to_string() + " R0 #0").as_str() => (($opcode << 12) + 0x000), @@ -202,19 +222,44 @@ mod single_instruction { r6: ($operator.to_string() + " R6 #0").as_str() => (($opcode << 12) + 0xC00), r7: ($operator.to_string() + " R7 #0").as_str() => (($opcode << 12) + 0xE00), } + multiple_output_tests! 
{ $name2 + self_label: ("LABEL ".to_string() + $operator + " R0 LABEL").as_str() => &[(($opcode << 12) + 0x1FF)], + next_label: + ($operator.to_string() + " R0 LABEL\n\ + LABEL ADD R0, R0, R0").as_str() + => &[ + (($opcode << 12) + 0x000), + 0x1000], + pos_label: + ($operator.to_string() + " R0 LABEL\n\ + .BLKW 1\n\ + LABEL ADD R0, R0, R0").as_str() + => &[ + (($opcode << 12) + 0x001), + 0x0000, + 0x1000], + neg_label: + ("LABEL ADD R0, R0, R0\n\ + .BLKW 1\n".to_string() + + $operator + " R0, LABEL").as_str() + => &[ + 0x1000, + 0x0000, + (($opcode << 12) + 0x1FD)], + } )+ }; } reg_and_pcoffset9_instruction_tests! { - ld: "LD" => 0x2, - ldi: "LDI" => 0xA, - lea: "LEA" => 0xE, - st: "ST" => 0x3, - sti: "STI" => 0xB, + ld, ld_label: "LD" => 0x2, + ldi, ldi_label: "LDI" => 0xA, + lea, lea_label: "LEA" => 0xE, + st, st_label: "ST" => 0x3, + sti, sti_label: "STI" => 0xB, } - single_instruction_tests! { jsr + tests! { jsr minimal: "JSR #0" => 0x4800, pos_imm: "JSR #1" => 0x4801, neg_imm: "JSR #-1" => 0x4FFF, @@ -226,7 +271,7 @@ mod single_instruction { mod pseudo_ops { use super::*; - single_instruction_tests! { fill + tests! { fill minimal: ".FILL #0" => 0x0000, pos_imm: ".FILL #1" => 0x0001, max_imm: ".FILL #65535" => 0xFFFF, @@ -235,33 +280,13 @@ mod single_instruction { max_hex_imm: ".FILL xFFFF" => 0xFFFF, } - macro_rules! single_instruction_multiple_output_tests { - ($tests_name:ident - $( - $test_name:ident: $instruction:expr => $expected:expr - ),+ - $(,)* - ) => { - mod $tests_name { - use super::*; - - $( - #[test] - fn $test_name() { - single_instruction_multiple_output_test($instruction, $expected); - } - )+ - } - }; - } - - single_instruction_multiple_output_tests! { blkw + multiple_output_tests! { blkw one: ".BLKW 1" => &[0,], two: ".BLKW 2" => &[0, 0,], ten: ".BLKW 10" => &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0,], } - single_instruction_multiple_output_tests! { stringz + multiple_output_tests! { stringz a: ".STRINGZ \"a\"" => &[0x61, 0x00], double_quote: ".STRINGZ \"\\\"\"" => &[0x22, 0x00], backslash: ".STRINGZ \"\\\\\"" => &[0x5C, 0x00], From ee7d7f072513936d82d19cc56cc96003c4d1a6c5 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 18 Jun 2022 02:33:30 -0500 Subject: [PATCH 59/82] assembler: add multiple error tests, improve instruction parsing recovery --- assembler/src/parser.rs | 45 +++++++++++--- assembler/tests/integ.rs | 126 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 158 insertions(+), 13 deletions(-) diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index 1d2a238..a8ad3cd 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -110,14 +110,20 @@ impl Operand { } fn operand() -> impl Parser, Error = Simple> { - let operand = select! 
{ - Token::Register(reg) => Ok(Operand::Register(reg)), - Token::UnqualifiedNumberLiteral(val) => Ok(Operand::UnqualifiedNumberLiteral(val)), - Token::NumberLiteral(val) => Ok(Operand::NumberLiteral(val)), - Token::StringLiteral(s) => Ok(Operand::StringLiteral(s)), - Token::Label(s) => Ok(Operand::Label(s)), - Token::Invalid => Err(()), - }; + + let operand = filter_map(move |span, t: Token| + match t.clone() { + Token::Register(reg) => Ok(Ok(Operand::Register(reg))), + Token::UnqualifiedNumberLiteral(val) => Ok(Ok(Operand::UnqualifiedNumberLiteral(val))), + Token::NumberLiteral(val) => Ok(Ok(Operand::NumberLiteral(val))), + Token::StringLiteral(s) => Ok(Ok(Operand::StringLiteral(s))), + Token::Label(s) => Ok(Ok(Operand::Label(s))), + Token::Opcode(_) + | Token::End + | Token::Invalid => Ok(Err(())), + _ => Err(Simple::expected_input_found(span, None, Some(t))) + } + ); operand.map_with_span(|o, span| (o, span)) } @@ -150,10 +156,16 @@ fn instruction(leniency: LeniencyLevel) -> impl Parser { + let (tokens, _) = lex($src, LeniencyLevel::Lenient).unwrap(); + let len = $src.chars().count(); + let $p = + $parser + .parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())); + } + } + + #[test] + fn instruction_error() { + parse!(let (maybe_instruction, errs) = instruction(LeniencyLevel::Lenient), "JMP RET .END"); + println!("{:?}", maybe_instruction); + println!("{:?}", errs); + } + } \ No newline at end of file diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 5591e2a..eb6f201 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -24,14 +24,10 @@ fn load_store_medium() { ); } + mod single_instruction { use super::*; - fn multiple_output_test(input: &str, expected: &[Word]) { - let input = format!(".ORIG x3000\n{}\n.END", input); - test(input.as_str(), 0x3000, expected); - } - fn single_instruction_test(input: &str, expected: Word) { multiple_output_test(input, &[expected]); } @@ -295,6 +291,11 @@ mod single_instruction { } } +fn multiple_output_test(input: &str, expected: &[Word]) { + let input = format!(".ORIG x3000\n{}\n.END", input); + test(input.as_str(), 0x3000, expected); +} + fn test(input: &str, orig: usize, expected_mem: &[Word]) { let src = input.to_string(); let mem = assemble(&src, LeniencyLevel::Lenient, true).unwrap(); @@ -464,4 +465,119 @@ mod error { }, } + macro_rules! contains_error { + ($errors:expr, $pattern:pat) => { + $errors.iter() + .any(|error| { + match error { + Error::Single(error) + | Error::Spanned(_, error) => { + matches!(error, $pattern) + } + _ => false, + } + }) + } + } + + macro_rules! multiple_error_tests { + ($tests_name:ident + $( + $test_name:ident: $source:expr => {$($expected:pat),+ $(,)*} + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + let src = $source.to_string(); + match parse_and_analyze(&src, LeniencyLevel::Lenient) { + Err(error) => { + match error { + Error::Multiple(errors) => { + println!("{:?}", errors); + $( + assert!(contains_error!(errors, $expected)); + )+ + } + _ => panic!(), + } + } + Ok(_) => panic!(), + } + } + )+ + } + } + } + + multiple_error_tests! 
{ multiple_errors + no_end: + ".ORIG x3000\n\ + ADD R0, R0, R0" + => + { + SingleError::BadProgram, + SingleError::NoEnd + }, + two_operand_type_mismatches: + ".ORIG x3000\n\ + ADD \"hello\", WORLD, R0\n\ + .END" + => + { + SingleError::OperandTypeMismatch { expected: OperandType::Register, actual: OperandType::String }, + SingleError::OperandTypeMismatch { expected: OperandType::Register, actual: OperandType::Label } + }, + two_wrong_numbers_of_operands: + ".ORIG x3000\n\ + ADD R0\n\ + JMP R0, R0, R0\n\ + .END" + => + { + SingleError::WrongNumberOfOperands { expected: 1, actual: 3 }, + SingleError::WrongNumberOfOperands { expected: 3, actual: 1 }, + }, + very_many: + ".ORIG #OOPS ; Bad .ORIG operand \n\ + AND R1, , ; Bad instruction (or operands) \n\ + LABEL ADD R0 ; Duplicate label \n\ + LABEL JMP RET ; Bad operand \n\ + .END \n\ + \n\ + .ORIG x3000 ; Likely overlapping first region \n\ + ADD R0, R0, R0 \n\ + ADD R0, R0, R0 \n\ + .END \n\ + \n\ + .ORIG x3001 ; Overlaps second region \n\ + ADD R0, R0, LABEL ; Operand type mismatch \n\ + BR LABEL ; Invalid reference to duplicate label\n\ + TOO_FAR .BLKW 0 \n\ + .END \n\ + \n\ + .ORIG x3500 \n\ + BR TOO_FAR ; Label too distant for offset to fit \n\ + .END \n\ + \n\ + .ORIG x4000 ; Bad program (missing .END) \n\ + " + => + { + SingleError::BadOperand, + SingleError::BadInstruction, + SingleError::BadProgram, + SingleError::DuplicateLabel { .. }, + SingleError::WrongNumberOfOperands { expected: 3, actual: 1 }, + SingleError::RegionsOverlap { .. }, + SingleError::OperandTypeMismatch { .. }, + SingleError::InvalidLabelReference { .. }, + SingleError::LabelTooDistant { .. }, + }, + } + } \ No newline at end of file From 93500870dde6d78834805c994c8dc4cf30e02b45 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 18 Jun 2022 03:06:27 -0500 Subject: [PATCH 60/82] assembler: replace 'program' with 'region' in abstract syntax --- assembler/src/analysis.rs | 40 +++++++++++++++++++------------------- assembler/src/assembler.rs | 6 +++--- assembler/src/parser.rs | 26 ++++++++++++------------- assembler/tests/integ.rs | 6 +++--- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index cdf27d5..aa1e2e0 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -8,7 +8,7 @@ use itertools::{concat, Itertools, zip}; use ariadne::{Label, Report, ReportBuilder, ReportKind}; use lc3_isa::{Addr, SignedWord, Word}; use crate::lexer::{LexData, LiteralValue, Opcode}; -use crate::parser::{File, get, get_result, Instruction, Operand, Program, result, WithErrData}; +use crate::parser::{File, get, get_result, Instruction, Operand, Region, result, WithErrData}; use crate::{Span, Spanned}; type ErrorList = Vec; @@ -46,8 +46,8 @@ pub enum SingleError { Parse(chumsky::error::Simple), Assemble, Link, - - BadProgram, + + BadRegion, BadInstruction, BadLabel, BadOpcode, @@ -84,7 +84,7 @@ impl SingleError { fn message(&self) -> String { match self { - BadProgram => String::from("invalid program"), + BadRegion => String::from("invalid region"), BadInstruction => String::from("invalid instruction"), BadLabel => String::from("invalid label"), BadOpcode => String::from("invalid opcode"), @@ -348,8 +348,8 @@ impl ParseErrorsAnalysis { } impl MutVisitor for ParseErrorsAnalysis { - fn enter_program_error(&mut self, span: &Span) { - self.push_error(BadProgram, span); + fn enter_region_error(&mut self, span: &Span) { + self.push_error(BadRegion, span); } fn enter_orig_error(&mut self, span: &Span) { 
self.push_error(BadOperands, span); @@ -722,7 +722,7 @@ impl MutVisitor for ObjectPlacementAnalysis { } } - fn exit_program(&mut self, _program: &Program, span: &Span, location: &LocationCounter) { + fn exit_region(&mut self, _region: &Region, span: &Span, location: &LocationCounter) { self.object_spans.push( RegionPlacement { position_in_file: self.object_index, @@ -776,28 +776,28 @@ impl LocationCounterState { fn visit(v: &mut impl MutVisitor, file: &File) { v.enter_file(file); - for program in file.programs.iter() { - visit_program(v, program); + for region in file.regions.iter() { + visit_region(v, region); } v.exit_file(file); } -fn visit_program(v: &mut impl MutVisitor, program: &WithErrData) { - let (program_res, span) = program; - match program_res { - Err(_) => { v.enter_program_error(span); } - Ok(p) => { - v.enter_program( p, span); +fn visit_region(v: &mut impl MutVisitor, region: &WithErrData) { + let (region_res, span) = region; + match region_res { + Err(_) => { v.enter_region_error(span); } + Ok(r) => { + v.enter_region(r, span); let mut location_counter = LocationCounter::new(); - let Program { orig, instructions } = p; + let Region { orig, instructions } = r; visit_orig(v, orig, &mut location_counter); for instruction in instructions { visit_instruction(v, instruction, &mut location_counter); } - v.exit_program(p, span, &mut location_counter); + v.exit_region(r, span, &mut location_counter); } } } @@ -898,9 +898,9 @@ trait MutVisitor { fn enter_file(&mut self, _file: &File) {} fn exit_file(&mut self, _file: &File) {} - fn enter_program_error(&mut self, _span: &Span) {} - fn enter_program(&mut self, _program: &Program, _span: &Span) {} - fn exit_program(&mut self, _program: &Program, _span: &Span, _location: &LocationCounter) {} + fn enter_region_error(&mut self, _span: &Span) {} + fn enter_region(&mut self, _region: &Region, _span: &Span) {} + fn exit_region(&mut self, _region: &Region, _span: &Span, _location: &LocationCounter) {} fn enter_orig_error(&mut self, _span: &Span) {} fn enter_orig(&mut self, _orig: &Vec>, _span: &Span, _location: &LocationCounter) {} diff --git a/assembler/src/assembler.rs b/assembler/src/assembler.rs index 685b5fe..78356a5 100644 --- a/assembler/src/assembler.rs +++ b/assembler/src/assembler.rs @@ -6,7 +6,7 @@ use lc3_isa::{Addr, Reg, SignedWord, Word}; use crate::lexer::{ConditionCodes, LiteralValue, Opcode}; use crate::parser::{Operand, result, try_map, try_result}; use crate::parser; -use crate::parser::{Program, WithErrData}; +use crate::parser::WithErrData; pub(crate) type SymbolTable = HashMap; @@ -515,9 +515,9 @@ pub(crate) fn get_orig(orig_operands: WithErrData>>) -> pub fn assemble(file: parser::File) -> Result { let region_data = - file.programs.into_iter() + file.regions.into_iter() .map(|p| { - let parser::Program { orig, instructions } = result(p)?; + let parser::Region { orig, instructions } = result(p)?; let origin = get_orig(orig)?; Ok((origin, instructions)) }) diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index a8ad3cd..9ca7a57 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -38,7 +38,7 @@ pub(crate) fn try_map(maybe_v: Option>) -> Result } #[derive(Debug, Eq, PartialEq)] -pub struct Program { +pub struct Region { pub(crate) orig: WithErrData>>, pub(crate) instructions: Vec>, } @@ -191,7 +191,7 @@ fn everything_until_orig() -> Repeated>> { none_of(Token::Opcode(Opcode::Orig)).repeated() } -fn program(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { +fn region(leniency: 
LeniencyLevel) -> impl Parser, Error = Simple> { let orig = just(Token::Opcode(Opcode::Orig)) .ignore_then(operands(leniency)); @@ -205,7 +205,7 @@ fn program(leniency: LeniencyLevel) -> impl Parser, ) .then_ignore(just::<_, Token, _>(Token::End)) .map_with_span(|(orig, instructions), span| { - (Ok(Program { orig, instructions }), span) + (Ok(Region { orig, instructions }), span) }) // Pseudo-recovery strategy -- take everything until next .ORIG .or(any().then(everything_until_orig()) @@ -215,20 +215,20 @@ fn program(leniency: LeniencyLevel) -> impl Parser, #[derive(Debug)] pub struct File { pub(crate) before_first_orig: Spanned>, // TODO: check that this only contains newlines and comments (at least if strict) - pub programs: Vec> + pub regions: Vec> } fn file(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { everything_until_orig() .map_with_span(|toks, span| (toks, span)) .then( - program(leniency) + region(leniency) .separated_by(everything_until_orig()) .allow_trailing() ) .then_ignore(end()) - .map_with_span(|(before_first_orig, programs), span| - (File { before_first_orig, programs }, span)) + .map_with_span(|(before_first_orig, regions), span| + (File { before_first_orig, regions }, span)) } pub fn parse(src: &str, tokens: Vec>, leniency: LeniencyLevel) -> Result, Vec>> { @@ -267,13 +267,13 @@ mod tests { let f = file.0; assert_eq!((vec![], 0..5), f.before_first_orig); // TODO: probably doesn't need fixing, but span should probably be 0..0; find source of bug - assert_eq!(vec![(Ok(Program { + assert_eq!(vec![(Ok(Region { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), instructions: vec![ (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Ok(Register(R0)), 24..26)]), 16..26) }), 12..26) ], }), 0..31)], - f.programs); + f.regions); } #[test] @@ -282,13 +282,13 @@ mod tests { let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); let file = parse(source, tokens, LeniencyLevel::Lenient).unwrap(); - assert_eq!(vec![(Ok(Program { + assert_eq!(vec![(Ok(Region { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), instructions: vec![ (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Err(()), 24..29)]), 16..29) }), 12..29) ], }), 0..44)], - file.0.programs); + file.0.regions); } #[test] @@ -297,13 +297,13 @@ mod tests { let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); let file = parse(source, tokens, LeniencyLevel::Lenient).unwrap(); - assert_eq!(vec![(Ok(Program { + assert_eq!(vec![(Ok(Region { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), instructions: vec![ (Ok(Instruction { label: Some((Err(()), 12..18)), opcode: (Ok(Add), 19..22), operands: (Ok(vec![(Ok(Register(R0)), 23..25), (Ok(Register(R0)), 27..29), (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33)]), 23..33) }), 12..33) ], }), 0..48)], - file.0.programs); + file.0.regions); } macro_rules! 
parse { diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index eb6f201..71b591a 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -520,7 +520,7 @@ mod error { ADD R0, R0, R0" => { - SingleError::BadProgram, + SingleError::BadRegion, SingleError::NoEnd }, two_operand_type_mismatches: @@ -564,13 +564,13 @@ mod error { BR TOO_FAR ; Label too distant for offset to fit \n\ .END \n\ \n\ - .ORIG x4000 ; Bad program (missing .END) \n\ + .ORIG x4000 ; Bad region (missing .END) \n\ " => { SingleError::BadOperand, SingleError::BadInstruction, - SingleError::BadProgram, + SingleError::BadRegion, SingleError::DuplicateLabel { .. }, SingleError::WrongNumberOfOperands { expected: 3, actual: 1 }, SingleError::RegionsOverlap { .. }, From 3779e579f3455230d2147b1d6e8dda5402f5e913 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 18 Jun 2022 03:34:07 -0500 Subject: [PATCH 61/82] assembler: move error data and functions to new modules --- assembler/bin/as.rs | 2 +- assembler/src/analysis.rs | 350 ++--------------------------------- assembler/src/error.rs | 371 ++++++++++++++++++++++++++++++++++++++ assembler/src/lib.rs | 51 +----- assembler/src/linker.rs | 9 +- assembler/src/util.rs | 17 ++ assembler/tests/integ.rs | 6 +- 7 files changed, 416 insertions(+), 390 deletions(-) create mode 100644 assembler/src/error.rs create mode 100644 assembler/src/util.rs diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 621bf5c..d4e04ed 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -99,7 +99,7 @@ fn as_() -> Result<(), Error> { } } -fn print_errors(error: lc3_assembler::analysis::Error, src: &String) -> Result<(), Error> { +fn print_errors(error: lc3_assembler::error::Error, src: &String) -> Result<(), Error> { let print_results = error.report().into_iter() .map(|report| report.eprint(Source::from(src))) diff --git a/assembler/src/analysis.rs b/assembler/src/analysis.rs index aa1e2e0..74f4e86 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analysis.rs @@ -9,332 +9,16 @@ use ariadne::{Label, Report, ReportBuilder, ReportKind}; use lc3_isa::{Addr, SignedWord, Word}; use crate::lexer::{LexData, LiteralValue, Opcode}; use crate::parser::{File, get, get_result, Instruction, Operand, Region, result, WithErrData}; -use crate::{Span, Spanned}; - -type ErrorList = Vec; - -use Error::*; -#[derive(Debug)] -pub enum Error { - Single(SingleError), - Spanned(Span, SingleError), - Multiple(Vec), -} - -impl Error { - pub fn report(self) -> Vec { - match self { - Single(error) => vec![report_single(error).finish()], - Spanned(span, error) => vec![ - report_single(error) - .with_label(Label::new(span).with_message("here")) - .finish() - ], - Multiple(errors) => - errors.into_iter() - .flat_map(|e| e.report()) - .collect() - } - } -} - -use SingleError::*; -#[derive(Debug)] -pub enum SingleError { - Io(std::io::Error), - Lex(chumsky::error::Simple), - Parse(chumsky::error::Simple), - Assemble, - Link, - - BadRegion, - BadInstruction, - BadLabel, - BadOpcode, - BadOperands, - BadOperand, - WrongNumberOfOperands { expected: usize, actual: usize }, - OperandTypeMismatch { expected: OperandType, actual: OperandType }, - DuplicateLabel { label: String, occurrences: Vec, }, - InvalidLabelReference { label: String, reason: InvalidReferenceReason }, - LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, - RegionsOverlap { placement1: RegionPlacement, placement2: RegionPlacement }, - NoTokens, - NoOrig, - NoEnd, 
-} - -#[derive(Debug)] -pub enum InvalidReferenceReason { - Undefined, - Duplicated, - OutOfBounds, -} - -impl SingleError { - fn regions_overlap(p1: RegionPlacement, p2: RegionPlacement) -> Self { - let (placement1, placement2) = - if p1.span_in_memory.start <= p2.span_in_memory.start { - (p1, p2) - } else { - (p2, p1) - }; - RegionsOverlap { placement1, placement2 } - } - - fn message(&self) -> String { - match self { - BadRegion => String::from("invalid region"), - BadInstruction => String::from("invalid instruction"), - BadLabel => String::from("invalid label"), - BadOpcode => String::from("invalid opcode"), - BadOperands => String::from("invalid operand list"), - BadOperand => String::from("invalid operand"), - WrongNumberOfOperands { expected, actual } => - format!("wrong number of operands; expected {}, found: {}", expected, actual), - OperandTypeMismatch { expected, actual } => - format!("wrong operand type; expected {}, found: {}", expected, actual), - DuplicateLabel { label, .. } => - format!("same label used for multiple locations: {}", label), - InvalidLabelReference { label, reason } => { - let reason_str = match reason { - InvalidReferenceReason::Undefined => "not previously defined", - InvalidReferenceReason::Duplicated => "defined in multiple locations", - InvalidReferenceReason::OutOfBounds => "defined at invalid address", - }; - format!("reference to label {} invalid: {}", label, reason_str) - } - LabelTooDistant { label, width, est_ref_pos, est_label_pos, offset } => { - format!("label {} at {:#0label_pos_width$X} referenced at {:#0ref_pos_width$X}; too distant, cannot represent offset of {} in available bits: {}", - label, est_label_pos, est_ref_pos, offset, width, - // TODO: Rust '#X' formatter automatically fixes width to multiple of 4... find or implement workaround to control sign-extension; for example, for 9-bit signed offsets, we would want to display 0x2FF, not 0xFEFF. Showing as decimal for now. 
- label_pos_width = max(4, min_signed_hex_digits_required(*est_ref_pos) as usize), - ref_pos_width = max(4, min_signed_hex_digits_required(*est_label_pos) as usize),) - } - RegionsOverlap { placement1, placement2 } => { - format!("region {} in file occupying [{:#0o1s_width$X}, {:#0o1e_width$X}) overlaps region {} occupying [{:#0o2s_width$X}, {:#0o2e_width$X})", - placement1.position_in_file, - placement1.span_in_memory.start, - placement1.span_in_memory.end, - placement2.position_in_file, - placement2.span_in_memory.start, - placement2.span_in_memory.end, - o1s_width = max(4, min_signed_hex_digits_required(placement1.span_in_memory.start) as usize), - o1e_width = max(4, min_signed_hex_digits_required(placement1.span_in_memory.end) as usize), - o2s_width = max(4, min_signed_hex_digits_required(placement2.span_in_memory.start) as usize), - o2e_width = max(4, min_signed_hex_digits_required(placement2.span_in_memory.end) as usize), - ) - } - NoTokens => "no LC-3 assembly in file".to_string(), - NoOrig => "no .ORIG pseudo-op in file".to_string(), - NoEnd => "no .END pseudo-op in file".to_string(), - Io(ioe) => ioe.to_string(), - Lex(le) => le.to_string(), - Parse(pe) => pe.to_string(), - Assemble => "unexpected assembly error".to_string(), - Link => "unexpected link error".to_string(), - } - } -} - -fn min_signed_hex_digits_required(n: i32) -> u8 { - let bin_digits = min_signed_width(n); - let extra = if bin_digits % 4 == 0 { 0 } else { 1 }; - bin_digits / 4 + extra -} - - -fn report_single(error: SingleError) -> ReportBuilder { - let mut r = Report::build(ReportKind::Error, (), 0) - .with_message(error.message()); - match error { - DuplicateLabel { occurrences, .. } => { - let mut first_declaration_labeled = false; - for occurrence in occurrences { - let label_message = if !first_declaration_labeled { - first_declaration_labeled = true; - "first used here" - } else { - "also used here" - }; - r = r.with_label(Label::new(occurrence).with_message(label_message)) - } - } - RegionsOverlap { placement1, placement2 } => { - let (first, first_pos_text, second, second_pos_text) = - if placement1.position_in_file < placement2.position_in_file { - (placement1, "end", placement2, "start") - } else { - (placement2, "start", placement1, "end") - }; - r = r.with_label(Label::new(first.span_in_file) - .with_message(format!("{} of this object overlaps the other", first_pos_text))) - .with_label(Label::new(second.span_in_file) - .with_message(format!("{} of this object overlaps the other", second_pos_text))); - } - _ => {} - } - r -} - - -use OperandType::*; +use crate::{Span, Spanned, util}; use crate::assembler::{calculate_offset, get_orig}; - -#[derive(Clone, Debug)] -pub enum OperandType { - Register, - UnqualifiedNumber, - Number { signed: bool, width: u8 }, - String, - Label, - Or(Box, Box) -} - -impl Display for OperandType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Register => write!(f, "Register"), - UnqualifiedNumber => write!(f, "Unqualified Number"), - Number { signed, width } => write!(f, "Number ({}-bit, {})", width, (if *signed { "signed" } else { "unsigned" })), - String => write!(f, "String"), - Label => write!(f, "Label"), - Or(t1, t2) => write!(f, "{} or {}", t1, t2), - } - } -} - -pub(crate) enum AcceptedNumberSigns { - Signed, - Unsigned, - None, - Any -} - -impl AcceptedNumberSigns { - pub(crate) fn or(&self, other: &Self) -> Self { - use AcceptedNumberSigns::*; - match (self, other) { - (Unsigned, Signed) - | (Signed, Unsigned) - | (Any, _) - | (_, 
Any) => Any, - (Signed, _) - | (_, Signed) => Signed, - (Unsigned, _) - | (_, Unsigned) => Unsigned, - (None, None) => None - } - } -} - -impl OperandType { - pub(crate) fn accepted_number_signs(&self) -> AcceptedNumberSigns { - use AcceptedNumberSigns::*; - - match self { - Number { signed, .. } => if *signed { Signed } else { Unsigned }, - Or(t1, t2) => t1.accepted_number_signs().or(&t2.accepted_number_signs()), - _ => None - } - } - pub(crate) fn signed_or_unsigned_number(width: u8) -> Self { - Or(Box::new(Number { signed: false, width }), - Box::new(Number { signed: true, width })) - } - - pub(crate) fn reg_or_imm5() -> Self { - Or(Box::new(Register), Box::new(Number { signed: true, width: 5 })) - } - - pub(crate) fn pc_offset(width: u8) -> Self { - Or(Box::new(Label), Box::new(Number { signed: true, width })) - } - - pub(crate) fn check(&self, operand: &Operand) -> bool { - match self { - Register => matches!(operand, Operand::Register(_)), - UnqualifiedNumber => matches!(operand, Operand::UnqualifiedNumberLiteral(_)), - Number { signed: expected_signed, width: expected_width } => { - if let Number { signed, width } = OperandType::of(operand) { - match (signed, expected_signed) { - (true, false) => { - if let Operand::NumberLiteral(LiteralValue::SignedWord(sw)) = operand { - *sw >= 0 && (width - 1) <= *expected_width - } else { - // TODO: find way to couple OperandType::of and value extraction to avoid this case - unreachable!("Detected operand as signed type but could not extract signed value"); - } - } - (false, true) => width <= (expected_width - 1), - _ => width <= *expected_width - } - - } else { - false - } - } - String => matches!(operand, Operand::StringLiteral(_)), - Label => matches!(operand, Operand::Label(_)), - Or(t1, t2) => t1.check(operand) || t2.check(operand), - } - } - - pub(crate) fn of(operand: &Operand) -> Self { - match operand { - Operand::Register(_) => Register, - Operand::UnqualifiedNumberLiteral(_) => UnqualifiedNumber, - Operand::NumberLiteral(lv) => OperandType::of_number_literal(lv, None), - Operand::StringLiteral(_) => String, - Operand::Label(_) => Label, - } - } - - pub(crate) fn of_number_literal(literal_value: &LiteralValue, interpret_as: Option) -> Self { - use AcceptedNumberSigns::*; - - let value = - match literal_value { - LiteralValue::Word(value) => *value as i32, - LiteralValue::SignedWord(value) => *value as i32, - }; - let unsigned_interpretation = Number { signed: false, width: min_unsigned_width(value) }; - let signed_interpretation = Number { signed: true, width: min_signed_width(value) }; - match interpret_as { - Option::None | Some(None) => match literal_value { - LiteralValue::Word(_) => unsigned_interpretation, - LiteralValue::SignedWord(_) => signed_interpretation, - } - Some(Signed) => signed_interpretation, - Some(Unsigned) => unsigned_interpretation, - Some(Any) => Or(Box::new(signed_interpretation), - Box::new(unsigned_interpretation)), - } - } -} - -fn min_signed_width(n: i32) -> u8 { - let mut width = 1; - const BASE: i32 = 2; - while n < -BASE.pow(width - 1) || n >= BASE.pow(width - 1) { - width += 1; - } - width as u8 -} - -fn min_unsigned_width(n: i32) -> u8 { - let mut width = 1; - const BASE: i32 = 2; - while n >= BASE.pow(width) { - width += 1; - } - width as u8 -} - +use crate::error::{Error, InvalidReferenceReason, OperandType, RegionPlacement, RoughAddr, SingleError}; +use crate::error::OperandType::*; +use crate::error::Error::*; +use crate::error::SingleError::*; #[derive(Default)] struct ParseErrorsAnalysis { - 
errors: ErrorList + errors: Vec } impl ParseErrorsAnalysis { @@ -374,7 +58,7 @@ impl MutVisitor for ParseErrorsAnalysis { #[derive(Default)] struct DuplicateLabelsAnalysis { - errors: ErrorList, + errors: Vec, labels: HashMap>, } @@ -405,9 +89,6 @@ impl MutVisitor for DuplicateLabelsAnalysis { } } - -type RoughAddr = i32; - #[derive(Debug)] enum InvalidSymbolError { InvalidOrig { estimated_addr: RoughAddr }, @@ -498,7 +179,7 @@ struct ExpectedLabel { } struct LabelOffsetBoundsAnalysis<'a> { - errors: ErrorList, + errors: Vec, symbol_table: &'a SymbolTable, expected_label: Option } @@ -524,7 +205,7 @@ impl<'a> LabelOffsetBoundsAnalysis<'a> { })); } Ok(offset) => { - if min_signed_width(offset as i32) > width { + if util::min_signed_width(offset as i32) > width { self.errors.push( Spanned(span.clone(), LabelTooDistant { @@ -607,7 +288,7 @@ impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { #[derive(Default)] struct OperandTypesAnalysis { - errors: ErrorList, + errors: Vec, expected_operands: Option> } @@ -688,19 +369,12 @@ impl MutVisitor for OperandTypesAnalysis { struct ObjectPlacementAnalysis { - errors: ErrorList, + errors: Vec, last_start: RoughAddr, object_index: usize, object_spans: Vec, } -#[derive(Clone, Debug)] -pub struct RegionPlacement { - position_in_file: usize, - span_in_file: Span, - span_in_memory: Range, -} - impl ObjectPlacementAnalysis { fn new() -> Self { Self { @@ -924,7 +598,7 @@ trait MutVisitor { } -fn analyze_lex_data(lex_data: &LexData, file_span: &Span) -> ErrorList { +fn analyze_lex_data(lex_data: &LexData, file_span: &Span) -> Vec { let mut errors = Vec::new(); if lex_data.no_tokens { errors.push(Single(NoTokens)) diff --git a/assembler/src/error.rs b/assembler/src/error.rs new file mode 100644 index 0000000..8bd0bd6 --- /dev/null +++ b/assembler/src/error.rs @@ -0,0 +1,371 @@ +use ariadne::{Label, Report, ReportBuilder, ReportKind}; +use std::cmp::max; +use lc3_isa::SignedWord; +use std::fmt::{Display, Formatter}; +use crate::{analysis, Span, util}; +use crate::lexer; +use crate::lexer::LiteralValue; +use crate::parser::Operand; +use std::ops::Range; + + +impl From for SingleError { + fn from(error: std::io::Error) -> Self { + Io(error) + } +} + +impl From> for Error + where E: Into +{ + fn from(errors: Vec) -> Self { + let es = errors.into_iter() + .map(|e| e.into()) + .collect(); + Error::Multiple(es) + } +} + +impl From for Error + where E: Into +{ + fn from(error: E) -> Self { + Error::Single(error.into()) + } +} + +impl From> for SingleError { + fn from(error: chumsky::error::Simple) -> Self { + Lex(error) + } +} + +impl From> for SingleError { + fn from(error: chumsky::error::Simple) -> Self { + Parse(error) + } +} + +#[derive(Debug)] +pub enum Error { + Single(SingleError), + Spanned(Span, SingleError), + Multiple(Vec), +} + +impl Error { + pub fn report(self) -> Vec { + use Error::*; + match self { + Single(error) => vec![report_single(error).finish()], + Spanned(span, error) => vec![ + report_single(error) + .with_label(Label::new(span).with_message("here")) + .finish() + ], + Multiple(errors) => + errors.into_iter() + .flat_map(|e| e.report()) + .collect() + } + } +} + +pub(crate) type RoughAddr = i32; + +use SingleError::*; + +#[derive(Debug)] +pub enum SingleError { + Io(std::io::Error), + Lex(chumsky::error::Simple), + Parse(chumsky::error::Simple), + Assemble, + Link, + + BadRegion, + BadInstruction, + BadLabel, + BadOpcode, + BadOperands, + BadOperand, + WrongNumberOfOperands { expected: usize, actual: usize }, + OperandTypeMismatch { 
expected: OperandType, actual: OperandType }, + DuplicateLabel { label: String, occurrences: Vec, }, + InvalidLabelReference { label: String, reason: InvalidReferenceReason }, + LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, + RegionsOverlap { placement1: RegionPlacement, placement2: RegionPlacement }, + NoTokens, + NoOrig, + NoEnd, +} + +#[derive(Debug)] +pub enum InvalidReferenceReason { + Undefined, + Duplicated, + OutOfBounds, +} + +impl SingleError { + pub(crate) fn regions_overlap(p1: RegionPlacement, p2: RegionPlacement) -> Self { + let (placement1, placement2) = + if p1.span_in_memory.start <= p2.span_in_memory.start { + (p1, p2) + } else { + (p2, p1) + }; + RegionsOverlap { placement1, placement2 } + } + + fn message(&self) -> String { + match self { + BadRegion => String::from("invalid region"), + BadInstruction => String::from("invalid instruction"), + BadLabel => String::from("invalid label"), + BadOpcode => String::from("invalid opcode"), + BadOperands => String::from("invalid operand list"), + BadOperand => String::from("invalid operand"), + WrongNumberOfOperands { expected, actual } => + format!("wrong number of operands; expected {}, found: {}", expected, actual), + OperandTypeMismatch { expected, actual } => + format!("wrong operand type; expected {}, found: {}", expected, actual), + DuplicateLabel { label, .. } => + format!("same label used for multiple locations: {}", label), + InvalidLabelReference { label, reason } => { + let reason_str = match reason { + InvalidReferenceReason::Undefined => "not previously defined", + InvalidReferenceReason::Duplicated => "defined in multiple locations", + InvalidReferenceReason::OutOfBounds => "defined at invalid address", + }; + format!("reference to label {} invalid: {}", label, reason_str) + } + LabelTooDistant { label, width, est_ref_pos, est_label_pos, offset } => { + format!("label {} at {:#0label_pos_width$X} referenced at {:#0ref_pos_width$X}; too distant, cannot represent offset of {} in available bits: {}", + label, est_label_pos, est_ref_pos, offset, width, + // TODO: Rust '#X' formatter automatically fixes width to multiple of 4... find or implement workaround to control sign-extension; for example, for 9-bit signed offsets, we would want to display 0x2FF, not 0xFEFF. Showing as decimal for now. 
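// Illustration (not from this diff) of what the width arguments below evaluate to,
// using min_signed_hex_digits_required, which is defined further down in this file:
//   min_signed_hex_digits_required(0x3000) == 4   // needs 15 signed bits -> 4 hex digits
//   min_signed_hex_digits_required(0x30)   == 2   // needs 7 signed bits  -> 2 hex digits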
+ label_pos_width = max(4, min_signed_hex_digits_required(*est_ref_pos) as usize), + ref_pos_width = max(4, min_signed_hex_digits_required(*est_label_pos) as usize),) + } + RegionsOverlap { placement1, placement2 } => { + format!("region {} in file occupying [{:#0o1s_width$X}, {:#0o1e_width$X}) overlaps region {} occupying [{:#0o2s_width$X}, {:#0o2e_width$X})", + placement1.position_in_file, + placement1.span_in_memory.start, + placement1.span_in_memory.end, + placement2.position_in_file, + placement2.span_in_memory.start, + placement2.span_in_memory.end, + o1s_width = max(4, min_signed_hex_digits_required(placement1.span_in_memory.start) as usize), + o1e_width = max(4, min_signed_hex_digits_required(placement1.span_in_memory.end) as usize), + o2s_width = max(4, min_signed_hex_digits_required(placement2.span_in_memory.start) as usize), + o2e_width = max(4, min_signed_hex_digits_required(placement2.span_in_memory.end) as usize), + ) + } + NoTokens => "no LC-3 assembly in file".to_string(), + NoOrig => "no .ORIG pseudo-op in file".to_string(), + NoEnd => "no .END pseudo-op in file".to_string(), + Io(ioe) => ioe.to_string(), + Lex(le) => le.to_string(), + Parse(pe) => pe.to_string(), + Assemble => "unexpected assembly error".to_string(), + Link => "unexpected link error".to_string(), + } + } +} + +fn min_signed_hex_digits_required(n: i32) -> u8 { + let bin_digits = util::min_signed_width(n); + let extra = if bin_digits % 4 == 0 { 0 } else { 1 }; + bin_digits / 4 + extra +} + + +fn report_single(error: SingleError) -> ReportBuilder { + let mut r = Report::build(ReportKind::Error, (), 0) + .with_message(error.message()); + match error { + DuplicateLabel { occurrences, .. } => { + let mut first_declaration_labeled = false; + for occurrence in occurrences { + let label_message = if !first_declaration_labeled { + first_declaration_labeled = true; + "first used here" + } else { + "also used here" + }; + r = r.with_label(Label::new(occurrence).with_message(label_message)) + } + } + RegionsOverlap { placement1, placement2 } => { + let (first, first_pos_text, second, second_pos_text) = + if placement1.position_in_file < placement2.position_in_file { + (placement1, "end", placement2, "start") + } else { + (placement2, "start", placement1, "end") + }; + r = r.with_label(Label::new(first.span_in_file) + .with_message(format!("{} of this object overlaps the other", first_pos_text))) + .with_label(Label::new(second.span_in_file) + .with_message(format!("{} of this object overlaps the other", second_pos_text))); + } + _ => {} + } + r +} + + +#[derive(Clone, Debug)] +pub enum OperandType { + Register, + UnqualifiedNumber, + Number { signed: bool, width: u8 }, + String, + Label, + Or(Box, Box) +} + +impl Display for OperandType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + use OperandType::*; + + match self { + Register => write!(f, "Register"), + UnqualifiedNumber => write!(f, "Unqualified Number"), + Number { signed, width } => write!(f, "Number ({}-bit, {})", width, (if *signed { "signed" } else { "unsigned" })), + String => write!(f, "String"), + Label => write!(f, "Label"), + Or(t1, t2) => write!(f, "{} or {}", t1, t2), + } + } +} + +pub(crate) enum AcceptedNumberSigns { + Signed, + Unsigned, + None, + Any +} + +impl AcceptedNumberSigns { + pub(crate) fn or(&self, other: &Self) -> Self { + use AcceptedNumberSigns::*; + + match (self, other) { + (Unsigned, Signed) + | (Signed, Unsigned) + | (Any, _) + | (_, Any) => Any, + (Signed, _) + | (_, Signed) => Signed, + (Unsigned, _) + | (_, 
Unsigned) => Unsigned, + (None, None) => None + } + } +} + +impl OperandType { + + pub(crate) fn accepted_number_signs(&self) -> AcceptedNumberSigns { + use AcceptedNumberSigns::*; + use OperandType::*; + + match self { + Number { signed, .. } => if *signed { Signed } else { Unsigned }, + Or(t1, t2) => t1.accepted_number_signs().or(&t2.accepted_number_signs()), + _ => None + } + } + pub(crate) fn signed_or_unsigned_number(width: u8) -> Self { + use OperandType::*; + + Or(Box::new(Number { signed: false, width }), + Box::new(Number { signed: true, width })) + } + + pub(crate) fn reg_or_imm5() -> Self { + use OperandType::*; + + Or(Box::new(Register), Box::new(Number { signed: true, width: 5 })) + } + + pub(crate) fn pc_offset(width: u8) -> Self { + use OperandType::*; + + Or(Box::new(Label), Box::new(Number { signed: true, width })) + } + + pub(crate) fn check(&self, operand: &Operand) -> bool { + use OperandType::*; + + match self { + Register => matches!(operand, Operand::Register(_)), + UnqualifiedNumber => matches!(operand, Operand::UnqualifiedNumberLiteral(_)), + Number { signed: expected_signed, width: expected_width } => { + if let Number { signed, width } = OperandType::of(operand) { + match (signed, expected_signed) { + (true, false) => { + if let Operand::NumberLiteral(LiteralValue::SignedWord(sw)) = operand { + *sw >= 0 && (width - 1) <= *expected_width + } else { + // TODO: find way to couple OperandType::of and value extraction to avoid this case + unreachable!("Detected operand as signed type but could not extract signed value"); + } + } + (false, true) => width <= (expected_width - 1), + _ => width <= *expected_width + } + + } else { + false + } + } + String => matches!(operand, Operand::StringLiteral(_)), + Label => matches!(operand, Operand::Label(_)), + Or(t1, t2) => t1.check(operand) || t2.check(operand), + } + } + + pub(crate) fn of(operand: &Operand) -> Self { + use OperandType::*; + + match operand { + Operand::Register(_) => Register, + Operand::UnqualifiedNumberLiteral(_) => UnqualifiedNumber, + Operand::NumberLiteral(lv) => OperandType::of_number_literal(lv, None), + Operand::StringLiteral(_) => String, + Operand::Label(_) => Label, + } + } + + pub(crate) fn of_number_literal(literal_value: &LiteralValue, interpret_as: Option) -> Self { + use AcceptedNumberSigns::*; + use OperandType::*; + + let value = + match literal_value { + LiteralValue::Word(value) => *value as i32, + LiteralValue::SignedWord(value) => *value as i32, + }; + let unsigned_interpretation = Number { signed: false, width: util::min_unsigned_width(value) }; + let signed_interpretation = Number { signed: true, width: util::min_signed_width(value) }; + match interpret_as { + Option::None | Some(None) => match literal_value { + LiteralValue::Word(_) => unsigned_interpretation, + LiteralValue::SignedWord(_) => signed_interpretation, + } + Some(Signed) => signed_interpretation, + Some(Unsigned) => unsigned_interpretation, + Some(Any) => Or(Box::new(signed_interpretation), + Box::new(unsigned_interpretation)), + } + } +} + +#[derive(Clone, Debug)] +pub struct RegionPlacement { + pub(crate) position_in_file: usize, + pub(crate) span_in_file: Span, + pub(crate) span_in_memory: Range, +} + diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 03b83b5..00f373b 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -13,6 +13,8 @@ pub mod parser; pub mod assembler; pub mod linker; pub mod analysis; +pub mod error; +mod util; type Span = std::ops::Range; type Spanned = (T, Span); @@ -23,51 
+25,12 @@ pub enum LeniencyLevel { Strict } -const MEM_DUMP_FILE_EXTENSION: &'static str = "mem"; - -impl From for analysis::SingleError { - fn from(error: std::io::Error) -> Self { - analysis::SingleError::Io(error) - } -} - -impl From> for analysis::Error - where E: Into -{ - fn from(errors: Vec) -> Self { - let es = errors.into_iter() - .map(|e| e.into()) - .collect(); - analysis::Error::Multiple(es) - } -} - -impl From for analysis::Error - where E: Into -{ - fn from(error: E) -> Self { - analysis::Error::Single(error.into()) - } -} - -impl From> for analysis::SingleError { - fn from(error: chumsky::error::Simple) -> Self { - analysis::SingleError::Lex(error) - } -} - -impl From> for analysis::SingleError { - fn from(error: chumsky::error::Simple) -> Self { - analysis::SingleError::Parse(error) - } -} - -pub fn parse_and_analyze_file(input: PathBuf, leniency: LeniencyLevel) -> Result { +pub fn parse_and_analyze_file(input: PathBuf, leniency: LeniencyLevel) -> Result { let src = fs::read_to_string(input)?; parse_and_analyze(&src, leniency) } -pub fn parse_and_analyze(src: &String, leniency: LeniencyLevel) -> Result { +pub fn parse_and_analyze(src: &String, leniency: LeniencyLevel) -> Result { let (tokens, lex_data) = lexer::lex(src, leniency)?; let file_spanned = parser::parse(src, tokens, leniency)?; let errors = analysis::validate(&lex_data, &file_spanned); @@ -78,14 +41,14 @@ pub fn parse_and_analyze(src: &String, leniency: LeniencyLevel) -> Result Result { +pub fn assemble_file(input: PathBuf, leniency: LeniencyLevel, no_os: bool) -> Result { let src = fs::read_to_string(input)?; assemble(&src, leniency, no_os) } -pub fn assemble(src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { +pub fn assemble(src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { let file = parse_and_analyze(src, leniency)?; - let object = assembler::assemble(file).map_err(|_| analysis::SingleError::Assemble)?; + let object = assembler::assemble(file).map_err(|_| error::SingleError::Assemble)?; let mem = linker::link([object], !no_os)?; Ok(mem) } diff --git a/assembler/src/linker.rs b/assembler/src/linker.rs index d4448f9..445c937 100644 --- a/assembler/src/linker.rs +++ b/assembler/src/linker.rs @@ -5,6 +5,7 @@ use chumsky::Parser; use lc3_isa::util::MemoryDump; use lc3_isa::{Addr, ADDR_SPACE_SIZE_IN_WORDS, Word}; use crate::assembler::{assemble_instruction, SymbolTable, Object, ObjectWord, AssemblyResult, Region}; +use crate::error::SingleError; struct LinkedRegion { origin: Addr, @@ -55,7 +56,7 @@ fn link_region(symbol_table: &SymbolTable, region: Region) -> Result, overlay_on_os: bool) -> Result { +pub fn link(objects: impl IntoIterator, overlay_on_os: bool) -> Result { let objects = objects.into_iter().collect::>(); let mut symbol_table = HashMap::new(); @@ -67,8 +68,8 @@ pub fn link(objects: impl IntoIterator, overlay_on_os: bool) -> Res let mut image = if overlay_on_os { - let first_object = objects.get(0).ok_or(crate::analysis::SingleError::Link)?; - let first_region = first_object.regions.get(0).ok_or(crate::analysis::SingleError::Link)?; + let first_object = objects.get(0).ok_or(SingleError::Link)?; + let first_region = first_object.regions.get(0).ok_or(SingleError::Link)?; let mut os = lc3_os::OS_IMAGE.clone().0; os[lc3_os::USER_PROG_START_ADDR as usize] = first_region.origin; @@ -79,7 +80,7 @@ pub fn link(objects: impl IntoIterator, overlay_on_os: bool) -> Res }; for object in objects { for region in object.regions { - let linked_region = link_region(&symbol_table, 
region).map_err(|_| crate::analysis::SingleError::Link)?; + let linked_region = link_region(&symbol_table, region).map_err(|_| SingleError::Link)?; layer_region(&mut image, linked_region); } } diff --git a/assembler/src/util.rs b/assembler/src/util.rs new file mode 100644 index 0000000..67639ee --- /dev/null +++ b/assembler/src/util.rs @@ -0,0 +1,17 @@ +pub(crate) fn min_signed_width(n: i32) -> u8 { + let mut width = 1; + const BASE: i32 = 2; + while n < -BASE.pow(width - 1) || n >= BASE.pow(width - 1) { + width += 1; + } + width as u8 +} + +pub(crate) fn min_unsigned_width(n: i32) -> u8 { + let mut width = 1; + const BASE: i32 = 2; + while n >= BASE.pow(width) { + width += 1; + } + width as u8 +} diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 71b591a..7a85ae2 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -3,8 +3,8 @@ extern crate lc3_assembler; use lc3_isa::{ADDR_MAX_VAL, Word}; use std::ops::Index; use lc3_isa::util::MemoryDump; -use lc3_assembler::{assembler, lexer, linker, parser, LeniencyLevel, parse_and_analyze, assemble}; -use lc3_assembler::analysis::Error; +use lc3_assembler::{assemble, assembler, LeniencyLevel, lexer, linker, parse_and_analyze, parser}; +use lc3_assembler::error::Error; #[test] fn load_store_medium() { @@ -320,7 +320,7 @@ fn assert_mem(mem: &MemoryDump, location: usize, expected: Word) { mod error { use assert_matches::assert_matches; - use lc3_assembler::analysis::{SingleError, OperandType, InvalidReferenceReason}; + use lc3_assembler::error::{InvalidReferenceReason, OperandType, SingleError}; use super::*; macro_rules! single_error_tests { From 9c8396f95bf3d57f726b9d8707b5dee9b6746363 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 18 Jun 2022 18:57:39 -0500 Subject: [PATCH 62/82] assembler: rename modules after core functions --- assembler/bin/as.rs | 2 +- assembler/src/{analysis.rs => analyze.rs} | 6 ++--- assembler/src/{assembler.rs => assemble.rs} | 18 ++++++------- assembler/src/error.rs | 14 +++++------ assembler/src/{lexer.rs => lex.rs} | 2 +- assembler/src/lib.rs | 28 +++++++++++---------- assembler/src/{linker.rs => link.rs} | 2 +- assembler/src/{parser.rs => parse.rs} | 4 +-- assembler/tests/integ.rs | 2 +- 9 files changed, 40 insertions(+), 38 deletions(-) rename assembler/src/{analysis.rs => analyze.rs} (99%) rename assembler/src/{assembler.rs => assemble.rs} (97%) rename assembler/src/{lexer.rs => lex.rs} (99%) rename assembler/src/{linker.rs => link.rs} (96%) rename assembler/src/{parser.rs => parse.rs} (99%) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index d4e04ed..b21725f 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -4,7 +4,7 @@ use std::{env, fs}; use std::path::{Path, PathBuf}; use std::process::exit; use ariadne::Source; -use lc3_assembler::parser::{File, parse}; +use lc3_assembler::parse::{File, parse}; use lc3_shims::memory::FileBackedMemoryShim; use clap::{Parser}; use lc3_isa::util::MemoryDump; diff --git a/assembler/src/analysis.rs b/assembler/src/analyze.rs similarity index 99% rename from assembler/src/analysis.rs rename to assembler/src/analyze.rs index 74f4e86..230b9c5 100644 --- a/assembler/src/analysis.rs +++ b/assembler/src/analyze.rs @@ -7,10 +7,10 @@ use std::string::String; use itertools::{concat, Itertools, zip}; use ariadne::{Label, Report, ReportBuilder, ReportKind}; use lc3_isa::{Addr, SignedWord, Word}; -use crate::lexer::{LexData, LiteralValue, Opcode}; -use crate::parser::{File, get, get_result, Instruction, Operand, Region, result, 
WithErrData}; +use crate::lex::{LexData, LiteralValue, Opcode}; +use crate::parse::{File, get, get_result, Instruction, Operand, Region, result, WithErrData}; use crate::{Span, Spanned, util}; -use crate::assembler::{calculate_offset, get_orig}; +use crate::assemble::{calculate_offset, get_orig}; use crate::error::{Error, InvalidReferenceReason, OperandType, RegionPlacement, RoughAddr, SingleError}; use crate::error::OperandType::*; use crate::error::Error::*; diff --git a/assembler/src/assembler.rs b/assembler/src/assemble.rs similarity index 97% rename from assembler/src/assembler.rs rename to assembler/src/assemble.rs index 78356a5..17f561d 100644 --- a/assembler/src/assembler.rs +++ b/assembler/src/assemble.rs @@ -3,10 +3,10 @@ use std::convert::{TryFrom, TryInto}; use std::fmt::Debug; use std::num::{ParseIntError, TryFromIntError}; use lc3_isa::{Addr, Reg, SignedWord, Word}; -use crate::lexer::{ConditionCodes, LiteralValue, Opcode}; -use crate::parser::{Operand, result, try_map, try_result}; -use crate::parser; -use crate::parser::WithErrData; +use crate::lex::{ConditionCodes, LiteralValue, Opcode}; +use crate::parse::{Operand, result, try_map, try_result}; +use crate::parse; +use crate::parse::WithErrData; pub(crate) type SymbolTable = HashMap; @@ -283,10 +283,10 @@ impl TryFrom<(WithErrData, WithErrData>>)> for } } -impl TryFrom for ObjectWord { +impl TryFrom for ObjectWord { type Error = (); - fn try_from(value: parser::Instruction) -> Result { + fn try_from(value: parse::Instruction) -> Result { (value.opcode, value.operands).try_into() .map(ObjectWord::UnlinkedInstruction) } @@ -452,7 +452,7 @@ pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: Ok(res) } -type ParserInstructions = Vec>; +type ParserInstructions = Vec>; fn first_pass(region_data: impl IntoIterator) -> Result<(Vec, SymbolTable), ()> { let mut fp_regions = Vec::new(); @@ -513,11 +513,11 @@ pub(crate) fn get_orig(orig_operands: WithErrData>>) -> result(orig_operand)?.try_into() } -pub fn assemble(file: parser::File) -> Result { +pub fn assemble(file: parse::File) -> Result { let region_data = file.regions.into_iter() .map(|p| { - let parser::Region { orig, instructions } = result(p)?; + let parse::Region { orig, instructions } = result(p)?; let origin = get_orig(orig)?; Ok((origin, instructions)) }) diff --git a/assembler/src/error.rs b/assembler/src/error.rs index 8bd0bd6..f8097be 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -2,10 +2,10 @@ use ariadne::{Label, Report, ReportBuilder, ReportKind}; use std::cmp::max; use lc3_isa::SignedWord; use std::fmt::{Display, Formatter}; -use crate::{analysis, Span, util}; -use crate::lexer; -use crate::lexer::LiteralValue; -use crate::parser::Operand; +use crate::{analyze, Span, util}; +use crate::lex; +use crate::lex::LiteralValue; +use crate::parse::Operand; use std::ops::Range; @@ -40,8 +40,8 @@ impl From> for SingleError { } } -impl From> for SingleError { - fn from(error: chumsky::error::Simple) -> Self { +impl From> for SingleError { + fn from(error: chumsky::error::Simple) -> Self { Parse(error) } } @@ -79,7 +79,7 @@ use SingleError::*; pub enum SingleError { Io(std::io::Error), Lex(chumsky::error::Simple), - Parse(chumsky::error::Simple), + Parse(chumsky::error::Simple), Assemble, Link, diff --git a/assembler/src/lexer.rs b/assembler/src/lex.rs similarity index 99% rename from assembler/src/lexer.rs rename to assembler/src/lex.rs index cecb531..01c3be5 100644 --- a/assembler/src/lexer.rs +++ b/assembler/src/lex.rs @@ 
-418,7 +418,7 @@ mod tests { use super::*; use Token::*; use Reg::*; - use crate::lexer::Opcode::*; + use crate::lex::Opcode::*; #[test] fn lone_error() { diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 00f373b..7a25c0c 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -8,14 +8,16 @@ extern crate core; use std::fs; use std::path::PathBuf; -pub mod lexer; -pub mod parser; -pub mod assembler; -pub mod linker; -pub mod analysis; -pub mod error; mod util; +pub mod error; + +pub mod lex; +pub mod parse; +pub mod analyze; +pub mod assemble; +pub mod link; + type Span = std::ops::Range; type Spanned = (T, Span); @@ -25,15 +27,15 @@ pub enum LeniencyLevel { Strict } -pub fn parse_and_analyze_file(input: PathBuf, leniency: LeniencyLevel) -> Result { +pub fn parse_and_analyze_file(input: PathBuf, leniency: LeniencyLevel) -> Result { let src = fs::read_to_string(input)?; parse_and_analyze(&src, leniency) } -pub fn parse_and_analyze(src: &String, leniency: LeniencyLevel) -> Result { - let (tokens, lex_data) = lexer::lex(src, leniency)?; - let file_spanned = parser::parse(src, tokens, leniency)?; - let errors = analysis::validate(&lex_data, &file_spanned); +pub fn parse_and_analyze(src: &String, leniency: LeniencyLevel) -> Result { + let (tokens, lex_data) = lex::lex(src, leniency)?; + let file_spanned = parse::parse(src, tokens, leniency)?; + let errors = analyze::validate(&lex_data, &file_spanned); if !errors.is_empty() { return Err(errors.into()); } @@ -48,7 +50,7 @@ pub fn assemble_file(input: PathBuf, leniency: LeniencyLevel, no_os: bool) -> Re pub fn assemble(src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { let file = parse_and_analyze(src, leniency)?; - let object = assembler::assemble(file).map_err(|_| error::SingleError::Assemble)?; - let mem = linker::link([object], !no_os)?; + let object = assemble::assemble(file).map_err(|_| error::SingleError::Assemble)?; + let mem = link::link([object], !no_os)?; Ok(mem) } diff --git a/assembler/src/linker.rs b/assembler/src/link.rs similarity index 96% rename from assembler/src/linker.rs rename to assembler/src/link.rs index 445c937..28e2b00 100644 --- a/assembler/src/linker.rs +++ b/assembler/src/link.rs @@ -4,7 +4,7 @@ use chumsky::chain::Chain; use chumsky::Parser; use lc3_isa::util::MemoryDump; use lc3_isa::{Addr, ADDR_SPACE_SIZE_IN_WORDS, Word}; -use crate::assembler::{assemble_instruction, SymbolTable, Object, ObjectWord, AssemblyResult, Region}; +use crate::assemble::{assemble_instruction, SymbolTable, Object, ObjectWord, AssemblyResult, Region}; use crate::error::SingleError; struct LinkedRegion { diff --git a/assembler/src/parser.rs b/assembler/src/parse.rs similarity index 99% rename from assembler/src/parser.rs rename to assembler/src/parse.rs index 9ca7a57..61d5371 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parse.rs @@ -8,7 +8,7 @@ use lc3_isa::{Reg, Word}; use crate::{Span, Spanned}; use crate::LeniencyLevel; -use crate::lexer::{LiteralValue, Opcode, Token}; +use crate::lex::{LiteralValue, Opcode, Token}; pub(crate) type WithErrData = Spanned>; @@ -247,7 +247,7 @@ mod tests { use super::Operand::*; use super::Reg::*; use super::Opcode::*; - use crate::lexer::lex; + use crate::lex::lex; #[test] fn capture_tokens_before_first_orig_separately() { diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 7a85ae2..6ed796b 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -3,7 +3,7 @@ extern crate lc3_assembler; use lc3_isa::{ADDR_MAX_VAL, Word}; use 
std::ops::Index; use lc3_isa::util::MemoryDump; -use lc3_assembler::{assemble, assembler, LeniencyLevel, lexer, linker, parse_and_analyze, parser}; +use lc3_assembler::{assemble, LeniencyLevel, lex, link, parse_and_analyze, parse}; use lc3_assembler::error::Error; #[test] From 86153ad6feb708255c3b20972b3538bef1eae89c Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 18 Jun 2022 20:27:43 -0500 Subject: [PATCH 63/82] assembler: split link and layer steps --- assembler/src/error.rs | 2 ++ assembler/src/layer.rs | 35 ++++++++++++++++++++++++ assembler/src/lib.rs | 6 ++-- assembler/src/link.rs | 62 ++++++++++++++++-------------------------- 4 files changed, 65 insertions(+), 40 deletions(-) create mode 100644 assembler/src/layer.rs diff --git a/assembler/src/error.rs b/assembler/src/error.rs index f8097be..ec99d3b 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -82,6 +82,7 @@ pub enum SingleError { Parse(chumsky::error::Simple), Assemble, Link, + Layer, BadRegion, BadInstruction, @@ -169,6 +170,7 @@ impl SingleError { Parse(pe) => pe.to_string(), Assemble => "unexpected assembly error".to_string(), Link => "unexpected link error".to_string(), + Layer => "unexpected layering error".to_string(), } } } diff --git a/assembler/src/layer.rs b/assembler/src/layer.rs new file mode 100644 index 0000000..69d254e --- /dev/null +++ b/assembler/src/layer.rs @@ -0,0 +1,35 @@ +use lc3_isa::util::MemoryDump; +use lc3_isa::{ADDR_SPACE_SIZE_IN_WORDS, Word}; +use crate::error::SingleError; +use crate::link::LinkedRegion; + +fn layer_region(image: &mut [Word; ADDR_SPACE_SIZE_IN_WORDS], region: LinkedRegion) { + let LinkedRegion { origin, words } = region; + let mut addr = origin as usize; + for word in words { + image[addr] = word; + addr += 1; + } +} + +pub fn layer(regions: impl IntoIterator, layer_onto_os: bool) -> Result { + let regions = regions.into_iter().collect::>(); + + let mut image = + if layer_onto_os { + let first_region = regions.get(0).ok_or(SingleError::Layer)?; + + let mut os = lc3_os::OS_IMAGE.clone().0; + os[lc3_os::USER_PROG_START_ADDR as usize] = first_region.origin; + + os + } else { + [0; ADDR_SPACE_SIZE_IN_WORDS] + }; + + for region in regions { + layer_region(&mut image, region); + } + + Ok(image.into()) +} diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 7a25c0c..cc77d42 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -17,6 +17,7 @@ pub mod parse; pub mod analyze; pub mod assemble; pub mod link; +pub mod layer; type Span = std::ops::Range; type Spanned = (T, Span); @@ -50,7 +51,8 @@ pub fn assemble_file(input: PathBuf, leniency: LeniencyLevel, no_os: bool) -> Re pub fn assemble(src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { let file = parse_and_analyze(src, leniency)?; - let object = assemble::assemble(file).map_err(|_| error::SingleError::Assemble)?; - let mem = link::link([object], !no_os)?; + let assemble::Object { symbol_table, regions } = assemble::assemble(file).map_err(|_| error::SingleError::Assemble)?; + let linked_regions = link::link_regions(&symbol_table, regions)?; + let mem = layer::layer(linked_regions, !no_os)?; Ok(mem) } diff --git a/assembler/src/link.rs b/assembler/src/link.rs index 28e2b00..54e7834 100644 --- a/assembler/src/link.rs +++ b/assembler/src/link.rs @@ -4,24 +4,15 @@ use chumsky::chain::Chain; use chumsky::Parser; use lc3_isa::util::MemoryDump; use lc3_isa::{Addr, ADDR_SPACE_SIZE_IN_WORDS, Word}; -use crate::assemble::{assemble_instruction, SymbolTable, Object, ObjectWord, 
AssemblyResult, Region}; +use crate::assemble::{assemble_instruction, AssemblyResult, Object, ObjectWord, Region, SymbolTable}; use crate::error::SingleError; -struct LinkedRegion { - origin: Addr, - words: Vec, +pub struct LinkedRegion { + pub(crate) origin: Addr, + pub(crate) words: Vec, } -fn layer_region(image: &mut [Word; ADDR_SPACE_SIZE_IN_WORDS], object: LinkedRegion) { - let LinkedRegion { origin, words } = object; - let mut addr = origin as usize; - for word in words { - image[addr] = word; - addr += 1; - } -} - -fn link_region(symbol_table: &SymbolTable, region: Region) -> Result { +fn link_region(symbol_table: &SymbolTable, region: Region) -> Result { let mut words = Vec::new(); let Region { origin, words: region_words, .. } = region; let mut location_counter = origin; @@ -32,7 +23,7 @@ fn link_region(symbol_table: &SymbolTable, region: Region) -> Result - match assemble_instruction(&symbol_table, &location_counter, instruction)? { + match assemble_instruction(&symbol_table, &location_counter, instruction).map_err(|_| SingleError::Link)? { AssemblyResult::SingleObjectWord(word) => match word { ObjectWord::Value(word) => { words.push(word); @@ -56,34 +47,29 @@ fn link_region(symbol_table: &SymbolTable, region: Region) -> Result, overlay_on_os: bool) -> Result { +pub(crate) fn link_regions(symbol_table: &SymbolTable, regions: Vec) -> Result, SingleError> { + regions.into_iter() + .map(|region| link_region(symbol_table, region)) + .collect() +} + +pub fn link(objects: impl IntoIterator) -> Result, SingleError> { let objects = objects.into_iter().collect::>(); - let mut symbol_table = HashMap::new(); + let mut global_symbol_table = HashMap::new(); for object in objects.iter() { for (label, addr) in object.symbol_table.iter() { - symbol_table.insert(label.clone(), *addr); + global_symbol_table.insert(label.clone(), *addr); } } - let mut image = - if overlay_on_os { - let first_object = objects.get(0).ok_or(SingleError::Link)?; - let first_region = first_object.regions.get(0).ok_or(SingleError::Link)?; + let linked_regions = + objects.into_iter() + .map(|object| link_regions(&mut global_symbol_table, object.regions)) + .collect::>, SingleError>>()? 
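// The chain above yields one Vec<LinkedRegion> per object, collected into a Vec of
// Vecs; the calls that follow flatten those per-object lists into the single list
// of linked regions that this function returns.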
+ .into_iter() + .flatten() + .collect(); - let mut os = lc3_os::OS_IMAGE.clone().0; - os[lc3_os::USER_PROG_START_ADDR as usize] = first_region.origin; - - os - } else { - [0; ADDR_SPACE_SIZE_IN_WORDS] - }; - for object in objects { - for region in object.regions { - let linked_region = link_region(&symbol_table, region).map_err(|_| SingleError::Link)?; - layer_region(&mut image, linked_region); - } - } - - Ok(image.into()) -} \ No newline at end of file + Ok(linked_regions) +} From 8c8895c7dc38f501d9bda0b2e6c886bcae4876ff Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 18 Jun 2022 22:10:12 -0500 Subject: [PATCH 64/82] assembler: remove unwrap calls from binary, unreachable calls from lib --- assembler/bin/as.rs | 37 +++++++++++++++------ assembler/src/link.rs | 8 ++--- assembler/tests/inputs/very_many_errors.asm | 22 ++++++++++++ assembler/tests/integ.rs | 24 +------------ 4 files changed, 54 insertions(+), 37 deletions(-) create mode 100644 assembler/tests/inputs/very_many_errors.asm diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index b21725f..26a75f4 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -1,6 +1,7 @@ extern crate lc3_assembler; use std::{env, fs}; +use std::fmt::{Debug, Formatter}; use std::path::{Path, PathBuf}; use std::process::exit; use ariadne::Source; @@ -8,6 +9,7 @@ use lc3_assembler::parse::{File, parse}; use lc3_shims::memory::FileBackedMemoryShim; use clap::{Parser}; use lc3_isa::util::MemoryDump; +use lc3_shims::memory::error::MemoryShimError; use lc3_assembler::{assemble, assemble_file, LeniencyLevel, parse_and_analyze, parse_and_analyze_file}; const MEM_DUMP_FILE_EXTENSION: &'static str = "mem"; @@ -43,18 +45,33 @@ struct Args { no_os: bool, } -fn main() { - std::thread::Builder::new() - .name("main_greater_stack_size".to_string()) - .stack_size(8*1024*1024) - .spawn(as_).unwrap() - .join().unwrap(); +fn main() -> Result<(), Error> { + let main_thread = + std::thread::Builder::new() + .name("main_greater_stack_size".to_string()) + .stack_size(8*1024*1024) + .spawn(as_)?; + main_thread.join().map_err(|_| Error::Unexpected)? 
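    // Error handling that replaces the old unwrap() calls: a failure to spawn the
    // worker thread becomes Error::Io via From<std::io::Error>, a panicked thread is
    // mapped to Error::Unexpected, and any assembly error returned by as_() is passed
    // through unchanged so it is reported through the Debug impl defined below.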
} enum Error { Io(std::io::Error), - MemoryShim(lc3_shims::memory::error::MemoryShimError), - Assembler + MemoryShim(MemoryShimError), + Assembler, + Unexpected +} + +impl Debug for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Error::Io(ioe) + | Error::MemoryShim(MemoryShimError::IoError(ioe)) + => write!(f, "{}", ioe), + Error::MemoryShim(_) + | Error::Assembler + | Error::Unexpected => write!(f, "assembly failed") + } + } } impl From for Error { @@ -63,8 +80,8 @@ impl From for Error { } } -impl From for Error { - fn from(e: lc3_shims::memory::error::MemoryShimError) -> Self { +impl From for Error { + fn from(e: MemoryShimError) -> Self { Error::MemoryShim(e) } } diff --git a/assembler/src/link.rs b/assembler/src/link.rs index 54e7834..c79284c 100644 --- a/assembler/src/link.rs +++ b/assembler/src/link.rs @@ -29,15 +29,15 @@ fn link_region(symbol_table: &SymbolTable, region: Region) -> Result panic!("Failed to link an instruction") + ObjectWord::UnlinkedInstruction(_) => { return Err(SingleError::Link); } } AssemblyResult::MultipleObjectWords(ows) => { let mut ws = ows.into_iter() .map(|ow| match ow { - ObjectWord::Value(word) => word, - ObjectWord::UnlinkedInstruction(_) => panic!("Unexpected unlinked instruction") + ObjectWord::Value(word) => Ok(word), + ObjectWord::UnlinkedInstruction(_) => Err(SingleError::Link), }) - .collect::>(); + .collect::, SingleError>>()?; location_counter += ws.len() as u16; words.extend(ws); } diff --git a/assembler/tests/inputs/very_many_errors.asm b/assembler/tests/inputs/very_many_errors.asm new file mode 100644 index 0000000..d0a355b --- /dev/null +++ b/assembler/tests/inputs/very_many_errors.asm @@ -0,0 +1,22 @@ +.ORIG #OOPS ; Bad .ORIG operand +AND R1, , ; Bad instruction (or operands) +LABEL ADD R0 ; Duplicate label +LABEL JMP RET ; Bad operand +.END + +.ORIG x3000 ; Likely overlapping first region +ADD R0, R0, R0 +ADD R0, R0, R0 +.END + +.ORIG x3001 ; Overlaps second region +ADD R0, R0, LABEL ; Operand type mismatch +BR LABEL ; Invalid reference to duplicate label +TOO_FAR .BLKW 0 +.END + +.ORIG x3500 +BR TOO_FAR ; Label too distant for offset to fit +.END + +.ORIG x4000 ; Bad region (missing .END) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 6ed796b..11fd84a 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -543,29 +543,7 @@ mod error { SingleError::WrongNumberOfOperands { expected: 3, actual: 1 }, }, very_many: - ".ORIG #OOPS ; Bad .ORIG operand \n\ - AND R1, , ; Bad instruction (or operands) \n\ - LABEL ADD R0 ; Duplicate label \n\ - LABEL JMP RET ; Bad operand \n\ - .END \n\ - \n\ - .ORIG x3000 ; Likely overlapping first region \n\ - ADD R0, R0, R0 \n\ - ADD R0, R0, R0 \n\ - .END \n\ - \n\ - .ORIG x3001 ; Overlaps second region \n\ - ADD R0, R0, LABEL ; Operand type mismatch \n\ - BR LABEL ; Invalid reference to duplicate label\n\ - TOO_FAR .BLKW 0 \n\ - .END \n\ - \n\ - .ORIG x3500 \n\ - BR TOO_FAR ; Label too distant for offset to fit \n\ - .END \n\ - \n\ - .ORIG x4000 ; Bad region (missing .END) \n\ - " + include_str!("inputs/very_many_errors.asm") => { SingleError::BadOperand, From a2eff30bd1ac9e6528e12438e707663f85c57df3 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Sat, 18 Jun 2022 22:47:07 -0500 Subject: [PATCH 65/82] assembler: remove unused code, move WithErrData to top level --- assembler/src/analyze.rs | 22 +++++++--------------- assembler/src/assemble.rs | 8 +++----- assembler/src/error.rs | 2 +- assembler/src/lib.rs | 32 
+++++++++++++++++++++++++++++--- assembler/src/link.rs | 8 ++------ assembler/src/parse.rs | 33 +++------------------------------ 6 files changed, 45 insertions(+), 60 deletions(-) diff --git a/assembler/src/analyze.rs b/assembler/src/analyze.rs index 230b9c5..b565f21 100644 --- a/assembler/src/analyze.rs +++ b/assembler/src/analyze.rs @@ -1,16 +1,13 @@ -use std::cmp::{max, min}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::convert::{TryFrom, TryInto}; -use std::fmt::{Debug, Display, format, Formatter}; -use std::ops::Range; +use std::fmt::Debug; use std::string::String; use itertools::{concat, Itertools, zip}; -use ariadne::{Label, Report, ReportBuilder, ReportKind}; -use lc3_isa::{Addr, SignedWord, Word}; -use crate::lex::{LexData, LiteralValue, Opcode}; -use crate::parse::{File, get, get_result, Instruction, Operand, Region, result, WithErrData}; -use crate::{Span, Spanned, util}; -use crate::assemble::{calculate_offset, get_orig}; +use lc3_isa::{Addr, Word}; +use crate::lex::{LexData, Opcode}; +use crate::parse::{File, Instruction, Operand, Region}; +use crate::{get, get_result, Span, Spanned, util, WithErrData}; +use crate::assemble::calculate_offset; use crate::error::{Error, InvalidReferenceReason, OperandType, RegionPlacement, RoughAddr, SingleError}; use crate::error::OperandType::*; use crate::error::Error::*; @@ -105,11 +102,6 @@ enum AddressesOccupiedError { } impl Instruction { - fn get_label(&self) -> Option<&String> { - self.label.as_ref() - .and_then(|res| get_result(res).as_ref().ok()) - } - fn get_first_operand(&self) -> Option<&Operand> { get_result(&self.operands).as_ref().ok() .and_then(|ops| get(ops, 0)) diff --git a/assembler/src/assemble.rs b/assembler/src/assemble.rs index 17f561d..6810437 100644 --- a/assembler/src/assemble.rs +++ b/assembler/src/assemble.rs @@ -1,12 +1,10 @@ use std::collections::HashMap; use std::convert::{TryFrom, TryInto}; -use std::fmt::Debug; -use std::num::{ParseIntError, TryFromIntError}; +use std::num::TryFromIntError; use lc3_isa::{Addr, Reg, SignedWord, Word}; use crate::lex::{ConditionCodes, LiteralValue, Opcode}; -use crate::parse::{Operand, result, try_map, try_result}; -use crate::parse; -use crate::parse::WithErrData; +use crate::parse::Operand; +use crate::{parse, result, try_map, try_result, WithErrData}; pub(crate) type SymbolTable = HashMap; diff --git a/assembler/src/error.rs b/assembler/src/error.rs index ec99d3b..35c0512 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -2,7 +2,7 @@ use ariadne::{Label, Report, ReportBuilder, ReportKind}; use std::cmp::max; use lc3_isa::SignedWord; use std::fmt::{Display, Formatter}; -use crate::{analyze, Span, util}; +use crate::{Span, util}; use crate::lex; use crate::lex::LiteralValue; use crate::parse::Operand; diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index cc77d42..9090eb8 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -1,10 +1,10 @@ - // TODO: docs -// TODO: denys // TODO: docs URL -extern crate core; +// TODO: add more lints? 
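// The #![deny(unused)] attribute on the next line turns the compiler's `unused` lint
// group (unused imports, unused variables, dead code, and so on) into hard errors for
// this crate; that is also why the retained-but-unread `before_first_orig` field gets
// an explicit #[allow(dead_code)] later in this commit.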
+#![deny(unused)] +use std::convert::{TryFrom, TryInto}; use std::fs; use std::path::PathBuf; @@ -21,6 +21,32 @@ pub mod layer; type Span = std::ops::Range; type Spanned = (T, Span); +type WithErrData = Spanned>; + +fn get(v: &Vec>, i: usize) -> Option<&T> { + v.get(i) + .and_then(|res| get_result(res).as_ref().ok()) +} + +fn get_result(v: &WithErrData) -> &Result { + &v.0 +} + +fn result(v: WithErrData) -> Result { + v.0 +} + +fn try_result(maybe_v: Option>) -> Result { + result(maybe_v.ok_or(())?) +} + +fn try_map(maybe_v: Option>) -> Result where + U: TryFrom +{ + try_result(maybe_v)? + .try_into() + .map_err(|_| ()) +} #[derive(Copy, Clone)] pub enum LeniencyLevel { diff --git a/assembler/src/link.rs b/assembler/src/link.rs index c79284c..90339c9 100644 --- a/assembler/src/link.rs +++ b/assembler/src/link.rs @@ -1,9 +1,5 @@ use std::collections::HashMap; -use std::num::{ParseIntError, TryFromIntError}; -use chumsky::chain::Chain; -use chumsky::Parser; -use lc3_isa::util::MemoryDump; -use lc3_isa::{Addr, ADDR_SPACE_SIZE_IN_WORDS, Word}; +use lc3_isa::{Addr, Word}; use crate::assemble::{assemble_instruction, AssemblyResult, Object, ObjectWord, Region, SymbolTable}; use crate::error::SingleError; @@ -32,7 +28,7 @@ fn link_region(symbol_table: &SymbolTable, region: Region) -> Result { return Err(SingleError::Link); } } AssemblyResult::MultipleObjectWords(ows) => { - let mut ws = ows.into_iter() + let ws = ows.into_iter() .map(|ow| match ow { ObjectWord::Value(word) => Ok(word), ObjectWord::UnlinkedInstruction(_) => Err(SingleError::Link), diff --git a/assembler/src/parse.rs b/assembler/src/parse.rs index 61d5371..51b0d27 100644 --- a/assembler/src/parse.rs +++ b/assembler/src/parse.rs @@ -1,42 +1,14 @@ -use std::convert::{TryFrom, TryInto}; +use std::convert::TryFrom; use chumsky::combinator::Repeated; use chumsky::prelude::*; use chumsky::primitive::NoneOf; -use chumsky::recovery::SkipUntil; use chumsky::Stream; use lc3_isa::{Reg, Word}; -use crate::{Span, Spanned}; +use crate::{Spanned, WithErrData}; use crate::LeniencyLevel; use crate::lex::{LiteralValue, Opcode, Token}; -pub(crate) type WithErrData = Spanned>; - -pub(crate) fn get(v: &Vec>, i: usize) -> Option<&T> { - v.get(i) - .and_then(|res| get_result(res).as_ref().ok()) -} - -pub(crate) fn get_result(v: &WithErrData) -> &Result { - &v.0 -} - -pub(crate) fn result(v: WithErrData) -> Result { - v.0 -} - -pub(crate) fn try_result(maybe_v: Option>) -> Result { - result(maybe_v.ok_or(())?) -} - -pub(crate) fn try_map(maybe_v: Option>) -> Result where - U: TryFrom -{ - try_result(maybe_v)? 
- .try_into() - .map_err(|_| ()) -} - #[derive(Debug, Eq, PartialEq)] pub struct Region { pub(crate) orig: WithErrData>>, @@ -214,6 +186,7 @@ fn region(leniency: LeniencyLevel) -> impl Parser, Er #[derive(Debug)] pub struct File { + #[allow(dead_code)] pub(crate) before_first_orig: Spanned>, // TODO: check that this only contains newlines and comments (at least if strict) pub regions: Vec> } From 48a59ad2f27b82591a7aeb18a5c15878f8bcb13b Mon Sep 17 00:00:00 2001 From: David Gipson Date: Tue, 21 Jun 2022 15:47:12 -0500 Subject: [PATCH 66/82] assembler: present source path in error messages --- assembler/bin/as.rs | 16 +-- assembler/src/analyze.rs | 213 ++++++++++++++++++++------------------- assembler/src/error.rs | 87 +++++++++------- assembler/src/lib.rs | 68 ++++++++++--- assembler/src/parse.rs | 21 ++-- assembler/tests/integ.rs | 10 +- 6 files changed, 240 insertions(+), 175 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 26a75f4..19079cb 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -10,7 +10,7 @@ use lc3_shims::memory::FileBackedMemoryShim; use clap::{Parser}; use lc3_isa::util::MemoryDump; use lc3_shims::memory::error::MemoryShimError; -use lc3_assembler::{assemble, assemble_file, LeniencyLevel, parse_and_analyze, parse_and_analyze_file}; +use lc3_assembler::{assemble, assemble_file, LeniencyLevel, parse_and_analyze, parse_and_analyze_file, SourceId, sources}; const MEM_DUMP_FILE_EXTENSION: &'static str = "mem"; @@ -91,18 +91,18 @@ fn as_() -> Result<(), Error> { let leniency = if args.strict { LeniencyLevel::Strict } else { LeniencyLevel::Lenient }; - let src = fs::read_to_string(args.input.clone())?; + let cache = sources([args.input.clone()])?; if args.check { - match parse_and_analyze(&src, leniency) { + match parse_and_analyze_file(&args.input, leniency) { Ok(_) => { println!("{}: No errors found.", args.input.display()); Ok(()) } - Err(error) => print_errors(error, &src) + Err(error) => print_errors(error, cache) } } else { - match assemble(&src, leniency, args.no_os) { + match assemble_file(&args.input, leniency, args.no_os) { Ok(mem) => { let mut output_path = args.input.clone(); output_path.set_extension(MEM_DUMP_FILE_EXTENSION); @@ -111,15 +111,15 @@ fn as_() -> Result<(), Error> { Ok(()) } - Err(error) => print_errors(error, &src) + Err(error) => print_errors(error, cache) } } } -fn print_errors(error: lc3_assembler::error::Error, src: &String) -> Result<(), Error> { +fn print_errors(error: lc3_assembler::error::Error, mut cache: impl ariadne::Cache) -> Result<(), Error> { let print_results = error.report().into_iter() - .map(|report| report.eprint(Source::from(src))) + .map(|report| report.eprint(&mut cache)) .collect::>(); for print_result in print_results { diff --git a/assembler/src/analyze.rs b/assembler/src/analyze.rs index b565f21..e205d87 100644 --- a/assembler/src/analyze.rs +++ b/assembler/src/analyze.rs @@ -6,7 +6,7 @@ use itertools::{concat, Itertools, zip}; use lc3_isa::{Addr, Word}; use crate::lex::{LexData, Opcode}; use crate::parse::{File, Instruction, Operand, Region}; -use crate::{get, get_result, Span, Spanned, util, WithErrData}; +use crate::{get, get_result, SourceId, Spanned, SpanWithSource, util, WithErrData}; use crate::assemble::calculate_offset; use crate::error::{Error, InvalidReferenceReason, OperandType, RegionPlacement, RoughAddr, SingleError}; use crate::error::OperandType::*; @@ -23,40 +23,39 @@ impl ParseErrorsAnalysis { Default::default() } - fn push_error(&mut self, single_error: SingleError, span: 
&Span) { + fn push_error(&mut self, single_error: SingleError, span: &SpanWithSource) { self.errors.push(Spanned(span.clone(), single_error)); } } impl MutVisitor for ParseErrorsAnalysis { - fn enter_region_error(&mut self, span: &Span) { + fn enter_region_error(&mut self, span: &SpanWithSource) { self.push_error(BadRegion, span); } - fn enter_orig_error(&mut self, span: &Span) { + fn enter_orig_error(&mut self, span: &SpanWithSource) { self.push_error(BadOperands, span); } - fn enter_instruction_error(&mut self, span: &Span, _location: &LocationCounter) { + fn enter_instruction_error(&mut self, span: &SpanWithSource, _location: &LocationCounter) { self.push_error(BadInstruction, span); } - fn enter_label_error(&mut self, span: &Span, _location: &LocationCounter) { + fn enter_label_error(&mut self, span: &SpanWithSource, _location: &LocationCounter) { self.push_error(BadLabel, span); } - fn enter_opcode_error(&mut self, span: &Span, _location: &LocationCounter) { + fn enter_opcode_error(&mut self, span: &SpanWithSource, _location: &LocationCounter) { self.push_error(BadOpcode, span); } - fn enter_operands_error(&mut self, span: &Span, _location: &LocationCounter) { + fn enter_operands_error(&mut self, span: &SpanWithSource, _location: &LocationCounter) { self.push_error(BadOperands, span); } - fn enter_operand_error(&mut self, span: &Span, _location: &LocationCounter) { + fn enter_operand_error(&mut self, span: &SpanWithSource, _location: &LocationCounter) { self.push_error(BadOperand, span); } } - #[derive(Default)] struct DuplicateLabelsAnalysis { errors: Vec, - labels: HashMap>, + labels: HashMap>, } impl DuplicateLabelsAnalysis { @@ -66,12 +65,12 @@ impl DuplicateLabelsAnalysis { } impl MutVisitor for DuplicateLabelsAnalysis { - fn exit_file(&mut self, _file: &File) { + fn exit_file(&mut self, _file: &File, _span: &SpanWithSource) { let DuplicateLabelsAnalysis { errors, labels } = self; labels.iter() .filter(|(_, occurrences)| occurrences.len() > 1) .map(|(label, occurrences)| - Single( + Single(occurrences.get(0).unwrap().id.clone(), DuplicateLabel { label: label.clone(), occurrences: occurrences.clone() @@ -80,7 +79,7 @@ impl MutVisitor for DuplicateLabelsAnalysis { .for_each(|e| errors.push(e)); } - fn enter_label(&mut self, label: &String, span: &Span, _location: &LocationCounter) { + fn enter_label(&mut self, label: &String, span: &SpanWithSource, _location: &LocationCounter) { let occurrences = self.labels.entry(label.clone()).or_insert(Vec::new()); occurrences.push(span.clone()); } @@ -143,7 +142,7 @@ const ORIG_ERROR_STARTING_ADDRESS_ESTIMATE: RoughAddr = 0x3000; const INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE: RoughAddr = 1; impl MutVisitor for SymbolTableAnalysis { - fn enter_label(&mut self, label: &String, _span: &Span, location: &LocationCounter) { + fn enter_label(&mut self, label: &String, _span: &SpanWithSource, location: &LocationCounter) { self.symbol_table.entry(label.clone()) .and_modify(|e| *e = Err(InvalidSymbolError::Duplicated)) .or_insert( @@ -185,7 +184,7 @@ impl<'a> LabelOffsetBoundsAnalysis<'a> { } } - fn check_offset(&mut self, label: &String, span: &Span, width: u8, label_addr: RoughAddr, ref_addr: RoughAddr) { + fn check_offset(&mut self, label: &String, span: &SpanWithSource, width: u8, label_addr: RoughAddr, ref_addr: RoughAddr) { match calculate_offset(ref_addr, label_addr) { Err(_) => { // TODO: make more precise. This case shouldn't be possible unless one of the estimated addresses is far out of bounds. 
@@ -214,11 +213,11 @@ impl<'a> LabelOffsetBoundsAnalysis<'a> { } impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { - fn enter_opcode_error(&mut self, _span: &Span, _location: &LocationCounter) { + fn enter_opcode_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) { self.expected_label = None; } - fn enter_opcode(&mut self, opcode: &Opcode, _span: &Span, _location: &LocationCounter) { + fn enter_opcode(&mut self, opcode: &Opcode, _span: &SpanWithSource, _location: &LocationCounter) { use Opcode::*; self.expected_label = match opcode { @@ -231,13 +230,14 @@ impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { } } - fn enter_operands(&mut self, operands: &Vec>, _span: &Span, location: &LocationCounter) { + fn enter_operands(&mut self, operands: &Vec>, span: &SpanWithSource, location: &LocationCounter) { if let Some(ExpectedLabel { width, position }) = &self.expected_label { - if let Some((Ok(Operand::Label(label)), op_span)) = operands.get(*position) { + if let Some((Ok(Operand::Label(label)), op_span_no_source)) = operands.get(*position) { + let op_span = (span.id.clone(), op_span_no_source.clone()).into(); match self.symbol_table.get(label) { None => { self.errors.push( - Spanned(op_span.clone(), + Spanned(op_span, InvalidLabelReference { label: label.clone(), reason: InvalidReferenceReason::Undefined @@ -245,16 +245,16 @@ impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { } Some(stv) => match stv { Ok(addr) => { - self.check_offset(label, op_span, *width, *addr as RoughAddr, location.value); + self.check_offset(label, &op_span, *width, *addr as RoughAddr, location.value); } Err(ste) => match ste { InvalidSymbolError::InvalidOrig { estimated_addr } | InvalidSymbolError::PriorInvalidInstruction { estimated_addr } => { - self.check_offset(label, op_span, *width, *estimated_addr, location.value); + self.check_offset(label, &op_span, *width, *estimated_addr, location.value); } InvalidSymbolError::Duplicated => { self.errors.push( - Spanned(op_span.clone(), + Spanned(op_span, InvalidLabelReference { label: label.clone(), reason: InvalidReferenceReason::Duplicated @@ -262,7 +262,7 @@ impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { } InvalidSymbolError::OutOfBounds => { self.errors.push( - Spanned(op_span.clone(), + Spanned(op_span, InvalidLabelReference { label: label.clone(), reason: InvalidReferenceReason::OutOfBounds @@ -289,7 +289,7 @@ impl OperandTypesAnalysis { Default::default() } - fn check_operands(&mut self, operands: &Vec>, span: &Span) { + fn check_operands(&mut self, operands: &Vec>, span: &SpanWithSource) { if let Some(expected) = &self.expected_operands { // TODO: create longest common subsequence diff for more precise errors let ops_len = operands.len(); @@ -297,7 +297,8 @@ impl OperandTypesAnalysis { if ops_len != exp_len { self.errors.push(Spanned(span.clone(), WrongNumberOfOperands { expected: exp_len, actual: ops_len })) } else { - for ((op_res, op_span), exp_ty) in zip(operands, expected) { + for ((op_res, op_span_no_source), exp_ty) in zip(operands, expected) { + let op_span = (span.id.clone(), op_span_no_source.clone()).into(); if let Ok(op) = op_res { if !exp_ty.check(op) { let actual = if let Operand::NumberLiteral(value) = op { @@ -305,7 +306,7 @@ impl OperandTypesAnalysis { } else { OperandType::of(op) }; - self.errors.push(Spanned(op_span.clone(), OperandTypeMismatch { expected: exp_ty.clone(), actual })); + self.errors.push(Spanned(op_span, OperandTypeMismatch { expected: exp_ty.clone(), actual })); } } } @@ -319,16 +320,16 @@ 
fn orig_expected_operands() -> Vec { } impl MutVisitor for OperandTypesAnalysis { - fn enter_orig(&mut self, orig: &Vec>, span: &Span, _location: &LocationCounter) { + fn enter_orig(&mut self, orig: &Vec>, span: &SpanWithSource, _location: &LocationCounter) { self.expected_operands = Some(orig_expected_operands()); self.check_operands(orig, span); } - fn enter_opcode_error(&mut self, _span: &Span, _location: &LocationCounter) { + fn enter_opcode_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) { self.expected_operands = None; } - fn enter_opcode(&mut self, opcode: &Opcode, _span: &Span, _location: &LocationCounter) { + fn enter_opcode(&mut self, opcode: &Opcode, _span: &SpanWithSource, _location: &LocationCounter) { use Opcode::*; self.expected_operands = Some( match opcode { @@ -354,7 +355,7 @@ impl MutVisitor for OperandTypesAnalysis { ); } - fn enter_operands(&mut self, operands: &Vec>, span: &Span, _location: &LocationCounter) { + fn enter_operands(&mut self, operands: &Vec>, span: &SpanWithSource, _location: &LocationCounter) { self.check_operands(operands, span); } } @@ -379,16 +380,16 @@ impl ObjectPlacementAnalysis { } impl MutVisitor for ObjectPlacementAnalysis { - fn exit_file(&mut self, _file: &File) { + fn exit_file(&mut self, _file: &File, span: &SpanWithSource) { self.object_spans.sort_unstable_by_key(|span| span.span_in_memory.start); for (op1, op2) in self.object_spans.iter().tuple_windows() { if op2.span_in_memory.start < op1.span_in_memory.end { - self.errors.push(Single(SingleError::regions_overlap(op1.clone(), op2.clone()))); + self.errors.push(Single(span.id.clone(), SingleError::regions_overlap(op1.clone(), op2.clone()))); } } } - fn exit_region(&mut self, _region: &Region, span: &Span, location: &LocationCounter) { + fn exit_region(&mut self, _region: &Region, span: &SpanWithSource, location: &LocationCounter) { self.object_spans.push( RegionPlacement { position_in_file: self.object_index, @@ -398,7 +399,7 @@ impl MutVisitor for ObjectPlacementAnalysis { self.object_index += 1; } - fn exit_orig(&mut self, _orig: &Vec>, _span: &Span, location: &LocationCounter) { + fn exit_orig(&mut self, _orig: &Vec>, _span: &SpanWithSource, location: &LocationCounter) { self.last_start = location.value; } } @@ -440,41 +441,43 @@ impl LocationCounterState { } } -fn visit(v: &mut impl MutVisitor, file: &File) { - v.enter_file(file); +fn visit(v: &mut impl MutVisitor, file: &File, span: &SpanWithSource) { + v.enter_file(file, span); for region in file.regions.iter() { - visit_region(v, region); + visit_region(v, file.id.clone(), region); } - v.exit_file(file); + v.exit_file(file, span); } -fn visit_region(v: &mut impl MutVisitor, region: &WithErrData) { +fn visit_region(v: &mut impl MutVisitor, id: SourceId, region: &WithErrData) { let (region_res, span) = region; + let span = (id.clone(), span.clone()).into(); match region_res { - Err(_) => { v.enter_region_error(span); } + Err(_) => { v.enter_region_error(&span); } Ok(r) => { - v.enter_region(r, span); + v.enter_region(r, &span); let mut location_counter = LocationCounter::new(); let Region { orig, instructions } = r; - visit_orig(v, orig, &mut location_counter); + visit_orig(v, id.clone(), orig, &mut location_counter); for instruction in instructions { - visit_instruction(v, instruction, &mut location_counter); + visit_instruction(v, id.clone(), instruction, &mut location_counter); } - v.exit_region(r, span, &mut location_counter); + v.exit_region(r, &span, &mut location_counter); } } } -fn visit_orig(v: &mut 
impl MutVisitor, orig: &WithErrData>>, location_counter: &mut LocationCounter) { +fn visit_orig(v: &mut impl MutVisitor, id: SourceId, orig: &WithErrData>>, location_counter: &mut LocationCounter) { let (orig_res, span) = orig; + let span = (id.clone(), span.clone()).into(); match orig_res { Err(_) => { location_counter.value = ORIG_ERROR_STARTING_ADDRESS_ESTIMATE; location_counter.state.if_valid_set(LocationCounterState::InvalidOrig); - v.enter_orig_error(span); + v.enter_orig_error(&span); } Ok(o) => { location_counter.value = get(o, 0) @@ -484,35 +487,36 @@ fn visit_orig(v: &mut impl MutVisitor, orig: &WithErrData, location_counter: &mut LocationCounter) { +fn visit_instruction(v: &mut impl MutVisitor, id: SourceId, instruction: &WithErrData, location_counter: &mut LocationCounter) { let (inst_res, span) = instruction; + let span = (id.clone(), span.clone()).into(); match inst_res { Err(_) => { - v.enter_instruction_error(span, location_counter); + v.enter_instruction_error(&span, location_counter); location_counter.value += INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE; location_counter.state.if_valid_set(LocationCounterState::InvalidInstruction); } Ok(i) => { - v.enter_instruction(i, span, location_counter); + v.enter_instruction(i, &span, location_counter); let Instruction { label, opcode, operands } = i; if let Some(l) = label { - visit_label(v, l, location_counter); + visit_label(v, id.clone(), l, location_counter); } - visit_opcode(v, opcode, location_counter); - visit_operands(v, operands, location_counter); + visit_opcode(v, id.clone(), opcode, location_counter); + visit_operands(v, id.clone(), operands, location_counter); - v.exit_instruction(i, span, location_counter); + v.exit_instruction(i, &span, location_counter); location_counter.value += i.addresses_occupied() .unwrap_or_else(|_| { @@ -523,83 +527,89 @@ fn visit_instruction(v: &mut impl MutVisitor, instruction: &WithErrData, location_counter: &mut LocationCounter) { +fn visit_label(v: &mut impl MutVisitor, id: SourceId, label: &WithErrData, location_counter: &mut LocationCounter) { let (label_res, span) = label; + let span = (id, span.clone()).into(); match label_res { - Err(_) => { v.enter_label_error(span, location_counter); } - Ok(l) => { v.enter_label( l, span, location_counter); } + Err(_) => { v.enter_label_error(&span, location_counter); } + Ok(l) => { v.enter_label( l, &span, location_counter); } } } -fn visit_opcode(v: &mut impl MutVisitor, opcode: &WithErrData, location_counter: &mut LocationCounter) { +fn visit_opcode(v: &mut impl MutVisitor, id: SourceId, opcode: &WithErrData, location_counter: &mut LocationCounter) { let (opcode_res, span) = opcode; + let span = (id, span.clone()).into(); match opcode_res { - Err(_) => { v.enter_opcode_error(span, location_counter); } - Ok(oc) => { v.enter_opcode( oc, span, location_counter); } + Err(_) => { v.enter_opcode_error(&span, location_counter); } + Ok(oc) => { v.enter_opcode( oc, &span, location_counter); } } } -fn visit_operands(v: &mut impl MutVisitor, operands: &WithErrData>>, location_counter: &mut LocationCounter) { +fn visit_operands(v: &mut impl MutVisitor, id: SourceId, operands: &WithErrData>>, location_counter: &mut LocationCounter) { let (ops_res, span) = operands; + let span = (id.clone(), span.clone()).into(); match ops_res { - Err(_) => { v.enter_operands_error(span, location_counter); } + Err(_) => { v.enter_operands_error(&span, location_counter); } Ok(o) => { - v.enter_operands( o, span, location_counter); + v.enter_operands( o, &span, 
location_counter); for operand in o { - visit_operand(v, operand, location_counter); + visit_operand(v, id.clone(), operand, location_counter); } } } } -fn visit_operand(v: &mut impl MutVisitor, operand: &WithErrData, location_counter: &mut LocationCounter) { +fn visit_operand(v: &mut impl MutVisitor, id: SourceId, operand: &WithErrData, location_counter: &mut LocationCounter) { let (op_res, span) = operand; + let span = (id, span.clone()).into(); match op_res { - Err(_) => { v.enter_operand_error(span, location_counter); } - Ok(o) => { v.enter_operand( o, span, location_counter); } + Err(_) => { v.enter_operand_error(&span, location_counter); } + Ok(o) => { v.enter_operand( o, &span, location_counter); } } } trait MutVisitor { - fn enter_file(&mut self, _file: &File) {} - fn exit_file(&mut self, _file: &File) {} + fn enter_file(&mut self, _file: &File, _span: &SpanWithSource) {} + fn exit_file(&mut self, _file: &File, _span: &SpanWithSource) {} - fn enter_region_error(&mut self, _span: &Span) {} - fn enter_region(&mut self, _region: &Region, _span: &Span) {} - fn exit_region(&mut self, _region: &Region, _span: &Span, _location: &LocationCounter) {} + fn enter_region_error(&mut self, _span: &SpanWithSource) {} + fn enter_region(&mut self, _region: &Region, _span: &SpanWithSource) {} + fn exit_region(&mut self, _region: &Region, _span: &SpanWithSource, _location: &LocationCounter) {} - fn enter_orig_error(&mut self, _span: &Span) {} - fn enter_orig(&mut self, _orig: &Vec>, _span: &Span, _location: &LocationCounter) {} - fn exit_orig(&mut self, _orig: &Vec>, _span: &Span, _location: &LocationCounter) {} + fn enter_orig_error(&mut self, _span: &SpanWithSource) {} + fn enter_orig(&mut self, _orig: &Vec>, _span: &SpanWithSource, _location: &LocationCounter) {} + fn exit_orig(&mut self, _orig: &Vec>, _span: &SpanWithSource, _location: &LocationCounter) {} - fn enter_instruction_error(&mut self, _span: &Span, _location: &LocationCounter) {} - fn enter_instruction(&mut self, _instruction: &Instruction, _span: &Span, _location: &LocationCounter) {} - fn exit_instruction(&mut self, _instruction: &Instruction, _span: &Span, _location: &LocationCounter) {} + fn enter_instruction_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_instruction(&mut self, _instruction: &Instruction, _span: &SpanWithSource, _location: &LocationCounter) {} + fn exit_instruction(&mut self, _instruction: &Instruction, _span: &SpanWithSource, _location: &LocationCounter) {} - fn enter_label_error(&mut self, _span: &Span, _location: &LocationCounter) {} - fn enter_label(&mut self, _label: &String, _span: &Span, _location: &LocationCounter) {} + fn enter_label_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_label(&mut self, _label: &String, _span: &SpanWithSource, _location: &LocationCounter) {} - fn enter_opcode_error(&mut self, _span: &Span, _location: &LocationCounter) {} - fn enter_opcode(&mut self, _opcode: &Opcode, _span: &Span, _location: &LocationCounter) {} + fn enter_opcode_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_opcode(&mut self, _opcode: &Opcode, _span: &SpanWithSource, _location: &LocationCounter) {} - fn enter_operands_error(&mut self, _span: &Span, _location: &LocationCounter) {} - fn enter_operands(&mut self, _operands: &Vec>, _span: &Span, _location: &LocationCounter) {} + fn enter_operands_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_operands(&mut self, 
_operands: &Vec>, _span: &SpanWithSource, _location: &LocationCounter) {} - fn enter_operand_error(&mut self, _span: &Span, _location: &LocationCounter) {} - fn enter_operand(&mut self, _operand: &Operand, _span: &Span, _location: &LocationCounter) {} + fn enter_operand_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_operand(&mut self, _operand: &Operand, _span: &SpanWithSource, _location: &LocationCounter) {} } -fn analyze_lex_data(lex_data: &LexData, file_span: &Span) -> Vec { +fn analyze_lex_data(lex_data: &LexData, file_span: &SpanWithSource) -> Vec { let mut errors = Vec::new(); if lex_data.no_tokens { - errors.push(Single(NoTokens)) + errors.push(Single(file_span.id.clone(), NoTokens)) } else { if !lex_data.orig_present { - errors.push(Spanned(file_span.start..file_span.start, NoOrig)); + let start_span = (file_span.id.clone(), file_span.span.start..file_span.span.start).into(); + errors.push(Spanned(start_span, NoOrig)); } if !lex_data.end_present { - errors.push(Spanned(file_span.end..file_span.end, NoEnd)); + let end_span = (file_span.id.clone(), file_span.span.end..file_span.span.end).into(); + errors.push(Spanned(end_span, NoEnd)); } } errors @@ -608,25 +618,26 @@ fn analyze_lex_data(lex_data: &LexData, file_span: &Span) -> Vec { pub fn validate(lex_data: &LexData, file_spanned: &Spanned) -> Vec { let (file, file_span) = file_spanned; - let errors_from_lex_data = analyze_lex_data(&lex_data, file_span); + let file_span_with_source = (file.id.clone(), file_span.clone()).into(); + let errors_from_lex_data = analyze_lex_data(&lex_data, &file_span_with_source); let mut pe = ParseErrorsAnalysis::new(); - visit(&mut pe, file); + visit(&mut pe, file, &file_span_with_source); let mut dl = DuplicateLabelsAnalysis::new(); - visit(&mut dl, file); + visit(&mut dl, file, &file_span_with_source); let mut ot = OperandTypesAnalysis::new(); - visit(&mut ot, file); + visit(&mut ot, file, &file_span_with_source); let mut st = SymbolTableAnalysis::new(); - visit(&mut st, file); + visit(&mut st, file, &file_span_with_source); let mut lob = LabelOffsetBoundsAnalysis::new(&st.symbol_table); - visit(&mut lob, file); + visit(&mut lob, file, &file_span_with_source); let mut op = ObjectPlacementAnalysis::new(); - visit(&mut op, file); + visit(&mut op, file, &file_span_with_source); concat([ errors_from_lex_data, diff --git a/assembler/src/error.rs b/assembler/src/error.rs index 35c0512..ace16d8 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -2,67 +2,76 @@ use ariadne::{Label, Report, ReportBuilder, ReportKind}; use std::cmp::max; use lc3_isa::SignedWord; use std::fmt::{Display, Formatter}; -use crate::{Span, util}; +use crate::{SourceId, Span, SpanWithSource, util}; use crate::lex; -use crate::lex::LiteralValue; +use crate::lex::{LiteralValue}; use crate::parse::Operand; use std::ops::Range; -impl From for SingleError { - fn from(error: std::io::Error) -> Self { - Io(error) - } +#[derive(Debug)] +pub enum Error { + Single(SourceId, SingleError), + Spanned(SpanWithSource, SingleError), + Multiple(Vec), } -impl From> for Error - where E: Into +pub(crate) fn into_multiple(id: SourceId, es: Vec) -> Error + where (SourceId, E): Into { - fn from(errors: Vec) -> Self { - let es = errors.into_iter() - .map(|e| e.into()) + let errors = + es.into_iter() + .map(|e| (id.clone(), e).into()) .collect(); + Error::Multiple(errors) +} + +impl From> for Error +{ + fn from(es: Vec) -> Self { Error::Multiple(es) } } -impl From for Error +impl From<(SourceId, E)> for 
Error where E: Into { - fn from(error: E) -> Self { - Error::Single(error.into()) + fn from((id, e): (SourceId, E)) -> Self { + Error::Single(id, e.into()) } } -impl From> for SingleError { - fn from(error: chumsky::error::Simple) -> Self { - Lex(error) - } +impl From for SingleError { + fn from(e: std::io::Error) -> Self { Io(e) } } -impl From> for SingleError { - fn from(error: chumsky::error::Simple) -> Self { - Parse(error) +impl From<(SourceId, chumsky::error::Simple)> for Error { + fn from((id, e): (SourceId, chumsky::error::Simple)) -> Self { + let span = SpanWithSource { id, span: e.span() }; + Error::Spanned(span, Lex(e)) } } -#[derive(Debug)] -pub enum Error { - Single(SingleError), - Spanned(Span, SingleError), - Multiple(Vec), +impl From<(SourceId, chumsky::error::Simple)> for Error { + fn from((id, e): (SourceId, chumsky::error::Simple)) -> Self { + let span = SpanWithSource { id, span: e.span() }; + Error::Spanned(span, Parse(e)) + } } impl Error { - pub fn report(self) -> Vec { + pub fn report(self) -> Vec> { use Error::*; match self { - Single(error) => vec![report_single(error).finish()], - Spanned(span, error) => vec![ - report_single(error) - .with_label(Label::new(span).with_message("here")) - .finish() - ], + Single(id, error) => vec![report_single(id, None, error).finish()], + Spanned(span, error) => { + let SpanWithSource { id, span: s } = span.clone(); + vec![ + report_single(id, Some(s), error) + .with_label(Label::new(span).with_message("here")) + .finish() + ] + } Multiple(errors) => errors.into_iter() .flat_map(|e| e.report()) @@ -84,6 +93,8 @@ pub enum SingleError { Link, Layer, + TooManyInputs, + BadRegion, BadInstruction, BadLabel, @@ -92,7 +103,7 @@ pub enum SingleError { BadOperand, WrongNumberOfOperands { expected: usize, actual: usize }, OperandTypeMismatch { expected: OperandType, actual: OperandType }, - DuplicateLabel { label: String, occurrences: Vec, }, + DuplicateLabel { label: String, occurrences: Vec, }, InvalidLabelReference { label: String, reason: InvalidReferenceReason }, LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, RegionsOverlap { placement1: RegionPlacement, placement2: RegionPlacement }, @@ -171,6 +182,7 @@ impl SingleError { Assemble => "unexpected assembly error".to_string(), Link => "unexpected link error".to_string(), Layer => "unexpected layering error".to_string(), + TooManyInputs => "too many input files provided".to_string(), } } } @@ -182,8 +194,9 @@ fn min_signed_hex_digits_required(n: i32) -> u8 { } -fn report_single(error: SingleError) -> ReportBuilder { - let mut r = Report::build(ReportKind::Error, (), 0) +fn report_single(id: SourceId, span: Option, error: SingleError) -> ReportBuilder { + let mut r: ReportBuilder = + Report::build(ReportKind::Error, id, span.map(|s| s.start).unwrap_or(0)) .with_message(error.message()); match error { DuplicateLabel { occurrences, .. 
} => { @@ -367,7 +380,7 @@ impl OperandType { #[derive(Clone, Debug)] pub struct RegionPlacement { pub(crate) position_in_file: usize, - pub(crate) span_in_file: Span, + pub(crate) span_in_file: SpanWithSource, pub(crate) span_in_memory: Range, } diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 9090eb8..7c08b08 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -5,6 +5,7 @@ #![deny(unused)] use std::convert::{TryFrom, TryInto}; +use std::fmt::Debug; use std::fs; use std::path::PathBuf; @@ -23,6 +24,28 @@ type Span = std::ops::Range; type Spanned = (T, Span); type WithErrData = Spanned>; +pub type SourceId = String; + +#[derive(Debug, Clone)] +pub struct SpanWithSource { + id: SourceId, + span: Span, +} + +impl From<(SourceId, Span)> for SpanWithSource { + fn from((id, span): (SourceId, Span)) -> Self { + Self { id, span } + } +} + +impl ariadne::Span for SpanWithSource { + type SourceId = SourceId; + + fn source(&self) -> &Self::SourceId { &self.id } + fn start(&self) -> usize { self.span.start } + fn end(&self) -> usize { self.span.end } +} + fn get(v: &Vec>, i: usize) -> Option<&T> { v.get(i) .and_then(|res| get_result(res).as_ref().ok()) @@ -54,14 +77,30 @@ pub enum LeniencyLevel { Strict } -pub fn parse_and_analyze_file(input: PathBuf, leniency: LeniencyLevel) -> Result { - let src = fs::read_to_string(input)?; - parse_and_analyze(&src, leniency) +pub fn sources(iter: impl IntoIterator) -> Result, std::io::Error> { + let sources = iter.into_iter() + .map(|input| Ok((id(&input), read(&input)?))) + .collect::, std::io::Error>>()?; + Ok(ariadne::sources(sources)) +} + +pub fn read(input: &PathBuf) -> Result { + fs::read_to_string(input.clone()) +} + +pub fn id(input: &PathBuf) -> SourceId { + input.to_string_lossy().to_string() +} + +pub fn parse_and_analyze_file(input: &PathBuf, leniency: LeniencyLevel) -> Result { + let id = id(&input); + let src = read(input).map_err(|e| (id.clone(), e))?; + parse_and_analyze(&id, &src, leniency) } -pub fn parse_and_analyze(src: &String, leniency: LeniencyLevel) -> Result { - let (tokens, lex_data) = lex::lex(src, leniency)?; - let file_spanned = parse::parse(src, tokens, leniency)?; +pub fn parse_and_analyze(id: &SourceId, src: &String, leniency: LeniencyLevel) -> Result { + let (tokens, lex_data) = lex::lex(src, leniency).map_err(|es| error::into_multiple(id.clone(), es))?; + let file_spanned = parse::parse(id.clone(), src, tokens, leniency).map_err(|es| error::into_multiple(id.clone(), es))?; let errors = analyze::validate(&lex_data, &file_spanned); if !errors.is_empty() { return Err(errors.into()); @@ -70,15 +109,16 @@ pub fn parse_and_analyze(src: &String, leniency: LeniencyLevel) -> Result Result { - let src = fs::read_to_string(input)?; - assemble(&src, leniency, no_os) +pub fn assemble_file(input: &PathBuf, leniency: LeniencyLevel, no_os: bool) -> Result { + let id = id(&input); + let src = read(input).map_err(|e| (id.clone(), e))?; + assemble(&id, &src, leniency, no_os) } -pub fn assemble(src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { - let file = parse_and_analyze(src, leniency)?; - let assemble::Object { symbol_table, regions } = assemble::assemble(file).map_err(|_| error::SingleError::Assemble)?; - let linked_regions = link::link_regions(&symbol_table, regions)?; - let mem = layer::layer(linked_regions, !no_os)?; +pub fn assemble(id: &SourceId, src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { + let file = parse_and_analyze(id, src, leniency)?; + let assemble::Object { symbol_table, 
regions } = assemble::assemble(file).map_err(|_| (id.clone(), error::SingleError::Assemble))?; + let linked_regions = link::link_regions(&symbol_table, regions).map_err(|e| (id.clone(), e))?; + let mem = layer::layer(linked_regions, !no_os).map_err(|e| (id.clone(), e))?; Ok(mem) } diff --git a/assembler/src/parse.rs b/assembler/src/parse.rs index 51b0d27..afba2ee 100644 --- a/assembler/src/parse.rs +++ b/assembler/src/parse.rs @@ -5,7 +5,7 @@ use chumsky::primitive::NoneOf; use chumsky::Stream; use lc3_isa::{Reg, Word}; -use crate::{Spanned, WithErrData}; +use crate::{SourceId, Spanned, WithErrData}; use crate::LeniencyLevel; use crate::lex::{LiteralValue, Opcode, Token}; @@ -186,12 +186,13 @@ fn region(leniency: LeniencyLevel) -> impl Parser, Er #[derive(Debug)] pub struct File { + pub(crate) id: SourceId, #[allow(dead_code)] pub(crate) before_first_orig: Spanned>, // TODO: check that this only contains newlines and comments (at least if strict) pub regions: Vec> } -fn file(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { +fn file(id: SourceId, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { everything_until_orig() .map_with_span(|toks, span| (toks, span)) .then( @@ -200,14 +201,14 @@ fn file(leniency: LeniencyLevel) -> impl Parser, Error = Si .allow_trailing() ) .then_ignore(end()) - .map_with_span(|(before_first_orig, regions), span| - (File { before_first_orig, regions }, span)) + .map_with_span(move |(before_first_orig, regions), span| + (File { id: id.clone(), before_first_orig, regions }, span)) } -pub fn parse(src: &str, tokens: Vec>, leniency: LeniencyLevel) -> Result, Vec>> { +pub fn parse(id: SourceId, src: &str, tokens: Vec>, leniency: LeniencyLevel) -> Result, Vec>> { let len = src.chars().count(); let (maybe_file, errors) = - file(leniency) + file(id, leniency) .parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())); maybe_file.ok_or(errors) @@ -226,7 +227,7 @@ mod tests { fn capture_tokens_before_first_orig_separately() { let source = "%some #random junk .ORIG x3000\nADD R0, R0, R0\n.END"; let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); - let file = parse(source, tokens, LeniencyLevel::Lenient).unwrap(); + let file = parse("".to_string(), source, tokens, LeniencyLevel::Lenient).unwrap(); assert_eq!((vec![Token::Invalid, Token::Invalid, Token::Label("JUNK".to_string())], 0..18), file.0.before_first_orig); @@ -236,7 +237,7 @@ mod tests { fn ignore_after_end() { let source = ".ORIG x3000\nADD R0, R0, R0\n.END then %some #random junk!"; let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); - let file = parse(source, tokens, LeniencyLevel::Lenient).unwrap(); + let file = parse("".to_string(), source, tokens, LeniencyLevel::Lenient).unwrap(); let f = file.0; assert_eq!((vec![], 0..5), f.before_first_orig); // TODO: probably doesn't need fixing, but span should probably be 0..0; find source of bug @@ -253,7 +254,7 @@ mod tests { fn operand_error() { let source = ".ORIG x3000\nADD R0, R0, #OOPS; <- error\n.END"; let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); - let file = parse(source, tokens, LeniencyLevel::Lenient).unwrap(); + let file = parse("".to_string(), source, tokens, LeniencyLevel::Lenient).unwrap(); assert_eq!(vec![(Ok(Region { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), @@ -268,7 +269,7 @@ mod tests { fn label_error() { let source = ".ORIG x3000\nA%DDER ADD R0, R0, #1; <- error\n.END"; let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); - 
let file = parse(source, tokens, LeniencyLevel::Lenient).unwrap(); + let file = parse("".to_string(), source, tokens, LeniencyLevel::Lenient).unwrap(); assert_eq!(vec![(Ok(Region { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 11fd84a..8c74129 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -298,7 +298,7 @@ fn multiple_output_test(input: &str, expected: &[Word]) { fn test(input: &str, orig: usize, expected_mem: &[Word]) { let src = input.to_string(); - let mem = assemble(&src, LeniencyLevel::Lenient, true).unwrap(); + let mem = assemble(&"".to_string(), &src, LeniencyLevel::Lenient, true).unwrap(); for i in 0..orig { assert_mem(&mem, i, 0x0000); @@ -337,13 +337,13 @@ mod error { #[test] fn $test_name() { let src = $source.to_string(); - match parse_and_analyze(&src, LeniencyLevel::Lenient) { + match parse_and_analyze(&"".to_string(), &src, LeniencyLevel::Lenient) { Err(error) => { match error { Error::Multiple(errors) => { assert_eq!(errors.len(), 1, "Found too many args: {:?}", errors); match errors.get(0) { - Some(Error::Single(error)) + Some(Error::Single(_, error)) | Some(Error::Spanned(_, error)) => { assert_matches!(error, $expected); } @@ -470,7 +470,7 @@ mod error { $errors.iter() .any(|error| { match error { - Error::Single(error) + Error::Single(_, error) | Error::Spanned(_, error) => { matches!(error, $pattern) } @@ -494,7 +494,7 @@ mod error { #[test] fn $test_name() { let src = $source.to_string(); - match parse_and_analyze(&src, LeniencyLevel::Lenient) { + match parse_and_analyze(&"".to_string(), &src, LeniencyLevel::Lenient) { Err(error) => { match error { Error::Multiple(errors) => { From dc99108649803a22723b905fe522355ba75e3ab5 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Tue, 21 Jun 2022 16:44:09 -0500 Subject: [PATCH 67/82] assembler: remove unused dependencies --- Cargo.lock | 79 -------------------------------------------- assembler/Cargo.toml | 3 -- 2 files changed, 82 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c714495..e1e528a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,24 +11,6 @@ dependencies = [ "const-random", ] -[[package]] -name = "aho-corasick" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81ce3d38065e618af2d7b77e10c5ad9a069859b4be3c2250f674af3840d9c8a5" -dependencies = [ - "memchr", -] - -[[package]] -name = "annotate-snippets" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d78ea013094e5ea606b1c05fe35f1dd7ea1eb1ea259908d040b25bd5ec677ee5" -dependencies = [ - "yansi-term", -] - [[package]] name = "ariadne" version = "0.1.5" @@ -239,7 +221,6 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" name = "lc3-assembler" version = "0.1.0" dependencies = [ - "annotate-snippets", "ariadne", "assert_matches", "chumsky", @@ -248,9 +229,7 @@ dependencies = [ "lc3-isa", "lc3-os", "lc3-shims", - "num-traits", "quote", - "regex", ] [[package]] @@ -340,12 +319,6 @@ dependencies = [ "cfg-if 0.1.10", ] -[[package]] -name = "memchr" -version = "2.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" - [[package]] name = "num-integer" version = "0.1.42" @@ -425,28 +398,6 @@ version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" -[[package]] -name = "regex" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", - "thread_local", - "utf8-ranges", -] - -[[package]] -name = "regex-syntax" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7" -dependencies = [ - "ucd-util", -] - [[package]] name = "serde" version = "1.0.106" @@ -505,15 +456,6 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" -[[package]] -name = "thread_local" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" -dependencies = [ - "lazy_static", -] - [[package]] name = "time" version = "0.1.42" @@ -543,24 +485,12 @@ dependencies = [ "crunchy", ] -[[package]] -name = "ucd-util" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85f514e095d348c279b1e5cd76795082cf15bd59b93207832abe0b1d8fed236" - [[package]] name = "unicode-ident" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" -[[package]] -name = "utf8-ranges" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" - [[package]] name = "version_check" version = "0.9.4" @@ -609,12 +539,3 @@ name = "yansi" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" - -[[package]] -name = "yansi-term" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a30d0d48515e745863faad2da9c1e1bac640f9f0f83f1eecb79fc0bf4018e5d2" -dependencies = [ - "winapi", -] diff --git a/assembler/Cargo.toml b/assembler/Cargo.toml index 654f9bb..ecd60bd 100644 --- a/assembler/Cargo.toml +++ b/assembler/Cargo.toml @@ -33,10 +33,7 @@ maintenance = { status = "actively-developed" } [dependencies] -regex = "0.2.1" itertools = "0.8.2" -num-traits = "0.2.11" -annotate-snippets = { version = "0.8.0", features = ["color"] } clap = { version = "3.1.18", features = ["derive"] } chumsky = "0.8.0" ariadne = "0.1.5" From e590dd763cf30e81a9e929dae02b4954f3952a7b Mon Sep 17 00:00:00 2001 From: David Gipson Date: Tue, 21 Jun 2022 16:54:24 -0500 Subject: [PATCH 68/82] assembler: add method to return String error report --- assembler/bin/as.rs | 9 +-------- assembler/src/error.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 19079cb..7371567 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -117,13 +117,6 @@ fn as_() -> Result<(), Error> { } fn print_errors(error: lc3_assembler::error::Error, mut cache: impl ariadne::Cache) -> Result<(), Error> { - let print_results = - error.report().into_iter() - .map(|report| report.eprint(&mut cache)) - .collect::>(); - - for print_result in print_results { - print_result? 
- } + eprint!("{}", error.report_to_string(cache)?); Err(Error::Assembler) } diff --git a/assembler/src/error.rs b/assembler/src/error.rs index ace16d8..48a6655 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -78,6 +78,14 @@ impl Error { .collect() } } + + pub fn report_to_string(self, mut cache: impl ariadne::Cache) -> Result { + let mut s = Vec::new(); + for report in self.report() { + report.write(&mut cache, &mut s)?; + } + Ok(String::from_utf8_lossy(&s).to_string()) + } } pub(crate) type RoughAddr = i32; From 50c342f1d49b743459d2f0e8100118e13f2a9568 Mon Sep 17 00:00:00 2001 From: Rahul Butani Date: Tue, 21 Jun 2022 18:01:08 -0500 Subject: [PATCH 69/82] misc: bump the MSRV to 1.56, use edition 2021 --- .github/workflows/assembler.yml | 5 ++--- Cargo.lock | 4 ++-- assembler/Cargo.toml | 15 ++++++++------- assembler/README.md | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/assembler.yml b/.github/workflows/assembler.yml index 0baac7e..97db8d6 100644 --- a/.github/workflows/assembler.yml +++ b/.github/workflows/assembler.yml @@ -13,12 +13,11 @@ jobs: fail-fast: false matrix: crate: [ lc3-assembler ] - os: [ windows-latest, ubuntu-latest, macOS-latest ] + os: [ windows-latest, ubuntu-latest, macos-latest ] rust: - stable - - beta - nightly - - 1.42.0 + - 1.56.1 runs-on: ${{ matrix.os }} steps: diff --git a/Cargo.lock b/Cargo.lock index 3099a31..12c4b0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -200,9 +200,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.8.2" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" dependencies = [ "either", ] diff --git a/assembler/Cargo.toml b/assembler/Cargo.toml index bff8b4d..674c221 100644 --- a/assembler/Cargo.toml +++ b/assembler/Cargo.toml @@ -2,7 +2,8 @@ name = "lc3-assembler" version = "0.1.0" authors = ["UT UTP "] -edition = "2018" +edition = "2021" +rust-version = "1.56.1" workspace = ".." @@ -33,15 +34,15 @@ maintenance = { status = "actively-developed" } [dependencies] -itertools = "0.8.2" -clap = { version = "3.1.18", features = ["derive"] } -chumsky = "0.8.0" -ariadne = "0.1.5" -quote = "1.0.18" +itertools = "0.10" +clap = { version = "3.1", features = ["derive"] } +chumsky = "0.8" +ariadne = "0.1" +quote = "1" lc3-isa = { version = "0.1.0", default-features = false } lc3-shims = { version = "0.1.0", default-features = false } lc3-os = { version = "0.1.0", default-features = false } [dev-dependencies] -assert_matches = "1.5.0" +assert_matches = "1.5" diff --git a/assembler/README.md b/assembler/README.md index 9cc2d11..12e1df6 100644 --- a/assembler/README.md +++ b/assembler/README.md @@ -7,6 +7,6 @@ A small assembler for a small computer. ### Minimum Supported Rust Version (MSRV) -This crate is currently guaranteed to compile on stable Rust 1.42 and newer. We offer no guarantees that this will remain true in future releases but do promise to always support (at minimum) the latest stable Rust version and to document changes to the MSRV in the [changelog](CHANGELOG.md). +This crate is currently guaranteed to compile on stable Rust 1.56.1 and newer. We offer no guarantees that this will remain true in future releases but do promise to always support (at minimum) the latest stable Rust version and to document changes to the MSRV in the [changelog](CHANGELOG.md). (TODO!) 
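Taken together, patches 66–68 rework the CLI's error path: every `Error` now carries a `SourceId`, reports are rendered against an `ariadne` cache built by `sources`, and `report_to_string` collects all reports into a single `String`. A minimal sketch of driving that API from a caller other than the `as` binary, assuming only the `sources`, `parse_and_analyze_file`, and `report_to_string` signatures shown in the diffs above (the `check` wrapper, its path handling, and its error message are illustrative, not part of the crate):

```rust
use std::path::PathBuf;
use lc3_assembler::{parse_and_analyze_file, sources, LeniencyLevel};

// Illustrative wrapper; not part of the crate's API.
fn check(path: PathBuf) -> Result<(), Box<dyn std::error::Error>> {
    // Build the ariadne cache up front so that error reports can quote the
    // source file by its SourceId (the stringified path), as `as.rs` does.
    let cache = sources([path.clone()])?;

    match parse_and_analyze_file(&path, LeniencyLevel::Lenient) {
        Ok(_) => {
            println!("{}: No errors found.", path.display());
            Ok(())
        }
        Err(error) => {
            // Render every report into one String (patch 68) instead of
            // printing each report separately.
            eprint!("{}", error.report_to_string(cache).expect("failed to render error reports"));
            Err("assembly check failed".into())
        }
    }
}
```

As in `as.rs`, the cache is built before parsing so that any resulting reports can quote the offending file by path.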
From 0b9b55c053c9d6aabd182c5081b763c8fb378acb Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 22 Jun 2022 02:15:32 -0500 Subject: [PATCH 70/82] assembler: deduplicate some assembly code --- assembler/src/assemble.rs | 285 ++++++++++++-------------------------- 1 file changed, 89 insertions(+), 196 deletions(-) diff --git a/assembler/src/assemble.rs b/assembler/src/assemble.rs index 6810437..775108d 100644 --- a/assembler/src/assemble.rs +++ b/assembler/src/assemble.rs @@ -121,6 +121,10 @@ pub(crate) enum Instruction { } impl Instruction { + fn new_trap(trap_vec: u8) -> Self { + Self::Trap { trap_vec } + } + fn addresses_occupied(&self) -> Addr { match self { Instruction::Blkw { size } => *size, @@ -154,112 +158,53 @@ pub(crate) enum ObjectWord { } +macro_rules! try_map_operands { + ($operands:expr => $variant:ident { $($field:ident),*$(,)* }) + => + { + { + let mut os = $operands.into_iter(); + let i = Instruction::$variant { + $($field: try_map(os.next())?,)* + }; + Ok(i) + } + } +} + impl TryFrom<(WithErrData, WithErrData>>)> for Instruction { type Error = (); + fn try_from((raw_opcode, raw_operands): (WithErrData, WithErrData>>)) -> Result { let operands = result(raw_operands)?; match result(raw_opcode)? { - Opcode::Add => { - let mut os = operands.into_iter(); - let dr = try_map(os.next())?; - let sr1 = try_map(os.next())?; - let sr2_or_imm5 = try_map(os.next())?; - Ok(Instruction::Add { dr, sr1, sr2_or_imm5 }) - } - Opcode::And => { - let mut os = operands.into_iter(); - let dr = try_map(os.next())?; - let sr1 = try_map(os.next())?; - let sr2_or_imm5 = try_map(os.next())?; - Ok(Instruction::And { dr, sr1, sr2_or_imm5 }) - } + Opcode::Add => try_map_operands!( operands => Add { dr, sr1, sr2_or_imm5 } ), + Opcode::And => try_map_operands!( operands => And { dr, sr1, sr2_or_imm5 } ), Opcode::Br(cond_codes) => { let mut os = operands.into_iter(); let pc_offset9 = try_map(os.next())?; Ok(Instruction::Br { cond_codes, pc_offset9 }) } - Opcode::Jmp => { - let mut os = operands.into_iter(); - let base = try_map(os.next())?; - Ok(Instruction::Jmp { base }) - } - Opcode::Jsr => { - let mut os = operands.into_iter(); - let pc_offset11 = try_map(os.next())?; - Ok(Instruction::Jsr { pc_offset11 }) - } - Opcode::Jsrr => { - let mut os = operands.into_iter(); - let base = try_map(os.next())?; - Ok(Instruction::Jsrr { base }) - } - Opcode::Ld => { - let mut os = operands.into_iter(); - let dr = try_map(os.next())?; - let pc_offset9 = try_map(os.next())?; - Ok(Instruction::Ld { dr, pc_offset9 }) - } - Opcode::Ldi => { - let mut os = operands.into_iter(); - let dr = try_map(os.next())?; - let pc_offset9 = try_map(os.next())?; - Ok(Instruction::Ldi { dr, pc_offset9 }) - } - Opcode::Ldr => { - let mut os = operands.into_iter(); - let dr = try_map(os.next())?; - let base = try_map(os.next())?; - let offset6 = try_map(os.next())?; - Ok(Instruction::Ldr { dr, base, offset6 }) - } - Opcode::Lea => { - let mut os = operands.into_iter(); - let dr = try_map(os.next())?; - let pc_offset9 = try_map(os.next())?; - Ok(Instruction::Lea { dr, pc_offset9 }) - } - Opcode::Not => { - let mut os = operands.into_iter(); - let dr = try_map(os.next())?; - let sr = try_map(os.next())?; - Ok(Instruction::Not { dr, sr }) - } - Opcode::Ret => Ok(Instruction::Ret), - Opcode::Rti => Ok(Instruction::Rti), - Opcode::St => { - let mut os = operands.into_iter(); - let sr = try_map(os.next())?; - let pc_offset9 = try_map(os.next())?; - Ok(Instruction::St { sr, pc_offset9 }) - } - Opcode::Sti => { - let mut os = 
operands.into_iter(); - let sr = try_map(os.next())?; - let pc_offset9 = try_map(os.next())?; - Ok(Instruction::Sti { sr, pc_offset9 }) - } - Opcode::Str => { - let mut os = operands.into_iter(); - let sr = try_map(os.next())?; - let base = try_map(os.next())?; - let offset6 = try_map(os.next())?; - Ok(Instruction::Str { sr, base, offset6 }) - } - Opcode::Trap => { - let mut os = operands.into_iter(); - let trap_vec = try_map(os.next())?; - Ok(Instruction::Trap { trap_vec }) - } + Opcode::Jmp => try_map_operands!( operands => Jmp { base }), + Opcode::Jsr => try_map_operands!( operands => Jsr { pc_offset11 }), + Opcode::Jsrr => try_map_operands!( operands => Jsrr { base }), + Opcode::Ld => try_map_operands!( operands => Ld { dr, pc_offset9 }), + Opcode::Ldi => try_map_operands!( operands => Ldi { dr, pc_offset9 }), + Opcode::Ldr => try_map_operands!( operands => Ldr { dr, base, offset6 }), + Opcode::Lea => try_map_operands!( operands => Lea { dr, pc_offset9 }), + Opcode::Not => try_map_operands!( operands => Not { dr, sr }), + Opcode::Ret => Ok(Instruction::Ret), + Opcode::Rti => Ok(Instruction::Rti), + Opcode::St => try_map_operands!( operands => St { sr, pc_offset9 }), + Opcode::Sti => try_map_operands!( operands => Sti { sr, pc_offset9 }), + Opcode::Str => try_map_operands!( operands => Str { sr, base, offset6 }), + Opcode::Trap => try_map_operands!( operands => Trap { trap_vec }), // TODO: improve error Opcode::Orig => Err(()), - Opcode::Fill => { - let mut os = operands.into_iter(); - let value = try_map(os.next())?; - Ok(Instruction::Fill { value }) - } + Opcode::Fill => try_map_operands!( operands => Fill { value }), Opcode::Blkw => { let mut os = operands.into_iter(); let size = try_result(os.next())?.get_unqualified_number_value().ok_or(())?; @@ -271,12 +216,12 @@ impl TryFrom<(WithErrData, WithErrData>>)> for Ok(Instruction::Stringz { string }) } - Opcode::Getc => Ok(Instruction::Trap { trap_vec: 0x20 }), - Opcode::Out => Ok(Instruction::Trap { trap_vec: 0x21 }), - Opcode::Puts => Ok(Instruction::Trap { trap_vec: 0x22 }), - Opcode::In => Ok(Instruction::Trap { trap_vec: 0x23 }), - Opcode::Putsp => Ok(Instruction::Trap { trap_vec: 0x24 }), - Opcode::Halt => Ok(Instruction::Trap { trap_vec: 0x25 }), + Opcode::Getc => Ok(Instruction::new_trap(0x20)), + Opcode::Out => Ok(Instruction::new_trap(0x21)), + Opcode::Puts => Ok(Instruction::new_trap(0x22)), + Opcode::In => Ok(Instruction::new_trap(0x23)), + Opcode::Putsp => Ok(Instruction::new_trap(0x24)), + Opcode::Halt => Ok(Instruction::new_trap(0x25)), } } } @@ -303,126 +248,74 @@ pub(crate) fn calculate_offset(location_counter: i32, label_address: i32) -> Res (label_address - (location_counter + 1)).try_into() } + +impl From for AssemblyResult { + fn from(i: lc3_isa::Instruction) -> Self { + AssemblyResult::SingleObjectWord(ObjectWord::Value(i.into())) + } +} + + pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: &Addr, instruction: Instruction) -> Result { use AssemblyResult::*; use ObjectWord::*; - let res = match instruction { - Instruction::Add { dr, sr1, sr2_or_imm5 } => { - let word = - match sr2_or_imm5 { - Sr2OrImm5::Sr2(sr2) => lc3_isa::Instruction::new_add_reg(dr, sr1, sr2), - Sr2OrImm5::Imm5(imm5) => lc3_isa::Instruction::new_add_imm(dr, sr1, imm5), - }.into(); - SingleObjectWord(Value(word)) - } - Instruction::And { dr, sr1, sr2_or_imm5 } => { - let word = - match sr2_or_imm5 { - Sr2OrImm5::Sr2(sr2) => lc3_isa::Instruction::new_and_reg(dr, sr1, sr2), - Sr2OrImm5::Imm5(imm5) => 
lc3_isa::Instruction::new_and_imm(dr, sr1, imm5), - }.into(); - SingleObjectWord(Value(word)) - } - Instruction::Br { cond_codes: ConditionCodes { n, z, p }, pc_offset9 } => { - match pc_offset9 { - PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_br(n, z, p, sw).into())), - PcOffset::Label(label) => - match symbol_table.get(&label) { - Some(addr) => { - let offset = calculate_addr_offset(location_counter, addr)?; - SingleObjectWord(Value(lc3_isa::Instruction::new_br(n, z, p, offset).into())) - } - None => SingleObjectWord(UnlinkedInstruction(Instruction::Br { cond_codes: ConditionCodes { n, z, p }, pc_offset9: PcOffset::Label(label) })), - } - } - } - Instruction::Jmp { base } => SingleObjectWord(Value(lc3_isa::Instruction::new_jmp(base).into())), - Instruction::Jsr { pc_offset11 } => { - match pc_offset11 { - PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_jsr(sw).into())), - PcOffset::Label(label) => - match symbol_table.get(&label) { - Some(addr) => { - let offset = calculate_addr_offset(location_counter, addr)?; - SingleObjectWord(Value(lc3_isa::Instruction::new_jsr(offset).into())) - } - None => SingleObjectWord(UnlinkedInstruction(Instruction::Jsr { pc_offset11: PcOffset::Label(label) })), - } - } - } - Instruction::Jsrr { base } => SingleObjectWord(Value(lc3_isa::Instruction::new_jsrr(base).into())), - Instruction::Ld { dr, pc_offset9 } => { - match pc_offset9 { - PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_ld(dr, sw).into())), + macro_rules! assemble_pc_offset { + ($pc_offset:ident => $new_i:ident, $instr:ident { $($field:ident),*$(,)* } ) + => + { + match $pc_offset { + PcOffset::Number(sw) => lc3_isa::Instruction::$new_i($($field,)* sw).into(), PcOffset::Label(label) => match symbol_table.get(&label) { Some(addr) => { let offset = calculate_addr_offset(location_counter, addr)?; - SingleObjectWord(Value(lc3_isa::Instruction::new_ld(dr, offset).into())) + lc3_isa::Instruction::$new_i($($field,)* offset).into() } - None => SingleObjectWord(UnlinkedInstruction(Instruction::Ld { dr, pc_offset9: PcOffset::Label(label)})), + None => SingleObjectWord(UnlinkedInstruction(Instruction::$instr { $($field,)* $pc_offset: PcOffset::Label(label)})), } } } - Instruction::Ldi { dr, pc_offset9 } => { - match pc_offset9 { - PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_ldi(dr, sw).into())), - PcOffset::Label(label) => - match symbol_table.get(&label) { - Some(addr) => { - let offset = calculate_addr_offset(location_counter, addr)?; - SingleObjectWord(Value(lc3_isa::Instruction::new_ldi(dr, offset).into())) - } - None => SingleObjectWord(UnlinkedInstruction(Instruction::Ldi { dr, pc_offset9: PcOffset::Label(label)})), - } - } - } - Instruction::Ldr { dr, base, offset6 } => SingleObjectWord(Value(lc3_isa::Instruction::new_ldr(dr, base, offset6).into())), - Instruction::Lea { dr, pc_offset9 } => { - match pc_offset9 { - PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_lea(dr, sw).into())), - PcOffset::Label(label) => - match symbol_table.get(&label) { - Some(addr) => { - let offset = calculate_addr_offset(location_counter, addr)?; - SingleObjectWord(Value(lc3_isa::Instruction::new_lea(dr, offset).into())) - } - None => SingleObjectWord(UnlinkedInstruction(Instruction::Lea { dr, pc_offset9: PcOffset::Label(label)})), - } - } - } - Instruction::Not { dr, sr } => SingleObjectWord(Value(lc3_isa::Instruction::new_not(dr, sr).into())), - Instruction::Ret => 
SingleObjectWord(Value(lc3_isa::Instruction::new_ret().into())), - Instruction::Rti => SingleObjectWord(Value(lc3_isa::Instruction::new_rti().into())), - Instruction::St { sr, pc_offset9 } => { - match pc_offset9 { - PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_st(sr, sw).into())), - PcOffset::Label(label) => - match symbol_table.get(&label) { - Some(addr) => { - let offset = calculate_addr_offset(location_counter, addr)?; - SingleObjectWord(Value(lc3_isa::Instruction::new_st(sr, offset).into())) - } - None => SingleObjectWord(UnlinkedInstruction(Instruction::St { sr, pc_offset9: PcOffset::Label(label)})), - } - } - } - Instruction::Sti { sr, pc_offset9 } => { + } + + let res = match instruction { + Instruction::Add { dr, sr1, sr2_or_imm5 } => + match sr2_or_imm5 { + Sr2OrImm5::Sr2(sr2) => lc3_isa::Instruction::new_add_reg(dr, sr1, sr2), + Sr2OrImm5::Imm5(imm5) => lc3_isa::Instruction::new_add_imm(dr, sr1, imm5), + }.into(), + Instruction::And { dr, sr1, sr2_or_imm5 } => + match sr2_or_imm5 { + Sr2OrImm5::Sr2(sr2) => lc3_isa::Instruction::new_and_reg(dr, sr1, sr2), + Sr2OrImm5::Imm5(imm5) => lc3_isa::Instruction::new_and_imm(dr, sr1, imm5), + }.into(), + Instruction::Br { cond_codes: ConditionCodes { n, z, p }, pc_offset9 } => { match pc_offset9 { - PcOffset::Number(sw) => SingleObjectWord(Value(lc3_isa::Instruction::new_sti(sr, sw).into())), + PcOffset::Number(sw) => lc3_isa::Instruction::new_br(n, z, p, sw).into(), PcOffset::Label(label) => match symbol_table.get(&label) { Some(addr) => { let offset = calculate_addr_offset(location_counter, addr)?; - SingleObjectWord(Value(lc3_isa::Instruction::new_sti(sr, offset).into())) + lc3_isa::Instruction::new_br(n, z, p, offset).into() } - None => SingleObjectWord(UnlinkedInstruction(Instruction::Sti { sr, pc_offset9: PcOffset::Label(label)})), + None => SingleObjectWord(UnlinkedInstruction(Instruction::Br { cond_codes: ConditionCodes { n, z, p }, pc_offset9: PcOffset::Label(label) })), } } } - Instruction::Str { sr, base, offset6 } => SingleObjectWord(Value(lc3_isa::Instruction::new_str(sr, base, offset6).into())), - Instruction::Trap { trap_vec } => SingleObjectWord(Value(lc3_isa::Instruction::new_trap(trap_vec).into())), + Instruction::Jmp { base } => lc3_isa::Instruction::new_jmp(base).into(), + Instruction::Jsr { pc_offset11 } => assemble_pc_offset!(pc_offset11 => new_jsr, Jsr {}), + Instruction::Jsrr { base } => lc3_isa::Instruction::new_jsrr(base).into(), + Instruction::Ld { dr, pc_offset9 } => assemble_pc_offset!(pc_offset9 => new_ld, Ld { dr, }), + Instruction::Ldi { dr, pc_offset9 } => assemble_pc_offset!(pc_offset9 => new_ldi, Ldi { dr, }), + Instruction::Ldr { dr, base, offset6 } => lc3_isa::Instruction::new_ldr(dr, base, offset6).into(), + Instruction::Lea { dr, pc_offset9 } => assemble_pc_offset!(pc_offset9 => new_lea, Lea { dr, }), + Instruction::Not { dr, sr } => lc3_isa::Instruction::new_not(dr, sr).into(), + Instruction::Ret => lc3_isa::Instruction::new_ret().into(), + Instruction::Rti => lc3_isa::Instruction::new_rti().into(), + Instruction::St { sr, pc_offset9 } => assemble_pc_offset!(pc_offset9 => new_st, St { sr, }), + Instruction::Sti { sr, pc_offset9 } => assemble_pc_offset!(pc_offset9 => new_sti, Sti { sr, }), + Instruction::Str { sr, base, offset6 } => lc3_isa::Instruction::new_str(sr, base, offset6).into(), + Instruction::Trap { trap_vec } => lc3_isa::Instruction::new_trap(trap_vec).into(), Instruction::Fill { value } => { match value { From 262d1924ccaa196ca4cfd5aed2c9ddc93232ae96 Mon Sep 17 
00:00:00 2001 From: David Gipson Date: Tue, 28 Jun 2022 14:04:40 -0500 Subject: [PATCH 71/82] assembler: document top level of library --- .cargo/config.toml | 2 + assembler/README.md | 112 +++++++++++- assembler/docs/example/example_output.txt | 15 ++ assembler/docs/id_arg.md | 5 + assembler/docs/images/main_workflow.png | Bin 0 -> 40027 bytes assembler/docs/no_os_arg.md | 4 + assembler/docs/tests/add.asm | 3 + assembler/docs/tests/bad_operand.asm | 3 + assembler/src/error.rs | 13 ++ assembler/src/lib.rs | 199 +++++++++++++++++++--- 10 files changed, 331 insertions(+), 25 deletions(-) create mode 100644 .cargo/config.toml create mode 100644 assembler/docs/example/example_output.txt create mode 100644 assembler/docs/id_arg.md create mode 100644 assembler/docs/images/main_workflow.png create mode 100644 assembler/docs/no_os_arg.md create mode 100644 assembler/docs/tests/add.asm create mode 100644 assembler/docs/tests/bad_operand.asm diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..149159a --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustdocflags = "-C link-args=/STACK:8194304" # Increase stack size 8MB when running doctests \ No newline at end of file diff --git a/assembler/README.md b/assembler/README.md index 12e1df6..2ad42ed 100644 --- a/assembler/README.md +++ b/assembler/README.md @@ -1,12 +1,118 @@ ### `lc3-assembler` crate [![](https://github.com/ut-utp/assembler/workflows/assembler/badge.svg)](https://github.com/ut-utp/assembler/actions?query=workflow%3Aassembler) -[![Minimum supported Rust version](https://img.shields.io/badge/rustc-1.42+-red.svg?style=for-the-badge&logo=rust)](#minimum-supported-rust-version-msrv) +[![Minimum supported Rust version](https://img.shields.io/badge/rustc-1.56+-red.svg?style=for-the-badge&logo=rust)](#minimum-supported-rust-version-msrv) A small assembler for a small computer. +#### Example + +##### Input + +```asm + .ORIG x3000 + LD R0, A0 + LD R1, A1 + ADD R2, R0, #R1 + ST R2, RESULTS +A0 .FILL #1 +A1 .FILL #2 +RESULT .BLKW 0 + .END +``` + +##### Error Output +```text +Error: invalid operand + ╭─[.\example\example.asm:4:25] + │ + 4 │ ADD R2, R0, #R1 + · ─┬─ + · ╰─── here +───╯ +Error: reference to label RESULTS invalid: not previously defined + ╭─[.\example\example.asm:5:21] + │ + 5 │ ST R2, RESULTS + · ───┬─── + · ╰───── here +───╯ +Error: assembly failed +``` + +#### Features +- A command line application for assembling [Little Computer 3 (LC-3)](https://en.wikipedia.org/wiki/Little_Computer_3) assembly programs +- A Rust library for parsing, analyzing, and assembling LC-3 programs + +### Command Line Application (CLI) + +The CLI is an LC-3 assembler that can be installed and used from your command line. + +#### Installation + +Run the following in your command line: + +(TODO) + +#### Usage + +Give the CLI a path to a file containing an LC-3 assembly program: + +(TODO: command) + +If it is valid, the CLI will assemble the program into LC-3 machine code +and store it in a new binary `.mem` file (in this case, `foo.mem`). +You can then use the [UTP TUI](https://github.com/ut-utp/tui/) to load the binary file into an LC-3 emulator and run it. + +If the program is invalid, the CLI will instead print error messages indicating what is wrong (to `stderr`). + +For more options and usage information, run using the `--help` option: + +(TODO: command) + +### Library + +The library provides Rust functions for assembling LC-3 programs. 
+ +These functions are split into modules, most of which encapsulate typical [compiler phases](https://en.wikipedia.org/wiki/Compiler#Three-stage_compiler_structure). +These are intended for use in order: + +![A diagram indicating the order of data flow through the main modules: lex, parse, analyze, assemble, link, then layer.](https://raw.githubusercontent.com/ut-utp/assembler/master/assembler/docs/images/main_workflow.png) + +Together, the first three modules, `lex`, `parse`, and `analyze`, check that the input is valid LC-3 assembly +and parse it into a data structure (called `parse::File`, or "the syntax tree") which can be more easily assembled. +The last three modules, `assemble`, `link`, and `layer`, generate the machine code for the program and store it +as an LC-3 memory image. + +Each of these modules provides one main public function. You can use them individually, +or use the functions in the top-level module which already combine the steps as shown in the diagram above. + +For examples and more detailed information, see the API documentation for each function and module. + +#### Design + +Our goals when designing the library are, in order of priority: +1. *No "False Negatives"* -- Correctly assemble any valid LC-3 program. +2. *No "False Positives"* -- Reject any input which is not a valid LC-3 program. +3. *Maintainability* -- Provide developer documentation and flexible, debuggable abstractions. +4. *User Experience* -- Provide user documentation and high-quality error messages. +5. *Performance (Speed)* -- Run quickly. +6. *Performance (Memory)* -- Use little memory. + +Goals 1 and 2 ensure that assembly behaves as expected and provides a minimum level of feedback. + +Goal 3 is to make sure any bugs can be fixed for the foreseeable future; maintenance will likely fall to the TAs +of UT Austin's Electrical and Computer Engineering department, who may change each semester. + +Goal 4 is a priority because LC-3 assembly is an educational language. +We want to help students identify and correct their assembly errors in a way that reinforces *why* the errors occurred. +With this support, we hope to help students continue more quickly and confidently to debugging semantic errors. + +Goals 5 and 6 aim to make the tool accessible to a wide audience and provide a good experience, +no matter the power of their computers. Of course, assembly is a simple task for almost any PC today, +so these are our lowest priorities. + + ### Minimum Supported Rust Version (MSRV) This crate is currently guaranteed to compile on stable Rust 1.56.1 and newer. We offer no guarantees that this will remain true in future releases but do promise to always support (at minimum) the latest stable Rust version and to document changes to the MSRV in the [changelog](CHANGELOG.md). - -(TODO!) 
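A rough sketch of the top-level flow the README above describes, assuming the `lc3_assembler` crate name and the `parse_and_analyze_file`/`assemble_file` signatures added later in this patch; the file path and the 16-bit word formatting are illustrative assumptions, not part of the patch:

```rust
// Sketch only: check a source file, then assemble it into a memory image.
use std::path::PathBuf;
use lc3_assembler::{assemble_file, parse_and_analyze_file, LeniencyLevel};

fn check_and_assemble(path: &str) -> Result<(), String> {
    let src_path = PathBuf::from(path); // placeholder path

    // lex + parse + analyze: Ok only if the input is valid LC-3 assembly.
    parse_and_analyze_file(&src_path, LeniencyLevel::Lenient)
        .map_err(|_| String::from("program is not valid LC-3 assembly"))?;

    // assemble + link (and include the OS, since `no_os` is false):
    // produces the LC-3 memory image.
    let mem = assemble_file(&src_path, LeniencyLevel::Lenient, false)
        .map_err(|_| String::from("assembly failed"))?;

    // As in the doctests, the image is indexable by LC-3 address; the word
    // is assumed to be a 16-bit integer for the hex formatting below.
    println!("word at x3000: {:#06x}", mem[0x3000]);
    Ok(())
}
```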
diff --git a/assembler/docs/example/example_output.txt b/assembler/docs/example/example_output.txt new file mode 100644 index 0000000..9dbbd1a --- /dev/null +++ b/assembler/docs/example/example_output.txt @@ -0,0 +1,15 @@ +Error: invalid operand + ╭─[.\example\example.asm:4:25] + │ + 4 │ ADD R2, R0, #R1 + · ─┬─ + · ╰─── here +───╯ +Error: reference to label RESULTS invalid: not previously defined + ╭─[.\example\example.asm:5:21] + │ + 5 │ ST R2, RESULTS + · ───┬─── + · ╰───── here +───╯ +Error: assembly failed \ No newline at end of file diff --git a/assembler/docs/id_arg.md b/assembler/docs/id_arg.md new file mode 100644 index 0000000..db74d34 --- /dev/null +++ b/assembler/docs/id_arg.md @@ -0,0 +1,5 @@ +`id` should be the [`SourceId`] of the source file containing the source `String`. +This can be obtained using [`id`]: `id(&std::path::PathBuf::from("./path/to/source.asm"))`. +If the input is not from a file, the `id` of any path (even `""`) will suffice. +This argument is only to improve error messages by indicating the source file. + diff --git a/assembler/docs/images/main_workflow.png b/assembler/docs/images/main_workflow.png new file mode 100644 index 0000000000000000000000000000000000000000..201fffb628aa1b6edb7989b9e11d6ae9a9f42bd8 GIT binary patch literal 40027 zcmXte1z3~c_c*1Xh?IgfD4`Nk8!e0&IY!4Qi7{XS14cK3AgLfor+|QTH;9BtNq4t^ zNJ;}~DgHvi!W3knPK3j+CtMfC)QxkNxhBKU`(h=2${9QGf+B?6B5A3=bCAfCWe zLwk&kqr1zq|Hj?m&Mx+jnE%KLi3x}b0RJ0+yILatgDTq!d)V0m;5KT)o+x28w6=$( z=YKe^o=))pa1f4;2o(GuR743c&j0wvFK7QhPLZnmC@ct!6IOw#iGnQ*#H}@4{zDft zaJI6-L4=jG1T8@lXcY}Z4;^uwzK5%`maDd?suTUPsmLLC1^^Hmil3+Cs%fVT09q;-i=xD^23BZGC3PDKn47S@ zgoX$XjNc5ce@!X9`&9bJvwbwReeXirfC2pr<><)ti& z1zPDjIVw9tG~m|Gq5znRD$dAIOc^SUa`tl4)KIq*ca*Ti&lQwF0Ud30j5VCS9Q438 zHX4?~NO2WiU3(D&K|?3Fu@=t2$W__P9)ks2qL7*h6?Ob1Fjh$i|IZkvX9&?#f~(jF zLR38!APQbi))HDk2+Tp$!9frRQ&dm`>N{#%LM*YKh6?z7SB9f>4b_zm+~r-d+JdgG z>SFdld24Y6oP@qO$jZi5&rMUy$jipi)7DZKW(yKF)B%X9xd@|`RY4dhFBL_&Awb&` zYlP4fHW1Xe)Y8z`RKinLw8rxyZtJ9p5%sdtl6L|s!hjGxK?N^SJ!h!8zKEfvo*obi zbJ21`Yal$7bv&TXFsLHL!4WCpqM~H&rh%Uh)UY*%*{Q1_9j$B}FnFRUH#kHWWF#-H zsfmOO;W04k4ldUAZVHMLifBb0Wova~eJc?RSi#d!Rm)LKRb5ry(#cs{M-gU;|8&6i z`hs2vRTmeeodKQ?aS=y|vOZefMI7KIXk_H1<>6vvrKh1I4%5<7!9X<~-6bSMG{G1E zekMd&)K=ZvNy$}M9;%7fQc%Y_Awiz1Xb@Nn>VgoF0HM6Z0P;9-D{WmXQ9F4FJZlR7 zG)3DA>Zs%9pbXQ}@zk?)R1)^WsX`#)o)SPuH;k=>n~|rAqBcNGMHC>WhF`|j7L7EL z(6++AimO?wN_arDmGN@RyBQdP0dRXfSQRD?^N?53b+MBdM8fRkt&vU+ieML*ftZ1c zf~PaYTKV670#>qez&{m5l(8xh8y6cLO&j-rJh@pSy#N}Dmd0Y1AbBq*uqH}aQ(4sA z%TiSjq2`LfV_2%&!LdR@Kr4Aqw5F<^AXrxkU@LB=>h7e6zrE!39dKfXDoXBR+6G=Q zZC6K(otvnxiw+v2qU;V8mk@Qrx{K>WV8S{IaFC)KQcT&(MFZ!ERD|Nh^#wHyG$8s$ zb})6QwvwQZjjE-DpsJ-Y2#9b+Icd15!Zkz{m2D-gJypF#D1v`Q!G(mtnp(oPc=DRM z+D1xR!p25WA!iqeAW%n51!)8G($R29j%A6>QKp$3^`pL_{!g#%b4MdJ7|6PMhKN3!1Uw+pC==Gt!6;_S1%C)E zkXr-471B*)n0ua$VSOB0C>|%(k;$F>Dj4~_qI((pT6IgA5$!Hj3z0$KQN`W)p)2PEV)2Cb+X^izRFvAu-?rsc?{Q1F)*F*6W zd1Eyr7;|3Figo{|e2AFu_a+#j7=rVepg`kW5riuS62ww4_WPs7b8D=f9C%O;wNPG&P3!Iw=VjTR#vRH;$%3l){mGz zyAphe3mC&J{$-JHMt*a6VrQ9Imv3>Hi<_|4x|(&r6}Eo7Wb&fqed4u-_KkC2oD4LA zZZoW@MHxJ+-P6hN_fVC4pFBo;R%bn9c7f^#p#|l5vqba#PnJ902<|JE23ph?iY=m$ zE3`V3=E5GldMo^CtFd+eelL%@7E?nBAhPY6=*@!wB_Yo%kD0Z%avvI>`9A84O0Z}% z0}0#*9iq>V)+HZ}5}kcArpI3tbh(~~TKBpi@3hAazwbR0sAY|ex7FylI}o*;vDb8P zPMRTRyCDA|2DD>%UelHSRr-dt3X_=HW4;t{@qC}VH;O;gxg-~tA-OH;o-z#$0ewB) zt0pSmr9R^U-pt=I%|9U;qP33ysF7qMLkZpiKQ(&W^2gawjUIHGFbCH1u0Wy8_>eq~ zR}VKS7lLb`j+54W9*=7ikmi+yee5^ay>pw{l!rVLn_aw3OKYruF!839<3Z(mRXmD& zLW21zX=g`}%7$aQ98wGe!`W(FN8@m9-rsj*RD_S^NW$9esOr`XVxze8-$7rmD39FR 
z6jlRCPT73ikXdf!qZW}s&z_F_)%!@}&K&AjARu^$Da2>WxZ)19Z8sycp~_fl;+Ef ze^Z>Rh2RcGlF=mkLaOJO0|<>jbdsL<{o6Wkqki++Qq;D%hh!f=QDbaLdqRo>mm(({ zyh#jFxVB-vOgvp$mXrh|ga9t`?|;1PHnN;3zhO)-G4^TWH)v5`L_KTGFP}<|^XCo6 z4Uva#gT?3&le6b6PUh8I8ZHupooK1JZ9TEl!L;Yjs|(}dZPQ5RDC?#lgyTw-mIs(` z&j)@Rt51}!u<($C)`Sa8=y=H{wqLRnOZtSioy7?*KhWEa;#gCZf_#W$+-0Iy+fm`p zS+u1B3bcSpEpdNz`DLok9_4t%I9o7RsnZ)(kk)39$VAc}YUV}GvWI<+y(~e>JSXyv zAZ=NKgF!?sb_}YH7nU7x2P|5ZRh=ebFBl0LB4C+&@(=ts;HMbjs4p=QUHVT$_zO%O z@UyA|u5;ib!&g@hY76rvZ+{VYcLj%4x^^Jr$6)sEhqLthj(N*dB z@=lWS-02sJS+ebo{s<8=Xq&kqrTM)v zttMDpUI-~rkSPFtexn_oNzvV4uApI{Ct?2zB{+a0SYZRvl&O;t#C+ue9NNGWWV^Ic z#;@wdm}c()Xk1Sh3}Qj^RwX>R3)ql|IILTiBcVx>eJqZR#S#YL>ZX~I3}5orr`g;B z3HzC*k63M47T`WYN*?Vm7oAp=AVCW)JCrLLF;e+Uuc73{P)_i`j0Y3sV!5SZ$Ge?X z=Ls?$WyV2khzI~V2OAq21!^Zd2K8pfTI&DHgNI0~*)}?5YYHa&MgFDI+UH~C?u z@D|0rZov7k0R)B$ytVGG? zG$723gy#Q9_9_XbP(!UJam`W3O7A2} z?-_}u03DHBtj^JziQLGp!Re6TlH_n0q;$`%Mp+&cCDbSDc@wy={0!QT*J!xIXks%e7U^~U@y z4OcXs26M?nc{!}*1;JrdEP>7)9Wl-xJ#50ifzz&>k^FD~#KFJWjY{Xb9#Aa^*{CxK zJjd$?>Wg38-rZ?`axrS^zq~`bKU{+(fP}K(E)^jPQ`y Option<&SingleError> { + use Error::*; + match self { + Single(_, error) => Some(error), + Spanned(_, error) => Some(error), + Multiple(errors) => + match errors.get(0) { + Some(e) => e.get_first_single_error(), + None => None, + }, + } + } } pub(crate) type RoughAddr = i32; diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 7c08b08..2697d03 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -1,5 +1,6 @@ // TODO: docs // TODO: docs URL +#![doc = include_str!("../README.md")] // TODO: add more lints? #![deny(unused)] @@ -24,28 +25,6 @@ type Span = std::ops::Range; type Spanned = (T, Span); type WithErrData = Spanned>; -pub type SourceId = String; - -#[derive(Debug, Clone)] -pub struct SpanWithSource { - id: SourceId, - span: Span, -} - -impl From<(SourceId, Span)> for SpanWithSource { - fn from((id, span): (SourceId, Span)) -> Self { - Self { id, span } - } -} - -impl ariadne::Span for SpanWithSource { - type SourceId = SourceId; - - fn source(&self) -> &Self::SourceId { &self.id } - fn start(&self) -> usize { self.span.start } - fn end(&self) -> usize { self.span.end } -} - fn get(v: &Vec>, i: usize) -> Option<&T> { v.get(i) .and_then(|res| get_result(res).as_ref().ok()) @@ -71,12 +50,65 @@ fn try_map(maybe_v: Option>) -> Result where .map_err(|_| ()) } +/// An identifier for a unique source file. Produced by [`id`]. +/// +/// Used in error messages to indicate +/// which source file the text in the error is from. +pub type SourceId = String; + + +/// Data indicating a substring in a specific source file. +/// +/// Primarily used to identify the exact source code which caused an error. +#[derive(Debug, Clone)] +pub struct SpanWithSource { + id: SourceId, + span: Span, +} + +impl From<(SourceId, Span)> for SpanWithSource { + fn from((id, span): (SourceId, Span)) -> Self { + Self { id, span } + } +} + +impl ariadne::Span for SpanWithSource { + type SourceId = SourceId; + + fn source(&self) -> &Self::SourceId { &self.id } + fn start(&self) -> usize { self.span.start } + fn end(&self) -> usize { self.span.end } +} + +/// The level of leniency to be used when parsing and identifying errors. +/// +/// Officially, LC-3 assembly follows strict syntax rules which can be inconvenient. +/// For example, labels officially cannot exceed 20 characters. +/// To enforce these rules, use [`LeniencyLevel::Strict`]. 
+/// +/// [`LeniencyLevel::Lenient`] allows the following: +/// (TODO) #[derive(Copy, Clone)] pub enum LeniencyLevel { + /// Indicates that all convenience features (described under [`LeniencyLevel`]) are to be allowed. Lenient, + + /// Indicates that all official rules of the LC-3 assembly language + /// are to be followed, as described in *Introduction to Computing Systems: from Bits & Gates to C/C++ & Beyond*, + /// by Patt and Patel. Strict } + +/// Read and cache the given source files for use in printing error messages. +/// +/// To print error messages correctly, this function must +/// be given the same paths to the source files which were +/// input to the function which returned the errors, +/// then the resulting cache must be passed to the error printing function. +/// +/// This function reads all the given source files to memory, +/// so be aware that large files may cause significant memory usage. pub fn sources(iter: impl IntoIterator) -> Result, std::io::Error> { let sources = iter.into_iter() .map(|input| Ok((id(&input), read(&input)?))) @@ -84,20 +116,94 @@ pub fn sources(iter: impl IntoIterator) -> Result Result { fs::read_to_string(input.clone()) } + +/// Get a [`SourceId`] for the given source file. pub fn id(input: &PathBuf) -> SourceId { input.to_string_lossy().to_string() } + +/// Check whether the given file contains valid LC-3 assembly code. +/// +/// Reads the given file, then parses and analyzes its contents for errors, +/// returning a syntax tree if successful, +/// and otherwise, the errors that were found. +/// +/// # Examples +/// ## Success +/// `add.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/add.asm")] +/// ``` +/// ```ignore +/// # use lc3_assembler::*; +/// let src_path = std::path::PathBuf::from("../docs/tests/add.asm"); +/// let result = parse_and_analyze_file(&src_path, LeniencyLevel::Lenient); +/// assert!(result.is_ok()); +/// ``` +/// +/// ## Error +/// `bad_operand.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/bad_operand.asm")] +/// ``` +/// ```ignore +/// # use lc3_assembler::*; +/// use assert_matches::assert_matches; +/// let src_path = std::path::PathBuf::from("../docs/tests/bad_operand.asm"); +/// let error = parse_and_analyze_file(&src_path, LeniencyLevel::Lenient).unwrap_err(); +/// let first_error = error.get_first_single_error().unwrap(); +/// assert_matches!(first_error, error::SingleError::BadOperand); +/// ``` pub fn parse_and_analyze_file(input: &PathBuf, leniency: LeniencyLevel) -> Result { let id = id(&input); let src = read(input).map_err(|e| (id.clone(), e))?; parse_and_analyze(&id, &src, leniency) } + +/// Check whether the given `String` is valid LC-3 assembly code. +/// +/// Parses, then analyzes the given `String` for errors, +/// returning a syntax tree if successful, +/// and otherwise, the errors that were found. 
+/// +#[doc = include_str!("../docs/id_arg.md")] +/// +/// # Examples +/// ## Success +/// `add.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/add.asm")] +/// ``` +/// ``` +/// # use lc3_assembler::*; +/// let src = include_str!("../docs/tests/add.asm").to_string(); +/// let src_id = id(&std::path::PathBuf::from("../docs/tests/add.asm")); +/// let result = parse_and_analyze(&src_id, &src, LeniencyLevel::Lenient); +/// assert!(result.is_ok()); +/// ``` +/// +/// ## Error +/// `bad_operand.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/bad_operand.asm")] +/// ``` +/// ``` +/// # use lc3_assembler::*; +/// use assert_matches::assert_matches; +/// let src = include_str!("../docs/tests/bad_operand.asm").to_string(); +/// let src_id = id(&std::path::PathBuf::from("../docs/tests/bad_operand.asm")); +/// let error = parse_and_analyze(&src_id, &src, LeniencyLevel::Lenient).unwrap_err(); +/// let first_error = error.get_first_single_error().unwrap(); +/// assert_matches!(first_error, error::SingleError::BadOperand); +/// ``` pub fn parse_and_analyze(id: &SourceId, src: &String, leniency: LeniencyLevel) -> Result { let (tokens, lex_data) = lex::lex(src, leniency).map_err(|es| error::into_multiple(id.clone(), es))?; let file_spanned = parse::parse(id.clone(), src, tokens, leniency).map_err(|es| error::into_multiple(id.clone(), es))?; @@ -109,12 +215,61 @@ pub fn parse_and_analyze(id: &SourceId, src: &String, leniency: LeniencyLevel) - Ok(file) } + +/// Fully assemble the contents of the given file. +/// +/// Reads the given file, then parses, analyzes, assembles, and links its contents, +/// returning an LC-3 executable image if successful, +/// and otherwise, the error(s) that were found. +/// +#[doc = include_str!("../docs/no_os_arg.md")] +/// +/// # Examples +/// `add.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/add.asm")] +/// ``` +/// ```ignore +/// # use lc3_assembler::*; +/// # fn main() -> Result<(), error::Error> { +/// let src_path = std::path::PathBuf::from("../docs/tests/add.asm"); +/// let mem = assemble_file(&src_path, LeniencyLevel::Lenient, false)?; +/// assert_eq!(mem[0x3000], 0x1000); +/// # Ok(()) +/// # } +/// ``` pub fn assemble_file(input: &PathBuf, leniency: LeniencyLevel, no_os: bool) -> Result { let id = id(&input); let src = read(input).map_err(|e| (id.clone(), e))?; assemble(&id, &src, leniency, no_os) } + +/// Fully assemble the given `String`. +/// +/// Parses, analyzes, assembles, then links the given `String`, +/// returning an LC-3 executable image if successful, +/// and otherwise, the error(s) that were found. 
+/// +#[doc = include_str!("../docs/id_arg.md")] +/// +#[doc = include_str!("../docs/no_os_arg.md")] +/// +/// # Examples +/// `add.asm`: +/// ```asm +#[doc = include_str!("../docs/tests/add.asm")] +/// ``` +/// ``` +/// # use lc3_assembler::*; +/// # fn main() -> Result<(), error::Error> { +/// let src = include_str!("../docs/tests/add.asm").to_string(); +/// let src_id = id(&std::path::PathBuf::from("../docs/tests/add.asm")); +/// let mem = assemble(&src_id, &src, LeniencyLevel::Lenient, false)?; +/// assert_eq!(mem[0x3000], 0x1000); +/// # Ok(()) +/// # } +/// ``` pub fn assemble(id: &SourceId, src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { let file = parse_and_analyze(id, src, leniency)?; let assemble::Object { symbol_table, regions } = assemble::assemble(file).map_err(|_| (id.clone(), error::SingleError::Assemble))?; From 89b5bac5120d3397ef9fd3a16fb8095a7d94df23 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Tue, 28 Jun 2022 14:47:36 -0500 Subject: [PATCH 72/82] assembler: rename link::LinkedRegion to link::Block --- assembler/docs/images/main_workflow.png | Bin 40027 -> 39705 bytes assembler/src/layer.rs | 18 +++++++++--------- assembler/src/lib.rs | 4 ++-- assembler/src/link.rs | 16 ++++++++-------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/assembler/docs/images/main_workflow.png b/assembler/docs/images/main_workflow.png index 201fffb628aa1b6edb7989b9e11d6ae9a9f42bd8..1cdc5d4bd8ea6fa268482b4ed59d52aa1761193e 100644 GIT binary patch literal 39705 zcmXtf2RPf`_rKZF)^63_qr(hh*9fu27JJ1=L}Eq6Dz$62ilSC&ZECAsHA|`8*s5sl zttkHK_w)Nd&pUbE_nvdlJ$Ie=J+E_btbx7;)h(7=L_|bXTAFG`L`1|{L_{R&6gLPt zx794=h=?e}eAS`89{w(Hq%#q}1n9pvet?)8+Q*k)LX95)aPagLfx#VIy&XJ!L{QGY zge(Hx0}X?_z@1_L@c_gC0AVqpFhIscT$W!#1t3HC69b5d0i-Pd<9Bd%M*W{aDG@P( zfCuJq6by~^dH5gQ&)M4tjz;}|aUxaI~hR`G4%v zX5NmD7(IZxftZ7etcRwqvA>ZF#?;@}+rZaQQcJ^0AB;g^Jsm9Ih8{?Wn=wp77X=i< z=y-x8&3(!$l&5;&5;(o4X4$fc;V+kioBQVleOUK#NNf+k} z!a^K8;RsiKBcwY*SK8DqK-|z2#SZ{`gWVm_T4t`A?rO$J7eWOfLw_+e&?k8rf?(8lj>F*>iE#{{y z2GDj>(=nI9VNHBx0Wd9?FB;%yEJcv6r)Pn~xcjPU>p2@30o44}Js=Qqq>lwo$H`9x zDd`56K!S|)WQtT2F2?S% z(w1-ob+9o(TWLKUKwZNZfHnn-0S(oR#RzjyHGuF_QIQBRHHN#2i%aM@t78!!?%Ft0 zF*For=xk;Iko1%=_mVIG=|a>b&@Nbi01D>d>}KF>>7qj@OPFwAPX{j-Pj6|Ax1Xg0 z3??b%p)ToV0`m31SpXfR{JcD%GSa%v00+1a669cJsE0-e_?SCrYoH*8W|nTcMiQP< z;%=5&ZYl`Re?1Ox|JSqrx&}UChVDjiaW@|gBVB!CH3w~ghO7g|1FYcyGJ&gEn!EX% zV-Xl#v>D0`>Vw4im||4La41a)ps%;KvA8?hTnYfd8R&ydfns8ADsWW`BNZbL4^si3VlqIkN#SC;ToK*ewAOVtQvRG+dH+28q(=93*9(%(bOF&?rrD zh`;VX5*DfF34v&NnFB!%gruf~hrhpxftHV$r8^7_g1BHYK0a<(Gj%Tj8jN-KbtTkk z0RqExz_QZbMl$XY6@N1@p`%^U=8jNpC}BFPf*mo^`u=d208N~kxDFJp>ER)1>|LyNb6E77LZ&yuosEc1=}kT()Z7)DhnG(hK{&BDT*TtE`q{>Da7AXE!z z;jAVFgQC#lP_Y05RSO9t3ujd*+)@l;2JuIzL$u8-RE@oiB-~US3^Yv5%#fxU1T(8@ z>8OUlSXy9=u)08FFMkgt$}9k9s%fYpLm>J=!O})24%zqOSuqGs79G{IhfFdOBXJ5}K+8W*8qol((q~K+nZQMOxp|ABFVA zqL4_eDoVmt8tjQRaF;~?vjVCPF7BQt=57)oU62&cRR!(ph%+}Qm}ii<3>XSFBd|z< zkxmu>3wJ3rq1|9HLql&YLLBE~>Z`Az=Idr?22nHf^nw#emipShx&$p8)xF`GCcY+~ zj%xnqaHOG(CN{vx59sS@LKBNY`%AkJCZMXbzpM<* zBmhCs!q`by3JU-RSbCdT$e5x$9n{sOP&yt6eKUZcB!RB3rsIcob{F?>_k%eQjIFa9 z!S)51Asp0YJv||Ca|szIFI_#1A3{%4-vImXGy3lV5la65ujl_hrG)hVZ4M;VlNa(y zh={m|wA55g{B5_|sFLqZ;{<||l^oQm)l6VuP52;M44t>??_8MNMy#S8>Su3=3uR(ZWSr@IW#u^(U)}_ z2m8DH-qm#8B1LZw;jgYRz3UjgP6_yPd-Z2?{!?YTi`vN#rX#}3z@PQ$$I*5? 
zS>C?bVZEe>4A_a*DV_*pB&DlPr*sel?mQDlc-4jifnL&IMGjl?aF=?QH(+aSwJD>A z)hWzHzWR94`taGBwxA2YPmADjP)Fjh$!5J1HR>@WHEF5Mxq)4UCL?K$#~4!_DbVTr z8KkEoj?ti+40>OLJQ&ZA@go&VW_&wQpgjdrJj(t>sc0J6FQoJpDJ|p<$3`)y=I*?- zzfiVXO-e#y5jUXlenK)1vPp7|#Ng^oco|o5JM5L2f_?K?bYsesiUDy+eUkz4=q^^7 zfbxxj)}Go7fgOLj2<^9VGpclGBE1Wq;RZP+wNyY_*oImxzGJ@84 zpK3^zj)XJd@N9^=gZ;g>f0W28*`5cFO#Ruz8RUNiAb(T-K@}*E+H^gG`IilX)MrLv zFfAlCq|K4je}_7@LkCm*8jIc;sa!ODzU4;NHgqj5la?Q+fA0ZmBh6Ogk7T*RsX?k9 zslay}hVvAXx$J1k%b~2_S~%&eSj`z$XlaHbk~;grb|&KP{>(wxe~r^FtSxnzWWX7* zUhS#)kCO4}7ch;cBr3Es$g1C`8*t8tVnb~p_fv_GrmAzt>3Y}AY3}#V@t05oeMr9LIt91^lQ! zM$Dh{>;t)E7xV#OzV+^fx5#B!4*=O%RQNXbFnO@! z!ROBOylf1VWC%Fq3h$#89(ZGx5u?D+vb!9|mhS2=NUazA`*S_P+k=ml% zM5lLd-_9-#2s?O^j0UcWH4A84W3KFJ{?8cU;I1BzyWrQ-KZQ=p;%1E~3MhA&E?p9} z?e?DnI^{!|J=@;Ubfo?T_&#X$0?6nhjP!SGj04*|8I7`3O3ITh&fWa8U=)#gJ_9b8#sU`io ziuXfgQ9^ITudF_@MXE-lH}6tVu`7qOn&@300@e&e;RP8rje4(6I zIH1;l-czvg2_lp>V{7$JHO^?|Ovgc(=9q~J7vJkJl$K2KbC+RJlhog`NYx|Emgx-5x)2UnE(w0DB-%H@Bp#XD>4!dIT=kKErw+?6_`5ldq zme4@(k0DgsQ01CJ{m%e1< z9pDZe#&J?>Tmhpc|LqOBepPHqq~;1NUO@v7bqLc&Rsb+sE`<}SDB!`WpfTQt)eDqD z6}Ugx!cfBL3w1Us{aMqCe3iTj@Rut|-joe`*plUe&tnPU7eFQakxDPv)tJHpm?{k8 z04DJ_%DNl@3S*D?wX`5tk^tbDfX;XyBQTeq1yenRjwSVp1EVwoQ=H-~uL3^=ct!I! zOM!_JD6SVhjHHbi1ClJ>cyJNikQF&Vj|fI-}P_^&WJI+*(9^*+!dO#z4zx3v%V#r2>pRsGh64cpOX5T?aYL@tBrq^j;ODMTs8_+};!Zavxv-iu4XHs-DUSzA@BRS>1FR|VL=OTX$ zsw0%=A}JKjV&)naEtDuY9@XYnd9gEp(bLdG)xWs|+;E`HRB(2lT#DiKrtP4RkT#13 z&!_4uz4VL&g^WrkPoa}kge7oh-nsgE3>%>oiy5RkNldFHRe-L7DVD0%Myn2u=D)He3Zl3x?%iG@ z2Ls%;a$iQ-x~m0QtEYFG#$9ofeL5rHnIkyBA*i+c9%3@ml1v1gqj&;j3$$CiAL8QY z1&Y}o_dAGS@{lKFOM2Ip;=OUWki?rGd6>H(D~UZ3N?DX?-1mFYk^S2E4}hfIB{$;c zNTGrlDkFcR#X8+xqL!~=t4?l>9%9F%JGX^JTfK2zUMnGRFuo z8*C_s91Aj6e^j{IICgy@P|16Oc!iILxdMjE_)4#QNXd+h4&*7f?YKUeYzN>deA^Ok-6zFBw_zCI7C_*rsN*k3 zmIbM zhMt`B1KmjaN#0WNCw2no#RyK4-=rM+AC54+<>SQoJLIBLw zTEJ}Of4l|0|6p0KB7HZ2{yL%ibh*L*OZp8k1sKli+yiX?!Lmvnq0%~u$faS{x1YKe zVwj4mdjVLzYL1SC_CzZ<^sDs&P5z&)UQZ=`lsp!j#eXp-b$F%eMADyAWhZn3 z6=*CqKgoAjh15R*F#IQ9fDkbJy!vhd>|azfN7}#i9=x-~Fca$xdfB+qhX*f&BOgtT1Ij{$4Tet(OB9T!XM} zlCZq@p~oKYNcIS$1-idf)zwY?V*hl-beeaN;k895^V?YuT>90S?0takJZo?%^RamK1vfT|g3HrxPCRoqxU(G-StzGk)*Qym4v5n{b)SnGR(^84Y}^ zWo4>n2rtHOV3tmtZSZJn!VX0DbENzVd1}tS*u>MVK8JuW#mJg(R?{Epyn{7A|y-!GZ2EIiF_-hcF`1gO4m!$s4IV;S8JRz%-6)v9^3j>1m2ByA52) z-M(FY!7{4vxf|aCUecKd5O5@r>EDcD6s3mkjXL^$<@fNPoA@*D#P^V!v9j=&EQB!rp;X2X5C7ws&O zFTA6UfT!yO6}1z3hp|2VN5YMX$}P2_eau-<3IDfh-vMr@JLZ=*YfuS_cE&0(CUF*C z^%_53l>o?jo}E;oFOuh=H?OF-RqSF3E8uNTP19-rz1c28--AYQuiPQZqR9|;{?e($ zkI-&H0cUwAN2gcJE1_CAyL#GMPL|G>jVP3nw21)IH89Q!Cn>lRL@p-c;BSc}vNC9U zZ}D64KIT+^2!hoO7v_^- zb}*1-u9t&nl{(egNv$)EE~vxYxdP!ZWDblXRGl>?J_RwTHO(5@@Wx_YB4dPS$Qu~3 zx8DSu%pO+@WlV-+!7HHTF1j|)<+GTMrjg*f$je5U*;t*}w*5E*r!f?S)a>Hdc=+mb zj!^B6GX`aJ*+zhs6Rb^15Zu-sV+R(S@ye2g`$`!N3bTH!8MPw8U))}D4-_KTny#oZ zA#uj%d08OT8l(W%X56^gd;24)Vo(Y-Ghhp8$p8}{`l*mwaKz47_fcQZHxfw`|dOck%t3)2AKq*-h+t*~jc7|9VI;uS?J z`hcVyN#VXf0*he8R|7ui7lXpVQx)27 zLTHON>@5jfa9+Uo_pAZ$hSkVf4UoVwtPBspDn4)q(qMvo#MNd#%`m-LEhq3TpXYD9 zfh9N0!y8hUs89~R2E83TbGp7E(_kZItMl%47ukp2^bFBwbJ|9(L!n&IQ8DSp?h*$W9uM8Pq z9@o2@Ar~vxVc)>OMZcK{osnbGTC1W3)2h-uTx|~ zOuPaIAU+6Ri&AA?d8t1V$0AE)04OY%0;gJ*tQkJ`T@9z!U;&rWqocVCMXcDR4}%IO z`Ym_!YL?~jIHbTrTpA<&CtrU-uKb zA#P=61&`OkAWM?3AMv_tX?fKeFFha>Rra zN5I@T;+^Jl=S~COflMjZ@IJ8Yb?npC#_`xFzOR7yrBHy2<;;!?Mj7>QockasC|Ful z^yb$B4p%ZaKR>dB#Nln&qPGIN&vQX%|3k(Ave4BxF#?$2j&s}ilKRh}?T4C{mKc!d z8RWn`vJEw7mGbR54ux>^S=8xoT+?s?D9x$h`WtD+0MJ3SGaj(d2L$cQX-SQXC%e5I zG@_RxToZ~d1cJ&J9!b-R@RBKltzLnxD9DjThFty5AY}u{+siB1%KND!&^(u$VCpc8 z1>7D&YeI@uM9zJ!>ad}UR>m@QeRR0Vq3;AnK&;erg17DH8Pn<|vZiswZc|y=5R58~ 
z6kpl8!S|^`4V%CawxhL$E zF<{Ji$NYB^0D&MVrQAG%aN+X3-tvl27heRsXt56k&;jg+|9oWgI51;91k%MvSPc;e zF!#ISAUAFT(dm%>jmXSjkg~V8SDr>h1ll-P8|?1wl@@3}T6?LnJI8!M5V`1g_bB6a+E;7Pm?kkYxcIwW`gSu$GdX$2UoZl?~bXa@lkOUog6>XiD@W$KyOP7PzJ5HW~;%6{zPuoI}u@Ayw`S2*)8ACF z5>JVn-Daw6+1!FaK^&X~ytN13dw>&^yDM!)=ELHZ_2+XS#l#B{I5bsL< zOMx+MU3YIX5YC3*?mAm~Ul)VEd3*W!n38_lu#pW^iI|OyHJ_P%Jdu#j0H#B&`cg(ws>-;~MT4&Rp2R%zD&y<@%+Fb4GbQ)cB!71**n%oTCQRJZeBPYLLz) zYL&wqG)Myy8I#}9uUlS}spwY4f&1|atI$a;Bg=5|{p`b>S$v;Xwr1$|~B?AtL8wE+1<>o9o5zJotSPl+-DZ;um%33*m ztB4wxzBB-rOa)uw|D-T(A6%b@uIN=_GpL z1T;bqhU8lRHJd#FJR=qvuc*Fi+!Ib{uJ}o!$tpeXmfHMtFm3nVhYa;4QU4698$u`8 zjqcBL5ekm|Y9D$o;q~?rP-qLeray{(d|V|+oO7l z%AGmCajE3|kP!I_YrZO{Gthrx7*u>?*LmM#Do6M7&^V7#;%D5OKHuNVMYS44^4-or zGEYwN-B*WP(nMa~b-XF41W$AUwDUy6F(m@g1@c-}$>?9E9lGO5e4PM}KL+L!)hK~M zj*PVYGkU``(VTcRwoB|;DAi7$XFqY%X3(695R-mxSt~5->$4PvtbiCau!UdVBAbGo z!y!VX`nh$em^%R7%>hMgrhd>dVp`+oI;Y2d2Dh^g)y1sR6E>i)w&54%5rBr(~j0 z1UF!$0AU#6F~%seBI)m%d+#YedDxak(Q? z44yUJ`&rl(OT}(iiK{*H;K9`{^;L{t<;TrNl|ECWjXcMjUEdYX6IK0wD&{zlRzV&d zM>qeDTdxjN6ADJ;j5{Bz8ft~*ywFip_jL}$uU2;gKdyKWcz#!Vhv9r5{BwjCR%LbX zEpc6;(H#xyG@ZBytj(pGDB5Pvj9h3fD=5ay3odY+Y9Iwgxo>{Kqx83rWvya>iE8d) z8c5nx9xGf0i%6sh-WQS#Ru20jC_bimweTN%(RCQ!z~b$JcYeq*??u?bwl$G75|nWi z)lQ~tjSpn3a2NCg$*Z3XXwGj~B4j(>X9?`g&DX!1OL|umFH1v_`S|Yf7r8B7ZS1W$ zR+;B9!q~3PU%20Cn_*Q zn?>MYdS^XPQq%-?wSNtIva5|h_4)Gd@Iam_RLG`Pvbjp`yo_LYdy!)(=_{Z25BAV2 zcTC+P8@EqrrWDwuGxt2O<2Dx0h;qidriFDcv)o^74+#>6mXtXjVy(TOW)bd#b@tUY zNK|;$xuHsum8d70qEk`FaQYKDAwdPJD+KJq2p{DMqzp^qLy@ek9EA(tq-Dd>x~!fh z^~BcOJF;&4iP}ov` zLH8#XOoG(G153^R{I6A&_UGa3jYa(B9ih7I+gwac-*&Z;@$73kr8TRG7&W5h4{4px z4;H!Kg-)E$yPhCZBqyC(^}zqt2Wjs&=ecpWTZy9c+b>|d@{+%Z$Sic+C^(l478kWc z4{&WRixq`LNj5Rx2nc9yYpN(3M>bF2va&z6CS9C({nMR!4t!9}c!K&smjT`-o*vLN zUPJmthhKeKEo}t6Rlxt_G7MZ5BOgB4iRJzVy=#17b`?$3j8eY&kAZ=QiIMRt;OWF370#qeR`e#2B zy@5rq#%t+%P4MB4;rU;x$URRn>Px$hAu3+?re(0VEyDAS34~z5DM@!>aJCz9y<2KE z^9)TtH`RtPLhzC8cJ!lZs_zmE5q&9?t&ahkQ}LSz>#+ndBzg+DWuT#iApCY89p`uD zm9@Fe!>V)Ixi!!EhSbB2Tvr`gl*p6*Y97M?Ba8q)l5%H~s5ET0_EtJRK+qI=8ps#@ zDG?2qCh2r*e&xPzD=t`*S;#&H+>N)s0VOBJHD_9ete@z06Ue>Q_J2Fx(L}a8fy)nv zA~^Ly3_Hmgi2iLv{$geaP&<*k;Q79v@zy{t?^C*sSwq~om~Z6TYx7!pRF2mc(K=9) z$Yi_kr z(=}sjk3?_2jss4KV!)Pk1Yg~GIW7reY{5rgiq>jIuU}gVJ8QeQRW~DgP6;~zp=IMB zOaiXTcN#Ud%DfnsXcVmUGIgSD$nMRbz#ZUZl7&iVx^i6uatY`wg+AQ3k`$Q z(XVm`f@8j%gqXKo$^+0vZz+#^5i}ThCx-vCoF za$6a^2x5KVP!#a(rBh!S;7@*2vl2OK=26C`{FzV@FG!sne$G|C9kLw5FpPB?^8I=? zuFke9)D7Y{&`G>F7)OZ+VjtjUb`88l`B?VO_xB#Z4?ZOqZ^u(}QV<5LJysip8~LGSjL+vjsa6zdF4YtN<11K9L=b3$*h5(DLi=FKMZ6>-D_7-cz$dh>BOABt7-G zchG%vt3o8$@~Oq-b|MD3+^D6e1i^xni$kXj&T?-Fm^PT&U&Sqgr-nysfI@y1_7_Bf z$u&1PKCu#*m1Ea4E8LQyV2BOQRm4Etp1be2MGNF0zCa5dK4d>usLUwiW3rV*gH%8* zPn0YZI+Hv83N2ou&CrAuwgczh8i5w3A%{A1Go=RhuXb89@*K@ zY_5faaNXF)j8XQoyL{2=+HlZkUUx1`-4_f3tJE|eRNkj?6|cP*LnJbazVidcAUqtJ0ZlccZtG-g|J+_xwPr*E?=u7~2TRElznGD#S-y9GcAiPgxhh{W{f?G;#qe2RPusnp#xAaw zUqE>_NxV6){A`^?kx>I4iR5#Jkw-I91&Qdg7XiPUBIdPi`(lG_IJ~WCa`N?L0hzY7 z(B_-lr%GrgNameZ{!<&WSAWV&N_VlZ)&{c{VH6cAB(kNGwW7uaU_Jo3R@U zw*j;hqtVDqnZXU;#&fXVqPsKcaQjKjd7bK3!6EHJyPX`amH3NOodU#Jb#_-QuzP3@w&rZ_DnYKqQ9+>zg;E~eE80Zw_K^Hq*>45KoPA)!(Wu%0 zz5<2XCu0{@26~Ob1d{xoQKT;}3oRaP)0JhXAU1}Q5dV4j!sZC3HrX4OrU;{iJ^5w+ zDUXFIeOYv2C0!TO8FLp{>u2QI-=AKQL6989ei|EXk$CwN2_m)%Fo&Gj?O$`2Pzh@% z>OPVQQ~83v3(Lu&HreJ2SKx@e!vI?9b`zPN5la@`getyQ#PK?YB{G! 
zPM}0ZBYCW1q>0~h((2SXRxYS@lvX_FZn|~UsvkZBO}D0vENK_H4pCD>CR8ts7Oe{B zFasXZ`G{q)jxFG749qI*Z512(mDv#6+t#E98;{A5aH(MzPR~ER3qDGmzOOz;psuBK z^{oQ0pY_t7jz8<(xH@igSE6%u>u0an-)iXDJIB8_iJl_5XtNmYen^FZrww)M^O`=Hs5m2vb;`SwKn*bV&C zZ7!$PzXB@!kA7{!+v7TCUL_?ZwQnw}B`hHQMidHB_1d@hCiSn_E*WbJ0GvPM~5AMcwl*PyePJ>(z7(Or#P zTc2v;p*T7Sh%^ed^*+HqRx5VQbTkXy-3JGT$CXKo88>5LP5jXW5zC$8uT7BXb1ZCD zYV}LgaAy6LGiQ{?j+ATj?KE*zPZN8SR9y1#I_KY1s5tEVPp7+I*7R2R+>|mR4my8H zcKvxs+>s>|6+cX6iYiLMBG=`25Pdf?Stp7!6C<($ze7Z|L^VJic;AjKAoT&zmdKH{SUV{+T5G zen;oix<5ce!KA%rMl^Rbe@Uo&1%NyPOS9|zn{Jt{<(&|P6tcbeoR*TK&FMl*9<21D z|IMN&32lp=520*ZF~JqPk7cYOx;39G>aV2^w$Vs_ySZ>k2*B10y*X+(9rB)?*uY`J znmpRr;6{@xIMCHRxwc2M7PGpTV*R{9(f;y_I#O5~fU^KG zefK(}5LcrT@=$VD|6C}il^eWhT`cpS@Vf(^W8+L-!m0Tt+z#MJV1`ceRB@-f!@kBfAPYIL%{$r||!S{nPb-QDn#rT0rx5}6ZQYfG& zs+x{JA`7q!dg@kk+U5ET^9=e^(uKZt8i{NEZQs_$!SmNdzP1+Yi~s||hPf~qV_;h4 z9p)=X1tK#Qu$7lKE0sQ5NP4X~*@<|$3|EfoK-|j_kJ}zLse0y<`^tL(##Y(=!eUkB z@o(TGXil6S?maqoLr?Opu>Y0E(qqBBdwdk20lt+5YG3S6Ejsj)-Z;1TBE0dNkbr{v zU4>6Ycl+cEhZsPgn1+n1>wtxpbOO5tb$|pfvqfOSQWB_*p_53kgpAhQV1Ygb;g1B< zO{OIs69`r(lgx)jf^Ol9NhC^wj^trFA3y(VU@Pkf^aeH27e9OCAL=8?6nfDkh2wy4 z;XR}b$rTAwV&gLJ?FYtpa{#IS;o$(TyUtC5`HY_o|3Cj}jSfGYtjfh);GlOWKab;4 Wt&1WOH}~DgHvi!W3knPK3j+CtMfC)QxkNxhBKU`(h=2${9QGf+B?6B5A3=bCAfCWe zLwk&kqr1zq|Hj?m&Mx+jnE%KLi3x}b0RJ0+yILatgDTq!d)V0m;5KT)o+x28w6=$( z=YKe^o=))pa1f4;2o(GuR743c&j0wvFK7QhPLZnmC@ct!6IOw#iGnQ*#H}@4{zDft zaJI6-L4=jG1T8@lXcY}Z4;^uwzK5%`maDd?suTUPsmLLC1^^Hmil3+Cs%fVT09q;-i=xD^23BZGC3PDKn47S@ zgoX$XjNc5ce@!X9`&9bJvwbwReeXirfC2pr<><)ti& z1zPDjIVw9tG~m|Gq5znRD$dAIOc^SUa`tl4)KIq*ca*Ti&lQwF0Ud30j5VCS9Q438 zHX4?~NO2WiU3(D&K|?3Fu@=t2$W__P9)ks2qL7*h6?Ob1Fjh$i|IZkvX9&?#f~(jF zLR38!APQbi))HDk2+Tp$!9frRQ&dm`>N{#%LM*YKh6?z7SB9f>4b_zm+~r-d+JdgG z>SFdld24Y6oP@qO$jZi5&rMUy$jipi)7DZKW(yKF)B%X9xd@|`RY4dhFBL_&Awb&` zYlP4fHW1Xe)Y8z`RKinLw8rxyZtJ9p5%sdtl6L|s!hjGxK?N^SJ!h!8zKEfvo*obi zbJ21`Yal$7bv&TXFsLHL!4WCpqM~H&rh%Uh)UY*%*{Q1_9j$B}FnFRUH#kHWWF#-H zsfmOO;W04k4ldUAZVHMLifBb0Wova~eJc?RSi#d!Rm)LKRb5ry(#cs{M-gU;|8&6i z`hs2vRTmeeodKQ?aS=y|vOZefMI7KIXk_H1<>6vvrKh1I4%5<7!9X<~-6bSMG{G1E zekMd&)K=ZvNy$}M9;%7fQc%Y_Awiz1Xb@Nn>VgoF0HM6Z0P;9-D{WmXQ9F4FJZlR7 zG)3DA>Zs%9pbXQ}@zk?)R1)^WsX`#)o)SPuH;k=>n~|rAqBcNGMHC>WhF`|j7L7EL z(6++AimO?wN_arDmGN@RyBQdP0dRXfSQRD?^N?53b+MBdM8fRkt&vU+ieML*ftZ1c zf~PaYTKV670#>qez&{m5l(8xh8y6cLO&j-rJh@pSy#N}Dmd0Y1AbBq*uqH}aQ(4sA z%TiSjq2`LfV_2%&!LdR@Kr4Aqw5F<^AXrxkU@LB=>h7e6zrE!39dKfXDoXBR+6G=Q zZC6K(otvnxiw+v2qU;V8mk@Qrx{K>WV8S{IaFC)KQcT&(MFZ!ERD|Nh^#wHyG$8s$ zb})6QwvwQZjjE-DpsJ-Y2#9b+Icd15!Zkz{m2D-gJypF#D1v`Q!G(mtnp(oPc=DRM z+D1xR!p25WA!iqeAW%n51!)8G($R29j%A6>QKp$3^`pL_{!g#%b4MdJ7|6PMhKN3!1Uw+pC==Gt!6;_S1%C)E zkXr-471B*)n0ua$VSOB0C>|%(k;$F>Dj4~_qI((pT6IgA5$!Hj3z0$KQN`W)p)2PEV)2Cb+X^izRFvAu-?rsc?{Q1F)*F*6W zd1Eyr7;|3Figo{|e2AFu_a+#j7=rVepg`kW5riuS62ww4_WPs7b8D=f9C%O;wNPG&P3!Iw=VjTR#vRH;$%3l){mGz zyAphe3mC&J{$-JHMt*a6VrQ9Imv3>Hi<_|4x|(&r6}Eo7Wb&fqed4u-_KkC2oD4LA zZZoW@MHxJ+-P6hN_fVC4pFBo;R%bn9c7f^#p#|l5vqba#PnJ902<|JE23ph?iY=m$ zE3`V3=E5GldMo^CtFd+eelL%@7E?nBAhPY6=*@!wB_Yo%kD0Z%avvI>`9A84O0Z}% z0}0#*9iq>V)+HZ}5}kcArpI3tbh(~~TKBpi@3hAazwbR0sAY|ex7FylI}o*;vDb8P zPMRTRyCDA|2DD>%UelHSRr-dt3X_=HW4;t{@qC}VH;O;gxg-~tA-OH;o-z#$0ewB) zt0pSmr9R^U-pt=I%|9U;qP33ysF7qMLkZpiKQ(&W^2gawjUIHGFbCH1u0Wy8_>eq~ zR}VKS7lLb`j+54W9*=7ikmi+yee5^ay>pw{l!rVLn_aw3OKYruF!839<3Z(mRXmD& zLW21zX=g`}%7$aQ98wGe!`W(FN8@m9-rsj*RD_S^NW$9esOr`XVxze8-$7rmD39FR z`43J&{WQ9bKOHWT&Cg`681NsqJeVNw$Cq^YA@WM7jg-MT0~P(>0OLJE1z=ajFs2Gx z#S~>a@`^X$D;E)dT4x{uDrsg`ks~guE8>4k%Jq;1m|0>_`ElG!RB0tUl@f+aTKH~d14kKDZ`j34A>M$}wPBQjm 
z2C2|XST|}@F@nqvsQz!j&WR+Ih2$gq&|+V}SDt^&D24@;qcFJIBFi7#^0`x@|CY!M zq#m-dcB2Yl+NI5VDfU}!aVY%X+QW~X`uNLGo8SmKc#Y=&(R)SGlqhLGCHnjM|8RuZ zW2LMDgi;(2@$*UU^X;F` zl)dOqp2(cd#On$-$KBi2z4*04a_gz#;-&607k4F&Dmp(;T(AuvNvA&%!Ycv5#X+>h z;j}j^F(#ceB~?B?`+ViSEmcm)v+l}!JJXmS z;Ed6v_lGFKce2fXW_ayXWRY^yxVj1<-~K*Uw7uS+QvY=cZUb&_guUFP{yJka`plZ` z&>^{uPJ`8GJU#cMQ5kslx0b(J>7eGSSE@}4YPa<=X6SB>eD!MOU==0fn~~TXc9WZr z!2_y;y&v2|8^+?r?ulSpppQd;l)%!Q0mt`-aw4L0|Rlo=q>*K@ZY@Rt|hR<)h4hH#_1_t^Soi zcEW;SM*N#I`a!h~zk1)x@RtZ&eH_E9;9c&Pv7evRiDL{(eUmC~J&t82+89XF@9*b7uL)W1N&M>9 zwY#&c%PVQPN?*3eHmI<~q?YawyWn$PI7CZ33u$>@<*KCpjkDG#UFU~;7s>YA_QqE( zrctkK+c~aD$lH0`p&6!*_{?Ty0MdNmj_A7OW-rkcKbE?4m!|V3Q*IcCVJv9 z*Wma$nDk=o>~J61Q9skj@w{+AUXlG1+3LHfxMdY`k3^B;(p)|?A})1nG5lpw%! zdBNF3hX|=zPk-}3CoSLo`DTYt94S8Tr&0}+mwT}>IZIKeom_kzPdHN6*BSCCIG9ob z?#k?Fdb9cbmL2U3rdo#{fs>^mY7&F>3aWeLoGd>ig9ST+^INxf5%S>WKv{$W! z9+AuO!ednsi2QK-mQuE+SD;57!nv9J5tY*wAAxhvYAZ&3YPC)Hx}G~!pH!H-rR&Mg1aa8d~lPa9+tdZHqz=nxp7}wgi)OZ}# z$U-uZO{^I+`wc!-lY{=4epIL=h`TyJcChdM8Oi`8g_V-62K{bviee~xZIJi)Nk3P}W!s;y z^JV&cd)}oW!^znGBYOwO1!UpjGYGK3azko&+PM^x0%Vv-#17g=^ipZdU5X-bbJ_F5 zHire8R8C?29GNXG<_ZDpKaZpO27PV{uUmA{({;VE$!Als??=L|%de2u+Rco4@D)y- zP6$xV>}CrGfd6?g=AUWN@3-0~6lc~v&)geLu73Q6N3-DM#0___tOU4k?BL_4;-)is zu}M8dKCQhOUGzN0(lqZZIy}e7CX*a;wM;)2BijCvu|uVVY4-;Mgs(ptb>dJlOh+YP z^qf{!WH|2d4=HTw2DzI~TiFXI=Xm}`3L{jpwk)Cvgblo9940xc@ly0hVA`cCuqt*3 z%Us*u3sS4!!cm!2Lt;WHhQu^%g^HpZ;E|)_aNFuSoO}lG$-+y&S(2|FV|R%5uIfB` ztpJDt=7*tYW;{Jkj~J8)rtCzj$nU8$eP`OQ=#g^L3{8$>XFngQRFjjGJUO_&pyxv> zmzQKkfBsdG@wQXLYm#gvu#=Ad)@&6h$Wh%ca|L7fs+d zNf?H!lt*ya0$*{EI7rzOWjHyrffNnF!L{S3mQ= ze2j8STC%)&hMo!lqON%7go`(q+^Qe4wjLJ!w&%ze=ZwA(}NsTc+XbpJ&gMJ#w=~M*8A|g9Igu ztNHe5DlJS$0yN$SszWjLm)F5!%n2xfw>rzARbfkX>s55Lt_+Nrs6=t{ zn4w0OX38!du6rmrDaT0t6xe!lh5PeHR*%y6tS?J^TBGqKZEfDgz7w4ORL1iO)lVI% zy&2ks6QM~Xk$3q=YW8v&w~OR4rDILuh1rC36fA_DsP(!RI5lR{G!;IcO_t2zyGv<^ zxJOe%6%4zxpBhw2f4e41T{|^1nLF9Z-;0iR zi_>{)hQ5NHA*z0&(P-RCp~v)sWBSQK?p#(zRbyjg|2PZU!hYoC_KL&rW{wdrBr-yK zovd>?nt_>7G-%QL&d;F5*m7bl<;l)Tf}6+qn0K&3;iqq&tsTRE)#i#m?EbDsuCM&f zI|#jZt_?MzAnF=g^tPo(pRvB^x$ooO_72)gMuCL)Z;zQvK~}npioSlWxBOZ$`Zp*2 zDNe*-PPuYEk(};>@7zo-{2XDJ2aPc#P9_UTWN73@i#FkQ435(ft=479qW16YkRu!?Xadtlk~0| z@T`@N=TE=h;M=?-VfpU;zIiZFg(v3F_88%Rg4m9;@Xz*hBeowJIG!5xGq{>cm(_Kj zJ$QEfS!z!!<9syje1B$)CF7_6&OT;hVeafRbZdW4@McNs(%@Ie@PjY>l#kHSYez^U z(Y?;aE!MZRZ?TJY29vNUiPyq*HKWFbAJ)lBkej@JA-o+~?H~nKd96H|H@uO!SMjX| zJze~t%-o&$`_rT*;_3UczM!Kq>idgGglU8D5{O*wkvCDtr%V1i2YxPOr-T#oBi6m@^Fu3* z#|D)k&alm5ftls^l{OE&I*Vs6=N8gzOdYJ!EO~v(IhQayM$;vdttGw6`~9~V4l)G` zgwnEl*OjjP$ONTgVPc3swNM$g#ocq}8{JTs7SQEYsC3l1K&Z)L#&O(pOabTYJC2yA zRqL5rVyMZ?8pnmbv)3R3ybqV}m+knoTTR2T^>VCxJCW^1$M^D}dwa#=TIWjb=Bm#^ z(toAVV%xtTadqY1a>!DBR^PA0rfE>U(sCji|{w0^ojquY<_6LFc z4PHmX%FKPY)r`A2c#lRt%45{Nb8iE>nj0a(5}O$BjtXsp_giT~k*B6AZ4_ zkC=~F|L`2n+PPGv+%+Em2cD%ht^|Q~gPoFb;*U&e+4)g>c2Z<=&%FVk!%|N!r3_vboqmYp zEbElMRK4tuFE0i?2vr|63Utb`@4Fr7M+GzuE5n<%mBo|q?yX2uJF8wYYKo3e1!7gd zWH;YOA#v|eR;^OBTSL<&-196XkpcyYAEOLz_pAmI^*v#d0dG8;j(TW^CUA!&x*@|Z zZi(?ccop-~x3p+SA_Mv49e^5^&wV`A4}6{NcXM?*R=F^rshNJ6Rc_jHwdUV%7!VDq&V&qzn*coxmonc)q1(ky0Bw`|^3q90zGsB*g`= z>IN?SdW4Ts)=Uk>lO6;*8*vOuA4Z0Uf2`3Ev>+V>EH!UDH(92-D;gRL0u ztPm2eKgg2q>w-`Tjwib1to7LTbhgJ9b0h_Z&8a%yq9I{^?4#L@a(xeet~-K+MHe%_ z4%pdp(#^20LCvPryzJrL09U#z#-@xMCfEDDCqBPql}?>6;yaMXEmT`SGa+0rgK% zz}50Mw$EddMTP5p)N=*cz^#0wX~4NB`b*nelA$nATSt7K_Nt`1#>IN->%iKmKcUV5 zjA$oH`n~ge!qo0tcd9m~E9Ow4K2_eTHiG--e3(HO+H8t&3XoQDl0w)U6(D0VeO*n} zV>)@NKJ{fO?(t>VgGVCMd_F_<6Or0B?z33UpFw0~>P&PjC8cc-ZH466x? 
zaurli69^ZP$F^~&KP_^tqpGlnu7w%tYoQ(el;0}XQ;PZ_a7SK_?{q{U&(xQ`u>Mqu zk*AawwJNuwn-2aY6p4)w-Da89ny)<}J)5~QWWC!Y*dk@$qtJIKZ#=m+f%W_CzmfQz z)g4rP_jqpLhpb=h5#qD8-(8xv4_0g6^GAnsln>`Cg@goagM<8ROqUt?e!3B+8_29+ zq&UaZ-O9IuYn_~T-mv-E&3~59P^nCp^>5*L<_uXCvvQCWw6UfkS@w&wc%gR43MS%t z9U9&;8}r`at*icp?Q3L*w2VVWop08U2uV$s?a^l|E$y~mQ0ar$&Zewc_6ve|NR$9sU;{SJ*$bb{B_k%Zo_bUf?oCSg z0u%skvDC2Ho@SrQ#i%`{qE@+2$Wwz}o>pzz@Ax|MKIOc?yX(p8sQ{sssZi~e&XaIC z1@>2noNpQIi!J!R0hn(I00umkC}n;~{fL6-?DOD3L*)?D{gH!xE?_dYOjDA2amiyV zm0nGWWJeC;PZJ`($>n|S~Tbh6RDT#gPsYpMTCeWEo{ElVsFLr_rhTgo{d+>92 zBVc(WLL}-@2oKS5M1VJ+>!lRgP{rp3C9WqqaNQf1AJjB~YScZ=2wIn4uWfX<9>8l; z!>2l%H_Y9ZvR4%|G1Re%f8$(ol17bedB@i7y3{_{D4EAeG%`P+F1Q=mgsQ6h(1MtX zTxVuEbuBNu&p_J{C$gaI{`Qq-a@p#0$rJ!i%W_Rxs}N?GSH@SuX--#S4jcbhAgWbP+?<4OwVm;?=iaV*VWG6Etx9!GJkv(lwycL1dg0`~q_+W^qs;e+Mz0te zaf2sBxHody%J(8S`8OTEJU6*RLabwSeZ02)E2+{}GC4VUw9v|C;)np$uYy7jv37KvD4m+p4Mpe3Em!2STLsxD(2@36`Vf0xYvI_NYu6X4z8tvBIfW)5{(|DjDxyl~3L z6iY$YW>7XKBePv!vSCV5I=a#@a1cnSkjhV4RVWc7TQdLhx<>nwS5hkKTWHaSQtb-` zhuTfAm=rI0R2gc#_&RU8+Pk*A%gd`_?8argHCgzP;llh&1-jU^kUdN=_4domaEDf8 zdy=znL#j6}#hFQ_qWGJnB@niH`a+7GC<(+~-aOoyZ?T(&UY}zQKimW-?=>5m00-@E zB|Ra?j5*mKGZ?%)I~*%&Zr)b<^1HF2;fK(u6BlSH*&}Es9s44j38mWTketb%=P1s3 zJH+Sg5B%8oODC4EdN7Gdu;pb5~Ymswtp1Xc24tC`^{ zruqoiBnEyFQ%{$2me|{T_n&%S&Sm%AqeugkKRTl@_*gGTf&hRV-g`h3abdfJBlIP^ zGqnu0R&mi56qJ-w2{8%qO_}cZm2CW z(T|r@!sQr9m{;W4$SEe8>%x~6zc5w=cKwpg?Ojh{Pq7??JX8ceQfTGQ`;?5j%(`3m z`PEii2-kSjl^$i$EkorCo&92#kx+FbsQnspR<~|;l62N3x;pDln~`AsM~W7)&h>^W z5{suEq6E&=r|Fyw_wG06!)&VxG|e1K-LR@*w{Q>{8|tO&|KAG`gF4yytJR2AUcOlC z!}0ADN5ct=g0Bh{Viq2Zci^4hIcuhBCP_P&X1)~bXXKMGJX^W!{(XQzICO$&BO)AYXkSoXMpA^17ixg$bSplaiT z%>Chp@<-uIVM=c*Zho(@70E1Af3Gi*jH_C5){jUyonZn~R|@fz!#?jOxC;zwZ@gSt zgk}-GuDpfkO2s^BkiP3>oxn!JjMjjW zG)Db>RDv?q!&&x$ubp2U`V#oTtfnWYv3xh zL|`^f_>>$Wh+#=4(&18sJ#ekBwS;&Xk?L;oCq438VflRUL0y-x9;MT*a4AT7Dw+ks zYrOB6L33mdI~a9X3gdJ9kkj*p$Bp5&r+ar!gejAonfCND*Vn(GkNMX-&thK591w-3 zLK*$;_TKzzmM$`L$*Gq+G66dF@=zC%JMIxNigiBuwQ^0qY<160RHR)}k=>KzM-gXT zX`FbAyX!w|+IFT3mW?@qe_getgc3D3%oL&~@uL15b({yFQ>}M!ms%f{fIH_>5pn>B-u6#tWP|u=v<$)4^8? 
zK%TaDTRu1^a=G+DNSV8D$=a7;Yd*V7|x^d zNpr(g_xWQjIznqx{0{!D{P10^kH}bo{nwlDVU%M$;K$P!)CI~UzkhGRbH447yC(^p z`0Zf1btUu4dP5T+YCOqS)PZ-Y%4Y4{y!37X6rR3Ury}E@rV7$SJ?10n;AC1!s672f zuC7v}fWJQY$T-!2)?Ly1vcpWK(PKA%3SW@;xvhkrN6rr~hF*)MlD`O_Uq^F1&`d#v z;3ci2D$~hWsYVUWZ16x0l;?ePA;V1a!fAk#XE9l9GWGN?%=G;0UIkvC>Zq_Z?8EiX zXq;5iDjD!1JIJ*~OtBwJghz?f^W%#=0;UD{B9FmWK_h@KS!BV)&35VNyRXWlOJCn8 z&`@@RHWLGiCe>f$Mo-PV&4x3^k0Zy$eojp^NT7=CGuAzZ9P%ptl7TTE6-L$>8Bi(Y_?QX%UFCFtq1603 zF0p^fq6%%5epA-ya++;Fo)T7aGw#)lJpMULeVgM*sp1ZyA`nDEBj09|lk^cnBt=Hn z$Y*h#d%o-a^2@brcl7I5uWGZ{Gkb04YR5nOUvEA+roT7|*b5+nv;9Mjn)~OJjiYmtnzVj}-J8+5D!jJw5oDq?M4mHauj?(X#@JyR zL5~KeWzCCPE{DFZ$Cf3Du<4!7xwm$ZXWpuMQhIwTng6t4m#c2dA_-h<76nN$xT%*t zw(vswI=Wv;*HjMlm6PtIwJzs?^FRCCGL4kspSn9x^Um#Z_m;24Q~8(hi!$+aFsIG!6BMG2*Sr+3pBwH8d>;il7P5rqkI3g)|nLRi2yaI4gdVK2SRz` znESVw-z*M%iF!YgJbDa!XmU?KO~>4~sF)wJw{LnD#Bi{ETQ$za$1~^Ns{?|b0)TV=D7p2H93Ne4;_Gcev#2wLa!h ze|Y!qZ`HHyr2->nDH0g=6U{C15j;DMn^dq=9m=dZ7a;>OAA)QEE;;aebEDyHK9N_EM%rmBRn8C1{fU&4o_z zMPvel{($!yO-`u-V>f!HR>}mI#Q90G<<-=t*$hO7*LMjgJ&Y3+?FI8Do}+qfKX9i& zlzO@nLlRF@k@mS(3E|Hi|E$dQ^(+1mDLjHIAgceMuswyos?n&-SJTzmK%t+Hx0O^r zE5zv#PW|PNcBqQq7n1$G1@ph-Y~12gFX??!tjrg@A9!T@k@Fg_EXfL#q_=M~Q}HD| zujafQ4}t9MCeNasf0xzpwaD#s^1R_tc9;HwkDNGULK%|lu5i19>^jo-n#6Lui!5{o zn9PN;et8}-@|tRZ_NTx2eCIfxpP!ekQ&+#&9vg+tX@3|EJ(e+#7MLLu-=FG%FiX=( z_xn8wLzyat09WTex3_cT$fY1ei|`5YI|^D_x&HYU26OqRZnS;3zYjhY@U0A^b7dU> z>p=%2@bR1g^x2Zpt*4gS=~rFCuw|-<607K%asTL>BM*SIG%8s{xPG*fZ8f*=^ps6> zJXkvxwgm!9M=_BTD>&_BrYoq3L;{WjYVNa62s8U%)NPekR_ZnvK4kFP9JQV)iaB=b za*FWEy0|q~tsPgf-{OdmRj*%-w`!XSSQ3wT9&T*8oofuQ+eS^$LO(9BNTiX|x+4w-2~eA(D)+n=LJ*d{fQ! zx>m6mz)nfj_0~62so&Fd#qYAt{$hvGovJ57My61`GzJvWWV0sOG|Dbzz50$~;m1G? zH-=y<%jZb;@9yJsQ!LYbK%7|Rs|Zn{YV2kEmy5DcH*Yn zP6oVBeGs#+U}u22oraC7ppc&}zg|w|__P}6IaMFKBES7f%`wUx$|9g-p3K+?4QBdc zCMywKoQc{S+a1V|vS^;pe;7A9!RfKJ9HSvob^NO~12C=4cPEm1g&;gkSiw764+JD$ zVAGQEiqvu)aCF+)%J(wdiU=CrmtJix|3#CeJD~jr?J_+&wp7FzSDD8bFZ$rraC%zw zj=gj>vy4`u*DpKDyFNj&E!>qKpt!<5wo66dn>y<|OSMbL3B8fO2e5FdJqaV-8m>uE zPz;uJ+D{)BJTzRB(92Ok`lCr=r`_)_nW&8PGHC)kCYErWc6a8{@)B{k)uRX<+Koyk zpSng|nV&R@gSObH*%%>%16?1p)5*bY;`YG`X5oQ7v@g=1rr^W>FJ4{G$lbvf3+CuM#ez1gb?wX*ZDYgb zqc~p)Q_@dw$mv2%qvieEq(c<4JZ;vxP;CNNl63gIv6c=dwZrX$MD7U%!2aicvNQXA zQ+7(zN6Jz*?N{3{q*Z@^iqynY8R-uzcg*=n75vVFgz1r*+v^N~5}zW&QlLR%{p@~b zKp;zx^TkPGmGfmyd#5HJ(VdRM%Q@A!NPSzfzGlJk+UVvF-rpVFRkZN$de&e2IGt*Ju7yw{Yi&)3>N|F zvD}(#FFEVV9wkw+rAk?>V9IcOQjX^Q&Xw5Te709q#AoS0SK9RlOGG0NftA4ley=&E zJWg-js5~QDo3pzg=%c_(fF)ex)%Z2X%|ynKmM3Xw=T6$Q7J0F{|MJ-kbXuz?tkQ!J zyAilhRb6rTY>adw{^Fq@&DwQ7UZGKq|#*OGQw#6qg(hl)V z@vNp+R)KB(#7Zdel2VJK5a-zskp;m$W}cSi_fgUMJ&5kY@4FVK^6u^+6!{;93hW$C zSTE-npycN2=7LA$NSW2MGd61>A>d+D=qe>PZt+El`sI%FnuTuvNPdxsq`>Pws(^%t zdwlIu^pj#KsgIno>))U4#3qR`1Qr?e+{TE~9!=N``IGh14}1;&^_TE;Hdq8vs62<2 z$y2>%*`*;)B0|0*B{~*xUyR*iTJYdj2EKexuYPyDZ^~uEw66;r^8J?k;#wbD!9Jz9 zK;3y$s`Z@EOK!5!je8Sg1(7qkHYn{l$OuVyl_P!r=HJue$;)$+lho9pKHTSu%N^0)U zqN)tdJ-K^v$I|e-KsSOpx@hTlb${0e4wxE8d;7 zX0M1FmS#iY%f|nF{G!%pbg@Oh=lrGRKh!((3*Lm*f&-BqK>!h@Zj>C^1?3Tw)!F(s zV7fuxKcqWK?q$}a7phs4Y7@7w%+*{8mS!%MdpS|ItE=`z+m+jZPb(gz)L90!l6(t7zu z4DQB%EzeOt#@=mA<|WLYHtLAb7_dB0$!nK5JbW zT^_)k`6C3ixZO_v#c3%xjDi?&tutEcN{t|B2X-|Ft-vL*79re zO|4!eovY4|Lo1GR?{43TtS&K*^~{-}1qO+Gr&ioCe) zsLSP4S1l&Nc1W}@P^TpU6w;*p2cBBgefpP-Sn19XoAA_`_%5alj7|;_7Gp3e`NYYf zsg=0w7WXKS);}rhVKyyi;>Wm1;uXbKC#LU>GVklx20P^C*rs&2i`4o!k{mtX!o(&| zJ^=Bp#C*4eMWESxv+WxKy`=BEkire7iRP!|B_{Xp&R0+82I`a=gcbGqmEJLvAMWEq zZw%_!daXpYo(Pk2sXpGYa&ho3=bh+qkr?ac6Coa@q`6A&sjf);m~0e0=Ox4bPBG}p zcF)wXcqMdp6Hcg>mbxaYcW%bw{*I1Ya@3EE)}c?(lLP60 
zdgJ%ge@pSEc4UhayA#s)GakkM(swn%R-^Zj8!s+i8$*d&pBk6|pJn@Ll58Y46<&L} zk<$3|cy|nr4bU!h@e^?G%P?J4SPtonmn<83i<8nHB)^C+RlK#s!z4SB7_b?!wmKLm z)sBJtFPtF!FK#t9>=3mh%-69Xts%bjP0aqP=U~fqEbLp3TK14`6o z=tAM!ceI(gh@!%Z@ZO!T>0bXYZL9ANitL;u&u5tUa|b5>Mc^DyVjG^^zPIA7sXN2r z5HqX#rW4*Y4Z2V`YaMWDp|jnXF{9(GS-sjkU-R94_#}DOry>w)&jm29bO5y!r}Pd<6FusP3JaPP%5msN~aarR3x zW`au#STWCQfhHaCZom56dXv$Q3zrbUQp+8O?a|K;=>rY0DyMhnU$84pBjq`;7X#{P z8pAPnEo#(BvK86gbrA1o#*AXlNl)aQ4Peu$F5aMZrpa!u&!x~n%%R!_=(PftBj(lMX@ z#NdJkoWk9o;Qe>(KDP-wkLQ4 zR)-b1>3+kZ3uVt`C#7G(_~ij(<%`i0v&PZ7#%`tG+*KD99PT`&!TOyXqKrfXU$aq$ zaf=Y2tNe+vsu!BN5s7@-{teb|F6=a%dw&LXqI{8al2>_WAl&ukg~M!JqXPXx-7t-5 zeimgI1x+9rM6h&giiuyfZ(Y?_Yb4+Qg0*@D3K;(Vos-Bapk(Crp{(qAPtzF7Hk^)k z&MwR9d)L3uqVCjDSc%QnAi+?4T{OOs5^Q`ph|E3Iw#9LA8W;si8EoX8k0%VAN@;uEesnKT<^pCwy9@A zSx0tCZ$3ymua-$(W>CJdE^8nJv;LVF>U$eJ_>}0+c8~(_PE)I(=XaZ9G5^E7^UKv| zOHXqnH5ISIWwOoe5=Vn>k&k{-Pn2h36amJ1)Y{ zzm!|Au!ws{JRp?*Hrs?%fTW6R&;6Z>a$@DQn^KfSpi;>cqXOdCV~0F}J~~leUU6u) zm(#ybgjdwldIz|@d58e)0dJI=Yb}KvUmiAxGujFe>mOfAc8m8hi4zL%WDpHr-W4cx z(&2RH8El)?ReZdt+~$_^jkWTiCzOeIeC<2miJ&ERus>r#@9KPVH9zA#=dCGv38sb% zmn2mp(TR|A-k*v^=`=o-r!EdanI;V9{TKxFHr3%$ytY2=Zr}KxnjQQ5ZK*%}f4seC zSQOnBC9Hr5lA0{ELX!~zNj6Dx&N(STuu0Nn0Y%9{vSfuuGDsGh3<^k&BAF&Dppqm= zkUXc=JNLc!nR(`$`SJbIhptoIRdwp@wbxpEACDfhjGKYsAz{*7JM z@SGs(RrJC7`+ldnllrNKJp~{GI4w;TUuH}z)O;|M-;q)7>bO z9Kd_lYT{czdo3T0e{lMd;-P7>cA@TDEi%hGVLcJ;#mx5*FFq0@{bGx+L`Tb%hO2d> z+Jv&b#&LS|mekp#cr?|?_S1~xv{mD&IPtwvCGL(o+n-+tOs|$4Gy6I}= z$41T8>>_V!yraj@EWY|Yk}{s}$#`kd^PEL+oS@? zdiDu}Dcs-Rqq_xGuZ@k{mBZuXHgGEH11&GF+Nmo0gu_yxyhv=fQx*a1m;1Wi$eXTW zvggkSoKH_ry`g8Mr!RkAYnH9WyqT6i7OAMJt{ywom{DwA3lL+Kr`Z>lyOm`bNTVu$GvDHdX z)Nm*TvkH5PaYH{$*9L8pte)-k-3o(Ej?(WoAH)-NA9{ z|4Q>32072t|9+>3Qns&cG5J#}rE{A&-BZm~NB5t^lqXIT-)eSucSVvT&ju=V_{y~U z=V343wxV5muIpj;2qdcA@>WNZ;~m*Pe>)4B`eHw^P5h?dcu6Xo^t-#?Ckw-}Y(iL> z5+oOQ?Z?WSjE}vsoE+eBLNsGqjO-n;RJGm2y@n3sc zXr-ykI5st$#N~afb0MYl@u=PQeF^4I-2|+tXnb^lLej-$`+Jt`mI$8Bl?y^H3KnY5YLujiSH4+k=g44eE z&_3L)#b4?$x7luPzU`f2k^mm%cgU9cYSj6_I`aK!_?t)5^c4s3f`m9-^YFw^^|F@_ z4^~-m+Ov1hQ}3l+9Z=j*>|?nyffm$TJL)6-?$esgJ(L`OMpBV6e)&>`+Gn*0iHEI%`CC=M!62!DNLi<97{pr}ZvSE5LB} zqWw`iYq^pf_bm~sbo`-adjqj4$hIP-&MSaq)sK|s-4hKU;TKeZk1H)lSATXL<$+z@8JBO`Nt)OlxMdd=F(v~PYb1XX^tBiE9_iMWl zEq>Lm13oJ}@w2j}ypO-;=Og?|)W_o(8Y{#a%k`ZiMwvh9yM$cTVXCbKMG&@6}vlj|VKNkXnnaWP&a{`BNFOw^u@HW zqSpJV+1PM&SYo}=CiQN4A2MreCr*e0Hu@yf>*>pnj*ktLl%Bgk`^m}Mc=jn)vbDb2 z;JS#5baid*i|p#D;!$R~JP-b_z)etGX*+1h%GZ<6{K`0tKnM?~kR8hTVY%<+)85j; zLY09Gsd*L#h>nHDd;tNAI$EVowJ?$IVbYk^S>6;2W;6o6Mj3D<0~m~K|YcD`VI8~XgY#_O^&-nV9>bXTu-uPI5k(4*IK(&o_>Jms*pLSsLH zkFb;>zk2WTGQ+p#X8xBDRBp+x7lfhrD*7Xm61m{*Aa^T4n0xX*&<=ZLc4uFOZo?1s%D=V;<-(Td!d+Va12+yVVbqRwz$Ro1U3S zfb#8h=tpM*yrkN2A~)JKvSvb=BkQtUhR&@Z#*-FlT;06~Ozj*3iWO7`$!j1W|@4*CZAHMZT1uXHP)}lm;3CbjPkLJX%jG-I&BPYD>LWk{#3TWwnOEfLw%EDuf5^tJ%=;bc;u>@iHneYCJmmh z8XR_|&c7Y3g+Du?&Qu6l4DZdvF0A8WQMM=%DPNCoLa7t0QFx{r7u~Jfzgn@V=*xW) z+3w6w-Iq@kkz|m{Dvd5&9!HolYg-WHCL=FYT9~K^__sTpR8SvsYaNWmN7qrX!C6&j zW8O*+F-Uli;w^NTeSLuecYQYC>~Y+z1sj5r_08zduGi~>%GnUnhz_Lh$s!{P5t-?R z-leN7yv5cs1PKsuo%fTe0<~5-2;zK8Vj^C1Uw=Wk%w%dxhKnHGPUjTuG4AxD>4Y zdM8965A5NRs!VOrm(2o5!)31mm+o znQaNXjwN*@#N7P4h~h z-YmK2R*WLzQ8WP^k+dkeHNz5P^1Di`s85*)IeC~T9t9g}ZQ?DImVQn>L*l9DdXVk& zA(+dTc0%`ZdQwnPohd4MswavGBF?ln)(L5YI`P;uWb_Slm~5@&6^RFBbTH*Tud z+2VV20#f#QBFNSzAM{B%!BT@RlNJ5^Hu!v1l2Szl{&|q@ z9xIM3Q|uM=hx;-j4&x>KZybJ1RY&rt)fq|?oWc}?>f4@(ziQ-L6smd5ih{65YCr;l zA<86}?qB3>CXJq@qK_Qr-aUEvwLfLG>G;QDWCkHmY^Kxpe~7?TA5$>(?zZ3g>HR?9 zq2j@bki{#Khj}1a3`ysP{FxhFJ_j~Rs?_U&PNeIeYc3y{Fu>kW0vj%Gw$C25@`uplHR?wH 
zwf&9h`T+yxO3?;QdrTCm@VM%Q05`cPMFbt$b4>Qe+NbHVqB>L7`d?I0VW{+M3WDBz zlX$vcFx%(_y)-^Dl7mf~KoVk;p6yD76VuQ#hQZi(t8(XVgHo~2pDp#?6c_ikK|JvB z@u}d~eZGWqjDphnsGYXRvYO%AX_@o#@1WD{C%58#2sX8Ka3*I-rU#u_@#;b3q*l9}Uc zL!?9h55|q;qXBa=&*tnp8G4w+ieBQg3fBb@0!UW34dtby9ltVtcwA+owO$0WcP~hE zz8v&?yBT(%wF4f0dRcTw9(at@EuYoa?w;IVNwH1uaksn_ zZG^a~K$^EyV_oS=5P>0JqG5rK9&`X?}_<0~9&?1)ez&bC6FcKe z5qeBK0h$nKnC9!Ew;Quh_HxatHjKQNyE|)CPvlM3g_|jq&hxmT&DB(h_jqw2+)87> zHw1xuDA&g>&J~DAcwPVSu}Yr_Jf3)gEHu(B=~bAq7zlF4ycAH1NNHV{tWgKq8viqHun_14309Cz7a`O1n}xb|ZiL+J{Ukg7|UZbs1gD0@N^Go|`?x*Os> zrI~AFU#e7F^CoRmC;sw>2cn18iN*a+@KvbJT$Q7`JBc<_lNoxCN8*B(V#J_41dKL0 z)^ZZDNHo7zB3c%G<5FVooe7lRyzgNC@PWyyrOZ%Ba;XU`2I0^|e|Jii8)bC?v(lSq z&ehM=lKnW&HB^7y?Ut5}M;ddj6hHIYqCMWB3Ot&x&qJW|Zg;*WY=JLK+6II*gy$-~ zmP@?G;((}8?ALr3EV16dK0l9UZi7P)=KY!j z0~eD>@tkJrvdT>Bt>CZQaX1~Xm+yC}n%G^kKaID{~4(C7?1m~kfyXAigp@q7tOdm(wG3}E`L_57Pt zGO%4u3=CwzS+H0U3{3GSh;eU@5s>C$xR0LVMQub!;r0u@y-n zbH)`^)RS@`;=h&_79P#m7B!E(ztJwN(ZEyxCkzkGgD^}_b|}WU{iP#vz_kxcX4 zM-*0wq)GYRm4N2B1p|uj-<=(;eKjj=drtOzoV9-($10)U!GuJTFux zghdOcc0@GX+An#{t~lQRj}rnjQ!0zu_N})^vKMaoevwx{fsO<37?BF_<$Y0t>7cWA z!u(nn*ZJ2ENgCgaRNS|a&m01iMd;{Cn9GS}g52bWg2*Gd240vy*~NI0m+}aLM3&3Q zvWTjKHyOSS%yhlgMmj9q01inrgDFK3@Sb0cN0AhkQhKkp=%u;0%KPIVjjL)FN4g6B zu}H4Jv>Hgv-0rT*aY~62^sLXlzZV@9RdAW{nf7bYCFjeBcqCV7#HDcQRcr$1q`ImE zW4eq&I5nSoy2FQFd(GL$uosLLUF2d#9hpF{sVp){qwU{`zHn?eHpPnSU8FOvUDuyn zUcj!8t1q9_A~9Cn%-A+h2yh7+?e60srUAa$#@k*>gd-s)EK3^+O?-hD;LgnGghv4O^ zre|Q-KMJA_UJ&(ESM=KtX?ptf>44F&d2HGZW4&|&8A2TR%AMU$a_mleD;=*i@{}vC zkt;I-c);wA6GBQsE^q;G+FHWje&CUZ2ra%fib8HleDsk~$6X49Z!f*rKc(%(%m*bR z;{%aFK|x=QzY3Y<>1N1?4N9Hg(aRVSvP=vCjibO%#;JYtM#;m&WBCJ*E6qLEBc3GP zJ+M@-GVOWYzuy9xPSBFH59?}@782Rh4a3W&qFNJiS&){QC794q(0ExX|4Y|afe#HK zFCYR4cw7jFSZB&t_J#sHZ#9V-dctlPG4K7s8NdvsE5m7 z;ypaJeFBg^HbL*~(Ck*2(V=*L!xjU|Tw{=*$UK!unUk7-MK<#pzBq5q#qj40Y57&% z{sYE6cag6_?=9%X_~?+(>)&4@{M`^itIMkxf3+EVK~v?k+FX^2`wgmiAZJR)-0HPu0d*9OS^g>a}_KEY~XERt&)}& zP|n%ECq@`fsZ#srwGgm_tAc3{10anzsvAOpy4LD+3ms5t2`=Ocn7S@H@}bg`OAybe zaU^LZlotVv`%E837V!J4zsLnjkb^&A5#4|05Et*lO>wvXW)RPm%Q?X5>@Y4Q|9vv- zNXft1$KQm62cX6O`XShn?a%-GjBTb5=^#hXv~If%p~Lxe5pWl{I!I-(BY*w91WDFO zN^sp($U1yxIT+H7pz(b-{>M;|H2ogQ_-ACtbG)a3r1VcFnUF>h1pmHcm=+A*-;rPb zj{FsSUkki>Sg0CC{O8h{p(223k%x&VN1x-Vanc{}Fy^F<&P;WQ^|=mQCcUP;v9S>z zrXNY1G~V&5)_K0{W4gpWx2WI0mrX!sJ&{0&6Q{5WnYo*M%i_n4%KUux+kR)yHoqf8 zmjy_IA0%kdk89@z>lBwHU9-LZJGU#nS}qBGSQ1ODD%#$`L9fzgKoNS;yW5G!J7Vchlzjx{wmzVyd8RjC~`(pwBv;-@kwVbu&GkVe{z?aX!9< z9{+}}7mS0&9H%jiQs!Am;1WdhK<|~o+^%V4Bz0e*16)a|=jNPa3HL6xW|1W)w%Y%O z$WeHav1p-@=HbiZ_dC|BiNyby`qEs0Inn>ioa}UWFrXa|!**yoVrrx*w9!@}-QP0N zvQ#_JFd|xI)&lbz!t_g6_Jm2E^GDyyTTsz;q8)?jH=J|2dt1|FzFW8>)ZO|7&wk9* z+i(ZIfDlVgBLqQnne1hG-~K+pnM5=Y|9*j>r;cr@;1K~9akU%27pk??*4Fmq!S=li zJ3=@rTwR>Q#GkuC`Zvlzo0;x$eyze5W$Laa@shZ@0sLC9Gc??&Y${34}YL~LQ& zpA4@iNBb!vm||)Mb@vcaEfiSH`hGHzM=ACaZEjT{cXk~&iu$HuLG#&Kv!Sr_oc@E( zC>@LAsR^7e-Ju9Aovit@jd??GcW2pPXN z9yu>gu6Ll&sB%U4JJ%i9k=b{ZMRKq6LKf(S1wZC}4&2FYD)sCQcx5RAUgynfB+~0D zB35L`jtA;DuidZof?=TX;;EKI&PwI0lC#G#r%wjcCDN~TSiz;vwqF=jxS^F7!!Rd} zeg~VQ*fs*y)_pX4DTB`xeYQSa{<^8`PeP+h9C$I(R3_4~$|t$l z_9p4HZZP*RZWE8}DCw9pWR=zOy66oL+>0;2W%$k2z29P#oIW?)5q28&n)fj}o!dN3 zKY;9zoo|t?4kk+$03D_SV0U4| zln|4dEk(%sU0lUGu6{ouwwXj{6n%E`^A`HhtWsC&sNzAIWgiz=P7V{Q+*;--vsUU% zn!%1dXNR5&{OkZJOJ^Shw|aBuIiZXwE&WmJ7jMv7ZImP9nblcPV8uDler;mHyoJx6 z0ZW79Ht)Tfl1vT1<6ovLUc*Lt%R#--`O#RahR*VRkH_&wZE@p|hCX94ES=K44Q-AS z_K>N#oI5v72rH*q%DZ=5Xx5ZqoHd_%@kYxO4k3Q^MVa-@Q-|KS#(9iSv@3g*8NRe8 zS5Jy*;Rtts;aK3?Y~!v@Qbk|%PvWfPzW6%0#MkSAu>e)y)!kY~n?U~zWGlT|{6>op zKJC`|k;Qpm7(iNHug=HI%p6Lf5Y0~p;nplPM5{!Y$@xQ-7T!w0JHONSi~Hcbf*%)? 
z(H=-~|5LigL!Ita_8DdPMfKpvEDpcPZxk~Or&li~;L5Y@+OSA^q9df2Z;_ZmikFngm( zEMJA^K*rPCOCtPJnqz{Lw%=X9kt5_IgCqYX%6a*cIUr93^PG(I8Z^X=1JzWEGCWJBCp1iY9eY?L4UaAnEwBD# znHS(VY^Sqg7NwX1Y*qp1`zNnAF!%oHK+O%%1~I~CVV& zgt$D+7qEp;IV05%x>-7TW}dV13L!{8YW$oa7Z;=|_>lGa zW{dYHJAIdwz+V_t`RVb>RaidSn*esxu86i&4oyg{30K6kQh>RlKiIjJPe>fhi-K@DC(DPB=*1I35WT=B?c0VMgSa;KQ2?@>jThXzT~&0oQK5N>BB$h z!LJ&xVW!S;REi+uL1jd=0OiHYAUQLVwE?gbVN=A(NzN(lk7!ef!wOv~ zDO^`n!M~=s7zLni&DQ5E{ef-&J{k+`V$HxYnAhT}n!n*D0w3f`Hu({O6u$xK|9?Zd z#{silaEXYffKJc`q38sDd)NgppRe@FqBwp(h5d4T!CPGf)H4)Muf@}i+j}QJ2aC-b zJqx8y+N9L$cd1^0Y(NH!OJ$45TjEBvBoA|$Xk2qnN2E>@a0pAy-;0f*^+W)wb|cVN zfCI5w2NY}M!)LC_$;o|n_SL$UZAv!r*1`z8a45m|Nr-P=F1u~|%L^DHet%rY@TK{d zYMg@sLN1vdM}@fsFMxH(c3wUc4sKxe=dLNBS)jwok)8^8vWnOqz?n#1tWG&(*_RwH zJqjLOw&JUe=b<3$_?ypzJG~xT*){~m(eb#ID1lC<;6a^S(+<1>k`H|Xsz~wxD)6Do z=UnaF``AZGCl6}`Cqj#Rk#LCi`7wv&OyE|qd{=Q>J!g3BogSR zC~eX=SzvkyjXtt{TF#0>4l?FBBf*epiSrbCi5_;n5e3U|X$EXI6V?xS=$?Lk9c^Z9 zkQ{|Z{)3{91!(~~68!c?ccq^2--*M&kN%y`{QY5xX}?kbKgeusM)7y(OZ=X}*Wzyg z*p24I;-pbPjSRyK?F1IiTE-?O-T;9@2R#eKisDV5&8Jvz9JaMDnMSBCN!~E}L&{I3 zm}@#+G1v8Ic}=zd6kp9n0X9dMApn@*3J59LUxkP{&)unZ`7C=I&`vwl4%{;6Lh=^; zw@vmV?0a7#Z+UY*1E}ZWm-cXvpesH3y@fOXL0q9k-SZpH&yK07uDyK(bNSr~x0l-E zXxILyw9C$tO11kp&FV)h!^)RfG4fZOXN7rtjqw{?7Zt}wM)ZWis;)1RRBkg~Vv-XY z8v5JbwvMU1S<+*r!h#i61iiqdv-y#Yc2kC{3_kKa_WSnQYjp92loS4icyc0J1cKE? z`-=_Qj~E@y#p8vo;|D;E{p~Hkd0Cp+O)uYXUiv`AH8Ss=3SQw*Lm{X-f zJ$ftA3@xS7F_5#$GgRi9v4f@BkK`A!sU>6e7lfCBaNQCXFZ`1oBWY@Rc(sYsh|{j! zfz|Wp{5VVBU-2Z^%u`-?R9dvMcDzd)xgg>3}^t+NdSM_ymvWQDbst|%JN7D#?dSUubTgu>F3oaRH zK0y9@YsQ>MXG2ols`WF?sf&5EF_H^34d6iwGW2jJOjc;|Q#}d6?zDZ6>;KXP@85QtYc?;b?37s`RBF_!3hH3YlS=6BVV!9@2 zBDx_|ZwsqTwb#2dR%mE2(jWa!y8Qa5Q{T*}z3w2Gw0N6*2t?jMhP7n*-}HurCLtLH7*6S4P0rjF6b$lqd9- zXW_pXPb)kXIJI0ypnN75-ibJKFkLaZj56FngOJE5$UinnL!frvw^0=J{iFD%kP@Z- z!?8?@c#YfNE4j&)X#o}ZXh6wKC8(p?vJKyTz2&;sOq|z+xm=ZmBHt7*P(gG7ruSF&|#vC&b~6+g`WZP+XdLg=$H0<_nYK_=-AMZ&g~_`c3%`>G4>0~M!OG@OmZ|mRyE|jL79aw6la_l_l}Ob+j=fehfuvf)k37a3 zYOj+H`r`q?l1ma0$^pFqb1Jb)4uTMUw7sY^`SM(t^L(t>=$i?>uKA3eWr9YO^=tzFnsBlvTXOVl zek`o%=YegP=LsRsu*Gm+6e3}lxZIC9mI04G9XTvWy~jb=lYjO#k%bv22jEA+yGzDz zxxt)3Ln^>F1XlrqAQ2qfCQC$6N__>kuUu+?X71YNlP)o1MnAB+5n%oOKz_N{V}k`V zE(Tyu!2mF+qhSWF3QD>v;F~~@f-zt7xy5l3kZ3_8(Dq6mD;PcuhSvGSYv5{0KG(#1 zh8@HDe>dj4;v#(*5jeN0awQ{ktSslD8ZRu*`Z7C zfa^1bU|G)S&f5@CaBsSqY%a?Kr5@4y+d6i!25TLIes%4>&?-44DkyjrX6NVEy!!n+ zU)Mb%#p)=M4afN=v)2+&Yq*TToByGsRX($fyqg#!Z4VUG8NJ8Z+Ky~j1jM{|3xHx! z<~_Wcge=0(R=y-PWHij%lTVRo97-UBkCVcL`m26d1C%2uh^=QnJ2^~GVi^h9^xB*& z1)*%f=xU}ZoF8L2lqTvv3Y7Mr+|b6kg@w$A_Vzc7$z>n^$^xaWdwY8~8j;%2;kUat zJUZ_F?Bm{0u>ewo5oq-^b+PX*Vi@sr~b@a2#1*xV~Qf=iVbQd1& zs^^|44~xd0qCLK1kv$2KlN5IXv+o9+S@BwJIM`@f>rZ~9HS zS0II$_)a2;Z<%^-O_$kTUf!^;K7P5528}U?6>sz@CVFen`}Qf8h(L&WtH^58A!$_? 
zpA>=WbYN0#=+O2Bp($&Q5Cj_1Y6+11he_WdK!hSsHZkqETFdY8I$XHkzCMc|W4}z0 z@>~UOSDZS!wwt3$7%sgCi={(FXdMMadA49wc&C9Kd?67Mn(Q%W6b?dJv^4W$vu8; zef}iI{7+Zd`fTGw+C}&8pRy(|e3ED%Yxw`Rx zRQ*j%VqL6IM_b}JL)Y#+)_yJJC#ipsqO6K0^!qr;OdduQpdU+uPtT}-)DT&zu%Y`uTo}l{^7e~F^Dy+V2lrS}%EOrObmiazzU?l6mMmuQ{RK8inY#lv zNKyBg_c_?^7!0OS)~$cI7JaOQ8?oLB_K5)&8|*$Y2Bgs)IkNZrU-3U|4gOuY^%dKVL&19B6FU4M4ThII zj5a_&UgI64ytV`;-N$}YP(d_Ynh6_ZBL1B$|39VHiOeVeSz-Y@UCwUVq3YT|4qkn|qUTK>}P zv7ulkj>B&sttZ86=(_XJ@LC1EQ)K~q_LUxDoIERdjckX1 zuY}`eiRzqmM;ZO>?+@7kaL4#O;10tmt9NhWm#}hLd>jr4F;q={*wa~qJdyXGSG6>I zC?gvqG;o^-)jM{%C*Q5aSill5B2FWZPL<*TCvBxn+GQv18ypcMhIQHkK;1HE^x9;o zhLRMH!&eTED?itC{ooK$sld;A2Y~)Vpmjar6>cwvv|ZHf@RhU3Llh<>1IrZ|J*T|# z9@M%jPn!zb=+)~2M(xi%DePx#hH`!f?q#`N-yKrjTp3=@3fPh`VXK;A(zzK5+@2?I zyFPaBNL8y@oPlt6^(00NM$V*?m{cNu;`E|=)S2_1dh$?gboauGjb9Z956rjxotT-YfJu>^=GXU+Z}l>EM{rOPts6$h$%10xC+K zs^ifYLYb$rG;L%{A-u8W-tl7MMmf>h!%1EWA83+nXvpt#y$q6o0z zlbixb8COtkdXrPcd_{VpGa9=3cOm^6u<&;oeFzAHxGT!&a4K`SCbs(IHUifHR4&~9 zI1_m4s%``su=cf&PUOFbCfUo}=#X}xYM-KRU|9@_iL@tcKev1`v zy!751QieMPA2XyF2|t6_lgbH~!=sRfQ6qxnvankeQE1>0!Pa?YME+$ip41S8@;s6U zo-Sa$Xz#)2Pkm|OE4s%4=@e$eY!copEU{>o+`UT5gIa$}5|imlu8ie?+LG5ELtpZz z9+?CK8DRd_I_ou?lTZY3cm=cA-@*E&0nL<3f=hNClyG-X9EQ;Xp(MuAyuwPsj)m=? zff?AfyWizhATj7afq7wX!a{gc^}6IUk*b*gP-zlCt25`O!+h7gZO|TVLH6GN+W#GO zUEI(NluLZJz#6~IPeAm}Zf*WAeTM7BFOEAO8QRk4fnA-F2=s8IzYemMoVWf2U5vji z5y5h{BOqn@g*B?E0B?z?Tg%rX<)?l}D{pSbXPG{wzTGMdK4%RCbBcTtnltjhCOYs4 z{t_Lz*diUSH+ho5I|)It{Xb?K2bk>0B!6X!d93Pp1hHZCiK&S|Km}$kmD$A67t*q% z@SYjp(|N}so?7+ zozZZ5brWo3zDIyAFSKL;E7y(U%l=-To<&Vfk|XZ_Gt9B0u{`@3I zmQqJ@3%s(cjNq=3xSRH(Z|p-aWJvlnh&Er|dIHev2Ep`Sx9Q41>g~k3~>e zxFJu{5%KVov%_!2sy>>J_}0_Fm#2a!IC^2;u19gk>FmggoRAK*PU9*{?qdMGHb3eU zN63PVn~w`zRZieQA=M>J39t?Wr`MRmxPd6T@S?XN*vk<10A#`sUzKxq<74cwehM=P zX-|G=M@Nx-=11@aQEsvq(|j;xZ{3=c+;UV_!8Woln69x>?(Xl$==LItWiL_9@60U!$Gqg`-owb1qpJ>Ncs|4;AX|HYUKvv#JquHf9^ME3h~*u{|twL7R| z!v>tRo~xW9aEeobKH5zFYhpZ#OTCt$f4lp-xnmDXW*{nal%xC36c)#6JTUMZfHIkYIrMmX=rY+@zdbI1gCORDIy03&c3}l^7VPUXQ z%tN4Cq_gRTTLZ@s)q~~!6hWoyQob`)lfa+P%_4;n5*987bvoK4#Ke!2bqbH{)TUxR zkB^Raxbv#G_Nsc5=BDG~$pel6U$lt2*Bn@qjFe`U0F9=Ki;1QMmf{8~2PGZ6<>;oP zZ4#oJC41%mCl!#ok&GMQI9Z;RD}F1S4HU!SctF9)`YQU{OPfo=(21eb>w$Fr#z3i4g>>U{PRK1Qk_ZI7nEp zhFMd}!@&$so~pFfVz_8EjO@clxC4FV0e{{KGbYy_&PhbBOoMe3N|oT(6=s(srAgnu z+lf%RQiY5+`?U_th5?Vk6aTeU#%80}OTylqOtrsU^GoOy0ZNeI3cJJ2?nz8_&fgSopeoL-|FuCsF)BXJRO~E=taFWg|=2JDE0&@ zV^&R9Czu>zK+}yTvps(b-&U7fe(zthepT~s0xn;6c@KVjQhU8x(jciC*f&(07|i1f z0`|HK6w?(QRK@I^PkA-eB#@Ce*Y}5msjZoC_F}+0IA<&JVLJrt(|ZXMT2Cbqx&{zL zC~D^y{Ed7Jrr>&+zGCrpoZ&VnB)vv6fM zZKk~SX!7X99KU`)k7?ell=-;5E+Ob3;jn+_>A>LKeR&DrorknIWDIxs^zzmDf6=Wm zW7=io_aFf~o8LI7$VfOnif zlJ*w7*szq#%oT3TjRV9*L7)xu1DjLKIHZT2KCvW>ATdTd58)0na1?5Jv`{ba=12TU zh5Hz^HN3QSs@fsvwmdoC_FOwBQVJFQ=8>$`4kMx<@MjSA`Z< z0b91B1IV;Vz)sb8s!Ex<=ztnPqJcgU>-<2@*kDGgIek12%lj;~MPz!cq>rT<+&*KJL5UhrTBzAhYFquY^n66O0BT z`CfzGtHhPh4&5j;?ekyEL&ClSoQ5R=blwcg>&9z5Lsck(Ua&aTFeoQcD+UzU%OoxW z%;!$>ri1~Kb-iS$#k2c8?sSLgo1rh%D)4w)0u=otZ%1`P^G44B_IGV!`r~TZo@*y^ zy3aq3r@r}7f)17>ORM~TFE=K>ujjc%f#kC?27=L0BpzwWXXM;F3|LXRxQ)wOfYHBC zJ^fC~WvR6xixMi4f+xrOj_%97)b3{|Pq1%grSp8#cL$-eUWEjb9XuwT^p7b)a4g+O zdQAlK8u<`0Y?%#|zlSL{eFoxa@>`c}h4^B@vh)gcDS7prj1dl9p;3|Y-nA11EfPj1 zCTyh$Tnvjio%7WP@ezWctP)t1;A-5zih%Q_w*KkC4+tjgGOJsCI>el_Ui$9NShLft znES6S%cn%mjAD=MzNc%F@TM5=x$9<>iEpUQCw(w7f66On8(;Ny)9ay8uLDNv=sU~E z{JfO9()QdVpR&u#y+6F+TSAsEz{%;BUQ?)N-yL#64`2LQiN z`VNMxlI;Z?n@s2-i&L=<;d}^(`m6hZ^SpoABvFBrU4@Z1m(^N`ydDu`f9LetlFV!@ ziCg`FP>Up;cE;OxkmSwuSZU=(SvZ)wRgJ2*Wu)OMex&FX*5{G)*!rLd>YK!fSr8%& 
zsE?FUUqTwrUyQF9>j;6xYA6$-punPykGbDKJV%VL@l2tu4^!uw!v;+w#4YU3o8WTE zCkV<;4{YMa1v8Y45*NQtKlP(JT|h5vv#Fwvi=-{atFU__r`x&58box0R8?Du;sPQ_(h zy>C6x`MTjEF^qtb=@)*7Q+H>GY5wAHH~zshR#pTbV03O}Nn0?XG&hd$eHk&ox^HTb z@-(Us*)vnWmzUJcG1$_zv5Q9}T;!I#%Y$Pb7ftLxdEp|NQnw<0pY|pTJEo-(hdkd* zQdeaMr(JGk2@_nk_7gdCw5OkC?i~N^S$Z@(G*R^NR_5!}(Z)-c$h^hInovxopi)u$ zi(s{MCO4XVQ{m3#m&m2HyEO`Z3xyNz+Ly<@`S{p1UR=~S8B|6ox=#cJ>pz=L@DcFO z6)R23BzbR`JAp)8XDc*&FLS|T$*gj_hm=fh90|+0I?&101cH>-CULgYFO!OGP98@>U=08Py-N zm2IxPGR@YF7tWD@9?rfLgWY)6B!B(WbJ@I8$RN8G>dPV9=eXq13)ZIS?I$l(Jl3w- z#z$PNX~|!p}TN_GFL@jM!&U!HDplhikIK#)=QN@pwAMzKp&uKu6cJ$Q}~%L zH@asye~^-pa6J5?nEimFflxqQVOZ@1X?>fQ+Rzs&1ygdCa{9ajj>(>_m^7(S+9I;NPec<`Sc+2&K(xtS!3| zud${+WgddPuO0SVm0y#eR8;~+Dh!V78mWk*qoaP%9H}2$%mpZ9@#MAr00=tn)`JPPSQ!mJ z%)?tBqIXL!>#5MjCTrzG2RmP8!w8Fw)1%)G2;FhHb%P|pc>q5hj~5d?PUyf?>@bi|`n(r^)sb)a|l@z0Yxtj2r(m z2xy*wS)Z;QEHSM|YhRS>>70#bm(6;|&Ais!vw5_3hey9ztLG{rM*?TPRdp&lG{nhA z<_(}>E-iP)MP&={t;L?oj!NFyl@KNgc!9(td#$1Wc7s);vZw6*oyMUZU=VHQs&YS& zUan?>J{AOyOSJp^@xi9B`?rr97k^D>eF5#EN;bBZiKeNSmxD>K9HnW9Lw;Q6k@gk4 zad#-y{Yt4_@VSK_4`_iP_!o$Py7al7DqA%j@=3IHq$sdY*884%GX+zs!nGrUl=Vkw z3{%!*va_66$bzq+YLcn$Ud|v=%A`RAjm!b`#xfK?~Y^Oqo5b2KG_A)cF4b zj}^8;hNX868onUQjqw1}W;3YzTGB`Ct{f{0RLwIpYm4Ckt1=K91_1T7p-=zTlga-^ z6}H+BbOVsri~1)L1h&U08Mm7!OJlrvE?e%si8t`29sv8Tu`-&0Sz8jEh~)B%9TIb! z$chbUW0OF|{d@A66Cfkm^(N!gJ_m&sjZHWPwqsyP7Xg8FlI@CICZ#B>j;rQC&$$BL3)5>bUykj^de9|LclCY(qiYK2+{s1&iPwOrKMNxm#gaQb z1H+_Qrf!gUQ6Mlx+W|lCEtGTfDcl&CocZ*h#S)BSW&92GVTORgrOhnxKs z&}u*$FA!W6p6vhV?m8QqT)HhyiXg>_ zwO)`s3uVt>A3krOEO|D3(t{fM`OV3118gkbs2rng3djL#Lk;wSO_iv0_0IuvRi7jg z0BKd)@S@*quNAOl_hJEBWer7RVjJn-Vo*5s&k13pQ(@pH6muhhekEWM)A{Y7uJ}FT z2R7eWSvi{+obIDHl-KGI&bTcspFyU0W=NBmlZ;L`-S`m89FycklRpko$*^Q!+CK** zC8rqT($;8wSY~tQc5dbLk|o<71SZKidq?3SEh8&%7oZ?zf4^Z1R=|(JDfb&QnR$Se zQf$+1n>_}%GMopb<9uHvPN}flUtd#3+>ec(VgtzooaZdV(p>;N4Kr0*L12@wtJb|G z8?LI)Wyx0lh@H%AJYY#|025TeBEa6+(min1CZ_juwI5^K7{w$R_<(OJ#5NtC%EFEZ zdi$Z+jKBtngS9WJgJMG`k0>{P%rM9#e5-sboc_kxQ%{{^1|5fBxP~jw;_qZ&U2ZzO zGG+rt38YRgu@oFo-_k^-^bOfUPk^0SbESbr4=AmoKglK=Sb~9vhExOiQ!q4kMLP}{ zKS0}-Vq(rI3?((?1b9w>`Amo?Rt5Vh-mv=-s5=854E_nzEASXY6LYn-``?K?6Ih!$ zKqK_9;jgTI*Ob4Xjt6TX&*S1~!S-h&NHl_E{TX&b3!@Bb>lyf6!PMJ)Bi1Ck*Q`Ab z*5V8Q8AAd?XUD_80H|&nU1)h2Fm#0rjo8s7Vp1YjUyt+v^&yo-%Ktmy;1kw}pj_Md z<0EA1#>JyO@83~746jT*LSb@k)Hh^b1ovI_rkIoQSm_H3U~hSrTI8u?3^84CZEJ*| z&*Jkvu8xz1)yT>KKOl*q5wtPnN4xOqI$9;P>>#trWmXO3)^%W`7Hjn)m-#XeG{IrGs5O5@6C`rZa;AE%De1VINQ$ zy$-y2N}HQ6z72RJThPc>a%OYJ7Sjb*du`3Zr4>%1|LhiDKcfs)ur*t5H=P16l+Up) zc+a-lxK(%#qd*>U$#-ZzL|$deZaCD~soeA<^S+MyvLBz7`gd8c=Rfj*_}}@TYw=v4 z(I{8Uz`=4DEUTg}1?ekZnv&pRir5PUZYFPJcN)CXD(-g49@dpdUZm)1bzgzm${CAo&+fm{iB zGg5N$bv4`zfG0vjO4~etJ-@f4j_Qu^HJxH@OqYa`Q@ACl+kx*AkMhe3$%y`(Mh=(V z_2%QLH10LiSLs;Nl63vSz0(msJoGrE8dd66Rh3kM73-69>|9{95Yeo!_1g+|rY-)b zqB*KfQdK#dW<_inWm+?fbz+1(- zH@ck9Gc8fmX#Wf4jXZ%_S8uzgb%c(RX6?l9v{>BC`Cgh2-4T1>?9Vr8D3b63- z@F={G@XLFq3*n^WlQ`F``a16#vFVe72p@G6F9;afL3cVRFLtJ0`{&9)?ym~1S2kV% za5w}LKr=-^NgX;0g${of@2&0Ew)}g0vZheF;5qWAH0g(^Z~-5tNtEcIt+Xt}ht3jg zOaxj1XTDBiP5agZF1Keb(*rYnVt=V(V-5Zs^L+Ik(2o$4deuAQ4xDo_`e;h#X7Dw< zrTU#RE26qb)Zyz8aL))MT5lOm;J{Ny!?=ANqVl|Z+uaM2(TJv_f<*G5E+iML+Ru8{ zO`THA+-Ht}PNj6@nHal6n&(csAQ4LZy9FmYH)MyhVbh>vj@SpzcNU_V%iq>C%OXm!jywf%itW7V}T zNGCU4dczL2X#VA={4RbN>$Z-(ilSbRO)D9>20n(z#ghQ$q$%bobqjlhdyQ~M9_~2# z*I5=IoCov%3cBWdvR;S$+>1IR-K_-~@n46dz)?W8k|K5LQz@a$!m;ItjBFR zoHs)e87Fp*wrSRmGGiHY`15O>vXW5j{To*`O`&Ehm}A6{Hfew|5bk+h1_D(rSEMsJ zul44@qoZGelq}Q8<>|>V>PF>o6tJ?%p9Kemn0j}T zfK<;*+Y%*#2p~9Trv)^2Aq~QABE`5-G}#n6XJv)S*TdQ*W-%mh(PXQ8xbHL>g?IXc 
z6jlRCPT73ikXdf!qZW}s&z_F_)%!@}&K&AjARu^$Da2>WxZ)19Z8sycp~_fl;+Ef ze^Z>Rh2RcGlF=mkLaOJO0|<>jbdsL<{o6Wkqki++Qq;D%hh!f=QDbaLdqRo>mm(({ zyh#jFxVB-vOgvp$mXrh|ga9t`?|;1PHnN;3zhO)-G4^TWH)v5`L_KTGFP}<|^XCo6 z4Uva#gT?3&le6b6PUh8I8ZHupooK1JZ9TEl!L;Yjs|(}dZPQ5RDC?#lgyTw-mIs(` z&j)@Rt51}!u<($C)`Sa8=y=H{wqLRnOZtSioy7?*KhWEa;#gCZf_#W$+-0Iy+fm`p zS+u1B3bcSpEpdNz`DLok9_4t%I9o7RsnZ)(kk)39$VAc}YUV}GvWI<+y(~e>JSXyv zAZ=NKgF!?sb_}YH7nU7x2P|5ZRh=ebFBl0LB4C+&@(=ts;HMbjs4p=QUHVT$_zO%O z@UyA|u5;ib!&g@hY76rvZ+{VYcLj%4x^^Jr$6)sEhqLthj(N*dB z@=lWS-02sJS+ebo{s<8=Xq&kqrTM)v zttMDpUI-~rkSPFtexn_oNzvV4uApI{Ct?2zB{+a0SYZRvl&O;t#C+ue9NNGWWV^Ic z#;@wdm}c()Xk1Sh3}Qj^RwX>R3)ql|IILTiBcVx>eJqZR#S#YL>ZX~I3}5orr`g;B z3HzC*k63M47T`WYN*?Vm7oAp=AVCW)JCrLLF;e+Uuc73{P)_i`j0Y3sV!5SZ$Ge?X z=Ls?$WyV2khzI~V2OAq21!^Zd2K8pfTI&DHgNI0~*)}?5YYHa&MgFDI+UH~C?u z@D|0rZov7k0R)B$ytVGG? zG$723gy#Q9_9_XbP(!UJam`W3O7A2} z?-_}u03DHBtj^JziQLGp!Re6TlH_n0q;$`%Mp+&cCDbSDc@wy={0!QT*J!xIXks%e7U^~U@y z4OcXs26M?nc{!}*1;JrdEP>7)9Wl-xJ#50ifzz&>k^FD~#KFJWjY{Xb9#Aa^*{CxK zJjd$?>Wg38-rZ?`axrS^zq~`bKU{+(fP}K(E)^jPQ`y, layer_onto_os: bool) -> Result { - let regions = regions.into_iter().collect::>(); +pub fn layer(blocks: impl IntoIterator, layer_onto_os: bool) -> Result { + let blocks = blocks.into_iter().collect::>(); let mut image = if layer_onto_os { - let first_region = regions.get(0).ok_or(SingleError::Layer)?; + let first_block = blocks.get(0).ok_or(SingleError::Layer)?; let mut os = lc3_os::OS_IMAGE.clone().0; - os[lc3_os::USER_PROG_START_ADDR as usize] = first_region.origin; + os[lc3_os::USER_PROG_START_ADDR as usize] = first_block.origin; os } else { [0; ADDR_SPACE_SIZE_IN_WORDS] }; - for region in regions { - layer_region(&mut image, region); + for block in blocks { + layer_block(&mut image, block); } Ok(image.into()) diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 2697d03..48427cc 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -273,7 +273,7 @@ pub fn assemble_file(input: &PathBuf, leniency: LeniencyLevel, no_os: bool) -> R pub fn assemble(id: &SourceId, src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { let file = parse_and_analyze(id, src, leniency)?; let assemble::Object { symbol_table, regions } = assemble::assemble(file).map_err(|_| (id.clone(), error::SingleError::Assemble))?; - let linked_regions = link::link_regions(&symbol_table, regions).map_err(|e| (id.clone(), e))?; - let mem = layer::layer(linked_regions, !no_os).map_err(|e| (id.clone(), e))?; + let blocks = link::link_regions(&symbol_table, regions).map_err(|e| (id.clone(), e))?; + let mem = layer::layer(blocks, !no_os).map_err(|e| (id.clone(), e))?; Ok(mem) } diff --git a/assembler/src/link.rs b/assembler/src/link.rs index 90339c9..346e1e2 100644 --- a/assembler/src/link.rs +++ b/assembler/src/link.rs @@ -3,12 +3,12 @@ use lc3_isa::{Addr, Word}; use crate::assemble::{assemble_instruction, AssemblyResult, Object, ObjectWord, Region, SymbolTable}; use crate::error::SingleError; -pub struct LinkedRegion { +pub struct Block { pub(crate) origin: Addr, pub(crate) words: Vec, } -fn link_region(symbol_table: &SymbolTable, region: Region) -> Result { +fn link_region(symbol_table: &SymbolTable, region: Region) -> Result { let mut words = Vec::new(); let Region { origin, words: region_words, .. 
} = region; let mut location_counter = origin; @@ -40,16 +40,16 @@ fn link_region(symbol_table: &SymbolTable, region: Region) -> Result) -> Result, SingleError> { +pub(crate) fn link_regions(symbol_table: &SymbolTable, regions: Vec) -> Result, SingleError> { regions.into_iter() .map(|region| link_region(symbol_table, region)) .collect() } -pub fn link(objects: impl IntoIterator) -> Result, SingleError> { +pub fn link(objects: impl IntoIterator) -> Result, SingleError> { let objects = objects.into_iter().collect::>(); let mut global_symbol_table = HashMap::new(); @@ -59,13 +59,13 @@ pub fn link(objects: impl IntoIterator) -> Result } } - let linked_regions = + let blocks = objects.into_iter() .map(|object| link_regions(&mut global_symbol_table, object.regions)) - .collect::>, SingleError>>()? + .collect::>, SingleError>>()? .into_iter() .flatten() .collect(); - Ok(linked_regions) + Ok(blocks) } From 786c569d6bcc165d95bc1f5a8b14712ec755d334 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Tue, 28 Jun 2022 16:32:48 -0500 Subject: [PATCH 73/82] assembler: rename 'region' to 'block' or 'program block' --- assembler/src/analyze.rs | 42 ++++++++++---------- assembler/src/assemble.rs | 44 ++++++++++----------- assembler/src/error.rs | 18 ++++----- assembler/src/lib.rs | 4 +- assembler/src/link.rs | 18 ++++----- assembler/src/parse.rs | 26 ++++++------ assembler/tests/inputs/very_many_errors.asm | 6 +-- assembler/tests/integ.rs | 10 ++--- 8 files changed, 84 insertions(+), 84 deletions(-) diff --git a/assembler/src/analyze.rs b/assembler/src/analyze.rs index e205d87..d71a815 100644 --- a/assembler/src/analyze.rs +++ b/assembler/src/analyze.rs @@ -5,10 +5,10 @@ use std::string::String; use itertools::{concat, Itertools, zip}; use lc3_isa::{Addr, Word}; use crate::lex::{LexData, Opcode}; -use crate::parse::{File, Instruction, Operand, Region}; +use crate::parse::{File, Instruction, Operand, ProgramBlock}; use crate::{get, get_result, SourceId, Spanned, SpanWithSource, util, WithErrData}; use crate::assemble::calculate_offset; -use crate::error::{Error, InvalidReferenceReason, OperandType, RegionPlacement, RoughAddr, SingleError}; +use crate::error::{Error, InvalidReferenceReason, OperandType, ProgramBlockPlacement, RoughAddr, SingleError}; use crate::error::OperandType::*; use crate::error::Error::*; use crate::error::SingleError::*; @@ -29,8 +29,8 @@ impl ParseErrorsAnalysis { } impl MutVisitor for ParseErrorsAnalysis { - fn enter_region_error(&mut self, span: &SpanWithSource) { - self.push_error(BadRegion, span); + fn enter_program_block_error(&mut self, span: &SpanWithSource) { + self.push_error(BadProgramBlock, span); } fn enter_orig_error(&mut self, span: &SpanWithSource) { self.push_error(BadOperands, span); @@ -365,7 +365,7 @@ struct ObjectPlacementAnalysis { errors: Vec, last_start: RoughAddr, object_index: usize, - object_spans: Vec, + object_spans: Vec, } impl ObjectPlacementAnalysis { @@ -384,14 +384,14 @@ impl MutVisitor for ObjectPlacementAnalysis { self.object_spans.sort_unstable_by_key(|span| span.span_in_memory.start); for (op1, op2) in self.object_spans.iter().tuple_windows() { if op2.span_in_memory.start < op1.span_in_memory.end { - self.errors.push(Single(span.id.clone(), SingleError::regions_overlap(op1.clone(), op2.clone()))); + self.errors.push(Single(span.id.clone(), SingleError::program_blocks_overlap(op1.clone(), op2.clone()))); } } } - fn exit_region(&mut self, _region: &Region, span: &SpanWithSource, location: &LocationCounter) { + fn exit_program_block(&mut self, 
_program_block: &ProgramBlock, span: &SpanWithSource, location: &LocationCounter) { self.object_spans.push( - RegionPlacement { + ProgramBlockPlacement { position_in_file: self.object_index, span_in_file: span.clone(), span_in_memory: self.last_start..location.value @@ -443,29 +443,29 @@ impl LocationCounterState { fn visit(v: &mut impl MutVisitor, file: &File, span: &SpanWithSource) { v.enter_file(file, span); - for region in file.regions.iter() { - visit_region(v, file.id.clone(), region); + for block in file.blocks.iter() { + visit_program_block(v, file.id.clone(), block); } v.exit_file(file, span); } -fn visit_region(v: &mut impl MutVisitor, id: SourceId, region: &WithErrData) { - let (region_res, span) = region; +fn visit_program_block(v: &mut impl MutVisitor, id: SourceId, program_block: &WithErrData) { + let (pb_res, span) = program_block; let span = (id.clone(), span.clone()).into(); - match region_res { - Err(_) => { v.enter_region_error(&span); } - Ok(r) => { - v.enter_region(r, &span); + match pb_res { + Err(_) => { v.enter_program_block_error(&span); } + Ok(pb) => { + v.enter_program_block(pb, &span); let mut location_counter = LocationCounter::new(); - let Region { orig, instructions } = r; + let ProgramBlock { orig, instructions } = pb; visit_orig(v, id.clone(), orig, &mut location_counter); for instruction in instructions { visit_instruction(v, id.clone(), instruction, &mut location_counter); } - v.exit_region(r, &span, &mut location_counter); + v.exit_program_block(pb, &span, &mut location_counter); } } } @@ -572,9 +572,9 @@ trait MutVisitor { fn enter_file(&mut self, _file: &File, _span: &SpanWithSource) {} fn exit_file(&mut self, _file: &File, _span: &SpanWithSource) {} - fn enter_region_error(&mut self, _span: &SpanWithSource) {} - fn enter_region(&mut self, _region: &Region, _span: &SpanWithSource) {} - fn exit_region(&mut self, _region: &Region, _span: &SpanWithSource, _location: &LocationCounter) {} + fn enter_program_block_error(&mut self, _span: &SpanWithSource) {} + fn enter_program_block(&mut self, _program_block: &ProgramBlock, _span: &SpanWithSource) {} + fn exit_program_block(&mut self, _program_block: &ProgramBlock, _span: &SpanWithSource, _location: &LocationCounter) {} fn enter_orig_error(&mut self, _span: &SpanWithSource) {} fn enter_orig(&mut self, _orig: &Vec>, _span: &SpanWithSource, _location: &LocationCounter) {} diff --git a/assembler/src/assemble.rs b/assembler/src/assemble.rs index 775108d..fd5055c 100644 --- a/assembler/src/assemble.rs +++ b/assembler/src/assemble.rs @@ -136,17 +136,17 @@ impl Instruction { } } -struct FirstPassRegion { +struct FirstPassBlock { origin: Addr, instructions: Vec, } pub struct Object { pub(crate) symbol_table: SymbolTable, - pub(crate) regions: Vec, + pub(crate) blocks: Vec, } -pub struct Region { +pub struct ObjectBlock { pub(crate) origin: Addr, pub(crate) words: Vec, } @@ -345,11 +345,11 @@ pub(crate) fn assemble_instruction(symbol_table: &SymbolTable, location_counter: type ParserInstructions = Vec>; -fn first_pass(region_data: impl IntoIterator) -> Result<(Vec, SymbolTable), ()> { - let mut fp_regions = Vec::new(); +fn first_pass(program_block_data: impl IntoIterator) -> Result<(Vec, SymbolTable), ()> { + let mut fp_blocks = Vec::new(); let mut symbol_table = HashMap::new(); - for (origin, parser_instructions) in region_data { + for (origin, parser_instructions) in program_block_data { let mut instructions = Vec::new(); let mut location_counter = origin; @@ -366,14 +366,14 @@ fn first_pass(region_data: impl 
IntoIterator) - location_counter += addresses_used; } - fp_regions.push(FirstPassRegion { origin, instructions }); + fp_blocks.push(FirstPassBlock { origin, instructions }); } - Ok((fp_regions, symbol_table)) + Ok((fp_blocks, symbol_table)) } -fn second_pass_one_region(symbol_table: &SymbolTable, fp_region: FirstPassRegion) -> Result { - let FirstPassRegion { origin, instructions } = fp_region; +fn second_pass_one_block(symbol_table: &SymbolTable, fp_block: FirstPassBlock) -> Result { + let FirstPassBlock { origin, instructions } = fp_block; let mut words = Vec::new(); let mut location_counter = origin; @@ -387,16 +387,16 @@ fn second_pass_one_region(symbol_table: &SymbolTable, fp_region: FirstPassRegion location_counter += addresses_used; } - Ok(Region { origin, words }) + Ok(ObjectBlock { origin, words }) } -fn second_pass(symbol_table: SymbolTable, fp_regions: Vec) -> Result { - let regions = - fp_regions.into_iter() - .map(|fp_region| second_pass_one_region(&symbol_table, fp_region)) - .collect::, TryFromIntError>>()?; +fn second_pass(symbol_table: SymbolTable, fp_blocks: Vec) -> Result { + let blocks = + fp_blocks.into_iter() + .map(|fp_block| second_pass_one_block(&symbol_table, fp_block)) + .collect::, TryFromIntError>>()?; - Ok(Object { symbol_table, regions }) + Ok(Object { symbol_table, blocks }) } pub(crate) fn get_orig(orig_operands: WithErrData>>) -> Result { @@ -405,16 +405,16 @@ pub(crate) fn get_orig(orig_operands: WithErrData>>) -> } pub fn assemble(file: parse::File) -> Result { - let region_data = - file.regions.into_iter() + let block_data = + file.blocks.into_iter() .map(|p| { - let parse::Region { orig, instructions } = result(p)?; + let parse::ProgramBlock { orig, instructions } = result(p)?; let origin = get_orig(orig)?; Ok((origin, instructions)) }) .collect::, ()>>()?; - let (fp_regions, symbol_table) = first_pass(region_data)?; + let (fp_blocks, symbol_table) = first_pass(block_data)?; - second_pass(symbol_table, fp_regions).map_err(|_| ()) + second_pass(symbol_table, fp_blocks).map_err(|_| ()) } \ No newline at end of file diff --git a/assembler/src/error.rs b/assembler/src/error.rs index a9d37da..b2ba5e5 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -116,7 +116,7 @@ pub enum SingleError { TooManyInputs, - BadRegion, + BadProgramBlock, BadInstruction, BadLabel, BadOpcode, @@ -127,7 +127,7 @@ pub enum SingleError { DuplicateLabel { label: String, occurrences: Vec, }, InvalidLabelReference { label: String, reason: InvalidReferenceReason }, LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, - RegionsOverlap { placement1: RegionPlacement, placement2: RegionPlacement }, + ProgramBlocksOverlap { placement1: ProgramBlockPlacement, placement2: ProgramBlockPlacement }, NoTokens, NoOrig, NoEnd, @@ -141,19 +141,19 @@ pub enum InvalidReferenceReason { } impl SingleError { - pub(crate) fn regions_overlap(p1: RegionPlacement, p2: RegionPlacement) -> Self { + pub(crate) fn program_blocks_overlap(p1: ProgramBlockPlacement, p2: ProgramBlockPlacement) -> Self { let (placement1, placement2) = if p1.span_in_memory.start <= p2.span_in_memory.start { (p1, p2) } else { (p2, p1) }; - RegionsOverlap { placement1, placement2 } + ProgramBlocksOverlap { placement1, placement2 } } fn message(&self) -> String { match self { - BadRegion => String::from("invalid region"), + BadProgramBlock => String::from("invalid program block"), BadInstruction => String::from("invalid instruction"), BadLabel => 
String::from("invalid label"), BadOpcode => String::from("invalid opcode"), @@ -180,8 +180,8 @@ impl SingleError { label_pos_width = max(4, min_signed_hex_digits_required(*est_ref_pos) as usize), ref_pos_width = max(4, min_signed_hex_digits_required(*est_label_pos) as usize),) } - RegionsOverlap { placement1, placement2 } => { - format!("region {} in file occupying [{:#0o1s_width$X}, {:#0o1e_width$X}) overlaps region {} occupying [{:#0o2s_width$X}, {:#0o2e_width$X})", + ProgramBlocksOverlap { placement1, placement2 } => { + format!("program block {} in file occupying [{:#0o1s_width$X}, {:#0o1e_width$X}) overlaps program block {} occupying [{:#0o2s_width$X}, {:#0o2e_width$X})", placement1.position_in_file, placement1.span_in_memory.start, placement1.span_in_memory.end, @@ -232,7 +232,7 @@ fn report_single(id: SourceId, span: Option, error: SingleError) -> Report r = r.with_label(Label::new(occurrence).with_message(label_message)) } } - RegionsOverlap { placement1, placement2 } => { + ProgramBlocksOverlap { placement1, placement2 } => { let (first, first_pos_text, second, second_pos_text) = if placement1.position_in_file < placement2.position_in_file { (placement1, "end", placement2, "start") @@ -399,7 +399,7 @@ impl OperandType { } #[derive(Clone, Debug)] -pub struct RegionPlacement { +pub struct ProgramBlockPlacement { pub(crate) position_in_file: usize, pub(crate) span_in_file: SpanWithSource, pub(crate) span_in_memory: Range, diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 48427cc..11fa46e 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -272,8 +272,8 @@ pub fn assemble_file(input: &PathBuf, leniency: LeniencyLevel, no_os: bool) -> R /// ``` pub fn assemble(id: &SourceId, src: &String, leniency: LeniencyLevel, no_os: bool) -> Result { let file = parse_and_analyze(id, src, leniency)?; - let assemble::Object { symbol_table, regions } = assemble::assemble(file).map_err(|_| (id.clone(), error::SingleError::Assemble))?; - let blocks = link::link_regions(&symbol_table, regions).map_err(|e| (id.clone(), e))?; + let assemble::Object { symbol_table, blocks } = assemble::assemble(file).map_err(|_| (id.clone(), error::SingleError::Assemble))?; + let blocks = link::link_object_blocks(&symbol_table, blocks).map_err(|e| (id.clone(), e))?; let mem = layer::layer(blocks, !no_os).map_err(|e| (id.clone(), e))?; Ok(mem) } diff --git a/assembler/src/link.rs b/assembler/src/link.rs index 346e1e2..8c2728c 100644 --- a/assembler/src/link.rs +++ b/assembler/src/link.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; use lc3_isa::{Addr, Word}; -use crate::assemble::{assemble_instruction, AssemblyResult, Object, ObjectWord, Region, SymbolTable}; +use crate::assemble::{assemble_instruction, AssemblyResult, Object, ObjectWord, ObjectBlock, SymbolTable}; use crate::error::SingleError; pub struct Block { @@ -8,12 +8,12 @@ pub struct Block { pub(crate) words: Vec, } -fn link_region(symbol_table: &SymbolTable, region: Region) -> Result { +fn link_object_block(symbol_table: &SymbolTable, block: ObjectBlock) -> Result { let mut words = Vec::new(); - let Region { origin, words: region_words, .. } = region; + let ObjectBlock { origin, words: object_words, .. 
} = block; let mut location_counter = origin; - for region_word in region_words { - match region_word { + for object_word in object_words { + match object_word { ObjectWord::Value(word) => { words.push(word); location_counter += 1; @@ -43,9 +43,9 @@ fn link_region(symbol_table: &SymbolTable, region: Region) -> Result) -> Result, SingleError> { - regions.into_iter() - .map(|region| link_region(symbol_table, region)) +pub(crate) fn link_object_blocks(symbol_table: &SymbolTable, blocks: Vec) -> Result, SingleError> { + blocks.into_iter() + .map(|block| link_object_block(symbol_table, block)) .collect() } @@ -61,7 +61,7 @@ pub fn link(objects: impl IntoIterator) -> Result, Singl let blocks = objects.into_iter() - .map(|object| link_regions(&mut global_symbol_table, object.regions)) + .map(|object| link_object_blocks(&mut global_symbol_table, object.blocks)) .collect::>, SingleError>>()? .into_iter() .flatten() diff --git a/assembler/src/parse.rs b/assembler/src/parse.rs index afba2ee..bf0f6c8 100644 --- a/assembler/src/parse.rs +++ b/assembler/src/parse.rs @@ -10,7 +10,7 @@ use crate::LeniencyLevel; use crate::lex::{LiteralValue, Opcode, Token}; #[derive(Debug, Eq, PartialEq)] -pub struct Region { +pub struct ProgramBlock { pub(crate) orig: WithErrData>>, pub(crate) instructions: Vec>, } @@ -163,7 +163,7 @@ fn everything_until_orig() -> Repeated>> { none_of(Token::Opcode(Opcode::Orig)).repeated() } -fn region(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { +fn program_block(leniency: LeniencyLevel) -> impl Parser, Error = Simple> { let orig = just(Token::Opcode(Opcode::Orig)) .ignore_then(operands(leniency)); @@ -177,7 +177,7 @@ fn region(leniency: LeniencyLevel) -> impl Parser, Er ) .then_ignore(just::<_, Token, _>(Token::End)) .map_with_span(|(orig, instructions), span| { - (Ok(Region { orig, instructions }), span) + (Ok(ProgramBlock { orig, instructions }), span) }) // Pseudo-recovery strategy -- take everything until next .ORIG .or(any().then(everything_until_orig()) @@ -189,20 +189,20 @@ pub struct File { pub(crate) id: SourceId, #[allow(dead_code)] pub(crate) before_first_orig: Spanned>, // TODO: check that this only contains newlines and comments (at least if strict) - pub regions: Vec> + pub blocks: Vec> } fn file(id: SourceId, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { everything_until_orig() .map_with_span(|toks, span| (toks, span)) .then( - region(leniency) + program_block(leniency) .separated_by(everything_until_orig()) .allow_trailing() ) .then_ignore(end()) - .map_with_span(move |(before_first_orig, regions), span| - (File { id: id.clone(), before_first_orig, regions }, span)) + .map_with_span(move |(before_first_orig, blocks), span| + (File { id: id.clone(), before_first_orig, blocks }, span)) } pub fn parse(id: SourceId, src: &str, tokens: Vec>, leniency: LeniencyLevel) -> Result, Vec>> { @@ -241,13 +241,13 @@ mod tests { let f = file.0; assert_eq!((vec![], 0..5), f.before_first_orig); // TODO: probably doesn't need fixing, but span should probably be 0..0; find source of bug - assert_eq!(vec![(Ok(Region { + assert_eq!(vec![(Ok(ProgramBlock { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), instructions: vec![ (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Ok(Register(R0)), 24..26)]), 16..26) }), 12..26) ], }), 0..31)], - f.regions); + f.blocks); } #[test] @@ -256,13 +256,13 @@ mod tests { let (tokens, _) = lex(source, 
LeniencyLevel::Lenient).unwrap(); let file = parse("".to_string(), source, tokens, LeniencyLevel::Lenient).unwrap(); - assert_eq!(vec![(Ok(Region { + assert_eq!(vec![(Ok(ProgramBlock { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), instructions: vec![ (Ok(Instruction { label: None, opcode: (Ok(Add), 12..15), operands: (Ok(vec![(Ok(Register(R0)), 16..18), (Ok(Register(R0)), 20..22), (Err(()), 24..29)]), 16..29) }), 12..29) ], }), 0..44)], - file.0.regions); + file.0.blocks); } #[test] @@ -271,13 +271,13 @@ mod tests { let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); let file = parse("".to_string(), source, tokens, LeniencyLevel::Lenient).unwrap(); - assert_eq!(vec![(Ok(Region { + assert_eq!(vec![(Ok(ProgramBlock { orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), instructions: vec![ (Ok(Instruction { label: Some((Err(()), 12..18)), opcode: (Ok(Add), 19..22), operands: (Ok(vec![(Ok(Register(R0)), 23..25), (Ok(Register(R0)), 27..29), (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33)]), 23..33) }), 12..33) ], }), 0..48)], - file.0.regions); + file.0.blocks); } macro_rules! parse { diff --git a/assembler/tests/inputs/very_many_errors.asm b/assembler/tests/inputs/very_many_errors.asm index d0a355b..e82cf3b 100644 --- a/assembler/tests/inputs/very_many_errors.asm +++ b/assembler/tests/inputs/very_many_errors.asm @@ -4,12 +4,12 @@ LABEL ADD R0 ; Duplicate label LABEL JMP RET ; Bad operand .END -.ORIG x3000 ; Likely overlapping first region +.ORIG x3000 ; Likely overlapping first block ADD R0, R0, R0 ADD R0, R0, R0 .END -.ORIG x3001 ; Overlaps second region +.ORIG x3001 ; Overlaps second block ADD R0, R0, LABEL ; Operand type mismatch BR LABEL ; Invalid reference to duplicate label TOO_FAR .BLKW 0 @@ -19,4 +19,4 @@ TOO_FAR .BLKW 0 BR TOO_FAR ; Label too distant for offset to fit .END -.ORIG x4000 ; Bad region (missing .END) +.ORIG x4000 ; Bad block (missing .END) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 8c74129..e101169 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -424,7 +424,7 @@ mod error { BR SOMEWHERE\n\ .END" => SingleError::InvalidLabelReference { reason: InvalidReferenceReason::Undefined, .. }, - regions_overlap: + program_blocks_overlap: ".ORIG x3000\n\ ADD R0, R0, R0\n\ ADD R0, R0, R0\n\ @@ -434,7 +434,7 @@ mod error { ADD R0, R0, R0\n\ ADD R0, R0, R0\n\ .END" - => SingleError::RegionsOverlap { .. }, + => SingleError::ProgramBlocksOverlap { .. }, label_too_distant: ".ORIG x3000\n\ LEA R0, LABEL\n\ @@ -520,7 +520,7 @@ mod error { ADD R0, R0, R0" => { - SingleError::BadRegion, + SingleError::BadProgramBlock, SingleError::NoEnd }, two_operand_type_mismatches: @@ -548,10 +548,10 @@ mod error { { SingleError::BadOperand, SingleError::BadInstruction, - SingleError::BadRegion, + SingleError::BadProgramBlock, SingleError::DuplicateLabel { .. }, SingleError::WrongNumberOfOperands { expected: 3, actual: 1 }, - SingleError::RegionsOverlap { .. }, + SingleError::ProgramBlocksOverlap { .. }, SingleError::OperandTypeMismatch { .. }, SingleError::InvalidLabelReference { .. }, SingleError::LabelTooDistant { .. 
}, From 0479288ff6d2fc8b0b493380c14db4295f18351c Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 29 Jun 2022 11:03:51 -0500 Subject: [PATCH 74/82] assembler: document the lex module --- assembler/src/lex.rs | 159 ++++++++++++++++++++++++++++++++++++++++++- assembler/src/lib.rs | 6 +- 2 files changed, 162 insertions(+), 3 deletions(-) diff --git a/assembler/src/lex.rs b/assembler/src/lex.rs index 01c3be5..f47fd30 100644 --- a/assembler/src/lex.rs +++ b/assembler/src/lex.rs @@ -1,3 +1,64 @@ +//! Functions and data structures for lexing LC-3 assembly. +//! +//! Lexical analysis, or lexing, is the process of splitting a source string into a sequence of meaningful "tokens." +//! Each token is a small data structure which typically represents one "word" or punctuation mark +//! in the source code. Here's an example: +//! +//! ``` +//! # use lc3_assembler::LeniencyLevel; +//! # use lc3_assembler::lex::*; +//! # use lc3_assembler::lex::Token::*; +//! # use lc3_assembler::lex::Opcode::*; +//! # use lc3_isa::Reg::*; +//! # use lc3_assembler::lex::LiteralValue::*; +//! let source = "ADD R0, R0, #1; increment counter"; +//! let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); +//! assert_eq!(tokens, +//! vec![ +//! (Opcode(Add), 0.. 3), +//! (Register(R0), 4.. 6), +//! (Comma, 6.. 7), +//! (Register(R0), 8..10), +//! (Comma, 10..11), +//! (NumberLiteral(Word(1)), 12..14), +//! (Comment, 14..33), +//! ]); +//! ``` +//! +//! The string is split into seven [`Token`]s. For most of them, +//! each part separated by spaces or punctuation becomes its own token. +//! But really, tokens are based on what parts are significant; notice that the +//! entire comment is represented by one token, and there is no information +//! stored about what the comment said. This is because the content of comments +//! doesn't change the code that needs to be assembled. Maybe more obviously, +//! all of the spaces between the opcode and operands aren't represented in +//! the output tokens at all. They were only important for distinguishing separate tokens. +//! +//! Lexing only splits the string. It doesn't check whether the order of tokens makes sense. +//! For example, the following string is not valid LC-3, but it can be lexed successfully: +//! +//! ``` +//! # use lc3_assembler::LeniencyLevel; +//! # use lc3_assembler::lex::*; +//! # use lc3_assembler::lex::Token::*; +//! # use lc3_assembler::lex::Opcode::*; +//! # use lc3_isa::Reg::*; +//! # use lc3_assembler::lex::LiteralValue::*; +//! let source = "hello, world\n"; +//! let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); +//! assert_eq!(tokens, +//! vec![ +//! (Label("HELLO".to_string()), 0.. 5), +//! (Comma, 5.. 6), +//! (Label("WORLD".to_string()), 7..12), +//! (Newline, 12..13), +//! ]); +//! ``` +//! +//! [`lex`] also outputs the locations of the tokens in the source string as index ranges. +//! These are to help construct error messages which refer to specific locations in the source. +//! +//! use chumsky::prelude::*; use lc3_isa::{Addr, Reg, SignedWord, Word}; use std::convert::{TryFrom, TryInto}; @@ -8,21 +69,58 @@ use chumsky::Stream; use crate::Spanned; use crate::LeniencyLevel; +/// A unit representing a string of meaningful text in LC-3 assembly code. +/// +/// Produced by [`lex`]ing. See the [module-level documentation](crate::lex) for examples. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum Token { + /// An opcode, pseudo-op (**except `.END`**), or named TRAP routine. 
Opcode(Opcode), + /// A register reference (e.g., `R0`). Register(Reg), + /// An unqualified positive decimal number. Used as an officially required operand of `.BLKW`. + /// + /// # Examples + /// - `0` + /// - `10` UnqualifiedNumberLiteral(Word), + /// A number literal, qualified with a base prefix (`#`, `b`, or `x`) and optional negative sign `-`. + /// + /// The qualifiers are used to calculate the numeric value during lexing and are not stored. + /// + /// # Examples + /// - `#-1` + /// - `x3000` + /// - `b0101` NumberLiteral(LiteralValue), + /// A string literal (e.g., `"Hello, world!"`). StringLiteral(String), + /// A label or label reference. + /// + /// Most alphanumeric strings which aren't reserved for other valid tokens + /// are valid labels, depending on the [`LeniencyLevel`](crate::LeniencyLevel) + /// used when [`lex`]ing. Label(String), + + /// The `.END` pseudo-op. + /// + /// Not included as an [`Opcode`] because it denotes + /// the end of a program block. This makes it + /// useful for parsing to distinguish between `.END` + /// and instructions that can occur within a program block. End, + /// A newline. + /// + /// Matches line feeds, carriage returns, + /// and other types of vertical whitespace. Newline, + /// A comma (`,`). Comma, - + /// A comment, including the leading semicolon. Comment, + /// Any string of characters which doesn't represent any other type of token. Invalid, } @@ -32,6 +130,9 @@ impl Display for Token { } } +/// The numeric value represented by a number literal. +/// +/// Can be any unsigned or 2's-complement signed number with a width up to 16 bits. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum LiteralValue { Word(Word), @@ -71,6 +172,7 @@ impl TryFrom for u8 { } } +/// The set of condition codes (`n`, `z`, and/or `p`) on which a `BR` opcode is conditioned. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct ConditionCodes { pub(crate) n: bool, @@ -78,38 +180,78 @@ pub struct ConditionCodes { pub(crate) p: bool, } +/// A specific LC-3 opcode, pseudo-op, or named TRAP routine. +/// +/// Does not include [`.END`](Token::End). +/// +/// Represents a *case-insensitive* string in the source code. +/// That is, [`Opcode::Add`] can represent `ADD`, `add`, or `Add`, etc. +/// All are treated as the same `Opcode`. Below, only the all-uppercase +/// option is listed for each `Opcode` variant. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum Opcode { + /// The opcode `ADD`. Add, + /// The opcode `AND`. And, + /// The opcode `BR`, conditioned on any combination of condition codes. + /// + /// # Examples + /// - `BR` + /// - `BRn` + /// - `BRzp` Br(ConditionCodes), + /// The opcode `JMP`. Jmp, + /// The opcode `JSR`. Jsr, + /// The opcode `JSRR`. Jsrr, + /// The opcode `LD`. Ld, + /// The opcode `LDI`. Ldi, + /// The opcode `LDR`. Ldr, + /// The opcode `LEA`. Lea, + /// The opcode `NOT`. Not, + /// The opcode `RET`. Ret, + /// The opcode `RTI`. Rti, + /// The opcode `ST`. St, + /// The opcode `STI`. Sti, + /// The opcode `STR`. Str, + /// The opcode `TRAP`. Trap, // Pseudo-ops + /// The pseudo-op `.ORIG`. Orig, + /// The pseudo-op `.FILL`. Fill, + /// The pseudo-op `.BLKW`. Blkw, + /// The pseudo-op `.STRINGZ`. Stringz, // Named TRAP routines + /// The named TRAP routine `GETC`. Getc, + /// The named TRAP routine `OUT`. Out, + /// The named TRAP routine `PUTS`. Puts, + /// The named TRAP routine `IN`. In, + /// The named TRAP routine `PUTSP`. Putsp, + /// The named TRAP routine `HALT`. 
Halt, } @@ -380,6 +522,12 @@ fn case_insensitive_pass(case_sensitive_pass_results: Vec>, token: Token) -> bool { tokens.iter().any(|t| t.0 == token) } +/// Produce a sequence of [`Token`]s representative of the given source string. +/// +/// See the [module-level documentation](crate::lex) for general information and examples. +/// +/// This function also produces index ranges corresponding to each token's location +/// in the source string. It also analyzes the tokens and produces [`LexData`]. +/// Because the tokens are consumed by the [`parse`](crate::parse) step, this data saves the +/// information about the tokens which the [semantic analysis step](crate::analyze) +/// needs to produce some types of error messages. pub fn lex(source: &str, leniency: LeniencyLevel) -> Result<(Vec>, LexData), Vec>> { let (maybe_csprs, mut errors) = case_sensitive_pass(source); let tokens = diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 11fa46e..59ec003 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -13,7 +13,6 @@ use std::path::PathBuf; mod util; pub mod error; - pub mod lex; pub mod parse; pub mod analyze; @@ -23,7 +22,10 @@ pub mod layer; type Span = std::ops::Range; type Spanned = (T, Span); -type WithErrData = Spanned>; + +/// A parsed syntax element, or an error if it was skipped, +/// along with any other data necessary to produce an error indicating this syntax element. +pub type WithErrData = Spanned>; fn get(v: &Vec>, i: usize) -> Option<&T> { v.get(i) From 12cc003322ad7e2d0479f512a68a76a06c731174 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Thu, 30 Jun 2022 11:34:39 -0500 Subject: [PATCH 75/82] assembler: document the parse module --- assembler/src/lib.rs | 4 + assembler/src/parse.rs | 188 ++++++++++++++++++++++++++++++----------- 2 files changed, 145 insertions(+), 47 deletions(-) diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 59ec003..219b33d 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -126,6 +126,10 @@ pub fn read(input: &PathBuf) -> Result { /// Get a [`SourceId`] for the given source file. +/// +/// If working with source code that isn't from a file, +/// you may use the id of an arbitrary path (even `""`) for +/// functions requiring a [`SourceId`]. pub fn id(input: &PathBuf) -> SourceId { input.to_string_lossy().to_string() } diff --git a/assembler/src/parse.rs b/assembler/src/parse.rs index bf0f6c8..01763e9 100644 --- a/assembler/src/parse.rs +++ b/assembler/src/parse.rs @@ -1,3 +1,99 @@ +//! Functions and data structures for parsing LC-3 assembly. +//! +//! Parsing, or syntactic analysis, tries to structure the sequence of tokens produced by [lexing](crate::lex). +//! Tokens between `.ORIG` and `.END` tokens are structured into programs. +//! Within those programs, tokens between newlines are structured into instructions. +//! The result is a [`File`], or syntax tree, corresponding to a single source file. +//! In other words, parsing is where the assembler +//! tries to make sense of the order of the tokens. Here's an example: +//! +//! ``` +//! # use lc3_assembler::id; +//! # use lc3_assembler::LeniencyLevel; +//! # use lc3_assembler::lex::lex; +//! # use lc3_assembler::parse::*; +//! # use lc3_assembler::parse::Operand::*; +//! # use lc3_assembler::lex::Opcode::*; +//! # use lc3_assembler::lex::LiteralValue; +//! # use lc3_isa::Reg::*; +//! # use self::*; +//! let id = id(&std::path::PathBuf::from("")); +//! let source = ".ORIG x3000\nADDING ADD R0, R0, #1\n.END"; +//! 
let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); +//! let (file, _) = parse(id, source, tokens, LeniencyLevel::Lenient).unwrap(); +//! +//! assert_eq!(file.blocks, +//! vec![(Ok(ProgramBlock { +//! orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), +//! instructions: vec![ +//! (Ok(Instruction { +//! label: Some((Ok("ADDING".to_string()), 12..18)), +//! opcode: (Ok(Add), 19..22), +//! operands: (Ok(vec![ +//! (Ok(Register(R0)), 23..25), +//! (Ok(Register(R0)), 27..29), +//! (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33) +//! ]), 23..33) +//! }), 12..33) +//! ], +//! }), 0..38)]); +//! ``` +//! +//! Often times, the order of tokens may be invalid, but mostly correct. For example, +//! the source code may include an invalid token where a label is expected: +//! +//! ``` +//! # use lc3_assembler::id; +//! # use lc3_assembler::LeniencyLevel; +//! # use lc3_assembler::lex::lex; +//! # use lc3_assembler::parse::*; +//! # use lc3_assembler::parse::Operand::*; +//! # use lc3_assembler::lex::Opcode::*; +//! # use lc3_assembler::lex::LiteralValue; +//! # use lc3_isa::Reg::*; +//! # use self::*; +//! let id = id(&std::path::PathBuf::from("")); +//! let source = ".ORIG x3000\nA%DDER ADD R0, R0, #1\n.END"; +//! let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); +//! let (file, _) = parse(id, source, tokens, LeniencyLevel::Lenient).unwrap(); +//! +//! assert_eq!(file.blocks, +//! vec![(Ok(ProgramBlock { +//! orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), +//! instructions: vec![ +//! (Ok(Instruction { +//! label: Some((Err(()), 12..18)), // <-- Error here! +//! opcode: (Ok(Add), 19..22), // But everything else parses successfully, +//! operands: (Ok(vec![ // or at least reasonably. +//! (Ok(Register(R0)), 23..25), +//! (Ok(Register(R0)), 27..29), +//! (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33) +//! ]), 23..33) +//! }), 12..33) +//! ], +//! }), 0..38)]); +//! ``` +//! +//! +//! [`parse`] is designed to recover when it encounters a token which is out of order. It replaces +//! the smallest possible part of the syntax tree with an error and tries to make +//! a reasonable guess about where to continue. In the example above, it assumes +//! that the invalid token was supposed to be a label, discards it, and checks for an opcode +//! next. In this way, [`parse`] attempts to produce a syntax tree for any input, +//! valid *or invalid*, but the tree will contain location-specific parse errors +//! which [the semantic analysis step](crate::analyze) can try and determine the cause of. +//! By trying to recover, [`parse`] can produce multiple errors instead of +//! failing at a single early error, and semantic analysis can provide clear reasons +//! for some errors. +//! +//! However, including error data for potentially any element +//! in the syntax tree makes the tree more complex. This is why +//! the examples above have so much "noise" in addition to the main data. +//! Most elements of the syntax tree are paired with error data using +//! [`WithErrData`](crate::WithErrData). We use this type to abstract away +//! the error data and make clearer which syntax elements comprise the tree, +//! at least when working with it in code. 
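To make the `WithErrData` wrapping more concrete, here is a minimal sketch of consuming the tree that `parse` produces. This is an illustration only: it assumes the `lex` and `parse` signatures shown above, the `File`, `ProgramBlock`, and `Instruction` fields made public in this commit, and an arbitrary file path.

```rust
use std::path::PathBuf;
use lc3_assembler::{id, LeniencyLevel};
use lc3_assembler::lex::lex;
use lc3_assembler::parse::parse;

fn main() {
    // Same source as the second example above: the label position holds an invalid token.
    let source = ".ORIG x3000\nA%DDER ADD R0, R0, #1\n.END";
    let src_id = id(&PathBuf::from("example.asm")); // illustrative path

    let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap();
    let (file, _) = parse(src_id, source, tokens, LeniencyLevel::Lenient).unwrap();

    // Every element of the tree is a (Result<T, ()>, Span) pair, so a consumer
    // can skip the Err(()) placeholders while still using their neighbors.
    for (block_res, block_span) in &file.blocks {
        if let Ok(block) = block_res {
            let well_formed = block.instructions
                .iter()
                .filter(|(instruction, _)| instruction.is_ok())
                .count();
            println!("block spanning {:?} has {} well-formed instruction(s)", block_span, well_formed);
        }
    }
}
```

Because even the `Err(())` placeholders keep their spans, a consumer like this can report or skip the broken pieces without losing the rest of the program block.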
+ use std::convert::TryFrom; use chumsky::combinator::Repeated; use chumsky::prelude::*; @@ -9,25 +105,56 @@ use crate::{SourceId, Spanned, WithErrData}; use crate::LeniencyLevel; use crate::lex::{LiteralValue, Opcode, Token}; +/// A representation of a LC-3 assembly file structured based on correct syntax. The root of the syntax tree. +/// +/// Produced by [`parse`]. +/// +/// This assembler allows multiple "program blocks" in the same file, +/// as long as they wouldn't overlap in memory. They must be assembled +/// together and can reference each other's labels. This part of the syntax +/// tree therefore stores a list of program blocks. +#[derive(Debug)] +pub struct File { + pub(crate) id: SourceId, + #[allow(dead_code)] + pub(crate) before_first_orig: Spanned>, // TODO: check that this only contains newlines and comments (at least if strict) + pub blocks: Vec> +} + +/// A representation of an LC-3 assembly program block, starting with `.ORIG` and ending with `.END`. #[derive(Debug, Eq, PartialEq)] pub struct ProgramBlock { - pub(crate) orig: WithErrData>>, - pub(crate) instructions: Vec>, + pub orig: WithErrData>>, + pub instructions: Vec>, } +/// A representation of an LC-3 assembly instruction. +/// +/// When produced by [`parse`], may contain any number or types of operands. +/// Operands are just parsed as an arbitrarily long list, no matter what opcode +/// was used. The number and types of operands are validated during [semantic analysis](crate::analyze). #[derive(Debug, Eq, PartialEq)] -pub(crate) struct Instruction { - pub(crate) label: Option>, - pub(crate) opcode: WithErrData, - pub(crate) operands: WithErrData>>, +pub struct Instruction { + pub label: Option>, + pub opcode: WithErrData, + pub operands: WithErrData>>, } +/// An operand of an LC-3 assembly instruction. +/// +/// Each variant directly corresponds to a specific [`Token`](crate::lex::Token) variant, +/// noted below. See the [`Token`] documentation for descriptions and examples of each. #[derive(Clone, Debug, Eq, PartialEq)] -pub(crate) enum Operand { +pub enum Operand { + /// Corresponds to [`Token::Register`](crate::lex::Token::Register). Register(Reg), + /// Corresponds to [`Token::UnqualifiedNumberLiteral`](crate::lex::Token::UnqualifiedNumberLiteral). UnqualifiedNumberLiteral(Word), + /// Corresponds to [`Token::NumberLiteral`](crate::lex::Token::NumberLiteral). NumberLiteral(LiteralValue), + /// Corresponds to [`Token::StringLiteral`](crate::lex::Token::StringLiteral). StringLiteral(String), + /// Corresponds to [`Token::Label`](crate::lex::Token::Label). 
Label(String), } @@ -184,14 +311,6 @@ fn program_block(leniency: LeniencyLevel) -> impl Parser>, // TODO: check that this only contains newlines and comments (at least if strict) - pub blocks: Vec> -} - fn file(id: SourceId, leniency: LeniencyLevel) -> impl Parser, Error = Simple> { everything_until_orig() .map_with_span(|toks, span| (toks, span)) @@ -205,6 +324,13 @@ fn file(id: SourceId, leniency: LeniencyLevel) -> impl Parser>, leniency: LeniencyLevel) -> Result, Vec>> { let len = src.chars().count(); let (maybe_file, errors) = @@ -265,36 +391,4 @@ mod tests { file.0.blocks); } - #[test] - fn label_error() { - let source = ".ORIG x3000\nA%DDER ADD R0, R0, #1; <- error\n.END"; - let (tokens, _) = lex(source, LeniencyLevel::Lenient).unwrap(); - let file = parse("".to_string(), source, tokens, LeniencyLevel::Lenient).unwrap(); - - assert_eq!(vec![(Ok(ProgramBlock { - orig: (Ok(vec![(Ok(NumberLiteral(LiteralValue::Word(12288))), 6..11)]), 6..11), - instructions: vec![ - (Ok(Instruction { label: Some((Err(()), 12..18)), opcode: (Ok(Add), 19..22), operands: (Ok(vec![(Ok(Register(R0)), 23..25), (Ok(Register(R0)), 27..29), (Ok(NumberLiteral(LiteralValue::Word(1))), 31..33)]), 23..33) }), 12..33) - ], - }), 0..48)], - file.0.blocks); - } - - macro_rules! parse { - (let $p:pat = $parser:expr, $src:expr) => { - let (tokens, _) = lex($src, LeniencyLevel::Lenient).unwrap(); - let len = $src.chars().count(); - let $p = - $parser - .parse_recovery_verbose(Stream::from_iter(len..len + 1, tokens.into_iter())); - } - } - - #[test] - fn instruction_error() { - parse!(let (maybe_instruction, errs) = instruction(LeniencyLevel::Lenient), "JMP RET .END"); - println!("{:?}", maybe_instruction); - println!("{:?}", errs); - } - } \ No newline at end of file From 05adfd4b2ece13a3d1946df3ac9e6e84d8cadfd0 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Thu, 30 Jun 2022 21:38:28 -0500 Subject: [PATCH 76/82] assembler: allow multiple semantic analysis visitors in one pass --- assembler/src/analyze.rs | 361 ++++++++++++++++++++++++++++++++------- 1 file changed, 299 insertions(+), 62 deletions(-) diff --git a/assembler/src/analyze.rs b/assembler/src/analyze.rs index d71a815..4ad7f36 100644 --- a/assembler/src/analyze.rs +++ b/assembler/src/analyze.rs @@ -19,16 +19,24 @@ struct ParseErrorsAnalysis { } impl ParseErrorsAnalysis { - fn new() -> Self { - Default::default() - } - fn push_error(&mut self, single_error: SingleError, span: &SpanWithSource) { self.errors.push(Spanned(span.clone(), single_error)); } } -impl MutVisitor for ParseErrorsAnalysis { +impl Visit for ParseErrorsAnalysis { + type Data = (); + + fn new(_data: Self::Data) -> Self { + Default::default() + } + + type Output = (); + fn finish(self) -> (Self::Output, Vec) { + ((), self.errors) + } + + fn enter_program_block_error(&mut self, span: &SpanWithSource) { self.push_error(BadProgramBlock, span); } @@ -59,12 +67,20 @@ struct DuplicateLabelsAnalysis { } impl DuplicateLabelsAnalysis { - fn new() -> Self { +} + +impl Visit for DuplicateLabelsAnalysis { + type Data = (); + + fn new(_data: ()) -> Self { Default::default() } -} -impl MutVisitor for DuplicateLabelsAnalysis { + type Output = (); + fn finish(self) -> (Self::Output, Vec) { + ((), self.errors) + } + fn exit_file(&mut self, _file: &File, _span: &SpanWithSource) { let DuplicateLabelsAnalysis { errors, labels } = self; labels.iter() @@ -132,16 +148,22 @@ struct SymbolTableAnalysis { symbol_table: SymbolTable, } -impl SymbolTableAnalysis { - fn new() -> Self { +const 
ORIG_ERROR_STARTING_ADDRESS_ESTIMATE: RoughAddr = 0x3000; +const INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE: RoughAddr = 1; + +impl Visit for SymbolTableAnalysis { + type Data = (); + + fn new(_data: ()) -> Self { Default::default() } -} -const ORIG_ERROR_STARTING_ADDRESS_ESTIMATE: RoughAddr = 0x3000; -const INSTRUCTION_ERROR_ADDRESSES_OCCUPIED_ESTIMATE: RoughAddr = 1; + type Output = SymbolTable; + fn finish(self) -> (Self::Output, Vec) { + (self.symbol_table, vec![]) + } + -impl MutVisitor for SymbolTableAnalysis { fn enter_label(&mut self, label: &String, _span: &SpanWithSource, location: &LocationCounter) { self.symbol_table.entry(label.clone()) .and_modify(|e| *e = Err(InvalidSymbolError::Duplicated)) @@ -176,14 +198,6 @@ struct LabelOffsetBoundsAnalysis<'a> { } impl<'a> LabelOffsetBoundsAnalysis<'a> { - fn new(symbol_table: &'a SymbolTable) -> Self { - Self { - errors: Default::default(), - symbol_table, - expected_label: Default::default(), - } - } - fn check_offset(&mut self, label: &String, span: &SpanWithSource, width: u8, label_addr: RoughAddr, ref_addr: RoughAddr) { match calculate_offset(ref_addr, label_addr) { Err(_) => { @@ -212,7 +226,23 @@ impl<'a> LabelOffsetBoundsAnalysis<'a> { } } -impl<'a> MutVisitor for LabelOffsetBoundsAnalysis<'a> { +impl<'a> Visit for LabelOffsetBoundsAnalysis<'a> { + type Data = &'a SymbolTable; + + fn new(symbol_table: &'a SymbolTable) -> Self { + Self { + errors: Default::default(), + symbol_table, + expected_label: Default::default(), + } + } + + type Output = (); + fn finish(self) -> (Self::Output, Vec) { + ((), self.errors) + } + + fn enter_opcode_error(&mut self, _span: &SpanWithSource, _location: &LocationCounter) { self.expected_label = None; } @@ -285,10 +315,6 @@ struct OperandTypesAnalysis { } impl OperandTypesAnalysis { - fn new() -> Self { - Default::default() - } - fn check_operands(&mut self, operands: &Vec>, span: &SpanWithSource) { if let Some(expected) = &self.expected_operands { // TODO: create longest common subsequence diff for more precise errors @@ -319,7 +345,19 @@ fn orig_expected_operands() -> Vec { vec![OperandType::signed_or_unsigned_number(16)] // TODO: Disallow signed? 
} -impl MutVisitor for OperandTypesAnalysis { +impl Visit for OperandTypesAnalysis { + type Data = (); + + fn new(_data: Self::Data) -> Self { + Default::default() + } + + type Output = (); + fn finish(self) -> (Self::Output, Vec) { + ((), self.errors) + } + + fn enter_orig(&mut self, orig: &Vec>, span: &SpanWithSource, _location: &LocationCounter) { self.expected_operands = Some(orig_expected_operands()); self.check_operands(orig, span); @@ -368,8 +406,8 @@ struct ObjectPlacementAnalysis { object_spans: Vec, } -impl ObjectPlacementAnalysis { - fn new() -> Self { +impl Default for ObjectPlacementAnalysis { + fn default() -> Self { Self { errors: Default::default(), last_start: ORIG_ERROR_STARTING_ADDRESS_ESTIMATE, @@ -379,7 +417,18 @@ impl ObjectPlacementAnalysis { } } -impl MutVisitor for ObjectPlacementAnalysis { +impl Visit for ObjectPlacementAnalysis { + type Data = (); + + fn new(_data: ()) -> Self { + Default::default() + } + + type Output = (); + fn finish(self) -> (Self::Output, Vec) { + ((), self.errors) + } + fn exit_file(&mut self, _file: &File, span: &SpanWithSource) { self.object_spans.sort_unstable_by_key(|span| span.span_in_memory.start); for (op1, op2) in self.object_spans.iter().tuple_windows() { @@ -441,15 +490,19 @@ impl LocationCounterState { } } -fn visit(v: &mut impl MutVisitor, file: &File, span: &SpanWithSource) { +fn visit<'a, V, D, O>(data: D, file: &File, span: &SpanWithSource) -> (O, Vec) + where V: Visit +{ + let mut v = V::new(data); v.enter_file(file, span); for block in file.blocks.iter() { - visit_program_block(v, file.id.clone(), block); + visit_program_block(&mut v, file.id.clone(), block); } v.exit_file(file, span); + v.finish() } -fn visit_program_block(v: &mut impl MutVisitor, id: SourceId, program_block: &WithErrData) { +fn visit_program_block(v: &mut impl Visit, id: SourceId, program_block: &WithErrData) { let (pb_res, span) = program_block; let span = (id.clone(), span.clone()).into(); match pb_res { @@ -470,7 +523,7 @@ fn visit_program_block(v: &mut impl MutVisitor, id: SourceId, program_block: &Wi } } -fn visit_orig(v: &mut impl MutVisitor, id: SourceId, orig: &WithErrData>>, location_counter: &mut LocationCounter) { +fn visit_orig(v: &mut impl Visit, id: SourceId, orig: &WithErrData>>, location_counter: &mut LocationCounter) { let (orig_res, span) = orig; let span = (id.clone(), span.clone()).into(); match orig_res { @@ -497,7 +550,7 @@ fn visit_orig(v: &mut impl MutVisitor, id: SourceId, orig: &WithErrData, location_counter: &mut LocationCounter) { +fn visit_instruction(v: &mut impl Visit, id: SourceId, instruction: &WithErrData, location_counter: &mut LocationCounter) { let (inst_res, span) = instruction; let span = (id.clone(), span.clone()).into(); match inst_res { @@ -527,7 +580,7 @@ fn visit_instruction(v: &mut impl MutVisitor, id: SourceId, instruction: &WithEr } } -fn visit_label(v: &mut impl MutVisitor, id: SourceId, label: &WithErrData, location_counter: &mut LocationCounter) { +fn visit_label(v: &mut impl Visit, id: SourceId, label: &WithErrData, location_counter: &mut LocationCounter) { let (label_res, span) = label; let span = (id, span.clone()).into(); match label_res { @@ -536,7 +589,7 @@ fn visit_label(v: &mut impl MutVisitor, id: SourceId, label: &WithErrData, location_counter: &mut LocationCounter) { +fn visit_opcode(v: &mut impl Visit, id: SourceId, opcode: &WithErrData, location_counter: &mut LocationCounter) { let (opcode_res, span) = opcode; let span = (id, span.clone()).into(); match opcode_res { @@ -545,7 +598,7 @@ fn 
visit_opcode(v: &mut impl MutVisitor, id: SourceId, opcode: &WithErrData>>, location_counter: &mut LocationCounter) { +fn visit_operands(v: &mut impl Visit, id: SourceId, operands: &WithErrData>>, location_counter: &mut LocationCounter) { let (ops_res, span) = operands; let span = (id.clone(), span.clone()).into(); match ops_res { @@ -559,7 +612,7 @@ fn visit_operands(v: &mut impl MutVisitor, id: SourceId, operands: &WithErrData< } } -fn visit_operand(v: &mut impl MutVisitor, id: SourceId, operand: &WithErrData, location_counter: &mut LocationCounter) { +fn visit_operand(v: &mut impl Visit, id: SourceId, operand: &WithErrData, location_counter: &mut LocationCounter) { let (op_res, span) = operand; let span = (id, span.clone()).into(); match op_res { @@ -568,7 +621,13 @@ fn visit_operand(v: &mut impl MutVisitor, id: SourceId, operand: &WithErrData Self; + + type Output; + fn finish(self) -> (Self::Output, Vec); + fn enter_file(&mut self, _file: &File, _span: &SpanWithSource) {} fn exit_file(&mut self, _file: &File, _span: &SpanWithSource) {} @@ -597,6 +656,193 @@ trait MutVisitor { fn enter_operand(&mut self, _operand: &Operand, _span: &SpanWithSource, _location: &LocationCounter) {} } +macro_rules! impl_visit_tuple { + () => {}; + ($head:ident $head_data:ident $head_output:ident, $($tail:ident $tail_data:ident $tail_output:ident,)*) => { + impl<$head, $head_data, $head_output, $($tail, $tail_data, $tail_output),*> Visit for ($head, $($tail),*) + where + $head: Visit, + $($tail: Visit),* + { + type Data = ($head_data, $($tail_data),*); + + fn new(($head_data, $($tail_data,)*): Self::Data) -> Self { + ( + $head::new($head_data), + $( + $tail::new($tail_data) + ),* + ) + } + + type Output = ($head_output, $($tail_output),*); + + fn finish(self) -> (Self::Output, Vec) { + let ($head, $($tail),*) = self; + let ($head_output, $head_data) = $head.finish(); + $( + let ($tail_output, $tail_data) = $tail.finish(); + )* + ( + ( + $head_output, + $($tail_output),* + ) + , + concat([ + $head_data, + $($tail_data),* + ]) + ) + } + + fn enter_file(&mut self, file: &File, span: &SpanWithSource) { + let ($head, $($tail,)*) = self; + $head.enter_file(file, span); + $( + $tail.enter_file(file, span); + )* + } + fn exit_file(&mut self, file: &File, span: &SpanWithSource) { + let ($head, $($tail,)*) = self; + $head.exit_file(file, span); + $( + $tail.exit_file(file, span); + )* + } + + fn enter_program_block_error(&mut self, span: &SpanWithSource) { + let ($head, $($tail,)*) = self; + $head.enter_program_block_error(span); + $( + $tail.enter_program_block_error(span); + )* + } + fn enter_program_block(&mut self, program_block: &ProgramBlock, span: &SpanWithSource) { + let ($head, $($tail,)*) = self; + $head.enter_program_block(program_block, span); + $( + $tail.enter_program_block(program_block, span); + )* + } + fn exit_program_block(&mut self, program_block: &ProgramBlock, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.exit_program_block(program_block, span, location); + $( + $tail.exit_program_block(program_block, span, location); + )* + } + + fn enter_orig_error(&mut self, span: &SpanWithSource) { + let ($head, $($tail,)*) = self; + $head.enter_orig_error(span); + $( + $tail.enter_orig_error(span); + )* + } + fn enter_orig(&mut self, orig: &Vec>, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_orig(orig, span, location); + $( + $tail.enter_orig(orig, span, location); + )* + } + fn exit_orig(&mut self, orig: 
&Vec>, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.exit_orig(orig, span, location); + $( + $tail.exit_orig(orig, span, location); + )* + } + + fn enter_instruction_error(&mut self, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_instruction_error(span, location); + $( + $tail.enter_instruction_error(span, location); + )* + } + fn enter_instruction(&mut self, instruction: &Instruction, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_instruction(instruction, span, location); + $( + $tail.enter_instruction(instruction, span, location); + )* + } + fn exit_instruction(&mut self, instruction: &Instruction, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.exit_instruction(instruction, span, location); + $( + $tail.exit_instruction(instruction, span, location); + )* + } + + fn enter_label_error(&mut self, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_label_error(span, location); + $( + $tail.enter_label_error(span, location); + )* + } + fn enter_label(&mut self, label: &String, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_label(label, span, location); + $( + $tail.enter_label(label, span, location); + )* + } + + fn enter_opcode_error(&mut self, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_opcode_error(span, location); + $( + $tail.enter_opcode_error(span, location); + )* + } + fn enter_opcode(&mut self, opcode: &Opcode, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_opcode(opcode, span, location); + $( + $tail.enter_opcode(opcode, span, location); + )* + } + + fn enter_operands_error(&mut self, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_operands_error(span, location); + $( + $tail.enter_operands_error(span, location); + )* + } + fn enter_operands(&mut self, operands: &Vec>, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_operands(operands, span, location); + $( + $tail.enter_operands(operands, span, location); + )* + } + + fn enter_operand_error(&mut self, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_operand_error(span, location); + $( + $tail.enter_operand_error(span, location); + )* + } + fn enter_operand(&mut self, operand: &Operand, span: &SpanWithSource, location: &LocationCounter) { + let ($head, $($tail,)*) = self; + $head.enter_operand(operand, span, location); + $( + $tail.enter_operand(operand, span, location); + )* + } + } + + impl_visit_tuple!($($tail $tail_data $tail_output,)*); + } +} + +impl_visit_tuple!(A DA OA, B DB OB, C DC OC, D DD OD, E DE OE,); fn analyze_lex_data(lex_data: &LexData, file_span: &SpanWithSource) -> Vec { let mut errors = Vec::new(); @@ -621,31 +867,22 @@ pub fn validate(lex_data: &LexData, file_spanned: &Spanned) -> Vec let file_span_with_source = (file.id.clone(), file_span.clone()).into(); let errors_from_lex_data = analyze_lex_data(&lex_data, &file_span_with_source); - let mut pe = ParseErrorsAnalysis::new(); - visit(&mut pe, file, &file_span_with_source); - - let mut dl = DuplicateLabelsAnalysis::new(); - visit(&mut dl, file, &file_span_with_source); - - let mut ot = 
OperandTypesAnalysis::new(); - visit(&mut ot, file, &file_span_with_source); - - let mut st = SymbolTableAnalysis::new(); - visit(&mut st, file, &file_span_with_source); - - let mut lob = LabelOffsetBoundsAnalysis::new(&st.symbol_table); - visit(&mut lob, file, &file_span_with_source); + let ((symbol_table, _, _, _, _), first_pass_errors) = + visit::<( + SymbolTableAnalysis, + ParseErrorsAnalysis, + DuplicateLabelsAnalysis, + OperandTypesAnalysis, + ObjectPlacementAnalysis, + ), _, _>(((), (), (), (), ()), file, &file_span_with_source); - let mut op = ObjectPlacementAnalysis::new(); - visit(&mut op, file, &file_span_with_source); + let (_, second_pass_errors) = + visit::(&symbol_table, file, &file_span_with_source); concat([ errors_from_lex_data, - pe.errors, - dl.errors, - ot.errors, - lob.errors, - op.errors, + first_pass_errors, + second_pass_errors, ]) } From f25a50ddfc26e3d86e1529e3b5fc9e58377d1b65 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Fri, 1 Jul 2022 17:59:25 -0500 Subject: [PATCH 77/82] assembler: document the analyze module --- assembler/src/analyze.rs | 56 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/assembler/src/analyze.rs b/assembler/src/analyze.rs index 4ad7f36..aae523d 100644 --- a/assembler/src/analyze.rs +++ b/assembler/src/analyze.rs @@ -1,3 +1,29 @@ +//! Functions for identifying errors in the syntax trees produced by [`parse`](crate::parse). +//! +//! This module is primarily for semantic analysis, or identifying semantic errors +//! in a syntax tree. These are errors that don't have to do with incorrect syntax; +//! for example, branching (with `BR`) to a label that isn't defined. It is correct +//! syntax to use `BR` with a label, but the label doesn't refer to any address, +//! so the assembler wouldn't have the necessary information to calculate an offset. +//! In a situation like this where the syntax is correct, but the meaning (semantics) +//! is still invalid or contradictory, it is a semantic error. +//! +//! Secondarily, this module also identifies parse (or *syntax*) errors +//! inserted into the syntax tree during parsing. When performing semantic analysis, +//! the parse errors are typically ignored, in order to identify as many independent errors +//! as possible. +//! When the parse errors affect the meaning, we attempt to work around the missing semantic +//! information by making reasonable assumptions. +//! For example, if program starts with `.ORIG xOOPS`, some analyses may assume +//! that the intent was to place the program at `x3000`, the start of user space, +//! commonly used in examples. +//! +//! In other words, our approach to semantic analysis tries to avoid identifying +//! multiple errors stemming from the same root cause, particularly when the root +//! cause is a parse error. The goal is to be clear where and why a change needs to be +//! made to make the program valid, not to show every problem that an error implies. +//! So when semantic analysis encounters a parse error, it makes whatever assumptions +//! it needs in order to treat the rest of the program as normal. 
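To make the workflow concrete, here is a minimal end-to-end sketch that surfaces the kind of undefined-label error described above. This is an illustration only: it assumes the `lex`, `parse`, and `validate` signatures shown in this patch series, and the source string and file path are arbitrary.

```rust
use std::path::PathBuf;
use lc3_assembler::{id, LeniencyLevel};
use lc3_assembler::lex::lex;
use lc3_assembler::parse::parse;
use lc3_assembler::analyze::validate;

fn main() {
    // Correct syntax, but a semantic error: SOMEWHERE is never defined,
    // so no offset can be calculated for the BR instruction.
    let source = ".ORIG x3000\nBR SOMEWHERE\n.END";
    let src_id = id(&PathBuf::from("undefined_label.asm")); // illustrative path

    let (tokens, lex_data) = lex(source, LeniencyLevel::Lenient).unwrap();
    let file_spanned = parse(src_id, source, tokens, LeniencyLevel::Lenient).unwrap();

    // `validate` drives the analysis visitors over the syntax tree and
    // collects every error they find.
    let errors = validate(&lex_data, &file_spanned);
    assert!(!errors.is_empty());
}
```

The returned errors can then be rendered for the user with the reporting helpers on `Error` documented later in this patch series.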
use std::collections::HashMap; use std::convert::{TryFrom, TryInto}; use std::fmt::Debug; @@ -24,6 +50,7 @@ impl ParseErrorsAnalysis { } } +// TODO: use context to provide useful hints as to *why* the error occurred impl Visit for ParseErrorsAnalysis { type Data = (); @@ -36,7 +63,6 @@ impl Visit for ParseErrorsAnalysis { ((), self.errors) } - fn enter_program_block_error(&mut self, span: &SpanWithSource) { self.push_error(BadProgramBlock, span); } @@ -66,9 +92,6 @@ struct DuplicateLabelsAnalysis { labels: HashMap>, } -impl DuplicateLabelsAnalysis { -} - impl Visit for DuplicateLabelsAnalysis { type Data = (); @@ -621,6 +644,11 @@ fn visit_operand(v: &mut impl Visit, id: SourceId, operand: &WithErrData Self; @@ -656,6 +684,9 @@ trait Visit { fn enter_operand(&mut self, _operand: &Operand, _span: &SpanWithSource, _location: &LocationCounter) {} } +/// Implement [`Visit`] for tuples of [`Visit`]. +/// In general, each method is called on the elements of the tuple in sequence, +/// and if there are results, they are combined in a result tuple in the same sequence. macro_rules! impl_visit_tuple { () => {}; ($head:ident $head_data:ident $head_output:ident, $($tail:ident $tail_data:ident $tail_output:ident,)*) => { @@ -861,6 +892,23 @@ fn analyze_lex_data(lex_data: &LexData, file_span: &SpanWithSource) -> Vec) -> Vec { let (file, file_span) = file_spanned; From b77ca8acf018e8956bcdee5122c627afd4c81a1c Mon Sep 17 00:00:00 2001 From: David Gipson Date: Tue, 5 Jul 2022 16:46:13 -0500 Subject: [PATCH 78/82] assembler: complete docs for all public API --- assembler/src/analyze.rs | 12 ++-- assembler/src/assemble.rs | 31 ++++++++- assembler/src/error.rs | 131 ++++++++++++++++++++++++++++++++------ assembler/src/layer.rs | 15 +++++ assembler/src/link.rs | 23 ++++++- assembler/tests/integ.rs | 37 +++++++---- 6 files changed, 210 insertions(+), 39 deletions(-) diff --git a/assembler/src/analyze.rs b/assembler/src/analyze.rs index aae523d..edc6006 100644 --- a/assembler/src/analyze.rs +++ b/assembler/src/analyze.rs @@ -236,12 +236,14 @@ impl<'a> LabelOffsetBoundsAnalysis<'a> { if util::min_signed_width(offset as i32) > width { self.errors.push( Spanned(span.clone(), - LabelTooDistant { + InvalidLabelReference { label: label.clone(), - width, - est_ref_pos: ref_addr, - offset, - est_label_pos: label_addr, + reason: InvalidReferenceReason::TooDistant { + width, + est_ref_pos: ref_addr, + offset, + est_label_pos: label_addr, + } })); } } diff --git a/assembler/src/assemble.rs b/assembler/src/assemble.rs index fd5055c..834895d 100644 --- a/assembler/src/assemble.rs +++ b/assembler/src/assemble.rs @@ -1,3 +1,9 @@ +//! Functions and data structures for assembling the syntax trees produced by [`parse`](crate::parse). +//! +//! This module is for assembling: converting all possible instructions to binary machine code, +//! only leaving those which refer to external labels, which are assembled in +//! the [`link`](crate::link) step. + use std::collections::HashMap; use std::convert::{TryFrom, TryInto}; use std::num::TryFromIntError; @@ -141,12 +147,18 @@ struct FirstPassBlock { instructions: Vec, } +/// An assembled, but unlinked program. +/// +/// Every instruction comprising the Object is assembled to binary +/// unless it refers to an external label. +/// +/// Includes a symbol table of labels which other Objects can refer to. 
pub struct Object { pub(crate) symbol_table: SymbolTable, pub(crate) blocks: Vec, } -pub struct ObjectBlock { +pub(crate) struct ObjectBlock { pub(crate) origin: Addr, pub(crate) words: Vec, } @@ -399,11 +411,26 @@ fn second_pass(symbol_table: SymbolTable, fp_blocks: Vec) -> Res Ok(Object { symbol_table, blocks }) } -pub(crate) fn get_orig(orig_operands: WithErrData>>) -> Result { +fn get_orig(orig_operands: WithErrData>>) -> Result { let orig_operand = result(orig_operands)?.remove(0); result(orig_operand)?.try_into() } +/// Assemble the given syntax tree. +/// +/// All instructions are converted to binary machine code, +/// except those which refer to labels in other files. +/// +/// *May* return `Err` if the program is invalid, +/// but for ease of assembly, **not all errors are checked**, +/// and the `Err` will not contain information on +/// why the error occurred. **For full error checking and detailed feedback, +/// you should [`validate`](crate::analyze::validate) the input first.** +/// +/// All labels defined in the file are treated as +/// global to all program blocks in the file. +/// In a sense, all the program blocks are +/// "automatically linked." pub fn assemble(file: parse::File) -> Result { let block_data = file.blocks.into_iter() diff --git a/assembler/src/error.rs b/assembler/src/error.rs index b2ba5e5..4853a56 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -1,3 +1,5 @@ +//! Error types and associated functions. + use ariadne::{Label, Report, ReportBuilder, ReportKind}; use std::cmp::max; use lc3_isa::SignedWord; @@ -9,10 +11,14 @@ use crate::parse::Operand; use std::ops::Range; +/// This crate's primary error type. Can represent multiple errors from the entire assembly process. #[derive(Debug)] pub enum Error { + /// A single error and the ID of the source file which caused it. Single(SourceId, SingleError), + /// A single error and a span indicating the main substring of source code which caused it. Spanned(SpanWithSource, SingleError), + /// A set of errors. Multiple(Vec), } @@ -60,6 +66,14 @@ impl From<(SourceId, chumsky::error::Simple)> for Error { } impl Error { + /// Produce a set of error reports for this [`Error`], which can then be printed. + /// + /// One report will be produced for each [`SingleError`] in the [`Error`]. + /// Each report for an [`Error::Spanned`] will annotate the substring + /// which caused the error. + /// + /// To print the reports, you will need an appropriate [`ariadne::Cache`]; + /// use [`sources`](crate::sources). pub fn report(self) -> Vec> { use Error::*; match self { @@ -79,6 +93,9 @@ impl Error { } } + /// Produce a `String` containing error messages for this [`Error`]. + /// + /// To create an appropriate `cache`, use [`sources`](crate::sources). pub fn report_to_string(self, mut cache: impl ariadne::Cache) -> Result { let mut s = Vec::new(); for report in self.report() { @@ -87,6 +104,11 @@ impl Error { Ok(String::from_utf8_lossy(&s).to_string()) } + /// Return the first [`SingleError`] in this [`Error`], if it contains any, otherwise `None`. + /// + /// Can be used to present only one error in a set, + /// or to get the only error in an [`Error`] + /// that is known to only contain one. pub fn get_first_single_error(&self) -> Option<&SingleError> { use Error::*; match self { @@ -105,39 +127,101 @@ pub(crate) type RoughAddr = i32; use SingleError::*; +/// An independent error without associated location data. #[derive(Debug)] pub enum SingleError { + /// A `std::io::Error`. 
Io(std::io::Error), + /// An error which occurred during lexing. + /// + /// Lexing attempts to be error-tolerant, + /// successfully producing invalid tokens for invalid input, + /// so this error indicates a bug in [`lex`](crate::lex). Lex(chumsky::error::Simple), + /// An error which occurred during parsing. + /// + /// Parsing attempts to be error-tolerant, + /// successfully producing a syntax tree even for invalid input, + /// so this error indicates a bug in [`parse`](crate::parse). Parse(chumsky::error::Simple), + /// An error which occurred during assembly. + /// + /// May indicate that the input was invalid or that + /// there is a bug in [`assemble`](mod@crate::assemble). Assemble, + /// An error which occurred during linking. + /// + /// May indicate that the inputs were invalid or that + /// there is a bug in [`link`](crate::link). Link, + /// An error which occurred during layering due to invalid input. Layer, + /// More inputs were provided than could be assigned [`SourceId`](crate::SourceId)s. + /// Should never occur in reasonable use cases. TooManyInputs, + /// Source assumed to be a program block could not be parsed. BadProgramBlock, + /// Source assumed to be an instruction could not be parsed. BadInstruction, + /// Source assumed to be a label could not be parsed. BadLabel, + /// Source assumed to be an opcode could not be parsed. BadOpcode, + /// Source assumed to be a list of operands could not be parsed. BadOperands, + /// Source assumed to be an operand could not be parsed. BadOperand, - WrongNumberOfOperands { expected: usize, actual: usize }, - OperandTypeMismatch { expected: OperandType, actual: OperandType }, - DuplicateLabel { label: String, occurrences: Vec, }, - InvalidLabelReference { label: String, reason: InvalidReferenceReason }, - LabelTooDistant { label: String, width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, + /// An operand list contained the wrong number of operands for an instruction. + WrongNumberOfOperands { + /// The correct number of operands for the instruction, given the opcode. + expected: usize, + /// The number of operands found in the operand list. + actual: usize + }, + /// The wrong type of operand was given for an instruction. + OperandTypeMismatch { + /// The correct type of operand for the instruction. + expected: OperandType, + /// The given operand's type. + actual: OperandType + }, + /// The same label was defined at two or more addresses. + DuplicateLabel { + /// The label. + label: String, + /// The set of occurrences of the label in the source code. + occurrences: Vec, + }, + /// An instruction can't be assembled due to a label reference given as an operand. + InvalidLabelReference { + /// The label. + label: String, + /// The specific reason the instruction can't be assembled. + reason: InvalidReferenceReason + }, + /// Two program blocks span at least one common memory location. ProgramBlocksOverlap { placement1: ProgramBlockPlacement, placement2: ProgramBlockPlacement }, + /// The lexer produced no tokens; probably indicates no content in the source file. NoTokens, + /// The lexer produced no token for `.ORIG`; this will likely result in no valid program blocks being parsed. NoOrig, + /// The lexer produced no token for `.END`; this will likely result in no valid program blocks being parsed. NoEnd, } +/// A reason that an instruction cannot be assembled due to a label reference operand. #[derive(Debug)] pub enum InvalidReferenceReason { + /// The label is not defined in the file. 
Undefined, + /// The label is defined at more than one address in the file. Duplicated, + /// The label is defined at an invalid address. OutOfBounds, + /// The label is so far from the reference that the required offset would overflow the available bits. + TooDistant { width: u8, est_ref_pos: RoughAddr, est_label_pos: RoughAddr, offset: SignedWord }, } impl SingleError { @@ -167,19 +251,18 @@ impl SingleError { format!("same label used for multiple locations: {}", label), InvalidLabelReference { label, reason } => { let reason_str = match reason { - InvalidReferenceReason::Undefined => "not previously defined", - InvalidReferenceReason::Duplicated => "defined in multiple locations", - InvalidReferenceReason::OutOfBounds => "defined at invalid address", + InvalidReferenceReason::Undefined => "not previously defined".to_string(), + InvalidReferenceReason::Duplicated => "defined in multiple locations".to_string(), + InvalidReferenceReason::OutOfBounds => "defined at invalid address".to_string(), + InvalidReferenceReason::TooDistant { width, est_ref_pos, est_label_pos, offset } => + format!("label {} at {:#0label_pos_width$X} referenced at {:#0ref_pos_width$X}; too distant, cannot represent offset of {} in available bits: {}", + label, est_label_pos, est_ref_pos, offset, width, + // TODO: Rust '#X' formatter automatically fixes width to multiple of 4... find or implement workaround to control sign-extension; for example, for 9-bit signed offsets, we would want to display 0x2FF, not 0xFEFF. Showing as decimal for now. + label_pos_width = max(4, min_signed_hex_digits_required(*est_ref_pos) as usize), + ref_pos_width = max(4, min_signed_hex_digits_required(*est_label_pos) as usize),) }; format!("reference to label {} invalid: {}", label, reason_str) } - LabelTooDistant { label, width, est_ref_pos, est_label_pos, offset } => { - format!("label {} at {:#0label_pos_width$X} referenced at {:#0ref_pos_width$X}; too distant, cannot represent offset of {} in available bits: {}", - label, est_label_pos, est_ref_pos, offset, width, - // TODO: Rust '#X' formatter automatically fixes width to multiple of 4... find or implement workaround to control sign-extension; for example, for 9-bit signed offsets, we would want to display 0x2FF, not 0xFEFF. Showing as decimal for now. - label_pos_width = max(4, min_signed_hex_digits_required(*est_ref_pos) as usize), - ref_pos_width = max(4, min_signed_hex_digits_required(*est_label_pos) as usize),) - } ProgramBlocksOverlap { placement1, placement2 } => { format!("program block {} in file occupying [{:#0o1s_width$X}, {:#0o1e_width$X}) overlaps program block {} occupying [{:#0o2s_width$X}, {:#0o2e_width$X})", placement1.position_in_file, @@ -250,13 +333,24 @@ fn report_single(id: SourceId, span: Option, error: SingleError) -> Report } +/// A type of operand, including number width constraints. #[derive(Clone, Debug)] pub enum OperandType { + /// A register reference. Register, + /// An unqualified number for use with `.BLKW`. UnqualifiedNumber, + /// A number with a specific sign and width. Number { signed: bool, width: u8 }, + /// A string of characters. String, + /// A label reference. Label, + /// A type of operand that includes multiple other types. + /// An operand of this type can be either of the contained types. + /// + /// Used for operands like PC offsets; the type of a PC offset + /// is a label OR a signed number. 
Or(Box, Box) } @@ -398,10 +492,11 @@ impl OperandType { } } +/// Data indicating the source string indices and memory addresses spanned by a program block. #[derive(Clone, Debug)] pub struct ProgramBlockPlacement { - pub(crate) position_in_file: usize, - pub(crate) span_in_file: SpanWithSource, - pub(crate) span_in_memory: Range, + pub position_in_file: usize, + pub span_in_file: SpanWithSource, + pub span_in_memory: Range, } diff --git a/assembler/src/layer.rs b/assembler/src/layer.rs index 6ad962d..e0e6675 100644 --- a/assembler/src/layer.rs +++ b/assembler/src/layer.rs @@ -1,3 +1,5 @@ +//! Functions for combining blocks of LC-3 memory into an executable image. + use lc3_isa::util::MemoryDump; use lc3_isa::{ADDR_SPACE_SIZE_IN_WORDS, Word}; use crate::error::SingleError; @@ -12,6 +14,19 @@ fn layer_block(image: &mut [Word; ADDR_SPACE_SIZE_IN_WORDS], block: Block) { } } +/// Combine the given blocks of memory into an executable image by placing them in memory in the given order. +/// +/// Creating the memory image starts with all memory initialized to `0x0000`, +/// or if `layer_onto_os` is `true`, memory is initialized with the +/// [UTP LC-3 OS](https://github.com/ut-utp/core/tree/master/os). The +/// OS must be included for the UTP emulator to successfully load and execute the program. +/// +/// After the image is initialized, each given [`Block`](crate::link::Block) is +/// inserted into memory at its target address, in the given order. +/// No regard is given to addresses that have already been initialized; +/// each block will be layered on top of the image and overwrite its target addresses. +/// If two words in different blocks occupy the same memory location, +/// that location will contain the the second block's word in the end. pub fn layer(blocks: impl IntoIterator, layer_onto_os: bool) -> Result { let blocks = blocks.into_iter().collect::>(); diff --git a/assembler/src/link.rs b/assembler/src/link.rs index 8c2728c..b553d2f 100644 --- a/assembler/src/link.rs +++ b/assembler/src/link.rs @@ -1,11 +1,25 @@ +//! Functions and data structures for linking [`Object`](crate::assemble::Object)s +//! produced by [initial assembly](crate::assemble::assemble). +//! +//! Linking is the process of assembling instructions in an object which +//! refer to labels in other objects. When writing an LC-3 program, this +//! allows referencing code which *other* programmers have assembled and +//! distributed as objects. +//! +//! This module assumes that all objects share a global namespace for labels. +//! **Linking two or more objects which each define the same label will result in undefined behavior.** + use std::collections::HashMap; use lc3_isa::{Addr, Word}; use crate::assemble::{assemble_instruction, AssemblyResult, Object, ObjectWord, ObjectBlock, SymbolTable}; use crate::error::SingleError; +/// A block of LC-3 words and a target starting memory address. +/// +/// `origin` is the intended address of the first word in `words` when loading the block. 
pub struct Block { - pub(crate) origin: Addr, - pub(crate) words: Vec, + pub origin: Addr, + pub words: Vec, } fn link_object_block(symbol_table: &SymbolTable, block: ObjectBlock) -> Result { @@ -49,6 +63,11 @@ pub(crate) fn link_object_blocks(symbol_table: &SymbolTable, blocks: Vec) -> Result, SingleError> { let objects = objects.into_iter().collect::>(); diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index e101169..2863116 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -442,11 +442,14 @@ mod error { .BLKW 255\n\ LABEL .FILL 0x1234\n\ .END" - => SingleError::LabelTooDistant { - est_ref_pos: 0x3000, - est_label_pos: 0x3101, - offset: 0b1_0000_0000, - width: 9, + => SingleError::InvalidLabelReference { + reason: InvalidReferenceReason::TooDistant { + est_ref_pos: 0x3000, + est_label_pos: 0x3101, + offset: 0b1_0000_0000, + width: 9, + .. + }, .. }, label_too_distant_negative: @@ -456,11 +459,15 @@ mod error { .BLKW 255\n\ LEA R0, LABEL\n\ .END" - => SingleError::LabelTooDistant { - est_ref_pos: 0x3101, - est_label_pos: 0x3001, - offset: -0b1_0000_0001, - width: 9, + => + SingleError::InvalidLabelReference { + reason: InvalidReferenceReason::TooDistant { + est_ref_pos: 0x3101, + est_label_pos: 0x3001, + offset: -0b1_0000_0001, + width: 9, + .. + }, .. }, } @@ -553,8 +560,14 @@ mod error { SingleError::WrongNumberOfOperands { expected: 3, actual: 1 }, SingleError::ProgramBlocksOverlap { .. }, SingleError::OperandTypeMismatch { .. }, - SingleError::InvalidLabelReference { .. }, - SingleError::LabelTooDistant { .. }, + SingleError::InvalidLabelReference { + reason: InvalidReferenceReason::Duplicated, + .. + }, + SingleError::InvalidLabelReference { + reason: InvalidReferenceReason::TooDistant { .. }, + .. + }, }, } From a897466ffbed1a42b65e4a08ca6949b94660662d Mon Sep 17 00:00:00 2001 From: David Gipson Date: Tue, 5 Jul 2022 17:17:33 -0500 Subject: [PATCH 79/82] assembler: document how to use analyze::Visit --- assembler/docs/analyze.md | 25 +++++++++++++++++++++++++ assembler/src/analyze.rs | 1 + 2 files changed, 26 insertions(+) create mode 100644 assembler/docs/analyze.md diff --git a/assembler/docs/analyze.md b/assembler/docs/analyze.md new file mode 100644 index 0000000..1e6f020 --- /dev/null +++ b/assembler/docs/analyze.md @@ -0,0 +1,25 @@ +# `analyze` + +## How to add a new analysis pass + +In `analyze`, different types of errors are detected by different +syntax tree [visitors](https://en.wikipedia.org/wiki/Visitor_pattern). +To analyze the syntax tree for a new type of error: + +1. Implement `Visit` (we'll call the implementor `FooAnalysis`). + 1. Set `Visit::Data` to any data it needs at construct time. + 2. Set `Visit::Output` to any data it outputs besides errors. For example, + `SymbolTableAnalysis` outputs a (estimated) symbol table. + 3. The `visit` function depth-first traverses the syntax tree, + calling the "enter" method as it first reaches each node, then calling the + "exit" method on each node after all of its children have been visited. + Override any of these methods to get the data needed for the error analysis. +2. `Visit` is implemented for small tuples of `Visit`. In `validate`, + add `FooAnalysis` to the tuple type in a call to `visit`, or if it has + data dependencies, add a new call to visit and pass the data to `visit`. + - If necessary, extend the call to the `impl_visit_tuple` macro + to increase the maximum length of tuple `Visit` is implemented for. 
+ - `Visit::Data` for tuples is just a tuple of the component `Visit`s' `Visit::Data`, + in the same order as the `Visit`s. The same is true for `Visit::Output`. +3. (If a new call to `visit` was added:) Add the error vector output by `visit` to the + return value of `validate`. \ No newline at end of file diff --git a/assembler/src/analyze.rs b/assembler/src/analyze.rs index edc6006..d825e1f 100644 --- a/assembler/src/analyze.rs +++ b/assembler/src/analyze.rs @@ -917,6 +917,7 @@ pub fn validate(lex_data: &LexData, file_spanned: &Spanned) -> Vec let file_span_with_source = (file.id.clone(), file_span.clone()).into(); let errors_from_lex_data = analyze_lex_data(&lex_data, &file_span_with_source); + // For instructions on how to add a new analysis pass, see `/assembler/docs/analyze.md`. let ((symbol_table, _, _, _, _), first_pass_errors) = visit::<( SymbolTableAnalysis, From ef1fee1668087d929ba3673cf1bbca683947f0d1 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 6 Jul 2022 00:09:23 -0500 Subject: [PATCH 80/82] assembler: if strict, require BR cond codes in nzp order --- assembler/src/lex.rs | 59 +++++++++++++++++++++++++---------- assembler/tests/integ.rs | 67 ++++++++++++++++++++++++++++++++-------- 2 files changed, 97 insertions(+), 29 deletions(-) diff --git a/assembler/src/lex.rs b/assembler/src/lex.rs index f47fd30..3aa1273 100644 --- a/assembler/src/lex.rs +++ b/assembler/src/lex.rs @@ -322,6 +322,48 @@ fn comment() -> impl Parser> { .to(Token::Comment) } +fn branch_opcode(leniency: LeniencyLevel) -> impl Parser> { + let br = just::("BR"); + let res: Box>> = + match leniency { + LeniencyLevel::Lenient => Box::new( + br + .ignore_then(one_of("NZP").repeated().at_most(3)) + .map::(|cond_code_chars| { + let cond_codes = + if cond_code_chars.is_empty() { + ConditionCodes { n: true, z: true, p: true } + } else { + let n = cond_code_chars.contains(&'N'); + let z = cond_code_chars.contains(&'Z'); + let p = cond_code_chars.contains(&'P'); + ConditionCodes { n, z, p } + }; + Opcode::Br(cond_codes) + }), + ), + LeniencyLevel::Strict => Box::new( + br + .ignore_then(just("N").or_not()) + .then(just("Z").or_not()) + .then(just("P").or_not()) + .map::(|((n, z), p)| { + let cond_codes = + if n.is_none() && z.is_none() && p.is_none() { + ConditionCodes { n: true, z: true, p: true } + } else { + let n = n.is_some(); + let z = z.is_some(); + let p = p.is_some(); + ConditionCodes { n, z, p } + }; + Opcode::Br(cond_codes) + }), + ) + }; + res +} + fn tokens(leniency: LeniencyLevel) -> impl Parser>, Error=Simple> { let newline = text::newline() .to(Token::Newline); @@ -337,21 +379,6 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err .or(end().ignored()); use Opcode::*; - let branch_opcode = - just("BR") - .ignore_then(one_of("NZP").repeated().at_most(3)) - .map::(|cond_code_chars| { - let cond_codes = - if cond_code_chars.is_empty() { - ConditionCodes { n: true, z: true, p: true } - } else { - let n = cond_code_chars.contains(&'N'); - let z = cond_code_chars.contains(&'Z'); - let p = cond_code_chars.contains(&'P'); - ConditionCodes { n, z, p } - }; - Br(cond_codes) - }); // These options are separated by `or` instead of all belonging // to one tuple passed to `choice` because `choice` only supports @@ -360,7 +387,7 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err let opcode = choice(( one_opcode("ADD", Add), one_opcode("AND", And), - branch_opcode, + branch_opcode(leniency), one_opcode("JMP", Jmp), one_opcode("JSRR", Jsrr), one_opcode("JSR", Jsr), diff --git 
a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 2863116..d15f622 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -20,7 +20,8 @@ fn load_store_medium() { 0xBDFF, 0x7E3E, 0xF025, - ] + ], + LeniencyLevel::Lenient ); } @@ -28,8 +29,8 @@ fn load_store_medium() { mod single_instruction { use super::*; - fn single_instruction_test(input: &str, expected: Word) { - multiple_output_test(input, &[expected]); + fn single_instruction_test(input: &str, expected: Word, leniency: LeniencyLevel) { + multiple_output_test(input, &[expected], leniency); } macro_rules! tests { @@ -45,7 +46,24 @@ mod single_instruction { $( #[test] fn $test_name() { - single_instruction_test($instruction, $expected); + single_instruction_test($instruction, $expected, LeniencyLevel::Lenient); + } + )+ + } + }; + ($tests_name:ident (Strict) + $( + $test_name:ident: $instruction:expr => $expected:expr + ),+ + $(,)* + ) => { + mod $tests_name { + use super::*; + + $( + #[test] + fn $test_name() { + single_instruction_test($instruction, $expected, LeniencyLevel::Strict); } )+ } @@ -111,12 +129,12 @@ mod single_instruction { #[test] fn rti() { - single_instruction_test("RTI", 0x8000); + single_instruction_test("RTI", 0x8000, LeniencyLevel::Lenient); } #[test] fn ret() { - single_instruction_test("RET", 0xC1C0); + single_instruction_test("RET", 0xC1C0, LeniencyLevel::Lenient); } tests! { ldr @@ -159,6 +177,21 @@ mod single_instruction { } tests! { br + minimal: "BR #0" => 0x0E00, + n: "BRn #0" => 0x0800, + z: "BRz #0" => 0x0400, + p: "BRp #0" => 0x0200, + nz: "BRnz #0" => 0x0C00, + np: "BRnp #0" => 0x0A00, + zp: "BRzp #0" => 0x0600, + nzp: "BRnzp #0" => 0x0E00, + neg_imm: "BRnzp #-1" => 0x0FFF, + pos_imm: "BRnzp #1" => 0x0E01, + lenient: "BRpnz #1" => 0x0E01, + max_imm: "BRn #255" => 0x08FF, + min_imm: "BRz #-256" => 0x0500, + } + tests! { br_strict (Strict) minimal: "BR #0" => 0x0E00, n: "BRn #0" => 0x0800, z: "BRz #0" => 0x0400, @@ -186,7 +219,7 @@ mod single_instruction { $( #[test] fn $test_name() { - multiple_output_test($instruction, $expected); + multiple_output_test($instruction, $expected, LeniencyLevel::Lenient); } )+ } @@ -291,14 +324,14 @@ mod single_instruction { } } -fn multiple_output_test(input: &str, expected: &[Word]) { +fn multiple_output_test(input: &str, expected: &[Word], leniency: LeniencyLevel) { let input = format!(".ORIG x3000\n{}\n.END", input); - test(input.as_str(), 0x3000, expected); + test(input.as_str(), 0x3000, expected, leniency); } -fn test(input: &str, orig: usize, expected_mem: &[Word]) { +fn test(input: &str, orig: usize, expected_mem: &[Word], leniency: LeniencyLevel) { let src = input.to_string(); - let mem = assemble(&"".to_string(), &src, LeniencyLevel::Lenient, true).unwrap(); + let mem = assemble(&"".to_string(), &src, leniency, true).unwrap(); for i in 0..orig { assert_mem(&mem, i, 0x0000); @@ -326,7 +359,7 @@ mod error { macro_rules! single_error_tests { ($tests_name:ident $( - $test_name:ident: $source:expr => $expected:pat + $test_name:ident $(($leniency:ident))?: $source:expr => $expected:pat ),+ $(,)* ) => { @@ -337,7 +370,9 @@ mod error { #[test] fn $test_name() { let src = $source.to_string(); - match parse_and_analyze(&"".to_string(), &src, LeniencyLevel::Lenient) { + let mut leniency = LeniencyLevel::Lenient; + $(leniency = LeniencyLevel::$leniency;)? 
+ match parse_and_analyze(&"".to_string(), &src, leniency) { Err(error) => { match error { Error::Multiple(errors) => { @@ -374,6 +409,12 @@ mod error { #OOPS\n\ .END" => SingleError::BadInstruction, + strict_br_nzp_out_of_order (Strict): + ".ORIG x3000\n\ + LOOP ADD R0, R0, R0\n\ + BRpnz LOOP\n\ + .END" + => SingleError::BadInstruction, // Doesn't have to be this error, specifically. bad_label: ".ORIG x3000\n\ #OOPS ADD R0, R0, R0\n\ From b3dd742a1b46c8fcfd9bede2c572f63dda51af29 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 6 Jul 2022 14:07:32 -0500 Subject: [PATCH 81/82] assembler: disallow hanging labels in strict mode --- assembler/bin/as.rs | 1 - assembler/src/parse.rs | 12 ++++++++++-- assembler/tests/integ.rs | 8 +++++++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 7371567..9a925ae 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -30,7 +30,6 @@ struct Args { /// /// By default, the assembler is lenient about restrictions such as label length. /// This option enforces restrictions specified in Patt and Patel's Introduction to Computing Systems, 3rd edition. - // TODO: provide full list of restrictions #[clap(long, short)] strict: bool, diff --git a/assembler/src/parse.rs b/assembler/src/parse.rs index 01763e9..4045ed3 100644 --- a/assembler/src/parse.rs +++ b/assembler/src/parse.rs @@ -260,8 +260,16 @@ fn instruction(leniency: LeniencyLevel) -> impl Parser>, Error=Simple>> = + match leniency { + LeniencyLevel::Lenient => + Box::new(label.or_not() + .then_ignore(comments_and_newlines().or_not())), + LeniencyLevel::Strict => + Box::new(label.or_not()), + }; + + label_and_separator .then(opcode) .then(operands(leniency)) .then_ignore(terminator.rewind()) diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index d15f622..3512d17 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -80,6 +80,7 @@ mod single_instruction { tests! { labels minimal: "A ADD R0 R0 R0" => 0x1000, + on_separate_line: "A\n ADD R0 R0 R0" => 0x1000, begins_with_opcode: "ADDER ADD R0 R0 R0" => 0x1000, begins_with_trap: "INIT ADD R0 R0 R0" => 0x1000, } @@ -350,7 +351,6 @@ fn assert_mem(mem: &MemoryDump, location: usize, expected: Word) { } - mod error { use assert_matches::assert_matches; use lc3_assembler::error::{InvalidReferenceReason, OperandType, SingleError}; @@ -415,6 +415,12 @@ mod error { BRpnz LOOP\n\ .END" => SingleError::BadInstruction, // Doesn't have to be this error, specifically. + strict_label_on_separate_line (Strict): + ".ORIG x3000\n\ + LABEL + ADD R0, R0, R0\n\ + .END" + => SingleError::BadInstruction, // Doesn't have to be this error, specifically. 
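        // An illustrative aside (not an actual test case): the "hanging label" source
        // rejected by the strict test above is accepted when assembled leniently,
        // matching the `on_separate_line` case in the `labels` tests. Roughly:
        //
        //     let src = ".ORIG x3000\nLABEL\n ADD R0, R0, R0\n.END".to_string();
        //     assert!(parse_and_analyze(&"".to_string(), &src, LeniencyLevel::Lenient).is_ok());
        //     assert!(parse_and_analyze(&"".to_string(), &src, LeniencyLevel::Strict).is_err());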
bad_label: ".ORIG x3000\n\ #OOPS ADD R0, R0, R0\n\ From 29749ec026a5298b4a4054545bc60ed4dbac7037 Mon Sep 17 00:00:00 2001 From: David Gipson Date: Wed, 6 Jul 2022 16:14:22 -0500 Subject: [PATCH 82/82] assembler: add strict label restrictions, document all restrictions --- assembler/bin/as.rs | 16 +++++++++---- assembler/src/analyze.rs | 51 +++++++++++++++++++++++++++++++++++++--- assembler/src/error.rs | 28 ++++++++++++++++++++++ assembler/src/lex.rs | 2 +- assembler/src/lib.rs | 17 ++++++++++---- assembler/tests/integ.rs | 35 ++++++++++++++++++++++++--- 6 files changed, 133 insertions(+), 16 deletions(-) diff --git a/assembler/bin/as.rs b/assembler/bin/as.rs index 9a925ae..a19f6e4 100644 --- a/assembler/bin/as.rs +++ b/assembler/bin/as.rs @@ -16,9 +16,7 @@ const MEM_DUMP_FILE_EXTENSION: &'static str = "mem"; #[derive(Parser)] #[clap(author, version, about, - long_about = "Analyzes, assembles, and/or links LC-3 assembly and object files. \ - Each given assembly file is assembled to a single object file, \ - then all assembled or given object files are linked into a single executable image \ + long_about = "Analyzes, and/or assembles an LC-3 assembly file into an executable image of LC-3 machine code." )] struct Args { @@ -30,7 +28,17 @@ struct Args { /// /// By default, the assembler is lenient about restrictions such as label length. /// This option enforces restrictions specified in Patt and Patel's Introduction to Computing Systems, 3rd edition. - #[clap(long, short)] + /// + /// These include: + /// - Labels cannot contain underscores + /// - Labels cannot exceed 20 characters in length + /// - Labels must be defined on the same line as an instruction, not separately on a previous line + /// - Qualified number literals cannot be prefixed with `0` (i.e., `0x3000` is not allowed, only `x3000`) + /// - Operands must be separated with commas (`,`), not just whitespace. + /// - Condition codes for BR instructions *must* be listed in the following order: `n`, `z`, then `p`. + // NOTE TO DEVS (THIS SHOULD NOT BE IN THE DOCS): + // When updating this list, remember to update the library's list in the docs for LeniencyLevel. 
+ #[clap(long, short, verbatim_doc_comment)] strict: bool, /// Check the correctness of the program without assembling diff --git a/assembler/src/analyze.rs b/assembler/src/analyze.rs index d825e1f..f6c5133 100644 --- a/assembler/src/analyze.rs +++ b/assembler/src/analyze.rs @@ -32,9 +32,9 @@ use itertools::{concat, Itertools, zip}; use lc3_isa::{Addr, Word}; use crate::lex::{LexData, Opcode}; use crate::parse::{File, Instruction, Operand, ProgramBlock}; -use crate::{get, get_result, SourceId, Spanned, SpanWithSource, util, WithErrData}; +use crate::{get, get_result, LeniencyLevel, SourceId, Spanned, SpanWithSource, util, WithErrData}; use crate::assemble::calculate_offset; -use crate::error::{Error, InvalidReferenceReason, OperandType, ProgramBlockPlacement, RoughAddr, SingleError}; +use crate::error::{Error, InvalidReferenceReason, OperandType, ProgramBlockPlacement, RoughAddr, SingleError, StrictlyInvalidLabelReason}; use crate::error::OperandType::*; use crate::error::Error::*; use crate::error::SingleError::*; @@ -424,6 +424,42 @@ impl Visit for OperandTypesAnalysis { } +#[derive(Default)] +struct StrictLabelAnalysis { + errors: Vec, +} + +fn validate_strict_label(label: &String) -> Option { + let contains_underscores = label.contains('_'); + let too_long = label.len() > 20; + + match (contains_underscores, too_long) { + (false, false) => None, + (true, false) => Some(StrictlyInvalidLabelReason::ContainsUnderscores), + (false, true) => Some(StrictlyInvalidLabelReason::TooLong), + (true, true) => Some(StrictlyInvalidLabelReason::ContainsUnderscoresAndTooLong), + } +} + +impl Visit for StrictLabelAnalysis { + type Data = (); + fn new(_data: Self::Data) -> Self { Default::default() } + type Output = (); + fn finish(self) -> (Self::Output, Vec) { ((), self.errors) } + + fn enter_label(&mut self, label: &String, span: &SpanWithSource, _location: &LocationCounter) { + if let Some(error_reason) = validate_strict_label(label) { + self.errors.push( + Error::Spanned(span.clone(), + StrictlyInvalidLabel { + label: label.clone(), + reason: error_reason + })); + } + } +} + + struct ObjectPlacementAnalysis { errors: Vec, last_start: RoughAddr, @@ -911,7 +947,7 @@ fn analyze_lex_data(lex_data: &LexData, file_span: &SpanWithSource) -> Vec) -> Vec { +pub fn validate(lex_data: &LexData, file_spanned: &Spanned, leniency: LeniencyLevel) -> Vec { let (file, file_span) = file_spanned; let file_span_with_source = (file.id.clone(), file_span.clone()).into(); @@ -930,10 +966,19 @@ pub fn validate(lex_data: &LexData, file_spanned: &Spanned) -> Vec let (_, second_pass_errors) = visit::(&symbol_table, file, &file_span_with_source); + let strict_errors = + if let LeniencyLevel::Strict = leniency { + let (_, errors) = visit::((), file, &file_span_with_source); + errors + } else { + vec![] + }; + concat([ errors_from_lex_data, first_pass_errors, second_pass_errors, + strict_errors ]) } diff --git a/assembler/src/error.rs b/assembler/src/error.rs index 4853a56..2d8272a 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -201,6 +201,13 @@ pub enum SingleError { /// The specific reason the instruction can't be assembled. reason: InvalidReferenceReason }, + /// A label does not follow the strict LC-3 requirements. + StrictlyInvalidLabel { + /// The label. + label: String, + /// The specific reason the label doesn't meet strict LC-3 requirements. + reason: StrictlyInvalidLabelReason + }, /// Two program blocks span at least one common memory location. 
ProgramBlocksOverlap { placement1: ProgramBlockPlacement, placement2: ProgramBlockPlacement }, /// The lexer produced no tokens; probably indicates no content in the source file. @@ -211,6 +218,17 @@ pub enum SingleError { NoEnd, } +/// A reason that a label doesn't meet strict LC-3 requirements. +#[derive(Debug)] +pub enum StrictlyInvalidLabelReason { + /// The label contains underscores. + ContainsUnderscores, + /// The label is over 20 characters. + TooLong, + /// The label is over 20 characters and contains underscores. + ContainsUnderscoresAndTooLong, +} + /// A reason that an instruction cannot be assembled due to a label reference operand. #[derive(Debug)] pub enum InvalidReferenceReason { @@ -287,6 +305,16 @@ impl SingleError { Link => "unexpected link error".to_string(), Layer => "unexpected layering error".to_string(), TooManyInputs => "too many input files provided".to_string(), + StrictlyInvalidLabel { label, reason } => { + use StrictlyInvalidLabelReason::*; + let reason_str = + match reason { + ContainsUnderscores => "contains underscores", + TooLong => "over 20 characters long", + ContainsUnderscoresAndTooLong => "contains underscores and over 20 characters long" + }; + format!("label {} invalid: {}", label, reason_str) + } } } } diff --git a/assembler/src/lex.rs b/assembler/src/lex.rs index 3aa1273..ff52f62 100644 --- a/assembler/src/lex.rs +++ b/assembler/src/lex.rs @@ -457,7 +457,7 @@ fn tokens(leniency: LeniencyLevel) -> impl Parser>, Err let label = text::ident() // C-style identifier. Follows all LC-3 label rules but allows arbitrary length and underscores. .then_ignore(terminator.rewind()) - .map(Token::Label); // TODO: validate length, underscores in strict mode + .map(Token::Label); let token = choice(( opcode, diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 219b33d..bdfcf5b 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -88,15 +88,22 @@ impl ariadne::Span for SpanWithSource { /// For example, labels officially cannot exceed 20 characters. /// To enforce these rules, use [`LeniencyLevel::Strict`]. /// -/// [`LeniencyLevel::Lenient`] allows the following: -/// (TODO) +/// [`LeniencyLevel::Strict`] enforces the following: +/// - Labels cannot contain underscores +/// - Labels cannot exceed 20 characters in length +/// - Labels must be defined on the same line as an instruction, not separately on a previous line +/// - Qualified number literals cannot be prefixed with `0` (i.e., `0x3000` is not allowed, only `x3000`) +/// - Operands must be separated with commas (`,`), not just whitespace. +/// - Condition codes for BR instructions *must* be listed in the following order: `n`, `z`, then `p`. +// NOTE TO DEVS (THIS SHOULD NOT BE IN THE DOCS): +// When updating this list, remember to update the command line app's list. #[derive(Copy, Clone)] pub enum LeniencyLevel { - /// Indicates that all convenience features (described under [`LeniencyLevel`]) are to be allowed. + /// Indicates that all convenience features are to be allowed. Lenient, /// Indicates that all official rules of the LC-3 assembly language - /// are to be followed, as described in *Introduction to Computing Systems: from Bits & Gates to C/C++ & Beyond*, + /// are to be followed, as described in *Introduction to Computing Systems: from Bits & Gates to C/C++ & Beyond (3rd ed.)*, /// by Patt and Patel. 
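    ///
    /// A usage sketch for illustration, mirroring the calls made in this crate's
    /// tests (the final `true` is assumed here to request layering onto the OS):
    ///
    /// ```ignore
    /// let mem = assemble(&"".to_string(), &src, LeniencyLevel::Strict, true).unwrap();
    /// ```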
Strict } @@ -213,7 +220,7 @@ pub fn parse_and_analyze_file(input: &PathBuf, leniency: LeniencyLevel) -> Resul pub fn parse_and_analyze(id: &SourceId, src: &String, leniency: LeniencyLevel) -> Result { let (tokens, lex_data) = lex::lex(src, leniency).map_err(|es| error::into_multiple(id.clone(), es))?; let file_spanned = parse::parse(id.clone(), src, tokens, leniency).map_err(|es| error::into_multiple(id.clone(), es))?; - let errors = analyze::validate(&lex_data, &file_spanned); + let errors = analyze::validate(&lex_data, &file_spanned, leniency); if !errors.is_empty() { return Err(errors.into()); } diff --git a/assembler/tests/integ.rs b/assembler/tests/integ.rs index 3512d17..be1aba7 100644 --- a/assembler/tests/integ.rs +++ b/assembler/tests/integ.rs @@ -85,6 +85,12 @@ mod single_instruction { begins_with_trap: "INIT ADD R0 R0 R0" => 0x1000, } + tests! { labels_strict (Strict) + minimal: "A ADD R0, R0, R0" => 0x1000, + begins_with_opcode: "ADDER ADD R0, R0, R0" => 0x1000, + begins_with_trap: "INIT ADD R0, R0, R0" => 0x1000, + } + tests! { add minimal: "ADD R0 R0 R0" => 0x1000, r1_2_3: "ADD R1 R2 R3" => 0x1283, @@ -353,7 +359,7 @@ fn assert_mem(mem: &MemoryDump, location: usize, expected: Word) { mod error { use assert_matches::assert_matches; - use lc3_assembler::error::{InvalidReferenceReason, OperandType, SingleError}; + use lc3_assembler::error::{StrictlyInvalidLabelReason, InvalidReferenceReason, OperandType, SingleError}; use super::*; macro_rules! single_error_tests { @@ -506,8 +512,7 @@ mod error { .BLKW 255\n\ LEA R0, LABEL\n\ .END" - => - SingleError::InvalidLabelReference { + => SingleError::InvalidLabelReference { reason: InvalidReferenceReason::TooDistant { est_ref_pos: 0x3101, est_label_pos: 0x3001, @@ -517,6 +522,30 @@ mod error { }, .. }, + label_contains_underscores (Strict): + ".ORIG x3000\n\ + OH_NO HALT\n\ + .END" + => SingleError::StrictlyInvalidLabel { + reason: StrictlyInvalidLabelReason::ContainsUnderscores, + .. + }, + label_too_long (Strict): + ".ORIG x3000\n\ + REALLYLONGLABEL0123456789 HALT\n\ + .END" + => SingleError::StrictlyInvalidLabel { + reason: StrictlyInvalidLabelReason::TooLong, + .. + }, + label_too_long_and_contains_underscores (Strict): + ".ORIG x3000\n\ + REALLYLONGLABEL_0123456789 HALT\n\ + .END" + => SingleError::StrictlyInvalidLabel { + reason: StrictlyInvalidLabelReason::ContainsUnderscoresAndTooLong, + .. + } } macro_rules! contains_error {