From 5cc9e670d90f0721cbeda3b7e8ecb833e77618f1 Mon Sep 17 00:00:00 2001 From: Yikai Zhao Date: Fri, 29 Dec 2023 00:10:07 +0800 Subject: [PATCH] init bytecode working version --- .gitignore | 1 + Cargo.lock | 102 ++++++++++++++++++++++++ Cargo.toml | 11 +++ src/bytecode.rs | 201 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 4 + src/main.rs | 9 +++ 6 files changed, 328 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/bytecode.rs create mode 100644 src/lib.rs create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..c764ca7 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,102 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anyhow" +version = "1.0.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9d19de80eff169429ac1e9f48fffb163916b448a44e8e046186232046d9e1f9" + +[[package]] +name = "emacs-lsp-booster" +version = "0.1.0" +dependencies = [ + "anyhow", + "lazy_static", + "serde_json", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "proc-macro2" +version = "1.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "serde" +version = "1.0.193" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.193" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..eef7398 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "emacs-lsp-booster" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +serde_json = "1.0" +anyhow = "1.0" +lazy_static = "1.4" diff --git a/src/bytecode.rs 
b/src/bytecode.rs
new file mode 100644
index 0000000..cbe3161
--- /dev/null
+++ b/src/bytecode.rs
@@ -0,0 +1,226 @@
+use std::collections::BTreeMap;
+
+use serde_json as json;
+
+
+/// A Lisp value that can appear in the constants vector of the generated bytecode.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+enum LispObject {
+    Symbol(String),
+    Keyword(String),
+    Str(String),
+    Int(i64),
+    Float(String), // numeric literal kept as text so the enum can derive Eq and Ord
+    Nil,
+    T,
+}
+
+impl LispObject {
+    /// Renders the object as the text written into the constants vector.
+    fn to_repl(&self) -> String {
+        match self {
+            // TODO: symbol and keyword names are emitted verbatim, without escaping
+            LispObject::Symbol(s) => s.clone(),
+            LispObject::Keyword(s) => format!(":{}", s),
+            // Escape backslashes first: escaping quotes first would let the second
+            // pass double the backslash that was just put in front of each quote.
+            LispObject::Str(s) => format!("\"{}\"", s.replace('\\', "\\\\").replace('"', "\\\"")),
+            LispObject::Int(i) => i.to_string(),
+            LispObject::Float(s) => s.clone(),
+            LispObject::Nil => "nil".into(),
+            LispObject::T => "t".into(),
+        }
+    }
+}
+
+
+/// Accumulates the code bytes and constants of one bytecode function object.
+///
+/// Only for generating json. Sequential execution only.
+struct BytecodeBuilder {
+    code: Vec<u8>,
+    // each constant mapped to its index in the constants vector
+    constants: BTreeMap<LispObject, u32>,
+
+    current_stack_size: i32,
+    max_stack_size: i32,
+}
+
+lazy_static! {
+    static ref CONSTANT_FUNC_MAKE_VECTOR: LispObject = LispObject::Symbol("make-vector".to_string());
+}
+
+impl BytecodeBuilder {
+    /// Returns the index of `obj` in the constants vector, registering it if it is new.
+    fn create_or_get_constant(&mut self, obj: LispObject) -> u32 {
+        let next_id = self.constants.len() as u32;
+        *self.constants.entry(obj).or_insert(next_id)
+    }
+
+    /// Appends the raw bytes of one instruction and updates the tracked stack depth.
+    ///
+    /// `stack_delta` is the net stack size change of the instruction. Panics if the
+    /// tracked stack size would become negative.
+    fn add_opcode<const N: usize>(&mut self, opcode: [u8; N], stack_delta: i32) {
+        assert!(self.current_stack_size + stack_delta >= 0);
+        self.current_stack_size += stack_delta;
+        self.max_stack_size = self.max_stack_size.max(self.current_stack_size);
+        self.code.extend(&opcode);
+    }
+
+    /// Emits the instruction that pushes the constant `obj` onto the stack.
+    fn add_opcode_constant(&mut self, obj: LispObject) {
+        let idx = self.create_or_get_constant(obj);
+        if idx < 64 {
+            self.add_opcode([(192 + idx) as u8], 1);
+        } else if idx < (1 << 16) {
+            // constant2: the index is written as two bytes, low byte first
+            self.add_opcode([129, (idx & 0xff) as u8, (idx >> 8) as u8], 1);
+        } else {
+            unimplemented!();
+        }
+    }
+
+    /// Emits a call instruction for a function taking `n_args` arguments.
+    fn add_opcode_call(&mut self, n_args: u16) {
+        // the function and its arguments are popped, the result is pushed
+        // https://github.com/rocky/elisp-bytecode/issues/79
+        let delta = -(n_args as i32 + 1) + 1;
+        if n_args <= 5 {
+            self.add_opcode([(32 + n_args) as u8], delta);
+        } else if n_args < (1 << 8) {
+            self.add_opcode([(32 + 6), n_args as u8], delta);
+        } else {
+            self.add_opcode([(32 + 7), (n_args & 0xff) as u8, (n_args >> 8) as u8], delta);
+        }
+    }
+
+    /// Emits code that leaves a vector holding the elements of `arr` on the stack.
+    fn build_one_value_array(&mut self, arr: &[json::Value]) {
+        if arr.len() < (1 << 16) {
+            // use "vector" call
+            self.add_opcode_constant(LispObject::Symbol("vector".into()));
+            for value in arr {
+                self.build_one_value(value);
+            }
+            self.add_opcode_call(arr.len() as u16);
+        } else {
+            // fallback to make-vector & aset
+            self.add_opcode_constant(CONSTANT_FUNC_MAKE_VECTOR.clone());
+            self.add_opcode_constant(LispObject::Int(arr.len() as i64));
+            self.add_opcode_constant(LispObject::Nil);
+            self.add_opcode([32 + 2], -3 + 1);   // call
+
+            self.add_opcode_constant(LispObject::Int(0));  // index for aset
+
+            for value in arr {
+                self.add_opcode([1], 1);        // stack ref 1, the vector
+                self.add_opcode([1], 1);        // stack ref 1, the index
+                self.build_one_value(value);
+                self.add_opcode([73], -3+1);    // aset
+                self.add_opcode([136], -1);     // discard aset result
+                self.add_opcode([84], 0);       // add1
+            }
+            self.add_opcode([136], -1);         // discard index
+            // the vector remains
+        }
+    }
+
+    /// Emits code that leaves a plist (`:key value ...`) built from `map` on the stack.
+    fn build_one_value_map(&mut self, map: &json::Map<String, json::Value>) {
+        // list
+        let list_len = map.len() * 2;
+        let use_list_call = list_len < (1 << 16);
+        if use_list_call {
+            self.add_opcode_constant(LispObject::Symbol("list".into()));
+        }
+
+        for (key, value) in map {
+            self.add_opcode_constant(LispObject::Keyword(key.clone()));
+            self.build_one_value(value);
+        }
+
+        if use_list_call {
+            self.add_opcode_call(list_len as u16);
+        } else {
+            // too many elements for one call: cons them onto nil one at a time
+            self.add_opcode_constant(LispObject::Nil);
+            for _ in 0..list_len {
+                self.add_opcode([66], -2 + 1);  // cons
+            }
+        }
+    }
+
+    /// Emits code that pushes the Lisp counterpart of `value`.
+    ///
+    /// current only support:
+    /// object-type: plist, null-object: nil, false-object: nil, array-type: vector
+    fn build_one_value(&mut self, value: &json::Value) {
+        match value {
+            json::Value::Null | json::Value::Bool(false) => {
+                self.add_opcode_constant(LispObject::Nil);
+            },
+            json::Value::Bool(true) => {
+                self.add_opcode_constant(LispObject::T);
+            },
+            json::Value::Number(num) => {
+                // Anything that does not fit an i64 (floats, and u64 values above
+                // i64::MAX) is emitted as its literal text instead of panicking.
+                let constant = match num.as_i64() {
+                    Some(i) => LispObject::Int(i),
+                    None => LispObject::Float(num.to_string()),
+                };
+                self.add_opcode_constant(constant);
+            },
+            json::Value::String(s) => {
+                self.add_opcode_constant(LispObject::Str(s.clone()));
+            },
+            json::Value::Array(arr) => {
+                self.build_one_value_array(arr);
+            },
+            json::Value::Object(map) => {
+                self.build_one_value_map(map);
+            },
+        }
+    }
+
+    /// Emits the code for `value` followed by the return instruction.
+    fn build(&mut self, value: &json::Value) {
+        self.build_one_value(value);
+        self.add_opcode([135], -1);   // return
+    }
+
+    /// Consumes the builder and renders the function object as
+    /// `#[0 "<code>" [<constants>] <max stack size>]`.
+    fn into_repl(self) -> String {
+        use std::fmt::Write as _;
+
+        let mut result: String = "#[0 \"".into();
+        for c in self.code {
+            // each code byte is written as an octal escape
+            write!(result, "\\{:o}", c).expect("writing to a String cannot fail");
+        }
+        result += "\" [";
+
+        let mut constants_array = self.constants.into_iter().collect::<Vec<_>>();
+        constants_array.sort_by_key(|(_, idx)| *idx);
+        result += &constants_array.into_iter()
+            .map(|(obj, _)| obj.to_repl())
+            .collect::<Vec<_>>().join(" ");
+
+        result += &format!("] {}]", self.max_stack_size);
+        result
+    }
+}
+
+/// Generates the textual bytecode function object built from the JSON `value`.
+pub fn generate_bytecode_repl(value: &json::Value) -> String {
+    let mut builder = BytecodeBuilder {
+        code: Vec::new(),
+        constants: BTreeMap::new(),
+        current_stack_size: 0,
+        max_stack_size: 0,
+    };
+    builder.build(value);
+    builder.into_repl()
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..8eabd82
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,4 @@
+#[macro_use]
+extern crate lazy_static;
+
+pub mod bytecode;
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..eabb922
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,11 @@
+use serde_json as json;
+
+use emacs_lsp_booster::bytecode;
+
+
+/// Reads one JSON value from stdin and prints the generated bytecode to stdout.
+fn main() -> Result<(), json::Error> {
+    let value: json::Value = json::from_reader(std::io::stdin())?;
+    println!("{}", bytecode::generate_bytecode_repl(&value));
+    Ok(())
+}