Skip to content

Commit

Permalink
fix: bytecode reader
Browse files Browse the repository at this point in the history
  • Loading branch information
mtshiba committed Aug 24, 2023
1 parent afed802 commit 9db6e5f
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 103 deletions.
45 changes: 22 additions & 23 deletions crates/erg_common/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ impl TryFrom<&str> for ErgMode {
"desugar" | "desugarer" => Ok(Self::Desugar),
"typecheck" | "lower" | "tc" => Ok(Self::TypeCheck),
"fullcheck" | "check" | "checker" => Ok(Self::FullCheck),
"compile" | "compiler" => Ok(Self::Compile),
"transpile" | "transpiler" => Ok(Self::Transpile),
"comp" | "compile" | "compiler" => Ok(Self::Compile),
"trans" | "transpile" | "transpiler" => Ok(Self::Transpile),
"run" | "execute" => Ok(Self::Execute),
"server" | "language-server" => Ok(Self::LanguageServer),
"byteread" | "read" | "reader" => Ok(Self::Read),
"byteread" | "read" | "reader" | "dis" => Ok(Self::Read),
_ => Err(()),
}
}
Expand Down Expand Up @@ -217,11 +217,6 @@ impl ErgConfig {
// not `for` because we need to consume the next argument
while let Some(arg) = args.next() {
match &arg[..] {
/* Commands */
"lex" | "parse" | "desugar" | "typecheck" | "check" | "compile" | "transpile"
| "run" | "execute" | "server" | "tc" => {
cfg.mode = ErgMode::try_from(&arg[..]).unwrap();
}
/* Options */
"--" => {
for arg in args {
Expand Down Expand Up @@ -407,24 +402,28 @@ USAGE:
process::exit(2);
}
_ => {
let path = PathBuf::from_str(&arg[..])
.unwrap_or_else(|_| panic!("invalid file path: {arg}"));
let path = normalize_path(path);
cfg.input = Input::file(path);
match args.next().as_ref().map(|s| &s[..]) {
Some("--") => {
for arg in args {
cfg.runtime_args.push(Box::leak(arg.into_boxed_str()));
if let Ok(mode) = ErgMode::try_from(&arg[..]) {
cfg.mode = mode;
} else {
let path = PathBuf::from_str(&arg[..])
.unwrap_or_else(|_| panic!("invalid file path: {arg}"));
let path = normalize_path(path);
cfg.input = Input::file(path);
match args.next().as_ref().map(|s| &s[..]) {
Some("--") => {
for arg in args {
cfg.runtime_args.push(Box::leak(arg.into_boxed_str()));
}
}
Some(some) => {
println!("invalid argument: {some}");
println!("Do not pass options after the file path. If you want to pass runtime arguments, use `--` before them.");
process::exit(1);
}
_ => {}
}
Some(some) => {
println!("invalid argument: {some}");
println!("Do not pass options after the file path. If you want to pass runtime arguments, use `--` before them.");
process::exit(1);
}
_ => {}
break;
}
break;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/erg_common/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ pub fn strs_into_bytes(names: Vec<Str>) -> Vec<u8> {

pub fn str_into_bytes(cont: Str, is_interned: bool) -> Vec<u8> {
let mut bytes = vec![];
if cont.is_ascii() {
if cont.is_ascii() && cont.len() <= u8::MAX as usize {
if is_interned {
bytes.push(DataTypePrefix::ShortAsciiInterned as u8);
} else {
Expand Down
36 changes: 25 additions & 11 deletions crates/erg_compiler/ty/codeobj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,18 @@ pub enum FastKind {
Free = 0x80,
}

/// Decodes a single raw kind byte into a [`FastKind`].
///
/// Only the exact values `0x20` (`Local`), `0x40` (`Cell`) and `0x80` (`Free`)
/// are accepted. Every other byte, including any combination of those bits,
/// is rejected with `Err("invalid kind")`.
impl TryFrom<u8> for FastKind {
    type Error = &'static str;

    fn try_from(kind: u8) -> Result<Self, Self::Error> {
        let decoded = match kind {
            0x20 => Self::Local,
            0x40 => Self::Cell,
            0x80 => Self::Free,
            // Anything else is not a single known kind.
            _ => return Err("invalid kind"),
        };
        Ok(decoded)
    }
}

/// Bit masks for CodeObj.flags
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
Expand Down Expand Up @@ -290,7 +302,7 @@ impl CodeObj {
}
}

pub fn from_pyc<P: AsRef<Path>>(path: P) -> DeserializeResult<Self> {
pub fn from_pyc<P: AsRef<Path>>(path: P) -> DeserializeResult<(Self, PythonVersion)> {
let mut f = BufReader::new(File::open(path)?);
let v = &mut Vec::with_capacity(16);
f.read_to_end(v)?;
Expand All @@ -300,10 +312,11 @@ impl CodeObj {
let _timestamp = Deserializer::deserialize_u32(v);
let _padding = Deserializer::deserialize_u32(v);
let code = Self::from_bytes(v, python_ver)?;
Ok(code)
Ok((code, python_ver))
}

pub fn from_bytes(v: &mut Vec<u8>, python_ver: PythonVersion) -> DeserializeResult<Self> {
assert_eq!(v.remove(0), DataTypePrefix::Code as u8, "not a code object");
let mut des = Deserializer::new();
let argcount = Deserializer::deserialize_u32(v);
let posonlyargcount = if python_ver.minor >= Some(8) {
Expand All @@ -320,15 +333,16 @@ impl CodeObj {
let stacksize = Deserializer::deserialize_u32(v);
let flags = Deserializer::deserialize_u32(v);
let code = des.deserialize_bytes(v)?;
let consts = des.deserialize_const_vec(v, python_ver)?;
let names = des.deserialize_str_vec(v, python_ver)?;
// TODO: localplusnames
let varnames = des.deserialize_str_vec(v, python_ver)?;
let freevars = des.deserialize_str_vec(v, python_ver)?;
let cellvars = des.deserialize_str_vec(v, python_ver)?;
let filename = des.deserialize_str(v, python_ver)?;
let name = des.deserialize_str(v, python_ver)?;
let qualname = des.deserialize_str(v, python_ver)?;
let consts = des.deserialize_const_vec(v, python_ver, Some("consts"))?;
let names = des.deserialize_str_vec(v, python_ver, Some("names"))?;
let (varnames, freevars, cellvars) = des.deserialize_locals(v, python_ver)?;
let filename = des.deserialize_str(v, python_ver, Some("filename"))?;
let name = des.deserialize_str(v, python_ver, Some("name"))?;
let qualname = if python_ver.minor >= Some(11) {
des.deserialize_str(v, python_ver, Some("qualname"))?
} else {
name.clone()
};
let firstlineno = Deserializer::deserialize_u32(v);
let lnotab = des.deserialize_bytes(v)?;
let exceptiontable = if python_ver.minor >= Some(11) {
Expand Down
143 changes: 75 additions & 68 deletions crates/erg_compiler/ty/deserialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use erg_common::traits::ExitStatus;
use erg_common::{fn_name, switch_lang};
use erg_common::{ArcArray, Str};

use super::codeobj::CodeObj;
use super::codeobj::{CodeObj, FastKind};
use super::constructors::array_t;
use super::typaram::TyParam;
use super::value::ValueObj;
Expand Down Expand Up @@ -70,22 +70,29 @@ impl DeserializeError {
)
}

pub fn type_error(expect: &Type, found: &Type) -> Self {
pub fn type_error(field: Option<&str>, expect: &Type, found: &Type) -> Self {
let field = switch_lang!(
"japanese" => field.map(|f| format!("フィールド{f}の読み取りに失敗しました: ")),
"simplified_chinese" => field.map(|f| format!("读取字段{f}失败: ")),
"traditional_chinese" => field.map(|f| format!("讀取字段{f}失敗: ")),
"english" => field.map(|f| format!("failed to read field {f}: ")),
)
.unwrap_or("".to_string());
Self::new(
0,
fn_name!(),
switch_lang!(
"japanese" => format!(
"{expect}型オブジェクトを予期しましたが、 読み込んだオブジェクトは{found}型です",
"{field}{expect}型オブジェクトを予期しましたが、 読み込んだオブジェクトは{found}型です",
),
"simplified_chinese" => format!(
"期望{expect}对象,但反序列化的对象是{found}",
"{field}期望{expect}对象,但反序列化的对象是{found}",
),
"traditional_chinese" => format!(
"期望一個{expect}對象,但反序列化的對像是{found}",
"{field}期望一個{expect}對象,但反序列化的對像是{found}",
),
"english" => format!(
"expect a {expect} object, but the deserialized object is {found}",
"{field}expect a {expect} object, but the deserialized object is {found}",
),
),
)
Expand Down Expand Up @@ -113,8 +120,8 @@ impl Deserializer {
pub fn run(cfg: ErgConfig) -> ExitStatus {
let filename = cfg.input.path();
match CodeObj::from_pyc(filename) {
Ok(codeobj) => {
println!("{}", codeobj.code_info(None));
Ok((codeobj, ver)) => {
println!("{}", codeobj.code_info(Some(ver)));
ExitStatus::OK
}
Err(e) => {
Expand Down Expand Up @@ -195,60 +202,8 @@ impl Deserializer {
Ok(self.get_cached_arr(&arr))
}
DataTypePrefix::Code => {
let argcount = Self::deserialize_u32(v);
let posonlyargcount = if python_ver.minor >= Some(8) {
Self::deserialize_u32(v)
} else {
0
};
let kwonlyargcount = Self::deserialize_u32(v);
let nlocals = if python_ver.minor < Some(11) {
Self::deserialize_u32(v)
} else {
0
};
let stacksize = Self::deserialize_u32(v);
let flags = Self::deserialize_u32(v);
let code = self.deserialize_bytes(v)?;
let consts = self.deserialize_const_vec(v, python_ver)?;
let names = self.deserialize_str_vec(v, python_ver)?;
let varnames = self.deserialize_str_vec(v, python_ver)?;
let freevars = self.deserialize_str_vec(v, python_ver)?;
let cellvars = self.deserialize_str_vec(v, python_ver)?;
let filename = self.deserialize_str(v, python_ver)?;
let name = self.deserialize_str(v, python_ver)?;
let qualname = if python_ver.minor >= Some(11) {
self.deserialize_str(v, python_ver)?
} else {
name.clone()
};
let firstlineno = Self::deserialize_u32(v);
let lnotab = self.deserialize_bytes(v)?;
let exceptiontable = if python_ver.minor >= Some(11) {
self.deserialize_bytes(v)?
} else {
vec![]
};
Ok(ValueObj::from(CodeObj {
argcount,
posonlyargcount,
kwonlyargcount,
nlocals,
stacksize,
flags,
code,
consts,
names,
varnames,
freevars,
cellvars,
filename,
name,
qualname,
firstlineno,
lnotab,
exceptiontable,
}))
v.insert(0, DataTypePrefix::Code as u8);
Ok(ValueObj::from(CodeObj::from_bytes(v, python_ver)?))
}
DataTypePrefix::None => Ok(ValueObj::None),
other => Err(DeserializeError::new(
Expand All @@ -268,21 +223,31 @@ impl Deserializer {
&mut self,
v: &mut Vec<u8>,
python_ver: PythonVersion,
field: Option<&str>,
) -> DeserializeResult<Vec<ValueObj>> {
match self.deserialize_const(v, python_ver)? {
ValueObj::Array(arr) => Ok(arr.to_vec()),
other => Err(DeserializeError::type_error(&Type::Str, other.ref_t())),
other => Err(DeserializeError::type_error(
field,
&Type::Str,
other.ref_t(),
)),
}
}

pub fn deserialize_const_array(
&mut self,
v: &mut Vec<u8>,
python_ver: PythonVersion,
field: Option<&str>,
) -> DeserializeResult<ArcArray<ValueObj>> {
match self.deserialize_const(v, python_ver)? {
ValueObj::Array(arr) => Ok(arr),
other => Err(DeserializeError::type_error(&Type::Str, other.ref_t())),
other => Err(DeserializeError::type_error(
field,
&Type::Str,
other.ref_t(),
)),
}
}

Expand All @@ -293,38 +258,80 @@ impl Deserializer {
pub fn try_into_str(&mut self, c: ValueObj) -> DeserializeResult<Str> {
match c {
ValueObj::Str(s) => Ok(s),
other => Err(DeserializeError::type_error(&Type::Str, other.ref_t())),
other => Err(DeserializeError::type_error(
None,
&Type::Str,
other.ref_t(),
)),
}
}

pub fn deserialize_str_vec(
&mut self,
v: &mut Vec<u8>,
python_ver: PythonVersion,
field: Option<&str>,
) -> DeserializeResult<Vec<Str>> {
match self.deserialize_const(v, python_ver)? {
ValueObj::Array(arr) => {
ValueObj::Array(arr) | ValueObj::Tuple(arr) => {
let mut strs = Vec::with_capacity(arr.len());
for c in arr.iter().cloned() {
strs.push(self.try_into_str(c)?);
}
Ok(strs)
}
other => Err(DeserializeError::type_error(
field,
&array_t(Type::Str, TyParam::erased(Type::Nat)),
other.ref_t(),
&other.class(),
)),
}
}

pub fn deserialize_locals(
&mut self,
v: &mut Vec<u8>,
python_ver: PythonVersion,
) -> DeserializeResult<(Vec<Str>, Vec<Str>, Vec<Str>)> {
if python_ver.minor >= Some(11) {
let names =
self.deserialize_str_vec(v, python_ver, Some("varnames, freevars, cellvars"))?;
let kinds = self.deserialize_bytes(v)?;
assert_eq!(names.len(), kinds.len());
// partition
let mut varnames = vec![];
let mut freevars = vec![];
let mut cellvars = vec![];
for (name, kind) in names.into_iter().zip(kinds.into_iter()) {
match FastKind::try_from(kind) {
Ok(FastKind::Local) => varnames.push(name),
Ok(FastKind::Free) => freevars.push(name),
Ok(FastKind::Cell) => cellvars.push(name),
_ => unreachable!(),
}
}
Ok((varnames, freevars, cellvars))
} else {
let varnames = self.deserialize_str_vec(v, python_ver, Some("varnames"))?;
let freevars = self.deserialize_str_vec(v, python_ver, Some("freevars"))?;
let cellvars = self.deserialize_str_vec(v, python_ver, Some("cellvars"))?;
Ok((varnames, freevars, cellvars))
}
}

pub fn deserialize_str(
&mut self,
v: &mut Vec<u8>,
python_ver: PythonVersion,
field: Option<&str>,
) -> DeserializeResult<Str> {
match self.deserialize_const(v, python_ver)? {
ValueObj::Str(s) => Ok(s),
other => Err(DeserializeError::type_error(&Type::Str, other.ref_t())),
other => Err(DeserializeError::type_error(
field,
&Type::Str,
other.ref_t(),
)),
}
}

Expand Down

0 comments on commit 9db6e5f

Please sign in to comment.