Skip to content

Formalize linetable parsing and propagate out-of-bounds errors #755

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions src/native_stack_trace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,13 @@ impl NativeStack {

// Merge all python frames until we hit one with `is_entry`.
while python_frame_index < frames.len() {
let is_entry = frames[python_frame_index].is_entry;
merged.push(frames[python_frame_index].clone());

if frames[python_frame_index].is_entry {
python_frame_index += 1;
if is_entry {
break;
}

python_frame_index += 1;
}
python_frame_index += 1;
}
}
};
Expand Down
179 changes: 136 additions & 43 deletions src/python_interpreters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ This means we can't dereference them directly.

#![allow(clippy::unnecessary_cast)]

use anyhow::{Error, Result};

// these bindings are automatically generated by rust bindgen
// using the generate_bindings.py script
use crate::python_bindings::{
Expand Down Expand Up @@ -65,7 +67,7 @@ pub trait CodeObject {
fn argcount(&self) -> i32;
fn varnames(&self) -> *mut Self::TupleObject;

fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32;
fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result<i32, Error>;
}

pub trait BytesObject {
Expand Down Expand Up @@ -216,7 +218,7 @@ macro_rules! PythonCodeObjectImpl {
self.co_varnames as *mut Self::TupleObject
}

fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 {
fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result<i32, Error> {
let lasti = lasti as i32;

// unpack the line table. format is specified here:
Expand All @@ -240,34 +242,89 @@ macro_rules! PythonCodeObjectImpl {
line_number += increment;
i += 2;
}
line_number
Ok(line_number)
}
}
};
}

fn read_varint(index: &mut usize, table: &[u8]) -> usize {
let mut ret: usize;
let mut byte = table[*index];
let mut shift = 0;
*index += 1;
ret = (byte & 63) as usize;
struct ByteStream<'a> {
cursor: usize,
buffer: &'a [u8],
}

while byte & 64 != 0 {
byte = table[*index];
*index += 1;
shift += 6;
ret += ((byte & 63) as usize) << shift;
impl<'a> ByteStream<'a> {
fn new(buffer: &'a [u8]) -> Self {
Self { cursor: 0, buffer }
}

fn try_read(&mut self) -> Option<u8> {
if self.cursor >= self.buffer.len() {
None
} else {
let byte = self.buffer[self.cursor];
self.cursor += 1;
Some(byte)
}
}

fn read(&mut self) -> Result<u8, Error> {
match self.try_read() {
Some(byte) => Ok(byte),
None => Err(Error::msg(format!(
"Tried to read after end of buffer {:?}",
self.buffer
))),
}
}

fn try_read_header(&mut self) -> Result<Option<u8>, Error> {
match self.try_read() {
Some(byte) => {
if byte & 0b10000000 == 0 {
Err(Error::msg(format!(
"Expected header bit at index {} in {:?}",
self.cursor - 1, self.buffer
)))
} else {
Ok(Some(byte))
}
}
None => Ok(None),
}
}

fn read_body(&mut self) -> Result<u8, Error> {
let byte = self.read()?;
if byte & 0b10000000 != 0 {
Err(Error::msg(format!(
"Expected non-header bit at index {} in {:?}",
self.cursor - 1, self.buffer
)))
} else {
Ok(byte)
}
}

fn read_varint(&mut self) -> Result<usize, Error> {
let mut byte = self.read_body()?;
let mut value = (byte & 63) as usize;
let mut shift: usize = 0;
while byte & 64 != 0 {
byte = self.read_body()?;
shift += 6;
value |= ((byte & 63) as usize) << shift;
}
Ok(value)
}
ret
}

fn read_signed_varint(index: &mut usize, table: &[u8]) -> isize {
let unsigned_val = read_varint(index, table);
if unsigned_val & 1 != 0 {
-((unsigned_val >> 1) as isize)
} else {
(unsigned_val >> 1) as isize
fn read_signed_varint(&mut self) -> Result<isize, Error> {
let value = self.read_varint()?;
Ok(if value & 1 != 0 {
-((value >> 1) as isize)
} else {
(value >> 1) as isize
})
}
}

Expand Down Expand Up @@ -301,49 +358,46 @@ macro_rules! CompactCodeObjectImpl {
self.co_localsplusnames as *mut Self::TupleObject
}

fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 {
fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result<i32, Error> {
// unpack compressed table format from python 3.11
// https://github.com/python/cpython/pull/91666/files
let lasti = lasti - offset_of(self, &self.co_code_adaptive) as i32;
let mut line_number: i32 = self.first_lineno();
let mut bytecode_address: i32 = 0;
let mut stream = ByteStream::new(table);

let mut index: usize = 0;
loop {
if index >= table.len() {
break;
}
let byte = table[index];
index += 1;

while let Some(byte) = stream.try_read_header()? {
let code = (byte >> 3) & 15;
let delta = ((byte & 7) as i32) + 1;
bytecode_address += delta * 2;
let code = (byte >> 3) & 15;

let line_delta = match code {
15 => 0,
14 => {
let delta = read_signed_varint(&mut index, table);
read_varint(&mut index, table); // end line
read_varint(&mut index, table); // start column
read_varint(&mut index, table); // end column
let delta = stream.read_signed_varint()?;
stream.read_varint()?; // end line
stream.read_varint()?; // start column
stream.read_varint()?; // end column
delta
}
13 => read_signed_varint(&mut index, table),
13 => stream.read_signed_varint()?,
10..=12 => {
index += 2; // start column / end column
stream.read_body()?; // start column
stream.read_body()?; // end column
(code - 10).into()
}
_ => {
index += 1; // column
stream.read_body()?; // column
0
}
};

line_number += line_delta as i32;
if bytecode_address >= lasti {
break;
}
}
line_number
Ok(line_number)
}
}
};
Expand Down Expand Up @@ -701,7 +755,7 @@ impl CodeObject for v3_10_0::PyCodeObject {
fn varnames(&self) -> *mut Self::TupleObject {
self.co_varnames as *mut Self::TupleObject
}
fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 {
fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result<i32, Error> {
// in Python 3.10 we need to double the lasti instruction value here (and no I don't know why)
// https://github.com/python/cpython/blob/7b88f63e1dd4006b1a08b9c9f087dd13449ecc76/Python/ceval.c#L5999
// Whereas in python versions up to 3.9 we didn't.
Expand Down Expand Up @@ -730,7 +784,7 @@ impl CodeObject for v3_10_0::PyCodeObject {
}
}

line_number
Ok(line_number)
}
}

Expand Down Expand Up @@ -827,6 +881,45 @@ mod tests {
128_u8, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, 208, 4,
22, 208, 4, 22,
];
assert_eq!(code.get_line_number(214, &table), 5);
assert_eq!(code.get_line_number(214, &table).unwrap(), 5);
}

#[test]
fn test_py3_12_line_numbers() {
use crate::python_bindings::v3_12_0::PyCodeObject;
let code = PyCodeObject {
co_firstlineno: 4,
..Default::default()
};

let table = [
128_u8, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, 208, 4,
22, 208, 4, 22,
];
assert_eq!(code.get_line_number(214, &table).unwrap(), 5);
}

#[test]
fn test_py3_12_line_numbers_truncated() {
use crate::python_bindings::v3_12_0::PyCodeObject;
let code = PyCodeObject {
co_firstlineno: 4,
..Default::default()
};

let table = [128_u8];
assert!(code.get_line_number(214, &table).is_err());
}

#[test]
fn test_py3_12_line_numbers_invalid() {
use crate::python_bindings::v3_12_0::PyCodeObject;
let code = PyCodeObject {
co_firstlineno: 4,
..Default::default()
};

let table = [0];
assert!(code.get_line_number(214, &table).is_err());
}
}
2 changes: 1 addition & 1 deletion src/stack_trace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ fn get_line_number<C: CodeObject, P: ProcessMemory>(
) -> Result<i32, Error> {
let table =
copy_bytes(code.line_table(), process).context("Failed to copy line number table")?;
Ok(code.get_line_number(lasti, &table))
code.get_line_number(lasti, &table)
}

fn get_locals<C: CodeObject, F: FrameObject, P: ProcessMemory>(
Expand Down