Bump pyo3 and tokio (#8008)
The PyO3 API changes pretty dramatically, but we can use this
opportunity to clean up a lot of the Python<->Rust interaction.

The main pain is that `to_object` and `into_py` are now deprecated.
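As an illustration of that migration (a minimal sketch, not taken from this diff; the helper name `error_tuple` and the tuple shape are assumptions), conversions that previously went through `ToPyObject::to_object` or `IntoPy::into_py` now go through the `IntoPyObject` trait, which yields a GIL-bound `Bound` value that can be unbound into a `Py`/`PyObject` when an owned handle is needed:

use pyo3::prelude::*;

// Hypothetical helper, for illustration only: build a (message, (start, end))
// tuple as a PyObject, roughly the shape of the parser error tuples.
fn error_tuple(py: Python<'_>, message: &str, start: u64, end: u64) -> PyResult<PyObject> {
    // PyO3 0.22 and earlier (now deprecated):
    //     let obj: PyObject = (message, (start, end)).into_py(py);

    // PyO3 0.23: IntoPyObject produces a Bound<'py, PyTuple> here...
    let bound = (message, (start, end)).into_pyobject(py)?;
    // ...which is erased to PyAny and unbound into a GIL-independent PyObject.
    Ok(bound.into_any().unbind())
}

The same pattern shows up throughout the diff below, e.g. `cst.as_ref().map(ParserCSTNode).into_pyobject(py)?.unbind()` replacing `cst.into_py(py)` in parser.rs.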
mmastrac authored Nov 21, 2024
1 parent f5396fd commit 6815130
Showing 20 changed files with 332 additions and 456 deletions.
24 changes: 12 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -11,7 +11,7 @@ members = [
 resolver = "2"

 [workspace.dependencies]
-pyo3 = { version = "0.22.2", features = ["extension-module", "serde"] }
+pyo3 = { version = "0.23.1", features = ["extension-module", "serde"] }
 tokio = { version = "1", features = ["rt", "rt-multi-thread", "macros", "time", "sync", "net", "io-util"] }
 tracing = "0.1.40"
 tracing-subscriber = "0.3.18"
13 changes: 7 additions & 6 deletions edb/edgeql-parser/edgeql-parser-python/src/errors.rs
@@ -29,16 +29,17 @@ impl ParserResult {
         let mut buf = vec![0u8]; // type and version
         bincode::serialize_into(&mut buf, &rv)
             .map_err(|e| PyValueError::new_err(format!("Failed to pack: {e}")))?;
-        Ok(PyBytes::new_bound(py, buf.as_slice()).into())
+        Ok(PyBytes::new(py, buf.as_slice()).into())
     }
 }

-pub fn parser_error_into_tuple(py: Python, error: Error) -> PyObject {
+pub fn parser_error_into_tuple(
+    error: &Error,
+) -> (&str, (u64, u64), Option<&String>, Option<&String>) {
     (
-        error.message,
+        &error.message,
         (error.span.start, error.span.end),
-        error.hint,
-        error.details,
+        error.hint.as_ref(),
+        error.details.as_ref(),
     )
-    .into_py(py)
 }
10 changes: 5 additions & 5 deletions edb/edgeql-parser/edgeql-parser-python/src/hash.rs
@@ -1,4 +1,4 @@
-use std::cell::RefCell;
+use std::sync::RwLock;

 use edgeql_parser::hash;
 use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyString};
@@ -7,7 +7,7 @@ use crate::errors::SyntaxError;

 #[pyclass]
 pub struct Hasher {
-    _hasher: RefCell<Option<hash::Hasher>>,
+    _hasher: RwLock<Option<hash::Hasher>>,
 }

 #[pymethods]
@@ -16,13 +16,13 @@ impl Hasher {
     fn start_migration(parent_id: &Bound<PyString>) -> PyResult<Hasher> {
         let hasher = hash::Hasher::start_migration(parent_id.to_str()?);
         Ok(Hasher {
-            _hasher: RefCell::new(Some(hasher)),
+            _hasher: RwLock::new(Some(hasher)),
         })
     }

     fn add_source(&self, py: Python, data: &Bound<PyString>) -> PyResult<PyObject> {
         let text = data.to_str()?;
-        let mut cell = self._hasher.borrow_mut();
+        let mut cell = self._hasher.write().unwrap();
         let hasher = cell
             .as_mut()
             .ok_or_else(|| PyRuntimeError::new_err(("cannot add source after finish",)))?;
@@ -36,7 +36,7 @@ impl Hasher {
     }

     fn make_migration_id(&self) -> PyResult<String> {
-        let mut cell = self._hasher.borrow_mut();
+        let mut cell = self._hasher.write().unwrap();
         let hasher = cell
             .take()
             .ok_or_else(|| PyRuntimeError::new_err(("cannot do migration id twice",)))?;
51 changes: 20 additions & 31 deletions edb/edgeql-parser/edgeql-parser-python/src/keywords.rs
@@ -1,46 +1,35 @@
-use pyo3::{
-    prelude::*,
-    types::{PyList, PyString},
-};
+use pyo3::{prelude::*, types::PyFrozenSet};

 use edgeql_parser::keywords;

 pub struct AllKeywords {
-    pub current: PyObject,
-    pub future: PyObject,
-    pub unreserved: PyObject,
-    pub partial: PyObject,
+    pub current: Py<PyFrozenSet>,
+    pub future: Py<PyFrozenSet>,
+    pub unreserved: Py<PyFrozenSet>,
+    pub partial: Py<PyFrozenSet>,
 }

 pub fn get_keywords(py: Python) -> PyResult<AllKeywords> {
-    let intern = py.import_bound("sys")?.getattr("intern")?;
-    let frozen = py.import_bound("builtins")?.getattr("frozenset")?;
+    let intern = py.import("sys")?.getattr("intern")?;

-    let current = prepare_keywords(py, keywords::CURRENT_RESERVED_KEYWORDS.iter(), &intern)?;
-    let unreserved = prepare_keywords(py, keywords::UNRESERVED_KEYWORDS.iter(), &intern)?;
-    let future = prepare_keywords(py, keywords::FUTURE_RESERVED_KEYWORDS.iter(), &intern)?;
-    let partial = prepare_keywords(py, keywords::PARTIAL_RESERVED_KEYWORDS.iter(), &intern)?;
     Ok(AllKeywords {
-        current: frozen
-            .call((PyList::new_bound(py, &current),), None)?
-            .into(),
-        unreserved: frozen
-            .call((PyList::new_bound(py, &unreserved),), None)?
-            .into(),
-        future: frozen.call((PyList::new_bound(py, &future),), None)?.into(),
-        partial: frozen
-            .call((PyList::new_bound(py, &partial),), None)?
-            .into(),
+        current: prepare_keywords(py, &keywords::CURRENT_RESERVED_KEYWORDS, &intern)?,
+        unreserved: prepare_keywords(py, &keywords::UNRESERVED_KEYWORDS, &intern)?,
+        future: prepare_keywords(py, &keywords::FUTURE_RESERVED_KEYWORDS, &intern)?,
+        partial: prepare_keywords(py, &keywords::PARTIAL_RESERVED_KEYWORDS, &intern)?,
     })
 }

-fn prepare_keywords<'py, I: Iterator<Item = &'py &'static str>>(
+fn prepare_keywords<'a, 'py, I: IntoIterator<Item = &'a &'static str>>(
     py: Python<'py>,
     keyword_set: I,
-    intern: &'py Bound<'py, PyAny>,
-) -> Result<Vec<Bound<'py, PyAny>>, PyErr> {
-    keyword_set
-        .cloned()
-        .map(|s: &str| intern.call((PyString::new_bound(py, s),), None))
-        .collect()
+    intern: &Bound<'py, PyAny>,
+) -> PyResult<Py<PyFrozenSet>> {
+    PyFrozenSet::new(
+        py,
+        keyword_set
+            .into_iter()
+            .map(|s| intern.call((&s,), None).unwrap()),
+    )
+    .map(|o| o.unbind())
 }
8 changes: 4 additions & 4 deletions edb/edgeql-parser/edgeql-parser-python/src/lib.rs
@@ -14,8 +14,8 @@ use pyo3::prelude::*;
 /// Rust bindings to the edgeql-parser crate
 #[pymodule]
 fn _edgeql_parser(py: Python, m: &Bound<PyModule>) -> PyResult<()> {
-    m.add("SyntaxError", py.get_type_bound::<errors::SyntaxError>())?;
-    m.add("ParserResult", py.get_type_bound::<errors::ParserResult>())?;
+    m.add("SyntaxError", py.get_type::<errors::SyntaxError>())?;
+    m.add("ParserResult", py.get_type::<errors::ParserResult>())?;

     m.add_class::<hash::Hasher>()?;

@@ -36,15 +36,15 @@ fn _edgeql_parser(py: Python, m: &Bound<PyModule>) -> PyResult<()> {
     m.add_class::<parser::Terminal>()?;

     m.add_function(wrap_pyfunction!(position::offset_of_line, m)?)?;
-    m.add("SourcePoint", py.get_type_bound::<position::SourcePoint>())?;
+    m.add("SourcePoint", py.get_type::<position::SourcePoint>())?;

     m.add_class::<tokenizer::OpaqueToken>()?;
     m.add_function(wrap_pyfunction!(tokenizer::tokenize, m)?)?;
     m.add_function(wrap_pyfunction!(tokenizer::unpickle_token, m)?)?;

     m.add_function(wrap_pyfunction!(unpack::unpack, m)?)?;

-    tokenizer::fini_module(py, m);
+    tokenizer::fini_module(m);

     Ok(())
 }
111 changes: 52 additions & 59 deletions edb/edgeql-parser/edgeql-parser-python/src/parser.rs
@@ -3,18 +3,18 @@ use once_cell::sync::OnceCell;
 use edgeql_parser::parser;
 use pyo3::exceptions::{PyAssertionError, PyValueError};
 use pyo3::prelude::*;
-use pyo3::types::{PyList, PyString, PyTuple};
+use pyo3::types::{PyList, PyString};

 use crate::errors::{parser_error_into_tuple, ParserResult};
-use crate::pynormalize::value_to_py_object;
+use crate::pynormalize::TokenizerValue;
 use crate::tokenizer::OpaqueToken;

 #[pyfunction]
 pub fn parse(
     py: Python,
     start_token_name: &Bound<PyString>,
     tokens: PyObject,
-) -> PyResult<(ParserResult, PyObject)> {
+) -> PyResult<(ParserResult, &'static Py<PyAny>)> {
     let start_token_name = start_token_name.to_string();

     let (spec, productions) = get_spec()?;
@@ -24,28 +24,22 @@ pub fn parse(
     let context = parser::Context::new(spec);
     let (cst, errors) = parser::parse(&tokens, &context);

-    let cst = cst.map(|c| to_py_cst(&c, py)).transpose()?;
-
-    let errors = errors
-        .into_iter()
-        .map(|e| parser_error_into_tuple(py, e))
-        .collect::<Vec<_>>();
-    let errors = PyList::new_bound(py, &errors);
+    let errors = PyList::new(py, errors.iter().map(|e| parser_error_into_tuple(e)))?;

     let res = ParserResult {
-        out: cst.into_py(py),
+        out: cst.as_ref().map(ParserCSTNode).into_pyobject(py)?.unbind(),
         errors: errors.into(),
     };

-    Ok((res, productions.to_object(py)))
+    Ok((res, productions))
 }

 #[pyclass]
 pub struct CSTNode {
     #[pyo3(get)]
-    production: PyObject,
+    production: Option<Py<Production>>,
     #[pyo3(get)]
-    terminal: PyObject,
+    terminal: Option<Py<Terminal>>,
 }

 #[pyclass]
@@ -136,56 +130,55 @@ pub fn save_spec(spec_json: &Bound<PyString>, dst: &Bound<PyString>) -> PyResult

 fn load_productions(py: Python<'_>, spec: &parser::Spec) -> PyResult<PyObject> {
     let grammar_name = "edb.edgeql.parser.grammar.start";
-    let grammar_mod = py.import_bound(grammar_name)?;
+    let grammar_mod = py.import(grammar_name)?;
     let load_productions = py
-        .import_bound("edb.common.parsing")?
+        .import("edb.common.parsing")?
        .getattr("load_spec_productions")?;

-    let production_names: Vec<_> = spec
-        .production_names
-        .iter()
-        .map(|(a, b)| PyTuple::new_bound(py, [a, b]))
-        .collect();
-
-    let productions = load_productions.call((production_names, grammar_mod), None)?;
+    let productions = load_productions.call((&spec.production_names, grammar_mod), None)?;
     Ok(productions.into())
 }

-fn to_py_cst<'a>(cst: &'a parser::CSTNode<'a>, py: Python) -> PyResult<CSTNode> {
-    Ok(match cst {
-        parser::CSTNode::Empty => CSTNode {
-            production: py.None(),
-            terminal: py.None(),
-        },
-        parser::CSTNode::Terminal(token) => CSTNode {
-            production: py.None(),
-            terminal: Terminal {
-                text: token.text.clone(),
-                value: if let Some(val) = &token.value {
-                    value_to_py_object(py, val)?
-                } else {
-                    py.None()
-                },
-                start: token.span.start,
-                end: token.span.end,
-            }
-            .into_py(py),
-        },
-        parser::CSTNode::Production(prod) => CSTNode {
-            production: Production {
-                id: prod.id,
-                args: PyList::new_bound(
+/// Newtype required to define a trait for a foreign type.
+struct ParserCSTNode<'a>(&'a parser::CSTNode<'a>);
+
+impl<'a, 'py> IntoPyObject<'py> for ParserCSTNode<'a> {
+    type Target = CSTNode;
+    type Output = Bound<'py, Self::Target>;
+    type Error = PyErr;
+
+    fn into_pyobject(self, py: Python<'py>) -> PyResult<Self::Output> {
+        let res = match self.0 {
+            parser::CSTNode::Empty => CSTNode {
+                production: None,
+                terminal: None,
+            },
+            parser::CSTNode::Terminal(token) => CSTNode {
+                production: None,
+                terminal: Some(Py::new(
                     py,
-                    prod.args
-                        .iter()
-                        .map(|a| to_py_cst(a, py).map(|x| x.into_py(py)))
-                        .collect::<PyResult<Vec<_>>>()?
-                        .as_slice(),
-                )
-                .into(),
-            }
-            .into_py(py),
-            terminal: py.None(),
-        },
-    })
+                    Terminal {
+                        text: token.text.clone(),
+                        value: (token.value.as_ref())
+                            .map(TokenizerValue)
+                            .into_pyobject(py)?
+                            .unbind(),
+                        start: token.span.start,
+                        end: token.span.end,
+                    },
+                )?),
+            },
+            parser::CSTNode::Production(prod) => CSTNode {
+                production: Some(Py::new(
+                    py,
+                    Production {
+                        id: prod.id,
+                        args: PyList::new(py, prod.args.iter().map(ParserCSTNode))?.into(),
+                    },
+                )?),
+                terminal: None,
+            },
+        };
+        Ok(Py::new(py, res)?.bind(py).clone())
+    }
 }