Skip to content

Commit

Permalink
Tighten JsonPointer and methods
Browse files Browse the repository at this point in the history
The `JsonPointer` type ought to be `#[repr(transparent)]` if
`std::mem::transmute` is going to be used to coerce one from a `str`.
`ReferenceToken` already did.

The `validate` method did not ensure that a string starts with a `/`. This
may not have led a direct path to any undefined behavior, because it was
ensured that all `~` are followed by a `0` or `1`, but other methods do
act as though they assumed a pointer string always starts with a `/`. I
think this makes things a bit more clear.

The private `token_end` can be mostly replaced by `find("/")`.

`as_array_index` can be implemented in terms of the standard library's
integer parsing. (Allowing "+000" is an annoying gotcha in seemingly
every major language's standard library!).

I've added `Deserialize` for `&JsonPointer`.

I've made the internals of the new `json_pointer!` macro `const {}` to
better ensure that its checks actually happen at compile time.

I've adjusted some methods to reduce the total number of `unsafe` blocks
necessary.
  • Loading branch information
jeddenlea committed Sep 18, 2024
1 parent a585e0c commit 2a592bc
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 111 deletions.
5 changes: 4 additions & 1 deletion crates/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,7 @@ documentation = "https://docs.rs/ssi-core/"
thiserror.workspace = true
async-trait.workspace = true
serde = { workspace = true, features = ["derive"] }
pin-project.workspace = true
pin-project.workspace = true

[dev-dependencies]
serde_json.workspace = true
206 changes: 96 additions & 110 deletions crates/core/src/json_pointer.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
use core::fmt;
use std::{
borrow::{Borrow, Cow},
ops::Deref,
str::FromStr,
};
use core::{fmt, ops::Deref, str::FromStr};
use std::borrow::{Borrow, Cow};

use serde::{Deserialize, Serialize};

Expand All @@ -12,9 +8,11 @@ use crate::BytesBuf;
#[macro_export]
macro_rules! json_pointer {
($value:literal) => {
match $crate::JsonPointer::from_str_const($value) {
Ok(p) => p,
Err(_) => panic!("invalid JSON pointer"),
const {
match $crate::JsonPointer::from_str_const($value) {
Ok(p) => p,
Err(_) => panic!("invalid JSON pointer"),
}
}
};
}
Expand All @@ -27,7 +25,8 @@ pub struct InvalidJsonPointer<T = String>(pub T);
///
/// See: <https://datatracker.ietf.org/doc/html/rfc6901>
#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct JsonPointer([u8]);
#[repr(transparent)]
pub struct JsonPointer(str);

impl<'a> Default for &'a JsonPointer {
fn default() -> Self {
Expand All @@ -36,25 +35,22 @@ impl<'a> Default for &'a JsonPointer {
}

impl JsonPointer {
pub const ROOT: &'static Self = unsafe {
// SAFETY: the empty string is a valid JSON pointer.
JsonPointer::new_unchecked(&[])
};
pub const ROOT: &'static Self = json_pointer!("");

/// Converts the given string into a JSON pointer.
pub fn new<S: AsRef<[u8]>>(s: &S) -> Result<&Self, InvalidJsonPointer<&S>> {
let bytes = s.as_ref();
if Self::validate(bytes) {
Ok(unsafe { Self::new_unchecked(bytes) })
} else {
Err(InvalidJsonPointer(s))
}
pub fn new<S>(s: &S) -> Result<&Self, InvalidJsonPointer<&S>>
where
S: AsRef<[u8]> + ?Sized,
{
core::str::from_utf8(s.as_ref())
.ok()
.and_then(|s| Self::from_str_const(s).ok())
.ok_or(InvalidJsonPointer(s))
}

pub const fn from_str_const(s: &str) -> Result<&Self, InvalidJsonPointer<&str>> {
let bytes = s.as_bytes();
if Self::validate(bytes) {
Ok(unsafe { Self::new_unchecked(bytes) })
if Self::validate(s) {
Ok(unsafe { Self::new_unchecked(s) })
} else {
Err(InvalidJsonPointer(s))
}
Expand All @@ -65,14 +61,17 @@ impl JsonPointer {
/// # Safety
///
/// The input string *must* be a valid JSON pointer.
pub const unsafe fn new_unchecked(s: &[u8]) -> &Self {
pub const unsafe fn new_unchecked(s: &str) -> &Self {
std::mem::transmute(s)
}

pub const fn validate(bytes: &[u8]) -> bool {
if std::str::from_utf8(bytes).is_err() {
/// Confirms the validity of a string such that it may be safely used for
/// [`Self::new_unchecked`].
pub const fn validate(s: &str) -> bool {
let bytes = s.as_bytes();
if !matches!(bytes, [] | [b'/', ..]) {
return false;
};
}

let mut i = 0;
while i < bytes.len() {
Expand All @@ -91,49 +90,30 @@ impl JsonPointer {
}

pub fn as_bytes(&self) -> &[u8] {
&self.0
self.0.as_bytes()
}

pub fn as_str(&self) -> &str {
unsafe {
// SAFETY: a JSON pointer is an UTF-8 encoded string by definition.
std::str::from_utf8_unchecked(&self.0)
}
&self.0
}

pub fn is_empty(&self) -> bool {
self.0.is_empty()
}

fn token_end(&self) -> Option<usize> {
if self.is_empty() {
None
} else {
let mut i = 1;

while i < self.0.len() {
if self.0[i] == b'/' {
break;
}

i += 1
}

Some(i)
}
}

pub fn split_first(&self) -> Option<(&ReferenceToken, &Self)> {
self.token_end().map(|i| unsafe {
(
ReferenceToken::new_unchecked(&self.0[1..i]),
Self::new_unchecked(&self.0[i..]),
)
self.0.strip_prefix("/").map(|s| {
let (left, right) = s.find("/").map(|idx| s.split_at(idx)).unwrap_or((s, ""));
// Safety: the token is guaranteed not to include a '/', and remaining shall be either
// empty or a valid pointer starting with '/'.
let token = unsafe { ReferenceToken::new_unchecked(left) };
let remaining = unsafe { Self::new_unchecked(right) };
(token, remaining)
})
}

pub fn iter(&self) -> JsonPointerIter {
let mut tokens = self.as_str().split('/');
let mut tokens = self.0.split('/');
tokens.next();
JsonPointerIter(tokens)
}
Expand Down Expand Up @@ -178,11 +158,21 @@ impl<'a> Iterator for JsonPointerIter<'a> {
}
}

impl<'de> Deserialize<'de> for &'de JsonPointer {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let s: &str = <&str as Deserialize>::deserialize(deserializer)?;
JsonPointer::new(s).map_err(serde::de::Error::custom)
}
}

/// JSON Pointer buffer.
///
/// See: <https://datatracker.ietf.org/doc/html/rfc6901>
#[derive(Debug, Clone, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct JsonPointerBuf(Vec<u8>);
pub struct JsonPointerBuf(String);

impl Default for JsonPointerBuf {
fn default() -> Self {
Expand All @@ -193,37 +183,34 @@ impl Default for JsonPointerBuf {
impl JsonPointerBuf {
/// Converts the given byte string into an owned JSON pointer.
pub fn new<B: BytesBuf>(value: B) -> Result<Self, InvalidJsonPointer<B>> {
if JsonPointer::validate(value.as_ref()) {
Ok(Self(value.into()))
let Ok(s) = core::str::from_utf8(value.as_ref()) else {
return Err(InvalidJsonPointer(value));
};
if JsonPointer::validate(s) {
let v: Vec<u8> = value.into();
// SAFETY: we've just ensured the contents of the BytesBuf is a valid UTF-8 string and
// JsonPointer.
Ok(Self(unsafe { String::from_utf8_unchecked(v) }))
} else {
Err(InvalidJsonPointer(value))
}
}

pub fn push(&mut self, token: &str) {
self.0.push(b'/');
self.0.reserve(1 + token.len());
self.0.push('/');
for c in token.chars() {
match c {
'~' => {
self.0.push(b'~');
self.0.push(b'0');
}
'/' => {
self.0.push(b'~');
self.0.push(b'1');
}
_ => {
let i = self.0.len();
let len = c.len_utf8();
self.0.resize(i + len, 0);
c.encode_utf8(&mut self.0[i..]);
}
'~' => self.0.push_str("~0"),
'/' => self.0.push_str("~1"),
_ => self.0.push(c),
}
}
}

pub fn push_index(&mut self, i: usize) {
self.push(&i.to_string())
use core::fmt::Write;
write!(self.0, "/{i}").unwrap()
}

pub fn as_json_pointer(&self) -> &JsonPointer {
Expand All @@ -239,7 +226,7 @@ impl Deref for JsonPointerBuf {
type Target = JsonPointer;

fn deref(&self) -> &Self::Target {
unsafe { JsonPointer::new_unchecked(&self.0) }
self.as_json_pointer()
}
}

Expand Down Expand Up @@ -290,7 +277,7 @@ impl<'de> Deserialize<'de> for JsonPointerBuf {

#[derive(Debug)]
#[repr(transparent)]
pub struct ReferenceToken([u8]);
pub struct ReferenceToken(str);

impl ReferenceToken {
/// Converts the given string into a JSON pointer reference token without
Expand All @@ -299,24 +286,20 @@ impl ReferenceToken {
/// # Safety
///
/// The input string *must* be a valid JSON pointer reference token.
pub const unsafe fn new_unchecked(s: &[u8]) -> &Self {
pub const unsafe fn new_unchecked(s: &str) -> &Self {
std::mem::transmute(s)
}

pub fn is_escaped(&self) -> bool {
self.0.contains(&b'~')
self.0.contains("~")
}

pub fn as_bytes(&self) -> &[u8] {
&self.0
self.0.as_bytes()
}

pub fn as_str(&self) -> &str {
unsafe {
// SAFETY: a reference token is an UTF-8 encoded string by
// definition.
std::str::from_utf8_unchecked(&self.0)
}
&self.0
}

pub fn to_decoded(&self) -> Cow<str> {
Expand All @@ -328,39 +311,27 @@ impl ReferenceToken {
}

pub fn decode(&self) -> String {
let mut result = String::new();
let mut chars = self.as_str().chars();
while let Some(c) = chars.next() {
let decoded_c = match c {
let mut buf = String::with_capacity(self.0.len());
let mut chars = self.0.chars();
buf.extend(core::iter::from_fn(|| {
Some(match chars.next()? {
'~' => match chars.next() {
Some('0') => '~',
Some('1') => '/',
_ => unreachable!(),
},
c => c,
};

result.push(decoded_c);
}

result
})
}));
buf
}

pub fn as_array_index(&self) -> Option<usize> {
let mut chars = self.as_str().chars();
let mut i = chars.next()?.to_digit(10)? as usize;
if i == 0 {
match chars.next() {
Some(_) => None,
None => Some(0),
}
} else {
for c in chars {
let d = c.to_digit(10)? as usize;
i = i * 10 + d;
}

Some(i)
// Like usize::from_str, but don't allow leading '+' or '0'.
match self.0.as_bytes() {
[c @ b'0'..=b'9'] => Some((c - b'0') as usize),
[b'1'..=b'9', ..] => self.0.parse().ok(),
_ => None,
}
}
}
Expand All @@ -370,3 +341,18 @@ impl fmt::Display for ReferenceToken {
self.as_str().fmt(f)
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_serde_borrow() {
let s = String::from(r#""/foo/b~1ar""#);
let p: JsonPointerBuf = serde_json::from_str(&s).unwrap();
let jp: &JsonPointer = serde_json::from_str(&s).unwrap();
assert_eq!(p.0, jp.0);

serde_json::from_str::<&JsonPointer>("invalid").unwrap_err();
}
}

0 comments on commit 2a592bc

Please sign in to comment.