-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Related issue: #20
- Loading branch information
Showing
3 changed files
with
218 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
use std::{borrow::Borrow, fmt::Display, ops::Deref, str::from_utf8}; | ||
|
||
// The biggest `CowStr` variant is `Owned(String)`. A `String` occupies three words and the enum
// tag, although only one byte wide, is padded to a fourth word for alignment. An inline string
// can therefore use those four words minus two bytes: one for the tag and one for its length.
const MAX_INLINE_STR_LEN: usize = std::mem::size_of::<[usize; 4]>() - 2;
|
||
#[derive(Debug, Eq)] | ||
pub enum CowStr<'s> { | ||
Owned(String), | ||
Borrowed(&'s str), | ||
Inlined([u8; MAX_INLINE_STR_LEN], u8), | ||
} | ||
|
||
impl<'s> CowStr<'s> { | ||
pub fn replace(self, from: &str, to: &str) -> Self { | ||
if from.is_empty() { | ||
return self; | ||
} | ||
|
||
match self { | ||
CowStr::Inlined(mut inner, len) => { | ||
let mut len = len as usize; | ||
let diff = to.len() as isize - from.len() as isize; | ||
|
||
for (start, _) in self.deref().match_indices(from) { | ||
if diff > 0 { | ||
len += to.len() - from.len(); | ||
if len > MAX_INLINE_STR_LEN { | ||
return CowStr::Owned(self.deref().replace(from, to)); | ||
} | ||
inner[start + from.len()..].rotate_right(diff as usize); | ||
} else if diff < 0 { | ||
len -= (-diff) as usize; | ||
inner[start..].rotate_left((-diff) as usize); | ||
} | ||
|
||
inner[start..start + to.len()].copy_from_slice(to.as_bytes()); | ||
} | ||
|
||
CowStr::Inlined(inner, len as u8) | ||
} | ||
CowStr::Borrowed(s) if s.contains(from) => { | ||
let mut inner = [0; MAX_INLINE_STR_LEN]; | ||
let mut len = s.len(); | ||
inner[..len].copy_from_slice(s.as_bytes()); | ||
|
||
for (start, _) in s.match_indices(from) { | ||
if from.len() < to.len() { | ||
len += to.len() - from.len(); | ||
if len > MAX_INLINE_STR_LEN { | ||
return CowStr::Owned(self.deref().replace(from, to)); | ||
} | ||
inner[start + from.len()..].rotate_right(to.len() - from.len()); | ||
} else if from.len() > to.len() { | ||
len -= from.len() - to.len(); | ||
inner[start..].rotate_left(from.len() - to.len()); | ||
} | ||
|
||
inner[start..start + to.len()].copy_from_slice(to.as_bytes()); | ||
} | ||
|
||
CowStr::Inlined(inner, len as u8) | ||
} | ||
CowStr::Owned(s) if s.contains(from) => CowStr::Owned(s.replace(from, to)), | ||
_ => self, | ||
} | ||
} | ||
|
||
pub fn push(&mut self, c: char) { | ||
match self { | ||
CowStr::Owned(this) => this.push(c), | ||
CowStr::Inlined(inner, len) => { | ||
let l = *len as usize + c.len_utf8(); | ||
if l > MAX_INLINE_STR_LEN { | ||
let mut s = self.to_string(); | ||
s.push(c); | ||
*self = CowStr::Owned(s); | ||
} else { | ||
c.encode_utf8(&mut inner[*len as usize..l]); | ||
*len = l as u8; | ||
} | ||
} | ||
CowStr::Borrowed(this) => { | ||
let len = this.len() + c.len_utf8(); | ||
if len > MAX_INLINE_STR_LEN { | ||
let mut s = self.to_string(); | ||
s.push(c); | ||
*self = CowStr::Owned(s); | ||
} else { | ||
let mut inner = [0; MAX_INLINE_STR_LEN]; | ||
inner[..this.len()].copy_from_slice(this.as_bytes()); | ||
c.encode_utf8(&mut inner[this.len()..len]); | ||
*self = CowStr::Inlined(inner, len as u8); | ||
} | ||
} | ||
} | ||
} | ||
|
||
pub fn push_str(&mut self, s: &str) { | ||
if s.is_empty() { | ||
return; | ||
} | ||
|
||
match self { | ||
CowStr::Owned(this) => this.push_str(s), | ||
CowStr::Inlined(inner, len) => { | ||
let l = *len as usize + s.len(); | ||
if l > MAX_INLINE_STR_LEN { | ||
*self = CowStr::Owned(self.to_string() + s); | ||
} else { | ||
inner[*len as usize..l].copy_from_slice(s.as_bytes()); | ||
*len = l as u8; | ||
} | ||
} | ||
CowStr::Borrowed(this) => { | ||
let len = this.len() + s.len(); | ||
if len > MAX_INLINE_STR_LEN { | ||
*self = CowStr::Owned(this.to_string() + s); | ||
} else { | ||
let mut inner = [0; MAX_INLINE_STR_LEN]; | ||
inner[..this.len()].copy_from_slice(this.as_bytes()); | ||
inner[this.len()..len].copy_from_slice(s.as_bytes()); | ||
*self = CowStr::Inlined(inner, len as u8); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl<'s> Deref for CowStr<'s> { | ||
type Target = str; | ||
|
||
fn deref(&self) -> &Self::Target { | ||
match *self { | ||
Self::Owned(ref s) => s.borrow(), | ||
Self::Borrowed(s) => s, | ||
// NOTE: Inlined strings can only be constructed from strings or chars, which means they | ||
// are guaranteed to be valid UTF-8. We could consider unchecked conversion as well, but | ||
// a benchmark should be done before introducing unsafes. | ||
Self::Inlined(ref inner, len) => from_utf8(&inner[..len as usize]).unwrap(), | ||
} | ||
} | ||
} | ||
|
||
impl<'s> AsRef<str> for CowStr<'s> { | ||
fn as_ref(&self) -> &str { | ||
self.deref() | ||
} | ||
} | ||
|
||
impl<'s> From<char> for CowStr<'s> { | ||
fn from(value: char) -> Self { | ||
let mut inner = [0u8; MAX_INLINE_STR_LEN]; | ||
value.encode_utf8(&mut inner); | ||
CowStr::Inlined(inner, value.len_utf8() as u8) | ||
} | ||
} | ||
|
||
impl<'s> From<&'s str> for CowStr<'s> { | ||
fn from(value: &'s str) -> Self { | ||
CowStr::Borrowed(value) | ||
} | ||
} | ||
|
||
impl<'s> From<String> for CowStr<'s> { | ||
fn from(value: String) -> Self { | ||
CowStr::Owned(value) | ||
} | ||
} | ||
|
||
impl<'s> Clone for CowStr<'s> { | ||
fn clone(&self) -> Self { | ||
match self { | ||
CowStr::Owned(s) => { | ||
let len = s.len(); | ||
if len > MAX_INLINE_STR_LEN { | ||
CowStr::Owned(s.clone()) | ||
} else { | ||
let mut inner = [0u8; MAX_INLINE_STR_LEN]; | ||
inner[..len].copy_from_slice(s.as_bytes()); | ||
CowStr::Inlined(inner, len as u8) | ||
} | ||
} | ||
CowStr::Borrowed(s) => CowStr::Borrowed(s), | ||
CowStr::Inlined(inner, len) => CowStr::Inlined(*inner, *len), | ||
} | ||
} | ||
} | ||
|
||
impl<'s> PartialEq for CowStr<'s> { | ||
fn eq(&self, other: &Self) -> bool { | ||
self.deref() == other.deref() | ||
} | ||
} | ||
|
||
impl<'s> Display for CowStr<'s> { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
f.write_str(self.deref()) | ||
} | ||
} | ||
|
||
impl<'s, 'a> FromIterator<&'a str> for CowStr<'s> { | ||
fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self { | ||
CowStr::Owned(FromIterator::from_iter(iter)) | ||
} | ||
} |