Skip to content

Commit bc8c782

Browse files
authored
Rollup merge of rust-lang#60659 - nnethercote:tweak-Symbol-and-InternedString, r=Zoxc
Tweak `Symbol` and `InternedString`. Some minor improvements to speed and code cleanliness. r? @Zoxc
2 parents 20dff29 + e53bb1a commit bc8c782

File tree

1 file changed

+60
-24
lines changed

1 file changed

+60
-24
lines changed

src/libsyntax_pos/symbol.rs

+60-24
Original file line numberDiff line numberDiff line change
@@ -344,9 +344,22 @@ impl Decodable for Ident {
344344
}
345345
}
346346

347-
/// A symbol is an interned or gensymed string. The use of `newtype_index!` means
348-
/// that `Option<Symbol>` only takes up 4 bytes, because `newtype_index!` reserves
349-
/// the last 256 values for tagging purposes.
347+
/// A symbol is an interned or gensymed string. A gensym is a symbol that is
348+
/// never equal to any other symbol. E.g.:
349+
/// ```
350+
/// assert_eq!(Symbol::intern("x"), Symbol::intern("x"));
351+
/// assert_ne!(Symbol::gensym("x"), Symbol::intern("x"));
352+
/// assert_ne!(Symbol::gensym("x"), Symbol::gensym("x"));
353+
/// ```
354+
/// Conceptually, a gensym can be thought of as a normal symbol with an
355+
/// invisible unique suffix. Gensyms are useful when creating new identifiers
356+
/// that must not match any existing identifiers, e.g. during macro expansion
357+
/// and syntax desugaring.
358+
///
359+
/// Internally, a Symbol is implemented as an index, and all operations
360+
/// (including hashing, equality, and ordering) operate on that index. The use
361+
/// of `newtype_index!` means that `Option<Symbol>` only takes up 4 bytes,
362+
/// because `newtype_index!` reserves the last 256 values for tagging purposes.
350363
///
351364
/// Note that `Symbol` cannot directly be a `newtype_index!` because it implements
352365
/// `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
@@ -367,10 +380,6 @@ impl Symbol {
367380
with_interner(|interner| interner.intern(string))
368381
}
369382

370-
pub fn interned(self) -> Self {
371-
with_interner(|interner| interner.interned(self))
372-
}
373-
374383
/// Gensyms a new symbol from `string`, using the current interner.
375384
pub fn gensym(string: &str) -> Self {
376385
with_interner(|interner| interner.gensym(string))
@@ -380,6 +389,7 @@ impl Symbol {
380389
with_interner(|interner| interner.gensymed(self))
381390
}
382391

392+
// WARNING: this function is deprecated and will be removed in the future.
383393
pub fn is_gensymed(self) -> bool {
384394
with_interner(|interner| interner.is_gensymed(self))
385395
}
@@ -488,11 +498,11 @@ impl Interner {
488498
name
489499
}
490500

491-
pub fn interned(&self, symbol: Symbol) -> Symbol {
501+
fn interned(&self, symbol: Symbol) -> Symbol {
492502
if (symbol.0.as_usize()) < self.strings.len() {
493503
symbol
494504
} else {
495-
self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize])
505+
self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]
496506
}
497507
}
498508

@@ -510,10 +520,15 @@ impl Interner {
510520
symbol.0.as_usize() >= self.strings.len()
511521
}
512522

523+
// Get the symbol as a string. `Symbol::as_str()` should be used in
524+
// preference to this function.
513525
pub fn get(&self, symbol: Symbol) -> &str {
514526
match self.strings.get(symbol.0.as_usize()) {
515527
Some(string) => string,
516-
None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]),
528+
None => {
529+
let symbol = self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize];
530+
self.strings[symbol.0.as_usize()]
531+
}
517532
}
518533
}
519534
}
@@ -611,11 +626,17 @@ fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
611626
GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
612627
}
613628

614-
/// Represents a string stored in the interner. Because the interner outlives any thread
615-
/// which uses this type, we can safely treat `string` which points to interner data,
616-
/// as an immortal string, as long as this type never crosses between threads.
617-
// FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`,
618-
// by creating a new thread right after constructing the interner.
629+
/// An alternative to `Symbol` and `InternedString`, useful when the chars
630+
/// within the symbol need to be accessed. It is best used for temporary
631+
/// values.
632+
///
633+
/// Because the interner outlives any thread which uses this type, we can
634+
/// safely treat `string`, which points to interner data, as an immortal string,
635+
/// as long as this type never crosses between threads.
636+
//
637+
// FIXME: ensure that the interner outlives any thread which uses
638+
// `LocalInternedString`, by creating a new thread right after constructing the
639+
// interner.
619640
#[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
620641
pub struct LocalInternedString {
621642
string: &'static str,
@@ -708,7 +729,19 @@ impl Encodable for LocalInternedString {
708729
}
709730
}
710731

711-
/// Represents a string stored in the string interner.
732+
/// An alternative to `Symbol` that is focused on string contents. It has two
733+
/// main differences to `Symbol`.
734+
///
735+
/// First, its implementations of `Hash`, `PartialOrd` and `Ord` work with the
736+
/// string chars rather than the symbol integer. This is useful when hash
737+
/// stability is required across compile sessions, or a guaranteed sort
738+
/// ordering is required.
739+
///
740+
/// Second, gensym-ness is irrelevant. E.g.:
741+
/// ```
742+
/// assert_ne!(Symbol::gensym("x"), Symbol::gensym("x"));
743+
/// assert_eq!(Symbol::gensym("x").as_interned_str(), Symbol::gensym("x").as_interned_str());
744+
/// ```
712745
#[derive(Clone, Copy, Eq)]
713746
pub struct InternedString {
714747
symbol: Symbol,
@@ -725,6 +758,15 @@ impl InternedString {
725758
unsafe { f(&*str) }
726759
}
727760

761+
fn with2<F: FnOnce(&str, &str) -> R, R>(self, other: &InternedString, f: F) -> R {
762+
let (self_str, other_str) = with_interner(|interner| {
763+
(interner.get(self.symbol) as *const str,
764+
interner.get(other.symbol) as *const str)
765+
});
766+
// This is safe for the same reason that `with` is safe.
767+
unsafe { f(&*self_str, &*other_str) }
768+
}
769+
728770
pub fn as_symbol(self) -> Symbol {
729771
self.symbol
730772
}
@@ -745,7 +787,7 @@ impl PartialOrd<InternedString> for InternedString {
745787
if self.symbol == other.symbol {
746788
return Some(Ordering::Equal);
747789
}
748-
self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
790+
self.with2(other, |self_str, other_str| self_str.partial_cmp(other_str))
749791
}
750792
}
751793

@@ -754,7 +796,7 @@ impl Ord for InternedString {
754796
if self.symbol == other.symbol {
755797
return Ordering::Equal;
756798
}
757-
self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
799+
self.with2(other, |self_str, other_str| self_str.cmp(other_str))
758800
}
759801
}
760802

@@ -794,12 +836,6 @@ impl<'a> PartialEq<InternedString> for &'a String {
794836
}
795837
}
796838

797-
impl std::convert::From<InternedString> for String {
798-
fn from(val: InternedString) -> String {
799-
val.as_symbol().to_string()
800-
}
801-
}
802-
803839
impl fmt::Debug for InternedString {
804840
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
805841
self.with(|str| fmt::Debug::fmt(&str, f))

0 commit comments

Comments
 (0)