Skip to content

Commit fdfcdad

Browse files
committed
Auto merge of rust-lang#113850 - cjgillot:span-shorthand, r=compiler-errors
Encode shorthands for spans in metadata. Spans occupy a typically large proportion of metadata. This PR deduplicates encoded spans in order to reduce encoded length.
2 parents 77e24f9 + 186be72 commit fdfcdad

File tree

3 files changed

+48
-13
lines changed

3 files changed

+48
-13
lines changed

compiler/rustc_metadata/src/rmeta/decoder.rs

+18-3
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use rustc_session::cstore::{
3434
use rustc_session::Session;
3535
use rustc_span::hygiene::ExpnIndex;
3636
use rustc_span::symbol::{kw, Ident, Symbol};
37-
use rustc_span::{self, BytePos, ExpnId, Pos, Span, SyntaxContext, DUMMY_SP};
37+
use rustc_span::{self, BytePos, ExpnId, Pos, Span, SpanData, SyntaxContext, DUMMY_SP};
3838

3939
use proc_macro::bridge::client::ProcMacro;
4040
use std::iter::TrustedLen;
@@ -513,11 +513,26 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnId {
513513

514514
// Decodes a `Span` from crate metadata. A `SpanEncodingMode` tag is read first:
// `Direct` means the `SpanData` follows inline, while `Shorthand(position)`
// means the data is decoded from `position` instead.
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
515515
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span {
516+
let mode = SpanEncodingMode::decode(decoder);
517+
let data = match mode {
518+
SpanEncodingMode::Direct => SpanData::decode(decoder),
519+
// Decode inside `with_position(position, ..)`. NOTE(review): presumably this
// restores the decoder's own position afterwards; `with_position` is defined
// elsewhere, so confirm.
SpanEncodingMode::Shorthand(position) => decoder.with_position(position, |decoder| {
520+
let mode = SpanEncodingMode::decode(decoder);
521+
// A shorthand is expected to point at a `Direct` entry, never at another
// shorthand. This is only checked when debug assertions are enabled.
debug_assert!(matches!(mode, SpanEncodingMode::Direct));
522+
SpanData::decode(decoder)
523+
}),
524+
};
525+
// Rebuild the `Span` from the decoded fields.
Span::new(data.lo, data.hi, data.ctxt, data.parent)
526+
}
527+
}
528+
529+
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for SpanData {
530+
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> SpanData {
516531
let ctxt = SyntaxContext::decode(decoder);
517532
let tag = u8::decode(decoder);
518533

519534
if tag == TAG_PARTIAL_SPAN {
520-
return DUMMY_SP.with_ctxt(ctxt);
535+
return DUMMY_SP.with_ctxt(ctxt).data();
521536
}
522537

523538
debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
@@ -612,7 +627,7 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
612627
let hi = hi + source_file.translated_source_file.start_pos;
613628

614629
// Do not try to decode parent for foreign spans.
615-
Span::new(lo, hi, ctxt, None)
630+
SpanData { lo, hi, ctxt, parent: None }
616631
}
617632
}
618633

compiler/rustc_metadata/src/rmeta/encoder.rs

+24-10
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ use rustc_session::config::{CrateType, OptLevel};
3737
use rustc_session::cstore::{ForeignModule, LinkagePreference, NativeLib};
3838
use rustc_span::hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind};
3939
use rustc_span::symbol::{sym, Symbol};
40-
use rustc_span::{self, ExternalSource, FileName, SourceFile, Span, SyntaxContext};
40+
use rustc_span::{self, ExternalSource, FileName, SourceFile, Span, SpanData, SyntaxContext};
4141
use std::borrow::Borrow;
4242
use std::collections::hash_map::Entry;
4343
use std::hash::Hash;
@@ -53,6 +53,7 @@ pub(super) struct EncodeContext<'a, 'tcx> {
5353
tables: TableBuilders,
5454

5555
lazy_state: LazyState,
56+
span_shorthands: FxHashMap<Span, usize>,
5657
type_shorthands: FxHashMap<Ty<'tcx>, usize>,
5758
predicate_shorthands: FxHashMap<ty::PredicateKind<'tcx>, usize>,
5859

@@ -177,8 +178,20 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for ExpnId {
177178

178179
// Encodes a `Span`, deduplicating repeats through `span_shorthands`: the first
// occurrence is written in full behind a `Direct` tag, and later occurrences of
// the same `Span` are written as a `Shorthand` holding the recorded position.
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
179180
fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
180-
let span = self.data();
181+
match s.span_shorthands.entry(*self) {
182+
// Seen before: emit only the position stored for this span.
Entry::Occupied(o) => SpanEncodingMode::Shorthand(*o.get()).encode(s),
183+
Entry::Vacant(v) => {
184+
// Record the output position *before* writing the `Direct` tag, so the
// shorthand points at the tag and not at the `SpanData` that follows it.
let position = s.opaque.position();
185+
v.insert(position);
186+
SpanEncodingMode::Direct.encode(s);
187+
self.data().encode(s);
188+
}
189+
}
190+
}
191+
}
181192

193+
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
194+
fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
182195
// Don't serialize any `SyntaxContext`s from a proc-macro crate,
183196
// since we don't load proc-macro dependencies during serialization.
184197
// This means that any hygiene information from macros used *within*
@@ -213,26 +226,26 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
213226
if s.is_proc_macro {
214227
SyntaxContext::root().encode(s);
215228
} else {
216-
span.ctxt.encode(s);
229+
self.ctxt.encode(s);
217230
}
218231

219232
if self.is_dummy() {
220233
return TAG_PARTIAL_SPAN.encode(s);
221234
}
222235

223236
// The Span infrastructure should make sure that this invariant holds:
224-
debug_assert!(span.lo <= span.hi);
237+
debug_assert!(self.lo <= self.hi);
225238

226-
if !s.source_file_cache.0.contains(span.lo) {
239+
if !s.source_file_cache.0.contains(self.lo) {
227240
let source_map = s.tcx.sess.source_map();
228-
let source_file_index = source_map.lookup_source_file_idx(span.lo);
241+
let source_file_index = source_map.lookup_source_file_idx(self.lo);
229242
s.source_file_cache =
230243
(source_map.files()[source_file_index].clone(), source_file_index);
231244
}
232245
let (ref source_file, source_file_index) = s.source_file_cache;
233-
debug_assert!(source_file.contains(span.lo));
246+
debug_assert!(source_file.contains(self.lo));
234247

235-
if !source_file.contains(span.hi) {
248+
if !source_file.contains(self.hi) {
236249
// Unfortunately, macro expansion still sometimes generates Spans
237250
// that are malformed in this way.
238251
return TAG_PARTIAL_SPAN.encode(s);
@@ -286,11 +299,11 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
286299

287300
// Encode the start position relative to the file start, so we profit more from the
288301
// variable-length integer encoding.
289-
let lo = span.lo - source_file.start_pos;
302+
let lo = self.lo - source_file.start_pos;
290303

291304
// Encode length which is usually less than span.hi and profits more
292305
// from the variable-length integer encoding that we use.
293-
let len = span.hi - span.lo;
306+
let len = self.hi - self.lo;
294307

295308
tag.encode(s);
296309
lo.encode(s);
@@ -2182,6 +2195,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) {
21822195
feat: tcx.features(),
21832196
tables: Default::default(),
21842197
lazy_state: LazyState::NoNode,
2198+
span_shorthands: Default::default(),
21852199
type_shorthands: Default::default(),
21862200
predicate_shorthands: Default::default(),
21872201
source_file_cache,

compiler/rustc_metadata/src/rmeta/mod.rs

+6
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ const METADATA_VERSION: u8 = 8;
6666
/// unsigned integer, and further followed by the rustc version string.
6767
pub const METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, METADATA_VERSION];
6868

69+
/// Tag written in front of an encoded `Span` that tells the decoder where the
/// span's `SpanData` is stored.
#[derive(Encodable, Decodable)]
70+
enum SpanEncodingMode {
71+
/// The span was already encoded earlier; the payload is the position that the
/// encoder recorded for that earlier `Direct` entry.
Shorthand(usize),
72+
/// The `SpanData` follows immediately after this tag.
Direct,
73+
}
74+
6975
/// A value of type T referred to by its absolute position
7076
/// in the metadata, and which can be decoded lazily.
7177
///

0 commit comments

Comments
 (0)