Skip to content

Commit

Permalink
Auto merge of rust-lang#113850 - cjgillot:span-shorthand, r=compiler-…
Browse files Browse the repository at this point in the history
…errors

Encode shorthands for spans in metadata.

Spans typically occupy a large proportion of metadata.
This PR deduplicates encoded spans in order to reduce encoded length.
  • Loading branch information
bors committed Jul 19, 2023
2 parents 77e24f9 + 186be72 commit fdfcdad
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 13 deletions.
21 changes: 18 additions & 3 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use rustc_session::cstore::{
use rustc_session::Session;
use rustc_span::hygiene::ExpnIndex;
use rustc_span::symbol::{kw, Ident, Symbol};
use rustc_span::{self, BytePos, ExpnId, Pos, Span, SyntaxContext, DUMMY_SP};
use rustc_span::{self, BytePos, ExpnId, Pos, Span, SpanData, SyntaxContext, DUMMY_SP};

use proc_macro::bridge::client::ProcMacro;
use std::iter::TrustedLen;
Expand Down Expand Up @@ -513,11 +513,26 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnId {

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span {
let mode = SpanEncodingMode::decode(decoder);
let data = match mode {
SpanEncodingMode::Direct => SpanData::decode(decoder),
SpanEncodingMode::Shorthand(position) => decoder.with_position(position, |decoder| {
let mode = SpanEncodingMode::decode(decoder);
debug_assert!(matches!(mode, SpanEncodingMode::Direct));
SpanData::decode(decoder)
}),
};
Span::new(data.lo, data.hi, data.ctxt, data.parent)
}
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for SpanData {
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> SpanData {
let ctxt = SyntaxContext::decode(decoder);
let tag = u8::decode(decoder);

if tag == TAG_PARTIAL_SPAN {
return DUMMY_SP.with_ctxt(ctxt);
return DUMMY_SP.with_ctxt(ctxt).data();
}

debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
Expand Down Expand Up @@ -612,7 +627,7 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
let hi = hi + source_file.translated_source_file.start_pos;

// Do not try to decode parent for foreign spans.
Span::new(lo, hi, ctxt, None)
SpanData { lo, hi, ctxt, parent: None }
}
}

Expand Down
34 changes: 24 additions & 10 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ use rustc_session::config::{CrateType, OptLevel};
use rustc_session::cstore::{ForeignModule, LinkagePreference, NativeLib};
use rustc_span::hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind};
use rustc_span::symbol::{sym, Symbol};
use rustc_span::{self, ExternalSource, FileName, SourceFile, Span, SyntaxContext};
use rustc_span::{self, ExternalSource, FileName, SourceFile, Span, SpanData, SyntaxContext};
use std::borrow::Borrow;
use std::collections::hash_map::Entry;
use std::hash::Hash;
Expand All @@ -53,6 +53,7 @@ pub(super) struct EncodeContext<'a, 'tcx> {
tables: TableBuilders,

lazy_state: LazyState,
span_shorthands: FxHashMap<Span, usize>,
type_shorthands: FxHashMap<Ty<'tcx>, usize>,
predicate_shorthands: FxHashMap<ty::PredicateKind<'tcx>, usize>,

Expand Down Expand Up @@ -177,8 +178,20 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for ExpnId {

/// Encodes a `Span`, writing the full data for each distinct span only once.
///
/// The first time a span is seen, the current output position is recorded in
/// `span_shorthands` and the span is written as `SpanEncodingMode::Direct`
/// followed by its `SpanData`. Every later occurrence of the same span is
/// written as `SpanEncodingMode::Shorthand` carrying that recorded position.
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
    fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
        match s.span_shorthands.entry(*self) {
            Entry::Occupied(o) => SpanEncodingMode::Shorthand(*o.get()).encode(s),
            Entry::Vacant(v) => {
                // Record the position *before* the mode tag is written: the
                // decoder jumps here and expects to read `Direct` first.
                let position = s.opaque.position();
                v.insert(position);
                SpanEncodingMode::Direct.encode(s);
                self.data().encode(s);
            }
        }
    }
}

impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
// Don't serialize any `SyntaxContext`s from a proc-macro crate,
// since we don't load proc-macro dependencies during serialization.
// This means that any hygiene information from macros used *within*
Expand Down Expand Up @@ -213,26 +226,26 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
if s.is_proc_macro {
SyntaxContext::root().encode(s);
} else {
span.ctxt.encode(s);
self.ctxt.encode(s);
}

if self.is_dummy() {
return TAG_PARTIAL_SPAN.encode(s);
}

// The Span infrastructure should make sure that this invariant holds:
debug_assert!(span.lo <= span.hi);
debug_assert!(self.lo <= self.hi);

if !s.source_file_cache.0.contains(span.lo) {
if !s.source_file_cache.0.contains(self.lo) {
let source_map = s.tcx.sess.source_map();
let source_file_index = source_map.lookup_source_file_idx(span.lo);
let source_file_index = source_map.lookup_source_file_idx(self.lo);
s.source_file_cache =
(source_map.files()[source_file_index].clone(), source_file_index);
}
let (ref source_file, source_file_index) = s.source_file_cache;
debug_assert!(source_file.contains(span.lo));
debug_assert!(source_file.contains(self.lo));

if !source_file.contains(span.hi) {
if !source_file.contains(self.hi) {
// Unfortunately, macro expansion still sometimes generates Spans
// that are malformed in this way.
return TAG_PARTIAL_SPAN.encode(s);
Expand Down Expand Up @@ -286,11 +299,11 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {

// Encode the start position relative to the file start, so we profit more from the
// variable-length integer encoding.
let lo = span.lo - source_file.start_pos;
let lo = self.lo - source_file.start_pos;

// Encode length which is usually less than span.hi and profits more
// from the variable-length integer encoding that we use.
let len = span.hi - span.lo;
let len = self.hi - self.lo;

tag.encode(s);
lo.encode(s);
Expand Down Expand Up @@ -2182,6 +2195,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) {
feat: tcx.features(),
tables: Default::default(),
lazy_state: LazyState::NoNode,
span_shorthands: Default::default(),
type_shorthands: Default::default(),
predicate_shorthands: Default::default(),
source_file_cache,
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ const METADATA_VERSION: u8 = 8;
/// unsigned integer, and further followed by the rustc version string.
pub const METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, METADATA_VERSION];

/// Tag written in front of every encoded `Span`, telling the decoder where
/// the span's `SpanData` is located.
#[derive(Encodable, Decodable)]
enum SpanEncodingMode {
/// The span was already encoded earlier; the payload is the position in
/// the metadata at which that earlier (direct) encoding starts.
Shorthand(usize),
/// The `SpanData` follows immediately after this tag.
Direct,
}

/// A value of type T referred to by its absolute position
/// in the metadata, and which can be decoded lazily.
///
Expand Down

0 comments on commit fdfcdad

Please sign in to comment.