
Commit 1f93be1

Rollup merge of #65074 - Rantanen:json-byte-pos, r=matklad

Fix the start/end byte positions in the compiler JSON output

Track the changes made during normalization in the `SourceFile` and use this information to correct the `start_byte` and `end_byte` fields in the JSON output. This should ensure the start/end byte fields can be used to index the original file, even if Rust normalized the source code for parsing purposes. Both CRLF-to-LF conversion and BOM removal are handled by this change.

The rough plan was discussed with @matklad in rust-lang/rustfix#176, although I ended up going with `u32` offset tracking so I wouldn't need to deal with `u32 + i32` arithmetic when applying the offset to the span byte positions.

Fixes #65029
2 parents 959b6e3 + ff1860a commit 1f93be1

14 files changed: +543 -22 lines
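
In rough terms, the fix keeps a table of `NormalizedPos { pos, diff }` records on each `SourceFile` (one entry per point where normalization dropped bytes, `diff` being the cumulative number of bytes removed up to that point), and the JSON emitter calls the new `original_relative_byte_pos` helper to add that diff back when reporting spans. The following is only a self-contained sketch of that lookup, with simplified names and signatures and made-up example records, not the compiler's actual code:

    /// A point in the normalized text, plus the total number of bytes that
    /// normalization removed (BOM bytes, '\r' of CRLF pairs) before it.
    #[derive(Copy, Clone)]
    struct NormalizedPos {
        pos: u32,  // offset into the normalized text
        diff: u32, // bytes removed before `pos` in the original file
    }

    /// Map a file-relative offset in the normalized text back to an offset in
    /// the original, unnormalized file. `records` must be sorted by `pos`.
    fn original_byte_pos(records: &[NormalizedPos], pos: u32) -> u32 {
        // Use the diff recorded at or before `pos`; before any record the diff is 0.
        let diff = match records.binary_search_by_key(&pos, |np| np.pos) {
            Ok(i) => records[i].diff,
            Err(0) => 0,
            Err(i) => records[i - 1].diff,
        };
        pos + diff
    }

    fn main() {
        // "\u{feff}\r\nmod foo;" is normalized to "\nmod foo;": the 3-byte BOM
        // and one '\r' are dropped, i.e. 4 bytes are gone before normalized offset 0.
        let records = [NormalizedPos { pos: 0, diff: 4 }];
        assert_eq!(original_byte_pos(&records, 0), 4); // the '\n'
        assert_eq!(original_byte_pos(&records, 5), 9); // the 'f' of "foo"
    }

Tracking the removed-byte count as a `u32` to be added back (rather than a signed delta) is what the commit message refers to as avoiding `u32 + i32` arithmetic.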

src/librustc/ich/impls_syntax.rs (+19)

@@ -425,6 +425,7 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
             ref lines,
             ref multibyte_chars,
             ref non_narrow_chars,
+            ref normalized_pos,
         } = *self;
 
         (name_hash as u64).hash_stable(hcx, hasher);
@@ -453,6 +454,12 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
         for &char_pos in non_narrow_chars.iter() {
             stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
         }
+
+        normalized_pos.len().hash_stable(hcx, hasher);
+        for &char_pos in normalized_pos.iter() {
+            stable_normalized_pos(char_pos, start_pos).hash_stable(hcx, hasher);
+        }
+
     }
 }
 
@@ -482,6 +489,18 @@ fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar,
     (pos.0 - source_file_start.0, width as u32)
 }
 
+fn stable_normalized_pos(np: ::syntax_pos::NormalizedPos,
+                         source_file_start: ::syntax_pos::BytePos)
+                         -> (u32, u32) {
+    let ::syntax_pos::NormalizedPos {
+        pos,
+        diff
+    } = np;
+
+    (pos.0 - source_file_start.0, diff)
+}
+
+
 impl<'tcx> HashStable<StableHashingContext<'tcx>> for feature_gate::Features {
     fn hash_stable(&self, hcx: &mut StableHashingContext<'tcx>, hasher: &mut StableHasher) {
         // Unfortunately we cannot exhaustively list fields here, since the

src/librustc_metadata/decoder.rs (+6 -1)

@@ -1319,6 +1319,7 @@ impl<'a, 'tcx> CrateMetadata {
                 mut lines,
                 mut multibyte_chars,
                 mut non_narrow_chars,
+                mut normalized_pos,
                 name_hash,
                 .. } = source_file_to_import;
 
@@ -1338,6 +1339,9 @@ impl<'a, 'tcx> CrateMetadata {
                 for swc in &mut non_narrow_chars {
                     *swc = *swc - start_pos;
                 }
+                for np in &mut normalized_pos {
+                    np.pos = np.pos - start_pos;
+                }
 
                 let local_version = local_source_map.new_imported_source_file(name,
                                                                               name_was_remapped,
@@ -1347,7 +1351,8 @@ impl<'a, 'tcx> CrateMetadata {
                                                                               source_length,
                                                                               lines,
                                                                               multibyte_chars,
-                                                                              non_narrow_chars);
+                                                                              non_narrow_chars,
+                                                                              normalized_pos);
                 debug!("CrateMetaData::imported_source_files alloc \
                         source_file {:?} original (start_pos {:?} end_pos {:?}) \
                         translated (start_pos {:?} end_pos {:?})",

src/libsyntax/json.rs (+5 -2)

@@ -25,6 +25,9 @@ use std::sync::{Arc, Mutex};
 
 use rustc_serialize::json::{as_json, as_pretty_json};
 
+#[cfg(test)]
+mod tests;
+
 pub struct JsonEmitter {
     dst: Box<dyn Write + Send>,
     registry: Option<Registry>,
@@ -336,8 +339,8 @@ impl DiagnosticSpan {
 
         DiagnosticSpan {
             file_name: start.file.name.to_string(),
-            byte_start: span.lo().0 - start.file.start_pos.0,
-            byte_end: span.hi().0 - start.file.start_pos.0,
+            byte_start: start.file.original_relative_byte_pos(span.lo()).0,
+            byte_end: start.file.original_relative_byte_pos(span.hi()).0,
             line_start: start.line,
             line_end: end.line,
             column_start: start.col.0 + 1,

src/libsyntax/json/tests.rs (+186, new file)

@@ -0,0 +1,186 @@
+use super::*;
+
+use crate::json::JsonEmitter;
+use crate::source_map::{FilePathMapping, SourceMap};
+use crate::tests::Shared;
+use crate::with_default_globals;
+
+use errors::emitter::{ColorConfig, HumanReadableErrorType};
+use errors::Handler;
+use rustc_serialize::json::decode;
+use syntax_pos::{BytePos, Span};
+
+use std::str;
+
+#[derive(RustcDecodable, Debug, PartialEq, Eq)]
+struct TestData {
+    spans: Vec<SpanTestData>,
+}
+
+#[derive(RustcDecodable, Debug, PartialEq, Eq)]
+struct SpanTestData {
+    pub byte_start: u32,
+    pub byte_end: u32,
+    pub line_start: u32,
+    pub column_start: u32,
+    pub line_end: u32,
+    pub column_end: u32,
+}
+
+/// Test the span yields correct positions in JSON.
+fn test_positions(code: &str, span: (u32, u32), expected_output: SpanTestData) {
+    let expected_output = TestData { spans: vec![expected_output] };
+
+    with_default_globals(|| {
+        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
+        sm.new_source_file(Path::new("test.rs").to_owned().into(), code.to_owned());
+
+        let output = Arc::new(Mutex::new(Vec::new()));
+        let je = JsonEmitter::new(
+            Box::new(Shared { data: output.clone() }),
+            None,
+            sm,
+            true,
+            HumanReadableErrorType::Short(ColorConfig::Never),
+            false,
+        );
+
+        let span = Span::with_root_ctxt(BytePos(span.0), BytePos(span.1));
+        let handler = Handler::with_emitter(true, None, Box::new(je));
+        handler.span_err(span, "foo");
+
+        let bytes = output.lock().unwrap();
+        let actual_output = str::from_utf8(&bytes).unwrap();
+        let actual_output: TestData = decode(actual_output).unwrap();
+
+        assert_eq!(expected_output, actual_output)
+    })
+}
+
+#[test]
+fn empty() {
+    test_positions(
+        " ",
+        (0, 1),
+        SpanTestData {
+            byte_start: 0,
+            byte_end: 1,
+            line_start: 1,
+            column_start: 1,
+            line_end: 1,
+            column_end: 2,
+        },
+    )
+}
+
+#[test]
+fn bom() {
+    test_positions(
+        "\u{feff} ",
+        (0, 1),
+        SpanTestData {
+            byte_start: 3,
+            byte_end: 4,
+            line_start: 1,
+            column_start: 1,
+            line_end: 1,
+            column_end: 2,
+        },
+    )
+}
+
+#[test]
+fn lf_newlines() {
+    test_positions(
+        "\nmod foo;\nmod bar;\n",
+        (5, 12),
+        SpanTestData {
+            byte_start: 5,
+            byte_end: 12,
+            line_start: 2,
+            column_start: 5,
+            line_end: 3,
+            column_end: 3,
+        },
+    )
+}
+
+#[test]
+fn crlf_newlines() {
+    test_positions(
+        "\r\nmod foo;\r\nmod bar;\r\n",
+        (5, 12),
+        SpanTestData {
+            byte_start: 6,
+            byte_end: 14,
+            line_start: 2,
+            column_start: 5,
+            line_end: 3,
+            column_end: 3,
+        },
+    )
+}
+
+#[test]
+fn crlf_newlines_with_bom() {
+    test_positions(
+        "\u{feff}\r\nmod foo;\r\nmod bar;\r\n",
+        (5, 12),
+        SpanTestData {
+            byte_start: 9,
+            byte_end: 17,
+            line_start: 2,
+            column_start: 5,
+            line_end: 3,
+            column_end: 3,
+        },
+    )
+}
+
+#[test]
+fn span_before_crlf() {
+    test_positions(
+        "foo\r\nbar",
+        (2, 3),
+        SpanTestData {
+            byte_start: 2,
+            byte_end: 3,
+            line_start: 1,
+            column_start: 3,
+            line_end: 1,
+            column_end: 4,
+        },
+    )
+}
+
+#[test]
+fn span_on_crlf() {
+    test_positions(
+        "foo\r\nbar",
+        (3, 4),
+        SpanTestData {
+            byte_start: 3,
+            byte_end: 5,
+            line_start: 1,
+            column_start: 4,
+            line_end: 2,
+            column_end: 1,
+        },
+    )
+}
+
+#[test]
+fn span_after_crlf() {
+    test_positions(
+        "foo\r\nbar",
+        (4, 5),
+        SpanTestData {
+            byte_start: 5,
+            byte_end: 6,
+            line_start: 2,
+            column_start: 1,
+            line_end: 2,
+            column_end: 2,
+        },
+    )
+}
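
As a quick illustration of what these expectations buy a consumer such as a rustfix-style tool: the reported byte range can be used to slice the original, unnormalized file directly. A hypothetical check against the `crlf_newlines` case above (not part of the test suite):

    fn main() {
        // Original file contents from the `crlf_newlines` test, CRLF intact.
        let original = "\r\nmod foo;\r\nmod bar;\r\n";
        // byte_start/byte_end as emitted in the JSON for the span (5, 12),
        // which the parser measured on the normalized, LF-only text.
        let (byte_start, byte_end) = (6usize, 14usize);
        // The range indexes the original bytes, so the '\r' is still there.
        assert_eq!(&original[byte_start..byte_end], "foo;\r\nmo");
    }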

src/libsyntax/source_map.rs (+6)

@@ -283,6 +283,7 @@ impl SourceMap {
         mut file_local_lines: Vec<BytePos>,
         mut file_local_multibyte_chars: Vec<MultiByteChar>,
         mut file_local_non_narrow_chars: Vec<NonNarrowChar>,
+        mut file_local_normalized_pos: Vec<NormalizedPos>,
     ) -> Lrc<SourceFile> {
         let start_pos = self.next_start_pos();
 
@@ -301,6 +302,10 @@ impl SourceMap {
             *swc = *swc + start_pos;
         }
 
+        for nc in &mut file_local_normalized_pos {
+            nc.pos = nc.pos + start_pos;
+        }
+
        let source_file = Lrc::new(SourceFile {
            name: filename,
            name_was_remapped,
@@ -314,6 +319,7 @@ impl SourceMap {
            lines: file_local_lines,
            multibyte_chars: file_local_multibyte_chars,
            non_narrow_chars: file_local_non_narrow_chars,
+           normalized_pos: file_local_normalized_pos,
            name_hash,
        });
 
src/libsyntax/tests.rs (+2 -2)

@@ -111,8 +111,8 @@ struct SpanLabel {
     label: &'static str,
 }
 
-struct Shared<T: Write> {
-    data: Arc<Mutex<T>>,
+crate struct Shared<T: Write> {
+    pub data: Arc<Mutex<T>>,
 }
 
 impl<T: Write> Write for Shared<T> {
