Skip to content

Commit 9568911

Browse files
authored
Rollup merge of rust-lang#65074 - Rantanen:json-byte-pos, r=matklad
Fix the start/end byte positions in the compiler JSON output. Track the changes made during normalization in the `SourceFile` and use this information to correct the `start_byte` and `end_byte` fields in the JSON output. This should ensure the start/end byte fields can be used to index the original file, even if Rust normalized the source code for parsing purposes. Both CRLF to LF and BOM removal are handled with this one. The rough plan was discussed with @matklad in rust-lang/rustfix#176 - although I ended up going with `u32` offset tracking so I wouldn't need to deal with `u32 + i32` arithmetic when applying the offset to the span byte positions. Fixes rust-lang#65029
2 parents 1e1f25e + bbf262d commit 9568911

File tree

11 files changed

+403
-22
lines changed

11 files changed

+403
-22
lines changed

src/librustc/ich/impls_syntax.rs

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -424,6 +424,7 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
424424
ref lines,
425425
ref multibyte_chars,
426426
ref non_narrow_chars,
427+
ref normalized_pos,
427428
} = *self;
428429

429430
(name_hash as u64).hash_stable(hcx, hasher);
@@ -452,6 +453,12 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
452453
for &char_pos in non_narrow_chars.iter() {
453454
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
454455
}
456+
457+
normalized_pos.len().hash_stable(hcx, hasher);
458+
for &char_pos in normalized_pos.iter() {
459+
stable_normalized_pos(char_pos, start_pos).hash_stable(hcx, hasher);
460+
}
461+
455462
}
456463
}
457464

@@ -481,6 +488,18 @@ fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar,
481488
(pos.0 - source_file_start.0, width as u32)
482489
}
483490

491+
fn stable_normalized_pos(np: ::syntax_pos::NormalizedPos,
492+
source_file_start: ::syntax_pos::BytePos)
493+
-> (u32, u32) {
494+
let ::syntax_pos::NormalizedPos {
495+
pos,
496+
diff
497+
} = np;
498+
499+
(pos.0 - source_file_start.0, diff)
500+
}
501+
502+
484503
impl<'tcx> HashStable<StableHashingContext<'tcx>> for feature_gate::Features {
485504
fn hash_stable(&self, hcx: &mut StableHashingContext<'tcx>, hasher: &mut StableHasher) {
486505
// Unfortunately we cannot exhaustively list fields here, since the

src/librustc_metadata/decoder.rs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1317,6 +1317,7 @@ impl<'a, 'tcx> CrateMetadata {
13171317
mut lines,
13181318
mut multibyte_chars,
13191319
mut non_narrow_chars,
1320+
mut normalized_pos,
13201321
name_hash,
13211322
.. } = source_file_to_import;
13221323

@@ -1336,6 +1337,9 @@ impl<'a, 'tcx> CrateMetadata {
13361337
for swc in &mut non_narrow_chars {
13371338
*swc = *swc - start_pos;
13381339
}
1340+
for np in &mut normalized_pos {
1341+
np.pos = np.pos - start_pos;
1342+
}
13391343

13401344
let local_version = local_source_map.new_imported_source_file(name,
13411345
name_was_remapped,
@@ -1345,7 +1349,8 @@ impl<'a, 'tcx> CrateMetadata {
13451349
source_length,
13461350
lines,
13471351
multibyte_chars,
1348-
non_narrow_chars);
1352+
non_narrow_chars,
1353+
normalized_pos);
13491354
debug!("CrateMetaData::imported_source_files alloc \
13501355
source_file {:?} original (start_pos {:?} end_pos {:?}) \
13511356
translated (start_pos {:?} end_pos {:?})",

src/libsyntax/json.rs

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,9 @@ use std::sync::{Arc, Mutex};
2525

2626
use rustc_serialize::json::{as_json, as_pretty_json};
2727

28+
#[cfg(test)]
29+
mod tests;
30+
2831
pub struct JsonEmitter {
2932
dst: Box<dyn Write + Send>,
3033
registry: Option<Registry>,
@@ -332,8 +335,8 @@ impl DiagnosticSpan {
332335

333336
DiagnosticSpan {
334337
file_name: start.file.name.to_string(),
335-
byte_start: span.lo().0 - start.file.start_pos.0,
336-
byte_end: span.hi().0 - start.file.start_pos.0,
338+
byte_start: start.file.original_relative_byte_pos(span.lo()).0,
339+
byte_end: start.file.original_relative_byte_pos(span.hi()).0,
337340
line_start: start.line,
338341
line_end: end.line,
339342
column_start: start.col.0 + 1,

src/libsyntax/json/tests.rs

Lines changed: 186 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,186 @@
1+
use super::*;
2+
3+
use crate::json::JsonEmitter;
4+
use crate::source_map::{FilePathMapping, SourceMap};
5+
use crate::tests::Shared;
6+
use crate::with_default_globals;
7+
8+
use errors::emitter::{ColorConfig, HumanReadableErrorType};
9+
use errors::Handler;
10+
use rustc_serialize::json::decode;
11+
use syntax_pos::{BytePos, Span};
12+
13+
use std::str;
14+
15+
#[derive(RustcDecodable, Debug, PartialEq, Eq)]
16+
struct TestData {
17+
spans: Vec<SpanTestData>,
18+
}
19+
20+
#[derive(RustcDecodable, Debug, PartialEq, Eq)]
21+
struct SpanTestData {
22+
pub byte_start: u32,
23+
pub byte_end: u32,
24+
pub line_start: u32,
25+
pub column_start: u32,
26+
pub line_end: u32,
27+
pub column_end: u32,
28+
}
29+
30+
/// Test the span yields correct positions in JSON.
31+
fn test_positions(code: &str, span: (u32, u32), expected_output: SpanTestData) {
32+
let expected_output = TestData { spans: vec![expected_output] };
33+
34+
with_default_globals(|| {
35+
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
36+
sm.new_source_file(Path::new("test.rs").to_owned().into(), code.to_owned());
37+
38+
let output = Arc::new(Mutex::new(Vec::new()));
39+
let je = JsonEmitter::new(
40+
Box::new(Shared { data: output.clone() }),
41+
None,
42+
sm,
43+
true,
44+
HumanReadableErrorType::Short(ColorConfig::Never),
45+
false,
46+
);
47+
48+
let span = Span::with_root_ctxt(BytePos(span.0), BytePos(span.1));
49+
let handler = Handler::with_emitter(true, None, Box::new(je));
50+
handler.span_err(span, "foo");
51+
52+
let bytes = output.lock().unwrap();
53+
let actual_output = str::from_utf8(&bytes).unwrap();
54+
let actual_output: TestData = decode(actual_output).unwrap();
55+
56+
assert_eq!(expected_output, actual_output)
57+
})
58+
}
59+
60+
#[test]
61+
fn empty() {
62+
test_positions(
63+
" ",
64+
(0, 1),
65+
SpanTestData {
66+
byte_start: 0,
67+
byte_end: 1,
68+
line_start: 1,
69+
column_start: 1,
70+
line_end: 1,
71+
column_end: 2,
72+
},
73+
)
74+
}
75+
76+
#[test]
77+
fn bom() {
78+
test_positions(
79+
"\u{feff} ",
80+
(0, 1),
81+
SpanTestData {
82+
byte_start: 3,
83+
byte_end: 4,
84+
line_start: 1,
85+
column_start: 1,
86+
line_end: 1,
87+
column_end: 2,
88+
},
89+
)
90+
}
91+
92+
#[test]
93+
fn lf_newlines() {
94+
test_positions(
95+
"\nmod foo;\nmod bar;\n",
96+
(5, 12),
97+
SpanTestData {
98+
byte_start: 5,
99+
byte_end: 12,
100+
line_start: 2,
101+
column_start: 5,
102+
line_end: 3,
103+
column_end: 3,
104+
},
105+
)
106+
}
107+
108+
#[test]
109+
fn crlf_newlines() {
110+
test_positions(
111+
"\r\nmod foo;\r\nmod bar;\r\n",
112+
(5, 12),
113+
SpanTestData {
114+
byte_start: 6,
115+
byte_end: 14,
116+
line_start: 2,
117+
column_start: 5,
118+
line_end: 3,
119+
column_end: 3,
120+
},
121+
)
122+
}
123+
124+
#[test]
125+
fn crlf_newlines_with_bom() {
126+
test_positions(
127+
"\u{feff}\r\nmod foo;\r\nmod bar;\r\n",
128+
(5, 12),
129+
SpanTestData {
130+
byte_start: 9,
131+
byte_end: 17,
132+
line_start: 2,
133+
column_start: 5,
134+
line_end: 3,
135+
column_end: 3,
136+
},
137+
)
138+
}
139+
140+
#[test]
141+
fn span_before_crlf() {
142+
test_positions(
143+
"foo\r\nbar",
144+
(2, 3),
145+
SpanTestData {
146+
byte_start: 2,
147+
byte_end: 3,
148+
line_start: 1,
149+
column_start: 3,
150+
line_end: 1,
151+
column_end: 4,
152+
},
153+
)
154+
}
155+
156+
#[test]
157+
fn span_on_crlf() {
158+
test_positions(
159+
"foo\r\nbar",
160+
(3, 4),
161+
SpanTestData {
162+
byte_start: 3,
163+
byte_end: 5,
164+
line_start: 1,
165+
column_start: 4,
166+
line_end: 2,
167+
column_end: 1,
168+
},
169+
)
170+
}
171+
172+
#[test]
173+
fn span_after_crlf() {
174+
test_positions(
175+
"foo\r\nbar",
176+
(4, 5),
177+
SpanTestData {
178+
byte_start: 5,
179+
byte_end: 6,
180+
line_start: 2,
181+
column_start: 1,
182+
line_end: 2,
183+
column_end: 2,
184+
},
185+
)
186+
}

src/libsyntax/source_map.rs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -283,6 +283,7 @@ impl SourceMap {
283283
mut file_local_lines: Vec<BytePos>,
284284
mut file_local_multibyte_chars: Vec<MultiByteChar>,
285285
mut file_local_non_narrow_chars: Vec<NonNarrowChar>,
286+
mut file_local_normalized_pos: Vec<NormalizedPos>,
286287
) -> Lrc<SourceFile> {
287288
let start_pos = self.next_start_pos();
288289

@@ -301,6 +302,10 @@ impl SourceMap {
301302
*swc = *swc + start_pos;
302303
}
303304

305+
for nc in &mut file_local_normalized_pos {
306+
nc.pos = nc.pos + start_pos;
307+
}
308+
304309
let source_file = Lrc::new(SourceFile {
305310
name: filename,
306311
name_was_remapped,
@@ -314,6 +319,7 @@ impl SourceMap {
314319
lines: file_local_lines,
315320
multibyte_chars: file_local_multibyte_chars,
316321
non_narrow_chars: file_local_non_narrow_chars,
322+
normalized_pos: file_local_normalized_pos,
317323
name_hash,
318324
});
319325

src/libsyntax/tests.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,8 +110,8 @@ struct SpanLabel {
110110
label: &'static str,
111111
}
112112

113-
struct Shared<T: Write> {
114-
data: Arc<Mutex<T>>,
113+
crate struct Shared<T: Write> {
114+
pub data: Arc<Mutex<T>>,
115115
}
116116

117117
impl<T: Write> Write for Shared<T> {

0 commit comments

Comments (0)