Skip to content

Commit 51ca7ef

Browse files
committed
Response file parsing for GCC and Clang
This fixes mozilla#1780 This moves the response file parsing code implemented by @temportalflux out of the MSVC compiler and shares it for all compilers. This enables GCC and Clang to use response files with quoted arguments. I've added some lib tests to the response file module, so that it can be reasoned about, separately from the compilers using it.
1 parent ab9f113 commit 51ca7ef

File tree

4 files changed

+175
-130
lines changed

4 files changed

+175
-130
lines changed

src/compiler/gcc.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use crate::compiler::c::{
1717
ArtifactDescriptor, CCompilerImpl, CCompilerKind, Language, ParsedArguments,
1818
};
1919
use crate::compiler::{clang, Cacheable, ColorMode, CompileCommand, CompilerArguments};
20+
use crate::compiler::response_file::SplitResponseFileArgs;
2021
use crate::mock_command::{CommandCreatorSync, RunCommand};
2122
use crate::util::{run_input_output, OsStrExt};
2223
use crate::{counted_array, dist};
@@ -887,11 +888,11 @@ impl<'a> Iterator for ExpandIncludeFile<'a> {
887888
debug!("failed to read @-file `{}`: {}", file.display(), e);
888889
return Some(arg);
889890
}
890-
if contents.contains('"') || contents.contains('\'') {
891-
return Some(arg);
892-
}
893-
let new_args = contents.split_whitespace().collect::<Vec<_>>();
894-
self.stack.extend(new_args.iter().rev().map(|s| s.into()));
891+
// Parse the response file contents, taking into account quote-wrapped strings and new-line separators.
892+
let resp_file_args = SplitResponseFileArgs::from(&contents).collect::<Vec<_>>();
893+
// Pump arguments back to the stack, in reverse order so we can `Vec::pop` and visit in original front-to-back order.
894+
let rev_args = resp_file_args.iter().rev().map(|s| s.into());
895+
self.stack.extend(rev_args);
895896
}
896897
}
897898
}

src/compiler/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ mod diab;
2323
mod gcc;
2424
mod msvc;
2525
mod nvcc;
26+
mod response_file;
2627
mod rust;
2728
mod tasking_vx;
2829
#[macro_use]

src/compiler/msvc.rs

Lines changed: 2 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use crate::compiler::c::{
1919
use crate::compiler::{
2020
clang, gcc, write_temp_file, Cacheable, ColorMode, CompileCommand, CompilerArguments,
2121
};
22+
use crate::compiler::response_file::SplitResponseFileArgs;
2223
use crate::mock_command::{CommandCreatorSync, RunCommand};
2324
use crate::util::{run_input_output, OsStrExt};
2425
use crate::{counted_array, dist};
@@ -1196,8 +1197,7 @@ impl<'a> Iterator for ExpandIncludeFile<'a> {
11961197
trace!("Expanded response file {:?} to {:?}", file_path, content);
11971198

11981199
// Parse the response file contents, taking into account quote-wrapped strings and new-line separators.
1199-
// Special implementation to account for MSVC response file format.
1200-
let resp_file_args = SplitMsvcResponseFileArgs::from(&content).collect::<Vec<_>>();
1200+
let resp_file_args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
12011201
// Pump arguments back to the stack, in reverse order so we can `Vec::pop` and visit in original front-to-back order.
12021202
let rev_args = resp_file_args.iter().rev().map(|s| s.into());
12031203
self.stack.extend(rev_args);
@@ -1222,129 +1222,6 @@ where
12221222
result.map_err(|err| io::Error::new(io::ErrorKind::Other, err.into_owned()))
12231223
}
12241224

1225-
/// An iterator over the arguments in a Windows command line.
1226-
///
1227-
/// This produces results identical to `CommandLineToArgvW` except in the
1228-
/// following cases:
1229-
///
1230-
/// 1. When passed an empty string, CommandLineToArgvW returns the path to the
1231-
/// current executable file. Here, the iterator will simply be empty.
1232-
/// 2. CommandLineToArgvW interprets the first argument differently than the
1233-
/// rest. Here, all arguments are treated in identical fashion.
1234-
///
1235-
/// Parsing rules:
1236-
///
1237-
/// - Arguments are delimited by whitespace (either a space or tab).
1238-
/// - A string surrounded by double quotes is interpreted as a single argument.
1239-
/// - Backslashes are interpreted literally unless followed by a double quote.
1240-
/// - 2n backslashes followed by a double quote reduce to n backslashes and we
1241-
/// enter the "in quote" state.
1242-
/// - 2n+1 backslashes followed by a double quote reduces to n backslashes,
1243-
/// we do *not* enter the "in quote" state, and the double quote is
1244-
/// interpreted literally.
1245-
///
1246-
/// References:
1247-
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx
1248-
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
1249-
#[derive(Clone, Debug)]
1250-
struct SplitMsvcResponseFileArgs<'a> {
1251-
/// String slice of the file content that is being parsed.
1252-
/// Slice is mutated as this iterator is executed.
1253-
file_content: &'a str,
1254-
}
1255-
1256-
impl<'a, T> From<&'a T> for SplitMsvcResponseFileArgs<'a>
1257-
where
1258-
T: AsRef<str> + 'static,
1259-
{
1260-
fn from(file_content: &'a T) -> Self {
1261-
Self {
1262-
file_content: file_content.as_ref(),
1263-
}
1264-
}
1265-
}
1266-
1267-
impl<'a> SplitMsvcResponseFileArgs<'a> {
1268-
/// Appends backslashes to `target` by decrementing `count`.
1269-
/// If `step` is >1, then `count` is decremented by `step`, resulting in 1 backslash appended for every `step`.
1270-
fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) {
1271-
while *count >= step {
1272-
target.push('\\');
1273-
*count -= step;
1274-
}
1275-
}
1276-
}
1277-
1278-
impl<'a> Iterator for SplitMsvcResponseFileArgs<'a> {
1279-
type Item = String;
1280-
1281-
fn next(&mut self) -> Option<String> {
1282-
let mut in_quotes = false;
1283-
let mut backslash_count: usize = 0;
1284-
1285-
// Strip any leading whitespace before relevant characters
1286-
let is_whitespace = |c| matches!(c, ' ' | '\t' | '\n' | '\r');
1287-
self.file_content = self.file_content.trim_start_matches(is_whitespace);
1288-
1289-
if self.file_content.is_empty() {
1290-
return None;
1291-
}
1292-
1293-
// The argument string to return, built by analyzing the current slice in the iterator.
1294-
let mut arg = String::new();
1295-
// All characters still in the string slice. Will be mutated by consuming
1296-
// values until the current arg is built.
1297-
let mut chars = self.file_content.chars();
1298-
// Build the argument by evaluating each character in the string slice.
1299-
for c in &mut chars {
1300-
match c {
1301-
// In order to handle the escape character based on the char(s) which come after it,
1302-
// they are counted instead of appended literally, until a non-backslash character is encountered.
1303-
'\\' => backslash_count += 1,
1304-
// Either starting or ending a quoted argument, or appending a literal character (if the quote was escaped).
1305-
'"' => {
1306-
// Only append half the number of backslashes encountered, because this is an escaped string.
1307-
// This will reduce `backslash_count` to either 0 or 1.
1308-
Self::append_backslashes_to(&mut arg, &mut backslash_count, 2);
1309-
match backslash_count == 0 {
1310-
// If there are no remaining encountered backslashes,
1311-
// then we have found either the start or end of a quoted argument.
1312-
true => in_quotes = !in_quotes,
1313-
// The quote character is escaped, so it is treated as a literal and appended to the arg string.
1314-
false => {
1315-
backslash_count = 0;
1316-
arg.push('"');
1317-
}
1318-
}
1319-
}
1320-
// If whitespace is encountered, only preserve it if we are currently in quotes.
1321-
// Otherwise it marks the end of the current argument.
1322-
' ' | '\t' | '\n' | '\r' => {
1323-
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
1324-
// If not in a quoted string, then this is the end of the argument.
1325-
if !in_quotes {
1326-
break;
1327-
}
1328-
// Otherwise, the whitespace must be preserved in the argument.
1329-
arg.push(c);
1330-
}
1331-
// All other characters treated as is
1332-
_ => {
1333-
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
1334-
arg.push(c);
1335-
}
1336-
}
1337-
}
1338-
1339-
// Flush any backslashes at the end of the string.
1340-
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
1341-
// Save the current remaining characters for the next step in the iterator.
1342-
self.file_content = chars.as_str();
1343-
1344-
Some(arg)
1345-
}
1346-
}
1347-
13481225
#[cfg(test)]
13491226
mod test {
13501227
use std::str::FromStr;

src/compiler/response_file.rs

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
2+
/// An iterator over the arguments in a response file.
3+
///
4+
/// This produces results identical to `CommandLineToArgvW` except in the
5+
/// following cases:
6+
///
7+
/// 1. When passed an empty string, CommandLineToArgvW returns the path to the
8+
/// current executable file. Here, the iterator will simply be empty.
9+
/// 2. CommandLineToArgvW interprets the first argument differently than the
10+
/// rest. Here, all arguments are treated in identical fashion.
11+
///
12+
/// Parsing rules:
13+
///
14+
/// - Arguments are delimited by whitespace (either a space or tab).
15+
/// - A string surrounded by double quotes is interpreted as a single argument.
16+
/// - Backslashes are interpreted literally unless followed by a double quote.
17+
/// - 2n backslashes followed by a double quote reduce to n backslashes and we
18+
/// enter the "in quote" state.
19+
/// - 2n+1 backslashes followed by a double quote reduces to n backslashes,
20+
/// we do *not* enter the "in quote" state, and the double quote is
21+
/// interpreted literally.
22+
///
23+
/// References:
24+
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx
25+
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
26+
#[derive(Clone, Debug)]
27+
pub struct SplitResponseFileArgs<'a> {
28+
/// String slice of the file content that is being parsed.
29+
/// Slice is mutated as this iterator is executed.
30+
file_content: &'a str,
31+
}
32+
33+
impl<'a, T> From<&'a T> for SplitResponseFileArgs<'a>
34+
where
35+
T: AsRef<str> + 'static,
36+
{
37+
fn from(file_content: &'a T) -> Self {
38+
Self {
39+
file_content: file_content.as_ref(),
40+
}
41+
}
42+
}
43+
44+
impl<'a> SplitResponseFileArgs<'a> {
45+
/// Appends backslashes to `target` by decrementing `count`.
46+
/// If `step` is >1, then `count` is decremented by `step`, resulting in 1 backslash appended for every `step`.
47+
fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) {
48+
while *count >= step {
49+
target.push('\\');
50+
*count -= step;
51+
}
52+
}
53+
}
54+
55+
impl<'a> Iterator for SplitResponseFileArgs<'a> {
56+
type Item = String;
57+
58+
fn next(&mut self) -> Option<String> {
59+
let mut in_quotes = false;
60+
let mut backslash_count: usize = 0;
61+
62+
// Strip any leading whitespace before relevant characters
63+
let is_whitespace = |c| matches!(c, ' ' | '\t' | '\n' | '\r');
64+
self.file_content = self.file_content.trim_start_matches(is_whitespace);
65+
66+
if self.file_content.is_empty() {
67+
return None;
68+
}
69+
70+
// The argument string to return, built by analyzing the current slice in the iterator.
71+
let mut arg = String::new();
72+
// All characters still in the string slice. Will be mutated by consuming
73+
// values until the current arg is built.
74+
let mut chars = self.file_content.chars();
75+
// Build the argument by evaluating each character in the string slice.
76+
for c in &mut chars {
77+
match c {
78+
// In order to handle the escape character based on the char(s) which come after it,
79+
// they are counted instead of appended literally, until a non-backslash character is encountered.
80+
'\\' => backslash_count += 1,
81+
// Either starting or ending a quoted argument, or appending a literal character (if the quote was escaped).
82+
'"' => {
83+
// Only append half the number of backslashes encountered, because this is an escaped string.
84+
// This will reduce `backslash_count` to either 0 or 1.
85+
Self::append_backslashes_to(&mut arg, &mut backslash_count, 2);
86+
match backslash_count == 0 {
87+
// If there are no remaining encountered backslashes,
88+
// then we have found either the start or end of a quoted argument.
89+
true => in_quotes = !in_quotes,
90+
// The quote character is escaped, so it is treated as a literal and appended to the arg string.
91+
false => {
92+
backslash_count = 0;
93+
arg.push('"');
94+
}
95+
}
96+
}
97+
// If whitespace is encountered, only preserve it if we are currently in quotes.
98+
// Otherwise it marks the end of the current argument.
99+
' ' | '\t' | '\n' | '\r' => {
100+
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
101+
// If not in a quoted string, then this is the end of the argument.
102+
if !in_quotes {
103+
break;
104+
}
105+
// Otherwise, the whitespace must be preserved in the argument.
106+
arg.push(c);
107+
}
108+
// All other characters treated as is
109+
_ => {
110+
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
111+
arg.push(c);
112+
}
113+
}
114+
}
115+
116+
// Flush any backslashes at the end of the string.
117+
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
118+
// Save the current remaining characters for the next step in the iterator.
119+
self.file_content = chars.as_str();
120+
121+
Some(arg)
122+
}
123+
}
124+
125+
#[cfg(test)]
126+
mod test {
127+
use super::*;
128+
129+
#[test]
130+
fn parse_simple_args() {
131+
let content = "-A1 -A2 -A3 -I ../includes";
132+
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
133+
assert_eq!(args[0], "-A1");
134+
assert_eq!(args[1], "-A2");
135+
assert_eq!(args[2], "-A3");
136+
assert_eq!(args[3], "-I");
137+
assert_eq!(args[4], "../includes");
138+
}
139+
140+
#[test]
141+
fn parse_quoted_path_arg() {
142+
let content = "-I \"../included headers\"";
143+
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
144+
assert_eq!(args[0], "-I");
145+
assert_eq!(args[1], "../included headers");
146+
}
147+
148+
#[test]
149+
fn parse_escaped_quoted_path_arg() {
150+
let content = "-I \"../included \\\"headers\\\"\"";
151+
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
152+
assert_eq!(args[0], "-I");
153+
assert_eq!(args[1], "../included \"headers\"");
154+
}
155+
156+
#[test]
157+
fn parse_various_whitespace_characters() {
158+
let content = "-A1 -A2\n-A3\n\r-A4\r-A5\n ";
159+
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
160+
assert_eq!(args[0], "-A1");
161+
assert_eq!(args[1], "-A2");
162+
assert_eq!(args[2], "-A3");
163+
assert_eq!(args[3], "-A4");
164+
assert_eq!(args[4], "-A5");
165+
}
166+
}

0 commit comments

Comments
 (0)