Skip to content

Commit 27b601f

Browse files
committed
Response file parsing for GCC and Clang
This fixes #1780. This moves the response file parsing code implemented by @temportalflux out of the MSVC compiler and shares it for all compilers. This enables GCC and Clang to use response files with quoted arguments. I've added some lib tests to the response file module so that it can be reasoned about separately from the compilers using it.
1 parent 100d26a commit 27b601f

File tree

4 files changed

+174
-130
lines changed

4 files changed

+174
-130
lines changed

src/compiler/gcc.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use crate::compiler::args::*;
1616
use crate::compiler::c::{
1717
ArtifactDescriptor, CCompilerImpl, CCompilerKind, Language, ParsedArguments,
1818
};
19+
use crate::compiler::response_file::SplitResponseFileArgs;
1920
use crate::compiler::{clang, Cacheable, ColorMode, CompileCommand, CompilerArguments};
2021
use crate::mock_command::{CommandCreatorSync, RunCommand};
2122
use crate::util::{run_input_output, OsStrExt};
@@ -887,11 +888,11 @@ impl<'a> Iterator for ExpandIncludeFile<'a> {
887888
debug!("failed to read @-file `{}`: {}", file.display(), e);
888889
return Some(arg);
889890
}
890-
if contents.contains('"') || contents.contains('\'') {
891-
return Some(arg);
892-
}
893-
let new_args = contents.split_whitespace().collect::<Vec<_>>();
894-
self.stack.extend(new_args.iter().rev().map(|s| s.into()));
891+
// Parse the response file contents, taking into account quote-wrapped strings and new-line separators.
892+
let resp_file_args = SplitResponseFileArgs::from(&contents).collect::<Vec<_>>();
893+
// Pump arguments back to the stack, in reverse order so we can `Vec::pop` and visit in original front-to-back order.
894+
let rev_args = resp_file_args.iter().rev().map(|s| s.into());
895+
self.stack.extend(rev_args);
895896
}
896897
}
897898
}

src/compiler/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ mod diab;
2323
mod gcc;
2424
mod msvc;
2525
mod nvcc;
26+
mod response_file;
2627
mod rust;
2728
mod tasking_vx;
2829
#[macro_use]

src/compiler/msvc.rs

Lines changed: 2 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use crate::compiler::args::*;
1616
use crate::compiler::c::{
1717
ArtifactDescriptor, CCompilerImpl, CCompilerKind, Language, ParsedArguments,
1818
};
19+
use crate::compiler::response_file::SplitResponseFileArgs;
1920
use crate::compiler::{
2021
clang, gcc, write_temp_file, Cacheable, ColorMode, CompileCommand, CompilerArguments,
2122
};
@@ -1174,8 +1175,7 @@ impl<'a> Iterator for ExpandIncludeFile<'a> {
11741175
trace!("Expanded response file {:?} to {:?}", file_path, content);
11751176

11761177
// Parse the response file contents, taking into account quote-wrapped strings and new-line separators.
1177-
// Special implementation to account for MSVC response file format.
1178-
let resp_file_args = SplitMsvcResponseFileArgs::from(&content).collect::<Vec<_>>();
1178+
let resp_file_args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
11791179
// Pump arguments back to the stack, in reverse order so we can `Vec::pop` and visit in original front-to-back order.
11801180
let rev_args = resp_file_args.iter().rev().map(|s| s.into());
11811181
self.stack.extend(rev_args);
@@ -1200,129 +1200,6 @@ where
12001200
result.map_err(|err| io::Error::new(io::ErrorKind::Other, err.into_owned()))
12011201
}
12021202

1203-
/// An iterator over the arguments in a Windows command line.
1204-
///
1205-
/// This produces results identical to `CommandLineToArgvW` except in the
1206-
/// following cases:
1207-
///
1208-
/// 1. When passed an empty string, CommandLineToArgvW returns the path to the
1209-
/// current executable file. Here, the iterator will simply be empty.
1210-
/// 2. CommandLineToArgvW interprets the first argument differently than the
1211-
/// rest. Here, all arguments are treated in identical fashion.
1212-
///
1213-
/// Parsing rules:
1214-
///
1215-
/// - Arguments are delimited by whitespace (either a space or tab).
1216-
/// - A string surrounded by double quotes is interpreted as a single argument.
1217-
/// - Backslashes are interpreted literally unless followed by a double quote.
1218-
/// - 2n backslashes followed by a double quote reduce to n backslashes and we
1219-
/// enter the "in quote" state.
1220-
/// - 2n+1 backslashes followed by a double quote reduces to n backslashes,
1221-
/// we do *not* enter the "in quote" state, and the double quote is
1222-
/// interpreted literally.
1223-
///
1224-
/// References:
1225-
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx
1226-
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
1227-
#[derive(Clone, Debug)]
1228-
struct SplitMsvcResponseFileArgs<'a> {
1229-
/// String slice of the file content that is being parsed.
1230-
/// Slice is mutated as this iterator is executed.
1231-
file_content: &'a str,
1232-
}
1233-
1234-
impl<'a, T> From<&'a T> for SplitMsvcResponseFileArgs<'a>
1235-
where
1236-
T: AsRef<str> + 'static,
1237-
{
1238-
fn from(file_content: &'a T) -> Self {
1239-
Self {
1240-
file_content: file_content.as_ref(),
1241-
}
1242-
}
1243-
}
1244-
1245-
impl<'a> SplitMsvcResponseFileArgs<'a> {
1246-
/// Appends backslashes to `target` by decrementing `count`.
1247-
/// If `step` is >1, then `count` is decremented by `step`, resulting in 1 backslash appended for every `step`.
1248-
fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) {
1249-
while *count >= step {
1250-
target.push('\\');
1251-
*count -= step;
1252-
}
1253-
}
1254-
}
1255-
1256-
impl<'a> Iterator for SplitMsvcResponseFileArgs<'a> {
1257-
type Item = String;
1258-
1259-
fn next(&mut self) -> Option<String> {
1260-
let mut in_quotes = false;
1261-
let mut backslash_count: usize = 0;
1262-
1263-
// Strip any leading whitespace before relevant characters
1264-
let is_whitespace = |c| matches!(c, ' ' | '\t' | '\n' | '\r');
1265-
self.file_content = self.file_content.trim_start_matches(is_whitespace);
1266-
1267-
if self.file_content.is_empty() {
1268-
return None;
1269-
}
1270-
1271-
// The argument string to return, built by analyzing the current slice in the iterator.
1272-
let mut arg = String::new();
1273-
// All characters still in the string slice. Will be mutated by consuming
1274-
// values until the current arg is built.
1275-
let mut chars = self.file_content.chars();
1276-
// Build the argument by evaluating each character in the string slice.
1277-
for c in &mut chars {
1278-
match c {
1279-
// In order to handle the escape character based on the char(s) which come after it,
1280-
// they are counted instead of appended literally, until a non-backslash character is encountered.
1281-
'\\' => backslash_count += 1,
1282-
// Either starting or ending a quoted argument, or appending a literal character (if the quote was escaped).
1283-
'"' => {
1284-
// Only append half the number of backslashes encountered, because this is an escaped string.
1285-
// This will reduce `backslash_count` to either 0 or 1.
1286-
Self::append_backslashes_to(&mut arg, &mut backslash_count, 2);
1287-
match backslash_count == 0 {
1288-
// If there are no remaining encountered backslashes,
1289-
// then we have found either the start or end of a quoted argument.
1290-
true => in_quotes = !in_quotes,
1291-
// The quote character is escaped, so it is treated as a literal and appended to the arg string.
1292-
false => {
1293-
backslash_count = 0;
1294-
arg.push('"');
1295-
}
1296-
}
1297-
}
1298-
// If whitespace is encountered, only preserve it if we are currently in quotes.
1299-
// Otherwise it marks the end of the current argument.
1300-
' ' | '\t' | '\n' | '\r' => {
1301-
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
1302-
// If not in a quoted string, then this is the end of the argument.
1303-
if !in_quotes {
1304-
break;
1305-
}
1306-
// Otherwise, the whitespace must be preserved in the argument.
1307-
arg.push(c);
1308-
}
1309-
// All other characters treated as is
1310-
_ => {
1311-
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
1312-
arg.push(c);
1313-
}
1314-
}
1315-
}
1316-
1317-
// Flush any backslashes at the end of the string.
1318-
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
1319-
// Save the current remaining characters for the next step in the iterator.
1320-
self.file_content = chars.as_str();
1321-
1322-
Some(arg)
1323-
}
1324-
}
1325-
13261203
#[cfg(test)]
13271204
mod test {
13281205
use std::str::FromStr;

src/compiler/response_file.rs

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/// An iterator over the arguments in a response file.
2+
///
3+
/// This produces results identical to `CommandLineToArgvW` except in the
4+
/// following cases:
5+
///
6+
/// 1. When passed an empty string, CommandLineToArgvW returns the path to the
7+
/// current executable file. Here, the iterator will simply be empty.
8+
/// 2. CommandLineToArgvW interprets the first argument differently than the
9+
/// rest. Here, all arguments are treated in identical fashion.
10+
///
11+
/// Parsing rules:
12+
///
13+
/// - Arguments are delimited by whitespace (a space, tab, line feed, or carriage return).
14+
/// - A string surrounded by double quotes is interpreted as a single argument.
15+
/// - Backslashes are interpreted literally unless followed by a double quote.
16+
/// - 2n backslashes followed by a double quote reduce to n backslashes and we
17+
/// enter the "in quote" state.
18+
/// - 2n+1 backslashes followed by a double quote reduces to n backslashes,
19+
/// we do *not* enter the "in quote" state, and the double quote is
20+
/// interpreted literally.
21+
///
22+
/// References:
23+
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx
24+
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
25+
#[derive(Clone, Debug)]
26+
pub struct SplitResponseFileArgs<'a> {
27+
/// String slice of the file content that is being parsed.
28+
/// The slice is re-pointed at the not-yet-parsed remainder each time the iterator advances; the underlying text is never modified.
29+
file_content: &'a str,
30+
}
31+
32+
impl<'a, T> From<&'a T> for SplitResponseFileArgs<'a>
33+
where
34+
T: AsRef<str> + 'static,
35+
{
36+
fn from(file_content: &'a T) -> Self {
37+
Self {
38+
file_content: file_content.as_ref(),
39+
}
40+
}
41+
}
42+
43+
impl<'a> SplitResponseFileArgs<'a> {
44+
/// Appends backslashes to `target` by decrementing `count`.
45+
/// One backslash is appended for every `step` counted: `count` is decremented by `step` per appended backslash, and any remainder smaller than `step` is left in `count`.
46+
fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) {
47+
while *count >= step {
48+
target.push('\\');
49+
*count -= step;
50+
}
51+
}
52+
}
53+
54+
impl<'a> Iterator for SplitResponseFileArgs<'a> {
55+
type Item = String;
56+
57+
fn next(&mut self) -> Option<String> {
58+
let mut in_quotes = false;
59+
let mut backslash_count: usize = 0;
60+
61+
// Strip any leading whitespace before relevant characters
62+
let is_whitespace = |c| matches!(c, ' ' | '\t' | '\n' | '\r');
63+
self.file_content = self.file_content.trim_start_matches(is_whitespace);
64+
65+
if self.file_content.is_empty() {
66+
return None;
67+
}
68+
69+
// The argument string to return, built by analyzing the current slice in the iterator.
70+
let mut arg = String::new();
71+
// All characters still in the string slice. Will be mutated by consuming
72+
// values until the current arg is built.
73+
let mut chars = self.file_content.chars();
74+
// Build the argument by evaluating each character in the string slice.
75+
for c in &mut chars {
76+
match c {
77+
// In order to handle the escape character based on the char(s) which come after it,
78+
// they are counted instead of appended literally, until a non-backslash character is encountered.
79+
'\\' => backslash_count += 1,
80+
// Either starting or ending a quoted argument, or appending a literal character (if the quote was escaped).
81+
'"' => {
82+
// Backslashes immediately before a double quote act as escapes, so only half of them are appended: each pair collapses to one literal backslash.
83+
// This will reduce `backslash_count` to either 0 or 1.
84+
Self::append_backslashes_to(&mut arg, &mut backslash_count, 2);
85+
match backslash_count == 0 {
86+
// If there are no remaining encountered backslashes,
87+
// then we have found either the start or end of a quoted argument.
88+
true => in_quotes = !in_quotes,
89+
// The quote character is escaped, so it is treated as a literal and appended to the arg string.
90+
false => {
91+
backslash_count = 0;
92+
arg.push('"');
93+
}
94+
}
95+
}
96+
// If whitespace is encountered, only preserve it if we are currently in quotes.
97+
// Otherwise it marks the end of the current argument.
98+
' ' | '\t' | '\n' | '\r' => {
99+
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
100+
// If not in a quoted string, then this is the end of the argument.
101+
if !in_quotes {
102+
break;
103+
}
104+
// Otherwise, the whitespace must be preserved in the argument.
105+
arg.push(c);
106+
}
107+
// All other characters treated as is
108+
_ => {
109+
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
110+
arg.push(c);
111+
}
112+
}
113+
}
114+
115+
// Flush any backslashes at the end of the string.
116+
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
117+
// Save the current remaining characters for the next step in the iterator.
118+
self.file_content = chars.as_str();
119+
120+
Some(arg)
121+
}
122+
}
123+
124+
#[cfg(test)]
125+
mod test {
126+
use super::*;
127+
128+
#[test]
129+
fn parse_simple_args() {
130+
let content = "-A1 -A2 -A3 -I ../includes";
131+
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
132+
assert_eq!(args[0], "-A1");
133+
assert_eq!(args[1], "-A2");
134+
assert_eq!(args[2], "-A3");
135+
assert_eq!(args[3], "-I");
136+
assert_eq!(args[4], "../includes");
137+
}
138+
139+
#[test]
140+
fn parse_quoted_path_arg() {
141+
let content = "-I \"../included headers\"";
142+
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
143+
assert_eq!(args[0], "-I");
144+
assert_eq!(args[1], "../included headers");
145+
}
146+
147+
#[test]
148+
fn parse_escaped_quoted_path_arg() {
149+
let content = "-I \"../included \\\"headers\\\"\"";
150+
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
151+
assert_eq!(args[0], "-I");
152+
assert_eq!(args[1], "../included \"headers\"");
153+
}
154+
155+
#[test]
156+
fn parse_various_whitespace_characters() {
157+
let content = "-A1 -A2\n-A3\n\r-A4\r-A5\n ";
158+
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
159+
assert_eq!(args[0], "-A1");
160+
assert_eq!(args[1], "-A2");
161+
assert_eq!(args[2], "-A3");
162+
assert_eq!(args[3], "-A4");
163+
assert_eq!(args[4], "-A5");
164+
}
165+
}

0 commit comments

Comments
 (0)